#include "/lib/core.glsl"
#include "/lib/config.glsl"

// Each work group runs 32 x 32 x 1 invocations; main() maps one invocation to one texel.
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
// NOTE(review): presumably read by the shader loader (OptiFine/Iris convention) to size the
// dispatch relative to the render resolution — confirm. Keep the declaration format intact.
const vec2 workGroupsRender = vec2(1.0, 1.0);

// Not referenced in the visible part of this file; presumably used by an included library — confirm.
uniform float frameTimeCounter;
// Scales the invocation ID to the screen coordinate used for depth reprojection in main()
// (presumably 1.0 / resolution — confirm).
uniform vec2 pixSize;
// Subtracted from the decoded light positions in main().
uniform vec3 cameraPositionFract;
// Inverse matrices used in main() to go from NDC to view space and on to the space `plr` lives in.
uniform mat4 gbufferModelViewInverse, gbufferProjectionInverse;
// colortex3: normals, decoded in main() with octa_decode (xy = face normal, zw = texture normal).
// depthtex0: depth, read from the red channel.
uniform sampler2D colortex3, depthtex0;
// Packed per-pixel data; main() reads two 13-bit light values and a 4-bit emission from the red channel.
uniform usampler2D colortex2;

// Color image that main() both loads from and stores the shaded result to.
uniform layout(HDR_IMG_FMT) restrict image2D colorimg1;

#ifdef END
	#include "/lib/rand.glsl"
#endif

#include "/lib/fast_math.glsl"
#include "/lib/luminance.glsl"
#include "/lib/octa_normal.glsl"
#include "/lib/srgb.glsl"
#include "/lib/skylight.glsl"
#include "/lib/fog.glsl"

// Resources for indexed block lighting: the light list plus the per-work-group
// scratch state main() uses to cull that list down to the lights near its pixels.
#if INDEXED_BLOCK_LIGHT
	// Packed light entries, fetched by index in main() (.rg = position/brightness word, color word).
	uniform usampler1D lightIndexS;
	// `shared` here is the block memory-layout qualifier, not workgroup-shared storage.
	// len: number of entries main() iterates over; offset: added to the decoded light positions.
	layout(shared, binding = 2) restrict readonly buffer indexRead { uint len; vec3 offset; } index_read;

	// Work-group copy of index_read.len, written by invocation 0.
	shared uint sh_global_index_len;
	// Counter incremented with atomicAdd for every light that passes the culling test in main().
	shared uint sh_index_len;
	// Work-group copy of index_read.offset, written by invocation 0.
	shared vec3 sh_index_offset;
	// Integer bounding box of the work group's lit pixels, built with atomicMin/atomicMax in main().
	shared ivec3 sh_bb_min;
	shared ivec3 sh_bb_max;
	// Work-group light list filled by the culling loop; holds uint(INDEX_SIZE * LDS_RATIO) entries.
	shared uvec2[uint(INDEX_SIZE * LDS_RATIO)] sh_index;
#endif

// Deferred shading pass, one invocation per texel of colorimg1.
//
// 1. Reconstructs the pixel position (`plr`) from depthtex0 using the inverse
//    projection and inverse model-view matrices.
// 2. Unpacks the light levels and emission from colortex2.
// 3. With INDEXED_BLOCK_LIGHT: the work group builds a bounding box around its
//    lit pixels, culls the global light list against it into shared memory,
//    and each lit pixel accumulates the lights that reach it.
// 4. Applies sky/block lighting and fog, then stores the result in colorimg1.
//
// Every barrier() below is reached by all invocations of the work group; keep
// it that way when adding early-outs.
void main() {
	immut ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
	immut float depth = texelFetch(depthtex0, texel, 0).r;

	// Texel center -> [0, 1] screen coordinate -> NDC -> view space -> `plr`
	immut vec2 coord = fma(gl_GlobalInvocationID.xy, pixSize, 0.5 * pixSize);
	immut vec4 view_undiv = gbufferProjectionInverse * vec4(fma(vec3(coord, depth), vec3(2.0), vec3(-1.0)), 1.0);
	immut vec3 view = view_undiv.xyz / view_undiv.w;
	immut vec3 plr = (gbufferModelViewInverse * vec4(view, 1.0)).xyz;
	immut float dist = length(plr);

	// Pixels at or beyond fogState.x are left untouched by this pass
	immut bool geometry = dist < fogState.x;

	// Defaults for non-geometry pixels; they still take part in the barriers below
	uint emission = 0u;
	vec2 light = vec2(0.0);

	if (geometry) {
		// Packed layout: bits 0-12 -> light.x, bits 13-25 -> light.y, bits 26-29 -> emission
		immut uint gbuffer_data = texelFetch(colortex2, texel, 0).r;

		emission = bitfieldExtract(gbuffer_data, 26, 4);
		light = vec2(
			bitfieldExtract(gbuffer_data, 0, 13),
			bitfieldExtract(gbuffer_data, 13, 13)
		) / 8191.0;
	}

	#if INDEXED_BLOCK_LIGHT
		// Invocation 0 resets the work group's shared state
		if (gl_LocalInvocationIndex == 0u) {
			sh_global_index_len = index_read.len;
			sh_index_offset = index_read.offset;
			sh_index_len = 0u;

			// Start with an inverted (empty) box so the first atomicMin/atomicMax defines it
			sh_bb_min = ivec3(INDEX_DIST);
			sh_bb_max = ivec3(-INDEX_DIST);
		}

		barrier();

		immut bool lit = light.x >= 1.0/15.0 && dist < float(INDEX_DIST) && emission <= 12u; // arbitrary limit ig. could be a #define

		if (lit) {
			// Round each axis to the nearest integer: offset by half a unit in the
			// direction of that axis' own sign, then truncate.
			immut ivec3 i_plr = ivec3(fma(sign(plr), vec3(0.5), plr));

			atomicMin(sh_bb_min.x, i_plr.x); atomicMax(sh_bb_max.x, i_plr.x);
			atomicMin(sh_bb_min.y, i_plr.y); atomicMax(sh_bb_max.y, i_plr.y);
			atomicMin(sh_bb_min.z, i_plr.z); atomicMax(sh_bb_max.z, i_plr.z);
		}

		barrier();

		immut vec3 bb_min = sh_bb_min;
		immut vec3 bb_max = sh_bb_max;
		immut vec3 index_offset = sh_index_offset;

		// The box is only valid if at least one invocation was lit
		if (all(greaterThanEqual(bb_max, bb_min))) {
			immut vec3 offset = -255.5 - cameraPositionFract + index_offset;
			immut uint global_index_len = sh_global_index_len;
			immut uint group_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y;

			// The invocations of the work group split the global light list between them
			for (uint i = gl_LocalInvocationIndex; i < global_index_len; i += group_size) {
				immut uvec2 light_data = texelFetch(lightIndexS, int(i), 0).rg;

				// light_data.x: three 9-bit position components, then 4 bits of brightness
				immut vec3 pos = vec3(
					bitfieldExtract(light_data.x, 0, 9),
					bitfieldExtract(light_data.x, 9, 9),
					bitfieldExtract(light_data.x, 18, 9)
				) + offset;

				immut float brightness = bitfieldExtract(light_data.x, 27, 4) + 1.0;

				// this may not be the best kind of culling but it works alright (world space bounding box)
				// it may be unnecessarily big when looking diagonally
				// maybe mix this with view or clip space tile frustum culling?
				if (all(greaterThanEqual(pos + brightness, bb_min)) && all(lessThanEqual(pos - brightness, bb_max))) {
					// Always count the light, but only store it while there is room:
					// writing past the end of sh_index is undefined behavior.
					immut uint slot = atomicAdd(sh_index_len, 1u);

					if (slot < uint(sh_index.length())) {
						sh_index[slot] = light_data;
					}
				}
			}
		}

		barrier();
	#endif

	if (geometry) {
		vec4 color_s = imageLoad(colorimg1, texel);

		#ifdef NETHER
			const vec3 sky_light = vec3(0.3, 0.15, 0.2);
		#elif defined END
			const vec3 sky_light = vec3(0.15, 0.075, 0.2);
		#else
			// Non-linear ramp of light.y scaled by the clamped skyState.y, with a floor of 0.1
			immut float sky_light = 1.0 - lowp_sqrt(1.0 - max(0.1, light.y * clamp(skyState.y * 10.0, 0.25, 1.0)));
		#endif

		#ifdef LIGHT_LEVELS
			// light.x is normalized to [0, 1] while the thresholds below are expressed
			// in light levels, so convert first (assumes 1.0 == level 15, matching the
			// 1.0/15.0 threshold used for `lit`).
			immut float light_level = light.x * 15.0;

			vec3 block_light = light.x > 0.0 ? mix(
				vec3(0.5, 0.25, 0.0),
				mix(
					vec3(0.75, 0.5, 0.0),
					mix(
						vec3(1.0, 0.75, 0.0),
						vec3(1.0, 1.0, 1.0),
						smoothstep(8.0, 15.0, light_level)
					),
					smoothstep(8.0, 11.0, light_level)
				),
				smoothstep(1.0, 7.0, light_level)
			) : vec3(0.5, 0.0, 0.0);
		#else
			vec3 block_light = light.xxx;
		#endif

		immut vec3 n_plr = plr / dist;

		#if INDEXED_BLOCK_LIGHT
			if (lit) {
				vec3 shine = vec3(0.0);

				immut vec4 octa_normal = texelFetch(colortex3, texel, 0);
				immut vec3 w_face_normal = normalize(octa_decode(octa_normal.xy));
				immut vec3 w_tex_normal = normalize(octa_decode(octa_normal.zw));

				immut float reflectance = color_s.a * float(SPECULAR);
				immut float smoothness = lowp_sqrt(color_s.a) + 1.0;

				/*
					immut float reflectance = gb_smoothness * gb_smoothness * float(SPECULAR * 3);
					immut float smoothness = fma(lowp_sqrt(gb_smoothness), 3.0, 1.0);
				*/

				// Same offset as in the culling loop, additionally made relative to this pixel
				immut vec3 offset = -255.5 - cameraPositionFract - plr + index_offset;

				// sh_index_len counts every light that passed culling, which can exceed
				// what sh_index holds; only the stored entries may be read.
				immut uint index_len = min(sh_index_len, uint(sh_index.length()));

				for (uint i = 0u; i < index_len; ++i) {
					immut uvec2 light_data = sh_index[i];

					immut vec3 pos = vec3(
						bitfieldExtract(light_data.x, 0, 9),
						bitfieldExtract(light_data.x, 9, 9),
						bitfieldExtract(light_data.x, 18, 9)
					) + offset;

					immut float pos_len = length(pos);
					immut float brightness = bitfieldExtract(light_data.x, 27, 4);

					// Linear falloff: the light contributes nothing at or beyond `brightness` units
					if (pos_len <= brightness) {
						immut vec3 n_pos = pos * lowp_rcp(pos_len);
						immut float tex_lambertian = dot(w_tex_normal, n_pos);

						// Surfaces facing away from the light only receive the IND_ILLUM term
						immut bool facing = tex_lambertian > 0.01 && dot(w_face_normal, n_pos) > 0.01;
						immut float lighting = facing ? fma(pow(max(dot(n_plr, n_pos), 0.0), smoothness), reflectance, tex_lambertian) : IND_ILLUM;

						// light_data.y: color fields of 10, 11 and 11 bits (from the high bits down)
						shine = fma(vec3(
							bitfieldExtract(light_data.y, 22, 10),
							bitfieldExtract(light_data.y, 11, 11),
							bitfieldExtract(light_data.y, 0, 11)
						), (lighting * (brightness - pos_len)).xxx, shine);
					}

					// maybe it wouldn't be too hard to path trace here
				}

				if (dot(shine, vec3(1.0)) > 0.01) {
					// Undo the multiplication from packing light color and brightness
					// NOTE(review): the fields read above are 10, 11 and 11 bits wide (x, y, z)
					// while this divisor is (2047, 2047, 1023) — confirm against the packing code.
					shine /= 15.0 * vec3(2047.0, 2047.0, 1023.0);

					// Tint the vanilla block light with the luminance-normalized light color,
					// then add the accumulated light itself
					immut vec3 light_color = normalize(shine);
					vec3 new_light = block_light * light_color * lowp_rcp(luminance(light_color));
					new_light = fma((light.x * float(INDEXED_BLOCK_LIGHT)).xxx, shine, new_light);

					// Fade the indexed lighting out over the last 15 units before INDEX_DIST
					block_light = mix(block_light, new_light, max(block_light - smoothstep(float(INDEX_DIST - 15), float(INDEX_DIST), dist), 0.0));
				}
			}

			// Debug culling & LDS overflow
			// color.gb += vec2(sh_index_len < sh_global_index_len, sh_index_len == 0);
			// if (sh_index_len > uint(INDEX_SIZE * LDS_RATIO)) color *= 10;
		#endif

		vec3 color = color_s.rgb;
		color *= fma(pow(fma(float(emission), 0.25, luminance(color)), 4u) * 0.0175 + block_light, vec3(1.0, 0.8, 0.7) * lowp_rcp(1.0 + sky_light), vec3(sky_light));

		immut float fog_factor = fog(dist);

		#ifdef NETHER
			color = mix(color, linear(fogColor), fog_factor);
		#elif defined END
			color = mix(color, sky(n_plr), fog_factor);
		#else
			color = mix(color, sky(sky_fog(max(n_plr.y, 0.0)), normalize(view)), fog_factor);
		#endif

		imageStore(colorimg1, texel, vec4(
			color,
			0.0
		));
	}
}