#include "/lib/core.glsl"
#include "/lib/config.glsl"

/* Deferred Shadow Map Sampling */

// One workgroup is 16x16 invocations; main() adds gl_LocalInvocationID.xy to a
// per-workgroup tile origin, so each workgroup covers one 16x16 block of pixels.
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;

// Provides `indirect_sm_lit.coords`, which main() indexes with gl_WorkGroupID.x
// to find the tile origin. The `readonly` token is deliberately placed directly
// before the include so it qualifies the first declaration in that file
// (presumably the buffer block itself - TODO confirm against the include).
readonly
#include "/buf/indirect_sm_lit.glsl"

// shadowLightDirectionPlr: light direction that main() dots with the decoded normals.
// shadowProjScale: per-component scale applied after the 3x3 part of shadowModelView.
uniform vec3 shadowLightDirectionPlr, shadowProjScale;
// gbufferProjectionInverse / gbufferModelViewInverse: used to rebuild a position from depth.
// shadowModelView: only its 3x3 part is applied in main().
uniform mat4 gbufferModelViewInverse, gbufferProjectionInverse, shadowModelView;
// colortex1: rgb is passed through linear(); alpha < 0 selects the hand path and
//            1 - sqrt(|alpha|) is used as roughness.
// colortex2: xy and zw are each fed to octa_decode() (texture normal and face normal).
// depthtex0: depth value fetched per texel.
uniform sampler2D colortex1, colortex2, depthtex0;
// Output image: main() stores the final light colour here (alpha written as 0.0).
uniform layout(r11f_g11f_b10f) restrict writeonly image2D deferredLight1;

// Optional buffer, only included when HAND_LIGHT is defined. Same `readonly`
// placement as above. It is not referenced by main() in this file.
#ifdef HAND_LIGHT
	readonly
	#include "/buf/hand_light.glsl"
#endif

#include "/lib/view.glsl"
#include "/lib/skylight.glsl"
#include "/lib/sample_shadow.glsl"
#include "/lib/distort.glsl"
#include "/lib/octa_normal.glsl"
#include "/lib/srgb.glsl"

// Unprojects an NDC position with gbufferProjectionInverse, then applies the
// 3x3 part of gbufferModelViewInverse (the translation column is not applied here).
vec3 sm_lit_ndc_to_pe(vec3 ndc_pos) {
	immut vec4 unprojected = gbufferProjectionInverse * vec4(ndc_pos, 1.0);
	return mat3(gbufferModelViewInverse) * unprojected.xyz / unprojected.w;
}

// Per-pixel shadow-mapped lighting for the tiles listed in indirect_sm_lit.
// Pixels whose face normal does not point towards the shadow light are left
// untouched (nothing is stored for them).
void main() {
	// Pixel handled by this invocation: tile origin (one entry per workgroup)
	// plus the invocation's offset inside the 16x16 workgroup.
	#ifdef REAL_INT16
		immut i16vec2 texel = indirect_sm_lit.coords[gl_WorkGroupID.x] + i16vec2(gl_LocalInvocationID.xy);
	#else
		// Without 16-bit integers the origin is packed as x in the low 16 bits, y in the high 16 bits.
		immut uint packed_tile = indirect_sm_lit.coords[gl_WorkGroupID.x];
		immut uvec2 tile_origin = uvec2(bitfieldExtract(packed_tile, 0, 16), bitfieldExtract(packed_tile, 16, 16));
		immut ivec2 texel = ivec2(tile_origin + gl_LocalInvocationID.xy);
	#endif

	// colortex2 carries two encoded normals: xy (texture normal) and zw (face normal).
	immut f16vec4 packed_normals = f16vec4(texelFetch(colortex2, texel, 0));
	immut f16vec3 n_w_light = f16vec3(shadowLightDirectionPlr);
	immut f16vec3 w_geo_normal = normalize(octa_decode(packed_normals.zw));
	immut float16_t face_n_dot_l = dot(w_geo_normal, n_w_light);

	// Written as a negated `>` so that a NaN result is skipped as well.
	if (!(face_n_dot_l > float16_t(0.01))) return;

	// Rebuild the position from the depth buffer, sampling at the texel centre.
	immut float depth = texelFetch(depthtex0, texel, 0).r;
	immut vec2 px_size = 1.0 / vec2(view());
	immut vec2 screen_uv = fma(vec2(texel), px_size, 0.5 * px_size);
	immut vec3 ndc = fma(vec3(screen_uv, depth), vec3(2.0), vec3(-1.0));
	vec3 pe = sm_lit_ndc_to_pe(ndc);

	const float16_t sm_dist = float16_t(shadowDistance * shadowDistanceRenderMul);

	// Largest absolute component of the position, compared against sm_dist below.
	immut f16vec3 pe_magnitude = abs(f16vec3(pe));
	immut float16_t cube_dist = max3(pe_magnitude.x, pe_magnitude.y, pe_magnitude.z);

	f16vec3 light = skylight();

	if (cube_dist < sm_dist) {
		immut f16vec4 color_s = f16vec4(texelFetch(colortex1, texel, 0));
		immut float16_t roughness = float16_t(1.0) - sqrt(abs(color_s.a));

		// Negative alpha marks the hand: redo the unprojection with the depth
		// divided by MC_HAND_DEPTH. The distance test above deliberately still
		// used the uncorrected position (a little janky but correct).
		if (color_s.a < float16_t(0.0)) {
			vec3 hand_ndc = ndc;
			hand_ndc.z /= MC_HAND_DEPTH;
			pe = sm_lit_ndc_to_pe(hand_ndc);
		}

		// TODO: this bias is better than before, but it would probably be best to do it
		// in shadow screen space and offset by a scaled number of texels instead.
		immut f16vec2 bias = shadow_bias(face_n_dot_l);

		// bias.y pushes the sample along the face normal, bias.x pulls it in depth.
		immut vec3 normal_offset = vec3(float16_t(40.0 / shadowMapResolution) * bias.y * w_geo_normal);
		immut vec3 biased_pos = normal_offset + pe + gbufferModelViewInverse[3].xyz;
		vec3 s_ndc = shadowProjScale * (mat3(shadowModelView) * biased_pos);
		s_ndc.z -= bias.x / float16_t(shadowMapResolution);

		// Distort xy, then remap [-1, 1] to [0, 1] for the shadow lookup.
		immut vec3 shadow_uvz = fma(vec3(distort(s_ndc.xy), s_ndc.z), vec3(0.5), vec3(0.5));
		f16vec3 sm_light = sample_shadow(shadow_uvz);

		// Only shade when at least some light made it through the shadow lookup.
		if (dot(sm_light, f16vec3(1.0)) > float16_t(0.0)) {
			immut f16vec3 w_tex_normal = normalize(octa_decode(packed_normals.xy));
			immut float16_t tex_n_dot_l = dot(w_tex_normal, n_w_light);

			immut float16_t specular = brdf(w_tex_normal, normalize(pe), n_w_light, roughness) * float16_t(0.03);
			immut float16_t diffuse = tex_n_dot_l * (float16_t(1.0) - specular) / PI_16;

			// Clamp the linearised colour away from zero before dividing by it.
			immut f16vec3 safe_albedo = max(linear(color_s.rgb), float16_t(1.0e-4));
			sm_light *= float16_t(3.0) * (diffuse + specular / safe_albedo);
		}

		// Blend towards the constant 0.5 factor as the pixel approaches sm_dist.
		immut float16_t fade_start = float16_t(sm_dist * (1.0 - SHADOW_FADE_DIST));
		immut float16_t fade = smoothstep(fade_start, sm_dist, cube_dist);
		light *= mix(sm_light, f16vec3(0.5), fade);
	} else {
		// At or beyond sm_dist: use the same constant 0.5 factor.
		light *= float16_t(0.5);
	}

	imageStore(deferredLight1, texel, vec4(light, 0.0));
}