#version 430 core
// Number of invocations in one work group. Must equal
// local_size_x * local_size_y of the compute layout (16 x 16); the
// shared-memory reduction loops start at NumThreads / 2.
const int NumThreads = 16 * 16;

// GLSL stand-in for the HLSL intrinsic of the same name.
// First issues groupMemoryBarrier() so memory writes made by this invocation
// are ordered with respect to the other invocations of the work group, then
// waits at barrier() until every invocation of the work group has reached
// this point. The order of the two calls matters: the memory barrier must
// come before the execution barrier.
void GroupMemoryBarrierWithGroupSync() {
    groupMemoryBarrier();
    barrier();
}
// Converts a depth value d in [0, 1] into a distance between the clip planes.
//   d     - depth value; presumably a depth-buffer sample written by a
//           standard perspective projection (confirm against the caller)
//   zNear - near clip-plane distance
//   zFar  - far clip-plane distance
// Returns zNear for d == 0 and zFar for d == 1.
float linearize_depth(float d,float zNear,float zFar)
{
    // Remap [0, 1] to [-1, 1].
    float ndcZ = d * 2.0 - 1.0;
    float clipSpan = zFar - zNear;
    float denominator = zFar + zNear - ndcZ * clipSpan;
    return 2.0 * zNear * zFar / denominator;
}
// Work-group dimensions: 16 x 16 invocations (NumThreads in total).
layout(local_size_x = 16, local_size_y = 16) in;

// Edge length, in texels, of the square tile that one work group reduces to a
// single output texel. Must match local_size_x / local_size_y above, because
// each invocation reads exactly one texel of the tile.
const uint ReductionTGSize = 16u;

// Per-invocation (min, max) depth pairs, reduced in place by the work group.
shared vec2 depthSamples[NumThreads];

// Clip-plane distances used to linearize and normalize depth-buffer samples.
uniform float nearClip;
uniform float farClip;

// Depth buffer read by the initial reduction pass.
uniform sampler2D depthTexture;

// Input of the subsequent reduction passes: (min, max) depth per texel.
layout(rg32f) uniform image2D ReductionMap;
// Output of every pass: one (min, max) texel per work group.
layout(rg32f) uniform image2D outputMap;
// Initial pass of the depth reduction.
// Each invocation fetches one texel of depthTexture, converts it to a
// normalized linear depth in [0, 1], and the work group then reduces its
// ReductionTGSize x ReductionTGSize tile to a single (min, max) pair that is
// written to outputMap at the work group's coordinate.
//
// Reads the file-scope names depthTexture, nearClip, farClip,
// ReductionTGSize and NumThreads, and uses the shared array depthSamples as
// scratch space. Texels with depth >= 1.0 are ignored, so a tile with no
// contributing texel yields (1.0, 0.0).
void DepthReductionInitialCS()
{
    // Sentinels: identity elements for min/max over the [0, 1] range.
    float minDepth = 1.0;
    float maxDepth = 0.0;

    ivec2 texSize = textureSize(depthTexture, 0);

    // Work groups that overhang the texture edge re-read the last valid texel,
    // which is harmless for a min/max reduction.
    ivec2 samplePos = ivec2(gl_WorkGroupID.xy * uint(ReductionTGSize) + gl_LocalInvocationID.xy);
    samplePos = min(samplePos, texSize - 1);

    float depthSample = texelFetch(depthTexture, samplePos, 0).r;

    if (depthSample < 1.0)
    {
        // Convert to linear Z, then normalize to [0, 1]. linearize_depth
        // returns a distance in [nearClip, farClip]; without normalization
        // the 1.0 / 0.0 sentinels above (and the "0.0 means empty" check of
        // the later passes) would clip every distance greater than 1.
        depthSample = linearize_depth(depthSample, nearClip, farClip);
        depthSample = clamp((depthSample - nearClip) / (farClip - nearClip), 0.0, 1.0);
        minDepth = min(minDepth, depthSample);
        maxDepth = max(maxDepth, depthSample);
    }

    // Store in shared memory
    depthSamples[gl_LocalInvocationIndex] = vec2(minDepth, maxDepth);
    GroupMemoryBarrierWithGroupSync();

    // Reduce: halve the number of active invocations each step; every active
    // invocation folds in the entry s slots above its own.
    for (uint s = uint(NumThreads) / 2u; s > 0u; s >>= 1u)
    {
        if (gl_LocalInvocationIndex < s)
        {
            vec2 mine = depthSamples[gl_LocalInvocationIndex];
            vec2 other = depthSamples[gl_LocalInvocationIndex + s];
            depthSamples[gl_LocalInvocationIndex] = vec2(min(mine.x, other.x), max(mine.y, other.y));
        }

        // Outside the branch so that every invocation reaches the barrier.
        GroupMemoryBarrierWithGroupSync();
    }

    // Invocation 0 holds the tile's final result.
    if (gl_LocalInvocationIndex == 0u)
    {
        // imageStore takes an ivec2 coordinate and a vec4 value; only .xy is
        // kept by the two-channel image format.
        imageStore(outputMap, ivec2(gl_WorkGroupID.xy), vec4(depthSamples[0], 0.0, 0.0));
    }
}

// Subsequent passes of the depth reduction.
// Each invocation loads one (min, max) texel of ReductionMap, and the work
// group reduces its ReductionTGSize x ReductionTGSize tile to a single
// (min, max) pair written to outputMap at the work group's coordinate.
//
// Reads the file-scope names ReductionMap, ReductionTGSize and NumThreads,
// and uses the shared array depthSamples as scratch space.
void DepthReductionCS()
{
    ivec2 texSize = imageSize(ReductionMap);

    // Work groups that overhang the image edge re-read the last valid texel,
    // which is harmless for a min/max reduction.
    ivec2 samplePos = ivec2(gl_WorkGroupID.xy * uint(ReductionTGSize) + gl_LocalInvocationID.xy);
    samplePos = min(samplePos, texSize - 1);

    vec2 depthRange = imageLoad(ReductionMap, samplePos).xy;
    float minDepth = depthRange.x;
    float maxDepth = depthRange.y;

    // A minimum of exactly 0.0 is replaced by 1.0, the identity for min, so
    // that it cannot win the reduction (presumably 0.0 marks a texel that was
    // never written - confirm against how ReductionMap is cleared).
    if (minDepth == 0.0)
        minDepth = 1.0;

    // Store in shared memory
    depthSamples[gl_LocalInvocationIndex] = vec2(minDepth, maxDepth);
    GroupMemoryBarrierWithGroupSync();

    // Reduce: halve the number of active invocations each step; every active
    // invocation folds in the entry s slots above its own.
    for (uint s = uint(NumThreads) / 2u; s > 0u; s >>= 1u)
    {
        if (gl_LocalInvocationIndex < s)
        {
            vec2 mine = depthSamples[gl_LocalInvocationIndex];
            vec2 other = depthSamples[gl_LocalInvocationIndex + s];
            depthSamples[gl_LocalInvocationIndex] = vec2(min(mine.x, other.x), max(mine.y, other.y));
        }

        // Outside the branch so that every invocation reaches the barrier.
        GroupMemoryBarrierWithGroupSync();
    }

    // Invocation 0 holds the tile's final result.
    if (gl_LocalInvocationIndex == 0u)
    {
        // imageStore takes an ivec2 coordinate and a vec4 value; only .xy is
        // kept by the two-channel image format.
        imageStore(outputMap, ivec2(gl_WorkGroupID.xy), vec4(depthSamples[0], 0.0, 0.0));
    }
}