#include "/lib/core.glsl"

#include "/lib/tile.glsl"
// Work-group shape: a TILE x TILE square of invocations, one layer deep.
// TILE is presumably defined by /lib/tile.glsl — confirm there.
layout(local_size_x = TILE, local_size_y = TILE, local_size_z = 1) in;

// Write-only output image; main() stores one vec4 of blend weights per texel.
uniform restrict writeonly image2D blendWeight;
// edgeS: edge texture, only its .rg channels are read in this file.
// areatex / searchtex: lookup textures sampled by area(), area_diag() and search_length().
uniform sampler2D edgeS, areatex, searchtex;
// Scale applied to pixel-unit coordinates before sampling edgeS.
// Presumably 1.0 / render-target resolution — confirm against the host code.
uniform vec2 pixSize;

// FIXME: something in this pass produces wrong results; check it against the reference implementation linked below.

/*
	https://github.com/iryoku/smaa/blob/master/SMAA.hlsl

	Copyright (C) 2013 Jorge Jimenez (jorge@iryoku.com)
	Copyright (C) 2013 Jose I. Echevarria (joseignacioechevarria@gmail.com)
	Copyright (C) 2013 Belen Masia (bmasia@unizar.es)
	Copyright (C) 2013 Fernando Navarro (fernandn@microsoft.com)
	Copyright (C) 2013 Diego Gutierrez (diegog@unizar.es)

	Permission is hereby granted, free of charge, to any person obtaining a copy of
	this software and associated documentation files (the "Software"), to deal in
	the Software without restriction, including without limitation the rights to
	use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	of the Software, and to permit persons to whom the Software is furnished to
	do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software. As clarification, there
	is no requirement that the copyright notice and permission be included in
	binary distributions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.
*/

// --- Search limits -----------------------------------------------------------
// Iteration budget behind the axis-aligned search limits (see in_offset_2).
const uint SMAA_MAX_SEARCH_STEPS = 16u;
// Upper bound on loop iterations in search_diag_1() / search_diag_2().
const int SMAA_MAX_SEARCH_STEPS_DIAG = 8;

// --- Corner rounding ---------------------------------------------------------
// Expressed in percent, plus the same value as a 0..1 fraction.
const uint SMAA_CORNER_ROUNDING = 25u;
const float SMAA_CORNER_ROUNDING_NORM = float(SMAA_CORNER_ROUNDING) / 100.0;

// --- Area texture layout -----------------------------------------------------
// Multipliers that place an edge pattern inside the area texture
// (see area() and area_diag()).
const float SMAA_AREATEX_MAX_DISTANCE = 16.0;
const float SMAA_AREATEX_MAX_DISTANCE_DIAG = 20.0;
// Reciprocal of 160 x 560, used to turn texel positions into UVs.
const vec2 SMAA_AREATEX_PIXEL_SIZE = vec2(1.0 / 160.0, 1.0 / 560.0);
const float SMAA_AREATEX_SUBTEX_SIZE = 1.0/7.0;

// --- Search texture layout ---------------------------------------------------
// Nominal and packed sizes used by search_length() to build its lookup UV.
const vec2 SMAA_SEARCHTEX_SIZE = vec2(66.0, 33.0);
const vec2 SMAA_SEARCHTEX_PACKED_SIZE = vec2(64.0, 16.0);

// Integer coordinate of the texel handled by this invocation.
immut ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
// The same position moved to the texel centre (+0.5), still in pixel units.
immut vec2 frag_coord = gl_GlobalInvocationID.xy + 0.5;
// Texel centre scaled by pixSize; used as the base coordinate for filtered samples.
immut vec2 n_pix = frag_coord * pixSize;

// Start coordinates for the horizontal searches: .xy feeds search_x_left(), .zw feeds search_x_right().
immut vec4 in_offset_0 = fma(pixSize.xyxy, vec4(-0.25, -0.125, 1.25, -0.125), n_pix.xyxy);
// Start coordinates for the vertical searches: .xy feeds search_y_up(), .zw feeds search_y_down().
immut vec4 in_offset_1 = fma(pixSize.xyxy, vec4(-0.125, -0.25, -0.125, 1.25), n_pix.xyxy);
// Loop limits for the four searches (x: left, y: right, z: up, w: down):
// each start coordinate pushed out by 2 * SMAA_MAX_SEARCH_STEPS texels.
immut vec4 in_offset_2 = pixSize.xxyy * vec4(-2.0, 2.0, -2.0, 2.0) * SMAA_MAX_SEARCH_STEPS + vec4(in_offset_0.xz, in_offset_1.yw);

// Decodes a filtered edge sample into per-channel 0/1 flags.
// Only the red channel is remapped (r * |5r - 3.75|) before rounding;
// green is rounded as-is. For r in {0, 0.25, 0.75, 1} this yields {0, 1, 0, 1}.
vec2 decode_diag_bilinear_access(vec2 e) {
	float remap = abs(5.0 * e.r - 3.75);
	return round(vec2(e.r * remap, e.g));
}

// Four-component overload: treats `e` as two packed (r, g) pairs.
// The first channel of each pair (x and z) is remapped with v * |5v - 3.75|;
// all four channels are then rounded.
vec4 decode_diag_bilinear_access(vec4 e) {
	vec2 remap = abs(5.0 * e.rb - 3.75);
	vec2 decoded = e.rb * remap;
	return round(vec4(decoded.x, e.g, decoded.y, e.a));
}

// Walks away from the current texel along the diagonal `dir` (whole-texel steps),
// reading the edge texture at each step, until either the step budget
// (SMAA_MAX_SEARCH_STEPS_DIAG) is exhausted or the averaged edge value drops
// to 0.9 or below.
//
// dir: per-step texel offset, e.g. ivec2(-1, 1).
// e:   receives the .rg edge sample of the last texel visited.
// Returns vec2(index of the last step taken, averaged edge value at that step).
vec2 search_diag_1(ivec2 dir, out vec2 e) {
	float w = 1.0;
	int i = -1;

	while (i < SMAA_MAX_SEARCH_STEPS_DIAG - 1 && w > 0.9) {
		++i;

		// The reference (SMAASearchDiag1) advances the coordinate *before* sampling,
		// so step i must read the texel (i + 1) * dir away. Sampling at i * dir
		// starts on the current texel and shifts every returned distance by one.
		e = texelFetch(edgeS, (i + 1) * dir + texel, 0).rg;

		// Both channels set -> 1.0, one channel set -> 0.5, none -> 0.0.
		w = dot(e, vec2(0.5));
	}

	return vec2(i, w);
}

// Same walk as search_diag_1(), but samples edgeS through the filtered path at
// a coordinate shifted by a quarter texel in x, then decodes the result with
// decode_diag_bilinear_access().
//
// dir: per-step texel offset, e.g. ivec2(-1, -1).
// e:   receives the decoded edge flags of the last position visited.
// Returns vec2(index of the last step taken, averaged edge value at that step).
vec2 search_diag_2(ivec2 dir, out vec2 e) {
	vec2 coord = n_pix;
	coord.x += 0.25 * pixSize.x;

	immut vec2 n_dir = pixSize * dir;

	float w = 1.0;
	int i = -1;

	while (i < SMAA_MAX_SEARCH_STEPS_DIAG - 1 && w > 0.9) {
		++i;

		// The reference (SMAASearchDiag2) advances the coordinate *before* sampling,
		// so step i must read (i + 1) steps away from the start. Using i * n_dir
		// would sample the starting position first and inflate the distance by one.
		e = textureLod(edgeS, float(i + 1) * n_dir + coord, 0.0).rg;
		e = decode_diag_bilinear_access(e);

		w = dot(e, vec2(0.5));
	}

	return vec2(i, w);
}

// Looks up the diagonal coverage pair in the area texture.
// dist: distances to both ends of the diagonal; e: merged crossing-edge codes.
// The lookup position is (SMAA_AREATEX_MAX_DISTANCE_DIAG * e + dist), moved to
// the texel centre, converted to UV, then shifted by half the texture in x.
vec2 area_diag(vec2 dist, vec2 e) {
	vec2 texel_pos = (SMAA_AREATEX_MAX_DISTANCE_DIAG * e + dist) + 0.5;

	vec2 uv = SMAA_AREATEX_PIXEL_SIZE * texel_pos;
	uv.x += 0.5;

	return textureLod(areatex, uv, 0.0).rg;
}

// Computes the blend-weight contribution of both diagonal directions for the
// current texel.
//
// e: the .rg edge sample of the current texel.
// Returns the summed weights; vec2(0.0) when neither diagonal is long enough
// (distance sum must exceed 2.0).
vec2 calculate_diag_weights(vec2 e) {
	vec2 total = vec2(0.0);
	vec2 last_edge;

	// ---- First diagonal: directions (-1, 1) and (1, -1) ----
	// .xy hold the step counts per direction, .zw the averaged edge value at the last step.
	vec4 span_a = vec4(0.0);
	if (e.r > 0.0) {
		span_a.xz = search_diag_1(ivec2(-1, 1), last_edge);
		if (last_edge.y > 0.9) span_a.x += 1.0;
	}
	span_a.yw = search_diag_1(ivec2(1, -1), last_edge);

	if (span_a.x + span_a.y > 2.0) {
		// Sample positions at both ends of the line.
		immut vec4 end_offsets = vec4(0.25 - span_a.x, span_a.x, span_a.y, -span_a.y - 0.25);
		immut vec4 uv = fma(end_offsets, pixSize.xyxy, n_pix.xyxy);

		vec4 crossing;
		crossing.xy = textureLodOffset(edgeS, uv.xy, 0.0, ivec2(-1, 0)).rg;
		crossing.zw = textureLodOffset(edgeS, uv.zw, 0.0, ivec2(1, 0)).rg;
		crossing.yxwz = decode_diag_bilinear_access(crossing);

		// Pack the two crossing flags of each end into one code (2 * first + second).
		vec2 code = fma(vec2(2.0), crossing.xz, crossing.yw);
		// Drop the code for an end whose search stopped with the edge still active (>= 0.9).
		code = mix(code, vec2(0.0), bvec2(step(0.9, span_a.zw)));

		total += area_diag(span_a.xy, code);
	}

	// ---- Second diagonal: directions (-1, -1) and (1, 1) ----
	vec4 span_b = vec4(0.0);
	span_b.xz = search_diag_2(ivec2(-1, -1), last_edge);

	if (texelFetchOffset(edgeS, texel, 0, ivec2(1, 0)).r > 0.0) {
		span_b.yw = search_diag_2(ivec2(1, 1), last_edge);
		if (last_edge.y > 0.9) span_b.y += 1.0;
	}

	if (span_b.x + span_b.y > 2.0) {
		immut vec4 uv = fma(vec4(-span_b.xx, span_b.yy), pixSize.xyxy, n_pix.xyxy);

		immut float near_g = textureLodOffset(edgeS, uv.xy, 0.0, ivec2(-1, 0)).g;
		immut float near_r = textureLodOffset(edgeS, uv.xy, 0.0, ivec2(0, -1)).r;
		immut vec2 far_gr = textureLodOffset(edgeS, uv.zw, 0.0, ivec2(1, 0)).gr;

		vec2 code = fma(vec2(2.0), vec2(near_g, far_gr.x), vec2(near_r, far_gr.y));
		code = mix(code, vec2(0.0), bvec2(step(0.9, span_b.zw)));

		// The lookup result is added with its channels swapped for this diagonal.
		total += area_diag(span_b.xy, code).gr;
	}

	return total;
}

// Samples the search texture for an edge pair `e`.
// offset selects the horizontal region of the texture: callers pass 0.0 or 0.5.
// Scale and bias are built in texel units from SMAA_SEARCHTEX_SIZE and then
// divided by the packed size to obtain UVs.
float search_length(vec2 e, float offset) {
	const vec2 scale_texels = fma(SMAA_SEARCHTEX_SIZE, vec2(0.5, -1.0), vec2(-1.0, 1.0));
	const vec2 uv_scale = scale_texels / SMAA_SEARCHTEX_PACKED_SIZE;

	immut vec2 bias_texels = fma(SMAA_SEARCHTEX_SIZE, vec2(offset, 1.0), vec2(0.5, -0.5));
	immut vec2 uv_bias = bias_texels / SMAA_SEARCHTEX_PACKED_SIZE;

	immut vec2 uv = fma(e, uv_scale, uv_bias);
	return textureLod(searchtex, uv, 0.0).r;
}

// Searches toward -x, starting at in_offset_0.xy and moving two texels per iteration.
// Stops at the limit in_offset_2.x, when the green edge sample is 0.8281 or lower,
// or when the red channel is non-zero.
// Returns the x coordinate of the line end, refined with search_length().
float search_x_left() {
	immut vec2 stride = vec2(2.0, 0.0) * pixSize.xy;

	vec2 uv = in_offset_0.xy;
	vec2 edge = vec2(0.0, 1.0);

	for (; uv.x > in_offset_2.x && edge.g > 0.8281 && edge.r == 0.0; uv -= stride) {
		edge = textureLod(edgeS, uv, 0.0).rg;
	}

	immut float correction = 3.25 - 255.0/127.0 * search_length(edge, 0.0);
	return pixSize.x * correction + uv.x;
}

// Searches toward +x, starting at in_offset_0.zw and moving two texels per iteration.
// Stops at the limit in_offset_2.y, when the green edge sample is 0.8281 or lower,
// or when the red channel is non-zero.
// Returns the x coordinate of the line end, refined with search_length() (right half, offset 0.5).
float search_x_right() {
	immut vec2 stride = vec2(2.0, 0.0) * pixSize.xy;

	vec2 uv = in_offset_0.zw;
	vec2 edge = vec2(0.0, 1.0);

	for (; uv.x < in_offset_2.y && edge.g > 0.8281 && edge.r == 0.0; uv += stride) {
		edge = textureLod(edgeS, uv, 0.0).rg;
	}

	immut float correction = 3.25 - 255.0/127.0 * search_length(edge, 0.5);
	return -pixSize.x * correction + uv.x;
}

// Searches toward -y, starting at in_offset_1.xy and moving two texels per iteration.
// Stops at the limit in_offset_2.z, when the red edge sample is 0.8281 or lower,
// or when the green channel is non-zero.
// Returns the y coordinate of the line end; search_length() is queried with the
// channels swapped (.gr).
float search_y_up() {
	immut vec2 stride = vec2(0.0, 2.0) * pixSize.xy;

	vec2 uv = in_offset_1.xy;
	vec2 edge = vec2(1.0, 0.0);

	for (; uv.y > in_offset_2.z && edge.r > 0.8281 && edge.g == 0.0; uv -= stride) {
		edge = textureLod(edgeS, uv, 0.0).rg;
	}

	immut float correction = 3.25 - 255.0/127.0 * search_length(edge.gr, 0.0);
	return pixSize.y * correction + uv.y;
}

// Searches toward +y, starting at in_offset_1.zw and moving two texels per iteration.
// Stops at the limit in_offset_2.w, when the red edge sample is 0.8281 or lower,
// or when the green channel is non-zero.
// Returns the y coordinate of the line end; search_length() is queried with the
// channels swapped (.gr) and offset 0.5.
float search_y_down() {
	immut vec2 stride = vec2(0.0, 2.0) * pixSize.xy;

	vec2 uv = in_offset_1.zw;
	vec2 edge = vec2(1.0, 0.0);

	for (; uv.y < in_offset_2.w && edge.r > 0.8281 && edge.g == 0.0; uv += stride) {
		edge = textureLod(edgeS, uv, 0.0).rg;
	}

	immut float correction = 3.25 - 255.0/127.0 * search_length(edge.gr, 0.5);
	return -pixSize.y * correction + uv.y;
}

// Looks up the coverage pair for an axis-aligned pattern in the area texture.
// dist:   distances to both line ends (callers pass the square root of the pixel distances).
// e1, e2: crossing-edge samples at each end; multiplied by 4 and rounded to select the pattern.
vec2 area(vec2 dist, float e1, float e2) {
	/*
	Alternative using an integer fetch instead of a filtered sample:

	immut vec2 tex_coord = SMAA_AREATEX_MAX_DISTANCE * round(4.0 * vec2(e1, e2)) + dist;

	return texelFetch(areatex, ivec2(tex_coord + 0.5), 0).rg;
	*/

	vec2 pattern = round(4.0 * vec2(e1, e2));
	vec2 texel_pos = SMAA_AREATEX_MAX_DISTANCE * pattern + dist;

	// Scale to UV and add half a texel so the sample lands on the texel centre.
	vec2 uv = fma(SMAA_AREATEX_PIXEL_SIZE, texel_pos, 0.5 * SMAA_AREATEX_PIXEL_SIZE);

	return textureLod(areatex, uv, 0.0).rg;
}

// Splits the rounding strength (1 - SMAA_CORNER_ROUNDING_NORM) between the two
// line ends: the end with the smaller-or-equal distance gets a share, and when
// both distances are equal each end receives half.
vec2 corner_rounding(vec2 d) {
	vec2 nearer = step(d, d.yx);
	float share = (1.0 - SMAA_CORNER_ROUNDING_NORM) / (nearer.x + nearer.y);
	return share * nearer;
}

// Attenuation factors for a horizontal line's weights.
// tex_coord: .xy is the first line end, .zw the second.
// d:         pixel distances to both ends, forwarded to corner_rounding().
// Reads the red channel at y offsets +1 (affects factor.x) and -2 (affects
// factor.y) next to each end; the result is clamped to [0, 1].
vec2 detect_horizontal_corner_pattern(vec4 tex_coord, vec2 d) {
	vec2 strength = corner_rounding(d);

	float first_plus1 = textureLodOffset(edgeS, tex_coord.xy, 0.0, ivec2(0, 1)).r;
	float second_plus1 = textureLodOffset(edgeS, tex_coord.zw, 0.0, ivec2(1, 1)).r;
	float first_minus2 = textureLodOffset(edgeS, tex_coord.xy, 0.0, ivec2(0, -2)).r;
	float second_minus2 = textureLodOffset(edgeS, tex_coord.zw, 0.0, ivec2(1, -2)).r;

	vec2 factor = vec2(1.0);
	factor -= strength.x * vec2(first_plus1, first_minus2);
	factor -= strength.y * vec2(second_plus1, second_minus2);

	return clamp(factor, 0.0, 1.0);
}

// Attenuation factors for a vertical line's weights.
// tex_coord: .xy is the first line end, .zw the second.
// d:         pixel distances to both ends, forwarded to corner_rounding().
// Reads the green channel at x offsets +1 (affects factor.x) and -2 (affects
// factor.y) next to each end; the result is clamped to [0, 1].
vec2 detect_vertical_corner_pattern(vec4 tex_coord, vec2 d) {
	vec2 strength = corner_rounding(d);

	float first_plus1 = textureLodOffset(edgeS, tex_coord.xy, 0.0, ivec2(1, 0)).g;
	float second_plus1 = textureLodOffset(edgeS, tex_coord.zw, 0.0, ivec2(1, 1)).g;
	float first_minus2 = textureLodOffset(edgeS, tex_coord.xy, 0.0, ivec2(-2, 0)).g;
	float second_minus2 = textureLodOffset(edgeS, tex_coord.zw, 0.0, ivec2(-2, 1)).g;

	vec2 factor = vec2(1.0);
	factor -= strength.x * vec2(first_plus1, first_minus2);
	factor -= strength.y * vec2(second_plus1, second_minus2);

	return clamp(factor, 0.0, 1.0);
}

// Entry point: computes the four blend weights for this invocation's texel and
// stores them in blendWeight.
//   weights.rg — from the horizontal line (or diagonal) through a green-channel edge.
//   weights.ba — from the vertical line through a red-channel edge.
// Texels with no edge are written as vec4(0.0).
void main() {
	vec4 weights = vec4(0.0);

	vec2 e = texelFetch(edgeS, texel, 0).rg;

	if (e.g > 0.0) {
		// Diagonals take priority over the axis-aligned patterns.
		weights.rg = calculate_diag_weights(e);

		// r == -g: the diagonal pass produced no net weight, so fall back to
		// the horizontal search.
		if (weights.r == -weights.g) {
			// coords.x / coords.z: x of the left / right line end; coords.y: sampling row.
			vec3 coords = vec3(search_x_left(), in_offset_1.y, search_x_right());

			// Crossing edges at both ends of the line.
			immut float e1 = textureLod(edgeS, coords.xy, 0.0).r;
			immut float e2 = textureLodOffset(edgeS, coords.zy, 0.0, ivec2(1, 0)).r;

			// Convert both end positions to pixel distances from this texel.
			// Both are x coordinates, so both must be measured against frag_coord.x
			// (the reference subtracts pixcoord.xx); subtracting the full vec2 would
			// compare the right end against frag_coord.y.
			immut vec2 d = abs(round(coords.xz / pixSize.x - frag_coord.xx));

			// area() is indexed by the square root of the distances.
			weights.rg = area(sqrt(d), e1, e2);

			coords.y = n_pix.y;
			weights.rg *= detect_horizontal_corner_pattern(coords.xyzy, d);
		} else e.r = 0.0; // A diagonal was found: skip the vertical pass.
	}

	if (e.r > 0.0) {
		// coords.y / coords.z: y of the upper / lower line end; coords.x: sampling column.
		vec3 coords = vec3(in_offset_0.x, search_y_up(), search_y_down());

		immut float e1 = textureLod(edgeS, coords.xy, 0.0).g;
		immut float e2 = textureLodOffset(edgeS, coords.xz, 0.0, ivec2(0, 1)).g;

		// Both end positions are y coordinates, so both are measured against
		// frag_coord.y (the reference subtracts pixcoord.yy).
		immut vec2 d = abs(round(coords.yz / pixSize.y - frag_coord.yy));

		weights.ba = area(sqrt(d), e1, e2);

		coords.x = n_pix.x;
		weights.ba *= detect_vertical_corner_pattern(coords.xyxz, d);
	}

	imageStore(blendWeight, texel, weights);
}