#ifndef BNB_GUIDED_FILTER_GLSL
#define BNB_GUIDED_FILTER_GLSL

precision highp float;
precision highp int;

#include <bnb/textures_lookup.glsl>

// Soft thresholding helper.
//
// Subtracts `threshold` from `x`, clamps the difference at zero and raises the
// result to the power `c`.
//
//   x         - input value
//   threshold - amount subtracted from `x` before clamping
//   c         - exponent applied to the clamped difference
//
// Returns pow(max(0, x - threshold), c).
// Inputs at or below `threshold` evaluate pow(0, c), which GLSL only defines
// for c > 0.
float smooth_binarize(float x, float threshold, float c)
{
    float above_threshold = x - threshold;
    float clamped = max(0.f, above_threshold);
    return pow(clamped, c);
}

// Removes the part of `x` below `cut`, rescales what is left by (1 - cut) and
// applies the exponent `p`.
//
//   x   - input value
//   cut - lower cut-off; values of `x` at or below it map to a zero base
//   p   - exponent applied after rescaling
//
// Returns pow(max(0, x - cut) / (1 - cut), p).
// The division by (1 - cut) means `cut` must not be equal to 1.
float cut_and_rescale(float x, float cut, float p)
{
    float span = 1.f - cut;
    float clipped = max(0.f, x - cut);
    float rescaled = clipped / span;
    return pow(rescaled, p);
}

// Coarse classification of the neighbourhood around `uv`, based on the alpha
// channel of `tex`.
//
// Samples alpha on a grid of offsets (x, y) * radius * step_size, where x and y
// run from -1 to 1 in increments of `step`. Each sample is passed through
// smooth_binarize(alpha, 0.6, 0.005) and the results are summed.
//
//   tex       - texture whose alpha channel is inspected
//   uv        - centre of the neighbourhood, in texture coordinates
//   radius    - multiplier applied to `step_size` for the sample offsets
//   step_size - uv-space size of one step
//   step      - increment of the grid loops; values below 1 are treated as 1
//
// Returns, with low = 1 / points + eps and high = points - eps
// (points = number of samples taken, eps = 0.1):
//   0 - low < sum < high
//   1 - low < sum and sum >= high
//   2 - sum <= low
int roi_approx(BNB_DECLARE_SAMPLER_2D_ARGUMENT(tex), vec2 uv, int radius, vec2 step_size, int step)
{
    float eps = 1e-1;

    // A non-positive increment would never advance the loops below, so clamp it.
    int grid_step = max(step, 1);

    float mask_sum = 0.f;
    float points = 0.f;
    for (int y = -1; y <= 1; y += grid_step) {
        for (int x = -1; x <= 1; x += grid_step) {
            vec2 offset = vec2(float(x * radius), float(y * radius)) * step_size;
            float alpha = textureLod(BNB_SAMPLER_2D(tex), uv + offset, 0.f).a;

            mask_sum += smooth_binarize(alpha, 0.6, 0.005);
            points += 1.f;
        }
    }

    float low = 1.f / points + eps;
    float high = points - eps;

    // Sum does not exceed the lower threshold.
    if (!(low < mask_sum)) {
        return 2;
    }

    // Sum lies strictly between the two thresholds.
    if (mask_sum < high) {
        return 0;
    }

    // Sum exceeds the lower threshold and reaches the upper one.
    return 1;
}

// For more info (base implementation, ...) see PR: https://bitbucket.org/BanubaLimited/banuba_sdk/pull-requests/6503/overview
// Computes the per-pixel linear coefficients (a, b) of a guided filter at `uv`.
//
// The RGB channels of `tex` are the guide I. The alpha channel, passed through
// smooth_binarize(alpha, 0.2, 0.005), is the filtered input p. Over a
// (2 * radius + 1)^2 window of samples the function accumulates the means of
// I, p and I * p and the second moments of I, then solves
//     a = inverse(cov(I) + eps * identity) * cov(I, p)
//     b = mean(p) - dot(a, mean(I))
//
//   tex    - texture providing the guide (rgb) and the input (a)
//   uv     - centre of the window, in texture coordinates
//   radius - window half-size, in samples
//   eps    - value added to the diagonal of the covariance matrix before
//            inversion
//
// Returns vec4(a, b). When roi_approx() reports 1 or 2 for the neighbourhood,
// the window is skipped and (0, 0, 0, 1) or (0, 0, 0, 0) is returned instead.
vec4 bnb_guided_filter(BNB_DECLARE_SAMPLER_2D_ARGUMENT(tex), vec2 uv, int radius, float eps)
{
    vec2 tex_size = vec2(textureSize(BNB_SAMPLER_2D(tex), 0));
    // Distance between neighbouring samples, in texels: 1.5 for a texture that
    // is 1280 wide, scaled linearly with the actual width.
    float stride = 1.5f * tex_size.x / 1280.f;
    // Same distance in uv units; shared by the ROI probe and the window loop.
    vec2 step_size = vec2(stride) / tex_size;

    int roi_result = roi_approx(BNB_PASS_SAMPLER_ARGUMENT(tex), uv, radius, step_size, 1);
    if (roi_result == 1) {
        // a = 0, b = 1
        return vec4(0.f, 0.f, 0.f, 1.f);
    }

    if (roi_result == 2) {
        // a = 0, b = 0
        return vec4(0.f, 0.f, 0.f, 0.f);
    }

    vec3 sum_I = vec3(0.0);
    float sum_p = 0.0;
    vec3 sum_Ip = vec3(0.0);

    // Upper triangle of the sum of I * I^T (the matrix is symmetric).
    float sum_rr = 0.0, sum_rg = 0.0, sum_rb = 0.0;
    float sum_gg = 0.0, sum_gb = 0.0, sum_bb = 0.0;

    int kernel_size = 2 * radius + 1;
    float kernel_area = float(kernel_size * kernel_size);

    for (int y = -radius; y <= radius; ++y) {
        for (int x = -radius; x <= radius; ++x) {
            vec2 offset = vec2(float(x), float(y)) * step_size;
            vec2 sample_uv = uv + offset;
            // Named `texel` so that it does not hide the sampler parameter `tex`.
            vec4 texel = textureLod(BNB_SAMPLER_2D(tex), sample_uv, 0.f);
            vec3 I_val = texel.rgb;
            float p_val = smooth_binarize(texel.a, 0.2, 0.005);

            sum_I += I_val;
            sum_p += p_val;
            sum_Ip += I_val * p_val;

            sum_rr += I_val.r * I_val.r;
            sum_rg += I_val.r * I_val.g;
            sum_rb += I_val.r * I_val.b;
            sum_gg += I_val.g * I_val.g;
            sum_gb += I_val.g * I_val.b;
            sum_bb += I_val.b * I_val.b;
        }
    }

    vec3 mean_I = sum_I / kernel_area;
    float mean_p = sum_p / kernel_area;
    vec3 mean_Ip = sum_Ip / kernel_area;

    // Covariance of the guide: E[I * I^T] - E[I] * E[I]^T.
    float rr = sum_rr / kernel_area - mean_I.r * mean_I.r;
    float rg = sum_rg / kernel_area - mean_I.r * mean_I.g;
    float rb = sum_rb / kernel_area - mean_I.r * mean_I.b;
    float gg = sum_gg / kernel_area - mean_I.g * mean_I.g;
    float gb = sum_gb / kernel_area - mean_I.g * mean_I.b;
    float bb = sum_bb / kernel_area - mean_I.b * mean_I.b;

    // eps is added on the diagonal only.
    mat3 sigma = mat3(
        rr + eps, rg, rb, rg, gg + eps, gb, rb, gb, bb + eps
    );

    vec3 cov_Ip = mean_Ip - mean_I * mean_p;
    vec3 a = inverse(sigma) * cov_Ip;
    float b = mean_p - dot(a, mean_I);

    return vec4(a, b);
}

#endif // BNB_GUIDED_FILTER_GLSL
