#include "data\\shaders\\common.h"
#include "data\\shaders\\lights.h"

// Dispatch parameters supplied by the application through constant buffer slot b4.
cbuffer DispatchParams : register( b4 )
{
    // Number of thread groups in the dispatch; main() uses .x as the row
    // stride when flattening a 2D group id into a tile index.
    uint3 num_thread_groups;
    // Explicit padding after the uint3 (presumably to keep the CPU-side
    // struct layout in sync with 16-byte constant packing - confirm).
    uint pad1;

    // Not referenced in the visible part of this file; presumably the total
    // thread count of the dispatch - confirm against the CPU-side struct.
    uint3 num_threads;
    // Explicit padding after the uint3, same purpose as pad1.
    uint pad2;
};

// System-generated values handed to the compute shader entry point.
struct ComputeShaderInput
{
    uint3 group_id            : SV_GroupID;          // index of the thread group within the dispatch
    uint3 group_thread_id     : SV_GroupThreadID;    // index of the thread within its group
    uint3 dispatch_thread_id  : SV_DispatchThreadID; // index of the thread within the whole dispatch
    uint group_index          : SV_GroupIndex;       // flattened index of the thread within its group
};


// Per-tile frustums; main() indexes this with the flattened thread group id.
StructuredBuffer<Frustum> frustums : register( t0 );
// Cullable lights; main() reads .position and .radius of each entry.
StructuredBuffer<CullInfo> lights : register(t1);
// Depth buffer; main() loads the red channel of mip 0 per thread.
Texture2D depth_texture : register( t2 );

// Output: light indices of all tiles, each tile writing into the range it reserved.
RWStructuredBuffer<uint> light_index_list : register(u0);
// Output: per tile, (start offset into light_index_list, number of lights).
RWStructuredBuffer<uint2> light_grid : register(u1);

// Element 0 is atomically advanced by each group to reserve its range in light_index_list.
globallycoherent RWStructuredBuffer<uint> light_index_counter : register(u2);

// Bit patterns (asuint) of the smallest / largest depth sampled by the group.
groupshared uint min_depth;
groupshared uint max_depth;
// Frustum of the current tile, loaded once by thread 0.
groupshared Frustum group_frustum;

// Number of lights the group has appended (incremented atomically).
groupshared uint light_count;
// Start of this group's reserved range in light_index_list.
groupshared uint light_index_start_offset;
// Indices of lights that passed culling for this tile; holds at most 1024 entries.
groupshared uint light_list[1024];

// View-space bounding box of the tile between its min and max depth, built by thread 0.
groupshared AABB group_aabb;

// Returns true when the sphere lies completely in the negative half-space of
// the plane, i.e. the signed distance of its centre (dot(normal, pos) - d) is
// below -radius. The distance is only metric if plane.normal is unit length
// (not verifiable from this file).
bool SphereInsidePlane(Sphere sphere, Plane plane)
{
  float signed_distance = dot(plane.normal, sphere.pos) - plane.d;
  return signed_distance < -sphere.r;
}

// Returns true unless the sphere is rejected by the depth range or by one of
// the first four frustum planes.
//   sphere   - sphere to test (pos.z is compared directly against z_near/z_far)
//   frustum  - frustum whose planes[0..3] are tested with SphereInsidePlane
//   z_near   - sphere is rejected when pos.z + r is below this value
//   z_far    - sphere is rejected when pos.z - r is above this value
bool SphereInsideFrustum(Sphere sphere, Frustum frustum, float z_near, float z_far)
{
  bool in_front_of_near = sphere.pos.z + sphere.r < z_near;
  bool beyond_far       = sphere.pos.z - sphere.r > z_far;

  bool rejected = in_front_of_near || beyond_far;

  // A sphere fully on the negative side of any side plane is rejected.
  [unroll]
  for (uint plane_index = 0; plane_index < 4; ++plane_index)
  {
    rejected = rejected || SphereInsidePlane(sphere, frustum.planes[plane_index]);
  }

  return !rejected;
}

// Per-tile light culling. One thread group processes one screen tile of
// TILE_SIZE x TILE_SIZE pixels:
//   1. reduce the tile's depth samples to a min / max depth,
//   2. build a view-space AABB of the tile between those depths,
//   3. test every cullable light against the tile frustum, the min-depth plane
//      and the AABB, collecting survivors in groupshared light_list,
//   4. reserve a range in light_index_list, record (offset, count) in
//      light_grid and copy the collected indices into the reserved range.
// In - system-generated thread / group ids for this invocation.
[numthreads(TILE_SIZE,TILE_SIZE,1)]
void main(ComputeShaderInput In)
{
  // Capacity of the groupshared light_list array; must match its declared size.
  const uint light_list_capacity = 1024;

  // Flattened index of this tile, used for both the frustum and the light grid.
  const uint tile_index = In.group_id.x + (In.group_id.y * num_thread_groups.x);

  //calculate min max depth
  float depth = depth_texture.Load(int3(In.dispatch_thread_id.xy, 0)).r;
  // Depth is reduced as a uint bit pattern because the atomics below work on
  // integers; this assumes depth is non-negative so that integer ordering
  // matches float ordering.
  uint u_depth = asuint( depth );

  // Thread 0 resets the shared state and fetches the tile frustum.
  if (In.group_index == 0)
  {
    min_depth = 0xffffffff;
    max_depth = 0;
    light_count = 0;
    group_frustum = frustums[tile_index];
  }

  GroupMemoryBarrierWithGroupSync();

  // Threads that fall outside the screen do not contribute a depth sample.
  if (In.dispatch_thread_id.x < (uint)g_screen_size.x && In.dispatch_thread_id.y < (uint)g_screen_size.y)
  {
    InterlockedMin(min_depth, u_depth);
    InterlockedMax(max_depth, u_depth);
  }

  //cull lights
  GroupMemoryBarrierWithGroupSync();

  float f_min_depth = asfloat(min_depth);
  float f_max_depth = asfloat(max_depth);

  //calculate AABB
  if(In.group_index == 0)
  {
    // Four tile corners at the min depth followed by the same four corners at
    // the max depth, transformed by ScreenToView (defined elsewhere).
    float3 view_space[8];

    view_space[0] = ScreenToView(float4(In.group_id.xy * TILE_SIZE / g_screen_size, f_min_depth, 1.0f)).xyz;
    view_space[1] = ScreenToView(float4(float2(In.group_id.x + 1, In.group_id.y) * TILE_SIZE / g_screen_size, f_min_depth, 1.0f)).xyz;
    view_space[2] = ScreenToView(float4(float2(In.group_id.x + 1, In.group_id.y + 1) * TILE_SIZE / g_screen_size, f_min_depth, 1.0f)).xyz;
    view_space[3] = ScreenToView(float4(float2(In.group_id.x, In.group_id.y + 1) * TILE_SIZE / g_screen_size, f_min_depth, 1.0f)).xyz;

    view_space[4] = ScreenToView(float4(In.group_id.xy * TILE_SIZE / g_screen_size, f_max_depth, 1.0f)).xyz;
    view_space[5] = ScreenToView(float4(float2(In.group_id.x + 1, In.group_id.y) * TILE_SIZE / g_screen_size, f_max_depth, 1.0f)).xyz;
    view_space[6] = ScreenToView(float4(float2(In.group_id.x + 1, In.group_id.y + 1) * TILE_SIZE / g_screen_size, f_max_depth, 1.0f)).xyz;
    view_space[7] = ScreenToView(float4(float2(In.group_id.x, In.group_id.y + 1) * TILE_SIZE / g_screen_size, f_max_depth, 1.0f)).xyz;

    float3 min_aabb = 1000000.xxx;
    float3 max_aabb = -1000000.xxx;

    for (int corner = 0; corner < 8; ++corner)
    {
      min_aabb = min(min_aabb, view_space[corner]);
      max_aabb = max(max_aabb, view_space[corner]);
    }

    group_aabb.center = (min_aabb + max_aabb) * 0.5f;
    group_aabb.extent = abs(max_aabb - group_aabb.center);
  }
  GroupMemoryBarrierWithGroupSync();

  float min_depth_vs = ScreenToView( float4(0.0f, 0.0f, f_min_depth, 1.0f)).z;
  float max_depth_vs = ScreenToView( float4(0.0f, 0.0f, f_max_depth, 1.0f)).z;
  float near_clip_vs = ScreenToView( float4(0.0f, 0.0f, 0.0f, 1.0f)).z;

  // Plane facing +z at the tile's nearest depth; lights entirely on its
  // negative side are rejected below.
  Plane minPlane = { float3( 0, 0, 1 ), min_depth_vs };

  // Each thread tests every (TILE_SIZE * TILE_SIZE)-th light.
  for (uint light_index = In.group_index; light_index < (uint)g_num_cullable_lights; light_index += TILE_SIZE * TILE_SIZE)
  {
    Sphere sphere = { lights[light_index].position, lights[light_index].radius };
    if (SphereInsideFrustum( sphere, group_frustum, near_clip_vs, max_depth_vs ))
    {
      if ( !SphereInsidePlane( sphere, minPlane ))
      {
        if (SphereIntersectsAABB(sphere, group_aabb))
        {
          uint slot;
          InterlockedAdd( light_count, 1, slot );
          // light_count keeps growing past the capacity; only slots that
          // exist in light_list are written.
          if (slot < light_list_capacity)
          {
            light_list[slot] = light_index;
          }
        }
      }
    }
  }

  //update light grid
  GroupMemoryBarrierWithGroupSync();

  if (In.group_index == 0)
  {
    // Clamp to the number of entries actually stored. Without this, an
    // overflowing tile would reserve and publish more entries than light_list
    // holds and the copy loop below would read past the end of the array.
    light_count = min(light_count, light_list_capacity);

    InterlockedAdd( light_index_counter[0], light_count, light_index_start_offset );
    light_grid[tile_index] = uint2( light_index_start_offset, light_count );
  }

  //update light index list
  GroupMemoryBarrierWithGroupSync();

  // The loop variable is declared here rather than reusing the culling loop's
  // index, so the shader does not depend on legacy for-loop variable scoping.
  for (uint list_slot = In.group_index; list_slot < light_count; list_slot += TILE_SIZE * TILE_SIZE)
  {
    light_index_list[light_index_start_offset + list_slot] = light_list[list_slot];
  }
}