1 files changed, 152 insertions, 11 deletions
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc
index 04fa74e..b425aba 100644
--- a/src/gpu/effects/cnn_v2_effect.cc
+++ b/src/gpu/effects/cnn_v2_effect.cc
@@ -19,6 +19,7 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx)
       static_features_tex_(nullptr),
       static_features_view_(nullptr),
       input_mip_tex_(nullptr),
+      current_input_view_(nullptr),
       initialized_(false) {
   std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
 }
@@ -91,8 +92,31 @@ void CNNv2Effect::create_textures() {
     input_mip_view_[i] = wgpuTextureCreateView(input_mip_tex_, &mip_view_desc);
   }
 
-  // Layer textures (placeholder - will be created based on config)
-  // TODO: Create layer textures based on layer_configs_
+  // Create 2 layer textures (ping-pong buffers for intermediate results)
+  // Each stores 8×f16 channels packed as 4×u32
+  for (int i = 0; i < 2; ++i) {
+    WGPUTextureDescriptor layer_desc = {};
+    layer_desc.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+    layer_desc.dimension = WGPUTextureDimension_2D;
+    layer_desc.size = size;
+    layer_desc.format = WGPUTextureFormat_RGBA32Uint;
+    layer_desc.mipLevelCount = 1;
+    layer_desc.sampleCount = 1;
+
+    WGPUTexture tex = wgpuDeviceCreateTexture(ctx_.device, &layer_desc);
+    layer_textures_.push_back(tex);
+
+    WGPUTextureViewDescriptor view_desc = {};
+    view_desc.format = WGPUTextureFormat_RGBA32Uint;
+    view_desc.dimension = WGPUTextureViewDimension_2D;
+    view_desc.baseMipLevel = 0;
+    view_desc.mipLevelCount = 1;
+    view_desc.baseArrayLayer = 0;
+    view_desc.arrayLayerCount = 1;
+
+    WGPUTextureView view = wgpuTextureCreateView(tex, &view_desc);
+    layer_views_.push_back(view);
+  }
 }
 
 void CNNv2Effect::create_pipelines() {
@@ -124,25 +148,142 @@ void CNNv2Effect::create_pipelines() {
   static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc);
   wgpuShaderModuleRelease(static_module);
 
+  // Create bind group layout for static features compute
+  // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output
+  WGPUBindGroupLayoutEntry bgl_entries[5] = {};
+
+  // Binding 0: Input texture (mip 0)
+  bgl_entries[0].binding = 0;
+  bgl_entries[0].visibility = WGPUShaderStage_Compute;
+  bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[0].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // Binding 1: Input texture (mip 1)
+  bgl_entries[1].binding = 1;
+  bgl_entries[1].visibility = WGPUShaderStage_Compute;
+  bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // Binding 2: Input texture (mip 2)
+  bgl_entries[2].binding = 2;
+  bgl_entries[2].visibility = WGPUShaderStage_Compute;
+  bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // Binding 3: Depth texture
+  bgl_entries[3].binding = 3;
+  bgl_entries[3].visibility = WGPUShaderStage_Compute;
+  bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // Binding 4: Output (static features)
+  bgl_entries[4].binding = 4;
+  bgl_entries[4].visibility = WGPUShaderStage_Compute;
+  bgl_entries[4].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  bgl_entries[4].storageTexture.format = WGPUTextureFormat_RGBA32Uint;
+  bgl_entries[4].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = 5;
+  bgl_desc.entries = bgl_entries;
+
+  WGPUBindGroupLayout static_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+
+  // Update pipeline layout
+  WGPUPipelineLayoutDescriptor pl_desc = {};
+  pl_desc.bindGroupLayoutCount = 1;
+  pl_desc.bindGroupLayouts = &static_bgl;
+  WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+
+  // Recreate pipeline with proper layout
+  WGPUComputePipelineDescriptor pipeline_desc2 = {};
+  pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+  pipeline_desc2.compute.entryPoint = str_view("main");
+  pipeline_desc2.layout = pipeline_layout;
+
+  if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_);
+  static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2);
+
+  wgpuShaderModuleRelease(pipeline_desc2.compute.module);
+  wgpuPipelineLayoutRelease(pipeline_layout);
+  wgpuBindGroupLayoutRelease(static_bgl);
+
+  // Bind group will be created in update_bind_group()
   // TODO: Create layer pipelines
-  // TODO: Create bind groups
 }
 
 void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
-  (void)input_view;
-  // TODO: Create bind groups for static features and layers
+  if (!static_pipeline_) return;  // the bind group layout is queried from this pipeline below
+
+  // Remember the caller's view in current_input_view_ (not read again in this function)
+  current_input_view_ = input_view;
+
+  // Drop the bind group built by any earlier call before creating its replacement
+  if (static_bind_group_) {
+    wgpuBindGroupRelease(static_bind_group_);
+    static_bind_group_ = nullptr;
+  }
+
+  // Fill the five entries of the static-features bind group (rebuilt on every call)
+  WGPUBindGroupEntry bg_entries[5] = {};
+
+  // Binding 0: input at mip 0 - the view passed in by the caller
+  bg_entries[0].binding = 0;
+  bg_entries[0].textureView = input_view;
+
+  // Binding 1: input at mip 1 (slot only; see the placeholder note on the view below)
+  bg_entries[1].binding = 1;
+  bg_entries[1].textureView = input_mip_view_[0];  // placeholder: mip-0 view stands in for mip 1
+
+  // Binding 2: input at mip 2 (slot only; see the placeholder note on the view below)
+  bg_entries[2].binding = 2;
+  bg_entries[2].textureView = input_mip_view_[0];  // placeholder: mip-0 view stands in for mip 2
+
+  // Binding 3: depth slot - no depth texture is available yet, so the input view is bound instead
+  bg_entries[3].binding = 3;
+  bg_entries[3].textureView = input_view;
+
+  // Binding 4: output slot, backed by static_features_view_
+  bg_entries[4].binding = 4;
+  bg_entries[4].textureView = static_features_view_;
+
+  WGPUBindGroupDescriptor bg_desc = {};
+  bg_desc.layout = wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0);  // layout of group 0
+  bg_desc.entryCount = 5;
+  bg_desc.entries = bg_entries;
+
+  static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc);
+
+  wgpuBindGroupLayoutRelease(bg_desc.layout);  // release the layout handle queried above
+}
+
+void CNNv2Effect::compute(WGPUCommandEncoder encoder,
+                          const CommonPostProcessUniforms& uniforms) {
+  (void)uniforms;  // accepted but not consumed by this pass
+  if (!initialized_ || !static_pipeline_ || !static_bind_group_) return;  // nothing is recorded until all three are ready
+
+  // Pass 1: record the static-features compute pass on the caller's encoder
+  WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+
+  wgpuComputePassEncoderSetPipeline(pass, static_pipeline_);
+  wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr);
+
+  // Ceil-divide width_/height_ by 8 so partial edge tiles get a workgroup (assumes the shader uses 8×8 threads per group - TODO confirm against the shader)
+  uint32_t workgroups_x = (width_ + 7) / 8;
+  uint32_t workgroups_y = (height_ + 7) / 8;
+  wgpuComputePassEncoderDispatchWorkgroups(pass, workgroups_x, workgroups_y, 1);
+
+  wgpuComputePassEncoderEnd(pass);
+  wgpuComputePassEncoderRelease(pass);
+
+  // TODO: Execute CNN layer passes (only the static-features pass is recorded so far)
 }
 
 void CNNv2Effect::render(WGPURenderPassEncoder pass,
                          const CommonPostProcessUniforms& uniforms) {
   (void)pass;
   (void)uniforms;
-  if (!initialized_) return;
-
-  // TODO: Multi-pass execution
-  // 1. Compute static features
-  // 2. Execute CNN layers
-  // 3. Composite to output
+  // Intentionally empty: this effect records its GPU work in compute(); rendering is left to the default composite pass
 }
 
 void CNNv2Effect::cleanup() {