1 file changed, 112 insertions, 78 deletions
diff --git a/src/effects/cnn_v2_effect.cc b/src/effects/cnn_v2_effect.cc
index 4c10ed1..7127aae 100644
--- a/src/effects/cnn_v2_effect.cc
+++ b/src/effects/cnn_v2_effect.cc
@@ -15,38 +15,24 @@
 #include <cstring>
 
 CNNv2Effect::CNNv2Effect(const GpuContext& ctx)
-    : PostProcessEffect(ctx),
-      static_pipeline_(nullptr),
-      static_bind_group_(nullptr),
-      static_params_buffer_(nullptr),
-      static_features_tex_(nullptr),
-      static_features_view_(nullptr),
-      linear_sampler_(nullptr),
-      layer_pipeline_(nullptr),
-      weights_buffer_(nullptr),
-      input_mip_tex_(nullptr),
-      current_input_view_(nullptr),
-      blend_amount_(1.0f),
-      mip_level_(0),
+    : PostProcessEffect(ctx), static_pipeline_(nullptr),
+      static_bind_group_(nullptr), static_params_buffer_(nullptr),
+      static_features_tex_(nullptr), static_features_view_(nullptr),
+      linear_sampler_(nullptr), layer_pipeline_(nullptr),
+      weights_buffer_(nullptr), input_mip_tex_(nullptr),
+      current_input_view_(nullptr), blend_amount_(1.0f), mip_level_(0),
       initialized_(false) {
   std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
 }
 
 CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params)
-    : PostProcessEffect(ctx),
-      static_pipeline_(nullptr),
-      static_bind_group_(nullptr),
-      static_params_buffer_(nullptr),
-      static_features_tex_(nullptr),
-      static_features_view_(nullptr),
-      linear_sampler_(nullptr),
-      layer_pipeline_(nullptr),
-      weights_buffer_(nullptr),
-      input_mip_tex_(nullptr),
-      current_input_view_(nullptr),
-      blend_amount_(params.blend_amount),
-      mip_level_(0),
-      initialized_(false) {
+    : PostProcessEffect(ctx), static_pipeline_(nullptr),
+      static_bind_group_(nullptr), static_params_buffer_(nullptr),
+      static_features_tex_(nullptr), static_features_view_(nullptr),
+      linear_sampler_(nullptr), layer_pipeline_(nullptr),
+      weights_buffer_(nullptr), input_mip_tex_(nullptr),
+      current_input_view_(nullptr), blend_amount_(params.blend_amount),
+      mip_level_(0), initialized_(false) {
   std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
 }
 
@@ -56,7 +42,8 @@ CNNv2Effect::~CNNv2Effect() {
 
 void CNNv2Effect::init(MainSequence* demo) {
   (void)demo;
-  if (initialized_) return;
+  if (initialized_)
+    return;
 
   load_weights();
   create_textures();
@@ -75,7 +62,8 @@ void CNNv2Effect::resize(int width, int height) {
 void CNNv2Effect::load_weights() {
   // Load binary weights asset
   size_t weights_size = 0;
-  const uint8_t* weights_data = (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size);
+  const uint8_t* weights_data =
+      (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size);
 
   if (!weights_data || weights_size < 20) {
     // Weights not available - effect will skip
@@ -89,12 +77,14 @@ void CNNv2Effect::load_weights() {
   uint32_t num_layers = header[2];
   uint32_t total_weights = header[3];
 
-  FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n");  // 'CNN2'
+  FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2'
 
-  // Support both version 1 (16-byte header) and version 2 (20-byte header with mip_level)
-  // TODO: Version 3 should include feature descriptor for arbitrary layout/ordering
+  // Support both version 1 (16-byte header) and version 2 (20-byte header with
+  // mip_level)
+  // TODO: Version 3 should include feature descriptor for arbitrary
+  // layout/ordering
   if (version == 1) {
-    mip_level_ = 0;  // Default for v1
+    mip_level_ = 0; // Default for v1
   } else if (version == 2) {
     mip_level_ = header[4];
   } else {
@@ -115,9 +105,10 @@ void CNNv2Effect::load_weights() {
     layer_info_.push_back(info);
   }
 
-  // Create GPU storage buffer for weights (skip header + layer info, upload only weights)
-  size_t header_size = 20;  // 5 u32
-  size_t layer_info_size = 20 * num_layers;  // 5 u32 per layer
+  // Create GPU storage buffer for weights (skip header + layer info, upload
+  // only weights)
+  size_t header_size = 20;                  // 5 u32
+  size_t layer_info_size = 20 * num_layers; // 5 u32 per layer
   size_t weights_offset = header_size + layer_info_size;
   size_t weights_only_size = weights_size - weights_offset;
 
@@ -129,7 +120,8 @@ void CNNv2Effect::load_weights() {
   weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc);
 
   // Upload only weights (skip header + layer info)
-  wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size);
+  wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0,
+                       weights_data + weights_offset, weights_only_size);
 
   // Create uniform buffers for layer params (one per layer)
   for (uint32_t i = 0; i < num_layers; ++i) {
@@ -153,7 +145,9 @@ void CNNv2Effect::create_textures() {
   // Input texture with mips (for multi-scale features)
   TextureWithView input_mip = gpu_create_texture_2d(
       ctx_.device, width_, height_, WGPUTextureFormat_RGBA8Unorm,
-      (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst), 3);
+      (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding |
+                         WGPUTextureUsage_CopyDst),
+      3);
   input_mip_tex_ = input_mip.texture;
 
   for (int i = 0; i < 3; ++i) {
@@ -195,7 +189,8 @@ void CNNv2Effect::create_pipelines() {
 
   // Static features compute pipeline
   size_t shader_size = 0;
-  const char* static_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size);
+  const char* static_code =
+      (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size);
 
   if (!static_code || shader_size == 0) {
     // Shader not available (e.g., in test mode) - skip pipeline creation
@@ -210,7 +205,8 @@ void CNNv2Effect::create_pipelines() {
   shader_desc.nextInChain = &wgsl_src.chain;
 
   // Create bind group layout for static features compute
-  // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params, 6=linear_sampler
+  // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output,
+  // 5=params, 6=linear_sampler
   WGPUBindGroupLayout static_bgl =
       BindGroupLayoutBuilder()
           .texture(0, WGPUShaderStage_Compute)
@@ -227,28 +223,35 @@ void CNNv2Effect::create_pipelines() {
   WGPUPipelineLayoutDescriptor pl_desc = {};
   pl_desc.bindGroupLayoutCount = 1;
   pl_desc.bindGroupLayouts = &static_bgl;
-  WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+  WGPUPipelineLayout pipeline_layout =
+      wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
 
   // Recreate pipeline with proper layout
   WGPUComputePipelineDescriptor pipeline_desc2 = {};
-  pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+  pipeline_desc2.compute.module =
+      wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
   pipeline_desc2.compute.entryPoint = str_view("main");
   pipeline_desc2.layout = pipeline_layout;
 
-  if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_);
-  static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2);
+  if (static_pipeline_)
+    wgpuComputePipelineRelease(static_pipeline_);
+  static_pipeline_ =
+      wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2);
 
   wgpuShaderModuleRelease(pipeline_desc2.compute.module);
   wgpuPipelineLayoutRelease(pipeline_layout);
   wgpuBindGroupLayoutRelease(static_bgl);
 
   // CNN layer compute pipeline (storage buffer version)
-  if (layer_info_.empty()) return;  // No weights loaded
+  if (layer_info_.empty())
+    return; // No weights loaded
 
   size_t layer_shader_size = 0;
-  const char* layer_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_shader_size);
+  const char* layer_code = (const char*)GetAsset(
+      AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_shader_size);
 
-  if (!layer_code || layer_shader_size == 0) return;
+  if (!layer_code || layer_shader_size == 0)
+    return;
 
   WGPUShaderSourceWGSL layer_wgsl = {};
   layer_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL;
@@ -257,11 +260,14 @@ void CNNv2Effect::create_pipelines() {
   WGPUShaderModuleDescriptor layer_shader_desc = {};
   layer_shader_desc.nextInChain = &layer_wgsl.chain;
 
-  WGPUShaderModule layer_module = wgpuDeviceCreateShaderModule(ctx_.device, &layer_shader_desc);
-  if (!layer_module) return;
+  WGPUShaderModule layer_module =
+      wgpuDeviceCreateShaderModule(ctx_.device, &layer_shader_desc);
+  if (!layer_module)
+    return;
 
   // Create bind group layout for layer compute
-  // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input
+  // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params,
+  // 5=original_input
   WGPUBindGroupLayout layer_bgl =
       BindGroupLayoutBuilder()
           .uint_texture(0, WGPUShaderStage_Compute)
@@ -277,14 +283,16 @@ void CNNv2Effect::create_pipelines() {
   layer_pl_desc.bindGroupLayoutCount = 1;
   layer_pl_desc.bindGroupLayouts = &layer_bgl;
 
-  WGPUPipelineLayout layer_pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_pl_desc);
+  WGPUPipelineLayout layer_pipeline_layout =
+      wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_pl_desc);
 
   WGPUComputePipelineDescriptor layer_pipeline_desc = {};
   layer_pipeline_desc.compute.module = layer_module;
   layer_pipeline_desc.compute.entryPoint = str_view("main");
   layer_pipeline_desc.layout = layer_pipeline_layout;
 
-  layer_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &layer_pipeline_desc);
+  layer_pipeline_ =
+      wgpuDeviceCreateComputePipeline(ctx_.device, &layer_pipeline_desc);
 
   wgpuShaderModuleRelease(layer_module);
   wgpuPipelineLayoutRelease(layer_pipeline_layout);
@@ -292,7 +300,8 @@ void CNNv2Effect::create_pipelines() {
 }
 
 void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
-  if (!static_pipeline_) return;
+  if (!static_pipeline_)
+    return;
 
   // Cache input view
   current_input_view_ = input_view;
@@ -303,7 +312,8 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
     static_bind_group_ = nullptr;
   }
 
-  // Create bind group for static features compute (manual for storage texture binding)
+  // Create bind group for static features compute (manual for storage texture
+  // binding)
   WGPUBindGroupEntry bg_entries[7] = {};
   bg_entries[0].binding = 0;
   bg_entries[0].textureView = input_view;
@@ -332,7 +342,8 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
   wgpuBindGroupLayoutRelease(layout);
 
   // Create layer bind groups
-  if (!layer_pipeline_ || layer_info_.empty()) return;
+  if (!layer_pipeline_ || layer_info_.empty())
+    return;
 
   // Release old layer bind groups
   for (auto bg : layer_bind_groups_) {
@@ -341,7 +352,8 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
   layer_bind_groups_.clear();
 
   // Get bind group layout from layer pipeline
-  WGPUBindGroupLayout layer_bgl = wgpuComputePipelineGetBindGroupLayout(layer_pipeline_, 0);
+  WGPUBindGroupLayout layer_bgl =
+      wgpuComputePipelineGetBindGroupLayout(layer_pipeline_, 0);
 
   // Create bind group for each layer
   for (size_t i = 0; i < layer_info_.size(); ++i) {
@@ -366,7 +378,8 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
 
 void CNNv2Effect::compute(WGPUCommandEncoder encoder,
                           const CommonPostProcessUniforms& uniforms) {
-  if (!initialized_ || !static_pipeline_ || !static_bind_group_) return;
+  if (!initialized_ || !static_pipeline_ || !static_bind_group_)
+    return;
 
   float effective_blend = blend_amount_;
   if (beat_modulated_) {
@@ -379,10 +392,12 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
   static_params.padding[0] = 0;
   static_params.padding[1] = 0;
   static_params.padding[2] = 0;
-  wgpuQueueWriteBuffer(ctx_.queue, static_params_buffer_, 0, &static_params, sizeof(static_params));
+  wgpuQueueWriteBuffer(ctx_.queue, static_params_buffer_, 0, &static_params,
+                       sizeof(static_params));
 
   // Pass 1: Compute static features
-  WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+  WGPUComputePassEncoder pass =
+      wgpuCommandEncoderBeginComputePass(encoder, nullptr);
 
   wgpuComputePassEncoderSetPipeline(pass, static_pipeline_);
   wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr);
@@ -396,7 +411,8 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
   wgpuComputePassEncoderRelease(pass);
 
   // Execute CNN layer passes
-  if (!layer_pipeline_ || layer_bind_groups_.empty()) return;
+  if (!layer_pipeline_ || layer_bind_groups_.empty())
+    return;
 
   // Update layer params (each layer has own buffer)
   for (size_t i = 0; i < layer_info_.size(); ++i) {
@@ -411,14 +427,18 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
     params.blend_amount = effective_blend;
     params.is_layer_0 = (i == 0) ? 1 : 0;
 
-    wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params, sizeof(params));
+    wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params,
+                         sizeof(params));
 
-    WGPUComputePassEncoder layer_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+    WGPUComputePassEncoder layer_pass =
+        wgpuCommandEncoderBeginComputePass(encoder, nullptr);
 
     wgpuComputePassEncoderSetPipeline(layer_pass, layer_pipeline_);
-    wgpuComputePassEncoderSetBindGroup(layer_pass, 0, layer_bind_groups_[i], 0, nullptr);
+    wgpuComputePassEncoderSetBindGroup(layer_pass, 0, layer_bind_groups_[i], 0,
+                                       nullptr);
 
-    wgpuComputePassEncoderDispatchWorkgroups(layer_pass, workgroups_x, workgroups_y, 1);
+    wgpuComputePassEncoderDispatchWorkgroups(layer_pass, workgroups_x,
+                                             workgroups_y, 1);
 
     wgpuComputePassEncoderEnd(layer_pass);
     wgpuComputePassEncoderRelease(layer_pass);
@@ -433,26 +453,40 @@ void CNNv2Effect::render(WGPURenderPassEncoder pass,
 }
 
 void CNNv2Effect::cleanup() {
-  if (static_features_view_) wgpuTextureViewRelease(static_features_view_);
-  if (static_features_tex_) wgpuTextureRelease(static_features_tex_);
-  if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_);
-  if (static_params_buffer_) wgpuBufferRelease(static_params_buffer_);
-  if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_);
-  if (linear_sampler_) wgpuSamplerRelease(linear_sampler_);
+  if (static_features_view_)
+    wgpuTextureViewRelease(static_features_view_);
+  if (static_features_tex_)
+    wgpuTextureRelease(static_features_tex_);
+  if (static_bind_group_)
+    wgpuBindGroupRelease(static_bind_group_);
+  if (static_params_buffer_)
+    wgpuBufferRelease(static_params_buffer_);
+  if (static_pipeline_)
+    wgpuComputePipelineRelease(static_pipeline_);
+  if (linear_sampler_)
+    wgpuSamplerRelease(linear_sampler_);
 
-  if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_);
-  if (weights_buffer_) wgpuBufferRelease(weights_buffer_);
-  for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf);
+  if (layer_pipeline_)
+    wgpuComputePipelineRelease(layer_pipeline_);
+  if (weights_buffer_)
+    wgpuBufferRelease(weights_buffer_);
+  for (auto buf : layer_params_buffers_)
+    wgpuBufferRelease(buf);
   layer_params_buffers_.clear();
 
   for (int i = 0; i < 3; ++i) {
-    if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]);
+    if (input_mip_view_[i])
+      wgpuTextureViewRelease(input_mip_view_[i]);
   }
-  if (input_mip_tex_) wgpuTextureRelease(input_mip_tex_);
+  if (input_mip_tex_)
+    wgpuTextureRelease(input_mip_tex_);
 
-  for (auto view : layer_views_) wgpuTextureViewRelease(view);
-  for (auto tex : layer_textures_) wgpuTextureRelease(tex);
-  for (auto bg : layer_bind_groups_) wgpuBindGroupRelease(bg);
+  for (auto view : layer_views_)
+    wgpuTextureViewRelease(view);
+  for (auto tex : layer_textures_)
+    wgpuTextureRelease(tex);
+  for (auto bg : layer_bind_groups_)
+    wgpuBindGroupRelease(bg);
 
   layer_views_.clear();
   layer_textures_.clear();