test_demo: Add beat-synchronized CNN post-processing with version selection

- Add --cnn-version <1|2> flag to select between CNN v1 and v2
- Implement beat_phase modulation for dynamic blend in both CNN effects
- Fix CNN v2 per-layer uniform buffer sharing (each layer needs own buffer)
- Fix CNN v2 y-axis orientation to match render pass convention
- Add Scene1Effect as base visual layer to test_demo timeline
- Reorganize CNN v2 shaders into cnn_v2/ subdirectory
- Update asset paths and documentation for new shader organization

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
author: skal <pascal.massimino@gmail.com> 2026-02-12 15:10:17 +0100
committer: skal <pascal.massimino@gmail.com> 2026-02-12 15:10:17 +0100
commit: 8b30cadfc19647487986d14dba9ddba7908dd1d0 (patch)
tree: f865b42945f72bfc480e2c2a6849127bf56d1a59
parent: 1effb125973ac0948de3015be1d53ae72463858b (diff)
17 files changed, 165 insertions, 34 deletions
diff --git a/assets/final/demo_assets.txt b/assets/final/demo_assets.txt
index 96f86f9..6db6e48 100644
--- a/assets/final/demo_assets.txt
+++ b/assets/final/demo_assets.txt
@@ -60,3 +60,8 @@ SHADER_COMPUTE_GEN_MASK, NONE, shaders/compute/gen_mask.wgsl, "GPU Mask Composit
 CIRCLE_MASK_COMPUTE_SHADER, NONE, shaders/circle_mask_compute.wgsl, "Circle mask compute shader"
 CIRCLE_MASK_RENDER_SHADER, NONE, shaders/circle_mask_render.wgsl, "Circle mask render shader"
 MASKED_CUBE_SHADER, NONE, shaders/masked_cube.wgsl, "Masked cube shader"
+
+# --- CNN v2 Post-Processing ---
+SHADER_CNN_V2_STATIC, NONE, shaders/cnn_v2/cnn_v2_static.wgsl, "CNN v2 Static Features Shader"
+SHADER_CNN_V2_COMPUTE, NONE, shaders/cnn_v2/cnn_v2_compute.wgsl, "CNN v2 Compute Shader"
+WEIGHTS_CNN_V2, NONE, cnn_v2_weights.bin, "CNN v2 Weights Binary"
diff --git a/assets/test_demo.seq b/assets/test_demo.seq
index 6dc26ca..ae0301f 100644
--- a/assets/test_demo.seq
+++ b/assets/test_demo.seq
@@ -2,6 +2,7 @@
 # BPM 120 (set in test_demo.track)
 
 SEQUENCE 0.0 0 "Main Loop"
+  EFFECT + Scene1Effect 0.0 16.0
   EFFECT + FlashEffect 0.0 16.0
 
 END_DEMO 32b
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index 9407934..09d0841 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -299,7 +299,7 @@ torch.save({
 2. Extract layer configs (kernels, channels)
 3. Quantize weights to float16: `weights_f16 = weights_f32.astype(np.float16)`
 4. Generate WGSL shader per layer
-5. Write to `workspaces/<workspace>/shaders/cnn_v2_*.wgsl`
+5. Write to `workspaces/<workspace>/shaders/cnn_v2/cnn_v2_*.wgsl`
 
 **Example Generated Shader:**
 
@@ -402,7 +402,7 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
 
 ### Phase 1: Shaders (Core Infrastructure)
 
-- [ ] `workspaces/main/shaders/cnn_v2_static.wgsl` - Static features compute
+- [ ] `workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl` - Static features compute
   - [ ] RGBD sampling from framebuffer
   - [ ] UV coordinate calculation
   - [ ] sin(10\*uv.x) computation
@@ -410,7 +410,7 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
   - [ ] Float16 packing via `pack2x16float()`
   - [ ] Output to `texture_storage_2d<rgba32uint>`
 
-- [ ] `workspaces/main/shaders/cnn_v2_layer_template.wgsl` - Layer template
+- [ ] `workspaces/main/shaders/cnn_v2/cnn_v2_layer_template.wgsl` - Layer template
   - [ ] Static features unpacking
   - [ ] Previous layer unpacking (8×f16)
   - [ ] Convolution implementation (1×1, 3×3, 5×5)
@@ -492,10 +492,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
 
 ```
 # Shaders (generated by export script)
-workspaces/main/shaders/cnn_v2_static.wgsl       # Static features compute
-workspaces/main/shaders/cnn_v2_layer_0.wgsl      # Input layer (generated)
-workspaces/main/shaders/cnn_v2_layer_1.wgsl      # Inner layer (generated)
-workspaces/main/shaders/cnn_v2_layer_2.wgsl      # Output layer (generated)
+workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl       # Static features compute
+workspaces/main/shaders/cnn_v2/cnn_v2_layer_0.wgsl      # Input layer (generated)
+workspaces/main/shaders/cnn_v2/cnn_v2_layer_1.wgsl      # Inner layer (generated)
+workspaces/main/shaders/cnn_v2/cnn_v2_layer_2.wgsl      # Output layer (generated)
 
 # C++ implementation
 src/gpu/effects/cnn_v2_effect.h                  # Effect class header
@@ -531,7 +531,7 @@ TODO.md                                          # Add CNN v2 task
 ```
 training/train_cnn.py                            # Original training
 src/gpu/effects/cnn_effect.*                     # Original effect
-workspaces/main/shaders/cnn_*.wgsl               # Original shaders
+workspaces/main/shaders/cnn_*.wgsl               # Original v1 shaders
 ```
 
 ---
diff --git a/src/gpu/effects/cnn_effect.cc b/src/gpu/effects/cnn_effect.cc
index b2305b2..83a3365 100644
--- a/src/gpu/effects/cnn_effect.cc
+++ b/src/gpu/effects/cnn_effect.cc
@@ -79,12 +79,19 @@ void CNNEffect::resize(int width, int height) {
 
 void CNNEffect::render(WGPURenderPassEncoder pass,
                       const CommonPostProcessUniforms& uniforms) {
-  (void)uniforms;
   if (!bind_group_) {
     fprintf(stderr, "CNN render: no bind_group\n");
     return;
   }
 
+  float effective_blend = blend_amount_;
+  if (beat_modulated_) {
+    effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_;
+  }
+
+  CNNLayerParams params = {layer_index_, effective_blend, {0.0f, 0.0f}};
+  params_buffer_.update(ctx_.queue, params);
+
   wgpuRenderPassEncoderSetPipeline(pass, pipeline_);
   wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr);
   wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
diff --git a/src/gpu/effects/cnn_effect.h b/src/gpu/effects/cnn_effect.h
index 1c9f0f3..3e2b7ca 100644
--- a/src/gpu/effects/cnn_effect.h
+++ b/src/gpu/effects/cnn_effect.h
@@ -34,10 +34,17 @@ class CNNEffect : public PostProcessEffect {
     return layer_index_ == 0;
   }
 
+  void set_beat_modulation(bool enabled, float scale = 1.0f) {
+    beat_modulated_ = enabled;
+    beat_scale_ = scale;
+  }
+
  private:
   int layer_index_;
   int total_layers_;
   float blend_amount_;
+  bool beat_modulated_ = false;
+  float beat_scale_ = 1.0f;
   WGPUTextureView input_view_;
   WGPUTextureView original_view_;
   UniformBuffer<CNNLayerParams> params_buffer_;
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc
index 9cb6d57..9c727ba 100644
--- a/src/gpu/effects/cnn_v2_effect.cc
+++ b/src/gpu/effects/cnn_v2_effect.cc
@@ -20,9 +20,24 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx)
       static_features_view_(nullptr),
       layer_pipeline_(nullptr),
       weights_buffer_(nullptr),
-      layer_params_buffer_(nullptr),
       input_mip_tex_(nullptr),
       current_input_view_(nullptr),
+      blend_amount_(1.0f),
+      initialized_(false) {
+  std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
+}
+
+CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params)
+    : PostProcessEffect(ctx),
+      static_pipeline_(nullptr),
+      static_bind_group_(nullptr),
+      static_features_tex_(nullptr),
+      static_features_view_(nullptr),
+      layer_pipeline_(nullptr),
+      weights_buffer_(nullptr),
+      input_mip_tex_(nullptr),
+      current_input_view_(nullptr),
+      blend_amount_(params.blend_amount),
       initialized_(false) {
   std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
 }
@@ -93,13 +108,16 @@ void CNNv2Effect::load_weights() {
   // Upload weights data
   wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data, weights_size);
 
-  // Create uniform buffer for layer params
-  WGPUBufferDescriptor params_desc = {};
-  params_desc.size = sizeof(LayerParams);
-  params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
-  params_desc.mappedAtCreation = false;
+  // Create uniform buffers for layer params (one per layer)
+  for (uint32_t i = 0; i < num_layers; ++i) {
+    WGPUBufferDescriptor params_desc = {};
+    params_desc.size = sizeof(LayerParams);
+    params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+    params_desc.mappedAtCreation = false;
 
-  layer_params_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &params_desc);
+    WGPUBuffer buf = wgpuDeviceCreateBuffer(ctx_.device, &params_desc);
+    layer_params_buffers_.push_back(buf);
+  }
 }
 
 void CNNv2Effect::create_textures() {
@@ -284,8 +302,8 @@ void CNNv2Effect::create_pipelines() {
   if (!layer_module) return;
 
   // Create bind group layout for layer compute
-  // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params
-  WGPUBindGroupLayoutEntry layer_bgl_entries[5] = {};
+  // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input
+  WGPUBindGroupLayoutEntry layer_bgl_entries[6] = {};
 
   // Binding 0: Static features (texture)
   layer_bgl_entries[0].binding = 0;
@@ -317,8 +335,14 @@ void CNNv2Effect::create_pipelines() {
   layer_bgl_entries[4].buffer.type = WGPUBufferBindingType_Uniform;
   layer_bgl_entries[4].buffer.minBindingSize = sizeof(LayerParams);
 
+  // Binding 5: Original input (for blending)
+  layer_bgl_entries[5].binding = 5;
+  layer_bgl_entries[5].visibility = WGPUShaderStage_Compute;
+  layer_bgl_entries[5].texture.sampleType = WGPUTextureSampleType_Float;
+  layer_bgl_entries[5].texture.viewDimension = WGPUTextureViewDimension_2D;
+
   WGPUBindGroupLayoutDescriptor layer_bgl_desc = {};
-  layer_bgl_desc.entryCount = 5;
+  layer_bgl_desc.entryCount = 6;
   layer_bgl_desc.entries = layer_bgl_entries;
 
   WGPUBindGroupLayout layer_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &layer_bgl_desc);
@@ -399,7 +423,7 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
 
   // Create bind group for each layer
   for (size_t i = 0; i < layer_info_.size(); ++i) {
-    WGPUBindGroupEntry layer_entries[5] = {};
+    WGPUBindGroupEntry layer_entries[6] = {};
 
     // Binding 0: Static features (constant)
     layer_entries[0].binding = 0;
@@ -419,14 +443,18 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
     layer_entries[3].buffer = weights_buffer_;
     layer_entries[3].size = wgpuBufferGetSize(weights_buffer_);
 
-    // Binding 4: Layer params (will be updated per dispatch)
+    // Binding 4: Layer params (use dedicated buffer for this layer)
     layer_entries[4].binding = 4;
-    layer_entries[4].buffer = layer_params_buffer_;
+    layer_entries[4].buffer = layer_params_buffers_[i];
     layer_entries[4].size = sizeof(LayerParams);
 
+    // Binding 5: Original input (for blending)
+    layer_entries[5].binding = 5;
+    layer_entries[5].textureView = input_view;
+
     WGPUBindGroupDescriptor layer_bg_desc = {};
     layer_bg_desc.layout = layer_bgl;
-    layer_bg_desc.entryCount = 5;
+    layer_bg_desc.entryCount = 6;
     layer_bg_desc.entries = layer_entries;
 
     WGPUBindGroup layer_bg = wgpuDeviceCreateBindGroup(ctx_.device, &layer_bg_desc);
@@ -438,9 +466,13 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
 
 void CNNv2Effect::compute(WGPUCommandEncoder encoder,
                           const CommonPostProcessUniforms& uniforms) {
-  (void)uniforms;
   if (!initialized_ || !static_pipeline_ || !static_bind_group_) return;
 
+  float effective_blend = blend_amount_;
+  if (beat_modulated_) {
+    effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_;
+  }
+
   // Pass 1: Compute static features
   WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
 
@@ -458,20 +490,20 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
   // Execute CNN layer passes
   if (!layer_pipeline_ || layer_bind_groups_.empty()) return;
 
+  // Update layer params (each layer has own buffer)
   for (size_t i = 0; i < layer_info_.size(); ++i) {
     const LayerInfo& info = layer_info_[i];
 
-    // Update layer params uniform buffer
     LayerParams params;
     params.kernel_size = info.kernel_size;
     params.in_channels = info.in_channels;
     params.out_channels = info.out_channels;
     params.weight_offset = info.weight_offset;
     params.is_output_layer = (i == layer_info_.size() - 1) ? 1 : 0;
+    params.blend_amount = effective_blend;
 
-    wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffer_, 0, &params, sizeof(params));
+    wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params, sizeof(params));
 
-    // Execute layer compute pass
     WGPUComputePassEncoder layer_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
 
     wgpuComputePassEncoderSetPipeline(layer_pass, layer_pipeline_);
@@ -499,7 +531,8 @@ void CNNv2Effect::cleanup() {
 
   if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_);
   if (weights_buffer_) wgpuBufferRelease(weights_buffer_);
-  if (layer_params_buffer_) wgpuBufferRelease(layer_params_buffer_);
+  for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf);
+  layer_params_buffers_.clear();
 
   for (int i = 0; i < 3; ++i) {
     if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]);
diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h
index 6005cf5..4389e4f 100644
--- a/src/gpu/effects/cnn_v2_effect.h
+++ b/src/gpu/effects/cnn_v2_effect.h
@@ -5,9 +5,14 @@
 #include "gpu/effect.h"
 #include <vector>
 
+struct CNNv2EffectParams {
+  float blend_amount = 1.0f;
+};
+
 class CNNv2Effect : public PostProcessEffect {
 public:
   explicit CNNv2Effect(const GpuContext& ctx);
+  explicit CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params);
   ~CNNv2Effect();
 
   void init(MainSequence* demo) override;
@@ -18,6 +23,11 @@ public:
               const CommonPostProcessUniforms& uniforms) override;
   void update_bind_group(WGPUTextureView input_view) override;
 
+  void set_beat_modulation(bool enabled, float scale = 1.0f) {
+    beat_modulated_ = enabled;
+    beat_scale_ = scale;
+  }
+
 private:
   struct LayerInfo {
     uint32_t kernel_size;
@@ -33,6 +43,7 @@ private:
     uint32_t out_channels;
     uint32_t weight_offset;
     uint32_t is_output_layer;
+    float blend_amount;
   };
 
   void create_textures();
@@ -49,7 +60,7 @@ private:
   // CNN layers (storage buffer architecture)
   WGPUComputePipeline layer_pipeline_;      // Single pipeline for all layers
   WGPUBuffer weights_buffer_;               // Storage buffer for weights
-  WGPUBuffer layer_params_buffer_;          // Uniform buffer for per-layer params
+  std::vector<WGPUBuffer> layer_params_buffers_;  // Uniform buffers (one per layer)
   std::vector<LayerInfo> layer_info_;       // Layer metadata
   std::vector<WGPUBindGroup> layer_bind_groups_;  // Per-layer bind groups
   std::vector<WGPUTexture> layer_textures_;       // Ping-pong buffers
@@ -60,5 +71,8 @@ private:
   WGPUTextureView input_mip_view_[3];
   WGPUTextureView current_input_view_;
 
+  float blend_amount_ = 1.0f;
+  bool beat_modulated_ = false;
+  float beat_scale_ = 1.0f;
   bool initialized_;
 };
diff --git a/src/test_demo.cc b/src/test_demo.cc
index 9cbeae2..7f10c3b 100644
--- a/src/test_demo.cc
+++ b/src/test_demo.cc
@@ -22,6 +22,8 @@ extern void LoadTimeline(MainSequence& main_seq, const GpuContext& ctx);
 // Inline peak meter effect for debugging audio-visual sync
 #include "gpu/effects/post_process_helper.h"
 #include "gpu/effects/shader_composer.h"
+#include "gpu/effects/cnn_effect.h"
+#include "gpu/effects/cnn_v2_effect.h"
 
 class PeakMeterEffect : public PostProcessEffect {
  public:
@@ -98,6 +100,8 @@ class PeakMeterEffect : public PostProcessEffect {
   }
 };
 
+static int g_cnn_version = 2;  // Default to v2
+
 #if !defined(STRIP_ALL)
 static void print_usage(const char* prog_name) {
   printf("Usage: %s [OPTIONS]\n", prog_name);
@@ -107,6 +111,7 @@ static void print_usage(const char* prog_name) {
   printf("  --help              Show this help message and exit\n");
   printf("  --fullscreen        Run in fullscreen mode\n");
   printf("  --resolution WxH    Set window resolution (e.g., 1024x768)\n");
+  printf("  --cnn-version <1|2> Select CNN version (1=v1, 2=v2, default=2)\n");
   printf("  --tempo             Enable tempo variation test mode\n");
   printf(
       "                      (alternates between acceleration and "
@@ -123,6 +128,7 @@ static void print_usage(const char* prog_name) {
   printf("\nExamples:\n");
   printf("  %s --fullscreen\n", prog_name);
   printf("  %s --resolution 1024x768 --tempo\n", prog_name);
+  printf("  %s --cnn-version 1\n", prog_name);
   printf("  %s --log-peaks peaks.txt\n", prog_name);
   printf("  %s --log-peaks peaks.txt --log-peaks-fine\n", prog_name);
   printf("\nControls:\n");
@@ -184,6 +190,21 @@ int main(int argc, char** argv) {
       log_peaks_file = argv[++i];
     } else if (strcmp(argv[i], "--log-peaks-fine") == 0) {
       log_peaks_fine = true;
+    } else if (strcmp(argv[i], "--cnn-version") == 0) {
+      if (i + 1 < argc) {
+        int version = atoi(argv[++i]);
+        if (version == 1 || version == 2) {
+          g_cnn_version = version;
+        } else {
+          fprintf(stderr, "Error: --cnn-version must be 1 or 2\n\n");
+          print_usage(argv[0]);
+          return 1;
+        }
+      } else {
+        fprintf(stderr, "Error: --cnn-version requires argument\n\n");
+        print_usage(argv[0]);
+        return 1;
+      }
     } else {
       CHECK_RETURN_BEGIN(true, 1)
       print_usage(argv[0]);
@@ -205,9 +226,25 @@ int main(int argc, char** argv) {
   // Load timeline from test_demo.seq
   LoadTimeline(*gpu_get_main_sequence(), *gpu_get_context());
 
-  // Add peak meter visualization effect (renders as final post-process)
 #if !defined(STRIP_ALL)
   const GpuContext* gpu_ctx = gpu_get_context();
+
+  // Add CNN post-processing effect based on version flag
+  if (g_cnn_version == 1) {
+    CNNEffectParams params;
+    params.blend_amount = 1.0f;
+    auto* cnn = new CNNEffect(*gpu_ctx, params);
+    cnn->set_beat_modulation(true, 1.0f);
+    gpu_add_custom_effect(cnn, 0.0f, 99999.0f, 10);
+  } else if (g_cnn_version == 2) {
+    CNNv2EffectParams params;
+    params.blend_amount = 1.0f;
+    auto* cnn = new CNNv2Effect(*gpu_ctx, params);
+    cnn->set_beat_modulation(true, 1.0f);
+    gpu_add_custom_effect(cnn, 0.0f, 99999.0f, 10);
+  }
+
+  // Add peak meter visualization effect (renders as final post-process)
   auto* peak_meter = new PeakMeterEffect(*gpu_ctx);
   gpu_add_custom_effect(peak_meter, 0.0f, 99999.0f,
                         999); // High priority = renders last
diff --git a/tools/seq_compiler.cc b/tools/seq_compiler.cc
index 069122a..daf1294 100644
--- a/tools/seq_compiler.cc
+++ b/tools/seq_compiler.cc
@@ -1109,6 +1109,21 @@ int main(int argc, char* argv[]) {
                      << ");\n";
             out_file << "    }\n";
           }
+        } else if (!eff.params.empty() && eff.class_name == "CNNv2Effect") {
+          // Generate parameter struct initialization for CNNv2Effect
+          out_file << "    {\n";
+          out_file << "      CNNv2EffectParams p;\n";
+
+          for (const auto& [key, value] : eff.params) {
+            if (key == "blend") {
+              out_file << "      p.blend_amount = " << value << "f;\n";
+            }
+          }
+
+          out_file << "      seq->add_effect(std::make_shared<"
+                   << eff.class_name << ">(ctx, p), " << eff.start << "f, "
+                   << eff.end << "f, " << eff.priority << ");\n";
+          out_file << "    }\n";
         } else {
           // No parameters or unsupported effect - use default constructor
           out_file << "    seq->add_effect(std::make_shared<" << eff.class_name
diff --git a/training/export_cnn_v2_shader.py b/training/export_cnn_v2_shader.py
index 3c53ce2..add28d2 100755
--- a/training/export_cnn_v2_shader.py
+++ b/training/export_cnn_v2_shader.py
@@ -144,7 +144,7 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {{
 }}
 """
 
-    output_path = Path(output_dir) / f"cnn_v2_layer_{layer_idx}.wgsl"
+    output_path = Path(output_dir) / "cnn_v2" / f"cnn_v2_layer_{layer_idx}.wgsl"
     output_path.write_text(shader_code)
     print(f"  → {output_path}")
 
diff --git a/training/export_cnn_v2_weights.py b/training/export_cnn_v2_weights.py
index 723f572..d8c7c10 100755
--- a/training/export_cnn_v2_weights.py
+++ b/training/export_cnn_v2_weights.py
@@ -248,7 +248,7 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
 }
 """
 
-    output_path = Path(output_dir) / "cnn_v2_compute.wgsl"
+    output_path = Path(output_dir) / "cnn_v2" / "cnn_v2_compute.wgsl"
     output_path.write_text(shader_code)
     print(f"  → {output_path}")
 
diff --git a/workspaces/main/shaders/cnn_v2_compute.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
index b19a692..1e1704d 100644
--- a/workspaces/main/shaders/cnn_v2_compute.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
@@ -9,6 +9,7 @@ struct LayerParams {
   out_channels: u32,
   weight_offset: u32,     // Offset in f16 units
   is_output_layer: u32,   // 1 if final layer (sigmoid), 0 otherwise (relu)
+  blend_amount: f32,      // [0,1] blend with original
 }
 
 @group(0) @binding(0) var static_features: texture_2d<u32>;       // 8-channel static features
@@ -16,6 +17,7 @@ struct LayerParams {
 @group(0) @binding(2) var output_tex: texture_storage_2d<rgba32uint, write>;  // Current layer output
 @group(0) @binding(3) var<storage, read> weights_buffer: array<u32>;  // Packed f16 weights
 @group(0) @binding(4) var<uniform> params: LayerParams;
+@group(0) @binding(5) var original_input: texture_2d<f32>;        // Original RGB input for blending
 
 fn unpack_static_features(coord: vec2<i32>) -> array<f32, 8> {
   let packed = textureLoad(static_features, coord, 0);
@@ -133,5 +135,15 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
     output[c] = 0.0;
   }
 
+  // Blend with original on final layer
+  if (is_output) {
+    let original = textureLoad(original_input, coord, 0).rgb;
+    let result_rgb = vec3<f32>(output[0], output[1], output[2]);
+    let blended = mix(original, result_rgb, params.blend_amount);
+    output[0] = blended.r;
+    output[1] = blended.g;
+    output[2] = blended.b;
+  }
+
   textureStore(output_tex, coord, pack_channels(output));
 }
diff --git a/workspaces/main/shaders/cnn_v2_layer_0.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_0.wgsl
index 8e14957..8e14957 100644
--- a/workspaces/main/shaders/cnn_v2_layer_0.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_0.wgsl
diff --git a/workspaces/main/shaders/cnn_v2_layer_1.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_1.wgsl
index f490d13..f490d13 100644
--- a/workspaces/main/shaders/cnn_v2_layer_1.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_1.wgsl
diff --git a/workspaces/main/shaders/cnn_v2_layer_2.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_2.wgsl
index 2f9836a..2f9836a 100644
--- a/workspaces/main/shaders/cnn_v2_layer_2.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_2.wgsl
diff --git a/workspaces/main/shaders/cnn_v2_layer_template.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_template.wgsl
index 1bf6819..1bf6819 100644
--- a/workspaces/main/shaders/cnn_v2_layer_template.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_template.wgsl
diff --git a/workspaces/main/shaders/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
index c3a2de7..dd07f19 100644
--- a/workspaces/main/shaders/cnn_v2_static.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
@@ -25,9 +25,9 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
   // Sample depth
   let d = textureLoad(depth_tex, coord, 0).r;
 
-  // UV coordinates (normalized [0,1])
+  // UV coordinates (normalized [0,1], bottom-left origin)
   let uv_x = f32(coord.x) / f32(dims.x);
-  let uv_y = f32(coord.y) / f32(dims.y);
+  let uv_y = 1.0 - (f32(coord.y) / f32(dims.y));
 
   // Multi-frequency position encoding
   let sin10_x = sin(10.0 * uv_x);
author	skal <pascal.massimino@gmail.com>	2026-02-12 15:10:17 +0100
committer	skal <pascal.massimino@gmail.com>	2026-02-12 15:10:17 +0100
commit	8b30cadfc19647487986d14dba9ddba7908dd1d0 (patch)
tree	f865b42945f72bfc480e2c2a6849127bf56d1a59
parent	1effb125973ac0948de3015be1d53ae72463858b (diff)