diff options
Diffstat (limited to 'src')
-rw-r--r--  src/gpu/effect.cc                   |  37
-rw-r--r--  src/gpu/effect.h                    |   6
-rw-r--r--  src/gpu/effects/cnn_effect.cc       | 130
-rw-r--r--  src/gpu/effects/cnn_effect.h        |  22
-rw-r--r--  src/tests/gpu/test_demo_effects.cc  |   2
5 files changed, 184 insertions, 13 deletions
diff --git a/src/gpu/effect.cc b/src/gpu/effect.cc index 6a4762c..0662f26 100644 --- a/src/gpu/effect.cc +++ b/src/gpu/effect.cc @@ -65,7 +65,7 @@ void Sequence::update_active_list(float seq_time) { #if !defined(STRIP_ALL) Effect* effect_ptr = item.effect.get(); const char* effect_name = typeid(*effect_ptr).name(); - printf(" [EFFECT START] %s (priority=%d, time=%.2f-%.2f)\n", effect_name, + printf(" [EFFECT START] <%s> (priority=%d, time=%.2f-%.2f)\n", effect_name, item.priority, item.start_time, item.end_time); #endif item.effect->start(); @@ -74,7 +74,7 @@ void Sequence::update_active_list(float seq_time) { #if !defined(STRIP_ALL) Effect* effect_ptr = item.effect.get(); const char* effect_name = typeid(*effect_ptr).name(); - printf(" [EFFECT END] %s (priority=%d)\n", effect_name, item.priority); + printf(" [EFFECT END] <%s> (priority=%d)\n", effect_name, item.priority); #endif item.effect->end(); item.active = false; @@ -339,6 +339,39 @@ void MainSequence::render_frame(float global_time, float beat, float peak, PostProcessEffect* pp = (PostProcessEffect*)(post_effects[i]->effect.get()); + + // Capture framebuffer if effect needs it + if (pp->needs_framebuffer_capture()) { + WGPUTextureView captured_view = get_auxiliary_view("captured_frame"); + if (captured_view) { + // Get source texture from current_input view + // Note: This is a simplified blit using a render pass + WGPURenderPassColorAttachment capture_attachment = {}; + capture_attachment.view = captured_view; + capture_attachment.resolveTarget = nullptr; + capture_attachment.loadOp = WGPULoadOp_Load; + capture_attachment.storeOp = WGPUStoreOp_Store; +#if !defined(DEMO_CROSS_COMPILE_WIN32) + capture_attachment.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED; +#endif + WGPURenderPassDescriptor capture_desc = { + .colorAttachmentCount = 1, .colorAttachments = &capture_attachment}; + WGPURenderPassEncoder capture_pass = + wgpuCommandEncoderBeginRenderPass(encoder, &capture_desc); + 
wgpuRenderPassEncoderSetViewport(capture_pass, 0.0f, 0.0f, + (float)width_, (float)height_, 0.0f, + 1.0f); + + // Use passthrough effect to copy current_input to captured_frame + PostProcessEffect* passthrough = + (PostProcessEffect*)passthrough_effect_.get(); + passthrough->update_bind_group(current_input); + passthrough->render(capture_pass, 0, 0, 0, aspect_ratio); + + wgpuRenderPassEncoderEnd(capture_pass); + } + } + pp->update_bind_group(current_input); WGPURenderPassColorAttachment pp_attachment = {}; diff --git a/src/gpu/effect.h b/src/gpu/effect.h index 8f35f3c..f008c8d 100644 --- a/src/gpu/effect.h +++ b/src/gpu/effect.h @@ -44,6 +44,12 @@ class Effect { return false; } + // If true, MainSequence will capture current framebuffer to "captured_frame" + // auxiliary texture before rendering this effect + virtual bool needs_framebuffer_capture() const { + return false; + } + protected: const GpuContext& ctx_; UniformBuffer<CommonPostProcessUniforms> uniforms_; diff --git a/src/gpu/effects/cnn_effect.cc b/src/gpu/effects/cnn_effect.cc index 25db0c2..f5d0a51 100644 --- a/src/gpu/effects/cnn_effect.cc +++ b/src/gpu/effects/cnn_effect.cc @@ -4,19 +4,101 @@ #include "gpu/effects/cnn_effect.h" #include "gpu/effects/post_process_helper.h" #include "gpu/effects/shaders.h" +#include "gpu/effects/shader_composer.h" +#include "gpu/effect.h" -CNNEffect::CNNEffect(const GpuContext& ctx, int num_layers) - : PostProcessEffect(ctx), num_layers_(num_layers), input_view_(nullptr), +// Create custom pipeline with 5 bindings (includes original texture) +static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device, + WGPUTextureFormat format, + const char* shader_code) { + std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code); + + WGPUShaderModuleDescriptor shader_desc = {}; + WGPUShaderSourceWGSL wgsl_src = {}; + wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; + wgsl_src.code = str_view(composed_shader.c_str()); + shader_desc.nextInChain = 
&wgsl_src.chain; + WGPUShaderModule shader_module = + wgpuDeviceCreateShaderModule(device, &shader_desc); + + WGPUBindGroupLayoutEntry bgl_entries[5] = {}; + bgl_entries[0].binding = 0; // sampler + bgl_entries[0].visibility = WGPUShaderStage_Fragment; + bgl_entries[0].sampler.type = WGPUSamplerBindingType_Filtering; + bgl_entries[1].binding = 1; // input texture + bgl_entries[1].visibility = WGPUShaderStage_Fragment; + bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D; + bgl_entries[2].binding = 2; // uniforms + bgl_entries[2].visibility = WGPUShaderStage_Vertex | WGPUShaderStage_Fragment; + bgl_entries[2].buffer.type = WGPUBufferBindingType_Uniform; + bgl_entries[3].binding = 3; // effect params + bgl_entries[3].visibility = WGPUShaderStage_Fragment; + bgl_entries[3].buffer.type = WGPUBufferBindingType_Uniform; + bgl_entries[4].binding = 4; // original texture + bgl_entries[4].visibility = WGPUShaderStage_Fragment; + bgl_entries[4].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[4].texture.viewDimension = WGPUTextureViewDimension_2D; + + WGPUBindGroupLayoutDescriptor bgl_desc = {}; + bgl_desc.entryCount = 5; + bgl_desc.entries = bgl_entries; + WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(device, &bgl_desc); + + WGPUPipelineLayoutDescriptor pl_desc = {}; + pl_desc.bindGroupLayoutCount = 1; + pl_desc.bindGroupLayouts = &bgl; + WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(device, &pl_desc); + + WGPUColorTargetState color_target = {}; + color_target.format = format; + color_target.writeMask = WGPUColorWriteMask_All; + + WGPUFragmentState fragment_state = {}; + fragment_state.module = shader_module; + fragment_state.entryPoint = str_view("fs_main"); + fragment_state.targetCount = 1; + fragment_state.targets = &color_target; + + WGPURenderPipelineDescriptor pipeline_desc = {}; + pipeline_desc.layout = pl; + pipeline_desc.vertex.module = 
shader_module; + pipeline_desc.vertex.entryPoint = str_view("vs_main"); + pipeline_desc.fragment = &fragment_state; + pipeline_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList; + pipeline_desc.multisample.count = 1; + pipeline_desc.multisample.mask = 0xFFFFFFFF; + + return wgpuDeviceCreateRenderPipeline(device, &pipeline_desc); +} + +CNNEffect::CNNEffect(const GpuContext& ctx) + : PostProcessEffect(ctx), layer_index_(0), total_layers_(1), + blend_amount_(1.0f), input_view_(nullptr), original_view_(nullptr), bind_group_(nullptr) { - pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, - cnn_layer_shader_wgsl); + pipeline_ = create_cnn_pipeline(ctx_.device, ctx_.format, + cnn_layer_shader_wgsl); +} + +CNNEffect::CNNEffect(const GpuContext& ctx, const CNNEffectParams& params) + : PostProcessEffect(ctx), layer_index_(params.layer_index), + total_layers_(params.total_layers), blend_amount_(params.blend_amount), + input_view_(nullptr), original_view_(nullptr), bind_group_(nullptr) { + pipeline_ = create_cnn_pipeline(ctx_.device, ctx_.format, + cnn_layer_shader_wgsl); } void CNNEffect::init(MainSequence* demo) { PostProcessEffect::init(demo); + demo_ = demo; params_buffer_.init(ctx_.device); - CNNLayerParams params = {0, 1, {0.0f, 0.0f}}; + // Register captured_frame texture (used by all layers for original input) + if (layer_index_ == 0) { + demo_->register_auxiliary_texture("captured_frame", width_, height_); + } + + CNNLayerParams params = {layer_index_, blend_amount_, {0.0f, 0.0f}}; params_buffer_.update(ctx_.queue, params); } @@ -31,6 +113,40 @@ void CNNEffect::render(WGPURenderPassEncoder pass, float time, float beat, void CNNEffect::update_bind_group(WGPUTextureView input_view) { input_view_ = input_view; - pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, - input_view_, uniforms_.get(), params_buffer_.get()); + + // All layers: get captured frame (original input from layer 0) + if (demo_) { + original_view_ = 
demo_->get_auxiliary_view("captured_frame"); + } + + // Create bind group with original texture + if (bind_group_) + wgpuBindGroupRelease(bind_group_); + + WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline_, 0); + WGPUSamplerDescriptor sd = {}; + sd.magFilter = WGPUFilterMode_Linear; + sd.minFilter = WGPUFilterMode_Linear; + sd.maxAnisotropy = 1; + WGPUSampler sampler = wgpuDeviceCreateSampler(ctx_.device, &sd); + + WGPUBindGroupEntry bge[5] = {}; + bge[0].binding = 0; + bge[0].sampler = sampler; + bge[1].binding = 1; + bge[1].textureView = input_view_; + bge[2].binding = 2; + bge[2].buffer = uniforms_.get().buffer; + bge[2].size = uniforms_.get().size; + bge[3].binding = 3; + bge[3].buffer = params_buffer_.get().buffer; + bge[3].size = params_buffer_.get().size; + bge[4].binding = 4; + bge[4].textureView = original_view_ ? original_view_ : input_view_; // Fallback + + WGPUBindGroupDescriptor bgd = {}; + bgd.layout = bgl; + bgd.entryCount = 5; + bgd.entries = bge; + bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bgd); } diff --git a/src/gpu/effects/cnn_effect.h b/src/gpu/effects/cnn_effect.h index 9cc4935..bc074d2 100644 --- a/src/gpu/effects/cnn_effect.h +++ b/src/gpu/effects/cnn_effect.h @@ -7,23 +7,39 @@ struct CNNLayerParams { int layer_index; - int use_residual; + float blend_amount; // Blend: mix(input, output, blend_amount) float _pad[2]; }; static_assert(sizeof(CNNLayerParams) == 16); +struct CNNEffectParams { + int layer_index = 0; // Which layer to render (0-based) + int total_layers = 1; // Total number of layers in the CNN + float blend_amount = 1.0f; // Final blend with original input +}; + class CNNEffect : public PostProcessEffect { public: - explicit CNNEffect(const GpuContext& ctx, int num_layers = 1); + explicit CNNEffect(const GpuContext& ctx); + explicit CNNEffect(const GpuContext& ctx, const CNNEffectParams& params); void init(MainSequence* demo) override; void render(WGPURenderPassEncoder pass, float time, float 
beat, float intensity, float aspect_ratio) override; void update_bind_group(WGPUTextureView input_view) override; + // Layer 0 needs framebuffer capture for original input + bool needs_framebuffer_capture() const override { + return layer_index_ == 0; + } + private: - int num_layers_; + int layer_index_; + int total_layers_; + float blend_amount_; WGPUTextureView input_view_; + WGPUTextureView original_view_; UniformBuffer<CNNLayerParams> params_buffer_; WGPUBindGroup bind_group_; + MainSequence* demo_ = nullptr; }; diff --git a/src/tests/gpu/test_demo_effects.cc b/src/tests/gpu/test_demo_effects.cc index 9281413..619b9c9 100644 --- a/src/tests/gpu/test_demo_effects.cc +++ b/src/tests/gpu/test_demo_effects.cc @@ -89,7 +89,7 @@ static void test_post_process_effects() { {"ThemeModulationEffect", std::make_shared<ThemeModulationEffect>(fixture.ctx())}, {"VignetteEffect", std::make_shared<VignetteEffect>(fixture.ctx())}, - {"CNNEffect", std::make_shared<CNNEffect>(fixture.ctx(), 1)}, + {"CNNEffect", std::make_shared<CNNEffect>(fixture.ctx())}, }; int passed = 0; |
