-rw-r--r--  doc/CNN_EFFECT.md                                        150
-rw-r--r--  src/gpu/effect.cc                                         37
-rw-r--r--  src/gpu/effect.h                                           6
-rw-r--r--  src/gpu/effects/cnn_effect.cc                            130
-rw-r--r--  src/gpu/effects/cnn_effect.h                              22
-rw-r--r--  src/tests/gpu/test_demo_effects.cc                         2
-rw-r--r--  tools/seq_compiler.cc                                     32
-rwxr-xr-x  training/train_cnn.py                                    159
-rw-r--r--  workspaces/main/shaders/cnn/cnn_layer.wgsl                25
-rw-r--r--  workspaces/main/shaders/cnn/cnn_weights_generated.wgsl   194
-rw-r--r--  workspaces/main/timeline.seq                             190
11 files changed, 777 insertions, 170 deletions
diff --git a/doc/CNN_EFFECT.md b/doc/CNN_EFFECT.md
index ec70b13..ae0f38a 100644
--- a/doc/CNN_EFFECT.md
+++ b/doc/CNN_EFFECT.md
@@ -10,10 +10,11 @@ Trainable convolutional neural network layers for artistic stylization (painterl
**Key Features:**
- Position-aware layer 0 (coordinate input for vignetting, edge effects)
-- Multi-layer convolutions (3×3, 5×5, 7×7 kernels)
+- Multi-layer convolutions (3×3, 5×5, 7×7 kernels) with automatic chaining
+- Original input available to all layers via framebuffer capture
+- Configurable final blend with original scene
- Modular WGSL shader architecture
- Hardcoded weights (trained offline via PyTorch)
-- Residual connections for stable learning
- ~5-8 KB binary footprint
---
@@ -42,19 +43,34 @@ fn cnn_conv3x3_with_coord(
**Use cases:** Position-dependent stylization (vignettes, corner darkening, radial gradients)
+### Multi-Layer Architecture
+
+CNNEffect supports multi-layer networks via automatic effect chaining:
+
+1. **Timeline specifies total layers**: `CNNEffect layers=3 blend=0.7`
+2. **Compiler expands to chain**: 3 separate CNNEffect instances (layer 0→1→2)
+3. **Framebuffer capture**: Layer 0 captures original input to `"captured_frame"`
+4. **Original input binding**: All layers access original via `@binding(4)`
+5. **Final blend**: Last layer blends result with original: `mix(original, result, 0.7)`
+
+**Framebuffer Capture API:**
+- `Effect::needs_framebuffer_capture()` - effect requests pre-capture
+- MainSequence automatically blits input → `"captured_frame"` auxiliary texture
+- Generic mechanism usable by any effect (see the sketch below)
+
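+A minimal sketch of an effect opting into capture (the class name is
+hypothetical; `needs_framebuffer_capture()`, `register_auxiliary_texture()` and
+`get_auxiliary_view()` are the real hooks described above):
+
+```cpp
+class MyCaptureEffect : public PostProcessEffect {
+ public:
+  explicit MyCaptureEffect(const GpuContext& ctx) : PostProcessEffect(ctx) {}
+
+  // MainSequence blits the current input to "captured_frame" before this
+  // effect renders.
+  bool needs_framebuffer_capture() const override { return true; }
+
+  void init(MainSequence* demo) override {
+    PostProcessEffect::init(demo);
+    demo_ = demo;
+    demo_->register_auxiliary_texture("captured_frame", width_, height_);
+  }
+
+  void update_bind_group(WGPUTextureView input_view) override {
+    // Fetch the pre-effect frame and bind it next to the regular input.
+    WGPUTextureView original = demo_->get_auxiliary_view("captured_frame");
+    // ... build the bind group with input_view and original ...
+  }
+
+ private:
+  MainSequence* demo_ = nullptr;
+};
+```
+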
### File Structure
```
src/gpu/effects/
- cnn_effect.h/cc # CNNEffect class
+ cnn_effect.h/cc # CNNEffect class + framebuffer capture
workspaces/main/shaders/cnn/
cnn_activation.wgsl # tanh, ReLU, sigmoid, leaky_relu
cnn_conv3x3.wgsl # 3×3 convolution (standard + coord-aware)
cnn_conv5x5.wgsl # 5×5 convolution (standard + coord-aware)
cnn_conv7x7.wgsl # 7×7 convolution (standard + coord-aware)
- cnn_weights_generated.wgsl # Weight arrays (auto-generated)
- cnn_layer.wgsl # Main shader (composes above snippets)
+ cnn_weights_generated.wgsl # Weight arrays (auto-generated by train_cnn.py)
+ cnn_layer.wgsl # Main shader with layer switches (auto-generated by train_cnn.py)
```
---
@@ -89,7 +105,7 @@ python3 training/train_cnn.py \
--checkpoint-every 50
```
-**Multi-layer example:**
+**Multi-layer example (3 layers with varying kernel sizes):**
```bash
python3 training/train_cnn.py \
--input training/input \
@@ -100,6 +116,10 @@ python3 training/train_cnn.py \
--checkpoint-every 100
```
+**Note:** The training script auto-generates:
+- `cnn_weights_generated.wgsl` - weight arrays for all layers
+- `cnn_layer.wgsl` - shader with layer switches and original input binding
+
**Resume from checkpoint:**
```bash
python3 training/train_cnn.py \
@@ -108,9 +128,16 @@ python3 training/train_cnn.py \
--resume training/checkpoints/checkpoint_epoch_200.pth
```
+**Export WGSL from checkpoint (no training):**
+```bash
+python3 training/train_cnn.py \
+ --export-only training/checkpoints/checkpoint_epoch_200.pth \
+ --output workspaces/main/shaders/cnn/cnn_weights_generated.wgsl
+```
+
### 3. Rebuild Demo
-Training script auto-generates `cnn_weights_generated.wgsl`:
+The training script auto-generates both `cnn_weights_generated.wgsl` and `cnn_layer.wgsl`:
```bash
cmake --build build -j4
./build/demo64k
@@ -122,23 +149,101 @@ cmake --build build -j4
### C++ Integration
+**Single layer (manual):**
```cpp
#include "gpu/effects/cnn_effect.h"
-auto cnn = std::make_shared<CNNEffect>(ctx, /*num_layers=*/1);
+CNNEffectParams p;
+p.layer_index = 0;
+p.total_layers = 1;
+p.blend_amount = 1.0f;
+auto cnn = std::make_shared<CNNEffect>(ctx, p);
timeline.add_effect(cnn, start_time, end_time);
```
-### Timeline Example
+**Multi-layer (automatic via timeline compiler):**
+Use the timeline syntax below; `seq_compiler` expands it into multiple chained instances.
+
+### Timeline Examples
+
+**Single-layer CNN (full stylization):**
+```
+SEQUENCE 10.0 0
+ EFFECT + Hybrid3DEffect 0.00 5.00
+ EFFECT + CNNEffect 0.50 5.00 layers=1
+```
+
+**Multi-layer CNN with blend:**
```
SEQUENCE 10.0 0
- EFFECT CNNEffect 10.0 15.0 0
+ EFFECT + Hybrid3DEffect 0.00 5.00
+ EFFECT + CNNEffect 0.50 5.00 layers=3 blend=0.7
+```
+
+Expands to:
+```cpp
+// Layer 0 (captures original, blend=1.0)
+{
+ CNNEffectParams p;
+ p.layer_index = 0;
+ p.total_layers = 3;
+ p.blend_amount = 1.0f;
+ seq->add_effect(std::make_shared<CNNEffect>(ctx, p), 0.50f, 5.00f, 1);
+}
+// Layer 1 (blend=1.0)
+{
+ CNNEffectParams p;
+ p.layer_index = 1;
+ p.total_layers = 3;
+ p.blend_amount = 1.0f;
+ seq->add_effect(std::make_shared<CNNEffect>(ctx, p), 0.50f, 5.00f, 2);
+}
+// Layer 2 (final blend=0.7)
+{
+ CNNEffectParams p;
+ p.layer_index = 2;
+ p.total_layers = 3;
+ p.blend_amount = 0.7f;
+ seq->add_effect(std::make_shared<CNNEffect>(ctx, p), 0.50f, 5.00f, 3);
+}
```
---
-## Weight Storage
+## Shader Structure
+
+**Bindings:**
+```wgsl
+@group(0) @binding(0) var smplr: sampler;
+@group(0) @binding(1) var txt: texture_2d<f32>; // Current layer input
+@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
+@group(0) @binding(3) var<uniform> params: CNNLayerParams;
+@group(0) @binding(4) var original_input: texture_2d<f32>; // Layer 0 input (captured)
+```
+
+**Fragment shader logic:**
+```wgsl
+@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> {
+ let uv = p.xy / uniforms.resolution;
+ let input = textureSample(txt, smplr, uv); // Layer N-1 output
+ let original = textureSample(original_input, smplr, uv); // Layer 0 input
+
+ var result = vec4<f32>(0.0);
+
+ if (params.layer_index == 0) {
+ result = cnn_conv3x3_with_coord(txt, smplr, uv, uniforms.resolution,
+ rgba_weights_layer0, coord_weights_layer0, bias_layer0);
+ result = cnn_tanh(result);
+ }
+ // ... other layers
+
+ // Blend with ORIGINAL input (not previous layer)
+ return mix(original, result, params.blend_amount);
+}
+```
+
+**Weight Storage:**
**Layer 0 (coordinate-aware):**
```wgsl
@@ -188,15 +293,36 @@ const bias_layer1 = vec4<f32>(0.0, 0.0, 0.0, 0.0);
---
+## Blend Parameter Behavior
+
+**blend_amount** controls final compositing with the original (see the sketch after this list):
+- `blend=0.0`: Pure original (no CNN effect)
+- `blend=0.5`: 50% original + 50% CNN
+- `blend=1.0`: Pure CNN output (full stylization)
+
+**Important:** Blend uses the captured layer 0 input, not the previous layer's output.
+
+**Example use cases:**
+- `blend=1.0`: Full stylization (default)
+- `blend=0.7`: Subtle effect preserving original details
+- `blend=0.3`: Light artistic touch
+
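+The compositing is a straight linear interpolation; a minimal CPU-side sketch
+of the same math (illustrative only):
+
+```cpp
+// mix(original, result, blend) as evaluated per channel in the shader:
+float blend_channel(float original, float result, float blend) {
+  return original * (1.0f - blend) + result * blend;
+}
+// blend = 0.0 -> original unchanged; blend = 1.0 -> pure CNN output.
+```
+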
## Troubleshooting
**Shader compilation fails:**
- Check `cnn_weights_generated.wgsl` syntax
- Verify snippets registered in `shaders.cc::InitShaderComposer()`
+- Ensure `cnn_layer.wgsl` has 5 bindings (including `original_input`)
**Black/corrupted output:**
- Weights untrained (identity placeholder)
-- Check residual blending (0.3 default)
+- Check `captured_frame` auxiliary texture is registered
+- Verify layer priorities in timeline are sequential
+
+**Wrong blend result:**
+- Ensure layer 0 has `needs_framebuffer_capture() == true`
+- Check MainSequence framebuffer capture logic
+- Verify `original_input` binding is populated
**Training loss not decreasing:**
- Lower learning rate (`--learning-rate 0.0001`)
diff --git a/src/gpu/effect.cc b/src/gpu/effect.cc
index 6a4762c..0662f26 100644
--- a/src/gpu/effect.cc
+++ b/src/gpu/effect.cc
@@ -65,7 +65,7 @@ void Sequence::update_active_list(float seq_time) {
#if !defined(STRIP_ALL)
Effect* effect_ptr = item.effect.get();
const char* effect_name = typeid(*effect_ptr).name();
- printf(" [EFFECT START] %s (priority=%d, time=%.2f-%.2f)\n", effect_name,
+ printf(" [EFFECT START] <%s> (priority=%d, time=%.2f-%.2f)\n", effect_name,
item.priority, item.start_time, item.end_time);
#endif
item.effect->start();
@@ -74,7 +74,7 @@ void Sequence::update_active_list(float seq_time) {
#if !defined(STRIP_ALL)
Effect* effect_ptr = item.effect.get();
const char* effect_name = typeid(*effect_ptr).name();
- printf(" [EFFECT END] %s (priority=%d)\n", effect_name, item.priority);
+ printf(" [EFFECT END] <%s> (priority=%d)\n", effect_name, item.priority);
#endif
item.effect->end();
item.active = false;
@@ -339,6 +339,39 @@ void MainSequence::render_frame(float global_time, float beat, float peak,
PostProcessEffect* pp =
(PostProcessEffect*)(post_effects[i]->effect.get());
+
+ // Capture framebuffer if effect needs it
+ if (pp->needs_framebuffer_capture()) {
+ WGPUTextureView captured_view = get_auxiliary_view("captured_frame");
+ if (captured_view) {
+ // Get source texture from current_input view
+ // Note: This is a simplified blit using a render pass
+ WGPURenderPassColorAttachment capture_attachment = {};
+ capture_attachment.view = captured_view;
+ capture_attachment.resolveTarget = nullptr;
+ capture_attachment.loadOp = WGPULoadOp_Load;
+ capture_attachment.storeOp = WGPUStoreOp_Store;
+#if !defined(DEMO_CROSS_COMPILE_WIN32)
+ capture_attachment.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
+#endif
+ WGPURenderPassDescriptor capture_desc = {
+ .colorAttachmentCount = 1, .colorAttachments = &capture_attachment};
+ WGPURenderPassEncoder capture_pass =
+ wgpuCommandEncoderBeginRenderPass(encoder, &capture_desc);
+ wgpuRenderPassEncoderSetViewport(capture_pass, 0.0f, 0.0f,
+ (float)width_, (float)height_, 0.0f,
+ 1.0f);
+
+ // Use passthrough effect to copy current_input to captured_frame
+ PostProcessEffect* passthrough =
+ (PostProcessEffect*)passthrough_effect_.get();
+ passthrough->update_bind_group(current_input);
+ passthrough->render(capture_pass, 0, 0, 0, aspect_ratio);
+
+ wgpuRenderPassEncoderEnd(capture_pass);
+ }
+ }
+
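+      // The capture above runs before update_bind_group() below, so effects
+      // that bind "captured_frame" (e.g. CNNEffect @binding(4)) sample this
+      // frame's pre-effect input rather than a stale one.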
pp->update_bind_group(current_input);
WGPURenderPassColorAttachment pp_attachment = {};
diff --git a/src/gpu/effect.h b/src/gpu/effect.h
index 8f35f3c..f008c8d 100644
--- a/src/gpu/effect.h
+++ b/src/gpu/effect.h
@@ -44,6 +44,12 @@ class Effect {
return false;
}
+ // If true, MainSequence will capture current framebuffer to "captured_frame"
+ // auxiliary texture before rendering this effect
+ virtual bool needs_framebuffer_capture() const {
+ return false;
+ }
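+  // Overriding effects typically register the texture in init() and fetch it
+  // when building their bind group:
+  //   demo->register_auxiliary_texture("captured_frame", width_, height_);
+  //   demo->get_auxiliary_view("captured_frame");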
+
protected:
const GpuContext& ctx_;
UniformBuffer<CommonPostProcessUniforms> uniforms_;
diff --git a/src/gpu/effects/cnn_effect.cc b/src/gpu/effects/cnn_effect.cc
index 25db0c2..f5d0a51 100644
--- a/src/gpu/effects/cnn_effect.cc
+++ b/src/gpu/effects/cnn_effect.cc
@@ -4,19 +4,101 @@
#include "gpu/effects/cnn_effect.h"
#include "gpu/effects/post_process_helper.h"
#include "gpu/effects/shaders.h"
+#include "gpu/effects/shader_composer.h"
+#include "gpu/effect.h"
-CNNEffect::CNNEffect(const GpuContext& ctx, int num_layers)
- : PostProcessEffect(ctx), num_layers_(num_layers), input_view_(nullptr),
+// Create custom pipeline with 5 bindings (includes original texture)
+static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device,
+ WGPUTextureFormat format,
+ const char* shader_code) {
+ std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code);
+
+ WGPUShaderModuleDescriptor shader_desc = {};
+ WGPUShaderSourceWGSL wgsl_src = {};
+ wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+ wgsl_src.code = str_view(composed_shader.c_str());
+ shader_desc.nextInChain = &wgsl_src.chain;
+ WGPUShaderModule shader_module =
+ wgpuDeviceCreateShaderModule(device, &shader_desc);
+
+ WGPUBindGroupLayoutEntry bgl_entries[5] = {};
+ bgl_entries[0].binding = 0; // sampler
+ bgl_entries[0].visibility = WGPUShaderStage_Fragment;
+ bgl_entries[0].sampler.type = WGPUSamplerBindingType_Filtering;
+ bgl_entries[1].binding = 1; // input texture
+ bgl_entries[1].visibility = WGPUShaderStage_Fragment;
+ bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float;
+ bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D;
+ bgl_entries[2].binding = 2; // uniforms
+ bgl_entries[2].visibility = WGPUShaderStage_Vertex | WGPUShaderStage_Fragment;
+ bgl_entries[2].buffer.type = WGPUBufferBindingType_Uniform;
+ bgl_entries[3].binding = 3; // effect params
+ bgl_entries[3].visibility = WGPUShaderStage_Fragment;
+ bgl_entries[3].buffer.type = WGPUBufferBindingType_Uniform;
+ bgl_entries[4].binding = 4; // original texture
+ bgl_entries[4].visibility = WGPUShaderStage_Fragment;
+ bgl_entries[4].texture.sampleType = WGPUTextureSampleType_Float;
+ bgl_entries[4].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+ WGPUBindGroupLayoutDescriptor bgl_desc = {};
+ bgl_desc.entryCount = 5;
+ bgl_desc.entries = bgl_entries;
+ WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(device, &bgl_desc);
+
+ WGPUPipelineLayoutDescriptor pl_desc = {};
+ pl_desc.bindGroupLayoutCount = 1;
+ pl_desc.bindGroupLayouts = &bgl;
+ WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(device, &pl_desc);
+
+ WGPUColorTargetState color_target = {};
+ color_target.format = format;
+ color_target.writeMask = WGPUColorWriteMask_All;
+
+ WGPUFragmentState fragment_state = {};
+ fragment_state.module = shader_module;
+ fragment_state.entryPoint = str_view("fs_main");
+ fragment_state.targetCount = 1;
+ fragment_state.targets = &color_target;
+
+ WGPURenderPipelineDescriptor pipeline_desc = {};
+ pipeline_desc.layout = pl;
+ pipeline_desc.vertex.module = shader_module;
+ pipeline_desc.vertex.entryPoint = str_view("vs_main");
+ pipeline_desc.fragment = &fragment_state;
+ pipeline_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
+ pipeline_desc.multisample.count = 1;
+ pipeline_desc.multisample.mask = 0xFFFFFFFF;
+
+ return wgpuDeviceCreateRenderPipeline(device, &pipeline_desc);
+}
+
+CNNEffect::CNNEffect(const GpuContext& ctx)
+ : PostProcessEffect(ctx), layer_index_(0), total_layers_(1),
+ blend_amount_(1.0f), input_view_(nullptr), original_view_(nullptr),
bind_group_(nullptr) {
- pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format,
- cnn_layer_shader_wgsl);
+ pipeline_ = create_cnn_pipeline(ctx_.device, ctx_.format,
+ cnn_layer_shader_wgsl);
+}
+
+CNNEffect::CNNEffect(const GpuContext& ctx, const CNNEffectParams& params)
+ : PostProcessEffect(ctx), layer_index_(params.layer_index),
+ total_layers_(params.total_layers), blend_amount_(params.blend_amount),
+ input_view_(nullptr), original_view_(nullptr), bind_group_(nullptr) {
+ pipeline_ = create_cnn_pipeline(ctx_.device, ctx_.format,
+ cnn_layer_shader_wgsl);
}
void CNNEffect::init(MainSequence* demo) {
PostProcessEffect::init(demo);
+ demo_ = demo;
params_buffer_.init(ctx_.device);
- CNNLayerParams params = {0, 1, {0.0f, 0.0f}};
+ // Register captured_frame texture (used by all layers for original input)
+ if (layer_index_ == 0) {
+ demo_->register_auxiliary_texture("captured_frame", width_, height_);
+ }
+
+ CNNLayerParams params = {layer_index_, blend_amount_, {0.0f, 0.0f}};
params_buffer_.update(ctx_.queue, params);
}
@@ -31,6 +113,40 @@ void CNNEffect::render(WGPURenderPassEncoder pass, float time, float beat,
void CNNEffect::update_bind_group(WGPUTextureView input_view) {
input_view_ = input_view;
- pp_update_bind_group(ctx_.device, pipeline_, &bind_group_,
- input_view_, uniforms_.get(), params_buffer_.get());
+
+ // All layers: get captured frame (original input from layer 0)
+ if (demo_) {
+ original_view_ = demo_->get_auxiliary_view("captured_frame");
+ }
+
+ // Create bind group with original texture
+ if (bind_group_)
+ wgpuBindGroupRelease(bind_group_);
+
+ WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline_, 0);
+ WGPUSamplerDescriptor sd = {};
+ sd.magFilter = WGPUFilterMode_Linear;
+ sd.minFilter = WGPUFilterMode_Linear;
+ sd.maxAnisotropy = 1;
+ WGPUSampler sampler = wgpuDeviceCreateSampler(ctx_.device, &sd);
+
+ WGPUBindGroupEntry bge[5] = {};
+ bge[0].binding = 0;
+ bge[0].sampler = sampler;
+ bge[1].binding = 1;
+ bge[1].textureView = input_view_;
+ bge[2].binding = 2;
+ bge[2].buffer = uniforms_.get().buffer;
+ bge[2].size = uniforms_.get().size;
+ bge[3].binding = 3;
+ bge[3].buffer = params_buffer_.get().buffer;
+ bge[3].size = params_buffer_.get().size;
+ bge[4].binding = 4;
+ bge[4].textureView = original_view_ ? original_view_ : input_view_; // Fallback
+
+ WGPUBindGroupDescriptor bgd = {};
+ bgd.layout = bgl;
+ bgd.entryCount = 5;
+ bgd.entries = bge;
+ bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bgd);
+ // The bind group holds its own references; release the local handles to
+ // avoid leaking a sampler and layout on every update_bind_group() call.
+ wgpuSamplerRelease(sampler);
+ wgpuBindGroupLayoutRelease(bgl);
}
diff --git a/src/gpu/effects/cnn_effect.h b/src/gpu/effects/cnn_effect.h
index 9cc4935..bc074d2 100644
--- a/src/gpu/effects/cnn_effect.h
+++ b/src/gpu/effects/cnn_effect.h
@@ -7,23 +7,39 @@
struct CNNLayerParams {
int layer_index;
- int use_residual;
+ float blend_amount; // Blend: mix(input, output, blend_amount)
float _pad[2];
};
static_assert(sizeof(CNNLayerParams) == 16);
+struct CNNEffectParams {
+ int layer_index = 0; // Which layer to render (0-based)
+ int total_layers = 1; // Total number of layers in the CNN
+ float blend_amount = 1.0f; // Final blend with original input
+};
+
class CNNEffect : public PostProcessEffect {
public:
- explicit CNNEffect(const GpuContext& ctx, int num_layers = 1);
+ explicit CNNEffect(const GpuContext& ctx);
+ explicit CNNEffect(const GpuContext& ctx, const CNNEffectParams& params);
void init(MainSequence* demo) override;
void render(WGPURenderPassEncoder pass, float time, float beat,
float intensity, float aspect_ratio) override;
void update_bind_group(WGPUTextureView input_view) override;
+ // Layer 0 needs framebuffer capture for original input
+ bool needs_framebuffer_capture() const override {
+ return layer_index_ == 0;
+ }
+
private:
- int num_layers_;
+ int layer_index_;
+ int total_layers_;
+ float blend_amount_;
WGPUTextureView input_view_;
+ WGPUTextureView original_view_;
UniformBuffer<CNNLayerParams> params_buffer_;
WGPUBindGroup bind_group_;
+ MainSequence* demo_ = nullptr;
};
diff --git a/src/tests/gpu/test_demo_effects.cc b/src/tests/gpu/test_demo_effects.cc
index 9281413..619b9c9 100644
--- a/src/tests/gpu/test_demo_effects.cc
+++ b/src/tests/gpu/test_demo_effects.cc
@@ -89,7 +89,7 @@ static void test_post_process_effects() {
{"ThemeModulationEffect",
std::make_shared<ThemeModulationEffect>(fixture.ctx())},
{"VignetteEffect", std::make_shared<VignetteEffect>(fixture.ctx())},
- {"CNNEffect", std::make_shared<CNNEffect>(fixture.ctx(), 1)},
+ {"CNNEffect", std::make_shared<CNNEffect>(fixture.ctx())},
};
int passed = 0;
diff --git a/tools/seq_compiler.cc b/tools/seq_compiler.cc
index 4a671f4..fad2d88 100644
--- a/tools/seq_compiler.cc
+++ b/tools/seq_compiler.cc
@@ -995,6 +995,38 @@ int main(int argc, char* argv[]) {
<< eff.class_name << ">(ctx, p), " << eff.start << "f, "
<< eff.end << "f, " << eff.priority << ");\n";
out_file << " }\n";
+ } else if (!eff.params.empty() && eff.class_name == "CNNEffect") {
+ // Generate parameter struct initialization for CNNEffect
+ // If layers>1, expand into multiple chained effect instances
+ int num_layers = 1;
+ float blend_amount = 1.0f;
+
+ for (const auto& [key, value] : eff.params) {
+ if (key == "layers") {
+ num_layers = std::stoi(value);
+ } else if (key == "blend") {
+ blend_amount = std::stof(value);
+ }
+ }
+
+ // Generate one effect per layer
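+      // e.g. "EFFECT + CNNEffect 0.50 5.00 layers=3 blend=0.7" emits three
+      // instances at consecutive priorities; intermediate layers pass through
+      // with blend=1.0 and only the last applies the requested blend.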
+ for (int layer = 0; layer < num_layers; ++layer) {
+ out_file << " {\n";
+ out_file << " CNNEffectParams p;\n";
+ out_file << " p.layer_index = " << layer << ";\n";
+ out_file << " p.total_layers = " << num_layers << ";\n";
+ // Only apply blend_amount on the last layer
+ if (layer == num_layers - 1) {
+ out_file << " p.blend_amount = " << blend_amount << "f;\n";
+ } else {
+ out_file << " p.blend_amount = 1.0f;\n";
+ }
+ out_file << " seq->add_effect(std::make_shared<"
+ << eff.class_name << ">(ctx, p), " << eff.start << "f, "
+ << eff.end << "f, " << (std::stoi(eff.priority) + layer)
+ << ");\n";
+ out_file << " }\n";
+ }
} else {
// No parameters or unsupported effect - use default constructor
out_file << " seq->add_effect(std::make_shared<" << eff.class_name
diff --git a/training/train_cnn.py b/training/train_cnn.py
index 82f0b48..1cd6579 100755
--- a/training/train_cnn.py
+++ b/training/train_cnn.py
@@ -112,8 +112,6 @@ class SimpleCNN(nn.Module):
else:
self.layers.append(nn.Conv2d(3, 3, kernel_size=kernel_size, padding=padding, bias=True))
- self.use_residual = True
-
def forward(self, x):
B, C, H, W = x.shape
y_coords = torch.linspace(0, 1, H, device=x.device).view(1,1,H,1).expand(B,1,H,W)
@@ -128,11 +126,77 @@ class SimpleCNN(nn.Module):
if i < len(self.layers) - 1:
out = torch.tanh(out)
- if self.use_residual:
- out = x + out * 0.3
return out
+def generate_layer_shader(output_path, num_layers, kernel_sizes):
+ """Generate cnn_layer.wgsl with proper layer switches"""
+
+ with open(output_path, 'w') as f:
+ f.write("// CNN layer shader - uses modular convolution snippets\n")
+ f.write("// Supports multi-pass rendering with residual connections\n")
+ f.write("// DO NOT EDIT - Generated by train_cnn.py\n\n")
+ f.write("@group(0) @binding(0) var smplr: sampler;\n")
+ f.write("@group(0) @binding(1) var txt: texture_2d<f32>;\n\n")
+ f.write("#include \"common_uniforms\"\n")
+ f.write("#include \"cnn_activation\"\n")
+
+ # Include necessary conv functions
+ conv_sizes = set(kernel_sizes)
+ for ks in sorted(conv_sizes):
+ f.write(f"#include \"cnn_conv{ks}x{ks}\"\n")
+ f.write("#include \"cnn_weights_generated\"\n\n")
+
+ f.write("struct CNNLayerParams {\n")
+ f.write(" layer_index: i32,\n")
+ f.write(" blend_amount: f32,\n")
+ f.write(" _pad: vec2<f32>,\n")
+ f.write("};\n\n")
+ f.write("@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;\n")
+ f.write("@group(0) @binding(3) var<uniform> params: CNNLayerParams;\n")
+ f.write("@group(0) @binding(4) var original_input: texture_2d<f32>;\n\n")
+ f.write("@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> {\n")
+ f.write(" var pos = array<vec2<f32>, 3>(\n")
+ f.write(" vec2<f32>(-1.0, -1.0), vec2<f32>(3.0, -1.0), vec2<f32>(-1.0, 3.0)\n")
+ f.write(" );\n")
+ f.write(" return vec4<f32>(pos[i], 0.0, 1.0);\n")
+ f.write("}\n\n")
+ f.write("@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> {\n")
+ f.write(" let uv = p.xy / uniforms.resolution;\n")
+ f.write(" let input = textureSample(txt, smplr, uv);\n")
+ f.write(" let original = textureSample(original_input, smplr, uv);\n")
+ f.write(" var result = vec4<f32>(0.0);\n\n")
+
+ # Generate layer switches
+ for layer_idx in range(num_layers):
+ ks = kernel_sizes[layer_idx]
+ if layer_idx == 0:
+ f.write(f" // Layer 0 uses coordinate-aware convolution\n")
+ f.write(f" if (params.layer_index == {layer_idx}) {{\n")
+ f.write(f" result = cnn_conv{ks}x{ks}_with_coord(txt, smplr, uv, uniforms.resolution,\n")
+ f.write(f" rgba_weights_layer{layer_idx}, coord_weights_layer{layer_idx}, bias_layer{layer_idx});\n")
+ f.write(f" result = cnn_tanh(result);\n")
+ f.write(f" }}\n")
+ else:
+ is_last = layer_idx == num_layers - 1
+ f.write(f" {'else ' if layer_idx > 0 else ''}if (params.layer_index == {layer_idx}) {{\n")
+ f.write(f" result = cnn_conv{ks}x{ks}(txt, smplr, uv, uniforms.resolution,\n")
+ f.write(f" weights_layer{layer_idx}, bias_layer{layer_idx});\n")
+ if not is_last:
+ f.write(f" result = cnn_tanh(result);\n")
+ f.write(f" }}\n")
+
+ # Add else clause for invalid layer index
+ if num_layers > 1:
+ f.write(f" else {{\n")
+ f.write(f" result = input;\n")
+ f.write(f" }}\n")
+
+ f.write("\n // Blend with ORIGINAL input from layer 0\n")
+ f.write(" return mix(original, result, params.blend_amount);\n")
+ f.write("}\n")
+
+
def export_weights_to_wgsl(model, output_path, kernel_sizes):
"""Export trained weights to WGSL format"""
@@ -154,10 +218,13 @@ def export_weights_to_wgsl(model, output_path, kernel_sizes):
row = pos // kw
col = pos % kw
f.write(" mat4x4<f32>(\n")
- for out_c in range(min(4, out_ch)):
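+                # A WGSL mat4x4 constant needs all 16 entries; the model is
+                # RGB (3 channels), so unused rows/columns are zero-padded.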
+ for out_c in range(4):
vals = []
- for in_c in range(min(4, in_ch)):
- vals.append(f"{weights[out_c, in_c, row, col]:.6f}")
+ for in_c in range(4):
+ if out_c < out_ch and in_c < in_ch:
+ vals.append(f"{weights[out_c, in_c, row, col]:.6f}")
+ else:
+ vals.append("0.0")
f.write(f" {', '.join(vals)},\n")
f.write(" )")
if pos < num_positions - 1:
@@ -170,7 +237,12 @@ def export_weights_to_wgsl(model, output_path, kernel_sizes):
coord_w = layer.coord_weights.data.cpu().numpy()
f.write(f"const coord_weights_layer{layer_idx} = mat2x4<f32>(\n")
for c in range(2):
- vals = [f"{coord_w[out_c, c]:.6f}" for out_c in range(min(4, coord_w.shape[0]))]
+ vals = []
+ for out_c in range(4):
+ if out_c < coord_w.shape[0]:
+ vals.append(f"{coord_w[out_c, c]:.6f}")
+ else:
+ vals.append("0.0")
f.write(f" {', '.join(vals)}")
if c < 1:
f.write(",\n")
@@ -180,8 +252,9 @@ def export_weights_to_wgsl(model, output_path, kernel_sizes):
# Export bias
bias = layer.bias.data.cpu().numpy()
+ bias_vals = [f"{bias[i]:.6f}" if i < len(bias) else "0.0" for i in range(4)]
f.write(f"const bias_layer{layer_idx} = vec4<f32>(")
- f.write(", ".join([f"{b:.6f}" for b in bias[:4]]))
+ f.write(", ".join(bias_vals))
f.write(");\n\n")
layer_idx += 1
@@ -197,10 +270,13 @@ def export_weights_to_wgsl(model, output_path, kernel_sizes):
row = pos // kw
col = pos % kw
f.write(" mat4x4<f32>(\n")
- for out_c in range(min(4, out_ch)):
+ for out_c in range(4):
vals = []
- for in_c in range(min(4, in_ch)):
- vals.append(f"{weights[out_c, in_c, row, col]:.6f}")
+ for in_c in range(4):
+ if out_c < out_ch and in_c < in_ch:
+ vals.append(f"{weights[out_c, in_c, row, col]:.6f}")
+ else:
+ vals.append("0.0")
f.write(f" {', '.join(vals)},\n")
f.write(" )")
if pos < num_positions - 1:
@@ -211,8 +287,9 @@ def export_weights_to_wgsl(model, output_path, kernel_sizes):
# Export bias
bias = layer.bias.data.cpu().numpy()
+ bias_vals = [f"{bias[i]:.6f}" if i < len(bias) else "0.0" for i in range(4)]
f.write(f"const bias_layer{layer_idx} = vec4<f32>(")
- f.write(", ".join([f"{b:.6f}" for b in bias[:4]]))
+ f.write(", ".join(bias_vals))
f.write(");\n\n")
layer_idx += 1
@@ -293,19 +370,57 @@ def train(args):
}, checkpoint_path)
print(f"Saved checkpoint to {checkpoint_path}")
- # Export weights
+ # Export weights and shader
output_path = args.output or 'workspaces/main/shaders/cnn/cnn_weights_generated.wgsl'
print(f"\nExporting weights to {output_path}...")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
export_weights_to_wgsl(model, output_path, kernel_sizes)
+ # Generate layer shader
+ shader_dir = os.path.dirname(output_path)
+ shader_path = os.path.join(shader_dir, 'cnn_layer.wgsl')
+ print(f"Generating layer shader to {shader_path}...")
+ generate_layer_shader(shader_path, args.layers, kernel_sizes)
+
print("Training complete!")
+def export_from_checkpoint(checkpoint_path, output_path=None):
+ """Export WGSL files from checkpoint without training"""
+
+ if not os.path.exists(checkpoint_path):
+ print(f"Error: Checkpoint file '{checkpoint_path}' not found")
+ sys.exit(1)
+
+ print(f"Loading checkpoint from {checkpoint_path}...")
+ checkpoint = torch.load(checkpoint_path, map_location='cpu')
+
+ kernel_sizes = checkpoint['kernel_sizes']
+ num_layers = checkpoint['num_layers']
+
+ # Recreate model
+ model = SimpleCNN(num_layers=num_layers, kernel_sizes=kernel_sizes)
+ model.load_state_dict(checkpoint['model_state'])
+
+ # Export weights
+ output_path = output_path or 'workspaces/main/shaders/cnn/cnn_weights_generated.wgsl'
+ print(f"Exporting weights to {output_path}...")
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
+ export_weights_to_wgsl(model, output_path, kernel_sizes)
+
+ # Generate layer shader
+ shader_dir = os.path.dirname(output_path)
+ shader_path = os.path.join(shader_dir, 'cnn_layer.wgsl')
+ print(f"Generating layer shader to {shader_path}...")
+ generate_layer_shader(shader_path, num_layers, kernel_sizes)
+
+ print("Export complete!")
+
+
def main():
parser = argparse.ArgumentParser(description='Train CNN for image-to-image transformation')
- parser.add_argument('--input', required=True, help='Input image directory')
- parser.add_argument('--target', required=True, help='Target image directory')
+ parser.add_argument('--input', help='Input image directory')
+ parser.add_argument('--target', help='Target image directory')
parser.add_argument('--layers', type=int, default=1, help='Number of CNN layers (default: 1)')
parser.add_argument('--kernel_sizes', default='3', help='Comma-separated kernel sizes (default: 3)')
parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs (default: 100)')
@@ -315,10 +430,20 @@ def main():
parser.add_argument('--checkpoint-every', type=int, default=0, help='Save checkpoint every N epochs (default: 0 = disabled)')
parser.add_argument('--checkpoint-dir', help='Checkpoint directory (default: training/checkpoints)')
parser.add_argument('--resume', help='Resume from checkpoint file')
+ parser.add_argument('--export-only', help='Export WGSL from checkpoint without training')
args = parser.parse_args()
- # Validate directories
+ # Export-only mode
+ if args.export_only:
+ export_from_checkpoint(args.export_only, args.output)
+ return
+
+ # Validate directories for training
+ if not args.input or not args.target:
+ print("Error: --input and --target required for training (or use --export-only)")
+ sys.exit(1)
+
if not os.path.isdir(args.input):
print(f"Error: Input directory '{args.input}' does not exist")
sys.exit(1)
diff --git a/workspaces/main/shaders/cnn/cnn_layer.wgsl b/workspaces/main/shaders/cnn/cnn_layer.wgsl
index b2bab26..2285ef9 100644
--- a/workspaces/main/shaders/cnn/cnn_layer.wgsl
+++ b/workspaces/main/shaders/cnn/cnn_layer.wgsl
@@ -1,5 +1,6 @@
// CNN layer shader - uses modular convolution snippets
-// Supports multi-pass rendering with residual connections
+// Multi-pass rendering with final blend against the captured original
+// DO NOT EDIT - Generated by train_cnn.py
@group(0) @binding(0) var smplr: sampler;
@group(0) @binding(1) var txt: texture_2d<f32>;
@@ -11,12 +12,13 @@
struct CNNLayerParams {
layer_index: i32,
- use_residual: i32,
+ blend_amount: f32,
_pad: vec2<f32>,
};
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
@group(0) @binding(3) var<uniform> params: CNNLayerParams;
+@group(0) @binding(4) var original_input: texture_2d<f32>;
@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> {
var pos = array<vec2<f32>, 3>(
@@ -27,6 +29,8 @@ struct CNNLayerParams {
@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> {
let uv = p.xy / uniforms.resolution;
+ let input = textureSample(txt, smplr, uv);
+ let original = textureSample(original_input, smplr, uv);
var result = vec4<f32>(0.0);
// Layer 0 uses coordinate-aware convolution
@@ -35,12 +39,19 @@ struct CNNLayerParams {
rgba_weights_layer0, coord_weights_layer0, bias_layer0);
result = cnn_tanh(result);
}
-
- // Residual connection
- if (params.use_residual != 0) {
- let input = textureSample(txt, smplr, uv);
- result = input + result * 0.3;
+ else if (params.layer_index == 1) {
+ result = cnn_conv3x3(txt, smplr, uv, uniforms.resolution,
+ weights_layer1, bias_layer1);
+ result = cnn_tanh(result);
+ }
+ else if (params.layer_index == 2) {
+ result = cnn_conv3x3(txt, smplr, uv, uniforms.resolution,
+ weights_layer2, bias_layer2);
+ }
+ else {
+ result = input;
}
- return result;
+ // Blend with ORIGINAL input from layer 0
+ return mix(original, result, params.blend_amount);
}
diff --git a/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl b/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl
index e0a7dc4..6052ac5 100644
--- a/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl
+++ b/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl
@@ -1,23 +1,185 @@
-// Generated CNN weights and biases
-// DO NOT EDIT MANUALLY - regenerate with scripts/train_cnn.py
+// Auto-generated CNN weights
+// DO NOT EDIT - Generated by train_cnn.py
-// Placeholder identity-like weights for initial testing
-// Layer 0: 3x3 convolution with coordinate awareness
const rgba_weights_layer0: array<mat4x4<f32>, 9> = array(
- mat4x4<f32>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
- mat4x4<f32>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
- mat4x4<f32>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
- mat4x4<f32>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
- mat4x4<f32>(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0),
- mat4x4<f32>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
- mat4x4<f32>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
- mat4x4<f32>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0),
- mat4x4<f32>(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
+ mat4x4<f32>(
+ -0.181929, -0.244329, -0.354404, 0.0,
+ -0.291597, -0.195653, 0.081896, 0.0,
+ 0.081595, 0.164081, -0.236318, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ 0.731888, 0.717648, 0.524081, 0.0,
+ -0.029760, -0.208000, 0.008438, 0.0,
+ 0.442082, 0.354681, 0.049288, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.623141, -0.695759, -0.087885, 0.0,
+ 0.043135, 0.071979, 0.213065, 0.0,
+ 0.011581, 0.110995, 0.034100, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ 0.170016, 0.188298, 0.134083, 0.0,
+ -0.222954, -0.088011, 0.015668, 0.0,
+ 0.921836, 0.437158, 0.061577, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ 1.431940, 1.148113, 1.238067, 0.0,
+ -0.212535, 0.366860, 0.320956, 0.0,
+ 0.771192, 0.765570, 0.029189, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ 0.171088, 0.000155, 0.212552, 0.0,
+ 0.029536, 0.447892, 0.041381, 0.0,
+ 0.011807, -0.167281, -0.200702, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.668151, -0.813927, -0.132108, 0.0,
+ -0.156250, 0.179112, -0.069585, 0.0,
+ 0.403347, 0.482877, 0.182611, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.609871, -0.768480, -0.590538, 0.0,
+ -0.171854, 0.150167, 0.105694, 0.0,
+ -0.059052, 0.066999, -0.244222, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.112983, -0.066299, 0.117696, 0.0,
+ -0.172541, 0.095008, -0.160754, 0.0,
+ -0.369667, -0.000628, 0.163602, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ )
);
const coord_weights_layer0 = mat2x4<f32>(
- 0.0, 0.0, 0.0, 0.0,
- 0.0, 0.0, 0.0, 0.0
+ 0.059076, -0.026617, -0.005155, 0.0,
+ 0.135407, -0.090329, 0.058216, 0.0
);
-const bias_layer0 = vec4<f32>(0.0, 0.0, 0.0, 0.0);
+const bias_layer0 = vec4<f32>(-0.526177, -0.569862, -1.370040, 0.0);
+
+const weights_layer1: array<mat4x4<f32>, 9> = array(
+ mat4x4<f32>(
+ 0.180029, -1.107249, 0.570741, 0.0,
+ -0.098536, 0.079545, -0.083257, 0.0,
+ -0.020066, 0.333084, 0.039506, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ 3.068946, -1.783570, -0.550517, 0.0,
+ -0.296369, -0.080958, 0.040260, 0.0,
+ -0.093713, -0.212577, -0.110011, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ 2.282564, -0.538192, -0.793214, 0.0,
+ -0.395788, 0.130881, 0.078571, 0.0,
+ -0.041375, 0.061666, 0.045651, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.267284, -1.971639, -0.099616, 0.0,
+ -0.084432, 0.139794, 0.007091, 0.0,
+ -0.103042, -0.104340, 0.067299, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -5.233469, -2.252747, -3.555217, 0.0,
+ 0.647940, -0.178858, 0.351633, 0.0,
+ -0.014237, -0.505881, 0.165940, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.121700, -0.677386, -2.435040, 0.0,
+ 0.084806, -0.028000, 0.380387, 0.0,
+ -0.020906, -0.279161, 0.041915, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ 2.982562, -0.298441, -0.147775, 0.0,
+ -0.291832, 0.102875, -0.128590, 0.0,
+ -0.091786, 0.104389, -0.188678, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -4.434978, -0.261830, -2.436411, 0.0,
+ 0.349188, -0.245908, 0.272592, 0.0,
+ 0.010322, -0.148525, -0.031531, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ 0.129886, 1.516168, -0.755576, 0.0,
+ 0.133138, -0.260276, 0.028059, 0.0,
+ 0.001185, 0.141547, -0.003606, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ )
+);
+
+const bias_layer1 = vec4<f32>(1.367986, -1.148709, -0.650040, 0.0);
+
+const weights_layer2: array<mat4x4<f32>, 9> = array(
+ mat4x4<f32>(
+ -0.137003, -0.289376, 0.625000, 0.0,
+ -0.120120, -0.238968, 0.448432, 0.0,
+ -0.142094, -0.253706, 0.458181, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.337017, -0.757585, 0.135953, 0.0,
+ -0.304432, -0.553491, 0.419907, 0.0,
+ -0.313585, -0.467667, 0.615326, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.161089, -0.328735, 0.612679, 0.0,
+ -0.137144, -0.172882, 0.176362, 0.0,
+ -0.153195, -0.061571, 0.173977, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.227814, -0.544193, -0.564658, 0.0,
+ -0.211743, -0.430586, 0.080349, 0.0,
+ -0.214442, -0.417501, 0.880266, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.435370, -0.295169, -0.865976, 0.0,
+ -0.423147, -0.274780, 0.323049, 0.0,
+ -0.411180, -0.062517, 1.099769, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.199573, -0.488030, -0.396440, 0.0,
+ -0.187844, -0.360516, -0.156646, 0.0,
+ -0.188681, -0.292304, -0.134645, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.123218, -0.287990, 0.154656, 0.0,
+ -0.112954, -0.282778, 0.498742, 0.0,
+ -0.139083, -0.319337, 1.112621, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.267477, -0.691374, -0.028960, 0.0,
+ -0.246348, -0.585583, 0.401194, 0.0,
+ -0.253279, -0.562875, 1.105818, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ ),
+ mat4x4<f32>(
+ -0.083133, -0.131627, 0.460039, 0.0,
+ -0.071126, -0.108601, 0.163545, 0.0,
+ -0.092579, -0.110020, 0.131282, 0.0,
+ 0.0, 0.0, 0.0, 0.0,
+ )
+);
+
+const bias_layer2 = vec4<f32>(-1.805686, -0.798340, 0.462318, 0.0);
+
diff --git a/workspaces/main/timeline.seq b/workspaces/main/timeline.seq
index 3670825..5947ff8 100644
--- a/workspaces/main/timeline.seq
+++ b/workspaces/main/timeline.seq
@@ -1,121 +1,101 @@
-# WORKSPACE: main
-# ============================================================================
-# DEMO SEQUENCE DEFINITION
-# ============================================================================
-# Defines the timeline and layering of visual effects for the demo.
-# Compiled by seq_compiler into src/generated/timeline.cc at build time.
-#
-# DOCUMENTATION: See doc/SEQUENCE.md for complete syntax reference
-#
-# QUICK REFERENCE:
-# SEQUENCE <start> <priority> ["optional_name"] [optional_end]
-# EFFECT <+|=|-> <ClassName> <start> <end>
-#
-# Priority modifiers: + (increment), = (same), - (decrement/background)
-# Time notation: 0b (beats), 0.0 (seconds)
-# Optional name: Displayed in Gantt charts (e.g., "Opening Scene")
-#
-# VALIDATION & VISUALIZATION:
-# ./build/seq_compiler assets/demo.seq # Validate only
-# ./build/seq_compiler assets/demo.seq --gantt-html=t.html # HTML Gantt
-#
-# ============================================================================
-
+# Demo Timeline
+# Generated by Timeline Editor
# BPM 120
-SEQUENCE 0b 0
- EFFECT - FlashCubeEffect .2 3 # Background cube (priority -1 = behind everything)
- EFFECT + FlashEffect 0.0 1. color=1.0,0.5,0.5 decay=0.95 # Red-tinted flash
- EFFECT + FadeEffect 0.1 1. # Priority 1
- EFFECT + SolarizeEffect 0 4b # Priority 2 (was 3, now contiguous)
- EFFECT + VignetteEffect 0 6 radius=0.6 softness=0.1
+SEQUENCE 0.00 0
+ EFFECT - FlashCubeEffect 0.00 2.44
+ EFFECT + FlashEffect 0.00 1.00 color=1.0,0.5,0.5 decay=0.95
+ EFFECT + FadeEffect 0.10 1.00
+ EFFECT + SolarizeEffect 0.00 2.00
+ EFFECT + VignetteEffect 0.00 2.50 radius=0.6 softness=0.1
+
+SEQUENCE 2.50 0 "rotating cube"
+ EFFECT + CircleMaskEffect 0.00 4.00 0.50
+ EFFECT + RotatingCubeEffect 0.00 4.00
+ EFFECT + GaussianBlurEffect 1.00 2.00 strength=1.0
+ EFFECT + GaussianBlurEffect 3.00 4.00 strength=2.0
+
+SEQUENCE 5.93 0
+ EFFECT - FlashCubeEffect 0.11 1.45
+ EFFECT + FlashEffect 0.00 0.20
-SEQUENCE 2.0 0
- EFFECT + CircleMaskEffect 0.0 4.0 0.50 # Priority 0 mask generator
- EFFECT + RotatingCubeEffect 0.0 4.0 # Priority 1 (renders inside circle)
- EFFECT + GaussianBlurEffect 1.0 2.0 strength=1.0
- EFFECT + GaussianBlurEffect 3.0 4.0 strength=2.0
+SEQUENCE 6.90 1 "spray"
+ EFFECT + ParticleSprayEffect 0.00 2.00
+ EFFECT + ParticlesEffect 0.00 3.00
+ EFFECT = GaussianBlurEffect 0.00 2.00 strength=3.0
-SEQUENCE 4b 0
- EFFECT - FlashCubeEffect 0.1 3. # Priority -1
- EFFECT + FlashEffect 0.0 0.2 # Priority 0 (was 4, now contiguous)
+SEQUENCE 8.50 2
+ EFFECT + ThemeModulationEffect 0.00 2.00
+ EFFECT + HeptagonEffect 0.20 2.00
+ EFFECT + ParticleSprayEffect 0.00 2.00
+ EFFECT = ParticlesEffect 0.00 2.00
+ EFFECT + Hybrid3DEffect 0.00 2.00
+ EFFECT + GaussianBlurEffect 0.00 2.00
+ EFFECT + ChromaAberrationEffect 0.00 1.50 offset=0.01 angle=1.57
-SEQUENCE 6b 1
- EFFECT + ParticleSprayEffect 0 4 # Priority 0 (spray particles)
- EFFECT + ParticlesEffect 0 4 # Priority 1
- EFFECT = GaussianBlurEffect 0 8 strength=3.0 # Priority 1 (stronger blur)
+SEQUENCE 10.50 0
+ EFFECT + HeptagonEffect 0.00 0.20
+ EFFECT + FadeEffect 0.10 1.00
-SEQUENCE 7b 0
- EFFECT + HeptagonEffect 0.0 .2 # Priority 0
- EFFECT + FadeEffect 0.1 1.0 # Priority 1 (was 5, now contiguous)
+SEQUENCE 10.50 0
+ EFFECT + Hybrid3DEffect 0.00 5.59
+ EFFECT + CNNEffect 0.54 4.97 layers=3 blend=0.2
-# Post-processing chain (priority 10 = applied after scene rendering)
-# Effects are applied in priority order: lower numbers first
-SEQUENCE 8b 3
- EFFECT + ThemeModulationEffect 0 4 # Priority 0
- EFFECT = HeptagonEffect 0.0 4.0 # Priority 0 (same layer)
- EFFECT + GaussianBlurEffect 0 8 strength=1.5 # Priority 1 (subtle blur)
- EFFECT + ChromaAberrationEffect 0 6 offset=0.03 angle=0.785 # Priority 2 (diagonal, stronger)
- EFFECT + SolarizeEffect 0 10 # Priority 3
+SEQUENCE 16.14 3
+ EFFECT + ThemeModulationEffect 0.00 4.00
+ EFFECT = HeptagonEffect 0.00 4.00
+ EFFECT + GaussianBlurEffect 0.00 5.00 strength=1.5
+ EFFECT + ChromaAberrationEffect 0.00 5.00 offset=0.03 angle=0.785
+ EFFECT + SolarizeEffect 0.00 5.00
-SEQUENCE 12b 2
- EFFECT - FlashCubeEffect .2 3 # Priority -1 (background)
- EFFECT + HeptagonEffect 0 4 # Priority 0
- EFFECT + ParticleSprayEffect 0 4 # Priority 1 (spray particles)
- EFFECT + ParticlesEffect 0 4 # Priority 2
+SEQUENCE 21.00 2
+ EFFECT - FlashCubeEffect 0.20 1.50
+ EFFECT + HeptagonEffect 0.00 2.00
+ EFFECT + ParticleSprayEffect 0.00 2.00
+ EFFECT + ParticlesEffect 0.00 2.00
-SEQUENCE 15b 2
- EFFECT - FlashCubeEffect .2 3 # Priority -1 (background)
- EFFECT + FlashEffect 0.0 1 # Priority 0
+SEQUENCE 22.75 2
+ EFFECT - FlashCubeEffect 0.20 1.50
+ EFFECT + FlashEffect 0.00 1.00
-SEQUENCE 16b 10
- EFFECT - FlashCubeEffect .2 3 # Priority -1 (background)
- EFFECT + GaussianBlurEffect 0 8 # Priority 0
- EFFECT + FlashEffect 0.0 0.2 # Priority 1
- EFFECT = FlashEffect 1b 0.2 # Priority 1 (same layer)
+SEQUENCE 23.88 10
+ EFFECT - FlashCubeEffect 0.20 1.50
+ EFFECT + GaussianBlurEffect 0.00 2.00
+ EFFECT + FlashEffect 0.00 0.20
+ EFFECT = FlashEffect 0.50 0.20
-SEQUENCE 17b 2
- EFFECT + ThemeModulationEffect 0 4 # Priority 0
- EFFECT + HeptagonEffect 0.2 2.0 # Priority 1
- EFFECT + ParticleSprayEffect 0 4 # Priority 2 (spray particles)
- EFFECT = ParticlesEffect 0 4 # Priority 2 (same layer)
- EFFECT + Hybrid3DEffect 0 4 # Priority 3
- EFFECT + GaussianBlurEffect 0 8 # Priority 4
- EFFECT + ChromaAberrationEffect 0 6 offset=0.01 angle=1.57 # Priority 5 (vertical, subtle)
+SEQUENCE 25.59 1
+ EFFECT + ThemeModulationEffect 0.00 8.00
+ EFFECT + HeptagonEffect 0.20 2.00
+ EFFECT + ParticleSprayEffect 0.00 8.00
+ EFFECT + Hybrid3DEffect 0.00 10.00
+ EFFECT + GaussianBlurEffect 0.00 8.00
+ EFFECT + ChromaAberrationEffect 0.00 10.00
+ EFFECT + SolarizeEffect 0.00 10.00
-SEQUENCE 24b 1
- EFFECT + ThemeModulationEffect 0 8 # Priority 0
- EFFECT + HeptagonEffect 0.2 2.0 # Priority 1
- EFFECT + ParticleSprayEffect 0 8 # Priority 2 (spray particles - longer duration)
- EFFECT + Hybrid3DEffect 0 20 # Priority 3
- EFFECT + GaussianBlurEffect 0 8 # Priority 4
- EFFECT + ChromaAberrationEffect 0 10 # Priority 5
- EFFECT + SolarizeEffect 0 10 # Priority 6
+SEQUENCE 35.31 0
+ EFFECT + ThemeModulationEffect 0.00 4.00
+ EFFECT + HeptagonEffect 0.20 2.00
+ EFFECT + GaussianBlurEffect 0.00 8.00
+ EFFECT + SolarizeEffect 0.00 2.00
-SEQUENCE 32b 0
- EFFECT + ThemeModulationEffect 0 4 # Priority 0
- EFFECT + HeptagonEffect 0 16 # Priority 1
- EFFECT + ChromaAberrationEffect 0 16 # Priority 2
- EFFECT + GaussianBlurEffect 0 8 # Priority 3
+SEQUENCE 42.29 0
+ EFFECT + ThemeModulationEffect 0.00 6.00
+ EFFECT = HeptagonEffect 0.20 2.00
+ EFFECT + Hybrid3DEffect 0.00 4.00
+ EFFECT + ParticleSprayEffect 0.00 5.50
+ EFFECT + HeptagonEffect 0.00 8.00
+ EFFECT + ChromaAberrationEffect 0.00 7.50
+ EFFECT + GaussianBlurEffect 0.00 8.00
-SEQUENCE 48b 0
- EFFECT + ThemeModulationEffect 0 4 # Priority 0
- EFFECT + HeptagonEffect 0.2 2.0 # Priority 1
- EFFECT + GaussianBlurEffect 0 8 # Priority 2
- EFFECT + SolarizeEffect 0 2 # Priority 3
+SEQUENCE 50.02 0
+ EFFECT + ThemeModulationEffect 0.00 4.00
+ EFFECT + HeptagonEffect 0.00 9.50
+ EFFECT + ChromaAberrationEffect 0.00 9.00
+ EFFECT + GaussianBlurEffect 0.00 8.00
-SEQUENCE 56b 0
- EFFECT + ThemeModulationEffect 0 8 # Priority 0
- EFFECT = HeptagonEffect 0.2 2.0 # Priority 0 (same layer)
- EFFECT + Hybrid3DEffect 0 4 # Priority 1
- EFFECT + ParticleSprayEffect 0 8 # Priority 2 (spray particles)
- EFFECT + HeptagonEffect 0 16 # Priority 3
- EFFECT + ChromaAberrationEffect 0 16 # Priority 4
- EFFECT + GaussianBlurEffect 0 8 # Priority 5
+SEQUENCE 31.00 0
+ EFFECT + ThemeModulationEffect 0.00 3.00
+ EFFECT + VignetteEffect 0.00 3.00 radius=0.6 softness=0.3
+ EFFECT + SolarizeEffect 0.00 3.00
-SEQUENCE 62b 0
- EFFECT + ThemeModulationEffect 0 3 # Priority 0
- EFFECT + VignetteEffect 0 3 radius=0.6 softness=0.3 # New effect
- EFFECT + SolarizeEffect 0 3 # Priority 2
-# Demo automatically exits at this time (supports beat notation)
-END_DEMO 65b