diff options
| Mode | Path | Lines |
|------|------|-------|
| -rw-r--r-- | .claude/settings.json | 19 |
| -rw-r--r-- | PROJECT_CONTEXT.md | 4 |
| -rw-r--r-- | TODO.md | 9 |
| -rw-r--r-- | cmake/DemoSourceLists.cmake | 1 |
| -rw-r--r-- | cnn_v3/README.md | 4 |
| -rw-r--r-- | cnn_v3/docs/HOWTO.md | 235 |
| -rw-r--r-- | cnn_v3/src/gbuffer_effect.cc | 10 |
| -rw-r--r-- | src/effects/shaders.cc | 2 |
| -rw-r--r-- | src/effects/shaders.h | 4 |
| -rw-r--r-- | src/gpu/demo_effects.h | 3 |
| -rw-r--r-- | src/tests/gpu/test_demo_effects.cc | 5 |
| -rw-r--r-- | workspaces/main/assets.txt | 4 |
12 files changed, 290 insertions(+), 10 deletions(-)
diff --git a/.claude/settings.json b/.claude/settings.json index fb6efa5..40c1320 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -1,3 +1,20 @@ { - "permissions": {} + "permissions": {}, + "hooks": { + "PostToolUse": [ + { + "matcher": "Write|Edit", + "hooks": [ + { + "type": "command", + "command": "f=$(jq -r '.tool_input.file_path'); if echo \"$f\" | grep -q 'cnn_v3/' && ! echo \"$f\" | grep -q 'cnn_v3/docs/HOWTO.md'; then echo '{\"systemMessage\": \"Reminder: update cnn_v3/docs/HOWTO.md if this change affects the pipeline.\"}'; fi" + }, + { + "type": "command", + "command": "f=$(jq -r '.tool_input.file_path'); if echo \"$f\" | grep -qE '\\.(cc|h)$' && grep -qE 'str_view\\([a-zA-Z_]+_wgsl\\b' \"$f\" 2>/dev/null; then echo '{\"systemMessage\": \"Direct .wgsl asset usage detected — wrap with ShaderComposer::Get().Compose() before passing to str_view().\"}'; fi" + } + ] + } + ] + } } diff --git a/PROJECT_CONTEXT.md b/PROJECT_CONTEXT.md index 6b4468f..cadd514 100644 --- a/PROJECT_CONTEXT.md +++ b/PROJECT_CONTEXT.md @@ -36,7 +36,7 @@ - **Audio:** Sample-accurate sync. Zero heap allocations per frame. Variable tempo. OLA-IDCT synthesis (v2 .spec): Hann analysis window, rectangular synthesis, 50% overlap, click-free. V1 (raw DCT-512) preserved for generated notes. .spec files regenerated as v2. - **Shaders:** Parameterized effects (UniformHelper, .seq syntax). Beat-synchronized animation support (`beat_time`, `beat_phase`). Modular WGSL composition with ShaderComposer. 27 shared common shaders (math, render, compute). Reusable snippets: `render/scratch_lines`, `render/ntsc_common` (NTSC signal processing, RGB and YIQ input variants via `sample_ntsc_signal` hook), `math/color` (YIQ/NTSC), `math/color_c64` (C64 palette, Bayer dither, border animation). - **3D:** Hybrid SDF/rasterization with BVH. Binary scene loader. Blender pipeline. -- **Effects:** CNN post-processing: CNNEffect (v1) and CNNv2Effect operational. 
CNN v2: sigmoid activation, storage buffer weights (~3.2 KB), 7D static features, dynamic layers. Training stable, convergence validated. +- **Effects:** CNN post-processing: CNNEffect (v1) and CNNv2Effect operational. CNN v2: sigmoid activation, storage buffer weights (~3.2 KB), 7D static features, dynamic layers. Training stable, convergence validated. **CNN v3 Phase 1 complete:** `GBufferEffect` integrated (MRT raster + pack compute, 20-channel feature textures). See `cnn_v3/docs/HOWTO.md`. - **Tools:** CNN test tool operational. Texture readback utility functional. Timeline editor (web-based, beat-aligned, audio playback). - **Build:** Asset dependency tracking. Size measurement. Hot-reload (debug-only). WSL (Windows 10) supported: native Linux build and cross-compile to `.exe` via `mingw-w64`. - **Sequence:** DAG-based effect routing with explicit node system. Python compiler with topological sort and ping-pong optimization. 12 effects operational (Passthrough, Placeholder, GaussianBlur, Heptagon, Particles, RotatingCube, Hybrid3D, Flash, PeakMeter, Scene1, Scene2, Scratch). Effect times are absolute (seq_compiler adds sequence start offset). See `doc/SEQUENCE.md`. @@ -46,7 +46,7 @@ ## Next Up -**Active:** Spectral Brush Editor (procedural compression), CNN v2 quantization +**Active:** CNN v3 Phase 3 (WGSL U-Net shaders), Spectral Brush Editor **Ongoing:** Test infrastructure maintenance (35/35 passing) **Future:** Size optimization (64k target), 3D enhancements @@ -69,10 +69,11 @@ PyTorch / HTML WebGPU / C++ WebGPU. **Design:** `cnn_v3/docs/CNN_V3.md` **Phases:** -1. ✅ G-buffer: `GBufferEffect` (MRT raster + pack compute). SDF/shadow passes TODO. - - New NodeTypes: `GBUF_ALBEDO`, `GBUF_DEPTH32`, `GBUF_R8`, `GBUF_RGBA32UINT` - - Shaders: `cnn_v3/shaders/gbuf_raster.wgsl`, `gbuf_pack.wgsl` - - CMake integration deferred +1. ✅ G-buffer: `GBufferEffect` integrated. Assets, CMake, demo_effects.h, test all wired. 35/35 tests pass. 
+ - NodeTypes: `GBUF_ALBEDO`, `GBUF_DEPTH32`, `GBUF_R8`, `GBUF_RGBA32UINT` + - Shaders: `cnn_v3/shaders/gbuf_raster.wgsl` (ShaderComposer), `gbuf_pack.wgsl` + - SDF/shadow passes TODO (placeholder: shadow=1, transp=0) + - Howto: `cnn_v3/docs/HOWTO.md` 2. ✅ Training infrastructure: `blender_export.py`, `pack_blender_sample.py`, `pack_photo_sample.py` 3. WGSL shaders (enc/dec/bottleneck, FiLM, deterministic ops) 4. C++ CNNv3Effect + FiLM uniform upload diff --git a/cmake/DemoSourceLists.cmake b/cmake/DemoSourceLists.cmake index 3917fcd..04bbb3b 100644 --- a/cmake/DemoSourceLists.cmake +++ b/cmake/DemoSourceLists.cmake @@ -40,6 +40,7 @@ set(COMMON_GPU_EFFECTS src/effects/peak_meter_effect.cc src/effects/scene1_effect.cc src/effects/scene2_effect.cc + cnn_v3/src/gbuffer_effect.cc # TODO: Port CNN effects to v2 (complex v1 dependencies) # cnn_v1/src/cnn_v1_effect.cc # cnn_v2/src/cnn_v2_effect.cc diff --git a/cnn_v3/README.md b/cnn_v3/README.md index a22d823..f161bf4 100644 --- a/cnn_v3/README.md +++ b/cnn_v3/README.md @@ -31,7 +31,9 @@ Add images directly to these directories and commit them. ## Status -**Design phase.** Architecture defined, G-buffer prerequisite pending. +**Phase 1 complete.** G-buffer integrated (raster + pack), 35/35 tests pass. +Training infrastructure ready. U-Net WGSL shaders are next. +See `cnn_v3/docs/HOWTO.md` for the practical playbook. See `cnn_v3/docs/CNN_V3.md` for full design. See `cnn_v2/` for reference implementation. diff --git a/cnn_v3/docs/HOWTO.md b/cnn_v3/docs/HOWTO.md new file mode 100644 index 0000000..88d4bbc --- /dev/null +++ b/cnn_v3/docs/HOWTO.md @@ -0,0 +1,235 @@ +# CNN v3 How-To + +Practical playbook for the CNN v3 pipeline: G-buffer effect, training data, +training the U-Net+FiLM network, and wiring everything into the demo. + +See `CNN_V3.md` for the full architecture design. + +--- + +## 1. Using GBufferEffect in the Demo + +`GBufferEffect` is a full-class effect (Path B in `doc/EFFECT_WORKFLOW.md`). 
+It rasterizes proxy geometry to MRT G-buffer textures and packs them into two +`rgba32uint` feature textures (`feat_tex0`, `feat_tex1`) consumed by the CNN. + +### Registration (already done) + +- Shaders in `assets.txt`: `SHADER_GBUF_RASTER`, `SHADER_GBUF_PACK` +- Source in `cmake/DemoSourceLists.cmake`: `cnn_v3/src/gbuffer_effect.cc` +- Header included in `src/gpu/demo_effects.h` +- Test in `src/tests/gpu/test_demo_effects.cc` + +### Adding to a Sequence + +`GBufferEffect` does not exist in `seq_compiler.py` as a named effect yet +(no `.seq` syntax integration for Phase 1). Wire it directly in C++ alongside +your scene code, or add it to the timeline when the full CNNv3Effect is ready. + +**C++ wiring example** (e.g. inside a Sequence or main.cc): + +```cpp +#include "../../cnn_v3/src/gbuffer_effect.h" + +// Allocate once alongside your scene +auto gbuf = std::make_shared<GBufferEffect>( + ctx, /*inputs=*/{"prev_cnn"}, // or any dummy node + /*outputs=*/{"gbuf_feat0", "gbuf_feat1"}, + /*start=*/0.0f, /*end=*/60.0f); + +gbuf->set_scene(&my_scene, &my_camera); + +// In render loop, call before CNN pass: +gbuf->render(encoder, params, nodes); +``` + +### Internal passes + +Each frame, `GBufferEffect::render()` executes: + +1. **Pass 1 — MRT rasterization** (`gbuf_raster.wgsl`) + - Proxy box (36 verts) × N objects, instanced + - MRT outputs: `gbuf_albedo` (rgba16float), `gbuf_normal_mat` (rgba16float) + - Depth test + write into `gbuf_depth` (depth32float) + +2. **Pass 2/3 — SDF + Lighting** — TODO (placeholder: shadow=1, transp=0) + +3. **Pass 4 — Pack compute** (`gbuf_pack.wgsl`) + - Reads all G-buffer textures + `prev_cnn` input + - Writes `feat_tex0` + `feat_tex1` (rgba32uint, 20 channels, 32 bytes/pixel) + +### Output node names + +By default the outputs are named from the `outputs` vector passed to the +constructor. 
Use these names when binding the CNN effect input: + +``` +outputs[0] → feat_tex0 (rgba32uint: albedo.rgb, normal.xy, depth, depth_grad.xy) +outputs[1] → feat_tex1 (rgba32uint: mat_id, prev.rgb, mip1.rgb, mip2.rgb, shadow, transp) +``` + +### Scene data + +Call `set_scene(scene, camera)` before the first render. The effect uploads +`GlobalUniforms` (view-proj, camera pos, resolution) and `ObjectData` (model +matrix, color) to GPU storage buffers each frame. + +--- + +## 2. Preparing Training Data + +CNN v3 supports two data sources: Blender renders and real photos. + +### 2a. From Blender Renders + +```bash +# 1. In Blender: run the export script (requires Blender 3.x+) +blender --background scene.blend --python cnn_v3/training/blender_export.py \ + -- --output /tmp/renders/ --frames 200 + +# 2. Pack into sample directory +python3 cnn_v3/training/pack_blender_sample.py \ + --render-dir /tmp/renders/frame_0001/ \ + --output dataset/blender/sample_0001/ +``` + +Each sample directory contains: +``` +sample_XXXX/ + albedo.png — RGB uint8 (material color, pre-lighting) + normal.png — RG uint8 (oct-encoded XY, remap [0,1]) + depth.png — R uint16 (1/z normalized, 16-bit) + matid.png — R uint8 (object index / 255) + shadow.png — R uint8 (0=dark, 255=lit) + transp.png — R uint8 (0=opaque, 255=transparent) + target.png — RGB/RGBA (stylized ground truth) +``` + +### 2b. From Real Photos + +Geometric channels are zeroed; the network degrades gracefully due to +channel-dropout training. + +```bash +python3 cnn_v3/training/pack_photo_sample.py \ + --photo cnn_v3/training/input/photo1.jpg \ + --output dataset/photos/sample_001/ +``` + +The output `target.png` defaults to the input photo (no style). Copy in +your stylized version as `target.png` before training. + +### Dataset layout + +``` +dataset/ + blender/ + sample_0001/ sample_0002/ ... + photos/ + sample_001/ sample_002/ ... +``` + +Mix freely; the dataloader treats all sample directories uniformly. + +--- + +## 3. 
Training + +*(Network not yet implemented — this section will be filled as Phase 3+ lands.)* + +**Planned command:** +```bash +python3 cnn_v3/training/train_cnn_v3.py \ + --dataset dataset/ \ + --epochs 500 \ + --output cnn_v3/weights/cnn_v3_weights.bin +``` + +**FiLM conditioning** during training: +- Beat/audio inputs are randomized per sample +- Network learns to produce varied styles from same geometry + +**Validation:** +```bash +python3 cnn_v3/training/train_cnn_v3.py --validate \ + --checkpoint cnn_v3/weights/cnn_v3_weights.bin \ + --input test_frame.png +``` + +--- + +## 4. Running the CNN v3 Effect (Future) + +Once the C++ CNNv3Effect exists: + +```seq +# BPM 120 +SEQUENCE 0 0 "Scene with CNN v3" + EFFECT + GBufferEffect prev_cnn -> gbuf_feat0 gbuf_feat1 0 60 + EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> sink 0 60 +``` + +FiLM parameters are uploaded via uniform each frame: +```cpp +cnn_v3_effect->set_film_params( + params.beat_phase, params.beat_time / 8.0f, params.audio_intensity, + style_p0, style_p1); +``` + +--- + +## 5. Per-Pixel Validation + +The CNN v3 design requires exact parity between PyTorch, WGSL (HTML), and C++. + +*(Validation tooling not yet implemented.)* + +**Planned workflow:** +1. Export test input + weights as JSON +2. Run Python reference → save per-pixel output +3. Run HTML WebGPU tool → compare against Python +4. Run C++ `cnn_v3_test` tool → compare against Python +5. All comparisons must pass at ≤ 1/255 per pixel + +--- + +## 6. Phase Status + +| Phase | Status | Notes | +|-------|--------|-------| +| 1 — G-buffer (raster + pack) | ✅ Done | Integrated, 35/35 tests pass | +| 1 — G-buffer (SDF + shadow passes) | TODO | Placeholder in place | +| 2 — Training infrastructure | ✅ Done | blender_export.py, pack_*_sample.py | +| 3 — WGSL U-Net shaders | TODO | enc/dec/bottleneck/FiLM | +| 4 — C++ CNNv3Effect | TODO | FiLM uniform upload | +| 5 — Parity validation | TODO | Test vectors, ≤1/255 | + +--- + +## 7. 
Quick Troubleshooting + +**GBufferEffect renders nothing / albedo is black** +- Check `set_scene()` was called before `render()` +- Verify scene has at least one object +- Check camera matrix is not degenerate (near/far, aspect) + +**Pack shader fails to compile** +- `gbuf_pack.wgsl` uses no `#include`s; ShaderComposer compose is a no-op +- Check `ASSET_SHADER_GBUF_PACK` resolves in assets.txt + +**Raster shader fails with `#include "common_uniforms"` error** +- `ShaderComposer::Get().Compose({"common_uniforms"}, src)` must be called + before passing to `wgpuDeviceCreateShaderModule` — already done in effect.cc + +**G-buffer outputs wrong resolution** +- `resize()` is not yet implemented in GBufferEffect; textures are fixed + at construction size. Will be added when resize support is needed. + +--- + +## See Also + +- `cnn_v3/docs/CNN_V3.md` — Full architecture design (U-Net, FiLM, feature layout) +- `doc/EFFECT_WORKFLOW.md` — General effect integration guide +- `cnn_v2/docs/CNN_V2.md` — Reference implementation (simpler, operational) +- `src/tests/gpu/test_demo_effects.cc` — GBufferEffect construction test diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc index fb0146e..750188f 100644 --- a/cnn_v3/src/gbuffer_effect.cc +++ b/cnn_v3/src/gbuffer_effect.cc @@ -4,6 +4,7 @@ #include "gbuffer_effect.h" #include "3d/object.h" #include "gpu/gpu.h" +#include "gpu/shader_composer.h" #include "util/fatal_error.h" #include "util/mini_math.h" #include <cstring> @@ -390,9 +391,12 @@ void GBufferEffect::create_raster_pipeline() { return; // Asset not loaded yet; pipeline creation deferred. 
} + const std::string composed = + ShaderComposer::Get().Compose({"common_uniforms"}, src); + WGPUShaderSourceWGSL wgsl_src = {}; wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_src.code = str_view(src); + wgsl_src.code = str_view(composed.c_str()); WGPUShaderModuleDescriptor shader_desc = {}; shader_desc.nextInChain = &wgsl_src.chain; @@ -466,9 +470,11 @@ void GBufferEffect::create_pack_pipeline() { return; } + const std::string composed = ShaderComposer::Get().Compose({}, src); + WGPUShaderSourceWGSL wgsl_src = {}; wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_src.code = str_view(src); + wgsl_src.code = str_view(composed.c_str()); WGPUShaderModuleDescriptor shader_desc = {}; shader_desc.nextInChain = &wgsl_src.chain; diff --git a/src/effects/shaders.cc b/src/effects/shaders.cc index 8a81bb0..1adbff5 100644 --- a/src/effects/shaders.cc +++ b/src/effects/shaders.cc @@ -107,6 +107,8 @@ const char* scene2_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_SCENE2); const char* scratch_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_SCRATCH); const char* ntsc_rgb_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_NTSC_RGB); const char* ntsc_yiq_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_NTSC_YIQ); +const char* gbuf_raster_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_GBUF_RASTER); +const char* gbuf_pack_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_GBUF_PACK); // Compute shaders const char* gen_noise_compute_wgsl = diff --git a/src/effects/shaders.h b/src/effects/shaders.h index 527a8a3..cf095fb 100644 --- a/src/effects/shaders.h +++ b/src/effects/shaders.h @@ -20,6 +20,10 @@ extern const char* scratch_shader_wgsl; extern const char* ntsc_rgb_shader_wgsl; extern const char* ntsc_yiq_shader_wgsl; +// CNN v3 G-buffer shaders +extern const char* gbuf_raster_wgsl; +extern const char* gbuf_pack_wgsl; + // Compute shaders extern const char* gen_noise_compute_wgsl; extern const char* gen_perlin_compute_wgsl; diff --git a/src/gpu/demo_effects.h 
b/src/gpu/demo_effects.h index 9d794aa..91ab6f2 100644 --- a/src/gpu/demo_effects.h +++ b/src/gpu/demo_effects.h @@ -32,6 +32,9 @@ #include "effects/scratch_effect.h" #include "effects/ntsc_effect.h" +// CNN v3 G-buffer +#include "../../cnn_v3/src/gbuffer_effect.h" + // TODO: Port CNN effects // #include "../../cnn_v1/src/cnn_v1_effect.h" // #include "../../cnn_v2/src/cnn_v2_effect.h" diff --git a/src/tests/gpu/test_demo_effects.cc b/src/tests/gpu/test_demo_effects.cc index 9d8cb7d..c2588f2 100644 --- a/src/tests/gpu/test_demo_effects.cc +++ b/src/tests/gpu/test_demo_effects.cc @@ -79,6 +79,11 @@ static void test_effects() { {"NtscYiq", std::make_shared<NtscYiq>( fixture.ctx(), std::vector<std::string>{"source"}, std::vector<std::string>{"sink"}, 0.0f, 1000.0f)}, + {"GBufferEffect", + std::make_shared<GBufferEffect>( + fixture.ctx(), std::vector<std::string>{"source"}, + std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"}, 0.0f, + 1000.0f)}, }; int passed = 0; diff --git a/workspaces/main/assets.txt b/workspaces/main/assets.txt index b50f2fb..ad57d2f 100644 --- a/workspaces/main/assets.txt +++ b/workspaces/main/assets.txt @@ -97,6 +97,10 @@ SHADER_SCRATCH, WGSL, ../../src/effects/scratch.wgsl, "Scratch effect shader" SHADER_NTSC_RGB, WGSL, ../../src/effects/ntsc_rgb.wgsl, "NTSC effect shader (RGB input)" SHADER_NTSC_YIQ, WGSL, ../../src/effects/ntsc_yiq.wgsl, "NTSC effect shader (YIQ input)" SHADER_RENDER_NTSC_COMMON, WGSL, ../../src/shaders/render/ntsc_common.wgsl, "NTSC shared constants and functions snippet" + +# --- CNN v3 G-Buffer --- +SHADER_GBUF_RASTER, WGSL, ../../cnn_v3/shaders/gbuf_raster.wgsl, "CNN v3 G-buffer MRT rasterization shader" +SHADER_GBUF_PACK, WGSL, ../../cnn_v3/shaders/gbuf_pack.wgsl, "CNN v3 G-buffer feature pack compute shader" SHADER_DEBUG_DEBUG_PRINT, WGSL, ../../src/shaders/debug/debug_print.wgsl, "Debug print snippet" # --- Sequence Shaders --- |
