Diffstat (limited to 'cnn_v3/docs')
 cnn_v3/docs/CNN_V3.md             |  20
 cnn_v3/docs/GBUF_DIF_MIGRATION.md | 136
 cnn_v3/docs/HOWTO.md              |  61
 cnn_v3/docs/HOW_TO_CNN.md         |  14
 4 files changed, 200 insertions, 31 deletions
diff --git a/cnn_v3/docs/CNN_V3.md b/cnn_v3/docs/CNN_V3.md
index 3f8f7db..4d58811 100644
--- a/cnn_v3/docs/CNN_V3.md
+++ b/cnn_v3/docs/CNN_V3.md
@@ -156,7 +156,7 @@ Depth gradient captures surface discontinuities and orientation cues for the CNN
 |-----|--------|--------|--------|--------|
 | [0] | mat_id | prev.r | prev.g | prev.b |
 | [1] | mip1.r | mip1.g | mip1.b | mip2.r |
-| [2] | mip2.g | mip2.b | shadow | transp. |
+| [2] | mip2.g | mip2.b | dif | transp. |
 | [3] | — spare — | | | |
 
 All packed via `pack4x8unorm`. Channels:
@@ -164,7 +164,7 @@ All packed via `pack4x8unorm`. Channels:
 - **prev.rgb**: previous CNN output (temporal feedback, recurrent)
 - **mip1.rgb**: albedo at MIP 1 (½ resolution) — medium-frequency color context
 - **mip2.rgb**: albedo at MIP 2 (¼ resolution) — low-frequency color context
-- **shadow**: shadow intensity [0=fully shadowed, 1=fully lit] from shadow pass
+- **dif**: pre-multiplied occluded diffuse = `max(0, dot(normal, KEY_LIGHT)) * shadow` [0=dark, 1=fully lit]
 - **transp.**: volumetric transparency [0=opaque, 1=transparent] for fog/smoke/volumetric light
 
 **Texture 1 is fully packed. u32[3] is reserved for future use.**
@@ -188,6 +188,8 @@ fn pack_features(@builtin(global_invocation_id) id: vec3u) {
   let transp = textureLoad(gbuf_transp, coord, 0).r;
   let mat_id = unpack_mat_id(nm);         // u8 from rg16float packing
   let normal = unpack_oct_normal(nm.rg);  // vec2f
+  let nor3 = oct_decode(normal);          // vec3f unit normal
+  let dif = max(0.0, dot(nor3, KEY_LIGHT)) * shadow;  // ch18
   let mip1 = textureSampleLevel(gbuf_albedo, smplr, uv, 1.0).rgb;
   let mip2 = textureSampleLevel(gbuf_albedo, smplr, uv, 2.0).rgb;
@@ -202,7 +204,7 @@ fn pack_features(@builtin(global_invocation_id) id: vec3u) {
   textureStore(feat_tex1, coord, vec4u(
     pack4x8unorm(vec4(mat_id, prev.r, prev.g, prev.b)),
     pack4x8unorm(vec4(mip1.r, mip1.g, mip1.b, mip2.r)),
-    pack4x8unorm(vec4(mip2.g, mip2.b, shadow, transp)),
+    pack4x8unorm(vec4(mip2.g, mip2.b, dif, transp)),
     0u,
   ));
 }
@@ -232,7 +234,7 @@ fn pack_features(@builtin(global_invocation_id) id: vec3u) {
 | 15 | mip2.r | u8 | Albedo MIP 2 (¼ res) |
 | 16 | mip2.g | u8 | |
 | 17 | mip2.b | u8 | |
-| 18 | shadow | u8 | Shadow intensity [0=dark, 1=lit] |
+| 18 | dif | u8 | max(0,dot(normal,KEY_LIGHT))×shadow [0=dark, 1=lit] |
 | 19 | transp. | u8 | Volumetric transparency [0=opaque, 1=clear] |
 
 UV computed in-shader. Bias = 1.0 implicit (standard NN, not stored).
@@ -244,7 +246,7 @@ Plus prev_cnn texture (RGBA8): **8 MB**.
 
 ### 16-byte fallback (budget-constrained)
 
-Drop temporal, MIPs, shadow, transparency. Geometric data only:
+Drop temporal, MIPs, dif, transparency. Geometric data only:
 
 | u32 | channels |
 |-----|----------|
@@ -436,7 +438,7 @@ Missing channels are **zero-filled** — the network degrades gracefully due to
 | prev.rgb | **0, 0, 0** (no history) |
 | mip1.rgb | Computed from photo (pyrDown ×1) |
 | mip2.rgb | Computed from photo (pyrDown ×2) |
-| shadow | **1.0** (assume fully lit) |
+| dif | **1.0** (assume fully lit; no normal/shadow data) |
 | transp. | **1 − alpha** (from photo alpha channel, or 0 if no alpha) |
 
 mip1/mip2 are still meaningful (they come from albedo, which we have).
@@ -464,7 +466,7 @@ Applied per-sample during dataloader `__getitem__`:
 
 ```python
 GEOMETRIC_CHANNELS = [3, 4, 5, 6, 7]  # normal.xy, depth, depth_grad.xy
-CONTEXT_CHANNELS = [8, 18, 19]        # mat_id, shadow, transp
+CONTEXT_CHANNELS = [8, 18, 19]        # mat_id, dif, transp
 TEMPORAL_CHANNELS = [9, 10, 11]       # prev.rgb
 
 def apply_channel_dropout(feat, p_geom=0.3, p_context=0.2, p_temporal=0.5):
@@ -834,7 +836,7 @@ FiLM γ/β computed JS-side from sliders (tiny MLP forward pass in JS), uploaded
 | `bn_tex` | W/2×H/2 | rgba32uint | 8 channels f16 (bottleneck output) |
 | `dec1_tex` | W×H | rgba32uint | 4 channels f16 (dec1 output) |
 | `dec0_tex` | W×H | rgba32uint | 4 channels f16 (dec0 output) |
-| `prev_tex` | W×H | rgba8unorm | previous CNN output (temporal) |
+| `prev_tex` | W×H | rgba16float | previous CNN output (temporal, `F16X8`) |
 
 Skip connections: enc0_tex and enc1_tex are **kept alive** across the full forward pass
 (not ping-ponged away). DEC1 and DEC0 read them directly.
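The channel-group dropout referenced in the hunk above can be fleshed out as a standalone reference. This is a hedged sketch, not the repo's `cnn_v3_utils.py`: the group-wise independent drops, the `(C, H, W)` layout, and the `rng` parameter are assumptions on top of what the doc states.

```python
import numpy as np

# Channel-group indices as documented for the 20-channel feature layout.
GEOMETRIC_CHANNELS = [3, 4, 5, 6, 7]   # normal.xy, depth, depth_grad.xy
CONTEXT_CHANNELS   = [8, 18, 19]       # mat_id, dif, transp
TEMPORAL_CHANNELS  = [9, 10, 11]       # prev.rgb

def apply_channel_dropout(feat, p_geom=0.3, p_context=0.2, p_temporal=0.5, rng=None):
    """Zero out whole channel groups, each group with its own probability.

    feat: (C, H, W) float array, C = 20. Returns a copy; the input is untouched.
    """
    if rng is None:
        rng = np.random.default_rng()
    feat = feat.copy()
    for channels, p in ((GEOMETRIC_CHANNELS, p_geom),
                        (CONTEXT_CHANNELS, p_context),
                        (TEMPORAL_CHANNELS, p_temporal)):
        if rng.random() < p:  # drop the whole group together
            feat[channels] = 0.0
    return feat
```

Dropping a group atomically (rather than per channel) matches the photo-only inference case, where an entire group — e.g. all geometry — is absent at once.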
@@ -977,7 +979,7 @@ Reuse from existing shaders:
 
 - [ ] `cmake/DemoSourceLists.cmake` — add `cnn_v3_effect.cc` to COMMON_GPU_EFFECTS
 - [ ] `src/gpu/demo_effects.h` — add `#include "effects/cnn_v3_effect.h"`
-- [ ] `workspaces/main/timeline.seq` — add `EFFECT + CNNv3Effect`
+- [x] `workspaces/main/timeline.seq` — add `EFFECT + CNNv3Effect` (done: cnn_v3_debug sequence)
 
 ---
diff --git a/cnn_v3/docs/GBUF_DIF_MIGRATION.md b/cnn_v3/docs/GBUF_DIF_MIGRATION.md
new file mode 100644
index 0000000..37dde0f
--- /dev/null
+++ b/cnn_v3/docs/GBUF_DIF_MIGRATION.md
@@ -0,0 +1,136 @@
+// cnn_v3/docs/GBUF_DIF_MIGRATION.md
+// Plan: replace G-buffer shadow channel with dif (diffuse × shadow)
+// Status: Steps 1–3 and 5 complete; Step 4 (test vectors) pending
+
+# G-Buffer `shadow` → `dif` Migration Plan
+
+## Motivation
+
+The raw `shadow` channel (ch18) is less informative than `dif = max(0, dot(normal, light_dir)) * shadow`
+because `shadow` alone ignores the diffuse Lambert term. The CNN learns better when it receives
+the pre-multiplied occluded diffuse signal directly. `albedo` is already in ch0–2, so the CNN
+can reconstruct the full shaded color as `albedo * (ambient + dif)`.
+
+## Design Decision
+
+**Replace ch18 (`shadow`) with ch18 (`dif`) in-place. Channel count stays 20.**
+
+- `dif` is a scalar: `max(0, dot(normal, KEY_LIGHT)) * shadow`
+- KEY_LIGHT = normalize(1, 2, 1) = (0.408, 0.816, 0.408) — matches `gbuf_deferred.wgsl`
+- Stored at the same position (t1.z byte 2) → no weight shape change
+- `transp` stays at ch19 (t1.z byte 3)
+- t1.w reverts to 0 (spare)
+
+### Feature layout (20 channels, unchanged count)
+
+| ch | name | type | range | source |
+|----|----------|--------|----------|----------------|
+| 0 | alb.r | f16 | [0,1] | feat_tex0.x lo |
+| 1 | alb.g | f16 | [0,1] | feat_tex0.x hi |
+| 2 | alb.b | f16 | [0,1] | feat_tex0.y lo |
+| 3 | nrm.x | f16 | [-1,1] | feat_tex0.y hi |
+| 4 | nrm.y | f16 | [-1,1] | feat_tex0.z lo |
+| 5 | depth | f16 | [0,1] | feat_tex0.z hi |
+| 6 | dzdx | f16 | (signed) | feat_tex0.w lo |
+| 7 | dzdy | f16 | (signed) | feat_tex0.w hi |
+| 8 | mat_id | u8 | [0,1] | feat_tex1.x[0] |
+| 9 | prev.r | u8 | [0,1] | feat_tex1.x[1] |
+| 10 | prev.g | u8 | [0,1] | feat_tex1.x[2] |
+| 11 | prev.b | u8 | [0,1] | feat_tex1.x[3] |
+| 12 | mip1.r | u8 | [0,1] | feat_tex1.y[0] |
+| 13 | mip1.g | u8 | [0,1] | feat_tex1.y[1] |
+| 14 | mip1.b | u8 | [0,1] | feat_tex1.y[2] |
+| 15 | mip2.r | u8 | [0,1] | feat_tex1.y[3] |
+| 16 | mip2.g | u8 | [0,1] | feat_tex1.z[0] |
+| 17 | mip2.b | u8 | [0,1] | feat_tex1.z[1] |
+| 18 | **dif** | u8 | [0,1] | feat_tex1.z[2] ← was shadow |
+| 19 | transp | u8 | [0,1] | feat_tex1.z[3] |
+
+---
+
+## Intermediate State (the `wip` commit — fixed by Step 1)
+
+The commit tagged `wip(cnn_v3): shadow→dif intermediate` contains partial work.
+The WGSL changes in that commit were **incorrect** — `dif` was redundantly stored in t1.w (3×) and
+`shadow` was dropped from t1.z without putting `dif` in its place.
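For reference, the ch18 computation can be sketched in NumPy. This is a hedged, illustrative sketch mirroring the formula in the design decision above — `compute_dif` is a hypothetical name, not the actual `cnn_v3_utils.py` API (which computes ch18 inside `assemble_features()`).

```python
import numpy as np

# KEY_LIGHT = normalize((1, 2, 1)), as stated in the plan.
KEY_LIGHT = np.array([1.0, 2.0, 1.0]) / np.sqrt(6.0)  # ≈ (0.408, 0.816, 0.408)

def compute_dif(normal, shadow):
    """ch18: pre-multiplied occluded diffuse.

    normal: (H, W, 3) unit normals; shadow: (H, W) in [0, 1] (1 = fully lit).
    Returns (H, W) in [0, 1]: Lambert term clamped at 0, times the shadow factor.
    """
    lambert = np.maximum(0.0, normal @ KEY_LIGHT)  # max(0, dot(n, KEY_LIGHT))
    return lambert * shadow
```

Because the clamp zeroes back-facing surfaces and `shadow` zeroes occluded ones, `dif` is dark whenever either term is dark — exactly the signal the CNN would otherwise have to learn to multiply together itself.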
+
+### What is wrong
+
+| File | Problem |
+|---|---|
+| `gbuf_pack.wgsl` | t1.z = `mip2.g\|mip2.b\|transp\|spare` (shadow removed, dif not put there); t1.w = `dif\|dif\|dif\|spare` (redundant) |
+| `gbuf_deferred.wgsl` | reads `dif` from `t1.w.x` — should be `t1.z.z` |
+| `gbuf_view.wgsl` | expanded to 4×6 grid with ch20–22 as dif.rgb — should stay 4×5, ch18=dif |
+
+---
+
+## Implementation Checklist
+
+### Step 1 — Fix WGSL (correct the in-place swap) ✅
+
+- [x] `cnn_v3/shaders/gbuf_pack.wgsl`
+  - t1.z: `pack4x8unorm(vec4f(mip2.g, mip2.b, dif, transp))` ← dif at byte 2
+  - t1.w: `0u` ← revert to spare
+  - Remove comment line about t1.w dif
+
+- [x] `cnn_v3/shaders/gbuf_deferred.wgsl`
+  - Read: `let dif = unpack4x8unorm(t1.z).z;` ← from t1.z byte 2
+
+- [x] `cnn_v3/shaders/gbuf_view.wgsl`
+  - Revert to 4×5 grid (ROWS = 5.0)
+  - Guard: `ch >= 20u`
+  - ch18 label: `dif` (4 chars: 0x64696600)
+  - ch19 label: `trns` (unchanged)
+  - Remove row-5 cases (20u, 21u, default→dif.b)
+  - Revert `else if (comp_idx == 2u)` → `else` (drop t1.w branch)
+  - Update header comment
+
+- [x] `cnn_v3/shaders/cnn_v3_enc0.wgsl`
+  - Verify `load_feat()`: g = unpack4x8unorm(t1.z) → g.z = ch18 = dif ✓ (no change needed)
+
+### Step 2 — Python training ✅
+
+- [x] `cnn_v3/training/cnn_v3_utils.py`
+  - Added `oct_decode()` helper and `_KEY_LIGHT` constant
+  - `assemble_features()`: ch18 = `dif` computed on-the-fly
+  - Replace `shadow[..., None]` with `dif[..., None]` at index 18
+  - `CONTEXT_CHANNELS = [8, 18, 19]` — same indices, updated comment
+
+- [ ] `cnn_v3/training/pack_blender_sample.py`
+  - Optional: save `dif.png` (precomputed) alongside existing passes
+  - Not strictly required if utils.py computes on-the-fly
+
+### Step 3 — Web tool ✅
+
+- [x] `cnn_v3/tools/shaders.js` (FULL_PACK_SHADER)
+  - Add `oct_decode` inline (or inline the math)
+  - Compute `let dif = max(0., dot(oct_decode(nrm), vec3f(0.408, 0.816, 0.408))) * shd`
+  - Pack: t1.z = `pack4x8unorm(vec4f(m2.g, m2.b, dif, trp))`
+  - t1.w = `0u`
+
+### Step 4 — Test vectors
+
+- [ ] Re-run `cnn_v3/training/gen_test_vectors.py` to regenerate `test_vectors.h`
+  - ch18 value changes (dif ≠ shadow in general); old vectors are invalid
+  - Parity threshold (4.88e-4) should be unchanged
+
+### Step 5 — Docs ✅
+
+- [x] `cnn_v3/docs/CNN_V3.md` — feature table, pack pseudo-code, simple-mode defaults, CONTEXT_CHANNELS comment
+- [x] `cnn_v3/docs/HOWTO.md` — outputs description, channel table, dropout comment, FULL_PACK_SHADER description
+- [x] This file: all steps marked complete
+
+---
+
+## Architecture Impact
+
+| Dimension | Before | After |
+|---|---|---|
+| Channel count | 20 | 20 ✅ |
+| Weight shapes | Conv(20→4, ...) | Conv(20→4, ...) ✅ |
+| Total f16 weights | 1964 | 1964 ✅ |
+| Training data regen | — | Not required ✅ |
+| Parity test vectors | Valid | Must regenerate ❌ |
+| Existing trained weights | Valid | Invalidated (ch18 distribution changes) ❌ |
+
+No real training pass has occurred yet, so weight invalidation is not a concern.
diff --git a/cnn_v3/docs/HOWTO.md b/cnn_v3/docs/HOWTO.md
index 5c5cc2a..5cfc371 100644
--- a/cnn_v3/docs/HOWTO.md
+++ b/cnn_v3/docs/HOWTO.md
@@ -79,7 +79,7 @@ Each frame, `GBufferEffect::render()` executes:
 
 3. **Pass 3 — Transparency** — TODO (deferred; transp=0 for opaque scenes)
 4. **Pass 4 — Pack compute** (`gbuf_pack.wgsl`) ✅
-   - Reads all G-buffer textures + `prev_cnn` input
+   - Reads all G-buffer textures + persistent `prev_cnn` texture
    - Writes `feat_tex0` + `feat_tex1` (rgba32uint, 20 channels, 32 bytes/pixel)
    - Shadow / transp nodes cleared to 1.0 / 0.0 via zero-draw render passes until Pass 2/3 are implemented.
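The `oct_decode()` helper referenced in the migration plan (Step 2 adds it to `cnn_v3_utils.py`) follows the standard octahedral-mapping decode. A hedged NumPy sketch of that standard technique — the exact signature in the repo may differ:

```python
import numpy as np

def oct_decode(e):
    """Decode octahedral-mapped normals: (..., 2) in [-1,1] → (..., 3) unit vectors.

    Standard octahedron unfolding: z = 1 - |x| - |y|; points with z < 0
    (lower hemisphere) are folded back across the diagonal before normalizing.
    """
    e = np.asarray(e, dtype=np.float64)
    x, y = e[..., 0], e[..., 1]
    z = 1.0 - np.abs(x) - np.abs(y)
    t = np.maximum(-z, 0.0)            # fold amount; 0 on the upper hemisphere
    x = x - np.copysign(t, x)
    y = y - np.copysign(t, y)
    n = np.stack([x, y, z], axis=-1)
    return n / np.linalg.norm(n, axis=-1, keepdims=True)
```

The encoded (0, 0) maps to the straight-on normal (0, 0, 1), which is why the photo-only pipeline's neutral `normal.png` value of (128, 128) reconstructs to (0, 0, 1).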
@@ -90,9 +90,38 @@ Outputs are named from the `outputs` vector passed to the constructor:
 
 ```
 outputs[0] → feat_tex0 (rgba32uint: albedo.rgb, normal.xy, depth, depth_grad.xy)
-outputs[1] → feat_tex1 (rgba32uint: mat_id, prev.rgb, mip1.rgb, mip2.rgb, shadow, transp)
+outputs[1] → feat_tex1 (rgba32uint: mat_id, prev.rgb, mip1.rgb, mip2.rgb, dif, transp)
 ```
 
+### Temporal feedback (prev.rgb)
+
+`GBufferEffect` owns a persistent internal node `<prefix>_prev` (`F16X8` = Rgba16Float,
+`CopySrc|CopyDst`). Each frame it is GPU-copied from the CNN effect's output after all
+effects render (`post_render`), then bound as `prev_cnn` in the pack shader (binding 6).
+
+**Wiring is automatic** via `wire_dag()`, called by `Sequence::init_effect_nodes()`.
+`GBufferEffect` scans the DAG for the first downstream consumer of its output nodes and
+uses that effect's output as `cnn_output_node_`. No manual call needed.
+
+**Requirement**: the sequence must include `CNNv3Effect` downstream of `GBufferEffect`.
+In `timeline.seq`, declare a `gbuf_albedo` output node and add the effect:
+
+```seq
+NODE cnn_out gbuf_albedo
+EFFECT + GBufferEffect source -> gbuf_feat0 gbuf_feat1 0 60
+EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> cnn_out 0 60
+```
+
+If no CNN effect follows, `cnn_output_node_` stays empty and `post_render` is a no-op
+(prev.rgb will be zero — correct for static/debug-only sequences).
+
+Frame 0 behaviour: `_prev` is zeroed on allocation → `prev.rgb = 0`, matching the training
+convention (static frames use zero history).
+
+The copy uses `wgpuCommandEncoderCopyTextureToTexture` (no extra render pass overhead).
+`node_prev_tex_` is `F16X8` (Rgba16Float) to match the `GBUF_ALBEDO` format of CNNv3Effect's
+output — `CopyTextureToTexture` requires identical formats.
+
 ---
 
 ## 1b. GBufferEffect — Implementation Plan (Pass 2: SDF Shadow)
@@ -285,7 +314,7 @@ python3 train_cnn_v3.py \
 
 Applied per-sample in `cnn_v3_utils.apply_channel_dropout()`:
 
 - Geometric channels (normal, depth, depth_grad) zeroed with `p=channel_dropout_p`
-- Context channels (mat_id, shadow, transp) with `p≈0.2`
+- Context channels (mat_id, dif, transp) with `p≈0.2`
 - Temporal channels (prev.rgb) with `p=0.5`
 
 This ensures the network works for both full G-buffer and photo-only inputs.
@@ -299,10 +328,12 @@ This ensures the network works for both full G-buffer and photo-only inputs.
 
 ```seq
 # BPM 120
 SEQUENCE 0 0 "Scene with CNN v3"
-  EFFECT + GBufferEffect prev_cnn -> gbuf_feat0 gbuf_feat1 0 60
-  EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> sink 0 60
+  EFFECT + GBufferEffect source -> gbuf_feat0 gbuf_feat1 0 60
+  EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> sink 0 60
 ```
 
+Temporal feedback is wired automatically by `wire_dag()` — no manual call needed.
+
 FiLM parameters uploaded each frame:
 
 ```cpp
 cnn_v3_effect->set_film_params(
@@ -455,15 +486,15 @@ GBufViewEffect(const GpuContext& ctx, float start_time, float end_time)
 ```
 
-**Wiring example** (alongside GBufferEffect):
+**Wiring example** — use `timeline.seq`, temporal feedback wires automatically:
 
-```cpp
-auto gbuf = std::make_shared<GBufferEffect>(ctx,
-    std::vector<std::string>{"prev_cnn"},
-    std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"}, 0.0f, 60.0f);
-auto gview = std::make_shared<GBufViewEffect>(ctx,
-    std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"},
-    std::vector<std::string>{"gbuf_view_out"}, 0.0f, 60.0f);
+```seq
+NODE gbuf_feat0 gbuf_rgba32uint
+NODE gbuf_feat1 gbuf_rgba32uint
+NODE cnn_out gbuf_albedo
+EFFECT + GBufferEffect source -> gbuf_feat0 gbuf_feat1 0 60
+EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> cnn_out 0 60
+EFFECT + GBufViewEffect gbuf_feat0 gbuf_feat1 -> sink 0 60
 ```
 
 **Grid layout** (output resolution = input resolution, channel cells each 1/4 W × 1/5 H):
@@ -474,7 +505,7 @@ auto gview = std::make_shared<GBufViewEffect>(ctx,
 | 1 | `nrm.y` remap→[0,1] | `depth` (inverted) | `dzdx` ×20+0.5 | `dzdy` ×20+0.5 |
 | 2 | `mat_id` | `prev.r` | `prev.g` | `prev.b` |
 | 3 | `mip1.r` | `mip1.g` | `mip1.b` | `mip2.r` |
-| 4 | `mip2.g` | `mip2.b` | `shadow` | `transp` |
+| 4 | `mip2.g` | `mip2.b` | `dif` | `transp` |
 
 All channels displayed as grayscale. 1-pixel gray grid lines separate cells.
 Dark background for out-of-range cells.
@@ -535,7 +566,7 @@ No sampler — all reads use `textureLoad()` (integer texel coordinates).
 
 Packs channels identically to `gbuf_pack.wgsl`:
 
 - `feat_tex0`: `pack2x16float(alb.rg)`, `pack2x16float(alb.b, nrm.x)`, `pack2x16float(nrm.y, depth)`, `pack2x16float(dzdx, dzdy)`
-- `feat_tex1`: `pack4x8unorm(matid,0,0,0)`, `pack4x8unorm(mip1.rgb, mip2.r)`, `pack4x8unorm(mip2.gb, shadow, transp)`
+- `feat_tex1`: `pack4x8unorm(matid,0,0,0)`, `pack4x8unorm(mip1.rgb, mip2.r)`, `pack4x8unorm(mip2.gb, dif, transp)`
 - Depth gradients: central differences on depth R channel
 - Mip1 / Mip2: box2 (2×2) / box4 (4×4) average filter on albedo
diff --git a/cnn_v3/docs/HOW_TO_CNN.md b/cnn_v3/docs/HOW_TO_CNN.md
index 458b68f..4966a61 100644
--- a/cnn_v3/docs/HOW_TO_CNN.md
+++ b/cnn_v3/docs/HOW_TO_CNN.md
@@ -97,7 +97,7 @@ It calls `pack_photo_sample.py` with both `--photo` and `--target` in a single s
 | `normal.png` | (128, 128, 0) uint8 | Neutral "no normal" → reconstructed (0,0,1) |
 | `depth.png` | All zeros uint16 | No depth data |
 | `matid.png` | All zeros uint8 | No material IDs |
-| `shadow.png` | 255 everywhere uint8 | Assume fully lit |
+| `shadow.png` | 255 everywhere uint8 | Assume fully lit (used to compute dif) |
 | `transp.png` | 1 − alpha uint8 | 0 = opaque |
 | `target.png` | Stylized target RGBA | Ground truth for training |
@@ -134,7 +134,7 @@ done
 
 ### 1b. From Blender (Full G-Buffer)
 
-Produces all 20 feature channels including normals, depth, mat IDs, and shadow.
+Produces all 20 feature channels including normals, depth, mat IDs, and dif (diffuse×shadow).
 
 #### Blender requirements
@@ -420,7 +420,7 @@ Applied per-sample to make the model robust to missing channels:
 
 | Channel group | Channels | Drop probability |
 |---------------|----------|-----------------|
 | Geometric | normal.xy, depth, depth_grad.xy [3,4,5,6,7] | `channel_dropout_p` (default 0.3) |
-| Context | mat_id, shadow, transp [8,18,19] | `channel_dropout_p × 0.67` (~0.2) |
+| Context | mat_id, dif, transp [8,18,19] | `channel_dropout_p × 0.67` (~0.2) |
 | Temporal | prev.rgb [9,10,11] | 0.5 (always) |
 
 This is why a model trained on Blender data also works on photos (geometry zeroed).
@@ -781,7 +781,7 @@ Both produced by `export_cnn_v3_weights.py` (§3).
 
 | Texture | Format | Size |
 |---------|--------|------|
 | `feat_tex0` | rgba32uint | W × H (8 f16: albedo, normal, depth, depth_grad) |
-| `feat_tex1` | rgba32uint | W × H (12 u8: mat_id, prev, mip1, mip2, shadow, transp) |
+| `feat_tex1` | rgba32uint | W × H (12 u8: mat_id, prev, mip1, mip2, dif, transp) |
 | `enc0_tex` | rgba16float | W × H |
 | `enc1_tex` | rgba32uint | W/2 × H/2 (8 f16 packed) |
 | `bn_tex` | rgba32uint | W/4 × H/4 |
@@ -790,7 +790,7 @@ Both produced by `export_cnn_v3_weights.py` (§3).
 
 ### Simple mode (photo input)
 
-Albedo = image RGB, mip1/mip2 from GPU mipmaps, shadow = 1.0, transp = 1 − alpha,
+Albedo = image RGB, mip1/mip2 from GPU mipmaps, dif = 1.0 (fully lit assumed), transp = 1 − alpha,
 all geometric channels (normal, depth, depth_grad, mat_id, prev) = 0.
 
 ### Browser requirements
@@ -843,7 +843,7 @@ all geometric channels (normal, depth, depth_grad, mat_id, prev) = 0.
 | 9–11 | prev.rgb | previous frame output | zero during training |
 | 12–14 | mip1.rgb | pyrdown(albedo) | f32 [0,1] |
 | 15–17 | mip2.rgb | pyrdown(mip1) | f32 [0,1] |
-| 18 | shadow | `shadow.png` | f32 [0,1] (1=lit) |
+| 18 | dif | computed | f32 [0,1] max(0,dot(normal,KEY_LIGHT))×shadow |
 | 19 | transp | `transp.png` | f32 [0,1] (0=opaque) |
 
 **Feature texture packing** (`feat_tex0` / `feat_tex1`, both `rgba32uint`):
@@ -858,6 +858,6 @@ feat_tex0 (4×u32 = 8 f16 channels via pack2x16float):
 feat_tex1 (4×u32 = 12 u8 channels + padding via pack4x8unorm):
   .x = pack4x8unorm(mat_id, prev.r, prev.g, prev.b)
   .y = pack4x8unorm(mip1.r, mip1.g, mip1.b, mip2.r)
-  .z = pack4x8unorm(mip2.g, mip2.b, shadow, transp)
+  .z = pack4x8unorm(mip2.g, mip2.b, dif, transp)
   .w = 0 (unused, 8 reserved channels)
 ```
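The WGSL packing intrinsics used throughout (`pack2x16float`, `pack4x8unorm`) can be emulated on the CPU when debugging CNN/WGSL parity. A hedged NumPy sketch of their WGSL-specified semantics (component 0 lands in the low bits); the function names match WGSL, but this emulation is not part of the repo:

```python
import numpy as np

def pack2x16float(a, b):
    """WGSL pack2x16float: two f32 → IEEE f16, packed into one u32 (a = low 16 bits)."""
    h = np.array([a, b], dtype=np.float16).view(np.uint16)
    return int(h[0]) | (int(h[1]) << 16)

def pack4x8unorm(v):
    """WGSL pack4x8unorm: clamp each component to [0,1], quantize to u8
    as floor(0.5 + 255*e), place component i in bits 8i..8i+7."""
    out = 0
    for i, x in enumerate(v):
        out |= int(np.floor(0.5 + 255.0 * min(1.0, max(0.0, x)))) << (8 * i)
    return out

def unpack4x8unorm(u):
    """Inverse of pack4x8unorm: u32 → four floats in [0,1]."""
    return [((u >> (8 * i)) & 0xFF) / 255.0 for i in range(4)]
```

With this emulation, ch18 (`dif`) is recovered from a `feat_tex1.z` word as `(word >> 16) & 0xFF` — byte 2, the same slot `shadow` previously occupied.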
