summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorskal <pascal.massimino@gmail.com>2026-03-23 00:26:52 +0100
committerskal <pascal.massimino@gmail.com>2026-03-23 00:26:52 +0100
commit1470dd240f48652d1fe97957fe44a49b0e1ee9a6 (patch)
treec202e36a2aaed35fd8bc111457bcba89c7db8658
parent12d5d5f1762a0c00405950b6ff5e564880f0ff36 (diff)
wip(cnn_v3): shadow→dif intermediate + scene tweaks + migration plan
- gbuf_shadow.wgsl: normal bias 0.05→0.02 - gbuf_pack.wgsl: compute dif=diffuse*shadow, drop shadow from t1.z, store dif in t1.w (INTERMEDIATE — incorrect packing, see migration plan) - gbuf_deferred.wgsl: read dif from t1.w.x (matches intermediate packing) - gbuf_view.wgsl: expand to 4×6 grid, show dif.r/g/b in row 5 (INTERMEDIATE — to be reverted to 4×5 with ch18=dif) - gbuffer_effect.cc: add small hovering sphere (r=0.6) above scene; swap cube/sphere positions; both spheres pulsate - docs/GBUF_DIF_MIGRATION.md: full migration plan with checklist handoff(Claude): intermediate commit — GBUF_DIF_MIGRATION.md §Current State describes what is wrong and the full implementation checklist (5 steps).
-rw-r--r--cnn_v3/docs/GBUF_DIF_MIGRATION.md141
-rw-r--r--cnn_v3/shaders/gbuf_deferred.wgsl8
-rw-r--r--cnn_v3/shaders/gbuf_pack.wgsl11
-rw-r--r--cnn_v3/shaders/gbuf_shadow.wgsl2
-rw-r--r--cnn_v3/shaders/gbuf_view.wgsl20
-rw-r--r--cnn_v3/src/gbuffer_effect.cc16
6 files changed, 178 insertions, 20 deletions
diff --git a/cnn_v3/docs/GBUF_DIF_MIGRATION.md b/cnn_v3/docs/GBUF_DIF_MIGRATION.md
new file mode 100644
index 0000000..f1a4551
--- /dev/null
+++ b/cnn_v3/docs/GBUF_DIF_MIGRATION.md
@@ -0,0 +1,141 @@
+// cnn_v3/docs/GBUF_DIF_MIGRATION.md
+// Plan: replace G-buffer shadow channel with dif (diffuse × shadow)
+// Status: IN PROGRESS — current commit is intermediate state, see §Current State
+
+# G-Buffer `shadow` → `dif` Migration Plan
+
+## Motivation
+
+The raw `shadow` channel (ch18) is less informative than `dif = max(0, dot(normal, light_dir)) * shadow`
+because `shadow` alone ignores the diffuse Lambert term. The CNN learns better when it receives
+the pre-multiplied occluded diffuse signal directly. `albedo` is already in ch0–2, so the CNN
+can reconstruct the full shaded color as `albedo * (ambient + dif)`.
+
+## Design Decision
+
+**Replace ch18 (`shadow`) with ch18 (`dif`) in-place. Channel count stays 20.**
+
+- `dif` is a scalar: `max(0, dot(normal, KEY_LIGHT)) * shadow`
+- KEY_LIGHT = normalize(1, 2, 1) = (0.408, 0.816, 0.408) — matches `gbuf_deferred.wgsl`
+- Stored at the same position (t1.z byte 2) → no weight shape change
+- `transp` stays at ch19 (t1.z byte 3)
+- t1.w reverts to 0 (spare)
+
+### Feature layout (20 channels, unchanged count)
+
+| ch | name | type | range | source |
+|----|----------|--------|----------|----------------|
+| 0 | alb.r | f16 | [0,1] | feat_tex0.x lo |
+| 1 | alb.g | f16 | [0,1] | feat_tex0.x hi |
+| 2 | alb.b | f16 | [0,1] | feat_tex0.y lo |
+| 3 | nrm.x | f16 | [-1,1] | feat_tex0.y hi |
+| 4 | nrm.y | f16 | [-1,1] | feat_tex0.z lo |
+| 5 | depth | f16 | [0,1] | feat_tex0.z hi |
+| 6 | dzdx | f16 | (signed) | feat_tex0.w lo |
+| 7 | dzdy | f16 | (signed) | feat_tex0.w hi |
+| 8 | mat_id | u8 | [0,1] | feat_tex1.x[0] |
+| 9 | prev.r | u8 | [0,1] | feat_tex1.x[1] |
+| 10 | prev.g | u8 | [0,1] | feat_tex1.x[2] |
+| 11 | prev.b | u8 | [0,1] | feat_tex1.x[3] |
+| 12 | mip1.r | u8 | [0,1] | feat_tex1.y[0] |
+| 13 | mip1.g | u8 | [0,1] | feat_tex1.y[1] |
+| 14 | mip1.b | u8 | [0,1] | feat_tex1.y[2] |
+| 15 | mip2.r | u8 | [0,1] | feat_tex1.y[3] |
+| 16 | mip2.g | u8 | [0,1] | feat_tex1.z[0] |
+| 17 | mip2.b | u8 | [0,1] | feat_tex1.z[1] |
+| 18 | **dif** | u8 | [0,1] | feat_tex1.z[2] ← was shadow |
+| 19 | transp | u8 | [0,1] | feat_tex1.z[3] |
+
+---
+
+## Current State (intermediate — needs fixing)
+
+The commit whose subject line begins `wip(cnn_v3): shadow→dif intermediate` contains partial work.
+The WGSL changes are **incorrect** — `dif` is redundantly stored in t1.w (3×) and
+`shadow` was dropped from t1.z without putting `dif` in its place.
+
+### What is wrong
+
+| File | Problem |
+|---|---|
+| `gbuf_pack.wgsl` | t1.z = `mip2.g\|mip2.b\|transp\|spare` (shadow removed, dif not put there, so `transp` shifted from byte 3 / ch19 to byte 2 / ch18 and byte 3 is now always 0); t1.w = `dif\|dif\|dif\|spare` (redundant) |
+| `gbuf_deferred.wgsl` | reads `dif` from `t1.w.x` — should be `t1.z.z` |
+| `gbuf_view.wgsl` | expanded to 4×6 grid with ch20–22 as dif.rgb — should stay 4×5, ch18=dif |
+
+---
+
+## Implementation Checklist
+
+### Step 1 — Fix WGSL (correct the in-place swap)
+
+- [ ] `cnn_v3/shaders/gbuf_pack.wgsl`
+ - t1.z: `pack4x8unorm(vec4f(mip2.g, mip2.b, dif, transp))` ← dif at byte 2
+ - t1.w: `0u` ← revert to spare
+ - Remove comment line about t1.w dif
+
+- [ ] `cnn_v3/shaders/gbuf_deferred.wgsl`
+ - Read: `let dif = unpack4x8unorm(t1.z).z;` ← from t1.z byte 2
+
+- [ ] `cnn_v3/shaders/gbuf_view.wgsl`
+ - Revert to 4×5 grid (ROWS = 5.0)
+ - Guard: `col >= 4u || ch >= 20u` (restore the original condition; drop the `ch == 19u` and `ch >= 23u` tests)
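+ - ch18 label: `dif` (3 chars, zero-padded to the 4-byte label word: 0x64696600; pass length 3u, or pad with a space as 0x64696620 if the length stays 4u)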
+ - ch19 label: `trns` (unchanged)
+ - Remove row-5 cases (20u, 21u, default→dif.b)
+ - Revert `else if (comp_idx == 2u)` → `else` (drop t1.w branch)
+ - Update header comment
+
+- [ ] `cnn_v3/shaders/cnn_v3_enc0.wgsl`
+ - Verify `load_feat()`: g = unpack4x8unorm(t1.z) → g.z = ch18 = dif ✓ (no change needed)
+
+### Step 2 — Python training
+
+- [ ] `cnn_v3/training/cnn_v3_utils.py`
+ - `assemble_features()`: ch18 = `dif` computed on-the-fly:
+ ```python
+ KEY_LIGHT = np.array([0.408, 0.816, 0.408])
+ nor3 = oct_decode(normal) # (H,W,2) → (H,W,3)
+ diffuse = np.maximum(0, (nor3 * KEY_LIGHT).sum(-1))
+ dif = diffuse * shadow # (H,W)
+ ```
+ - Replace `shadow[..., None]` with `dif[..., None]` at index 18
+ - `CONTEXT_CHANNELS = [8, 18, 19]` — same indices, update comment
+
+- [ ] `cnn_v3/training/pack_blender_sample.py`
+ - Optional: save `dif.png` (precomputed) alongside existing passes
+ - Not strictly required if utils.py computes on-the-fly
+
+### Step 3 — Web tool
+
+- [ ] `cnn_v3/tools/shaders.js` (FULL_PACK_SHADER)
+ - Add `oct_decode` inline (or inline the math)
+ - Compute `let dif = max(0., dot(oct_decode(nrm), vec3f(0.408, 0.816, 0.408))) * shd`
+ - Pack: t1.z = `pack4x8unorm(vec4f(m2.g, m2.b, dif, trp))`
+ - t1.w = `0u`
+
+### Step 4 — Test vectors
+
+- [ ] Re-run `cnn_v3/training/gen_test_vectors.py` to regenerate `test_vectors.h`
+ - ch18 value changes (dif ≠ shadow in general); old vectors are invalid
+ - Parity threshold (4.88e-4) should be unchanged
+
+### Step 5 — Docs
+
+- [ ] `cnn_v3/docs/CNN_V3.md` — update feature table (ch18 shadow → dif)
+- [ ] `cnn_v3/docs/HOWTO.md` — §7 channel table, §3 pass-2 note
+- [ ] This file: mark steps complete as they land
+
+---
+
+## Architecture Impact
+
+| Dimension | Before | After |
+|---|---|---|
+| Channel count | 20 | 20 ✅ |
+| Weight shapes | Conv(20→4, ...) | Conv(20→4, ...) ✅ |
+| Total f16 weights | 1964 | 1964 ✅ |
+| Training data regen | — | Not required ✅ |
+| Parity test vectors | Valid | Must regenerate ❌ |
+| Existing trained weights | Valid | Invalidated (ch18 distribution changes) ❌ |
+
+No real training pass has occurred yet, so weight invalidation is not a concern.
diff --git a/cnn_v3/shaders/gbuf_deferred.wgsl b/cnn_v3/shaders/gbuf_deferred.wgsl
index 2ed4ce3..bcc42cc 100644
--- a/cnn_v3/shaders/gbuf_deferred.wgsl
+++ b/cnn_v3/shaders/gbuf_deferred.wgsl
@@ -40,9 +40,9 @@ fn fs_main(@builtin(position) pos: vec4f) -> @location(0) vec4f {
let normal = oct_decode(vec2f(bx.y, ny_d.x));
let diffuse = max(0.0, dot(normal, KEY_LIGHT));
- // feat_tex1[2] = pack4x8unorm(mip2.g, mip2.b, shadow, transp)
- let t1 = textureLoad(feat_tex1, coord, 0);
- let shadow = unpack4x8unorm(t1.z).z;
+ // feat_tex1[3] = pack4x8unorm(dif.r, dif.g, dif.b, spare) — dif = diffuse*shadow
+ let t1 = textureLoad(feat_tex1, coord, 0);
+ let dif = unpack4x8unorm(t1.w).x;
- return vec4f(albedo * (AMBIENT + diffuse * shadow), 1.0);
+ return vec4f(albedo * (AMBIENT + dif), 1.0);
}
diff --git a/cnn_v3/shaders/gbuf_pack.wgsl b/cnn_v3/shaders/gbuf_pack.wgsl
index 333589c..dd8d73b 100644
--- a/cnn_v3/shaders/gbuf_pack.wgsl
+++ b/cnn_v3/shaders/gbuf_pack.wgsl
@@ -86,6 +86,9 @@ fn pack_features(@builtin(global_invocation_id) id: vec3u) {
let mat_id_u8 = nm.b; // mat_id already in [0,1]
let shadow = textureLoad(gbuf_shadow, coord, 0).r;
let transp = textureLoad(gbuf_transp, coord, 0).r;
+ let nor = oct_decode_unorm(nm.rg);
+ let diffuse = max(0.0, dot(nor, vec3f(0.408, 0.816, 0.408)));
+ let dif = diffuse * shadow;
let prev = textureSampleLevel(prev_cnn, bilinear_sampler, uv, 0.0).rgb;
// MIP 1: 2×2 box filter (half resolution context)
@@ -103,13 +106,13 @@ fn pack_features(@builtin(global_invocation_id) id: vec3u) {
// Texture 1: 4 u32, each = pack4x8unorm of four u8 values
// [0] mat_id | prev.r | prev.g | prev.b
// [1] mip1.r | mip1.g | mip1.b | mip2.r
- // [2] mip2.g | mip2.b | shadow | transp
- // [3] spare (0)
+ // [2] mip2.g | mip2.b | transp | (spare)
+ // [3] dif.r | dif.g | dif.b | (spare) — dif = diffuse*shadow (scalar, stored in all 3)
let t1 = vec4u(
pack4x8unorm(vec4f(mat_id_u8, prev.r, prev.g, prev.b)),
pack4x8unorm(vec4f(mip1.r, mip1.g, mip1.b, mip2.r)),
- pack4x8unorm(vec4f(mip2.g, mip2.b, shadow, transp)),
- 0u
+ pack4x8unorm(vec4f(mip2.g, mip2.b, transp, 0.0)),
+ pack4x8unorm(vec4f(dif, dif, dif, 0.0))
);
textureStore(feat_tex1, coord, t1);
}
diff --git a/cnn_v3/shaders/gbuf_shadow.wgsl b/cnn_v3/shaders/gbuf_shadow.wgsl
index 6c81d66..65ae1fa 100644
--- a/cnn_v3/shaders/gbuf_shadow.wgsl
+++ b/cnn_v3/shaders/gbuf_shadow.wgsl
@@ -121,7 +121,7 @@ fn fs_main(@builtin(position) pos: vec4f) -> @location(0) vec4f {
// Use rasterized surface normal for bias — correct for sphere impostors.
let nm = textureLoad(normal_mat_tex, vec2i(pos.xy), 0);
let nor = oct_decode_unorm(nm.rg);
- let bias_pos = world + nor * 0.05;
+ let bias_pos = world + nor * 0.02;
// March shadow rays toward each light; take the darkest value.
var shadow_val = 1.0;
diff --git a/cnn_v3/shaders/gbuf_view.wgsl b/cnn_v3/shaders/gbuf_view.wgsl
index 3e7d1ff..d53b6f6 100644
--- a/cnn_v3/shaders/gbuf_view.wgsl
+++ b/cnn_v3/shaders/gbuf_view.wgsl
@@ -1,5 +1,5 @@
-// G-buffer channel visualization — 4×5 grid of 20 feature channels.
-// Takes feat_tex0 (rgba32uint, ch 0-7 f16) and feat_tex1 (rgba32uint, ch 8-19 unorm8).
+// G-buffer channel visualization — 4×6 grid of 23 feature channels.
+// Takes feat_tex0 (rgba32uint, ch 0-7 f16) and feat_tex1 (rgba32uint, ch 8-22 unorm8).
// Outputs tiled channel view to a standard rgba8unorm render target.
//
// Channel layout (row×col):
@@ -7,7 +7,8 @@
// Row 1: ch4(nrm.y) ch5(depth) ch6(dzdx) ch7(dzdy)
// Row 2: ch8(matid) ch9(prv.r) ch10(prv.g) ch11(prv.b)
// Row 3: ch12(m1.r) ch13(m1.g) ch14(m1.b) ch15(m2.r)
-// Row 4: ch16(m2.g) ch17(m2.b) ch18(shdw) ch19(trns)
+// Row 4: ch16(m2.g) ch17(m2.b) ch18(trns) ch19(spare)
+// Row 5: ch20(dif.r) ch21(dif.g) ch22(dif.b) ch23(spare)
#include "debug/debug_print"
@@ -29,12 +30,12 @@ fn fs_main(@builtin(position) pos: vec4f) -> @location(0) vec4f {
let uv = pos.xy / u.resolution;
let COLS = 4.0;
- let ROWS = 5.0;
+ let ROWS = 6.0;
let col = u32(uv.x * COLS);
let row = u32(uv.y * ROWS);
let ch = row * 4u + col;
- if (col >= 4u || ch >= 20u) {
+ if (col >= 4u || ch == 19u || ch >= 23u) {
return vec4f(0.05, 0.05, 0.05, 1.0);
}
@@ -71,7 +72,8 @@ fn fs_main(@builtin(position) pos: vec4f) -> @location(0) vec4f {
var bytes: vec4f;
if (comp_idx == 0u) { bytes = unpack4x8unorm(t.x); }
else if (comp_idx == 1u) { bytes = unpack4x8unorm(t.y); }
- else { bytes = unpack4x8unorm(t.z); }
+ else if (comp_idx == 2u) { bytes = unpack4x8unorm(t.z); }
+ else { bytes = unpack4x8unorm(t.w); }
var ba = array<f32, 4>(bytes.x, bytes.y, bytes.z, bytes.w);
v = ba[sub];
}
@@ -120,8 +122,10 @@ fn fs_main(@builtin(position) pos: vec4f) -> @location(0) vec4f {
case 15u: { out = debug_str(out, pos.xy, origin, vec4u(0x6D322E72u, 0u, 0u, 0u), 4u); } // m2.r
case 16u: { out = debug_str(out, pos.xy, origin, vec4u(0x6D322E67u, 0u, 0u, 0u), 4u); } // m2.g
case 17u: { out = debug_str(out, pos.xy, origin, vec4u(0x6D322E62u, 0u, 0u, 0u), 4u); } // m2.b
- case 18u: { out = debug_str(out, pos.xy, origin, vec4u(0x73686477u, 0u, 0u, 0u), 4u); } // shdw
- default: { out = debug_str(out, pos.xy, origin, vec4u(0x74726E73u, 0u, 0u, 0u), 4u); } // trns
+ case 18u: { out = debug_str(out, pos.xy, origin, vec4u(0x74726E73u, 0u, 0u, 0u), 4u); } // trns
+ case 20u: { out = debug_str(out, pos.xy, origin, vec4u(0x6469662Eu, 0x72000000u, 0u, 0u), 5u); } // dif.r
+ case 21u: { out = debug_str(out, pos.xy, origin, vec4u(0x6469662Eu, 0x67000000u, 0u, 0u), 5u); } // dif.g
+ default: { out = debug_str(out, pos.xy, origin, vec4u(0x6469662Eu, 0x62000000u, 0u, 0u), 5u); } // dif.b
}
return out;
}
diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc
index 829c199..25fef4c 100644
--- a/cnn_v3/src/gbuffer_effect.cc
+++ b/cnn_v3/src/gbuffer_effect.cc
@@ -116,7 +116,7 @@ void GBufferEffect::set_scene() {
// 2 large static cubes for shadow debugging.
{
Object3D obj(ObjectType::CUBE);
- obj.position = vec3(-1.0f, 0.0f, 0.0f);
+ obj.position = vec3(1.0f, 0.0f, 0.0f);
obj.scale = vec3(0.6f, 0.6f, 0.6f);
obj.color = vec4(0.9f, 0.5f, 0.3f, 1.0f);
scene_.add_object(obj);
@@ -124,7 +124,7 @@ void GBufferEffect::set_scene() {
}
{
Object3D obj(ObjectType::SPHERE);
- obj.position = vec3(1.0f, 0.0f, 0.0f);
+ obj.position = vec3(-1.0f, 0.0f, 0.0f);
const float r = 0.9f;
obj.scale = vec3(r, r, r);
obj.color = vec4(0.3f, 0.6f, 0.9f, 1.0f);
@@ -133,7 +133,17 @@ void GBufferEffect::set_scene() {
sphere_anims_.push_back({idx, r});
}
- // (sphere removed for shadow debugging)
+ // Second sphere: small, hovering above both objects, pulsating.
+ {
+ Object3D obj(ObjectType::SPHERE);
+ obj.position = vec3(0.0f, 2.2f, 0.0f);
+ const float r = 0.6f;
+ obj.scale = vec3(r, r, r);
+ obj.color = vec4(0.9f, 0.8f, 0.2f, 1.0f);
+ const int idx = (int)scene_.objects.size();
+ scene_.add_object(obj);
+ sphere_anims_.push_back({idx, r});
+ }
// Camera: above and in front of the scene, looking at origin.
camera_.set_look_at(vec3(0.0f, 2.5f, 6.0f),