From d181e145296a04384850e09be03b697458cd8439 Mon Sep 17 00:00:00 2001 From: skal Date: Sun, 22 Mar 2026 13:10:47 +0100 Subject: fix(cnn_v3): fix texture format mismatches in cnn_v3_test sequence - seq_compiler: add gbuf_albedo/gbuf_rgba32uint to NODE_TYPES - timeline: declare gbuf_feat0/feat1 as gbuf_rgba32uint, route CNNv3Effect output through cnn_v3_out (gbuf_albedo) + Passthrough to sink (dec0 can't write directly to Rgba8Unorm sink) - cnn_v3_effect: fix update_bind_groups using .set() instead of .replace() causing FATAL assert on second frame - TODO: add CNN v3 "2D mode" (G-buffer-free) future task handoff(Gemini): CNNv3Effect now runs without crashes at --seek 48 --- TODO.md | 17 +++++++++++++++++ cnn_v3/src/cnn_v3_effect.cc | 10 +++++----- tools/seq_compiler.py | 2 ++ workspaces/main/timeline.seq | 6 +++++- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/TODO.md b/TODO.md index 48518f2..4f38740 100644 --- a/TODO.md +++ b/TODO.md @@ -84,6 +84,23 @@ PyTorch / HTML WebGPU / C++ WebGPU. - ✅ `export_cnn_v3_weights.py` — convert trained `.pth` → `.bin` (f16) - See `cnn_v3/docs/HOWTO.md` §3 for training commands +## Future: CNN v3 "2D Mode" (G-buffer-free) + +Allow `CNNv3Effect` to run on a plain screen buffer / photo without a real G-buffer. +Fake the missing feature vectors (normals, depth, material IDs, shadow, transp) from +the RGB input alone: +- normals: approximate from local luminance gradient (Sobel) +- depth: constant (e.g. 0.5) or estimated from a simple heuristic +- material IDs / shadow / transp: neutral defaults (e.g. 0) + +This would let the effect be applied to any rendered frame (post-NTSC, post-Scratch, etc.) +without requiring a 3D G-buffer pass upstream, and enable training/inference on photos. + +Implementation sketch: +- New `CNNv3Effect2D` subclass (or a mode flag) that synthesizes `feat_tex0`/`feat_tex1` + internally from a single `rgba8unorm` input, then runs the same 5-pass U-Net. 
+- Separate `gbuf_pack_2d.wgsl` compute shader that fills feat0/feat1 from a photo buffer. + ## Future: CNN v2 8-bit Quantization Reduce weights from f16 (~3.2 KB) to i8 (~1.6 KB). diff --git a/cnn_v3/src/cnn_v3_effect.cc b/cnn_v3/src/cnn_v3_effect.cc index 4aa2c25..bfbb17b 100644 --- a/cnn_v3/src/cnn_v3_effect.cc +++ b/cnn_v3/src/cnn_v3_effect.cc @@ -445,7 +445,7 @@ void CNNv3Effect::update_bind_groups(NodeRegistry& nodes) { bg_buf(e[2], 2, wb, kWeightsBufBytes); bg_buf(e[3], 3, enc0_params_buf_.get().buffer, sizeof(CnnV3Params4ch)); bg_tex(e[4], 4, enc0_view); - enc0_bg_.set(make_bg(enc0_pipeline_.get(), e, 5)); + enc0_bg_.replace(make_bg(enc0_pipeline_.get(), e, 5)); } // enc1: enc0_tex(B0), weights(B1), params(B2), enc1_out(B3) @@ -455,7 +455,7 @@ void CNNv3Effect::update_bind_groups(NodeRegistry& nodes) { bg_buf(e[1], 1, wb, kWeightsBufBytes); bg_buf(e[2], 2, enc1_params_buf_.get().buffer, sizeof(CnnV3ParamsEnc1)); bg_tex(e[3], 3, enc1_view); - enc1_bg_.set(make_bg(enc1_pipeline_.get(), e, 4)); + enc1_bg_.replace(make_bg(enc1_pipeline_.get(), e, 4)); } // bottleneck: enc1_tex(B0), weights(B1), params(B2), bn_out(B3) @@ -465,7 +465,7 @@ void CNNv3Effect::update_bind_groups(NodeRegistry& nodes) { bg_buf(e[1], 1, wb, kWeightsBufBytes); bg_buf(e[2], 2, bn_params_buf_.get().buffer, sizeof(CnnV3ParamsBn)); bg_tex(e[3], 3, bn_view); - bn_bg_.set(make_bg(bn_pipeline_.get(), e, 4)); + bn_bg_.replace(make_bg(bn_pipeline_.get(), e, 4)); } // dec1: bn_tex(B0), enc1_tex(B1), weights(B2), params(B3), dec1_out(B4) @@ -476,7 +476,7 @@ void CNNv3Effect::update_bind_groups(NodeRegistry& nodes) { bg_buf(e[2], 2, wb, kWeightsBufBytes); bg_buf(e[3], 3, dec1_params_buf_.get().buffer, sizeof(CnnV3Params4ch)); bg_tex(e[4], 4, dec1_view); - dec1_bg_.set(make_bg(dec1_pipeline_.get(), e, 5)); + dec1_bg_.replace(make_bg(dec1_pipeline_.get(), e, 5)); } // dec0: dec1_tex(B0), enc0_tex(B1), weights(B2), params(B3), output(B4) @@ -487,6 +487,6 @@ void 
CNNv3Effect::update_bind_groups(NodeRegistry& nodes) { bg_buf(e[2], 2, wb, kWeightsBufBytes); bg_buf(e[3], 3, dec0_params_buf_.get().buffer, sizeof(CnnV3Params4ch)); bg_tex(e[4], 4, out_view); - dec0_bg_.set(make_bg(dec0_pipeline_.get(), e, 5)); + dec0_bg_.replace(make_bg(dec0_pipeline_.get(), e, 5)); } } diff --git a/tools/seq_compiler.py b/tools/seq_compiler.py index fbd5c0d..09188a5 100755 --- a/tools/seq_compiler.py +++ b/tools/seq_compiler.py @@ -18,6 +18,8 @@ NODE_TYPES = { 'f16x8': 'NodeType::F16X8', 'depth24': 'NodeType::DEPTH24', 'compute_f32': 'NodeType::COMPUTE_F32', + 'gbuf_albedo': 'NodeType::GBUF_ALBEDO', + 'gbuf_rgba32uint': 'NodeType::GBUF_RGBA32UINT', } class NodeDecl: diff --git a/workspaces/main/timeline.seq b/workspaces/main/timeline.seq index 1a9cad3..2176275 100644 --- a/workspaces/main/timeline.seq +++ b/workspaces/main/timeline.seq @@ -44,5 +44,9 @@ SEQUENCE 40.00 0 "ntsc" EFFECT + Ntsc temp1 -> sink 0.00 8.00 SEQUENCE 48.00 0 "cnn_v3_test" + NODE gbuf_feat0 gbuf_rgba32uint + NODE gbuf_feat1 gbuf_rgba32uint + NODE cnn_v3_out gbuf_albedo EFFECT + GBufferEffect source -> gbuf_feat0 gbuf_feat1 0.00 8.00 - EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> sink 0.00 8.00 + EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> cnn_v3_out 0.00 8.00 + EFFECT + Passthrough cnn_v3_out -> sink 0.00 8.00 -- cgit v1.2.3