From 673a24215b2670007317060325256059d1448f3b Mon Sep 17 00:00:00 2001
From: skal
Date: Sat, 21 Mar 2026 09:51:58 +0100
Subject: feat(cnn_v3): Phase 5 complete — parity validation passing (36/36 tests)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add test_cnn_v3_parity.cc: zero_weights + random_weights tests
- Add gen_test_vectors.py: PyTorch reference implementation for enc0/enc1/bn/dec1/dec0
- Add test_vectors.h: generated C header with enc0, dec1, output expected values
- Fix declare_nodes(): intermediate textures at fractional resolutions (W/2, W/4) using new NodeRegistry::default_width()/default_height() getters
- Add layer-by-layer readback (enc0, dec1) for regression coverage
- Final parity: enc0 max_err=1.95e-3, dec1 max_err=1.95e-3, out max_err=4.88e-4

handoff(Claude): CNN v3 parity done. Next: train_cnn_v3.py (FiLM MLP training).
---
 cnn_v3/src/cnn_v3_effect.cc | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'cnn_v3/src/cnn_v3_effect.cc')

diff --git a/cnn_v3/src/cnn_v3_effect.cc b/cnn_v3/src/cnn_v3_effect.cc
index d13799c..92178f7 100644
--- a/cnn_v3/src/cnn_v3_effect.cc
+++ b/cnn_v3/src/cnn_v3_effect.cc
@@ -187,14 +187,17 @@ CNNv3Effect::CNNv3Effect(const GpuContext& ctx,
 // ---------------------------------------------------------------------------
 
 void CNNv3Effect::declare_nodes(NodeRegistry& registry) {
+  const int W = registry.default_width();
+  const int H = registry.default_height();
+
   // enc0_tex: rgba16float full-res
-  registry.declare_node(node_enc0_, NodeType::GBUF_ALBEDO, -1, -1);
-  // enc1_tex: rgba32uint half-res
-  registry.declare_node(node_enc1_, NodeType::GBUF_RGBA32UINT, -1, -1);
-  // bottleneck_tex: rgba32uint quarter-res — declare at 1/4 resolution
-  registry.declare_node(node_bottleneck_, NodeType::GBUF_RGBA32UINT, -1, -1);
+  registry.declare_node(node_enc0_, NodeType::GBUF_ALBEDO, W, H);
+  // enc1_tex: rgba32uint half-res — shaders use textureDimensions() for bounds
+  registry.declare_node(node_enc1_, NodeType::GBUF_RGBA32UINT, W / 2, H / 2);
+  // bottleneck_tex: rgba32uint quarter-res
+  registry.declare_node(node_bottleneck_, NodeType::GBUF_RGBA32UINT, W / 4, H / 4);
   // dec1_tex: rgba16float half-res
-  registry.declare_node(node_dec1_, NodeType::GBUF_ALBEDO, -1, -1);
+  registry.declare_node(node_dec1_, NodeType::GBUF_ALBEDO, W / 2, H / 2);
   // output_tex: rgba16float full-res (the declared output_nodes_[0])
 }
 
@@ -202,6 +205,11 @@ void CNNv3Effect::declare_nodes(NodeRegistry& registry) {
 // set_film_params — simple linear mapping, no MLP yet
 // ---------------------------------------------------------------------------
 
+void CNNv3Effect::upload_weights(WGPUQueue queue, const void* data,
+                                 uint32_t size_bytes) {
+  wgpuQueueWriteBuffer(queue, weights_buf_.buffer, 0, data, size_bytes);
+}
+
 void CNNv3Effect::set_film_params(const CNNv3FiLMParams& fp) {
   // Identity + audio/beat modulation.
   // Replace with FiLM MLP output once training is done.
-- 
cgit v1.2.3