From ce6e5b99f26e4e7c69a3cacf360bd0d492de928c Mon Sep 17 00:00:00 2001 From: skal Date: Wed, 25 Mar 2026 10:05:42 +0100 Subject: feat(cnn_v3): 3×3 dilated bottleneck + Sobel loss + FiLM warmup + architecture PNG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace 1×1 pointwise bottleneck with Conv(8→8, 3×3, dilation=2): effective RF grows from ~13px to ~29px at ¼res (~+1 KB weights) - Add Sobel edge loss in training (--edge-loss-weight, default 0.1) - Add FiLM 2-phase training: freeze MLP for warmup epochs then unfreeze at lr×0.1 (--film-warmup-epochs, default 50) - Update weight layout: BN 72→584 f16, total 1964→2476 f16 (4952 B) - Cascade offsets in C++ effect, JS tool, export/gen_test_vectors scripts - Regenerate test_vectors.h (1238 u32); parity max_err=9.77e-04 - Generate dark-theme U-Net+FiLM architecture PNG (gen_architecture_png.py) - Replace ASCII art in CNN_V3.md and HOW_TO_CNN.md with PNG embed handoff(Gemini): bottleneck dilation + Sobel loss + FiLM warmup landed. Next: run first real training pass (see cnn_v3/docs/HOWTO.md §3). --- cnn_v3/tools/shaders.js | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'cnn_v3/tools') diff --git a/cnn_v3/tools/shaders.js b/cnn_v3/tools/shaders.js index 6c49864..36f53c8 100644 --- a/cnn_v3/tools/shaders.js +++ b/cnn_v3/tools/shaders.js @@ -1,9 +1,10 @@ 'use strict'; // CNN v3 WGSL shaders — matches cnn_v3/shaders/*.wgsl exactly. 
-// Weight offsets (f16 index): enc0=0, enc1=724, bn=1020, dec1=1092, dec0=1672, total=1964 +// Weight offsets (f16 index): enc0=0, enc1=724, bn=1020, dec1=1604, dec0=2184, total=2476 +// BN is now Conv(8→8, 3×3, dilation=2): 8*8*9+8=584 weights (was 72 for 1×1) -const ENC0_OFF=0, ENC1_OFF=724, BN_OFF=1020, DEC1_OFF=1092, DEC0_OFF=1672; -const TOTAL_F16=1964, TOTAL_U32=982; +const ENC0_OFF=0, ENC1_OFF=724, BN_OFF=1020, DEC1_OFF=1604, DEC0_OFF=2184; +const TOTAL_F16=2476, TOTAL_U32=1238; // Inlined helpers — prepended to shaders that need them. const H = ` @@ -108,7 +109,7 @@ fn main(@builtin(global_invocation_id) id:vec3u){ pack2x16float(vec2f(o[4],o[5])),pack2x16float(vec2f(o[6],o[7])))); }`; -// Bottleneck: AvgPool(enc1) + Conv(8→8, 1×1) + ReLU → rgba32uint quarter-res (no FiLM) +// Bottleneck: AvgPool(enc1) + Conv(8→8, 3×3, dilation=2) + ReLU → rgba32uint quarter-res (no FiLM) // Params (16 bytes): wo u32 _pad×3 const BN_SHADER=H+` struct P{wo:u32,_a:u32,_b:u32,_c:u32} @@ -129,10 +130,13 @@ fn avg(qc:vec2i,hd:vec2i)->array<f32,8>{ fn main(@builtin(global_invocation_id) id:vec3u){ let hd=vec2i(textureDimensions(e1)); let qd=hd/2; let c=vec2i(id.xy); if(c.x>=qd.x||c.y>=qd.y){return;} - let ft=avg(c,hd); var o:array<f32,8>; + var o:array<f32,8>; for(var oc:u32=0u;oc<8u;oc++){ - var s=get_w(p.wo,64u+oc); - for(var i:u32=0u;i<8u;i++){s+=get_w(p.wo,oc*8u+i)*ft[i];} + var s=get_w(p.wo,576u+oc); + for(var ky:i32=-1;ky<=1;ky++){for(var kx:i32=-1;kx<=1;kx++){ + let ft=avg(c+vec2i(kx,ky)*2,hd); let ki=u32(ky+1)*3u+u32(kx+1); + for(var i:u32=0u;i<8u;i++){s+=get_w(p.wo,oc*72u+i*9u+ki)*ft[i];} + }} o[oc]=max(0.,s); } textureStore(out,c,vec4u(pack2x16float(vec2f(o[0],o[1])),pack2x16float(vec2f(o[2],o[3])), -- cgit v1.2.3