summary | refs | log | tree | commit | diff
path: root/cnn_v3/tools
diff options
context:
space:
mode:
author skal <pascal.massimino@gmail.com> 2026-03-25 10:05:42 +0100
committer skal <pascal.massimino@gmail.com> 2026-03-25 10:05:42 +0100
commit ce6e5b99f26e4e7c69a3cacf360bd0d492de928c (patch)
tree a8d64b33a7ea1109b6b7e1043ced946cac416756 /cnn_v3/tools
parent 8b4d7a49f038d7e849e6764dcc3abd1e1be01061 (diff)
feat(cnn_v3): 3×3 dilated bottleneck + Sobel loss + FiLM warmup + architecture PNG
- Replace 1×1 pointwise bottleneck with Conv(8→8, 3×3, dilation=2):
  effective RF grows from ~13px to ~29px at ¼res (~+1 KB weights)
- Add Sobel edge loss in training (--edge-loss-weight, default 0.1)
- Add FiLM 2-phase training: freeze MLP for warmup epochs then
  unfreeze at lr×0.1 (--film-warmup-epochs, default 50)
- Update weight layout: BN 72→584 f16, total 1964→2476 f16 (4952 B)
- Cascade offsets in C++ effect, JS tool, export/gen_test_vectors scripts
- Regenerate test_vectors.h (1238 u32); parity max_err=9.77e-04
- Generate dark-theme U-Net+FiLM architecture PNG (gen_architecture_png.py)
- Replace ASCII art in CNN_V3.md and HOW_TO_CNN.md with PNG embed

handoff(Gemini): bottleneck dilation + Sobel loss + FiLM warmup landed.
Next: run first real training pass (see cnn_v3/docs/HOWTO.md §3).
Diffstat (limited to 'cnn_v3/tools')
-rw-r--r-- cnn_v3/tools/shaders.js 18
1 files changed, 11 insertions, 7 deletions
diff --git a/cnn_v3/tools/shaders.js b/cnn_v3/tools/shaders.js
index 6c49864..36f53c8 100644
--- a/cnn_v3/tools/shaders.js
+++ b/cnn_v3/tools/shaders.js
@@ -1,9 +1,10 @@
'use strict';
// CNN v3 WGSL shaders — matches cnn_v3/shaders/*.wgsl exactly.
-// Weight offsets (f16 index): enc0=0, enc1=724, bn=1020, dec1=1092, dec0=1672, total=1964
+// Weight offsets (f16 index): enc0=0, enc1=724, bn=1020, dec1=1604, dec0=2184, total=2476
+// BN is now Conv(8→8, 3×3, dilation=2): 8*8*9+8=584 weights (was 72 for 1×1)
-const ENC0_OFF=0, ENC1_OFF=724, BN_OFF=1020, DEC1_OFF=1092, DEC0_OFF=1672;
-const TOTAL_F16=1964, TOTAL_U32=982;
+const ENC0_OFF=0, ENC1_OFF=724, BN_OFF=1020, DEC1_OFF=1604, DEC0_OFF=2184;
+const TOTAL_F16=2476, TOTAL_U32=1238;
// Inlined helpers — prepended to shaders that need them.
const H = `
@@ -108,7 +109,7 @@ fn main(@builtin(global_invocation_id) id:vec3u){
pack2x16float(vec2f(o[4],o[5])),pack2x16float(vec2f(o[6],o[7]))));
}`;
-// Bottleneck: AvgPool(enc1) + Conv(8→8, 1×1) + ReLU → rgba32uint quarter-res (no FiLM)
+// Bottleneck: AvgPool(enc1) + Conv(8→8, 3×3, dilation=2) + ReLU → rgba32uint quarter-res (no FiLM)
// Params (16 bytes): wo u32 _pad×3
const BN_SHADER=H+`
struct P{wo:u32,_a:u32,_b:u32,_c:u32}
@@ -129,10 +130,13 @@ fn avg(qc:vec2i,hd:vec2i)->array<f32,8>{
fn main(@builtin(global_invocation_id) id:vec3u){
let hd=vec2i(textureDimensions(e1)); let qd=hd/2; let c=vec2i(id.xy);
if(c.x>=qd.x||c.y>=qd.y){return;}
- let ft=avg(c,hd); var o:array<f32,8>;
+ var o:array<f32,8>;
for(var oc:u32=0u;oc<8u;oc++){
- var s=get_w(p.wo,64u+oc);
- for(var i:u32=0u;i<8u;i++){s+=get_w(p.wo,oc*8u+i)*ft[i];}
+ var s=get_w(p.wo,576u+oc);
+ for(var ky:i32=-1;ky<=1;ky++){for(var kx:i32=-1;kx<=1;kx++){
+ let ft=avg(c+vec2i(kx,ky)*2,hd); let ki=u32(ky+1)*3u+u32(kx+1);
+ for(var i:u32=0u;i<8u;i++){s+=get_w(p.wo,oc*72u+i*9u+ki)*ft[i];}
+ }}
o[oc]=max(0.,s);
}
textureStore(out,c,vec4u(pack2x16float(vec2f(o[0],o[1])),pack2x16float(vec2f(o[2],o[3])),