| author | skal <pascal.massimino@gmail.com> | 2026-02-13 12:32:36 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-13 12:32:36 +0100 |
| commit | 561d1dc446db7d1d3e02b92b43abedf1a5017850 (patch) | |
| tree | ef9302dc1f9b6b9f8a12225580f2a3b07602656b /tools | |
| parent | c27b34279c0d1c2a8f1dbceb0e154b585b5c6916 (diff) | |
CNN v2: Refactor to uniform 12D→4D architecture
**Architecture changes:**
- Static features (8D): p0-p3 (parametric) + uv_x, uv_y, sin(10×uv_x), bias
- Input RGBD (4D): fed separately from the static features; layer 0 takes it directly, later layers take the previous layer's 4D output
- All layers: uniform 12D→4D (4 prev/input + 8 static → 4 output); see the sketch below the list
- Bias integrated in static features (bias=False in PyTorch)
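A minimal PyTorch sketch of this uniform layout (illustrative only; the actual model lives in train_cnn_v2.py, and the class name `CNNv2` and the ReLU placement are assumptions):

```python
import torch
import torch.nn as nn

class CNNv2(nn.Module):
    """Sketch of the uniform 12D→4D stack; names and activation are assumed."""
    def __init__(self, num_layers=3, kernel_size=3):
        super().__init__()
        # Every layer: 12 input channels (4 prev/input + 8 static) -> 4 output channels.
        # bias=False because the bias is carried by the constant 1.0 channel
        # inside the 8D static features.
        self.layers = nn.ModuleList(
            nn.Conv2d(12, 4, kernel_size, padding=kernel_size // 2, bias=False)
            for _ in range(num_layers)
        )

    def forward(self, input_rgbd, static_features):
        # input_rgbd: (N, 4, H, W); static_features: (N, 8, H, W)
        x = input_rgbd
        for i, layer in enumerate(self.layers):
            x = layer(torch.cat([x, static_features], dim=1))
            if i < len(self.layers) - 1:
                x = torch.relu(x)  # activation choice here is illustrative
        return x
```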
**Weight calculations:**
- 3 layers × (12 × 3×3 × 4) = 1296 weights
- f16: 2.6 KB (vs old variable arch: ~6.4 KB)
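For reference, the numbers above follow directly from the uniform shape:

```python
# Per-layer weights: out_ch * in_ch * k * k; f16 stores 2 bytes per weight.
layers, in_ch, out_ch, k = 3, 12, 4, 3
total_weights = layers * out_ch * in_ch * k * k   # 3 * 4 * 12 * 9 = 1296
print(total_weights, total_weights * 2 / 1000)    # 1296 weights, ~2.6 KB in f16
```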
**Updated files:**
*Training (Python):*
- train_cnn_v2.py: Uniform model, takes input_rgbd + static_features
- export_cnn_v2_weights.py: Binary export for storage buffers
- export_cnn_v2_shader.py: Per-layer shader export (debugging)
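A hypothetical sketch of the binary export step mentioned above (the .bin header written by export_cnn_v2_weights.py is not shown in this commit, so only the f16 weight payload is illustrated; `model.layers` refers to the model sketch earlier):

```python
import numpy as np

def export_weights(model, path="cnn_v2_weights.bin"):
    # PyTorch Conv2d weights are laid out (out_ch, in_ch, kH, kW), which matches
    # the ordering the compute shader indexes with:
    #   w_idx = weight_offset + c*in_ch*k*k + i*k*k + spatial_idx
    blobs = [layer.weight.detach().cpu().numpy().astype(np.float16).ravel()
             for layer in model.layers]
    np.concatenate(blobs).tofile(path)  # contiguous f16 payload for the storage buffer
```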
*Shaders (WGSL):*
- cnn_v2_static.wgsl: p0-p3 parametric features (mips/gradients)
- cnn_v2_compute.wgsl: 12D input, 4D output, vec4 packing
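As an illustration of what cnn_v2_static.wgsl provides per pixel, a NumPy sketch of the 8D static feature stack (the p0-p3 parametric features come from mips/gradients and are only placeholders here; uv in [0,1] is an assumption):

```python
import numpy as np

def static_features(p0, p1, p2, p3):
    # p0..p3: (H, W) parametric features (mips/gradients); placeholders here.
    h, w = p0.shape
    uv_y, uv_x = np.meshgrid(np.linspace(0, 1, h), np.linspace(0, 1, w), indexing="ij")
    bias = np.ones((h, w))  # bias folded into the features as a constant 1.0 channel
    return np.stack([p0, p1, p2, p3,
                     uv_x, uv_y, np.sin(10.0 * uv_x), bias])  # shape (8, H, W)
```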
*Tools:*
- HTML tool (cnn_v2_test): Updated for 12D→4D, layer visualization
*Docs:*
- CNN_V2.md: Updated architecture, training, validation sections
- HOWTO.md: Reference HTML tool for validation
*Removed:*
- validate_cnn_v2.sh: Obsolete (used CNN v1 tool)
All code consistent with bias=False (bias in static features as 1.0).
handoff(Claude): CNN v2 architecture finalized and documented
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/cnn_v2_test/index.html | 65 |
1 file changed, 33 insertions, 32 deletions
diff --git a/tools/cnn_v2_test/index.html b/tools/cnn_v2_test/index.html
index 9ce3d8c..199deea 100644
--- a/tools/cnn_v2_test/index.html
+++ b/tools/cnn_v2_test/index.html
@@ -3,6 +3,12 @@
 <!-- CNN v2 Testing Tool
      - WebGPU-based inference validator
 
+     Architecture:
+     - Static features (8D): p0-p3 (parametric), uv_x, uv_y, sin(10*uv_x), bias
+     - Layer 0: input RGBD (4D) + static (8D) = 12D → 4 channels
+     - Layer 1+: previous (4D) + static (8D) = 12D → 4 channels
+     - All layers: uniform 12D input, 4D output (ping-pong buffer)
+
      Features:
      - Side panel: .bin metadata display, weight statistics per layer
      - Layer inspection: 4-channel grayscale split, intermediate layer visualization
@@ -318,21 +324,19 @@ fn unpack_static_features(coord: vec2<i32>) -> array<f32, 8> {
     return array<f32, 8>(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y);
 }
 
-fn unpack_layer_channels(coord: vec2<i32>) -> array<f32, 8> {
+fn unpack_layer_channels(coord: vec2<i32>) -> vec4<f32> {
     let packed = textureLoad(layer_input, coord, 0);
     let v0 = unpack2x16float(packed.x);
     let v1 = unpack2x16float(packed.y);
-    let v2 = unpack2x16float(packed.z);
-    let v3 = unpack2x16float(packed.w);
-    return array<f32, 8>(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y);
+    return vec4<f32>(v0.x, v0.y, v1.x, v1.y);
 }
 
-fn pack_channels(values: array<f32, 8>) -> vec4<u32> {
+fn pack_channels(values: vec4<f32>) -> vec4<u32> {
     return vec4<u32>(
-        pack2x16float(vec2<f32>(values[0], values[1])),
-        pack2x16float(vec2<f32>(values[2], values[3])),
-        pack2x16float(vec2<f32>(values[4], values[5])),
-        pack2x16float(vec2<f32>(values[6], values[7]))
+        pack2x16float(vec2<f32>(values.x, values.y)),
+        pack2x16float(vec2<f32>(values.z, values.w)),
+        0u,
+        0u
     );
 }
 
@@ -350,16 +354,16 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
     if (coord.x >= i32(dims.x) || coord.y >= i32(dims.y)) { return; }
 
     let kernel_size = params.kernel_size;
-    let in_channels = params.in_channels;
-    let out_channels = params.out_channels;
+    let in_channels = params.in_channels;   // Always 12 (4 prev + 8 static)
+    let out_channels = params.out_channels; // Always 4
     let weight_offset = params.weight_offset;
     let is_output = params.is_output_layer != 0u;
     let kernel_radius = i32(kernel_size / 2u);
 
     let static_feat = unpack_static_features(coord);
 
-    var output: array<f32, 8>;
-    for (var c: u32 = 0u; c < out_channels && c < 8u; c++) {
+    var output: vec4<f32> = vec4<f32>(0.0);
+    for (var c: u32 = 0u; c < 4u; c++) {
         var sum: f32 = 0.0;
         for (var ky: i32 = -kernel_radius; ky <= kernel_radius; ky++) {
             for (var kx: i32 = -kernel_radius; kx <= kernel_radius; kx++) {
@@ -375,19 +379,20 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
                 let kx_idx = u32(kx + kernel_radius);
                 let spatial_idx = ky_idx * kernel_size + kx_idx;
 
-                for (var i: u32 = 0u; i < 8u; i++) {
+                // Previous layer channels (4D)
+                for (var i: u32 = 0u; i < 4u; i++) {
                     let w_idx = weight_offset + c * in_channels * kernel_size * kernel_size +
                                 i * kernel_size * kernel_size + spatial_idx;
-                    sum += get_weight(w_idx) * static_local[i];
+                    sum += get_weight(w_idx) * layer_local[i];
                 }
 
-                let prev_channels = in_channels - 8u;
-                for (var i: u32 = 0u; i < prev_channels && i < 8u; i++) {
+                // Static features (8D)
+                for (var i: u32 = 0u; i < 8u; i++) {
                     let w_idx = weight_offset + c * in_channels * kernel_size * kernel_size +
-                                (8u + i) * kernel_size * kernel_size + spatial_idx;
-                    sum += get_weight(w_idx) * layer_local[i];
+                                (4u + i) * kernel_size * kernel_size + spatial_idx;
+                    sum += get_weight(w_idx) * static_local[i];
                 }
             }
         }
@@ -399,17 +404,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
         }
     }
 
-    for (var c: u32 = out_channels; c < 8u; c++) {
-        output[c] = 0.0;
-    }
-
     if (is_output) {
         let original = textureLoad(original_input, coord, 0).rgb;
-        let result_rgb = vec3<f32>(output[0], output[1], output[2]);
+        let result_rgb = vec3<f32>(output.x, output.y, output.z);
         let blended = mix(original, result_rgb, params.blend_amount);
-        output[0] = blended.r;
-        output[1] = blended.g;
-        output[2] = blended.b;
+        output.x = blended.r;
+        output.y = blended.g;
+        output.z = blended.b;
     }
 
     textureStore(output_tex, coord, pack_channels(output));
@@ -1013,7 +1014,7 @@ class CNNTester {
             </div>
         `;
 
-        html += '<div style="font-size: 9px; color: #808080; margin-bottom: 8px; padding-bottom: 8px; border-bottom: 1px solid #404040;">Static features (7D input) + ${this.weights.layers.length} CNN layers. Showing first 4 of 8 channels.</div>';
+        html += `<div style="font-size: 9px; color: #808080; margin-bottom: 8px; padding-bottom: 8px; border-bottom: 1px solid #404040;">Static features (8D: p0-p3 + spatial) + ${this.weights.layers.length} CNN layers. All layers: 12D→4D.</div>`;
 
         html += '<div class="layer-buttons">';
         for (let i = 0; i < this.layerOutputs.length; i++) {
@@ -1116,10 +1117,10 @@ class CNNTester {
         this.log(`Visualizing ${layerName} activations (${width}×${height})`);
 
         // Update channel labels based on layer type
-        // Static features: 8 channels total (R,G,B,D,UV_X,UV_Y,sin,bias), showing first 4
-        // CNN layers: Up to 8 channels per layer, showing first 4
+        // Static features: 8 channels (p0,p1,p2,p3,uv_x,uv_y,sin10_x,bias)
+        // CNN layers: 4 channels per layer (uniform)
         const channelLabels = layerIdx === 0
-            ? ['Ch0 (R)', 'Ch1 (G)', 'Ch2 (B)', 'Ch3 (D)']
+            ? ['Ch0 (p0)', 'Ch1 (p1)', 'Ch2 (p2)', 'Ch3 (p3)']
             : ['Ch0', 'Ch1', 'Ch2', 'Ch3'];
 
         for (let c = 0; c < 4; c++) {
@@ -1169,7 +1170,7 @@ class CNNTester {
                 continue;
             }
 
-            const vizScale = layerIdx === 0 ? 1.0 : 0.2; // Static: 1.0, CNN layers: 0.2 (assumes ~5 max)
+            const vizScale = layerIdx === 0 ? 1.0 : 0.5; // Static: 1.0, CNN layers: 0.5 (4 channels [0,1])
             const paramsBuffer = this.device.createBuffer({
                 size: 8,
                 usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST |
