CNN v2: parametric static features - Phases 1-4

Infrastructure for enhanced CNN post-processing with 7D feature input. Phase 1: Shaders - Static features compute (RGBD + UV + sin10_x + bias → 8×f16) - Layer template (convolution skeleton, packing/unpacking) - 3 mip level support for multi-scale features Phase 2: C++ Effect - CNNv2Effect class (multi-pass architecture) - Texture management (static features, layer buffers) - Build integration (CMakeLists, assets, tests) Phase 3: Training Pipeline - train_cnn_v2.py: PyTorch model with static feature concatenation - export_cnn_v2_shader.py: f32→f16 quantization, WGSL generation - Configurable architecture (kernels, channels) Phase 4: Validation - validate_cnn_v2.sh: End-to-end pipeline - Checkpoint → shaders → build → test images Tests: 36/36 passing Next: Complete render pipeline implementation (bind groups, multi-pass) Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
author: skal <pascal.massimino@gmail.com> 2026-02-12 11:34:50 +0100
committer: skal <pascal.massimino@gmail.com> 2026-02-12 11:34:50 +0100
commit: 91d42f2d057e077c267d6775cc109a801aa315c0 (patch)
tree: 18cd67c9ce11f24149e6dafa65d176ca7143fcbb /workspaces/main/shaders
parent: 301db1f29137d3db7828e7a0103986cc845b7672 (diff)
2 files changed, 115 insertions, 0 deletions
diff --git a/workspaces/main/shaders/cnn_v2_layer_template.wgsl b/workspaces/main/shaders/cnn_v2_layer_template.wgsl
new file mode 100644
index 0000000..1bf6819
--- /dev/null
+++ b/workspaces/main/shaders/cnn_v2_layer_template.wgsl
@@ -0,0 +1,68 @@
+// CNN v2 Layer Template (placeholder for generated shaders)
+// This file documents the structure - actual layers generated by export script
+
+// Example: Layer 0 (1×1 kernel, 8→16 channels)
+// const KERNEL_SIZE: u32 = 1u;
+// const IN_CHANNELS: u32 = 8u;  // 7 features + bias
+// const OUT_CHANNELS: u32 = 16u;
+// const weights: array<f32, 128> = array(...);
+
+@group(0) @binding(0) var static_features: texture_2d<u32>;
+@group(0) @binding(1) var layer_input: texture_2d<u32>;  // Previous layer output
+@group(0) @binding(2) var output_tex: texture_storage_2d<rgba32uint, write>;
+
+fn unpack_static_features(coord: vec2<i32>) -> array<f32, 8> {
+  let packed = textureLoad(static_features, coord, 0);
+  let v0 = unpack2x16float(packed.x);
+  let v1 = unpack2x16float(packed.y);
+  let v2 = unpack2x16float(packed.z);
+  let v3 = unpack2x16float(packed.w);
+  return array<f32, 8>(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y);
+}
+
+fn unpack_layer_channels(coord: vec2<i32>) -> array<f32, 8> {
+  let packed = textureLoad(layer_input, coord, 0);
+  let v0 = unpack2x16float(packed.x);
+  let v1 = unpack2x16float(packed.y);
+  let v2 = unpack2x16float(packed.z);
+  let v3 = unpack2x16float(packed.w);
+  return array<f32, 8>(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y);
+}
+
+fn pack_channels(values: array<f32, 8>) -> vec4<u32> {
+  return vec4<u32>(
+    pack2x16float(vec2<f32>(values[0], values[1])),
+    pack2x16float(vec2<f32>(values[2], values[3])),
+    pack2x16float(vec2<f32>(values[4], values[5])),
+    pack2x16float(vec2<f32>(values[6], values[7]))
+  );
+}
+
+@compute @workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+  let coord = vec2<i32>(id.xy);
+  let dims = textureDimensions(static_features);
+
+  if (coord.x >= i32(dims.x) || coord.y >= i32(dims.y)) {
+    return;
+  }
+
+  // Load static features (always available)
+  let static_feat = unpack_static_features(coord);
+
+  // Convolution loop (example for generated code)
+  // var output: array<f32, OUT_CHANNELS>;
+  // for (var c: u32 = 0u; c < OUT_CHANNELS; c++) {
+  //   var sum: f32 = 0.0;
+  //   for (var ky: i32 = -radius; ky <= radius; ky++) {
+  //     for (var kx: i32 = -radius; kx <= radius; kx++) {
+  //       let sample_coord = coord + vec2<i32>(kx, ky);
+  //       // Load static + prev layer, multiply weights, accumulate
+  //     }
+  //   }
+  //   output[c] = max(0.0, sum);  // ReLU
+  // }
+
+  // Placeholder output
+  textureStore(output_tex, coord, vec4<u32>(0u));
+}
diff --git a/workspaces/main/shaders/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2_static.wgsl
new file mode 100644
index 0000000..c3a2de7
--- /dev/null
+++ b/workspaces/main/shaders/cnn_v2_static.wgsl
@@ -0,0 +1,47 @@
+// CNN v2 Static Features Compute Shader
+// Generates 7D features + bias: [R, G, B, D, uv.x, uv.y, sin10_x, 1.0]
+
+@group(0) @binding(0) var input_tex: texture_2d<f32>;
+@group(0) @binding(1) var input_tex_mip1: texture_2d<f32>;
+@group(0) @binding(2) var input_tex_mip2: texture_2d<f32>;
+@group(0) @binding(3) var depth_tex: texture_2d<f32>;
+@group(0) @binding(4) var output_tex: texture_storage_2d<rgba32uint, write>;
+
+@compute @workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+  let coord = vec2<i32>(id.xy);
+  let dims = textureDimensions(input_tex);
+
+  if (coord.x >= i32(dims.x) || coord.y >= i32(dims.y)) {
+    return;
+  }
+
+  // Sample RGBA from mip 0
+  let rgba = textureLoad(input_tex, coord, 0);
+  let r = rgba.r;
+  let g = rgba.g;
+  let b = rgba.b;
+
+  // Sample depth
+  let d = textureLoad(depth_tex, coord, 0).r;
+
+  // UV coordinates (normalized [0,1])
+  let uv_x = f32(coord.x) / f32(dims.x);
+  let uv_y = f32(coord.y) / f32(dims.y);
+
+  // Multi-frequency position encoding
+  let sin10_x = sin(10.0 * uv_x);
+
+  // Bias dimension (always 1.0)
+  let bias = 1.0;
+
+  // Pack 8×f16 into 4×u32 (rgba32uint)
+  let packed = vec4<u32>(
+    pack2x16float(vec2<f32>(r, g)),
+    pack2x16float(vec2<f32>(b, d)),
+    pack2x16float(vec2<f32>(uv_x, uv_y)),
+    pack2x16float(vec2<f32>(sin10_x, bias))
+  );
+
+  textureStore(output_tex, coord, packed);
+}
author	skal <pascal.massimino@gmail.com>	2026-02-12 11:34:50 +0100
committer	skal <pascal.massimino@gmail.com>	2026-02-12 11:34:50 +0100
commit	91d42f2d057e077c267d6775cc109a801aa315c0 (patch)
tree	18cd67c9ce11f24149e6dafa65d176ca7143fcbb /workspaces/main/shaders
parent	301db1f29137d3db7828e7a0103986cc845b7672 (diff)