summaryrefslogtreecommitdiff
path: root/workspaces
diff options
context:
space:
mode:
author skal <pascal.massimino@gmail.com> 2026-02-13 12:32:36 +0100
committer skal <pascal.massimino@gmail.com> 2026-02-13 12:32:36 +0100
commit 561d1dc446db7d1d3e02b92b43abedf1a5017850 (patch)
tree ef9302dc1f9b6b9f8a12225580f2a3b07602656b /workspaces
parent c27b34279c0d1c2a8f1dbceb0e154b585b5c6916 (diff)
CNN v2: Refactor to uniform 12D→4D architecture
**Architecture changes:**
- Static features (8D): p0-p3 (parametric) + uv_x, uv_y, sin(10×uv_x), bias
- Input RGBD (4D): fed separately to all layers
- All layers: uniform 12D→4D (4 prev/input + 8 static → 4 output)
- Bias integrated in static features (bias=False in PyTorch)

**Weight calculations:**
- 3 layers × (12 × 3×3 × 4) = 1296 weights
- f16: 2.6 KB (vs old variable arch: ~6.4 KB)

**Updated files:**

*Training (Python):*
- train_cnn_v2.py: Uniform model, takes input_rgbd + static_features
- export_cnn_v2_weights.py: Binary export for storage buffers
- export_cnn_v2_shader.py: Per-layer shader export (debugging)

*Shaders (WGSL):*
- cnn_v2_static.wgsl: p0-p3 parametric features (mips/gradients)
- cnn_v2_compute.wgsl: 12D input, 4D output, vec4 packing

*Tools:*
- HTML tool (cnn_v2_test): Updated for 12D→4D, layer visualization

*Docs:*
- CNN_V2.md: Updated architecture, training, validation sections
- HOWTO.md: Reference HTML tool for validation

*Removed:*
- validate_cnn_v2.sh: Obsolete (used CNN v1 tool)

All code consistent with bias=False (bias in static features as 1.0).

handoff(Claude): CNN v2 architecture finalized and documented
Diffstat (limited to 'workspaces')
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl 80
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl 23
2 files changed, 49 insertions, 54 deletions
diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
index 1e1704d..5c4b113 100644
--- a/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
@@ -1,6 +1,6 @@
-// CNN v2 Compute Shader - Storage Buffer Version
-// Processes single layer per dispatch with weights from storage buffer
-// Multi-layer execution handled by C++ with ping-pong buffers
+// CNN v2 Compute Shader - Uniform 12D→4D Architecture
+// All layers: input/previous (4D) + static (8D) = 12D → 4 channels
+// Storage buffer weights, ping-pong execution
// Push constants for layer parameters (passed per dispatch)
struct LayerParams {
@@ -12,12 +12,12 @@ struct LayerParams {
blend_amount: f32, // [0,1] blend with original
}
-@group(0) @binding(0) var static_features: texture_2d<u32>; // 8-channel static features
-@group(0) @binding(1) var layer_input: texture_2d<u32>; // Previous layer output (8-channel packed)
-@group(0) @binding(2) var output_tex: texture_storage_2d<rgba32uint, write>; // Current layer output
+@group(0) @binding(0) var static_features: texture_2d<u32>; // 8D static features (p0-p3 + spatial)
+@group(0) @binding(1) var layer_input: texture_2d<u32>; // 4D previous/input (RGBD or prev layer)
+@group(0) @binding(2) var output_tex: texture_storage_2d<rgba32uint, write>; // 4D output
@group(0) @binding(3) var<storage, read> weights_buffer: array<u32>; // Packed f16 weights
@group(0) @binding(4) var<uniform> params: LayerParams;
-@group(0) @binding(5) var original_input: texture_2d<f32>; // Original RGB input for blending
+@group(0) @binding(5) var original_input: texture_2d<f32>; // Original RGB for blending
fn unpack_static_features(coord: vec2<i32>) -> array<f32, 8> {
let packed = textureLoad(static_features, coord, 0);
@@ -28,21 +28,19 @@ fn unpack_static_features(coord: vec2<i32>) -> array<f32, 8> {
return array<f32, 8>(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y);
}
-fn unpack_layer_channels(coord: vec2<i32>) -> array<f32, 8> {
+fn unpack_layer_channels(coord: vec2<i32>) -> vec4<f32> {
let packed = textureLoad(layer_input, coord, 0);
let v0 = unpack2x16float(packed.x);
let v1 = unpack2x16float(packed.y);
- let v2 = unpack2x16float(packed.z);
- let v3 = unpack2x16float(packed.w);
- return array<f32, 8>(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y);
+ return vec4<f32>(v0.x, v0.y, v1.x, v1.y);
}
-fn pack_channels(values: array<f32, 8>) -> vec4<u32> {
+fn pack_channels(values: vec4<f32>) -> vec4<u32> {
return vec4<u32>(
- pack2x16float(vec2<f32>(values[0], values[1])),
- pack2x16float(vec2<f32>(values[2], values[3])),
- pack2x16float(vec2<f32>(values[4], values[5])),
- pack2x16float(vec2<f32>(values[6], values[7]))
+ pack2x16float(vec2<f32>(values.x, values.y)),
+ pack2x16float(vec2<f32>(values.z, values.w)),
+ 0u, // Unused
+ 0u // Unused
);
}
@@ -68,19 +66,19 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
}
let kernel_size = params.kernel_size;
- let in_channels = params.in_channels;
- let out_channels = params.out_channels;
+ let in_channels = params.in_channels; // Always 12 (4 prev + 8 static)
+ let out_channels = params.out_channels; // Always 4
let weight_offset = params.weight_offset;
let is_output = params.is_output_layer != 0u;
let kernel_radius = i32(kernel_size / 2u);
- // Load static features (always 8D)
+ // Load static features (8D) and previous/input layer (4D)
let static_feat = unpack_static_features(coord);
- // Convolution per output channel
- var output: array<f32, 8>;
- for (var c: u32 = 0u; c < out_channels && c < 8u; c++) {
+ // Convolution: 12D input → 4D output
+ var output: vec4<f32> = vec4<f32>(0.0);
+ for (var c: u32 = 0u; c < 4u; c++) {
var sum: f32 = 0.0;
// Convolve over kernel
@@ -94,55 +92,49 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
clamp(sample_coord.y, 0, i32(dims.y) - 1)
);
- // Load input features at this spatial location
+ // Load features at this spatial location
let static_local = unpack_static_features(clamped);
- let layer_local = unpack_layer_channels(clamped);
+ let layer_local = unpack_layer_channels(clamped); // 4D
// Weight index calculation
let ky_idx = u32(ky + kernel_radius);
let kx_idx = u32(kx + kernel_radius);
let spatial_idx = ky_idx * kernel_size + kx_idx;
- // Accumulate: static features (always 8 channels)
- for (var i: u32 = 0u; i < 8u; i++) {
+ // Accumulate: previous/input channels (4D)
+ for (var i: u32 = 0u; i < 4u; i++) {
let w_idx = weight_offset +
- c * in_channels * kernel_size * kernel_size +
+ c * 12u * kernel_size * kernel_size +
i * kernel_size * kernel_size + spatial_idx;
- sum += get_weight(w_idx) * static_local[i];
+ sum += get_weight(w_idx) * layer_local[i];
}
- // Accumulate: previous layer channels (in_channels - 8)
- let prev_channels = in_channels - 8u;
- for (var i: u32 = 0u; i < prev_channels && i < 8u; i++) {
+ // Accumulate: static features (8D)
+ for (var i: u32 = 0u; i < 8u; i++) {
let w_idx = weight_offset +
- c * in_channels * kernel_size * kernel_size +
- (8u + i) * kernel_size * kernel_size + spatial_idx;
- sum += get_weight(w_idx) * layer_local[i];
+ c * 12u * kernel_size * kernel_size +
+ (4u + i) * kernel_size * kernel_size + spatial_idx;
+ sum += get_weight(w_idx) * static_local[i];
}
}
}
// Activation
if (is_output) {
- output[c] = clamp(sum, 0.0, 1.0); // Sigmoid approximation
+ output[c] = clamp(sum, 0.0, 1.0);
} else {
output[c] = max(0.0, sum); // ReLU
}
}
- // Zero unused channels
- for (var c: u32 = out_channels; c < 8u; c++) {
- output[c] = 0.0;
- }
-
// Blend with original on final layer
if (is_output) {
let original = textureLoad(original_input, coord, 0).rgb;
- let result_rgb = vec3<f32>(output[0], output[1], output[2]);
+ let result_rgb = vec3<f32>(output.x, output.y, output.z);
let blended = mix(original, result_rgb, params.blend_amount);
- output[0] = blended.r;
- output[1] = blended.g;
- output[2] = blended.b;
+ output.x = blended.r;
+ output.y = blended.g;
+ output.z = blended.b;
}
textureStore(output_tex, coord, pack_channels(output));
diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
index dd07f19..7a9e6de 100644
--- a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
@@ -1,5 +1,7 @@
// CNN v2 Static Features Compute Shader
-// Generates 7D features + bias: [R, G, B, D, uv.x, uv.y, sin10_x, 1.0]
+// Generates 8D parametric features: [p0, p1, p2, p3, uv.x, uv.y, sin10_x, bias]
+// p0-p3: Parametric features (currently RGBD from mip0, could be mip1/2, gradients, etc.)
+// Note: Input image RGBD (mip0) fed separately to Layer 0
@group(0) @binding(0) var input_tex: texture_2d<f32>;
@group(0) @binding(1) var input_tex_mip1: texture_2d<f32>;
@@ -16,14 +18,14 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
return;
}
- // Sample RGBA from mip 0
+ // Parametric features (p0-p3)
+ // TODO: Experiment with mip1 grayscale, Sobel gradients, etc.
+ // For now, use RGBD from mip 0 (same as input, but could differ)
let rgba = textureLoad(input_tex, coord, 0);
- let r = rgba.r;
- let g = rgba.g;
- let b = rgba.b;
-
- // Sample depth
- let d = textureLoad(depth_tex, coord, 0).r;
+ let p0 = rgba.r;
+ let p1 = rgba.g;
+ let p2 = rgba.b;
+ let p3 = textureLoad(depth_tex, coord, 0).r;
// UV coordinates (normalized [0,1], bottom-left origin)
let uv_x = f32(coord.x) / f32(dims.x);
@@ -36,9 +38,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
let bias = 1.0;
// Pack 8×f16 into 4×u32 (rgba32uint)
+ // [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]
let packed = vec4<u32>(
- pack2x16float(vec2<f32>(r, g)),
- pack2x16float(vec2<f32>(b, d)),
+ pack2x16float(vec2<f32>(p0, p1)),
+ pack2x16float(vec2<f32>(p2, p3)),
pack2x16float(vec2<f32>(uv_x, uv_y)),
pack2x16float(vec2<f32>(sin10_x, bias))
);