From 561d1dc446db7d1d3e02b92b43abedf1a5017850 Mon Sep 17 00:00:00 2001 From: skal Date: Fri, 13 Feb 2026 12:32:36 +0100 Subject: CNN v2: Refactor to uniform 12D→4D architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Architecture changes:** - Static features (8D): p0-p3 (parametric) + uv_x, uv_y, sin(10×uv_x), bias - Input RGBD (4D): fed separately to all layers - All layers: uniform 12D→4D (4 prev/input + 8 static → 4 output) - Bias integrated in static features (bias=False in PyTorch) **Weight calculations:** - 3 layers × (12 × 3×3 × 4) = 1296 weights - f16: 2.6 KB (vs old variable arch: ~6.4 KB) **Updated files:** *Training (Python):* - train_cnn_v2.py: Uniform model, takes input_rgbd + static_features - export_cnn_v2_weights.py: Binary export for storage buffers - export_cnn_v2_shader.py: Per-layer shader export (debugging) *Shaders (WGSL):* - cnn_v2_static.wgsl: p0-p3 parametric features (mips/gradients) - cnn_v2_compute.wgsl: 12D input, 4D output, vec4 packing *Tools:* - HTML tool (cnn_v2_test): Updated for 12D→4D, layer visualization *Docs:* - CNN_V2.md: Updated architecture, training, validation sections - HOWTO.md: Reference HTML tool for validation *Removed:* - validate_cnn_v2.sh: Obsolete (used CNN v1 tool) All code consistent with bias=False (bias in static features as 1.0). handoff(Claude): CNN v2 architecture finalized and documented --- training/export_cnn_v2_weights.py | 89 ++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 53 deletions(-) (limited to 'training/export_cnn_v2_weights.py') diff --git a/training/export_cnn_v2_weights.py b/training/export_cnn_v2_weights.py index 8a2fcdc..07254fc 100755 --- a/training/export_cnn_v2_weights.py +++ b/training/export_cnn_v2_weights.py @@ -45,53 +45,38 @@ def export_weights_binary(checkpoint_path, output_path): state_dict = checkpoint['model_state_dict'] config = checkpoint['config'] + kernel_size = config.get('kernel_size', 3) + num_layers = config.get('num_layers', 3) + print(f"Configuration:") - print(f" Kernels: {config['kernels']}") - print(f" Channels: {config['channels']}") + print(f" Kernel size: {kernel_size}×{kernel_size}") + print(f" Layers: {num_layers}") + print(f" Architecture: uniform 12D→4D (bias=False)") - # Collect layer info + # Collect layer info - all layers uniform 12D→4D layers = [] all_weights = [] weight_offset = 0 - # Layer 0: 8 → channels[0] - layer0_weights = state_dict['layer0.weight'].detach().numpy() - layer0_flat = layer0_weights.flatten() - layers.append({ - 'kernel_size': config['kernels'][0], - 'in_channels': 8, - 'out_channels': config['channels'][0], - 'weight_offset': weight_offset, - 'weight_count': len(layer0_flat) - }) - all_weights.extend(layer0_flat) - weight_offset += len(layer0_flat) - - # Layer 1: (8 + channels[0]) → channels[1] - layer1_weights = state_dict['layer1.weight'].detach().numpy() - layer1_flat = layer1_weights.flatten() - layers.append({ - 'kernel_size': config['kernels'][1], - 'in_channels': 8 + config['channels'][0], - 'out_channels': config['channels'][1], - 'weight_offset': weight_offset, - 'weight_count': len(layer1_flat) - }) - all_weights.extend(layer1_flat) - weight_offset += len(layer1_flat) - - # Layer 2: (8 + channels[1]) → 4 (RGBA output) - layer2_weights = state_dict['layer2.weight'].detach().numpy() - layer2_flat = layer2_weights.flatten() - layers.append({ - 'kernel_size': config['kernels'][2], - 'in_channels': 8 + config['channels'][1], - 'out_channels': 4, - 'weight_offset': weight_offset, - 'weight_count': len(layer2_flat) - }) - all_weights.extend(layer2_flat) - weight_offset += len(layer2_flat) + for i in range(num_layers): + layer_key = f'layers.{i}.weight' + if layer_key not in state_dict: + raise ValueError(f"Missing weights for layer {i}: {layer_key}") + + layer_weights = state_dict[layer_key].detach().numpy() + layer_flat = layer_weights.flatten() + + layers.append({ + 'kernel_size': kernel_size, + 'in_channels': 12, # 4 (input/prev) + 8 (static) + 'out_channels': 4, # Uniform output + 'weight_offset': weight_offset, + 'weight_count': len(layer_flat) + }) + all_weights.extend(layer_flat) + weight_offset += len(layer_flat) + + print(f" Layer {i}: 12D→4D, {len(layer_flat)} weights") # Convert to f16 # TODO: Use 8-bit quantization for 2× size reduction @@ -183,21 +168,19 @@ fn unpack_static_features(coord: vec2) -> array { return array(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y); } -fn unpack_layer_channels(coord: vec2) -> array { +fn unpack_layer_channels(coord: vec2) -> vec4 { let packed = textureLoad(layer_input, coord, 0); let v0 = unpack2x16float(packed.x); let v1 = unpack2x16float(packed.y); - let v2 = unpack2x16float(packed.z); - let v3 = unpack2x16float(packed.w); - return array(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y); + return vec4(v0.x, v0.y, v1.x, v1.y); } -fn pack_channels(values: array) -> vec4 { +fn pack_channels(values: vec4) -> vec4 { return vec4( - pack2x16float(vec2(values[0], values[1])), - pack2x16float(vec2(values[2], values[3])), - pack2x16float(vec2(values[4], values[5])), - pack2x16float(vec2(values[6], values[7])) + pack2x16float(vec2(values.x, values.y)), + pack2x16float(vec2(values.z, values.w)), + 0u, // Unused + 0u // Unused ); } @@ -238,9 +221,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { let out_channels = weights[layer0_info_base + 2u]; let weight_offset = weights[layer0_info_base + 3u]; - // Convolution (simplified - expand to full kernel loop) - var output: array; - for (var c: u32 = 0u; c < min(out_channels, 8u); c++) { + // Convolution: 12D input (4 prev + 8 static) → 4D output + var output: vec4 = vec4(0.0); + for (var c: u32 = 0u; c < 4u; c++) { output[c] = 0.0; // TODO: Actual convolution } -- cgit v1.2.3