diff options
Diffstat (limited to 'training')
| -rw-r--r-- | training/diagnose_255_to_253.md | 69 | ||||
| -rwxr-xr-x | training/export_cnn_v2_weights.py | 52 | ||||
| -rwxr-xr-x | training/gen_identity_weights.py | 51 | ||||
| -rw-r--r-- | training/layers/chk_10000_3x5x3x3.pt | bin | 0 -> 5092 bytes | |||
| -rwxr-xr-x | training/test_viz_precision.py | 38 | ||||
| -rwxr-xr-x | training/train_cnn_v2.py | 26 |
6 files changed, 100 insertions, 136 deletions
diff --git a/training/diagnose_255_to_253.md b/training/diagnose_255_to_253.md deleted file mode 100644 index 764d328..0000000 --- a/training/diagnose_255_to_253.md +++ /dev/null @@ -1,69 +0,0 @@ -# Diagnosis: 255 → 253 Loss (-2 LSBs) - -## Findings - -### F16 Precision -✅ **No loss:** 1.0 → f16(0x3c00) → 1.0 (exact round-trip) - -### Visualization Scale -⚠️ **Inconsistent:** -- Layer 1 uses `vizScale = 0.5` (line 1530) -- Should render as 128, not 253 -- **User seeing 253 suggests viewing Static Features (scale=1.0), not CNN output** - -### Suspected Issue: Input Alpha Channel - -**Code:** `tools/cnn_v2_test/index.html` line 1233 -```javascript -depthData[i] = pixels[i * 4 + 3] / 255.0; // Alpha from canvas -``` - -**Hypothesis:** Input PNG alpha channel = 253 (not 255) -- Browsers may set alpha < 255 for certain images -- Pre-multiplied alpha corrections -- PNG encoder compression artifacts - -### Test - -**Check input alpha:** -```javascript -// In HTML tool console: -const canvas = document.createElement('canvas'); -canvas.width = tester.image.width; -canvas.height = tester.image.height; -const ctx = canvas.getContext('2d'); -ctx.drawImage(tester.image, 0, 0); -const imgData = ctx.getImageData(0, 0, canvas.width, canvas.height); -const alpha = imgData.data[3]; // First pixel alpha -console.log('First pixel alpha:', alpha); -``` - -### Alternative: C++ Reference - -Check if `cnn_test` tool produces same -2 loss: -```bash -# Generate solid white 8×8 test image with alpha=255 -python3 -c " -from PIL import Image -import numpy as np -img = np.ones((8, 8, 4), dtype=np.uint8) * 255 -Image.fromarray(img, 'RGBA').save('test_white_255.png') -print('Created test_white_255.png: all pixels RGBA=(255,255,255,255)') -" - -# Test with HTML tool → check if p3 = 1.0 or 0.9921875 -# Test with cnn_test → compare output -./build/cnn_test test_white_255.png output.png --cnn-version 2 --debug-hex -``` - -### Next Steps - -1. 
**Verify input:** Check alpha channel of user's input image -2. **Add debug:** Log first pixel RGBA values in HTML tool -3. **Compare:** Run same image through C++ cnn_test -4. **Isolate:** Test with synthetic 255 alpha image - -## Conclusion - -**Most likely:** Input image alpha ≠ 255, already 253 before CNN processing. -**Verify:** User should check input PNG metadata and alpha channel values. diff --git a/training/export_cnn_v2_weights.py b/training/export_cnn_v2_weights.py index 1086516..f64bd8d 100755 --- a/training/export_cnn_v2_weights.py +++ b/training/export_cnn_v2_weights.py @@ -12,7 +12,7 @@ import struct from pathlib import Path -def export_weights_binary(checkpoint_path, output_path): +def export_weights_binary(checkpoint_path, output_path, quiet=False): """Export CNN v2 weights to binary format. Binary format: @@ -40,7 +40,8 @@ def export_weights_binary(checkpoint_path, output_path): Returns: config dict for shader generation """ - print(f"Loading checkpoint: {checkpoint_path}") + if not quiet: + print(f"Loading checkpoint: {checkpoint_path}") checkpoint = torch.load(checkpoint_path, map_location='cpu') state_dict = checkpoint['model_state_dict'] @@ -59,11 +60,12 @@ def export_weights_binary(checkpoint_path, output_path): num_layers = config.get('num_layers', len(kernel_sizes)) mip_level = config.get('mip_level', 0) - print(f"Configuration:") - print(f" Kernel sizes: {kernel_sizes}") - print(f" Layers: {num_layers}") - print(f" Mip level: {mip_level} (p0-p3 features)") - print(f" Architecture: uniform 12D→4D (bias=False)") + if not quiet: + print(f"Configuration:") + print(f" Kernel sizes: {kernel_sizes}") + print(f" Layers: {num_layers}") + print(f" Mip level: {mip_level} (p0-p3 features)") + print(f" Architecture: uniform 12D→4D (bias=False)") # Collect layer info - all layers uniform 12D→4D layers = [] @@ -89,7 +91,8 @@ def export_weights_binary(checkpoint_path, output_path): all_weights.extend(layer_flat) weight_offset += len(layer_flat) - 
print(f" Layer {i}: 12D→4D, {kernel_size}×{kernel_size}, {len(layer_flat)} weights") + if not quiet: + print(f" Layer {i}: 12D→4D, {kernel_size}×{kernel_size}, {len(layer_flat)} weights") # Convert to f16 # TODO: Use 8-bit quantization for 2× size reduction @@ -104,11 +107,13 @@ def export_weights_binary(checkpoint_path, output_path): # Pack pairs using numpy view weights_u32 = all_weights_f16.view(np.uint32) - print(f"\nWeight statistics:") - print(f" Total layers: {len(layers)}") - print(f" Total weights: {len(all_weights_f16)} (f16)") - print(f" Packed: {len(weights_u32)} u32") - print(f" Binary size: {20 + len(layers) * 20 + len(weights_u32) * 4} bytes") + binary_size = 20 + len(layers) * 20 + len(weights_u32) * 4 + if not quiet: + print(f"\nWeight statistics:") + print(f" Total layers: {len(layers)}") + print(f" Total weights: {len(all_weights_f16)} (f16)") + print(f" Packed: {len(weights_u32)} u32") + print(f" Binary size: {binary_size} bytes") # Write binary file output_path = Path(output_path) @@ -135,7 +140,10 @@ def export_weights_binary(checkpoint_path, output_path): # Weights (u32 packed f16 pairs) f.write(weights_u32.tobytes()) - print(f" → {output_path}") + if quiet: + print(f" Exported {num_layers} layers, {len(all_weights_f16)} weights, {binary_size} bytes → {output_path}") + else: + print(f" → {output_path}") return { 'num_layers': len(layers), @@ -257,15 +265,19 @@ def main(): help='Output binary weights file') parser.add_argument('--output-shader', type=str, default='workspaces/main/shaders', help='Output directory for shader template') + parser.add_argument('--quiet', action='store_true', + help='Suppress detailed output') args = parser.parse_args() - print("=== CNN v2 Weight Export ===\n") - config = export_weights_binary(args.checkpoint, args.output_weights) - print() - # Shader is manually maintained in cnn_v2_compute.wgsl - # export_shader_template(config, args.output_shader) - print("\nExport complete!") + if not args.quiet: + print("=== 
CNN v2 Weight Export ===\n") + config = export_weights_binary(args.checkpoint, args.output_weights, quiet=args.quiet) + if not args.quiet: + print() + # Shader is manually maintained in cnn_v2_compute.wgsl + # export_shader_template(config, args.output_shader) + print("\nExport complete!") if __name__ == '__main__': diff --git a/training/gen_identity_weights.py b/training/gen_identity_weights.py index a84ea87..7865d68 100755 --- a/training/gen_identity_weights.py +++ b/training/gen_identity_weights.py @@ -4,8 +4,16 @@ Creates trivial .bin with 1 layer, 1×1 kernel, identity passthrough. Output Ch{0,1,2,3} = Input Ch{0,1,2,3} (ignores static features). +With --mix: Output Ch{i} = 0.5*prev[i] + 0.5*static_p{4+i} + (50-50 blend of prev layer with uv_x, uv_y, sin20_y, bias) + +With --p47: Output Ch{i} = static p{4+i} (uv_x, uv_y, sin20_y, bias) + (p4/uv_x→ch0, p5/uv_y→ch1, p6/sin20_y→ch2, p7/bias→ch3) + Usage: ./training/gen_identity_weights.py [output.bin] + ./training/gen_identity_weights.py --mix [output.bin] + ./training/gen_identity_weights.py --p47 [output.bin] """ import argparse @@ -14,9 +22,15 @@ import struct from pathlib import Path -def generate_identity_weights(output_path, kernel_size=1, mip_level=0): +def generate_identity_weights(output_path, kernel_size=1, mip_level=0, mix=False, p47=False): """Generate identity weights: output = input (ignores static features). + If mix=True, 50-50 blend: 0.5*p0+0.5*p4, 0.5*p1+0.5*p5, etc (avoids overflow). + If p47=True, transfers static p4-p7 (uv_x, uv_y, sin20_y, bias) to output channels. 
+ + Input channel layout: [0-3: prev layer, 4-11: static (p0-p7)] + Static features: p0-p3 (RGB+D), p4 (uv_x), p5 (uv_y), p6 (sin20_y), p7 (bias) + Binary format: Header (20 bytes): uint32 magic ('CNN2') @@ -34,7 +48,8 @@ def generate_identity_weights(output_path, kernel_size=1, mip_level=0): Weights (u32 packed f16): Identity matrix for first 4 input channels - Zeros for static features (channels 4-11) + Zeros for static features (channels 4-11) OR + Mix matrix (p0+p4, p1+p5, p2+p6, p3+p7) if mix=True """ # Identity: 4 output channels, 12 input channels # Weight shape: [out_ch, in_ch, kernel_h, kernel_w] @@ -47,19 +62,37 @@ def generate_identity_weights(output_path, kernel_size=1, mip_level=0): # Center position for kernel center = kernel_size // 2 - # Set diagonal to 1.0 (output ch i = input ch i) - for i in range(out_channels): - weights[i, i, center, center] = 1.0 + if p47: + # p47 mode: p4→ch0, p5→ch1, p6→ch2, p7→ch3 (static features only) + # Input channels: [0-3: prev layer, 4-11: static features (p0-p7)] + # p4-p7 are at input channels 8-11 + for i in range(out_channels): + weights[i, i + 8, center, center] = 1.0 + elif mix: + # Mix mode: 50-50 blend (p0+p4, p1+p5, p2+p6, p3+p7) + # p0-p3 are at channels 0-3 (prev layer), p4-p7 at channels 8-11 (static) + for i in range(out_channels): + weights[i, i, center, center] = 0.5 # 0.5*p{i} (prev layer) + weights[i, i + 8, center, center] = 0.5 # 0.5*p{i+4} (static) + else: + # Identity: output ch i = input ch i + for i in range(out_channels): + weights[i, i, center, center] = 1.0 # Flatten weights_flat = weights.flatten() weight_count = len(weights_flat) - print(f"Generating identity weights:") + mode_name = 'p47' if p47 else ('mix' if mix else 'identity') + print(f"Generating {mode_name} weights:") print(f" Kernel size: {kernel_size}×{kernel_size}") print(f" Channels: 12D→4D") print(f" Weights: {weight_count}") print(f" Mip level: {mip_level}") + if mix: + print(f" Mode: 0.5*prev[i] + 0.5*static_p{{4+i}} (blend 
with uv/sin/bias)") + elif p47: + print(f" Mode: p4→ch0, p5→ch1, p6→ch2, p7→ch3") # Convert to f16 weights_f16 = np.array(weights_flat, dtype=np.float16) @@ -122,11 +155,15 @@ def main(): help='Kernel size (default: 1×1)') parser.add_argument('--mip-level', type=int, default=0, help='Mip level for p0-p3 features (default: 0)') + parser.add_argument('--mix', action='store_true', + help='Mix mode: 50-50 blend of p0-p3 and p4-p7') + parser.add_argument('--p47', action='store_true', + help='Static features only: p4→ch0, p5→ch1, p6→ch2, p7→ch3') args = parser.parse_args() print("=== Identity Weight Generator ===\n") - generate_identity_weights(args.output, args.kernel_size, args.mip_level) + generate_identity_weights(args.output, args.kernel_size, args.mip_level, args.mix, args.p47) print("\nDone!") diff --git a/training/layers/chk_10000_3x5x3x3.pt b/training/layers/chk_10000_3x5x3x3.pt Binary files differ new file mode 100644 index 0000000..6e6750c --- /dev/null +++ b/training/layers/chk_10000_3x5x3x3.pt diff --git a/training/test_viz_precision.py b/training/test_viz_precision.py deleted file mode 100755 index 143f4ea..0000000 --- a/training/test_viz_precision.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -"""Test WebGPU → Canvas → PNG precision loss - -Check if bgra8unorm → 2D canvas → PNG loses 2 LSBs. 
-""" - -import numpy as np - -# Simulate WebGPU bgra8unorm conversion -# Float [0, 1] → uint8 [0, 255] - -test_values = [ - 1.0, # Perfect white - 0.9999, # Near-white - 254.5/255, # Exactly 254.5 - 253.5/255, # Exactly 253.5 -] - -for val in test_values: - # WebGPU bgra8unorm: round(val * 255) - gpu_u8 = int(np.round(val * 255)) - - # Convert back to normalized - gpu_f32 = gpu_u8 / 255.0 - - # JavaScript canvas getImageData: uint8 - canvas_u8 = int(np.round(gpu_f32 * 255)) - - print(f"Input: {val:.6f} → GPU u8: {gpu_u8} → Canvas: {canvas_u8}") - if canvas_u8 != 255: - print(f" ⚠️ Lost {255 - canvas_u8} LSBs") - -print("\nConclusion:") -print("If WebGPU stores 1.0 as 255, canvas should read 255.") -print("If user sees 253, likely:") -print(" a) Not viewing CNN layer (viewing static features at scale=1.0)") -print(" b) Value in texture is already 253/255 = 0.9921875") -print(" c) F16 storage or unpacking issue") diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py index 134a5ae..9e5df2f 100755 --- a/training/train_cnn_v2.py +++ b/training/train_cnn_v2.py @@ -121,7 +121,7 @@ class CNNv2(nn.Module): # Layer 0: input RGBD (4D) + static (8D) = 12D x = torch.cat([input_rgbd, static_features], dim=1) x = self.layers[0](x) - x = torch.clamp(x, 0, 1) # Output [0,1] for layer 0 + x = torch.sigmoid(x) # Soft [0,1] for layer 0 # Layer 1+: previous (4D) + static (8D) = 12D for i in range(1, self.num_layers): @@ -130,7 +130,7 @@ class CNNv2(nn.Module): if i < self.num_layers - 1: x = F.relu(x) else: - x = torch.clamp(x, 0, 1) # Final output [0,1] + x = torch.sigmoid(x) # Soft [0,1] for final layer return x @@ -329,6 +329,9 @@ def train(args): kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')] if len(kernel_sizes) == 1: kernel_sizes = kernel_sizes * args.num_layers + else: + # When multiple kernel sizes provided, derive num_layers from list length + args.num_layers = len(kernel_sizes) # Create model model = CNNv2(kernel_sizes=kernel_sizes, 
num_layers=args.num_layers).to(device) @@ -397,6 +400,25 @@ def train(args): }, checkpoint_path) print(f" → Saved checkpoint: {checkpoint_path}") + # Always save final checkpoint + print() # Newline after training + final_checkpoint = Path(args.checkpoint_dir) / f"checkpoint_epoch_{args.epochs}.pth" + final_checkpoint.parent.mkdir(parents=True, exist_ok=True) + torch.save({ + 'epoch': args.epochs, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': avg_loss, + 'config': { + 'kernel_sizes': kernel_sizes, + 'num_layers': args.num_layers, + 'mip_level': args.mip_level, + 'grayscale_loss': args.grayscale_loss, + 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin20_y', 'bias'] + } + }, final_checkpoint) + print(f" → Saved final checkpoint: {final_checkpoint}") + print(f"\nTraining complete! Total time: {time.time() - start_time:.1f}s") return model |
