summary refs log tree commit diff
path: root/training
diff options
context:
space:
mode:
Diffstat (limited to 'training')
-rwxr-xr-x training/export_cnn_v2_weights.py 52
-rwxr-xr-x training/gen_identity_weights.py 171
-rwxr-xr-x training/train_cnn_v2.py 28
3 files changed, 228 insertions, 23 deletions
diff --git a/training/export_cnn_v2_weights.py b/training/export_cnn_v2_weights.py
index 1086516..f64bd8d 100755
--- a/training/export_cnn_v2_weights.py
+++ b/training/export_cnn_v2_weights.py
@@ -12,7 +12,7 @@ import struct
from pathlib import Path
-def export_weights_binary(checkpoint_path, output_path):
+def export_weights_binary(checkpoint_path, output_path, quiet=False):
"""Export CNN v2 weights to binary format.
Binary format:
@@ -40,7 +40,8 @@ def export_weights_binary(checkpoint_path, output_path):
Returns:
config dict for shader generation
"""
- print(f"Loading checkpoint: {checkpoint_path}")
+ if not quiet:
+ print(f"Loading checkpoint: {checkpoint_path}")
checkpoint = torch.load(checkpoint_path, map_location='cpu')
state_dict = checkpoint['model_state_dict']
@@ -59,11 +60,12 @@ def export_weights_binary(checkpoint_path, output_path):
num_layers = config.get('num_layers', len(kernel_sizes))
mip_level = config.get('mip_level', 0)
- print(f"Configuration:")
- print(f" Kernel sizes: {kernel_sizes}")
- print(f" Layers: {num_layers}")
- print(f" Mip level: {mip_level} (p0-p3 features)")
- print(f" Architecture: uniform 12D→4D (bias=False)")
+ if not quiet:
+ print(f"Configuration:")
+ print(f" Kernel sizes: {kernel_sizes}")
+ print(f" Layers: {num_layers}")
+ print(f" Mip level: {mip_level} (p0-p3 features)")
+ print(f" Architecture: uniform 12D→4D (bias=False)")
# Collect layer info - all layers uniform 12D→4D
layers = []
@@ -89,7 +91,8 @@ def export_weights_binary(checkpoint_path, output_path):
all_weights.extend(layer_flat)
weight_offset += len(layer_flat)
- print(f" Layer {i}: 12D→4D, {kernel_size}×{kernel_size}, {len(layer_flat)} weights")
+ if not quiet:
+ print(f" Layer {i}: 12D→4D, {kernel_size}×{kernel_size}, {len(layer_flat)} weights")
# Convert to f16
# TODO: Use 8-bit quantization for 2× size reduction
@@ -104,11 +107,13 @@ def export_weights_binary(checkpoint_path, output_path):
# Pack pairs using numpy view
weights_u32 = all_weights_f16.view(np.uint32)
- print(f"\nWeight statistics:")
- print(f" Total layers: {len(layers)}")
- print(f" Total weights: {len(all_weights_f16)} (f16)")
- print(f" Packed: {len(weights_u32)} u32")
- print(f" Binary size: {20 + len(layers) * 20 + len(weights_u32) * 4} bytes")
+ binary_size = 20 + len(layers) * 20 + len(weights_u32) * 4
+ if not quiet:
+ print(f"\nWeight statistics:")
+ print(f" Total layers: {len(layers)}")
+ print(f" Total weights: {len(all_weights_f16)} (f16)")
+ print(f" Packed: {len(weights_u32)} u32")
+ print(f" Binary size: {binary_size} bytes")
# Write binary file
output_path = Path(output_path)
@@ -135,7 +140,10 @@ def export_weights_binary(checkpoint_path, output_path):
# Weights (u32 packed f16 pairs)
f.write(weights_u32.tobytes())
- print(f" → {output_path}")
+ if quiet:
+ print(f" Exported {num_layers} layers, {len(all_weights_f16)} weights, {binary_size} bytes → {output_path}")
+ else:
+ print(f" → {output_path}")
return {
'num_layers': len(layers),
@@ -257,15 +265,19 @@ def main():
help='Output binary weights file')
parser.add_argument('--output-shader', type=str, default='workspaces/main/shaders',
help='Output directory for shader template')
+ parser.add_argument('--quiet', action='store_true',
+ help='Suppress detailed output')
args = parser.parse_args()
- print("=== CNN v2 Weight Export ===\n")
- config = export_weights_binary(args.checkpoint, args.output_weights)
- print()
- # Shader is manually maintained in cnn_v2_compute.wgsl
- # export_shader_template(config, args.output_shader)
- print("\nExport complete!")
+ if not args.quiet:
+ print("=== CNN v2 Weight Export ===\n")
+ config = export_weights_binary(args.checkpoint, args.output_weights, quiet=args.quiet)
+ if not args.quiet:
+ print()
+ # Shader is manually maintained in cnn_v2_compute.wgsl
+ # export_shader_template(config, args.output_shader)
+ print("\nExport complete!")
if __name__ == '__main__':
diff --git a/training/gen_identity_weights.py b/training/gen_identity_weights.py
new file mode 100755
index 0000000..7865d68
--- /dev/null
+++ b/training/gen_identity_weights.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""Generate Identity CNN v2 Weights
+
+Creates trivial .bin with 1 layer, 1×1 kernel, identity passthrough.
+Output Ch{0,1,2,3} = Input Ch{0,1,2,3} (ignores static features).
+
+With --mix: Output Ch{i} = 0.5*prev[i] + 0.5*static_p{4+i}
+ (50-50 blend of prev layer with uv_x, uv_y, sin20_y, bias)
+
+With --p47: Output Ch{i} = static p{4+i} (uv_x, uv_y, sin20_y, bias)
+ (p4/uv_x→ch0, p5/uv_y→ch1, p6/sin20_y→ch2, p7/bias→ch3)
+
+Usage:
+ ./training/gen_identity_weights.py [output.bin]
+ ./training/gen_identity_weights.py --mix [output.bin]
+ ./training/gen_identity_weights.py --p47 [output.bin]
+"""
+
+import argparse
+import numpy as np
+import struct
+from pathlib import Path
+
+
+def generate_identity_weights(output_path, kernel_size=1, mip_level=0, mix=False, p47=False):
+    """Write a one-layer CNN v2 weight file that passes channels straight through.
+
+    The single layer maps 12 input channels to 4 output channels. Every weight
+    is zero except at kernel position (kernel_size // 2, kernel_size // 2),
+    where the selected mode places its coefficients:
+
+        default:  out[i] = prev[i]                           (identity)
+        mix=True: out[i] = 0.5*prev[i] + 0.5*static p{4+i}   (50-50 blend)
+        p47=True: out[i] = static p{4+i}                     (static features only)
+
+    When both mix and p47 are set, p47 takes precedence (for the weights and
+    for the printed summary).
+
+    Input channel layout: [0-3: prev layer, 4-11: static (p0-p7)]
+    Static features: p0-p3 (RGB+D), p4 (uv_x), p5 (uv_y), p6 (sin20_y), p7 (bias)
+
+    Binary format:
+        Header (20 bytes):
+            uint32 magic ('CNN2')
+            uint32 version (2)
+            uint32 num_layers (1)
+            uint32 total_weights (f16 count)
+            uint32 mip_level
+
+        LayerInfo (20 bytes):
+            uint32 kernel_size
+            uint32 in_channels (12)
+            uint32 out_channels (4)
+            uint32 weight_offset (0)
+            uint32 weight_count
+
+        Weights (u32 packed f16):
+            Flattened [out_ch, in_ch, kernel_h, kernel_w] array
+
+    Besides writing the file, the function prints a summary to stdout and
+    then re-reads the file to print the header fields it finds there.
+
+    Args:
+        output_path: Destination file; missing parent directories are created.
+        kernel_size: Side length of the square kernel; must be >= 1.
+        mip_level: Value stored verbatim in the header's mip_level field.
+        mix: Select the 50-50 blend mode.
+        p47: Select the static-features-only mode.
+
+    Raises:
+        ValueError: If kernel_size is smaller than 1.
+    """
+    # Reject sizes that would otherwise fail later with an unrelated
+    # IndexError (0) or a numpy shape error (negative).
+    if kernel_size < 1:
+        raise ValueError(f"kernel_size must be >= 1, got {kernel_size}")
+
+    in_channels = 12  # 4 prev-layer channels + 8 static features
+    out_channels = 4
+
+    # Weight shape: [out_ch, in_ch, kernel_h, kernel_w]; start from all zeros.
+    weights = np.zeros((out_channels, in_channels, kernel_size, kernel_size), dtype=np.float32)
+
+    # Only this one kernel position ever receives a non-zero coefficient.
+    center = kernel_size // 2
+
+    if p47:
+        # Static p4-p7 live at input channels 8-11 (4 prev + p0-p3 come first).
+        for i in range(out_channels):
+            weights[i, i + 8, center, center] = 1.0
+    elif mix:
+        # Half of prev-layer channel i plus half of static p{4+i}.
+        for i in range(out_channels):
+            weights[i, i, center, center] = 0.5
+            weights[i, i + 8, center, center] = 0.5
+    else:
+        # Identity: output channel i = prev-layer channel i.
+        for i in range(out_channels):
+            weights[i, i, center, center] = 1.0
+
+    # Flatten
+    weights_flat = weights.flatten()
+    weight_count = len(weights_flat)
+
+    mode_name = 'p47' if p47 else ('mix' if mix else 'identity')
+    print(f"Generating {mode_name} weights:")
+    print(f" Kernel size: {kernel_size}×{kernel_size}")
+    print(f" Channels: 12D→4D")
+    print(f" Weights: {weight_count}")
+    print(f" Mip level: {mip_level}")
+    # Test p47 first so the description matches the weights actually built
+    # when both flags are supplied.
+    if p47:
+        print(f" Mode: p4→ch0, p5→ch1, p6→ch2, p7→ch3")
+    elif mix:
+        print(f" Mode: 0.5*prev[i] + 0.5*static_p{{4+i}} (blend with uv/sin/bias)")
+
+    # Convert to f16
+    weights_f16 = np.array(weights_flat, dtype=np.float16)
+
+    # Pad to even count so the f16 values pair up into whole u32 words.
+    # (4 * 12 * k * k is always even, so this is purely a safety net.)
+    if len(weights_f16) % 2 == 1:
+        weights_f16 = np.append(weights_f16, np.float16(0.0))
+
+    # Pack f16 pairs into u32
+    weights_u32 = weights_f16.view(np.uint32)
+
+    print(f" Packed: {len(weights_u32)} u32")
+    print(f" Binary size: {20 + 20 + len(weights_u32) * 4} bytes")
+
+    # Write binary
+    output_path = Path(output_path)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(output_path, 'wb') as f:
+        # Header (20 bytes)
+        f.write(struct.pack('<4sIIII',
+                            b'CNN2',           # magic
+                            2,                 # version
+                            1,                 # num_layers
+                            len(weights_f16),  # total_weights
+                            mip_level))        # mip_level
+
+        # Layer info (20 bytes)
+        f.write(struct.pack('<IIIII',
+                            kernel_size,    # kernel_size
+                            in_channels,    # in_channels
+                            out_channels,   # out_channels
+                            0,              # weight_offset
+                            weight_count))  # weight_count
+
+        # Weights (u32 packed f16)
+        # NOTE: tobytes() emits the host byte order, whereas the two structs
+        # above are explicitly little-endian.
+        f.write(weights_u32.tobytes())
+
+    print(f" → {output_path}")
+
+    # Verify: read the file back and echo the header as stored on disk.
+    print("\nVerification:")
+    with open(output_path, 'rb') as f:
+        data = f.read()
+    magic, version, num_layers, total_weights, mip = struct.unpack('<4sIIII', data[:20])
+    print(f" Magic: {magic}")
+    print(f" Version: {version}")
+    print(f" Layers: {num_layers}")
+    print(f" Total weights: {total_weights}")
+    print(f" Mip level: {mip}")
+    print(f" File size: {len(data)} bytes")
+
+
+def main():
+    """Command-line entry point: parse options, then write the weight file."""
+    cli = argparse.ArgumentParser(description='Generate identity CNN v2 weights')
+    cli.add_argument(
+        'output',
+        nargs='?',
+        type=str,
+        default='workspaces/main/weights/cnn_v2_identity.bin',
+        help='Output .bin file path',
+    )
+    cli.add_argument(
+        '--kernel-size',
+        type=int,
+        default=1,
+        help='Kernel size (default: 1×1)',
+    )
+    cli.add_argument(
+        '--mip-level',
+        type=int,
+        default=0,
+        help='Mip level for p0-p3 features (default: 0)',
+    )
+    cli.add_argument(
+        '--mix',
+        action='store_true',
+        help='Mix mode: 50-50 blend of p0-p3 and p4-p7',
+    )
+    cli.add_argument(
+        '--p47',
+        action='store_true',
+        help='Static features only: p4→ch0, p5→ch1, p6→ch2, p7→ch3',
+    )
+    opts = cli.parse_args()
+
+    # Banner, generation, and sign-off are printed in this fixed order.
+    print("=== Identity Weight Generator ===\n")
+    generate_identity_weights(
+        opts.output,
+        kernel_size=opts.kernel_size,
+        mip_level=opts.mip_level,
+        mix=opts.mix,
+        p47=opts.p47,
+    )
+    print("\nDone!")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py
index 70229ce..9e5df2f 100755
--- a/training/train_cnn_v2.py
+++ b/training/train_cnn_v2.py
@@ -61,7 +61,7 @@ def compute_static_features(rgb, depth=None, mip_level=0):
p0 = mip_rgb[:, :, 0].astype(np.float32)
p1 = mip_rgb[:, :, 1].astype(np.float32)
p2 = mip_rgb[:, :, 2].astype(np.float32)
- p3 = depth if depth is not None else np.ones((h, w), dtype=np.float32) # Default 1.0 = far plane
+ p3 = depth.astype(np.float32) if depth is not None else np.ones((h, w), dtype=np.float32) # Default 1.0 = far plane
# UV coordinates (normalized [0, 1])
uv_x = np.linspace(0, 1, w)[None, :].repeat(h, axis=0).astype(np.float32)
@@ -121,7 +121,7 @@ class CNNv2(nn.Module):
# Layer 0: input RGBD (4D) + static (8D) = 12D
x = torch.cat([input_rgbd, static_features], dim=1)
x = self.layers[0](x)
- x = torch.clamp(x, 0, 1) # Output [0,1] for layer 0
+ x = torch.sigmoid(x) # Soft [0,1] for layer 0
# Layer 1+: previous (4D) + static (8D) = 12D
for i in range(1, self.num_layers):
@@ -130,7 +130,7 @@ class CNNv2(nn.Module):
if i < self.num_layers - 1:
x = F.relu(x)
else:
- x = torch.clamp(x, 0, 1) # Final output [0,1]
+ x = torch.sigmoid(x) # Soft [0,1] for final layer
return x
@@ -329,6 +329,9 @@ def train(args):
kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
if len(kernel_sizes) == 1:
kernel_sizes = kernel_sizes * args.num_layers
+ else:
+ # When multiple kernel sizes provided, derive num_layers from list length
+ args.num_layers = len(kernel_sizes)
# Create model
model = CNNv2(kernel_sizes=kernel_sizes, num_layers=args.num_layers).to(device)
@@ -397,6 +400,25 @@ def train(args):
}, checkpoint_path)
print(f" → Saved checkpoint: {checkpoint_path}")
+ # Always save final checkpoint
+ print() # Newline after training
+ final_checkpoint = Path(args.checkpoint_dir) / f"checkpoint_epoch_{args.epochs}.pth"
+ final_checkpoint.parent.mkdir(parents=True, exist_ok=True)
+ torch.save({
+ 'epoch': args.epochs,
+ 'model_state_dict': model.state_dict(),
+ 'optimizer_state_dict': optimizer.state_dict(),
+ 'loss': avg_loss,
+ 'config': {
+ 'kernel_sizes': kernel_sizes,
+ 'num_layers': args.num_layers,
+ 'mip_level': args.mip_level,
+ 'grayscale_loss': args.grayscale_loss,
+ 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin20_y', 'bias']
+ }
+ }, final_checkpoint)
+ print(f" → Saved final checkpoint: {final_checkpoint}")
+
print(f"\nTraining complete! Total time: {time.time() - start_time:.1f}s")
return model