Diffstat (limited to 'training')

 training/export_cnn_v2_weights.py | 18 ++++++++++++++----
 training/train_cnn_v2.py          | 29 ++++++++++++++++++++---------
 2 files changed, 34 insertions(+), 13 deletions(-)
diff --git a/training/export_cnn_v2_weights.py b/training/export_cnn_v2_weights.py
index 07254fc..bbe94dd 100755
--- a/training/export_cnn_v2_weights.py
+++ b/training/export_cnn_v2_weights.py
@@ -45,11 +45,20 @@ def export_weights_binary(checkpoint_path, output_path):
     state_dict = checkpoint['model_state_dict']
     config = checkpoint['config']

-    kernel_size = config.get('kernel_size', 3)
-    num_layers = config.get('num_layers', 3)
+    # Support both old (kernel_size) and new (kernel_sizes) format
+    if 'kernel_sizes' in config:
+        kernel_sizes = config['kernel_sizes']
+    elif 'kernel_size' in config:
+        kernel_size = config['kernel_size']
+        num_layers = config.get('num_layers', 3)
+        kernel_sizes = [kernel_size] * num_layers
+    else:
+        kernel_sizes = [3, 3, 3]  # fallback
+
+    num_layers = config.get('num_layers', len(kernel_sizes))

     print(f"Configuration:")
-    print(f"  Kernel size: {kernel_size}×{kernel_size}")
+    print(f"  Kernel sizes: {kernel_sizes}")
     print(f"  Layers: {num_layers}")
     print(f"  Architecture: uniform 12D→4D (bias=False)")

@@ -65,6 +74,7 @@ def export_weights_binary(checkpoint_path, output_path):

         layer_weights = state_dict[layer_key].detach().numpy()
         layer_flat = layer_weights.flatten()
+        kernel_size = kernel_sizes[i]

         layers.append({
             'kernel_size': kernel_size,
@@ -76,7 +86,7 @@ def export_weights_binary(checkpoint_path, output_path):
         all_weights.extend(layer_flat)
         weight_offset += len(layer_flat)

-        print(f"  Layer {i}: 12D→4D, {len(layer_flat)} weights")
+        print(f"  Layer {i}: 12D→4D, {kernel_size}×{kernel_size}, {len(layer_flat)} weights")

     # Convert to f16
     # TODO: Use 8-bit quantization for 2× size reduction
diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py
index 8b3b91c..3673b97 100755
--- a/training/train_cnn_v2.py
+++ b/training/train_cnn_v2.py
@@ -5,6 +5,7 @@ Architecture:
 - Static features (8D): p0-p3 (parametric), uv_x, uv_y, sin(10×uv_x), bias
 - Input RGBD (4D): original image mip 0
 - All layers: input RGBD (4D) + static (8D) = 12D → 4 channels
+- Per-layer kernel sizes (e.g., 1×1, 3×3, 5×5)
 - Uniform layer structure with bias=False (bias in static features)
 """

@@ -61,6 +62,7 @@ class CNNv2(nn.Module):
     """CNN v2 - Uniform 12D→4D Architecture

     All layers: input RGBD (4D) + static (8D) = 12D → 4 channels
+    Per-layer kernel sizes supported (e.g., [1, 3, 5])
     Uses bias=False (bias integrated in static features as 1.0)

     TODO: Add quantization-aware training (QAT) for 8-bit weights
@@ -69,14 +71,18 @@ class CNNv2(nn.Module):
     - Target: ~1.3 KB weights (vs 2.6 KB with f16)
     """

-    def __init__(self, kernel_size=3, num_layers=3):
+    def __init__(self, kernel_sizes, num_layers=3):
         super().__init__()
-        self.kernel_size = kernel_size
+        if isinstance(kernel_sizes, int):
+            kernel_sizes = [kernel_sizes] * num_layers
+        assert len(kernel_sizes) == num_layers, "kernel_sizes must match num_layers"
+
+        self.kernel_sizes = kernel_sizes
         self.num_layers = num_layers
         self.layers = nn.ModuleList()

         # All layers: 12D input (4 RGBD + 8 static) → 4D output
-        for _ in range(num_layers):
+        for kernel_size in kernel_sizes:
             self.layers.append(
                 nn.Conv2d(12, 4, kernel_size=kernel_size,
                           padding=kernel_size//2, bias=False)
@@ -295,11 +301,16 @@ def train(args):
                          detector=args.detector)
     dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)

+    # Parse kernel sizes
+    kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
+    if len(kernel_sizes) == 1:
+        kernel_sizes = kernel_sizes * args.num_layers
+
     # Create model
-    model = CNNv2(kernel_size=args.kernel_size, num_layers=args.num_layers).to(device)
+    model = CNNv2(kernel_sizes=kernel_sizes, num_layers=args.num_layers).to(device)
     total_params = sum(p.numel() for p in model.parameters())
-    weights_per_layer = 12 * args.kernel_size * args.kernel_size * 4
-    print(f"Model: {args.num_layers} layers, {args.kernel_size}×{args.kernel_size} kernels, {total_params} weights ({weights_per_layer}/layer)")
+    kernel_desc = ','.join(map(str, kernel_sizes))
+    print(f"Model: {args.num_layers} layers, kernel sizes [{kernel_desc}], {total_params} weights")

     # Optimizer and loss
     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
@@ -343,7 +354,7 @@ def train(args):
                 'optimizer_state_dict': optimizer.state_dict(),
                 'loss': avg_loss,
                 'config': {
-                    'kernel_size': args.kernel_size,
+                    'kernel_sizes': kernel_sizes,
                     'num_layers': args.num_layers,
                     'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias']
                 }
@@ -377,8 +388,8 @@ def main():

     # Mix salient points with random samples for better generalization

     # Model architecture
-    parser.add_argument('--kernel-size', type=int, default=3,
-                        help='Kernel size (uniform for all layers, default: 3)')
+    parser.add_argument('--kernel-sizes', type=str, default='3',
+                        help='Comma-separated kernel sizes per layer (e.g., "3,5,3"), single value replicates (default: 3)')
     parser.add_argument('--num-layers', type=int, default=3,
                         help='Number of CNN layers (default: 3)')
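
A quick way to sanity-check the new per-layer configuration is to rebuild the layer stack by hand and compare weight counts against what export_cnn_v2_weights.py flattens. A minimal sketch, assuming a hypothetical "--kernel-sizes 1,3,5" run; the counts follow directly from the uniform 12D→4D, bias-free layers:

import torch.nn as nn

kernel_sizes = [1, 3, 5]  # hypothetical, e.g. --kernel-sizes 1,3,5
layers = nn.ModuleList(
    nn.Conv2d(12, 4, kernel_size=k, padding=k // 2, bias=False)
    for k in kernel_sizes
)

for i, (k, layer) in enumerate(zip(kernel_sizes, layers)):
    # Each layer maps 12 input channels to 4 outputs with no bias,
    # so its weight count is 12 * k * k * 4.
    print(f"Layer {i}: {k}×{k}, {layer.weight.numel()} weights")

total = sum(l.weight.numel() for l in layers)
print(f"Total: {total} weights (~{total * 2 / 1024:.1f} KB as f16)")
# -> 48 + 432 + 1200 = 1680 weights, ~3.3 KB as f16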

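On the export side, the compatibility shim keeps checkpoints saved before this change loadable. A minimal standalone sketch of the fallback chain added to export_cnn_v2_weights.py (the config dicts below are hypothetical examples, not taken from real checkpoints):

def resolve_kernel_sizes(config):
    # New format first, then legacy scalar kernel_size, then hard default.
    if 'kernel_sizes' in config:
        return config['kernel_sizes']
    if 'kernel_size' in config:
        return [config['kernel_size']] * config.get('num_layers', 3)
    return [3, 3, 3]  # fallback

assert resolve_kernel_sizes({'kernel_size': 3, 'num_layers': 3}) == [3, 3, 3]  # old checkpoint
assert resolve_kernel_sizes({'kernel_sizes': [1, 3, 5]}) == [1, 3, 5]          # new checkpoint
assert resolve_kernel_sizes({}) == [3, 3, 3]                                   # missing config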