summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author skal <pascal.massimino@gmail.com> 2026-02-13 12:41:35 +0100
committer skal <pascal.massimino@gmail.com> 2026-02-13 12:41:35 +0100
commit a101d76e3eab4ee4d93357d71e2c7d4e0114f56f (patch)
tree 54fa8bcb50bfcfa401fb58fcb2c6ad69c54a6788
parent 561d1dc446db7d1d3e02b92b43abedf1a5017850 (diff)
CNN v2: Restore per-layer kernel sizes support
Training:
- train_cnn_v2.py: Accept --kernel-sizes as comma-separated list
- CNNv2 model: Per-layer kernel sizes (e.g., [1,3,5])
- Single value replicates across layers (e.g., "3" → [3,3,3])

Export:
- export_cnn_v2_weights.py: Backward compatible with old checkpoints
- Handles both kernel_size (old) and kernel_sizes (new) format

Documentation:
- CNN_V2.md: Updated code examples and config format
- HOWTO.md: Updated training examples to show comma-separated syntax

Binary format: Already supports per-layer kernel sizes (no changes)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
-rw-r--r-- doc/CNN_V2.md 11
-rw-r--r-- doc/HOWTO.md 4
-rwxr-xr-x training/export_cnn_v2_weights.py 18
-rwxr-xr-x training/train_cnn_v2.py 29
4 files changed, 43 insertions, 19 deletions
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index 4612d7a..6242747 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -214,12 +214,15 @@ def compute_static_features(rgb, depth):
```python
class CNNv2(nn.Module):
- def __init__(self, kernel_size=3, num_layers=3):
+ def __init__(self, kernel_sizes, num_layers=3):
super().__init__()
+ if isinstance(kernel_sizes, int):
+ kernel_sizes = [kernel_sizes] * num_layers
+ self.kernel_sizes = kernel_sizes
self.layers = nn.ModuleList()
# All layers: 12D input (4 prev + 8 static) → 4D output
- for i in range(num_layers):
+ for kernel_size in kernel_sizes:
self.layers.append(
nn.Conv2d(12, 4, kernel_size=kernel_size,
padding=kernel_size//2, bias=False)
@@ -247,7 +250,7 @@ class CNNv2(nn.Module):
```python
# Hyperparameters
-kernel_size = 3 # Uniform 3×3 kernels
+kernel_sizes = [3, 3, 3] # Per-layer kernel sizes (e.g., [1,3,5])
num_layers = 3 # Number of CNN layers
learning_rate = 1e-3
batch_size = 16
@@ -278,7 +281,7 @@ for epoch in range(epochs):
torch.save({
'state_dict': model.state_dict(), # f32 weights
'config': {
- 'kernel_size': 3,
+ 'kernel_sizes': [3, 3, 3], # Per-layer kernel sizes
'num_layers': 3,
'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias']
},
diff --git a/doc/HOWTO.md b/doc/HOWTO.md
index e909a5d..9c67106 100644
--- a/doc/HOWTO.md
+++ b/doc/HOWTO.md
@@ -161,10 +161,10 @@ Config: 100 epochs, 3×3 kernels, 8→4→4 channels, patch-based (harris detect
--input training/input/ --target training/target_2/ \
--epochs 100 --batch-size 16 --checkpoint-every 5
-# Custom architecture
+# Custom architecture (per-layer kernel sizes)
./training/train_cnn_v2.py \
--input training/input/ --target training/target_2/ \
- --kernel-sizes 1 3 5 --channels 16 8 4 \
+ --kernel-sizes 1,3,5 \
--epochs 5000 --batch-size 16
```
diff --git a/training/export_cnn_v2_weights.py b/training/export_cnn_v2_weights.py
index 07254fc..bbe94dd 100755
--- a/training/export_cnn_v2_weights.py
+++ b/training/export_cnn_v2_weights.py
@@ -45,11 +45,20 @@ def export_weights_binary(checkpoint_path, output_path):
state_dict = checkpoint['model_state_dict']
config = checkpoint['config']
- kernel_size = config.get('kernel_size', 3)
- num_layers = config.get('num_layers', 3)
+ # Support both old (kernel_size) and new (kernel_sizes) format
+ if 'kernel_sizes' in config:
+ kernel_sizes = config['kernel_sizes']
+ elif 'kernel_size' in config:
+ kernel_size = config['kernel_size']
+ num_layers = config.get('num_layers', 3)
+ kernel_sizes = [kernel_size] * num_layers
+ else:
+ kernel_sizes = [3, 3, 3] # fallback
+
+ num_layers = config.get('num_layers', len(kernel_sizes))
print(f"Configuration:")
- print(f" Kernel size: {kernel_size}×{kernel_size}")
+ print(f" Kernel sizes: {kernel_sizes}")
print(f" Layers: {num_layers}")
print(f" Architecture: uniform 12D→4D (bias=False)")
@@ -65,6 +74,7 @@ def export_weights_binary(checkpoint_path, output_path):
layer_weights = state_dict[layer_key].detach().numpy()
layer_flat = layer_weights.flatten()
+ kernel_size = kernel_sizes[i]
layers.append({
'kernel_size': kernel_size,
@@ -76,7 +86,7 @@ def export_weights_binary(checkpoint_path, output_path):
all_weights.extend(layer_flat)
weight_offset += len(layer_flat)
- print(f" Layer {i}: 12D→4D, {len(layer_flat)} weights")
+ print(f" Layer {i}: 12D→4D, {kernel_size}×{kernel_size}, {len(layer_flat)} weights")
# Convert to f16
# TODO: Use 8-bit quantization for 2× size reduction
diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py
index 8b3b91c..3673b97 100755
--- a/training/train_cnn_v2.py
+++ b/training/train_cnn_v2.py
@@ -5,6 +5,7 @@ Architecture:
- Static features (8D): p0-p3 (parametric), uv_x, uv_y, sin(10×uv_x), bias
- Input RGBD (4D): original image mip 0
- All layers: input RGBD (4D) + static (8D) = 12D → 4 channels
+- Per-layer kernel sizes (e.g., 1×1, 3×3, 5×5)
- Uniform layer structure with bias=False (bias in static features)
"""
@@ -61,6 +62,7 @@ class CNNv2(nn.Module):
"""CNN v2 - Uniform 12D→4D Architecture
All layers: input RGBD (4D) + static (8D) = 12D → 4 channels
+ Per-layer kernel sizes supported (e.g., [1, 3, 5])
Uses bias=False (bias integrated in static features as 1.0)
TODO: Add quantization-aware training (QAT) for 8-bit weights
@@ -69,14 +71,18 @@ class CNNv2(nn.Module):
- Target: ~1.3 KB weights (vs 2.6 KB with f16)
"""
- def __init__(self, kernel_size=3, num_layers=3):
+ def __init__(self, kernel_sizes, num_layers=3):
super().__init__()
- self.kernel_size = kernel_size
+ if isinstance(kernel_sizes, int):
+ kernel_sizes = [kernel_sizes] * num_layers
+ assert len(kernel_sizes) == num_layers, "kernel_sizes must match num_layers"
+
+ self.kernel_sizes = kernel_sizes
self.num_layers = num_layers
self.layers = nn.ModuleList()
# All layers: 12D input (4 RGBD + 8 static) → 4D output
- for _ in range(num_layers):
+ for kernel_size in kernel_sizes:
self.layers.append(
nn.Conv2d(12, 4, kernel_size=kernel_size,
padding=kernel_size//2, bias=False)
@@ -295,11 +301,16 @@ def train(args):
detector=args.detector)
dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True)
+ # Parse kernel sizes
+ kernel_sizes = [int(k) for k in args.kernel_sizes.split(',')]
+ if len(kernel_sizes) == 1:
+ kernel_sizes = kernel_sizes * args.num_layers
+
# Create model
- model = CNNv2(kernel_size=args.kernel_size, num_layers=args.num_layers).to(device)
+ model = CNNv2(kernel_sizes=kernel_sizes, num_layers=args.num_layers).to(device)
total_params = sum(p.numel() for p in model.parameters())
- weights_per_layer = 12 * args.kernel_size * args.kernel_size * 4
- print(f"Model: {args.num_layers} layers, {args.kernel_size}×{args.kernel_size} kernels, {total_params} weights ({weights_per_layer}/layer)")
+ kernel_desc = ','.join(map(str, kernel_sizes))
+ print(f"Model: {args.num_layers} layers, kernel sizes [{kernel_desc}], {total_params} weights")
# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
@@ -343,7 +354,7 @@ def train(args):
'optimizer_state_dict': optimizer.state_dict(),
'loss': avg_loss,
'config': {
- 'kernel_size': args.kernel_size,
+ 'kernel_sizes': kernel_sizes,
'num_layers': args.num_layers,
'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias']
}
@@ -377,8 +388,8 @@ def main():
# Mix salient points with random samples for better generalization
# Model architecture
- parser.add_argument('--kernel-size', type=int, default=3,
- help='Kernel size (uniform for all layers, default: 3)')
+ parser.add_argument('--kernel-sizes', type=str, default='3',
+ help='Comma-separated kernel sizes per layer (e.g., "3,5,3"), single value replicates (default: 3)')
parser.add_argument('--num-layers', type=int, default=3,
help='Number of CNN layers (default: 3)')