diff options
| author | skal <pascal.massimino@gmail.com> | 2026-02-12 11:48:02 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-12 11:48:02 +0100 |
| commit | c878631f24ddb7514dd4db3d7ace6a0a296d4157 (patch) | |
| tree | a24ccffc8997a7e0cc0270c59c599ef44d0086a8 /scripts | |
| parent | f4ef706409ad44cac26abb46fe8b2ddb78ec6a9c (diff) | |
Fix: CNN v2 training - handle variable image sizes
Training script now resizes all images to fixed size before batching.
Issue: RuntimeError when batching variable-sized images
- Images had different dimensions (376x626 vs 344x361)
- PyTorch DataLoader requires uniform tensor sizes for batching
Solution:
- Add --image-size parameter (default: 256)
- Resize all images to target_size using LANCZOS interpolation
- Makes training independent of the source images' dimensions and aspect ratios
Changes:
- train_cnn_v2.py: ImagePairDataset now resizes to fixed dimensions
- train_cnn_v2_full.sh: Added IMAGE_SIZE=256 configuration
Tested: 8 image pairs, variable sizes → uniform 256×256 batches
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Diffstat (limited to 'scripts')
| -rwxr-xr-x | scripts/train_cnn_v2_full.sh | 131 |
1 files changed, 131 insertions, 0 deletions
#!/bin/bash
# scripts/train_cnn_v2_full.sh (new file, mode 100755)
#
# Complete CNN v2 Training Pipeline
# Train → Export → Build → Validate
#
# Runs from the project root (the parent of this script's directory) and:
#   1. trains the model with training/train_cnn_v2.py,
#   2. exports the final checkpoint to WGSL shaders,
#   3. rebuilds the demo64k target,
#   4. renders one test image through selected checkpoints so the training
#      progression can be reviewed visually.
#
# Exit status: 0 on success; non-zero if training, export, a demo build, or
# any per-checkpoint validation render failed.

set -euo pipefail

PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$PROJECT_ROOT"

# Configuration
readonly INPUT_DIR="training/input"
readonly TARGET_DIR="training/target_2"
readonly CHECKPOINT_DIR="checkpoints"
readonly VALIDATION_DIR="validation_results"
readonly SHADER_DIR="workspaces/main/shaders"
readonly EPOCHS=10000
readonly CHECKPOINT_EVERY=500
readonly BATCH_SIZE=8
readonly IMAGE_SIZE=256
# Arrays (not space-separated strings) so each value is passed as its own
# argument without relying on unquoted word splitting.
readonly -a KERNEL_SIZES=(1 3 5)
readonly -a CHANNELS=(16 8 4)

# Visual assessment: first input image, rendered at every
# CHECKPOINT_INTERVAL-th epoch plus the final epoch.
readonly TEST_IMAGE="$INPUT_DIR/img_000.png"
readonly CHECKPOINT_INTERVAL=1000

# Print an error message to stderr and abort the script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Run a command with its output hidden; if it fails, replay the captured
# output on stderr so the failure can be diagnosed.
# Arguments: the command and its arguments.
# Returns:   the command's exit status.
run_quiet() {
  local log status=0
  log=$(mktemp) || return 1
  "$@" >"$log" 2>&1 || status=$?
  if (( status != 0 )); then
    cat -- "$log" >&2
  fi
  rm -f -- "$log"
  return "$status"
}

# Print the epoch number encoded in a checkpoint path
# (".../checkpoint_epoch_<N>...").
# Returns non-zero when the file name carries no epoch number, e.g. when the
# glob matched nothing and the literal pattern was passed in.
checkpoint_epoch() {
  local name=${1##*/}
  [[ "$name" =~ ^checkpoint_epoch_([0-9]+) ]] || return 1
  # Force base 10 so a zero-padded epoch is not parsed as octal.
  printf '%d\n' "$((10#${BASH_REMATCH[1]}))"
}

# Print the most recently modified checkpoint in CHECKPOINT_DIR.
# Returns non-zero when there is none.
latest_checkpoint() {
  local candidate newest=""
  for candidate in "$CHECKPOINT_DIR"/checkpoint_epoch_*.pth; do
    [[ -f "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  [[ -n "$newest" ]] || return 1
  printf '%s\n' "$newest"
}

# Export one checkpoint ($1) to WGSL shaders in SHADER_DIR.
export_shaders() {
  python3 training/export_cnn_v2_shader.py "$1" \
    --output-dir "$SHADER_DIR"
}

# Export checkpoint $1, rebuild cnn_test, and render TEST_IMAGE into
# VALIDATION_DIR/epoch_<$2>_output.png.
# Returns non-zero as soon as one of the three steps fails.
render_checkpoint() {
  local checkpoint=$1 epoch=$2
  run_quiet export_shaders "$checkpoint" || return
  run_quiet cmake --build build -j4 --target cnn_test || return
  build/cnn_test "$TEST_IMAGE" \
    "$VALIDATION_DIR/epoch_${epoch}_output.png" 2>/dev/null
}

main() {
  local final_checkpoint checkpoint epoch
  local failures=0

  echo "=== CNN v2 Complete Training Pipeline ==="
  echo "Input: $INPUT_DIR"
  echo "Target: $TARGET_DIR"
  echo "Epochs: $EPOCHS"
  echo "Checkpoint interval: $CHECKPOINT_EVERY"
  echo ""

  # Step 1: Train model
  # Failures are tested directly on the command: with `set -e` active, a
  # separate `[ $? -ne 0 ]` check afterwards would never be reached.
  echo "[1/4] Training CNN v2 model..."
  python3 training/train_cnn_v2.py \
    --input "$INPUT_DIR" \
    --target "$TARGET_DIR" \
    --image-size "$IMAGE_SIZE" \
    --kernel-sizes "${KERNEL_SIZES[@]}" \
    --channels "${CHANNELS[@]}" \
    --epochs "$EPOCHS" \
    --batch-size "$BATCH_SIZE" \
    --checkpoint-dir "$CHECKPOINT_DIR" \
    --checkpoint-every "$CHECKPOINT_EVERY" \
    || die "Training failed"

  echo ""
  echo "Training complete!"
  echo ""

  # Step 2: Export final checkpoint to shaders
  final_checkpoint="$CHECKPOINT_DIR/checkpoint_epoch_${EPOCHS}.pth"
  if [[ ! -f "$final_checkpoint" ]]; then
    echo "Warning: Final checkpoint not found, using latest available..."
    final_checkpoint=$(latest_checkpoint) \
      || die "No checkpoints found in $CHECKPOINT_DIR"
  fi

  echo "[2/4] Exporting final checkpoint to WGSL shaders..."
  echo "Checkpoint: $final_checkpoint"
  export_shaders "$final_checkpoint" || die "Shader export failed"

  echo ""

  # Step 3: Rebuild with new shaders
  echo "[3/4] Rebuilding demo with new shaders..."
  run_quiet cmake --build build -j4 --target demo64k || die "Build failed"

  echo " → Build complete"
  echo ""

  # Step 4: Visual assessment - process checkpoints at intervals
  echo "[4/4] Visual assessment of training progression..."
  mkdir -p "$VALIDATION_DIR"

  if [[ ! -f "$TEST_IMAGE" ]]; then
    echo "Warning: test image $TEST_IMAGE not found," \
      "skipping visual assessment" >&2
    failures=1
  else
    echo " Processing checkpoints (every ${CHECKPOINT_INTERVAL} epochs)..."

    for checkpoint in "$CHECKPOINT_DIR"/checkpoint_epoch_*.pth; do
      [[ -f "$checkpoint" ]] || continue
      epoch=$(checkpoint_epoch "$checkpoint") || continue

      # Only process checkpoints at intervals, plus the final epoch.
      if (( epoch % CHECKPOINT_INTERVAL != 0 && epoch != EPOCHS )); then
        continue
      fi

      echo " Epoch $epoch..."
      # A failing checkpoint must not abort the run: the shader directory
      # would be left holding an intermediate checkpoint's export.
      if ! render_checkpoint "$checkpoint" "$epoch"; then
        echo "Warning: validation failed for epoch $epoch" >&2
        failures=$((failures + 1))
      fi
    done
  fi

  # Restore final checkpoint shaders
  run_quiet export_shaders "$final_checkpoint" \
    || die "Could not restore final checkpoint shaders"
  run_quiet cmake --build build -j4 --target demo64k \
    || die "Final rebuild failed"

  echo ""
  echo "=== Training Pipeline Complete ==="
  echo ""
  echo "Results:"
  echo " - Checkpoints: $CHECKPOINT_DIR"
  echo " - Visual progression: $VALIDATION_DIR"
  echo " - Final shaders: $SHADER_DIR/cnn_v2_layer_*.wgsl"
  echo ""
  echo "Opening results directory..."
  # Best effort: neither opener may exist (e.g. on a headless machine).
  open "$VALIDATION_DIR" 2>/dev/null \
    || xdg-open "$VALIDATION_DIR" 2>/dev/null \
    || true

  echo ""
  echo "Run demo to see final result:"
  echo " ./build/demo64k"

  if (( failures > 0 )); then
    echo "Warning: visual assessment was incomplete" \
      "(${failures} problem(s), see above)" >&2
    return 1
  fi
}

main "$@"
