diff options
| author | skal <pascal.massimino@gmail.com> | 2026-02-13 17:46:09 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-13 17:46:09 +0100 |
| commit | a7340d378909cadbfd72dbd1f5b756f907c2a3e0 (patch) | |
| tree | 60a34dd084d746f0c6dad50b0d5cc7f20bc0c409 | |
| parent | f6b3ea72a03850654b69986bc82bb249aaabe2e3 (diff) | |
CNN v2 training: Add --grayscale-loss option for luminance-based loss computation
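Add option to compute loss on grayscale luminance (Y = 0.299*R + 0.587*G + 0.114*B) instead of the full RGBA channels. In this mode only the R, G and B channels contribute to the loss; the alpha channel is ignored. Useful for training models that prioritize luminance accuracy over color accuracy.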
Changes:
- training/train_cnn_v2.py: Add --grayscale-loss flag and grayscale conversion in loss computation
- scripts/train_cnn_v2_full.sh: Add --grayscale-loss parameter support
- doc/CNN_V2.md: Document grayscale loss in training configuration and checkpoint format
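- doc/HOWTO.md: Add usage examples for --grayscale-loss flag
- scripts/gemini_commit.{bat,sh}, scripts/gemini_end.{bat,sh}, scripts/gemini_start.{bat,sh}: Remove the Gemini session helper scripts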
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
| -rw-r--r-- | doc/CNN_V2.md | 14 | ||||
| -rw-r--r-- | doc/HOWTO.md | 9 | ||||
| -rw-r--r-- | scripts/gemini_commit.bat | 7 | ||||
| -rwxr-xr-x | scripts/gemini_commit.sh | 6 | ||||
| -rw-r--r-- | scripts/gemini_end.bat | 5 | ||||
| -rwxr-xr-x | scripts/gemini_end.sh | 5 | ||||
| -rw-r--r-- | scripts/gemini_start.bat | 8 | ||||
| -rwxr-xr-x | scripts/gemini_start.sh | 6 | ||||
| -rwxr-xr-x | scripts/train_cnn_v2_full.sh | 16 | ||||
| -rwxr-xr-x | training/train_cnn_v2.py | 14 |
10 files changed, 49 insertions, 41 deletions
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md index 78854ce..c827187 100644 --- a/doc/CNN_V2.md +++ b/doc/CNN_V2.md @@ -326,12 +326,13 @@ class CNNv2(nn.Module): kernel_sizes = [3, 3, 3] # Per-layer kernel sizes (e.g., [1,3,5]) num_layers = 3 # Number of CNN layers mip_level = 0 # Mip level for p0-p3: 0=orig, 1=half, 2=quarter, 3=eighth +grayscale_loss = False # Compute loss on grayscale (Y) instead of RGBA learning_rate = 1e-3 batch_size = 16 epochs = 5000 # Dataset: Input RGB, Target RGBA (preserves alpha channel from image) -# Model outputs RGBA, loss compares all 4 channels +# Model outputs RGBA, loss compares all 4 channels (or grayscale if --grayscale-loss) # Training loop (standard PyTorch f32) for epoch in range(epochs): @@ -344,7 +345,15 @@ for epoch in range(epochs): # Forward pass output = model(input_rgbd, static_feat) - loss = criterion(output, target_batch) + + # Loss computation (grayscale or RGBA) + if grayscale_loss: + # Convert RGBA to grayscale: Y = 0.299*R + 0.587*G + 0.114*B + output_gray = 0.299 * output[:, 0:1] + 0.587 * output[:, 1:2] + 0.114 * output[:, 2:3] + target_gray = 0.299 * target[:, 0:1] + 0.587 * target[:, 1:2] + 0.114 * target[:, 2:3] + loss = criterion(output_gray, target_gray) + else: + loss = criterion(output, target_batch) # Backward pass optimizer.zero_grad() @@ -361,6 +370,7 @@ torch.save({ 'kernel_sizes': [3, 3, 3], # Per-layer kernel sizes 'num_layers': 3, 'mip_level': 0, # Mip level used for p0-p3 + 'grayscale_loss': False, # Whether grayscale loss was used 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias'] }, 'epoch': epoch, diff --git a/doc/HOWTO.md b/doc/HOWTO.md index 2290aa8..1e8b58b 100644 --- a/doc/HOWTO.md +++ b/doc/HOWTO.md @@ -145,6 +145,9 @@ Enhanced CNN with parametric static features (7D input: RGBD + UV + sin encoding # Custom architecture ./scripts/train_cnn_v2_full.sh --kernel-sizes 3,5,3 --num-layers 3 --mip-level 1 +# Grayscale loss (compute loss on luminance instead of RGBA) 
+./scripts/train_cnn_v2_full.sh --grayscale-loss + # Custom directories ./scripts/train_cnn_v2_full.sh --input training/input --target training/target_2 @@ -188,6 +191,12 @@ Enhanced CNN with parametric static features (7D input: RGBD + UV + sin encoding --input training/input/ --target training/target_2/ \ --mip-level 1 \ --epochs 100 --batch-size 16 + +# Grayscale loss (compute loss on luminance Y = 0.299*R + 0.587*G + 0.114*B) +./training/train_cnn_v2.py \ + --input training/input/ --target training/target_2/ \ + --grayscale-loss \ + --epochs 100 --batch-size 16 ``` **Export Binary Weights:** diff --git a/scripts/gemini_commit.bat b/scripts/gemini_commit.bat deleted file mode 100644 index f9d922c..0000000 --- a/scripts/gemini_commit.bat +++ /dev/null @@ -1,7 +0,0 @@ -@echo off -REM Ask Gemini to summarize work and update docs - -gemini --files ^ - TASKS.md ^ - NOTES.md ^ - "Summarize what was accomplished. Update TASKS.md with next steps." diff --git a/scripts/gemini_commit.sh b/scripts/gemini_commit.sh deleted file mode 100755 index ae327e4..0000000 --- a/scripts/gemini_commit.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# Ask Gemini to summarize work and update docs - -gemini --files TASKS.md NOTES.md "Summarize what was accomplished. - Update TASKS.md with next steps. - Add warnings or decisions to NOTES.md." diff --git a/scripts/gemini_end.bat b/scripts/gemini_end.bat deleted file mode 100644 index ab4beba..0000000 --- a/scripts/gemini_end.bat +++ /dev/null @@ -1,5 +0,0 @@ -@echo off -REM End-of-session summary - -gemini --files PROJECT_CONTEXT.md ^ - "Confirm no constraints were violated. Provide a short summary." diff --git a/scripts/gemini_end.sh b/scripts/gemini_end.sh deleted file mode 100755 index 5689acf..0000000 --- a/scripts/gemini_end.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -# End-of-session summary - -gemini --files PROJECT_CONTEXT.md "Confirm no project constraints were violated. - Provide a short end-of-session summary." 
diff --git a/scripts/gemini_start.bat b/scripts/gemini_start.bat deleted file mode 100644 index 22bca70..0000000 --- a/scripts/gemini_start.bat +++ /dev/null @@ -1,8 +0,0 @@ -@echo off -REM Start a Gemini session with core context - -gemini --files ^ - PROJECT_CONTEXT.md ^ - BUILD.md ^ - PHASE2_COMPRESSION.md ^ - "Read the project context carefully. Summarize the project goals and current phase. Wait." diff --git a/scripts/gemini_start.sh b/scripts/gemini_start.sh deleted file mode 100755 index ea11bea..0000000 --- a/scripts/gemini_start.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# Start a Gemini session with core context - -gemini --files PROJECT_CONTEXT.md BUILD.md PHASE2_COMPRESSION.md "Read the project context carefully. - Summarize the project goals and current phase. - Wait for further instructions." diff --git a/scripts/train_cnn_v2_full.sh b/scripts/train_cnn_v2_full.sh index 8b09191..e444f20 100755 --- a/scripts/train_cnn_v2_full.sh +++ b/scripts/train_cnn_v2_full.sh @@ -16,6 +16,7 @@ # --kernel-sizes K Comma-separated kernel sizes (default: 3,3,3) # --num-layers N Number of layers (default: 3) # --mip-level N Mip level for p0-p3 features: 0-3 (default: 0) +# --grayscale-loss Compute loss on grayscale instead of RGBA # # PATCH PARAMETERS: # --patch-size N Patch size (default: 8) @@ -60,6 +61,7 @@ DETECTOR="harris" KERNEL_SIZES="3,3,3" NUM_LAYERS=3 MIP_LEVEL=0 +GRAYSCALE_LOSS=false FULL_IMAGE_MODE=false IMAGE_SIZE=256 @@ -143,6 +145,10 @@ while [[ $# -gt 0 ]]; do MIP_LEVEL="$2" shift 2 ;; + --grayscale-loss) + GRAYSCALE_LOSS=true + shift + ;; --patch-size) if [ -z "$2" ]; then echo "Error: --patch-size requires a number argument" @@ -267,6 +273,13 @@ fi if [ "$VALIDATE_ONLY" = false ]; then # Step 1: Train model echo "[1/4] Training CNN v2 model..." 
+ +# Build optional flags +OPTIONAL_FLAGS="" +if [ "$GRAYSCALE_LOSS" = true ]; then + OPTIONAL_FLAGS="$OPTIONAL_FLAGS --grayscale-loss" +fi + python3 training/train_cnn_v2.py \ --input "$INPUT_DIR" \ --target "$TARGET_DIR" \ @@ -277,7 +290,8 @@ python3 training/train_cnn_v2.py \ --epochs $EPOCHS \ --batch-size $BATCH_SIZE \ --checkpoint-dir "$CHECKPOINT_DIR" \ - --checkpoint-every $CHECKPOINT_EVERY + --checkpoint-every $CHECKPOINT_EVERY \ + $OPTIONAL_FLAGS if [ $? -ne 0 ]; then echo "Error: Training failed" diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py index a9a311a..abe07bc 100755 --- a/training/train_cnn_v2.py +++ b/training/train_cnn_v2.py @@ -350,7 +350,16 @@ def train(args): optimizer.zero_grad() output = model(input_rgbd, static_feat) - loss = criterion(output, target) + + # Compute loss (grayscale or RGBA) + if args.grayscale_loss: + # Convert RGBA to grayscale: Y = 0.299*R + 0.587*G + 0.114*B + output_gray = 0.299 * output[:, 0:1] + 0.587 * output[:, 1:2] + 0.114 * output[:, 2:3] + target_gray = 0.299 * target[:, 0:1] + 0.587 * target[:, 1:2] + 0.114 * target[:, 2:3] + loss = criterion(output_gray, target_gray) + else: + loss = criterion(output, target) + loss.backward() optimizer.step() @@ -376,6 +385,7 @@ def train(args): 'kernel_sizes': kernel_sizes, 'num_layers': args.num_layers, 'mip_level': args.mip_level, + 'grayscale_loss': args.grayscale_loss, 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin20_y', 'bias'] } }, checkpoint_path) @@ -419,6 +429,8 @@ def main(): parser.add_argument('--epochs', type=int, default=5000, help='Training epochs') parser.add_argument('--batch-size', type=int, default=16, help='Batch size') parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate') + parser.add_argument('--grayscale-loss', action='store_true', + help='Compute loss on grayscale (Y = 0.299*R + 0.587*G + 0.114*B) instead of RGBA') parser.add_argument('--checkpoint-dir', type=str, default='checkpoints', help='Checkpoint 
directory') parser.add_argument('--checkpoint-every', type=int, default=1000, |
