summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- doc/CNN_V2.md 14
-rw-r--r-- doc/HOWTO.md 9
-rw-r--r-- scripts/gemini_commit.bat 7
-rwxr-xr-x scripts/gemini_commit.sh 6
-rw-r--r-- scripts/gemini_end.bat 5
-rwxr-xr-x scripts/gemini_end.sh 5
-rw-r--r-- scripts/gemini_start.bat 8
-rwxr-xr-x scripts/gemini_start.sh 6
-rwxr-xr-x scripts/train_cnn_v2_full.sh 16
-rwxr-xr-x training/train_cnn_v2.py 14
10 files changed, 49 insertions, 41 deletions
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index 78854ce..c827187 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -326,12 +326,13 @@ class CNNv2(nn.Module):
kernel_sizes = [3, 3, 3] # Per-layer kernel sizes (e.g., [1,3,5])
num_layers = 3 # Number of CNN layers
mip_level = 0 # Mip level for p0-p3: 0=orig, 1=half, 2=quarter, 3=eighth
+grayscale_loss = False # Compute loss on grayscale (Y) instead of RGBA
learning_rate = 1e-3
batch_size = 16
epochs = 5000
# Dataset: Input RGB, Target RGBA (preserves alpha channel from image)
-# Model outputs RGBA, loss compares all 4 channels
+# Model outputs RGBA, loss compares all 4 channels (or grayscale if --grayscale-loss)
# Training loop (standard PyTorch f32)
for epoch in range(epochs):
@@ -344,7 +345,15 @@ for epoch in range(epochs):
# Forward pass
output = model(input_rgbd, static_feat)
- loss = criterion(output, target_batch)
+
+ # Loss computation (grayscale or RGBA)
+ if grayscale_loss:
+ # Convert RGBA to grayscale: Y = 0.299*R + 0.587*G + 0.114*B
+ output_gray = 0.299 * output[:, 0:1] + 0.587 * output[:, 1:2] + 0.114 * output[:, 2:3]
+ target_gray = 0.299 * target_batch[:, 0:1] + 0.587 * target_batch[:, 1:2] + 0.114 * target_batch[:, 2:3]
+ loss = criterion(output_gray, target_gray)
+ else:
+ loss = criterion(output, target_batch)
# Backward pass
optimizer.zero_grad()
@@ -361,6 +370,7 @@ torch.save({
'kernel_sizes': [3, 3, 3], # Per-layer kernel sizes
'num_layers': 3,
'mip_level': 0, # Mip level used for p0-p3
+ 'grayscale_loss': False, # Whether grayscale loss was used
'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias']
},
'epoch': epoch,
diff --git a/doc/HOWTO.md b/doc/HOWTO.md
index 2290aa8..1e8b58b 100644
--- a/doc/HOWTO.md
+++ b/doc/HOWTO.md
@@ -145,6 +145,9 @@ Enhanced CNN with parametric static features (7D input: RGBD + UV + sin encoding
# Custom architecture
./scripts/train_cnn_v2_full.sh --kernel-sizes 3,5,3 --num-layers 3 --mip-level 1
+# Grayscale loss (compute loss on luminance instead of RGBA)
+./scripts/train_cnn_v2_full.sh --grayscale-loss
+
# Custom directories
./scripts/train_cnn_v2_full.sh --input training/input --target training/target_2
@@ -188,6 +191,12 @@ Enhanced CNN with parametric static features (7D input: RGBD + UV + sin encoding
--input training/input/ --target training/target_2/ \
--mip-level 1 \
--epochs 100 --batch-size 16
+
+# Grayscale loss (compute loss on luminance Y = 0.299*R + 0.587*G + 0.114*B)
+./training/train_cnn_v2.py \
+ --input training/input/ --target training/target_2/ \
+ --grayscale-loss \
+ --epochs 100 --batch-size 16
```
**Export Binary Weights:**
diff --git a/scripts/gemini_commit.bat b/scripts/gemini_commit.bat
deleted file mode 100644
index f9d922c..0000000
--- a/scripts/gemini_commit.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-@echo off
-REM Ask Gemini to summarize work and update docs
-
-gemini --files ^
- TASKS.md ^
- NOTES.md ^
- "Summarize what was accomplished. Update TASKS.md with next steps."
diff --git a/scripts/gemini_commit.sh b/scripts/gemini_commit.sh
deleted file mode 100755
index ae327e4..0000000
--- a/scripts/gemini_commit.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-# Ask Gemini to summarize work and update docs
-
-gemini --files TASKS.md NOTES.md "Summarize what was accomplished.
- Update TASKS.md with next steps.
- Add warnings or decisions to NOTES.md."
diff --git a/scripts/gemini_end.bat b/scripts/gemini_end.bat
deleted file mode 100644
index ab4beba..0000000
--- a/scripts/gemini_end.bat
+++ /dev/null
@@ -1,5 +0,0 @@
-@echo off
-REM End-of-session summary
-
-gemini --files PROJECT_CONTEXT.md ^
- "Confirm no constraints were violated. Provide a short summary."
diff --git a/scripts/gemini_end.sh b/scripts/gemini_end.sh
deleted file mode 100755
index 5689acf..0000000
--- a/scripts/gemini_end.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sh
-# End-of-session summary
-
-gemini --files PROJECT_CONTEXT.md "Confirm no project constraints were violated.
- Provide a short end-of-session summary."
diff --git a/scripts/gemini_start.bat b/scripts/gemini_start.bat
deleted file mode 100644
index 22bca70..0000000
--- a/scripts/gemini_start.bat
+++ /dev/null
@@ -1,8 +0,0 @@
-@echo off
-REM Start a Gemini session with core context
-
-gemini --files ^
- PROJECT_CONTEXT.md ^
- BUILD.md ^
- PHASE2_COMPRESSION.md ^
- "Read the project context carefully. Summarize the project goals and current phase. Wait."
diff --git a/scripts/gemini_start.sh b/scripts/gemini_start.sh
deleted file mode 100755
index ea11bea..0000000
--- a/scripts/gemini_start.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-# Start a Gemini session with core context
-
-gemini --files PROJECT_CONTEXT.md BUILD.md PHASE2_COMPRESSION.md "Read the project context carefully.
- Summarize the project goals and current phase.
- Wait for further instructions."
diff --git a/scripts/train_cnn_v2_full.sh b/scripts/train_cnn_v2_full.sh
index 8b09191..e444f20 100755
--- a/scripts/train_cnn_v2_full.sh
+++ b/scripts/train_cnn_v2_full.sh
@@ -16,6 +16,7 @@
# --kernel-sizes K Comma-separated kernel sizes (default: 3,3,3)
# --num-layers N Number of layers (default: 3)
# --mip-level N Mip level for p0-p3 features: 0-3 (default: 0)
+# --grayscale-loss Compute loss on grayscale instead of RGBA
#
# PATCH PARAMETERS:
# --patch-size N Patch size (default: 8)
@@ -60,6 +61,7 @@ DETECTOR="harris"
KERNEL_SIZES="3,3,3"
NUM_LAYERS=3
MIP_LEVEL=0
+GRAYSCALE_LOSS=false
FULL_IMAGE_MODE=false
IMAGE_SIZE=256
@@ -143,6 +145,10 @@ while [[ $# -gt 0 ]]; do
MIP_LEVEL="$2"
shift 2
;;
+ --grayscale-loss)
+ GRAYSCALE_LOSS=true
+ shift
+ ;;
--patch-size)
if [ -z "$2" ]; then
echo "Error: --patch-size requires a number argument"
@@ -267,6 +273,13 @@ fi
if [ "$VALIDATE_ONLY" = false ]; then
# Step 1: Train model
echo "[1/4] Training CNN v2 model..."
+
+# Build optional flags
+OPTIONAL_FLAGS=""
+if [ "$GRAYSCALE_LOSS" = true ]; then
+ OPTIONAL_FLAGS="$OPTIONAL_FLAGS --grayscale-loss"
+fi
+
python3 training/train_cnn_v2.py \
--input "$INPUT_DIR" \
--target "$TARGET_DIR" \
@@ -277,7 +290,8 @@ python3 training/train_cnn_v2.py \
--epochs $EPOCHS \
--batch-size $BATCH_SIZE \
--checkpoint-dir "$CHECKPOINT_DIR" \
- --checkpoint-every $CHECKPOINT_EVERY
+ --checkpoint-every $CHECKPOINT_EVERY \
+ $OPTIONAL_FLAGS
if [ $? -ne 0 ]; then
echo "Error: Training failed"
diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py
index a9a311a..abe07bc 100755
--- a/training/train_cnn_v2.py
+++ b/training/train_cnn_v2.py
@@ -350,7 +350,16 @@ def train(args):
optimizer.zero_grad()
output = model(input_rgbd, static_feat)
- loss = criterion(output, target)
+
+ # Compute loss (grayscale or RGBA)
+ if args.grayscale_loss:
+ # Convert RGBA to grayscale: Y = 0.299*R + 0.587*G + 0.114*B
+ output_gray = 0.299 * output[:, 0:1] + 0.587 * output[:, 1:2] + 0.114 * output[:, 2:3]
+ target_gray = 0.299 * target[:, 0:1] + 0.587 * target[:, 1:2] + 0.114 * target[:, 2:3]
+ loss = criterion(output_gray, target_gray)
+ else:
+ loss = criterion(output, target)
+
loss.backward()
optimizer.step()
@@ -376,6 +385,7 @@ def train(args):
'kernel_sizes': kernel_sizes,
'num_layers': args.num_layers,
'mip_level': args.mip_level,
+ 'grayscale_loss': args.grayscale_loss,
'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin20_y', 'bias']
}
}, checkpoint_path)
@@ -419,6 +429,8 @@ def main():
parser.add_argument('--epochs', type=int, default=5000, help='Training epochs')
parser.add_argument('--batch-size', type=int, default=16, help='Batch size')
parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
+ parser.add_argument('--grayscale-loss', action='store_true',
+ help='Compute loss on grayscale (Y = 0.299*R + 0.587*G + 0.114*B) instead of RGBA')
parser.add_argument('--checkpoint-dir', type=str, default='checkpoints',
help='Checkpoint directory')
parser.add_argument('--checkpoint-every', type=int, default=1000,