summaryrefslogtreecommitdiff
path: root/doc/CNN_V2.md
diff options
context:
space:
mode:
authorskal <pascal.massimino@gmail.com>2026-02-13 17:46:09 +0100
committerskal <pascal.massimino@gmail.com>2026-02-13 17:46:09 +0100
commita7340d378909cadbfd72dbd1f5b756f907c2a3e0 (patch)
tree60a34dd084d746f0c6dad50b0d5cc7f20bc0c409 /doc/CNN_V2.md
parentf6b3ea72a03850654b69986bc82bb249aaabe2e3 (diff)
CNN v2 training: Add --grayscale-loss option for luminance-based loss computation
Add option to compute loss on grayscale (Y = 0.299*R + 0.587*G + 0.114*B) instead of full RGBA channels. Useful for training models that prioritize luminance accuracy over color accuracy. Changes: - training/train_cnn_v2.py: Add --grayscale-loss flag and grayscale conversion in loss computation - scripts/train_cnn_v2_full.sh: Add --grayscale-loss parameter support - doc/CNN_V2.md: Document grayscale loss in training configuration and checkpoint format - doc/HOWTO.md: Add usage examples for --grayscale-loss flag Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Diffstat (limited to 'doc/CNN_V2.md')
-rw-r--r--doc/CNN_V2.md14
1 file changed, 12 insertions, 2 deletions
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index 78854ce..c827187 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -326,12 +326,13 @@ class CNNv2(nn.Module):
kernel_sizes = [3, 3, 3] # Per-layer kernel sizes (e.g., [1,3,5])
num_layers = 3 # Number of CNN layers
mip_level = 0 # Mip level for p0-p3: 0=orig, 1=half, 2=quarter, 3=eighth
+grayscale_loss = False # Compute loss on grayscale (Y) instead of RGBA
learning_rate = 1e-3
batch_size = 16
epochs = 5000
# Dataset: Input RGB, Target RGBA (preserves alpha channel from image)
-# Model outputs RGBA, loss compares all 4 channels
+# Model outputs RGBA, loss compares all 4 channels (or grayscale if --grayscale-loss)
# Training loop (standard PyTorch f32)
for epoch in range(epochs):
@@ -344,7 +345,15 @@ for epoch in range(epochs):
# Forward pass
output = model(input_rgbd, static_feat)
- loss = criterion(output, target_batch)
+
+ # Loss computation (grayscale or RGBA)
+ if grayscale_loss:
+ # Convert RGBA to grayscale: Y = 0.299*R + 0.587*G + 0.114*B
+ output_gray = 0.299 * output[:, 0:1] + 0.587 * output[:, 1:2] + 0.114 * output[:, 2:3]
target_gray = 0.299 * target_batch[:, 0:1] + 0.587 * target_batch[:, 1:2] + 0.114 * target_batch[:, 2:3]
+ loss = criterion(output_gray, target_gray)
+ else:
+ loss = criterion(output, target_batch)
# Backward pass
optimizer.zero_grad()
@@ -361,6 +370,7 @@ torch.save({
'kernel_sizes': [3, 3, 3], # Per-layer kernel sizes
'num_layers': 3,
'mip_level': 0, # Mip level used for p0-p3
+ 'grayscale_loss': False, # Whether grayscale loss was used
'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias']
},
'epoch': epoch,