diff options
Diffstat (limited to 'doc')
-rw-r--r--  doc/CNN_V2.md | 14 ++++++++++++--
-rw-r--r--  doc/HOWTO.md  |  9 +++++++++
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index 78854ce..c827187 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -326,12 +326,13 @@ class CNNv2(nn.Module):
 kernel_sizes = [3, 3, 3]  # Per-layer kernel sizes (e.g., [1,3,5])
 num_layers = 3            # Number of CNN layers
 mip_level = 0             # Mip level for p0-p3: 0=orig, 1=half, 2=quarter, 3=eighth
+grayscale_loss = False    # Compute loss on grayscale (Y) instead of RGBA
 learning_rate = 1e-3
 batch_size = 16
 epochs = 5000
 
 # Dataset: Input RGB, Target RGBA (preserves alpha channel from image)
-# Model outputs RGBA, loss compares all 4 channels
+# Model outputs RGBA, loss compares all 4 channels (or grayscale if --grayscale-loss)
 
 # Training loop (standard PyTorch f32)
 for epoch in range(epochs):
@@ -344,7 +345,15 @@ for epoch in range(epochs):
 
     # Forward pass
     output = model(input_rgbd, static_feat)
-    loss = criterion(output, target_batch)
+
+    # Loss computation (grayscale or RGBA)
+    if grayscale_loss:
+        # Convert RGBA to grayscale: Y = 0.299*R + 0.587*G + 0.114*B
+        output_gray = 0.299 * output[:, 0:1] + 0.587 * output[:, 1:2] + 0.114 * output[:, 2:3]
+        target_gray = 0.299 * target_batch[:, 0:1] + 0.587 * target_batch[:, 1:2] + 0.114 * target_batch[:, 2:3]
+        loss = criterion(output_gray, target_gray)
+    else:
+        loss = criterion(output, target_batch)
 
     # Backward pass
     optimizer.zero_grad()
@@ -361,6 +370,7 @@ torch.save({
         'kernel_sizes': [3, 3, 3],  # Per-layer kernel sizes
         'num_layers': 3,
         'mip_level': 0,             # Mip level used for p0-p3
+        'grayscale_loss': False,    # Whether grayscale loss was used
         'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias']
     },
     'epoch': epoch,
diff --git a/doc/HOWTO.md b/doc/HOWTO.md
index 2290aa8..1e8b58b 100644
--- a/doc/HOWTO.md
+++ b/doc/HOWTO.md
@@ -145,6 +145,9 @@ Enhanced CNN with parametric static features (7D input: RGBD + UV + sin encoding
 
 # Custom architecture
 ./scripts/train_cnn_v2_full.sh --kernel-sizes 3,5,3 --num-layers 3 --mip-level 1
 
+# Grayscale loss (compute loss on luminance instead of RGBA)
+./scripts/train_cnn_v2_full.sh --grayscale-loss
+
 # Custom directories
 ./scripts/train_cnn_v2_full.sh --input training/input --target training/target_2
@@ -188,6 +191,12 @@ Enhanced CNN with parametric static features (7D input: RGBD + UV + sin encoding
     --input training/input/ --target training/target_2/ \
     --mip-level 1 \
     --epochs 100 --batch-size 16
+
+# Grayscale loss (compute loss on luminance Y = 0.299*R + 0.587*G + 0.114*B)
+./training/train_cnn_v2.py \
+    --input training/input/ --target training/target_2/ \
+    --grayscale-loss \
+    --epochs 100 --batch-size 16
 ```
 
 **Export Binary Weights:**
