summaryrefslogtreecommitdiff
path: root/training
diff options
context:
space:
mode:
author skal <pascal.massimino@gmail.com> 2026-02-13 16:57:29 +0100
committer skal <pascal.massimino@gmail.com> 2026-02-13 16:57:29 +0100
commit 0793c20c1dd0c5f0c535f7da90337081939b2cfc (patch)
tree 86f708033d93d19470e1b016e5abe52e07b340a8 /training
parent 4c21145ce5e408dd38e8374eed320fcfac97c0c4 (diff)
CNN v2: Change feature #6 from sin(10*x) to sin(20*y)
Update positional encoding to use vertical coordinate at higher frequency.

Changes:
- train_cnn_v2.py: sin10_x → sin20_y (computed from uv_y)
- cnn_v2_static.wgsl: sin10_x → sin20_y (computed from uv_y)
- index.html: sin10_x → sin20_y (STATIC_SHADER)
- CNN_V2.md: Update feature descriptions and examples
- CNN_V2_BINARY_FORMAT.md: Update static features documentation

Feature vector: [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias]

Rationale: Higher frequency (20 vs 10) + vertical axis provides better spatial discrimination for position encoding.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Diffstat (limited to 'training')
-rwxr-xr-x training/train_cnn_v2.py 12
1 files changed, 6 insertions, 6 deletions
diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py
index 1487c08..a9a311a 100755
--- a/training/train_cnn_v2.py
+++ b/training/train_cnn_v2.py
@@ -30,13 +30,13 @@ def compute_static_features(rgb, depth=None, mip_level=0):
mip_level: Mip level for p0-p3 (0=original, 1=half, 2=quarter, 3=eighth)
Returns:
- (H, W, 8) static features: [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]
+ (H, W, 8) static features: [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias]
Note: p0-p3 are parametric features generated from specified mip level
TODO: Binary format should support arbitrary layout and ordering for feature vector (7D),
alongside mip-level indication. Current layout is hardcoded as:
- [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]
+ [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias]
Future: Allow experimentation with different feature combinations without shader recompilation.
Examples: [R, G, B, dx, dy, uv_x, bias] or [mip1.r, mip2.g, laplacian, uv_x, sin20_x, bias]
"""
@@ -68,13 +68,13 @@ def compute_static_features(rgb, depth=None, mip_level=0):
uv_y = np.linspace(0, 1, h)[:, None].repeat(w, axis=1).astype(np.float32)
# Multi-frequency position encoding
- sin10_x = np.sin(10.0 * uv_x).astype(np.float32)
+ sin20_y = np.sin(20.0 * uv_y).astype(np.float32)
# Bias dimension (always 1.0) - replaces Conv2d bias parameter
bias = np.ones((h, w), dtype=np.float32)
- # Stack: [p0, p1, p2, p3, uv.x, uv.y, sin10_x, bias]
- features = np.stack([p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias], axis=-1)
+ # Stack: [p0, p1, p2, p3, uv.x, uv.y, sin20_y, bias]
+ features = np.stack([p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias], axis=-1)
return features
@@ -376,7 +376,7 @@ def train(args):
'kernel_sizes': kernel_sizes,
'num_layers': args.num_layers,
'mip_level': args.mip_level,
- 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias']
+ 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin20_y', 'bias']
}
}, checkpoint_path)
print(f" → Saved checkpoint: {checkpoint_path}")