summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- doc/CNN_V2.md 10
-rw-r--r-- doc/CNN_V2_BINARY_FORMAT.md 2
-rw-r--r-- tools/cnn_v2_test/index.html 4
-rwxr-xr-x training/train_cnn_v2.py 12
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl 8
5 files changed, 18 insertions, 18 deletions
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index 2913498..78854ce 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -122,12 +122,12 @@ let uv_x = coord.x / resolution.x; // Horizontal position [0,1]
let uv_y = coord.y / resolution.y; // Vertical position [0,1]
// Slot 6: Multi-frequency position encoding
-let sin10_x = sin(10.0 * uv_x); // Periodic feature (frequency=10)
+let sin20_y = sin(20.0 * uv_y); // Periodic feature (frequency=20, vertical)
// Slot 7: Bias dimension (always 1.0)
let bias = 1.0; // Learned bias per output channel
-// Packed storage: [p0, p1, p2, p3, uv.x, uv.y, sin(10*uv.x), 1.0]
+// Packed storage: [p0, p1, p2, p3, uv.x, uv.y, sin(20*uv.y), 1.0]
```
### Feature Rationale
@@ -136,7 +136,7 @@ let bias = 1.0; // Learned bias per output channel
|---------|-----------|---------|----------|
| p0-p3 | 4D | Parametric auxiliary features (mips, gradients, etc.) | Essential |
| UV coords | 2D | Spatial position awareness | Essential |
-| sin(10\*uv.x) | 1D | Periodic position encoding | Medium |
+| sin(20\*uv.y) | 1D | Periodic position encoding (vertical) | Medium |
| Bias | 1D | Learned bias (standard NN) | Essential |
**Note:** Input image RGBD (mip 0) fed only to Layer 0. Subsequent layers see static features + previous layer output.
@@ -149,8 +149,8 @@ let bias = 1.0; // Learned bias per output channel
### Future Feature Extensions
-**Option: Replace sin(10\*uv.x) with:**
-- `sin(20*uv.x)` - Higher frequency encoding
+**Option: Additional encodings:**
+- `sin(40*uv.y)` - Higher frequency encoding
- `gray_mip1` - Multi-scale luminance
- `dx`, `dy` - Sobel gradients
- `variance` - Local texture measure
diff --git a/doc/CNN_V2_BINARY_FORMAT.md b/doc/CNN_V2_BINARY_FORMAT.md
index 5239e4b..f718960 100644
--- a/doc/CNN_V2_BINARY_FORMAT.md
+++ b/doc/CNN_V2_BINARY_FORMAT.md
@@ -146,7 +146,7 @@ Not stored in .bin file (computed at runtime):
4. **p3** - Parametric feature 3 (depth or from mip level)
5. **UV_X** - Normalized x coordinate [0,1]
6. **UV_Y** - Normalized y coordinate [0,1]
-7. **sin(10 × UV_X)** - Spatial frequency encoding
+7. **sin(20 × UV_Y)** - Spatial frequency encoding (vertical, frequency=20)
8. **1.0** - Bias term
**Mip Level Usage (p0-p3):**
diff --git a/tools/cnn_v2_test/index.html b/tools/cnn_v2_test/index.html
index 88c4733..cab20ea 100644
--- a/tools/cnn_v2_test/index.html
+++ b/tools/cnn_v2_test/index.html
@@ -414,13 +414,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
let d = textureLoad(depth_tex, coord, 0).r;
let uv_x = f32(coord.x) / f32(dims.x);
let uv_y = 1.0 - (f32(coord.y) / f32(dims.y));
- let sin10_x = sin(10.0 * uv_x);
+ let sin20_y = sin(20.0 * uv_y);
let packed = vec4<u32>(
pack2x16float(vec2<f32>(rgba.r, rgba.g)),
pack2x16float(vec2<f32>(rgba.b, d)),
pack2x16float(vec2<f32>(uv_x, uv_y)),
- pack2x16float(vec2<f32>(sin10_x, 1.0))
+ pack2x16float(vec2<f32>(sin20_y, 1.0))
);
textureStore(output_tex, coord, packed);
}`;
diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py
index 1487c08..a9a311a 100755
--- a/training/train_cnn_v2.py
+++ b/training/train_cnn_v2.py
@@ -30,13 +30,13 @@ def compute_static_features(rgb, depth=None, mip_level=0):
mip_level: Mip level for p0-p3 (0=original, 1=half, 2=quarter, 3=eighth)
Returns:
- (H, W, 8) static features: [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]
+ (H, W, 8) static features: [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias]
Note: p0-p3 are parametric features generated from specified mip level
TODO: Binary format should support arbitrary layout and ordering for feature vector (7D),
alongside mip-level indication. Current layout is hardcoded as:
- [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]
+ [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias]
Future: Allow experimentation with different feature combinations without shader recompilation.
Examples: [R, G, B, dx, dy, uv_x, bias] or [mip1.r, mip2.g, laplacian, uv_x, sin20_x, bias]
"""
@@ -68,13 +68,13 @@ def compute_static_features(rgb, depth=None, mip_level=0):
uv_y = np.linspace(0, 1, h)[:, None].repeat(w, axis=1).astype(np.float32)
# Multi-frequency position encoding
- sin10_x = np.sin(10.0 * uv_x).astype(np.float32)
+ sin20_y = np.sin(20.0 * uv_y).astype(np.float32)
# Bias dimension (always 1.0) - replaces Conv2d bias parameter
bias = np.ones((h, w), dtype=np.float32)
- # Stack: [p0, p1, p2, p3, uv.x, uv.y, sin10_x, bias]
- features = np.stack([p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias], axis=-1)
+ # Stack: [p0, p1, p2, p3, uv.x, uv.y, sin20_y, bias]
+ features = np.stack([p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias], axis=-1)
return features
@@ -376,7 +376,7 @@ def train(args):
'kernel_sizes': kernel_sizes,
'num_layers': args.num_layers,
'mip_level': args.mip_level,
- 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias']
+ 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin20_y', 'bias']
}
}, checkpoint_path)
print(f" → Saved checkpoint: {checkpoint_path}")
diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
index 29acddd..7039d96 100644
--- a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
@@ -1,5 +1,5 @@
// CNN v2 Static Features Compute Shader
-// Generates 8D parametric features: [p0, p1, p2, p3, uv.x, uv.y, sin10_x, bias]
+// Generates 8D parametric features: [p0, p1, p2, p3, uv.x, uv.y, sin20_y, bias]
// p0-p3: Parametric features from specified mip level (0=mip0, 1=mip1, 2=mip2, 3=mip3)
// Note: Input image RGBD (mip0) fed separately to Layer 0
//
@@ -51,18 +51,18 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
let uv_y = 1.0 - (f32(coord.y) / f32(dims.y));
// Multi-frequency position encoding
- let sin10_x = sin(10.0 * uv_x);
+ let sin20_y = sin(20.0 * uv_y);
// Bias dimension (always 1.0)
let bias = 1.0;
// Pack 8×f16 into 4×u32 (rgba32uint)
- // [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]
+ // [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias]
let packed = vec4<u32>(
pack2x16float(vec2<f32>(p0, p1)),
pack2x16float(vec2<f32>(p2, p3)),
pack2x16float(vec2<f32>(uv_x, uv_y)),
- pack2x16float(vec2<f32>(sin10_x, bias))
+ pack2x16float(vec2<f32>(sin20_y, bias))
);
textureStore(output_tex, coord, packed);