diff options
| -rw-r--r-- | doc/CNN_V2.md | 10 | ||||
| -rw-r--r-- | doc/CNN_V2_BINARY_FORMAT.md | 2 | ||||
| -rw-r--r-- | tools/cnn_v2_test/index.html | 4 | ||||
| -rwxr-xr-x | training/train_cnn_v2.py | 12 | ||||
| -rw-r--r-- | workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl | 8 |
5 files changed, 18 insertions, 18 deletions
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md index 2913498..78854ce 100644 --- a/doc/CNN_V2.md +++ b/doc/CNN_V2.md @@ -122,12 +122,12 @@ let uv_x = coord.x / resolution.x; // Horizontal position [0,1] let uv_y = coord.y / resolution.y; // Vertical position [0,1] // Slot 6: Multi-frequency position encoding -let sin10_x = sin(10.0 * uv_x); // Periodic feature (frequency=10) +let sin20_y = sin(20.0 * uv_y); // Periodic feature (frequency=20, vertical) // Slot 7: Bias dimension (always 1.0) let bias = 1.0; // Learned bias per output channel -// Packed storage: [p0, p1, p2, p3, uv.x, uv.y, sin(10*uv.x), 1.0] +// Packed storage: [p0, p1, p2, p3, uv.x, uv.y, sin(20*uv.y), 1.0] ``` ### Feature Rationale @@ -136,7 +136,7 @@ let bias = 1.0; // Learned bias per output channel |---------|-----------|---------|----------| | p0-p3 | 4D | Parametric auxiliary features (mips, gradients, etc.) | Essential | | UV coords | 2D | Spatial position awareness | Essential | -| sin(10\*uv.x) | 1D | Periodic position encoding | Medium | +| sin(20\*uv.y) | 1D | Periodic position encoding (vertical) | Medium | | Bias | 1D | Learned bias (standard NN) | Essential | **Note:** Input image RGBD (mip 0) fed only to Layer 0. Subsequent layers see static features + previous layer output. @@ -149,8 +149,8 @@ let bias = 1.0; // Learned bias per output channel ### Future Feature Extensions -**Option: Replace sin(10\*uv.x) with:** -- `sin(20*uv.x)` - Higher frequency encoding +**Option: Additional encodings:** +- `sin(40*uv.y)` - Higher frequency encoding - `gray_mip1` - Multi-scale luminance - `dx`, `dy` - Sobel gradients - `variance` - Local texture measure diff --git a/doc/CNN_V2_BINARY_FORMAT.md b/doc/CNN_V2_BINARY_FORMAT.md index 5239e4b..f718960 100644 --- a/doc/CNN_V2_BINARY_FORMAT.md +++ b/doc/CNN_V2_BINARY_FORMAT.md @@ -146,7 +146,7 @@ Not stored in .bin file (computed at runtime): 4. **p3** - Parametric feature 3 (depth or from mip level) 5. **UV_X** - Normalized x coordinate [0,1] 6. **UV_Y** - Normalized y coordinate [0,1] -7. **sin(10 × UV_X)** - Spatial frequency encoding +7. **sin(20 × UV_Y)** - Spatial frequency encoding (vertical, frequency=20) 8. **1.0** - Bias term **Mip Level Usage (p0-p3):** diff --git a/tools/cnn_v2_test/index.html b/tools/cnn_v2_test/index.html index 88c4733..cab20ea 100644 --- a/tools/cnn_v2_test/index.html +++ b/tools/cnn_v2_test/index.html @@ -414,13 +414,13 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { let d = textureLoad(depth_tex, coord, 0).r; let uv_x = f32(coord.x) / f32(dims.x); let uv_y = 1.0 - (f32(coord.y) / f32(dims.y)); - let sin10_x = sin(10.0 * uv_x); + let sin20_y = sin(20.0 * uv_y); let packed = vec4<u32>( pack2x16float(vec2<f32>(rgba.r, rgba.g)), pack2x16float(vec2<f32>(rgba.b, d)), pack2x16float(vec2<f32>(uv_x, uv_y)), - pack2x16float(vec2<f32>(sin10_x, 1.0)) + pack2x16float(vec2<f32>(sin20_y, 1.0)) ); textureStore(output_tex, coord, packed); }`; diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py index 1487c08..a9a311a 100755 --- a/training/train_cnn_v2.py +++ b/training/train_cnn_v2.py @@ -30,13 +30,13 @@ def compute_static_features(rgb, depth=None, mip_level=0): mip_level: Mip level for p0-p3 (0=original, 1=half, 2=quarter, 3=eighth) Returns: - (H, W, 8) static features: [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias] + (H, W, 8) static features: [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias] Note: p0-p3 are parametric features generated from specified mip level TODO: Binary format should support arbitrary layout and ordering for feature vector (7D), alongside mip-level indication. Current layout is hardcoded as: - [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias] + [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias] Future: Allow experimentation with different feature combinations without shader recompilation. Examples: [R, G, B, dx, dy, uv_x, bias] or [mip1.r, mip2.g, laplacian, uv_x, sin20_x, bias] """ @@ -68,13 +68,13 @@ def compute_static_features(rgb, depth=None, mip_level=0): uv_y = np.linspace(0, 1, h)[:, None].repeat(w, axis=1).astype(np.float32) # Multi-frequency position encoding - sin10_x = np.sin(10.0 * uv_x).astype(np.float32) + sin20_y = np.sin(20.0 * uv_y).astype(np.float32) # Bias dimension (always 1.0) - replaces Conv2d bias parameter bias = np.ones((h, w), dtype=np.float32) - # Stack: [p0, p1, p2, p3, uv.x, uv.y, sin10_x, bias] - features = np.stack([p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias], axis=-1) + # Stack: [p0, p1, p2, p3, uv.x, uv.y, sin20_y, bias] + features = np.stack([p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias], axis=-1) return features @@ -376,7 +376,7 @@ def train(args): 'kernel_sizes': kernel_sizes, 'num_layers': args.num_layers, 'mip_level': args.mip_level, - 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin10_x', 'bias'] + 'features': ['p0', 'p1', 'p2', 'p3', 'uv.x', 'uv.y', 'sin20_y', 'bias'] } }, checkpoint_path) print(f" → Saved checkpoint: {checkpoint_path}") diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl index 29acddd..7039d96 100644 --- a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl +++ b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl @@ -1,5 +1,5 @@ // CNN v2 Static Features Compute Shader -// Generates 8D parametric features: [p0, p1, p2, p3, uv.x, uv.y, sin10_x, bias] +// Generates 8D parametric features: [p0, p1, p2, p3, uv.x, uv.y, sin20_y, bias] // p0-p3: Parametric features from specified mip level (0=mip0, 1=mip1, 2=mip2, 3=mip3) // Note: Input image RGBD (mip0) fed separately to Layer 0 // @@ -51,18 +51,18 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) { let uv_y = 1.0 - (f32(coord.y) / f32(dims.y)); // Multi-frequency position encoding - let sin10_x = sin(10.0 * uv_x); + let sin20_y = sin(20.0 * uv_y); // Bias dimension (always 1.0) let bias = 1.0; // Pack 8×f16 into 4×u32 (rgba32uint) - // [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias] + // [p0, p1, p2, p3, uv_x, uv_y, sin20_y, bias] let packed = vec4<u32>( pack2x16float(vec2<f32>(p0, p1)), pack2x16float(vec2<f32>(p2, p3)), pack2x16float(vec2<f32>(uv_x, uv_y)), - pack2x16float(vec2<f32>(sin10_x, bias)) + pack2x16float(vec2<f32>(sin20_y, bias)) ); textureStore(output_tex, coord, packed); |
