summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- doc/CNN_V2.md 30
-rw-r--r-- doc/CNN_V2_BINARY_FORMAT.md 31
-rw-r--r-- src/gpu/effects/cnn_v2_effect.cc 1
-rwxr-xr-x training/train_cnn_v2.py 6
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl 4
5 files changed, 72 insertions, 0 deletions
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index a66dc1d..2913498 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -645,6 +645,36 @@ workspaces/main/shaders/cnn_*.wgsl # Original v1 shaders
## Future Extensions
+### Flexible Feature Layout (Binary Format v3)
+
+**TODO:** Support arbitrary feature vector layouts and ordering in binary format.
+
+**Current Limitation:**
+- Feature layout hardcoded: `[p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]`
+- Shader must match training script exactly
+- Experimentation requires shader recompilation
+
+**Proposed Enhancement:**
+- Add feature descriptor to binary format header
+- Specify feature types, sources, and ordering
+- Runtime shader generation or dynamic feature indexing
+- Examples: `[R, G, B, dx, dy, uv_x, bias]` or `[mip1.r, mip2.g, laplacian, uv_x, sin20_x, bias]`
+
+**Benefits:**
+- Training experiments without C++/shader changes
+- A/B test different feature combinations
+- Single binary format, multiple architectures
+- Faster iteration on feature engineering
+
+**Implementation Options:**
+1. **Static approach:** Generate shader code from descriptor at load time
+2. **Dynamic approach:** Array-based indexing with feature map uniform
+3. **Hybrid:** Precompile common layouts, fall back to dynamic
+
+See `doc/CNN_V2_BINARY_FORMAT.md` for proposed descriptor format.
+
+---
+
### More Features (uint8 Packing)
```wgsl
diff --git a/doc/CNN_V2_BINARY_FORMAT.md b/doc/CNN_V2_BINARY_FORMAT.md
index fd758ee..5239e4b 100644
--- a/doc/CNN_V2_BINARY_FORMAT.md
+++ b/doc/CNN_V2_BINARY_FORMAT.md
@@ -196,6 +196,37 @@ if (cumulative != total_weights) { error("Total mismatch"); }
---
+## Future Extensions
+
+**TODO: Flexible Feature Layout**
+
+Current limitation: Feature vector layout is hardcoded as `[p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]`.
+
+Proposed enhancement for version 3:
+- Add feature descriptor section to header
+- Specify feature count, types, and ordering
+- Support arbitrary feature combinations, up to 8 entries including bias (e.g., `[R, G, B, dx, dy, uv_x, bias]`)
+- Allow runtime shader generation based on descriptor
+- Enable experimentation without recompiling shaders
+
+Example descriptor format:
+```
+struct FeatureDescriptor {
+ u32 feature_count; // Number of features (typically 7-8)
+ u32 feature_types[8]; // Type enum per feature
+ u32 feature_sources[8]; // Source enum (mip0, mip1, gradient, etc.)
+ u32 reserved[8]; // Future use
+}
+```
+
+Benefits:
+- Training can experiment with different feature combinations
+- No shader recompilation needed
+- Single binary format supports multiple architectures
+- Easier A/B testing of feature effectiveness
+
+---
+
## Related Files
- `training/export_cnn_v2_weights.py` - Binary export tool
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc
index 97e4790..566686e 100644
--- a/src/gpu/effects/cnn_v2_effect.cc
+++ b/src/gpu/effects/cnn_v2_effect.cc
@@ -88,6 +88,7 @@ void CNNv2Effect::load_weights() {
FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2'
// Support both version 1 (16-byte header) and version 2 (20-byte header with mip_level)
+ // TODO: Version 3 should include feature descriptor for arbitrary layout/ordering
if (version == 1) {
mip_level_ = 0; // Default for v1
} else if (version == 2) {
diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py
index 3d49d13..1487c08 100755
--- a/training/train_cnn_v2.py
+++ b/training/train_cnn_v2.py
@@ -33,6 +33,12 @@ def compute_static_features(rgb, depth=None, mip_level=0):
(H, W, 8) static features: [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]
Note: p0-p3 are parametric features generated from specified mip level
+
+ TODO: Binary format should support arbitrary layout and ordering for the feature vector (7D plus bias),
+ alongside mip-level indication. Current layout is hardcoded as:
+ [p0, p1, p2, p3, uv_x, uv_y, sin10_x, bias]
+ Future: Allow experimentation with different feature combinations without shader recompilation.
+ Examples: [R, G, B, dx, dy, uv_x, bias] or [mip1.r, mip2.g, laplacian, uv_x, sin20_x, bias]
"""
h, w = rgb.shape[:2]
diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
index f71fad2..29acddd 100644
--- a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
@@ -2,6 +2,10 @@
// Generates 8D parametric features: [p0, p1, p2, p3, uv.x, uv.y, sin10_x, bias]
// p0-p3: Parametric features from specified mip level (0=mip0, 1=mip1, 2=mip2, 3=mip3)
// Note: Input image RGBD (mip0) fed separately to Layer 0
+//
+// TODO: Binary format should support arbitrary layout and ordering for the feature vector (7D plus bias).
+// Current layout is hardcoded. Future versions should allow runtime-specified
+// feature combinations (e.g., [R, G, B, dx, dy, uv_x, bias] or custom encodings).
struct StaticFeatureParams {
mip_level: u32,