 doc/CNN_V2.md                                      | 9 ++++++++-
 tools/cnn_v2_test/index.html                       | 8 +++-----
 training/train_cnn_v2.py                           | 4 ++--
 workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl | 8 +++-----
 4 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index abef606..fa00b32 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -18,11 +18,12 @@ CNN v2 extends the original CNN post-processing effect with parametric static fe
 - Bias integrated as static feature dimension
 - Storage buffer architecture (dynamic layer count)
 - Binary weight format v2 for runtime loading
+- Sigmoid activation for layer 0 and final layer (smooth [0,1] mapping)
 
 **Status:** ✅ Complete. Training pipeline functional, validation tools ready, mip-level support integrated.
 
 **Known Issues:**
-- ⚠️ **cnn_test output differs from HTML validation tool** - Visual discrepancy remains after fixing uv_y inversion and Layer 0 activation. Root cause under investigation. Both tools should produce identical output given same weights/input.
+- ⚠️ **Old checkpoints incompatible** - Models trained with `clamp()` activation won't work correctly with the new `sigmoid()` implementation. Retrain from scratch with the latest code.
 
 **TODO:**
 - 8-bit quantization with QAT for 2× size reduction (~1.6 KB)
@@ -106,6 +107,12 @@ Input RGBD → Static Features Compute → CNN Layers → Output RGBA
 - All layers: uniform 12D input, 4D output (ping-pong buffer)
 - Storage: `texture_storage_2d<rgba32uint>` (4 channels as 2×f16 pairs)
 
+**Activation Functions:**
+- Layer 0 & final layer: `sigmoid(x)` for a smooth [0,1] mapping
+- Middle layers: `ReLU` (`max(0, x)`)
+- Rationale: sigmoid keeps a nonzero gradient near the [0,1] boundaries, where `clamp()` blocks it, enabling better convergence
+- Breaking change: models trained with `clamp(x, 0, 1)` are incompatible; retraining is required
+
 ---
 
 ## Static Features (7D + 1 bias)
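
The rationale above is easy to check numerically. A minimal PyTorch sketch (illustrative input value, not from the repo) showing that `clamp` blocks the gradient for out-of-range pre-activations while `sigmoid` still passes one:

```python
import torch

# Pre-activation outside [0, 1]: clamp() is flat here, so the weights
# feeding it receive no learning signal; sigmoid() still passes gradient.
x = torch.tensor(1.5, requires_grad=True)

torch.clamp(x, 0, 1).backward()
print(x.grad)   # tensor(0.) -- gradient blocked at the boundary

x.grad = None
torch.sigmoid(x).backward()
print(x.grad)   # tensor(0.1491) -- sigmoid'(x) = s * (1 - s) > 0 everywhere
```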
diff --git a/tools/cnn_v2_test/index.html b/tools/cnn_v2_test/index.html
index 1dd2e78..2ec934d 100644
--- a/tools/cnn_v2_test/index.html
+++ b/tools/cnn_v2_test/index.html
@@ -543,12 +543,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
         }
     }
 
-    if (is_output) {
-        output[c] = clamp(sum, 0.0, 1.0);
-    } else if (params.is_layer_0 != 0u) {
-        output[c] = clamp(sum, 0.0, 1.0); // Layer 0: clamp [0,1]
+    if (is_output || params.is_layer_0 != 0u) {
+        output[c] = 1.0 / (1.0 + exp(-sum)); // Sigmoid [0,1]
     } else {
-        output[c] = max(0.0, sum); // Middle layers: ReLU
+        output[c] = max(0.0, sum); // ReLU
     }
 }
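
Because the HTML tool exists to validate the engine shader against the trainer, a small host-side parity check helps catch activation drift early. A sketch (the variable names and tolerance are my own, not repo code) confirming the WGSL expression agrees with `torch.sigmoid` to within f32 rounding:

```python
import numpy as np
import torch

# The shader-side expression 1.0 / (1.0 + exp(-sum)) and the
# torch.sigmoid used in training should match within float32 rounding.
sums = np.linspace(-8.0, 8.0, 17, dtype=np.float32)
shader_like = 1.0 / (1.0 + np.exp(-sums))
train_ref = torch.sigmoid(torch.from_numpy(sums)).numpy()
assert np.allclose(shader_like, train_ref, atol=1e-6)
```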
diff --git a/training/train_cnn_v2.py b/training/train_cnn_v2.py
index d80e3a5..9e5df2f 100755
--- a/training/train_cnn_v2.py
+++ b/training/train_cnn_v2.py
@@ -121,7 +121,7 @@ class CNNv2(nn.Module):
         # Layer 0: input RGBD (4D) + static (8D) = 12D
         x = torch.cat([input_rgbd, static_features], dim=1)
         x = self.layers[0](x)
-        x = torch.clamp(x, 0, 1)  # Output [0,1] for layer 0
+        x = torch.sigmoid(x)  # Soft [0,1] for layer 0
 
         # Layer 1+: previous (4D) + static (8D) = 12D
         for i in range(1, self.num_layers):
@@ -130,7 +130,7 @@ class CNNv2(nn.Module):
             if i < self.num_layers - 1:
                 x = F.relu(x)
             else:
-                x = torch.clamp(x, 0, 1)  # Final output [0,1]
+                x = torch.sigmoid(x)  # Soft [0,1] for final layer
 
         return x
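
The checkpoint incompatibility is visible in a single comparison: inside (0, 1) the old `clamp` was the identity, while `sigmoid` compresses values toward 0.5, so weights tuned against one mapping produce shifted outputs under the other (illustrative numbers, not model outputs):

```python
import torch

x = torch.tensor([0.25, 0.50, 0.75])
print(torch.clamp(x, 0, 1))  # tensor([0.2500, 0.5000, 0.7500]) -- identity in (0, 1)
print(torch.sigmoid(x))      # tensor([0.5622, 0.6225, 0.6792]) -- squashed toward 0.5
```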
diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
index 4644003..cdbfd74 100644
--- a/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
@@ -122,12 +122,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
     }
 
     // Activation (matches train_cnn_v2.py)
-    if (is_output) {
-        output[c] = clamp(sum, 0.0, 1.0); // Output layer: clamp [0,1]
-    } else if (params.is_layer_0 != 0u) {
-        output[c] = clamp(sum, 0.0, 1.0); // Layer 0: clamp [0,1]
+    if (is_output || params.is_layer_0 != 0u) {
+        output[c] = 1.0 / (1.0 + exp(-sum)); // Sigmoid [0,1]
     } else {
-        output[c] = max(0.0, sum); // Middle layers: ReLU
+        output[c] = max(0.0, sum); // ReLU
     }
 }
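
For reference, both WGSL files and the trainer now implement the same per-channel activation contract; a compact Python restatement (hypothetical helper, for documentation only):

```python
import math

def activation(pre: float, is_output: bool, is_layer_0: bool) -> float:
    """Sigmoid on layer 0 and the output layer, ReLU in between."""
    if is_output or is_layer_0:
        return 1.0 / (1.0 + math.exp(-pre))  # smooth [0, 1] mapping
    return max(0.0, pre)                     # ReLU for middle layers
```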