CNN v2: Fix activation function mismatch between training and inference

Layer 0 now uses clamp [0,1] in both training and inference (was using ReLU in shaders). - index.html: Add is_layer_0 flag to LayerParams, handle Layer 0 separately - export_cnn_v2_shader.py: Generate correct activation for Layer 0 Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
author: skal <pascal.massimino@gmail.com> 2026-02-13 16:12:24 +0100
committer: skal <pascal.massimino@gmail.com> 2026-02-13 16:12:24 +0100
commit: b04816a400703ac6c364efb70ae84930d79ccb12 (patch)
tree: 257acfe047ee79c6037db0dd983b91396139d5a4
parent: b5e8abad0490e47b52d300d2d0c48425c3fac4f3 (diff)
2 files changed, 12 insertions, 5 deletions
diff --git a/tools/cnn_v2_test/index.html b/tools/cnn_v2_test/index.html
index 79c54b7..fc93223 100644
--- a/tools/cnn_v2_test/index.html
+++ b/tools/cnn_v2_test/index.html
@@ -397,6 +397,7 @@ struct LayerParams {
   weight_offset: u32,
   is_output_layer: u32,
   blend_amount: f32,
+  is_layer_0: u32,
 }
 
 @group(0) @binding(0) var static_features: texture_2d<u32>;
@@ -490,8 +491,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
 
     if (is_output) {
       output[c] = clamp(sum, 0.0, 1.0);
+    } else if (params.is_layer_0 != 0u) {
+      output[c] = clamp(sum, 0.0, 1.0);  // Layer 0: clamp [0,1]
     } else {
-      output[c] = max(0.0, sum);
+      output[c] = max(0.0, sum);  // Middle layers: ReLU
     }
   }
 
@@ -1105,18 +1108,19 @@ class CNNTester {
       const headerOffsetU32 = 4 + this.weights.layers.length * 5;  // Header + layer info in u32
       const absoluteWeightOffset = headerOffsetU32 * 2 + layer.weightOffset;  // Convert to f16 units
 
-      const paramsData = new Uint32Array(6);
+      const paramsData = new Uint32Array(7);
       paramsData[0] = layer.kernelSize;
       paramsData[1] = layer.inChannels;
       paramsData[2] = layer.outChannels;
       paramsData[3] = absoluteWeightOffset;  // Use absolute offset
       paramsData[4] = isOutput ? 1 : 0;
+      paramsData[6] = (i === 0) ? 1 : 0;  // is_layer_0 flag
 
       const paramsView = new Float32Array(paramsData.buffer);
       paramsView[5] = this.blendAmount;
 
       const paramsBuffer = this.device.createBuffer({
-        size: 24,
+        size: 28,
         usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
       });
       this.device.queue.writeBuffer(paramsBuffer, 0, paramsData);
diff --git a/training/export_cnn_v2_shader.py b/training/export_cnn_v2_shader.py
index ad5749c..dc475d8 100755
--- a/training/export_cnn_v2_shader.py
+++ b/training/export_cnn_v2_shader.py
@@ -35,9 +35,12 @@ def export_layer_shader(layer_idx, weights, kernel_size, output_dir, is_output_l
     )
 
     radius = kernel_size // 2
-    activation = "" if is_output_layer else "output[c] = max(0.0, sum);  // ReLU"
     if is_output_layer:
-        activation = "output[c] = clamp(sum, 0.0, 1.0);  // Sigmoid approximation"
+        activation = "output[c] = clamp(sum, 0.0, 1.0);  // Output layer"
+    elif layer_idx == 0:
+        activation = "output[c] = clamp(sum, 0.0, 1.0);  // Layer 0: clamp [0,1]"
+    else:
+        activation = "output[c] = max(0.0, sum);  // Middle layers: ReLU"
 
     shader_code = f"""// CNN v2 Layer {layer_idx} - Auto-generated (uniform 12D→4D)
 // Kernel: {kernel_size}×{kernel_size}, In: 12D (4 prev + 8 static), Out: 4D
author	skal <pascal.massimino@gmail.com>	2026-02-13 16:12:24 +0100
committer	skal <pascal.massimino@gmail.com>	2026-02-13 16:12:24 +0100
commit	b04816a400703ac6c364efb70ae84930d79ccb12 (patch)
tree	257acfe047ee79c6037db0dd983b91396139d5a4
parent	b5e8abad0490e47b52d300d2d0c48425c3fac4f3 (diff)