From 61104d5b9e1774c11f0dba3b6d6018dabc2bce8f Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Tue, 10 Feb 2026 16:44:39 +0100
Subject: feat: CNN RGBD→grayscale with 7-channel augmented input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Upgrade CNN architecture to process RGBD input, output grayscale, with
7-channel layer inputs (RGBD + UV coords + grayscale).

Architecture changes:
- Inner layers: Conv2d(7→4) output RGBD
- Final layer: Conv2d(7→1) output grayscale
- All inputs normalized to [-1,1] for tanh activation
- Removed CoordConv2d in favor of unified 7-channel input

Training (train_cnn.py):
- SimpleCNN: 7→4 (inner), 7→1 (final) architecture
- Forward: Normalize RGBD/coords/gray to [-1,1]
- Weight export: array<array<f32, 8>, 36> (inner), array<array<f32, 8>, 9> (final)
- Dataset: Load RGBA (RGBD) input

Shaders (cnn_conv3x3.wgsl):
- Added cnn_conv3x3_7to4: 7-channel input → RGBD output
- Added cnn_conv3x3_7to1: 7-channel input → grayscale output
- Both normalize inputs and use flattened weight arrays

Documentation:
- CNN_EFFECT.md: Updated architecture, training, weight format
- CNN_RGBD_GRAYSCALE_SUMMARY.md: Implementation summary
- HOWTO.md: Added training command example

Next: Train with RGBD input data

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 workspaces/main/shaders/cnn/cnn_conv3x3.wgsl | 100 +++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

(limited to 'workspaces/main/shaders')
diff --git a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
index 168c9e2..df58b4d 100644
--- a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
+++ b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
@@ -51,3 +51,103 @@ fn cnn_conv3x3_with_coord(
 
   return sum;
 }
+
+// Inner-layer 3x3 convolution: 7 input channels -> 4 output channels (RGBD).
+// weights: 36 rows indexed by tap * 4 + output channel; each row is 7 input weights, then a bias.
+fn cnn_conv3x3_7to4(
+  tex: texture_2d<f32>,
+  samp: sampler,
+  uv: vec2<f32>,
+  resolution: vec2<f32>,
+  original: vec4<f32>,
+  weights: array<array<f32, 8>, 36>
+) -> vec4<f32> {
+  // Size of one texel in UV units.
+  let texel = 1.0 / resolution;
+
+  // Channels 4..6 are identical for all nine taps: the centre UV and the
+  // luma of `original`, both remapped from [0,1] to [-1,1].
+  let coord = (uv - 0.5) * 2.0;
+  let luma_01 = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
+  let luma = (luma_01 - 0.5) * 2.0;
+
+  var acc = vec4<f32>(0.0);
+
+  // Visit the 3x3 neighbourhood row by row: tap 0 is (-1,-1), tap 8 is (+1,+1).
+  for (var tap = 0; tap < 9; tap++) {
+    let dx = tap % 3 - 1;
+    let dy = tap / 3 - 1;
+    let shift = vec2<f32>(f32(dx), f32(dy)) * texel;
+
+    // Neighbour sample, remapped from [0,1] to [-1,1].
+    let texel_01 = textureSample(tex, samp, uv + shift);
+    let s = (texel_01 - 0.5) * 2.0;
+
+    // Input vector for this tap: [R, G, B, D, uv.x, uv.y, luma].
+    let inputs = array<f32, 7>(
+      s.r, s.g, s.b, s.a,
+      coord.x, coord.y, luma
+    );
+
+    // One weight row per output channel, seeded with that row's bias (element 7).
+    // Each tap contributes its own row bias, so nine bias terms are summed per channel.
+    for (var oc = 0; oc < 4; oc++) {
+      let row = tap * 4 + oc;
+      var channel_acc = weights[row][7];
+      for (var ic = 0; ic < 7; ic++) {
+        channel_acc += weights[row][ic] * inputs[ic];
+      }
+      acc[oc] += channel_acc;
+    }
+  }
+
+  // Raw weighted sum: no activation or clamping is applied in this function.
+  return acc;
+}
+
+// Final-layer 3x3 convolution: 7 input channels -> 1 scalar output.
+// weights: 9 rows, one per tap; each row is 7 input weights, then a bias.
+fn cnn_conv3x3_7to1(
+  tex: texture_2d<f32>,
+  samp: sampler,
+  uv: vec2<f32>,
+  resolution: vec2<f32>,
+  original: vec4<f32>,
+  weights: array<array<f32, 8>, 9>
+) -> f32 {
+  let texel = 1.0 / resolution;
+
+  // Channels 4..6 are identical for all nine taps: the centre UV and the
+  // luma of `original`, both remapped from [0,1] to [-1,1].
+  let coord = (uv - 0.5) * 2.0;
+  let luma_01 = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
+  let luma = (luma_01 - 0.5) * 2.0;
+
+  var total = 0.0;
+
+  // Visit the 3x3 neighbourhood row by row: tap 0 is (-1,-1), tap 8 is (+1,+1).
+  for (var tap = 0; tap < 9; tap++) {
+    let dx = tap % 3 - 1;
+    let dy = tap / 3 - 1;
+    let shift = vec2<f32>(f32(dx), f32(dy)) * texel;
+
+    // Neighbour sample, remapped from [0,1] to [-1,1].
+    let texel_01 = textureSample(tex, samp, uv + shift);
+    let s = (texel_01 - 0.5) * 2.0;
+
+    // Weighted inputs in channel order [R, G, B, D, uv.x, uv.y, luma],
+    // followed by this tap's bias (element 7 of its row).
+    let w = weights[tap];
+    total += w[0] * s.r;
+    total += w[1] * s.g;
+    total += w[2] * s.b;
+    total += w[3] * s.a;
+    total += w[4] * coord.x;
+    total += w[5] * coord.y;
+    total += w[6] * luma;
+    total += w[7];
+  }
+
+  // Raw weighted sum: not clamped or denormalized here; that is left to the caller.
+  return total;
+}
-- 
cgit v1.2.3