1 files changed, 116 insertions, 21 deletions
diff --git a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
index 168c9e2..96ddf5b 100644
--- a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
+++ b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
@@ -1,53 +1,148 @@
 // 3x3 convolution with weight indexing
-// Samples 9 pixels, applies mat4 weights per sample
 
-fn cnn_conv3x3(
+// Source layer: 7→4 channel 3x3 convolution (RGBD output), accumulated over the 9 taps
+// Assumes 'tex' holds values in [0,1]; every sample is remapped to [-1,1] via (x - 0.5) * 2.0
+// 'uv' is expected in [0,1] and is remapped to [-1,1] internally; tap spacing is 1.0 / resolution
+// weights: array<array<f32, 8>, 36>, row = pos * 4 + out_c (9 taps × 4 outputs); each row is 7 input weights, then bias at [7]
+fn cnn_conv3x3_7to4_src(
   tex: texture_2d<f32>,
   samp: sampler,
   uv: vec2<f32>,
   resolution: vec2<f32>,
-  weights: array<mat4x4<f32>, 9>,
-  bias: vec4<f32>
+  weights: array<array<f32, 8>, 36>
 ) -> vec4<f32> {
   let step = 1.0 / resolution;
-  var sum = bias;
-  var idx = 0;
 
+  // Luma of the center sample (Rec. 709 weights), computed after remapping it to [-1,1]
+  let original = (textureSample(tex, samp, uv) - 0.5) * 2.0;
+  let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
+
+  // Remap UV from [0,1] to [-1,1]; this center value is reused unchanged for all 9 taps
+  let uv_norm = (uv - 0.5) * 2.0;
+
+  var sum = vec4<f32>(0.0);
+
+  var pos = 0;  // tap index 0..8; dy is the outer loop, dx varies fastest
   for (var dy = -1; dy <= 1; dy++) {
     for (var dx = -1; dx <= 1; dx++) {
       let offset = vec2<f32>(f32(dx), f32(dy)) * step;
-      let sample = textureSample(tex, samp, uv + offset);
-      sum += weights[idx] * sample;
-      idx++;
+      let rgbd = (textureSample(tex, samp, uv + offset) - .5) * 2.0;  // remap [0,1] sample to [-1,1]
+
+      // 7-channel input: [R,G,B,D, uv.x, uv.y, gray], all remapped to [-1,1]; only RGBD changes per tap
+      let inputs = array<f32, 7>(
+        rgbd.r, rgbd.g, rgbd.b, rgbd.a,
+        uv_norm.x, uv_norm.y, gray
+      );
+
+      // Per output channel (RGBD): bias + weighted sum of the 7 inputs, accumulated across taps
+      for (var out_c = 0; out_c < 4; out_c++) {
+        let idx = pos * 4 + out_c;
+        var channel_sum = weights[idx][7];  // Bias (8th element), added once per tap
+        for (var in_c = 0; in_c < 7; in_c++) {
+          channel_sum += weights[idx][in_c] * inputs[in_c];
+        }
+        sum[out_c] += channel_sum;
+      }
+
+      pos++;
     }
   }
 
-  return sum;
+  return sum;  // Accumulated sum over all taps; not clamped or activated here (range depends on weights)
 }
 
-fn cnn_conv3x3_with_coord(
+// Inner layer: 7→4 channel 3x3 convolution (RGBD output), accumulated over the 9 taps
+// Assumes 'tex' and 'original' already hold values in [-1,1]; they are used as-is (no remapping here)
+// 'uv' is expected in [0,1] and is remapped to [-1,1] internally; tap spacing is 1.0 / resolution
+// weights: array<array<f32, 8>, 36>, row = pos * 4 + out_c (9 taps × 4 outputs); each row is 7 input weights, then bias at [7]
+fn cnn_conv3x3_7to4(
   tex: texture_2d<f32>,
   samp: sampler,
   uv: vec2<f32>,
   resolution: vec2<f32>,
-  rgba_weights: array<mat4x4<f32>, 9>,
-  coord_weights: mat2x4<f32>,
-  bias: vec4<f32>
+  original: vec4<f32>,
+  weights: array<array<f32, 8>, 36>
 ) -> vec4<f32> {
   let step = 1.0 / resolution;
-  var sum = bias;
 
-  sum += coord_weights * uv;
+  // Luma of the caller-supplied 'original' color (Rec. 709 weights); assumed already in [-1,1]
+  let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
+
+  // Remap UV from [0,1] to [-1,1]; this center value is reused unchanged for all 9 taps
+  let uv_norm = (uv - 0.5) * 2.0;
+
+  var sum = vec4<f32>(0.0);
+
+  var pos = 0;  // tap index 0..8; dy is the outer loop, dx varies fastest
+  for (var dy = -1; dy <= 1; dy++) {
+    for (var dx = -1; dx <= 1; dx++) {
+      let offset = vec2<f32>(f32(dx), f32(dy)) * step;
+      let rgbd = textureSample(tex, samp, uv + offset);  // used as-is; assumed already in [-1,1]
+
+      // 7-channel input: [R,G,B,D, uv.x, uv.y, gray]; only RGBD changes per tap
+      let inputs = array<f32, 7>(
+        rgbd.r, rgbd.g, rgbd.b, rgbd.a,
+        uv_norm.x, uv_norm.y, gray
+      );
+
+      // Per output channel (RGBD): bias + weighted sum of the 7 inputs, accumulated across taps
+      for (var out_c = 0; out_c < 4; out_c++) {
+        let idx = pos * 4 + out_c;
+        var channel_sum = weights[idx][7];  // Bias (8th element), added once per tap
+        for (var in_c = 0; in_c < 7; in_c++) {
+          channel_sum += weights[idx][in_c] * inputs[in_c];
+        }
+        sum[out_c] += channel_sum;
+      }
+
+      pos++;
+    }
+  }
+
+  return sum;  // Accumulated sum over all taps; not clamped or activated here (range depends on weights)
+}
+
+// Final layer: 7→1 channel 3x3 convolution (scalar output), accumulated over the 9 taps
+// Assumes 'tex' and 'original' already hold values in [-1,1]; they are used as-is (no remapping here)
+// 'uv' is expected in [0,1] and is remapped to [-1,1] internally; tap spacing is 1.0 / resolution
+// weights: array<array<f32, 8>, 9>, one row per tap (pos 0..8); each row is 7 input weights, then bias at [7]
+fn cnn_conv3x3_7to1(
+  tex: texture_2d<f32>,
+  samp: sampler,
+  uv: vec2<f32>,
+  resolution: vec2<f32>,
+  original: vec4<f32>,
+  weights: array<array<f32, 8>, 9>
+) -> f32 {
+  let step = 1.0 / resolution;
+
+  // Luma of the caller-supplied 'original' color (Rec. 709 weights); assumed already in [-1,1]
+  let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
+
+  // Remap UV from [0,1] to [-1,1]; this center value is reused unchanged for all 9 taps
+  let uv_norm = (uv - 0.5) * 2.0;
 
-  var idx = 0;
+  var sum = 0.0;
+
+  var pos = 0;  // tap index 0..8; dy is the outer loop, dx varies fastest
   for (var dy = -1; dy <= 1; dy++) {
     for (var dx = -1; dx <= 1; dx++) {
       let offset = vec2<f32>(f32(dx), f32(dy)) * step;
-      let rgba = textureSample(tex, samp, uv + offset);
-      sum += rgba_weights[idx] * rgba;
-      idx++;
+      let rgbd = textureSample(tex, samp, uv + offset);  // used as-is; assumed already in [-1,1]
+
+      // Unrolled dot product over the 7 inputs [R,G,B,D, uv.x, uv.y, gray], plus this tap's bias
+      sum += weights[pos][0] * rgbd.r;
+      sum += weights[pos][1] * rgbd.g;
+      sum += weights[pos][2] * rgbd.b;
+      sum += weights[pos][3] * rgbd.a;
+      sum += weights[pos][4] * uv_norm.x;
+      sum += weights[pos][5] * uv_norm.y;
+      sum += weights[pos][6] * gray;
+      sum += weights[pos][7];  // Bias (8th element), added once per tap
+
+      pos++;
     }
   }
 
-  return sum;
+  return sum;  // Accumulated sum over all taps; not clamped or activated here (range depends on weights)
 }