summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- doc/CNN_EFFECT.md 23
-rw-r--r-- doc/CNN_RGBD_GRAYSCALE_SUMMARY.md 10
-rwxr-xr-x training/train_cnn.py 7
-rw-r--r-- workspaces/main/shaders/cnn/cnn_conv3x3.wgsl 16
-rw-r--r-- workspaces/main/shaders/cnn/cnn_conv5x5.wgsl 14
-rw-r--r-- workspaces/main/shaders/cnn/cnn_layer.wgsl 5
6 files changed, 37 insertions, 38 deletions
diff --git a/doc/CNN_EFFECT.md b/doc/CNN_EFFECT.md
index 4659fd3..22cf985 100644
--- a/doc/CNN_EFFECT.md
+++ b/doc/CNN_EFFECT.md
@@ -38,7 +38,7 @@ fn cnn_conv3x3_7to4(
samp: sampler,
uv: vec2<f32>,
resolution: vec2<f32>,
- original: vec4<f32>, # Original RGBD [-1,1]
+ gray: f32, # Grayscale [-1,1]
weights: array<array<f32, 8>, 36> # 9 pos × 4 out × (7 weights + bias)
) -> vec4<f32>
@@ -48,7 +48,7 @@ fn cnn_conv3x3_7to1(
samp: sampler,
uv: vec2<f32>,
resolution: vec2<f32>,
- original: vec4<f32>,
+ gray: f32,
weights: array<array<f32, 8>, 9> # 9 pos × (7 weights + bias)
) -> f32
```
@@ -56,7 +56,7 @@ fn cnn_conv3x3_7to1(
**Input normalization:**
- **fs_main** normalizes textures once: `(tex - 0.5) * 2` → [-1,1]
- **Conv functions** normalize UV coords: `(uv - 0.5) * 2` → [-1,1]
-- **Grayscale** computed from normalized RGBD: `0.2126*R + 0.7152*G + 0.0722*B`
+- **Grayscale** computed once in fs_main using dot product: `dot(original.rgb, vec3(0.2126, 0.7152, 0.0722))`
- **Inter-layer data** stays in [-1,1] (no denormalization)
- **Final output** denormalized for display: `(result + 1.0) * 0.5` → [0,1]
@@ -250,20 +250,25 @@ Expands to:
```wgsl
@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> {
let uv = p.xy / uniforms.resolution;
- let input = textureSample(txt, smplr, uv); // Layer N-1 output
- let original = textureSample(original_input, smplr, uv); // Layer 0 input
-
+ let original_raw = textureSample(original_input, smplr, uv);
+ let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1]
+ let gray = dot(original.rgb, vec3<f32>(0.2126, 0.7152, 0.0722));
var result = vec4<f32>(0.0);
if (params.layer_index == 0) {
- result = cnn_conv3x3_with_coord(txt, smplr, uv, uniforms.resolution,
- rgba_weights_layer0, coord_weights_layer0, bias_layer0);
+ result = cnn_conv3x3_7to4_src(txt, smplr, uv, uniforms.resolution,
+ weights_layer0);
+ result = cnn_tanh(result);
+ }
+ else if (params.layer_index == 1) {
+ result = cnn_conv5x5_7to4(txt, smplr, uv, uniforms.resolution,
+ gray, weights_layer1);
result = cnn_tanh(result);
}
// ... other layers
// Blend with ORIGINAL input (not previous layer)
- return mix(original, result, params.blend_amount);
+ return mix(original_raw, result, params.blend_amount);
}
```
diff --git a/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md b/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md
index 4c13693..3439f2c 100644
--- a/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md
+++ b/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md
@@ -20,7 +20,7 @@ Implemented CNN architecture upgrade: RGBD input → grayscale output with 7-cha
- **RGBD:** `(rgbd - 0.5) * 2`
- **UV coords:** `(uv - 0.5) * 2`
-- **Grayscale:** `(0.2126*R + 0.7152*G + 0.0722*B - 0.5) * 2`
+- **Grayscale:** `dot(original.rgb, vec3<f32>(0.2126, 0.7152, 0.0722))` (computed once, passed as parameter)
**Rationale:** Zero-centered inputs for tanh activation, better gradient flow.
@@ -48,13 +48,14 @@ Implemented CNN architecture upgrade: RGBD input → grayscale output with 7-cha
**Shaders (`/Users/skal/demo/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl`):**
1. Added `cnn_conv3x3_7to4()`:
- - 7-channel input: [RGBD, uv_x, uv_y, gray]
+ - 7-channel input: [RGBD, uv_x, uv_y, gray] (gray passed as parameter)
- 4-channel output: RGBD
- Weights: `array<array<f32, 8>, 36>`
2. Added `cnn_conv3x3_7to1()`:
- - 7-channel input: [RGBD, uv_x, uv_y, gray]
+ - 7-channel input: [RGBD, uv_x, uv_y, gray] (gray passed as parameter)
- 1-channel output: grayscale
- Weights: `array<array<f32, 8>, 9>`
+3. Optimized: gray computed once in caller using `dot()`, not per-function
**Documentation (`/Users/skal/demo/doc/CNN_EFFECT.md`):**
1. Updated architecture section with RGBD→grayscale pipeline
@@ -71,7 +72,8 @@ CNNLayerParams and bind groups remain unchanged.
2. Each layer:
- Samples previous layer output (RGBD in [0,1])
- Normalizes RGBD to [-1,1]
- - Computes UV coords and grayscale, normalizes to [-1,1]
+ - Computes gray once using `dot()` (fs_main level)
+ - Normalizes UV coords to [-1,1] (inside conv functions)
- Concatenates 7-channel input
- Applies convolution with layer-specific weights
- Outputs RGBD (inner) or grayscale (final) in [-1,1]
diff --git a/training/train_cnn.py b/training/train_cnn.py
index 902daa8..6bdb15f 100755
--- a/training/train_cnn.py
+++ b/training/train_cnn.py
@@ -172,6 +172,7 @@ def generate_layer_shader(output_path, num_layers, kernel_sizes):
f.write(" let uv = p.xy / uniforms.resolution;\n")
f.write(" let original_raw = textureSample(original_input, smplr, uv);\n")
f.write(" let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1]\n")
+ f.write(" let gray = dot(original.rgb, vec3<f32>(0.2126, 0.7152, 0.0722));\n")
f.write(" var result = vec4<f32>(0.0);\n\n")
# Generate layer switches
@@ -191,13 +192,13 @@ def generate_layer_shader(output_path, num_layers, kernel_sizes):
elif not is_final:
f.write(f" else if (params.layer_index == {layer_idx}) {{\n")
f.write(f" result = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n")
- f.write(f" original, weights_layer{layer_idx});\n")
+ f.write(f" gray, weights_layer{layer_idx});\n")
f.write(f" result = cnn_tanh(result); // Keep in [-1,1]\n")
f.write(f" }}\n")
else:
f.write(f" else if (params.layer_index == {layer_idx}) {{\n")
f.write(f" let gray_out = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n")
- f.write(f" original, weights_layer{layer_idx});\n")
+ f.write(f" gray, weights_layer{layer_idx});\n")
f.write(f" // gray_out already in [0,1] from clipped training\n")
f.write(f" result = vec4<f32>(gray_out, gray_out, gray_out, 1.0);\n")
f.write(f" return mix(original_raw, result, params.blend_amount); // [0,1]\n")
@@ -270,7 +271,7 @@ def generate_conv_src_function(kernel_size, output_path):
# Normalize center pixel for gray channel
f.write(f" let original = (textureSample(tex, samp, uv) - 0.5) * 2.0;\n")
- f.write(f" let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;\n")
+ f.write(f" let gray = dot(original.rgb, vec3<f32>(0.2126, 0.7152, 0.0722));\n")
f.write(f" let uv_norm = (uv - 0.5) * 2.0;\n\n")
f.write(f" var sum = vec4<f32>(0.0);\n")
diff --git a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
index 96ddf5b..79b0350 100644
--- a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
+++ b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
@@ -15,7 +15,7 @@ fn cnn_conv3x3_7to4_src(
// Compute grayscale from original (converted in [-1,1])
let original = (textureSample(tex, samp, uv) - 0.5) * 2.0;
- let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
+ let gray = dot(original.rgb, vec3<f32>(0.2126, 0.7152, 0.0722));
// Normalize UV to [-1,1]
let uv_norm = (uv - 0.5) * 2.0;
@@ -52,7 +52,7 @@ fn cnn_conv3x3_7to4_src(
}
// Inner layers: 7→4 channels (RGBD output)
-// Assumes 'tex' and 'original' are already normalized to [-1,1]
+// Assumes 'tex' is already normalized to [-1,1]
// UV coordinates remain in [0,1] and are normalized internally
// weights: array<array<f32, 8>, 36> (9 positions × 4 channels, each with 7 weights + bias)
fn cnn_conv3x3_7to4(
@@ -60,14 +60,11 @@ fn cnn_conv3x3_7to4(
samp: sampler,
uv: vec2<f32>,
resolution: vec2<f32>,
- original: vec4<f32>,
+ gray: f32,
weights: array<array<f32, 8>, 36>
) -> vec4<f32> {
let step = 1.0 / resolution;
- // Compute grayscale from original (already in [-1,1])
- let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
-
// Normalize UV to [-1,1]
let uv_norm = (uv - 0.5) * 2.0;
@@ -103,7 +100,7 @@ fn cnn_conv3x3_7to4(
}
// Final layer: 7→1 channel (scalar output)
-// Assumes 'tex' and 'original' are already normalized to [-1,1]
+// Assumes 'tex' is already normalized to [-1,1]
// UV coordinates remain in [0,1] and are normalized internally
// weights: array<array<f32, 8>, 9> (9 positions, each with 7 weights + bias)
fn cnn_conv3x3_7to1(
@@ -111,14 +108,11 @@ fn cnn_conv3x3_7to1(
samp: sampler,
uv: vec2<f32>,
resolution: vec2<f32>,
- original: vec4<f32>,
+ gray: f32,
weights: array<array<f32, 8>, 9>
) -> f32 {
let step = 1.0 / resolution;
- // Compute grayscale from original (already in [-1,1])
- let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
-
// Normalize UV to [-1,1]
let uv_norm = (uv - 0.5) * 2.0;
diff --git a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl
index 0f261dd..5570589 100644
--- a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl
+++ b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl
@@ -1,5 +1,5 @@
// 5×5 variant for 7→4 channels (RGBD output)
-// Assumes 'tex' and 'original' are already normalized to [-1,1]
+// Assumes 'tex' is already normalized to [-1,1]
// UV coordinates remain in [0,1] and are normalized internally
// weights: array<array<f32, 8>, 100> (25 positions × 4 channels, each with 7 weights + bias)
fn cnn_conv5x5_7to4(
@@ -7,12 +7,10 @@ fn cnn_conv5x5_7to4(
samp: sampler,
uv: vec2<f32>,
resolution: vec2<f32>,
- original: vec4<f32>,
+ gray: f32,
weights: array<array<f32, 8>, 100>
) -> vec4<f32> {
let step = 1.0 / resolution;
-
- let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
let uv_norm = (uv - 0.5) * 2.0;
var sum = vec4<f32>(0.0);
@@ -44,7 +42,7 @@ fn cnn_conv5x5_7to4(
}
// 5×5 variant for 7→1 channel (scalar output)
-// Assumes 'tex' and 'original' are already normalized to [-1,1]
+// Assumes 'tex' is already normalized to [-1,1]
// UV coordinates remain in [0,1] and are normalized internally
// weights: array<array<f32, 8>, 25> (25 positions, each with 7 weights + bias)
fn cnn_conv5x5_7to1(
@@ -52,12 +50,10 @@ fn cnn_conv5x5_7to1(
samp: sampler,
uv: vec2<f32>,
resolution: vec2<f32>,
- original: vec4<f32>,
+ gray: f32,
weights: array<array<f32, 8>, 25>
) -> f32 {
let step = 1.0 / resolution;
-
- let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
let uv_norm = (uv - 0.5) * 2.0;
var sum = 0.0;
@@ -96,7 +92,7 @@ fn cnn_conv5x5_7to4_src(
let step = 1.0 / resolution;
let original = (textureSample(tex, samp, uv) - 0.5) * 2.0;
- let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;
+ let gray = dot(original.rgb, vec3<f32>(0.2126, 0.7152, 0.0722));
let uv_norm = (uv - 0.5) * 2.0;
var sum = vec4<f32>(0.0);
diff --git a/workspaces/main/shaders/cnn/cnn_layer.wgsl b/workspaces/main/shaders/cnn/cnn_layer.wgsl
index 3f970df..e67ad31 100644
--- a/workspaces/main/shaders/cnn/cnn_layer.wgsl
+++ b/workspaces/main/shaders/cnn/cnn_layer.wgsl
@@ -32,6 +32,7 @@ struct CNNLayerParams {
let uv = p.xy / uniforms.resolution;
let original_raw = textureSample(original_input, smplr, uv);
let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1]
+ let gray = dot(original.rgb, vec3<f32>(0.2126, 0.7152, 0.0722));
var result = vec4<f32>(0.0);
// Layer 0: 7→4 (RGBD output, normalizes [0,1] input)
@@ -42,12 +43,12 @@ struct CNNLayerParams {
}
else if (params.layer_index == 1) {
result = cnn_conv5x5_7to4(txt, smplr, uv, uniforms.resolution,
- original, weights_layer1);
+ gray, weights_layer1);
result = cnn_tanh(result); // Keep in [-1,1]
}
else if (params.layer_index == 2) {
let gray_out = cnn_conv3x3_7to1(txt, smplr, uv, uniforms.resolution,
- original, weights_layer2);
+ gray, weights_layer2);
// gray_out already in [0,1] from clipped training
result = vec4<f32>(gray_out, gray_out, gray_out, 1.0);
return mix(original_raw, result, params.blend_amount); // [0,1]