From 7a05f4d33b611ba1e9b6c68e0d0bd67d6ea011ee Mon Sep 17 00:00:00 2001 From: skal Date: Tue, 10 Feb 2026 21:11:05 +0100 Subject: refactor: Optimize CNN grayscale computation Compute gray once per fragment using dot() instead of per-layer. Pass gray as f32 parameter to conv functions instead of vec4 original. Co-Authored-By: Claude Sonnet 4.5 --- doc/CNN_EFFECT.md | 23 ++++++++++++++--------- doc/CNN_RGBD_GRAYSCALE_SUMMARY.md | 10 ++++++---- training/train_cnn.py | 7 ++++--- workspaces/main/shaders/cnn/cnn_conv3x3.wgsl | 16 +++++----------- workspaces/main/shaders/cnn/cnn_conv5x5.wgsl | 14 +++++--------- workspaces/main/shaders/cnn/cnn_layer.wgsl | 5 +++-- 6 files changed, 37 insertions(+), 38 deletions(-) diff --git a/doc/CNN_EFFECT.md b/doc/CNN_EFFECT.md index 4659fd3..22cf985 100644 --- a/doc/CNN_EFFECT.md +++ b/doc/CNN_EFFECT.md @@ -38,7 +38,7 @@ fn cnn_conv3x3_7to4( samp: sampler, uv: vec2, resolution: vec2, - original: vec4, # Original RGBD [-1,1] + gray: f32, # Grayscale [-1,1] weights: array, 36> # 9 pos × 4 out × (7 weights + bias) ) -> vec4 @@ -48,7 +48,7 @@ fn cnn_conv3x3_7to1( samp: sampler, uv: vec2, resolution: vec2, - original: vec4, + gray: f32, weights: array, 9> # 9 pos × (7 weights + bias) ) -> f32 ``` @@ -56,7 +56,7 @@ fn cnn_conv3x3_7to1( **Input normalization:** - **fs_main** normalizes textures once: `(tex - 0.5) * 2` → [-1,1] - **Conv functions** normalize UV coords: `(uv - 0.5) * 2` → [-1,1] -- **Grayscale** computed from normalized RGBD: `0.2126*R + 0.7152*G + 0.0722*B` +- **Grayscale** computed once in fs_main using dot product: `dot(original.rgb, vec3(0.2126, 0.7152, 0.0722))` - **Inter-layer data** stays in [-1,1] (no denormalization) - **Final output** denormalized for display: `(result + 1.0) * 0.5` → [0,1] @@ -250,20 +250,25 @@ Expands to: ```wgsl @fragment fn fs_main(@builtin(position) p: vec4) -> @location(0) vec4 { let uv = p.xy / uniforms.resolution; - let input = textureSample(txt, smplr, uv); // Layer N-1 output - let original = textureSample(original_input, smplr, uv); // Layer 0 input - + let original_raw = textureSample(original_input, smplr, uv); + let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1] + let gray = dot(original.rgb, vec3(0.2126, 0.7152, 0.0722)); var result = vec4(0.0); if (params.layer_index == 0) { - result = cnn_conv3x3_with_coord(txt, smplr, uv, uniforms.resolution, - rgba_weights_layer0, coord_weights_layer0, bias_layer0); + result = cnn_conv3x3_7to4_src(txt, smplr, uv, uniforms.resolution, + weights_layer0); + result = cnn_tanh(result); + } + else if (params.layer_index == 1) { + result = cnn_conv5x5_7to4(txt, smplr, uv, uniforms.resolution, + gray, weights_layer1); result = cnn_tanh(result); } // ... other layers // Blend with ORIGINAL input (not previous layer) - return mix(original, result, params.blend_amount); + return mix(original_raw, result, params.blend_amount); } ``` diff --git a/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md b/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md index 4c13693..3439f2c 100644 --- a/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md +++ b/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md @@ -20,7 +20,7 @@ Implemented CNN architecture upgrade: RGBD input → grayscale output with 7-cha - **RGBD:** `(rgbd - 0.5) * 2` - **UV coords:** `(uv - 0.5) * 2` -- **Grayscale:** `(0.2126*R + 0.7152*G + 0.0722*B - 0.5) * 2` +- **Grayscale:** `dot(original.rgb, vec3(0.2126, 0.7152, 0.0722))` (computed once, passed as parameter) **Rationale:** Zero-centered inputs for tanh activation, better gradient flow. @@ -48,13 +48,14 @@ Implemented CNN architecture upgrade: RGBD input → grayscale output with 7-cha **Shaders (`/Users/skal/demo/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl`):** 1. Added `cnn_conv3x3_7to4()`: - - 7-channel input: [RGBD, uv_x, uv_y, gray] + - 7-channel input: [RGBD, uv_x, uv_y, gray] (gray passed as parameter) - 4-channel output: RGBD - Weights: `array, 36>` 2. Added `cnn_conv3x3_7to1()`: - - 7-channel input: [RGBD, uv_x, uv_y, gray] + - 7-channel input: [RGBD, uv_x, uv_y, gray] (gray passed as parameter) - 1-channel output: grayscale - Weights: `array, 9>` +3. Optimized: gray computed once in caller using `dot()`, not per-function **Documentation (`/Users/skal/demo/doc/CNN_EFFECT.md`):** 1. Updated architecture section with RGBD→grayscale pipeline @@ -71,7 +72,8 @@ CNNLayerParams and bind groups remain unchanged. 2. Each layer: - Samples previous layer output (RGBD in [0,1]) - Normalizes RGBD to [-1,1] - - Computes UV coords and grayscale, normalizes to [-1,1] + - Computes gray once using `dot()` (fs_main level) + - Normalizes UV coords to [-1,1] (inside conv functions) - Concatenates 7-channel input - Applies convolution with layer-specific weights - Outputs RGBD (inner) or grayscale (final) in [-1,1] diff --git a/training/train_cnn.py b/training/train_cnn.py index 902daa8..6bdb15f 100755 --- a/training/train_cnn.py +++ b/training/train_cnn.py @@ -172,6 +172,7 @@ def generate_layer_shader(output_path, num_layers, kernel_sizes): f.write(" let uv = p.xy / uniforms.resolution;\n") f.write(" let original_raw = textureSample(original_input, smplr, uv);\n") f.write(" let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1]\n") + f.write(" let gray = dot(original.rgb, vec3(0.2126, 0.7152, 0.0722));\n") f.write(" var result = vec4(0.0);\n\n") # Generate layer switches @@ -191,13 +192,13 @@ def generate_layer_shader(output_path, num_layers, kernel_sizes): elif not is_final: f.write(f" else if (params.layer_index == {layer_idx}) {{\n") f.write(f" result = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n") - f.write(f" original, weights_layer{layer_idx});\n") + f.write(f" gray, weights_layer{layer_idx});\n") f.write(f" result = cnn_tanh(result); // Keep in [-1,1]\n") f.write(f" }}\n") else: f.write(f" else if (params.layer_index == {layer_idx}) {{\n") f.write(f" let gray_out = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n") - f.write(f" original, weights_layer{layer_idx});\n") + f.write(f" gray, weights_layer{layer_idx});\n") f.write(f" // gray_out already in [0,1] from clipped training\n") f.write(f" result = vec4(gray_out, gray_out, gray_out, 1.0);\n") f.write(f" return mix(original_raw, result, params.blend_amount); // [0,1]\n") @@ -270,7 +271,7 @@ def generate_conv_src_function(kernel_size, output_path): # Normalize center pixel for gray channel f.write(f" let original = (textureSample(tex, samp, uv) - 0.5) * 2.0;\n") - f.write(f" let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;\n") + f.write(f" let gray = dot(original.rgb, vec3(0.2126, 0.7152, 0.0722));\n") f.write(f" let uv_norm = (uv - 0.5) * 2.0;\n\n") f.write(f" var sum = vec4(0.0);\n") diff --git a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl index 96ddf5b..79b0350 100644 --- a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl +++ b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl @@ -15,7 +15,7 @@ fn cnn_conv3x3_7to4_src( // Compute grayscale from original (converted in [-1,1]) let original = (textureSample(tex, samp, uv) - 0.5) * 2.0; - let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; + let gray = dot(original.rgb, vec3(0.2126, 0.7152, 0.0722)); // Normalize UV to [-1,1] let uv_norm = (uv - 0.5) * 2.0; @@ -52,7 +52,7 @@ fn cnn_conv3x3_7to4_src( } // Inner layers: 7→4 channels (RGBD output) -// Assumes 'tex' and 'original' are already normalized to [-1,1] +// Assumes 'tex' is already normalized to [-1,1] // UV coordinates remain in [0,1] and are normalized internally // weights: array, 36> (9 positions × 4 channels, each with 7 weights + bias) fn cnn_conv3x3_7to4( @@ -60,14 +60,11 @@ fn cnn_conv3x3_7to4( samp: sampler, uv: vec2, resolution: vec2, - original: vec4, + gray: f32, weights: array, 36> ) -> vec4 { let step = 1.0 / resolution; - // Compute grayscale from original (already in [-1,1]) - let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; - // Normalize UV to [-1,1] let uv_norm = (uv - 0.5) * 2.0; @@ -103,7 +100,7 @@ fn cnn_conv3x3_7to4( } // Final layer: 7→1 channel (scalar output) -// Assumes 'tex' and 'original' are already normalized to [-1,1] +// Assumes 'tex' is already normalized to [-1,1] // UV coordinates remain in [0,1] and are normalized internally // weights: array, 9> (9 positions, each with 7 weights + bias) fn cnn_conv3x3_7to1( @@ -111,14 +108,11 @@ fn cnn_conv3x3_7to1( samp: sampler, uv: vec2, resolution: vec2, - original: vec4, + gray: f32, weights: array, 9> ) -> f32 { let step = 1.0 / resolution; - // Compute grayscale from original (already in [-1,1]) - let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; - // Normalize UV to [-1,1] let uv_norm = (uv - 0.5) * 2.0; diff --git a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl index 0f261dd..5570589 100644 --- a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl +++ b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl @@ -1,5 +1,5 @@ // 5×5 variant for 7→4 channels (RGBD output) -// Assumes 'tex' and 'original' are already normalized to [-1,1] +// Assumes 'tex' is already normalized to [-1,1] // UV coordinates remain in [0,1] and are normalized internally // weights: array, 100> (25 positions × 4 channels, each with 7 weights + bias) fn cnn_conv5x5_7to4( @@ -7,12 +7,10 @@ fn cnn_conv5x5_7to4( samp: sampler, uv: vec2, resolution: vec2, - original: vec4, + gray: f32, weights: array, 100> ) -> vec4 { let step = 1.0 / resolution; - - let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; let uv_norm = (uv - 0.5) * 2.0; var sum = vec4(0.0); @@ -44,7 +42,7 @@ fn cnn_conv5x5_7to4( } // 5×5 variant for 7→1 channel (scalar output) -// Assumes 'tex' and 'original' are already normalized to [-1,1] +// Assumes 'tex' is already normalized to [-1,1] // UV coordinates remain in [0,1] and are normalized internally // weights: array, 25> (25 positions, each with 7 weights + bias) fn cnn_conv5x5_7to1( @@ -52,12 +50,10 @@ fn cnn_conv5x5_7to1( samp: sampler, uv: vec2, resolution: vec2, - original: vec4, + gray: f32, weights: array, 25> ) -> f32 { let step = 1.0 / resolution; - - let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; let uv_norm = (uv - 0.5) * 2.0; var sum = 0.0; @@ -96,7 +92,7 @@ fn cnn_conv5x5_7to4_src( let step = 1.0 / resolution; let original = (textureSample(tex, samp, uv) - 0.5) * 2.0; - let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; + let gray = dot(original.rgb, vec3(0.2126, 0.7152, 0.0722)); let uv_norm = (uv - 0.5) * 2.0; var sum = vec4(0.0); diff --git a/workspaces/main/shaders/cnn/cnn_layer.wgsl b/workspaces/main/shaders/cnn/cnn_layer.wgsl index 3f970df..e67ad31 100644 --- a/workspaces/main/shaders/cnn/cnn_layer.wgsl +++ b/workspaces/main/shaders/cnn/cnn_layer.wgsl @@ -32,6 +32,7 @@ struct CNNLayerParams { let uv = p.xy / uniforms.resolution; let original_raw = textureSample(original_input, smplr, uv); let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1] + let gray = dot(original.rgb, vec3(0.2126, 0.7152, 0.0722)); var result = vec4(0.0); // Layer 0: 7→4 (RGBD output, normalizes [0,1] input) @@ -42,12 +43,12 @@ struct CNNLayerParams { } else if (params.layer_index == 1) { result = cnn_conv5x5_7to4(txt, smplr, uv, uniforms.resolution, - original, weights_layer1); + gray, weights_layer1); result = cnn_tanh(result); // Keep in [-1,1] } else if (params.layer_index == 2) { let gray_out = cnn_conv3x3_7to1(txt, smplr, uv, uniforms.resolution, - original, weights_layer2); + gray, weights_layer2); // gray_out already in [0,1] from clipped training result = vec4(gray_out, gray_out, gray_out, 1.0); return mix(original_raw, result, params.blend_amount); // [0,1] -- cgit v1.2.3