diff options
| -rw-r--r-- | doc/CNN_EFFECT.md | 14 | ||||
| -rwxr-xr-x | training/train_cnn.py | 23 | ||||
| -rw-r--r-- | workspaces/main/shaders/cnn/cnn_conv3x3.wgsl | 24 | ||||
| -rw-r--r-- | workspaces/main/shaders/cnn/cnn_conv5x5.wgsl | 16 | ||||
| -rw-r--r-- | workspaces/main/shaders/cnn/cnn_layer.wgsl | 24 |
5 files changed, 46 insertions, 55 deletions
diff --git a/doc/CNN_EFFECT.md b/doc/CNN_EFFECT.md index b7d157f..d51c187 100644 --- a/doc/CNN_EFFECT.md +++ b/doc/CNN_EFFECT.md @@ -38,7 +38,7 @@ fn cnn_conv3x3_7to4( samp: sampler, uv: vec2<f32>, resolution: vec2<f32>, - original: vec4<f32>, # Original RGBD [0,1] + original: vec4<f32>, # Original RGBD [-1,1] weights: array<array<f32, 8>, 36> # 9 pos × 4 out × (7 weights + bias) ) -> vec4<f32> @@ -53,12 +53,14 @@ fn cnn_conv3x3_7to1( ) -> f32 ``` -**Input normalization (all to [-1,1]):** -- RGBD: `(rgbd - 0.5) * 2` -- UV coords: `(uv - 0.5) * 2` -- Grayscale: `(0.2126*R + 0.7152*G + 0.0722*B - 0.5) * 2` +**Input normalization:** +- **fs_main** normalizes textures once: `(tex - 0.5) * 2` → [-1,1] +- **Conv functions** normalize UV coords: `(uv - 0.5) * 2` → [-1,1] +- **Grayscale** computed from normalized RGBD: `0.2126*R + 0.7152*G + 0.0722*B` +- **Inter-layer data** stays in [-1,1] (no denormalization) +- **Final output** denormalized for display: `(result + 1.0) * 0.5` → [0,1] -**Activation:** tanh for inner layers, none for final layer +**Activation:** tanh for inner layers (output stays [-1,1]), none for final layer ### Multi-Layer Architecture diff --git a/training/train_cnn.py b/training/train_cnn.py index 8c7b2b3..2250e9c 100755 --- a/training/train_cnn.py +++ b/training/train_cnn.py @@ -167,8 +167,10 @@ def generate_layer_shader(output_path, num_layers, kernel_sizes): f.write("}\n\n") f.write("@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> {\n") f.write(" let uv = p.xy / uniforms.resolution;\n") - f.write(" let input = textureSample(txt, smplr, uv);\n") - f.write(" let original = textureSample(original_input, smplr, uv);\n") + f.write(" let input_raw = textureSample(txt, smplr, uv);\n") + f.write(" let input = (input_raw - 0.5) * 2.0; // Normalize to [-1,1]\n") + f.write(" let original_raw = textureSample(original_input, smplr, uv);\n") + f.write(" let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1]\n") f.write(" var result = vec4<f32>(0.0);\n\n") # Generate layer switches @@ -182,25 +184,19 @@ def generate_layer_shader(output_path, num_layers, kernel_sizes): f.write(f" if (params.layer_index == {layer_idx}) {{\n") f.write(f" result = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n") f.write(f" original, weights_layer{layer_idx});\n") - f.write(f" result = cnn_tanh(result); // Output in [-1,1]\n") - f.write(f" // Denormalize to [0,1] for texture storage\n") - f.write(f" result = (result + 1.0) * 0.5;\n") + f.write(f" result = cnn_tanh(result); // Keep in [-1,1]\n") f.write(f" }}\n") elif not is_final: f.write(f" else if (params.layer_index == {layer_idx}) {{\n") f.write(f" result = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n") f.write(f" original, weights_layer{layer_idx});\n") - f.write(f" result = cnn_tanh(result); // Output in [-1,1]\n") - f.write(f" // Denormalize to [0,1] for texture storage\n") - f.write(f" result = (result + 1.0) * 0.5;\n") + f.write(f" result = cnn_tanh(result); // Keep in [-1,1]\n") f.write(f" }}\n") else: f.write(f" else if (params.layer_index == {layer_idx}) {{\n") f.write(f" let gray_out = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n") f.write(f" original, weights_layer{layer_idx});\n") - f.write(f" // Denormalize from [-1,1] to [0,1]\n") - f.write(f" let gray_01 = (gray_out + 1.0) * 0.5;\n") - f.write(f" result = vec4<f32>(gray_01, gray_01, gray_01, 1.0); // Expand to RGB\n") + f.write(f" result = vec4<f32>(gray_out, gray_out, gray_out, 1.0); // Keep in [-1,1]\n") f.write(f" }}\n") # Add else clause for invalid layer index @@ -209,8 +205,9 @@ def generate_layer_shader(output_path, num_layers, kernel_sizes): f.write(f" result = input;\n") f.write(f" }}\n") - f.write("\n // Blend with ORIGINAL input from layer 0\n") - f.write(" return mix(original, result, params.blend_amount);\n") + f.write("\n // Blend with ORIGINAL input from layer 0 and denormalize for display\n") + f.write(" let blended = mix(original, result, params.blend_amount);\n") + f.write(" return (blended + 1.0) * 0.5; // Denormalize to [0,1] for display\n") f.write("}\n") diff --git a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl index df58b4d..b895504 100644 --- a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl +++ b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl @@ -53,6 +53,8 @@ fn cnn_conv3x3_with_coord( } // Inner layers: 7→4 channels (RGBD output) +// Assumes 'tex' and 'original' are already normalized to [-1,1] +// UV coordinates remain in [0,1] and are normalized internally // weights: array<array<f32, 8>, 36> (9 positions × 4 channels, each with 7 weights + bias) fn cnn_conv3x3_7to4( tex: texture_2d<f32>, @@ -64,9 +66,8 @@ fn cnn_conv3x3_7to4( ) -> vec4<f32> { let step = 1.0 / resolution; - // Compute grayscale from original and normalize to [-1,1] - let gray_01 = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; - let gray = (gray_01 - 0.5) * 2.0; + // Compute grayscale from original (already in [-1,1]) + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; // Normalize UV to [-1,1] let uv_norm = (uv - 0.5) * 2.0; @@ -77,10 +78,7 @@ fn cnn_conv3x3_7to4( for (var dy = -1; dy <= 1; dy++) { for (var dx = -1; dx <= 1; dx++) { let offset = vec2<f32>(f32(dx), f32(dy)) * step; - let rgbd_01 = textureSample(tex, samp, uv + offset); - - // Normalize RGBD to [-1,1] - let rgbd = (rgbd_01 - 0.5) * 2.0; + let rgbd = textureSample(tex, samp, uv + offset); // Already in [-1,1] // 7-channel input: [R,G,B,D, uv.x, uv.y, gray] all in [-1,1] let inputs = array<f32, 7>( @@ -106,6 +104,8 @@ fn cnn_conv3x3_7to4( } // Final layer: 7→1 channel (scalar output) +// Assumes 'tex' and 'original' are already normalized to [-1,1] +// UV coordinates remain in [0,1] and are normalized internally // weights: array<array<f32, 8>, 9> (9 positions, each with 7 weights + bias) fn cnn_conv3x3_7to1( tex: texture_2d<f32>, @@ -117,9 +117,8 @@ fn cnn_conv3x3_7to1( ) -> f32 { let step = 1.0 / resolution; - // Normalize grayscale to [-1,1] - let gray_01 = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; - let gray = (gray_01 - 0.5) * 2.0; + // Compute grayscale from original (already in [-1,1]) + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; // Normalize UV to [-1,1] let uv_norm = (uv - 0.5) * 2.0; @@ -130,10 +129,7 @@ fn cnn_conv3x3_7to1( for (var dy = -1; dy <= 1; dy++) { for (var dx = -1; dx <= 1; dx++) { let offset = vec2<f32>(f32(dx), f32(dy)) * step; - let rgbd_01 = textureSample(tex, samp, uv + offset); - - // Normalize RGBD to [-1,1] - let rgbd = (rgbd_01 - 0.5) * 2.0; + let rgbd = textureSample(tex, samp, uv + offset); // Already in [-1,1] // 7-channel input all in [-1,1] sum += weights[pos][0] * rgbd.r; diff --git a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl index 15eaf96..bfb4ebb 100644 --- a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl +++ b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl @@ -53,6 +53,8 @@ fn cnn_conv5x5_with_coord( } // 5×5 variant for 7→4 channels (RGBD output) +// Assumes 'tex' and 'original' are already normalized to [-1,1] +// UV coordinates remain in [0,1] and are normalized internally // weights: array<array<f32, 8>, 100> (25 positions × 4 channels, each with 7 weights + bias) fn cnn_conv5x5_7to4( tex: texture_2d<f32>, @@ -64,8 +66,7 @@ fn cnn_conv5x5_7to4( ) -> vec4<f32> { let step = 1.0 / resolution; - let gray_01 = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; - let gray = (gray_01 - 0.5) * 2.0; + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; let uv_norm = (uv - 0.5) * 2.0; var sum = vec4<f32>(0.0); @@ -74,8 +75,7 @@ fn cnn_conv5x5_7to4( for (var dy = -2; dy <= 2; dy++) { for (var dx = -2; dx <= 2; dx++) { let offset = vec2<f32>(f32(dx), f32(dy)) * step; - let rgbd_01 = textureSample(tex, samp, uv + offset); - let rgbd = (rgbd_01 - 0.5) * 2.0; + let rgbd = textureSample(tex, samp, uv + offset); // Already in [-1,1] let inputs = array<f32, 7>( rgbd.r, rgbd.g, rgbd.b, rgbd.a, @@ -98,6 +98,8 @@ fn cnn_conv5x5_7to4( } // 5×5 variant for 7→1 channel (scalar output) +// Assumes 'tex' and 'original' are already normalized to [-1,1] +// UV coordinates remain in [0,1] and are normalized internally // weights: array<array<f32, 8>, 25> (25 positions, each with 7 weights + bias) fn cnn_conv5x5_7to1( tex: texture_2d<f32>, @@ -109,8 +111,7 @@ fn cnn_conv5x5_7to1( ) -> f32 { let step = 1.0 / resolution; - let gray_01 = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; - let gray = (gray_01 - 0.5) * 2.0; + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; let uv_norm = (uv - 0.5) * 2.0; var sum = 0.0; @@ -119,8 +120,7 @@ fn cnn_conv5x5_7to1( for (var dy = -2; dy <= 2; dy++) { for (var dx = -2; dx <= 2; dx++) { let offset = vec2<f32>(f32(dx), f32(dy)) * step; - let rgbd_01 = textureSample(tex, samp, uv + offset); - let rgbd = (rgbd_01 - 0.5) * 2.0; + let rgbd = textureSample(tex, samp, uv + offset); // Already in [-1,1] sum += weights[pos][0] * rgbd.r; sum += weights[pos][1] * rgbd.g; diff --git a/workspaces/main/shaders/cnn/cnn_layer.wgsl b/workspaces/main/shaders/cnn/cnn_layer.wgsl index fad283c..f97f798 100644 --- a/workspaces/main/shaders/cnn/cnn_layer.wgsl +++ b/workspaces/main/shaders/cnn/cnn_layer.wgsl @@ -30,37 +30,33 @@ struct CNNLayerParams { @fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> { let uv = p.xy / uniforms.resolution; - let input = textureSample(txt, smplr, uv); - let original = textureSample(original_input, smplr, uv); + let input_raw = textureSample(txt, smplr, uv); + let input = (input_raw - 0.5) * 2.0; // Normalize to [-1,1] + let original_raw = textureSample(original_input, smplr, uv); + let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1] var result = vec4<f32>(0.0); // Layer 0: 7→4 (RGBD output) if (params.layer_index == 0) { result = cnn_conv3x3_7to4(txt, smplr, uv, uniforms.resolution, original, weights_layer0); - result = cnn_tanh(result); // Output in [-1,1] - // Denormalize to [0,1] for texture storage - result = (result + 1.0) * 0.5; + result = cnn_tanh(result); // Keep in [-1,1] } else if (params.layer_index == 1) { result = cnn_conv5x5_7to4(txt, smplr, uv, uniforms.resolution, original, weights_layer1); - result = cnn_tanh(result); // Output in [-1,1] - // Denormalize to [0,1] for texture storage - result = (result + 1.0) * 0.5; + result = cnn_tanh(result); // Keep in [-1,1] } else if (params.layer_index == 2) { let gray_out = cnn_conv3x3_7to1(txt, smplr, uv, uniforms.resolution, original, weights_layer2); - // Denormalize from [-1,1] to [0,1] - let gray_01 = (gray_out + 1.0) * 0.5; - result = vec4<f32>(gray_01, gray_01, gray_01, 1.0); // Expand to RGB + result = vec4<f32>(gray_out, gray_out, gray_out, 1.0); // Keep in [-1,1] } else { result = input; } - // Blend with ORIGINAL input from layer 0 -return original; -// return mix(original, result, params.blend_amount); + // Blend with ORIGINAL input from layer 0 and denormalize for display + let blended = mix(original, result, params.blend_amount); + return (blended + 1.0) * 0.5; // Denormalize to [0,1] for display } |
