From c49d828f101b435d73a76fcfc8444cf76aeda22f Mon Sep 17 00:00:00 2001 From: skal Date: Wed, 11 Feb 2026 00:26:25 +0100 Subject: opt: Move invariant in1 calculation outside CNN convolution loops The in1 vector (uv_norm, gray, 1.0) is loop-invariant and doesn't depend on dx/dy offset. Moving it outside the convolution loop eliminates redundant computation and enables better SIMD optimization. Updated both shader files and train.py code generation. Co-Authored-By: Claude Sonnet 4.5 --- workspaces/main/shaders/cnn/cnn_conv3x3.wgsl | 4 ++-- workspaces/main/shaders/cnn/cnn_conv5x5.wgsl | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'workspaces/main') diff --git a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl index c032767..1a5a3e1 100644 --- a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl +++ b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl @@ -19,6 +19,7 @@ fn cnn_conv3x3_7to4_src( // Normalize UV to [-1,1] let uv_norm = (uv - 0.5) * 2.0; + let in1 = vec4(uv_norm, gray, 1.0); var sum = vec4(0.0); @@ -27,7 +28,6 @@ fn cnn_conv3x3_7to4_src( for (var dx = -1; dx <= 1; dx++) { let offset = vec2(f32(dx), f32(dy)) * step; let rgbd = (textureSample(tex, samp, uv + offset) - .5) * 2.0; - let in1 = vec4(uv_norm, gray, 1.0); sum.r += dot(weights[pos+0], rgbd) + dot(weights[pos+1], in1); sum.g += dot(weights[pos+2], rgbd) + dot(weights[pos+3], in1); @@ -93,6 +93,7 @@ fn cnn_conv3x3_7to1( // Normalize UV to [-1,1] let uv_norm = (uv - 0.5) * 2.0; + let in1 = vec4(uv_norm, gray, 1.0); var sum = 0.0; @@ -101,7 +102,6 @@ fn cnn_conv3x3_7to1( for (var dx = -1; dx <= 1; dx++) { let offset = vec2(f32(dx), f32(dy)) * step; let rgbd = textureSample(tex, samp, uv + offset); - let in1 = vec4(uv_norm, gray, 1.0); sum += dot(weights[pos], rgbd) + dot(weights[pos+1], in1); pos += 2; diff --git a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl index 119930f..ba2a4b7 100644 --- a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl +++ b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl @@ -12,6 +12,7 @@ fn cnn_conv5x5_7to4( ) -> vec4 { let step = 1.0 / resolution; let uv_norm = (uv - 0.5) * 2.0; + let in1 = vec4(uv_norm, gray, 1.0); var sum = vec4(0.0); var pos = 0; @@ -20,7 +21,6 @@ fn cnn_conv5x5_7to4( for (var dx = -2; dx <= 2; dx++) { let offset = vec2(f32(dx), f32(dy)) * step; let rgbd = textureSample(tex, samp, uv + offset); - let in1 = vec4(uv_norm, gray, 1.0); sum.r += dot(weights[pos+0], rgbd) + dot(weights[pos+1], in1); sum.g += dot(weights[pos+2], rgbd) + dot(weights[pos+3], in1); @@ -47,6 +47,7 @@ fn cnn_conv5x5_7to1( ) -> f32 { let step = 1.0 / resolution; let uv_norm = (uv - 0.5) * 2.0; + let in1 = vec4(uv_norm, gray, 1.0); var sum = 0.0; var pos = 0; @@ -55,7 +56,6 @@ fn cnn_conv5x5_7to1( for (var dx = -2; dx <= 2; dx++) { let offset = vec2(f32(dx), f32(dy)) * step; let rgbd = textureSample(tex, samp, uv + offset); - let in1 = vec4(uv_norm, gray, 1.0); sum += dot(weights[pos], rgbd) + dot(weights[pos+1], in1); pos += 2; @@ -79,6 +79,7 @@ fn cnn_conv5x5_7to4_src( let original = (textureSample(tex, samp, uv) - 0.5) * 2.0; let gray = dot(original.rgb, vec3(0.2126, 0.7152, 0.0722)); let uv_norm = (uv - 0.5) * 2.0; + let in1 = vec4(uv_norm, gray, 1.0); var sum = vec4(0.0); var pos = 0; @@ -87,7 +88,6 @@ fn cnn_conv5x5_7to4_src( for (var dx = -2; dx <= 2; dx++) { let offset = vec2(f32(dx), f32(dy)) * step; let rgbd = (textureSample(tex, samp, uv + offset) - 0.5) * 2.0; - let in1 = vec4(uv_norm, gray, 1.0); sum.r += dot(weights[pos+0], rgbd) + dot(weights[pos+1], in1); sum.g += dot(weights[pos+2], rgbd) + dot(weights[pos+3], in1); -- cgit v1.2.3