From 8b30cadfc19647487986d14dba9ddba7908dd1d0 Mon Sep 17 00:00:00 2001
From: skal
Date: Thu, 12 Feb 2026 15:10:17 +0100
Subject: test_demo: Add beat-synchronized CNN post-processing with version selection

- Add --cnn-version <1|2> flag to select between CNN v1 and v2
- Implement beat_phase modulation for dynamic blend in both CNN effects
- Fix CNN v2 per-layer uniform buffer sharing (each layer needs own buffer)
- Fix CNN v2 y-axis orientation to match render pass convention
- Add Scene1Effect as base visual layer to test_demo timeline
- Reorganize CNN v2 shaders into cnn_v2/ subdirectory
- Update asset paths and documentation for new shader organization

Co-Authored-By: Claude Sonnet 4.5
---
 workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl | 155 +++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl

(limited to 'workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl')

diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
new file mode 100644
index 0000000..1e1704d
--- /dev/null
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
@@ -0,0 +1,155 @@
+// CNN v2 Compute Shader - Storage Buffer Version
+// Processes single layer per dispatch with weights from storage buffer
+// Multi-layer execution handled by C++ with ping-pong buffers
+
+// Layer parameters, bound as a uniform buffer (each layer needs its own buffer)
+struct LayerParams {
+  kernel_size: u32,        // Kernel width/height in taps; indexing below assumes an odd size
+  in_channels: u32,        // 8 static channels + previous-layer channels
+  out_channels: u32,       // Channels to compute (capped at 8 by the packed output)
+  weight_offset: u32,      // Offset in f16 units
+  is_output_layer: u32,    // 1 if final layer (clamp to [0,1] + blend), 0 otherwise (relu)
+  blend_amount: f32,       // [0,1] blend with original
+}
+
+@group(0) @binding(0) var static_features: texture_2d<u32>;  // 8-channel static features
+@group(0) @binding(1) var layer_input: texture_2d<u32>;  // Previous layer output (8-channel packed)
+@group(0) @binding(2) var output_tex: texture_storage_2d<rgba32uint, write>;  // Current layer output
+@group(0) @binding(3) var<storage, read> weights_buffer: array<u32>;  // Packed f16 weights
+@group(0) @binding(4) var<uniform> params: LayerParams;
+@group(0) @binding(5) var original_input: texture_2d<f32>;  // Original RGB input for blending
+
+// Load one static-feature texel and expand its 4 u32 components into 8 floats
+// (two f16 values per component, low 16 bits first)
+fn unpack_static_features(coord: vec2<i32>) -> array<f32, 8> {
+  let packed = textureLoad(static_features, coord, 0);
+  let v0 = unpack2x16float(packed.x);
+  let v1 = unpack2x16float(packed.y);
+  let v2 = unpack2x16float(packed.z);
+  let v3 = unpack2x16float(packed.w);
+  return array<f32, 8>(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y);
+}
+
+// Same unpacking as above, applied to the previous layer's output texture
+fn unpack_layer_channels(coord: vec2<i32>) -> array<f32, 8> {
+  let packed = textureLoad(layer_input, coord, 0);
+  let v0 = unpack2x16float(packed.x);
+  let v1 = unpack2x16float(packed.y);
+  let v2 = unpack2x16float(packed.z);
+  let v3 = unpack2x16float(packed.w);
+  return array<f32, 8>(v0.x, v0.y, v1.x, v1.y, v2.x, v2.y, v3.x, v3.y);
+}
+
+// Pack 8 floats into 4 u32 components (two f16 values each), inverse of the unpackers
+fn pack_channels(values: array<f32, 8>) -> vec4<u32> {
+  return vec4<u32>(
+    pack2x16float(vec2<f32>(values[0], values[1])),
+    pack2x16float(vec2<f32>(values[2], values[3])),
+    pack2x16float(vec2<f32>(values[4], values[5])),
+    pack2x16float(vec2<f32>(values[6], values[7]))
+  );
+}
+
+// Get weight from storage buffer (f16 packed as u32 pairs)
+// Buffer layout: [header: 4 u32][layer_info: N×5 u32][weights: packed f16]
+// idx is used as-is: nothing is skipped here, so params.weight_offset is expected
+// to already account for the header + layer_info prefix
+// TODO: Support 8-bit quantized weights (4× per u32) for 2× size reduction
+fn get_weight(idx: u32) -> f32 {
+  let pair_idx = idx / 2u;
+  let packed = weights_buffer[pair_idx];
+  let unpacked = unpack2x16float(packed);
+  // Even idx -> low half (x), odd idx -> high half (y)
+  return select(unpacked.y, unpacked.x, (idx & 1u) == 0u);
+}
+
+// Entry point: one invocation per output pixel, 8x8 pixels per workgroup
+@compute @workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+  let coord = vec2<i32>(id.xy);
+  let dims = textureDimensions(static_features);
+
+  // Skip invocations that fall outside the image
+  if (coord.x >= i32(dims.x) || coord.y >= i32(dims.y)) {
+    return;
+  }
+
+  let kernel_size = params.kernel_size;
+  let in_channels = params.in_channels;
+  let weight_offset = params.weight_offset;
+  let is_output = params.is_output_layer != 0u;
+
+  let kernel_radius = i32(kernel_size / 2u);
+  let kernel_area = kernel_size * kernel_size;
+
+  // Only 8 channels fit in one packed output texel
+  let active_out = min(params.out_channels, 8u);
+  // Previous-layer channels follow the 8 static ones (at most 8 are available).
+  // max() keeps the u32 subtraction from wrapping when in_channels < 8.
+  let prev_channels = min(max(in_channels, 8u) - 8u, 8u);
+  // Weights for one output channel: in_channels blocks of kernel_area values
+  let out_stride = in_channels * kernel_area;
+
+  // Per-channel accumulators; zero-initialized, so unused channels stay 0
+  var output: array<f32, 8>;
+
+  // Convolve over kernel; each tap is loaded once and shared by all output channels
+  for (var ky: i32 = -kernel_radius; ky <= kernel_radius; ky++) {
+    for (var kx: i32 = -kernel_radius; kx <= kernel_radius; kx++) {
+      let sample_coord = coord + vec2<i32>(kx, ky);
+
+      // Border handling (clamp)
+      let clamped = vec2<i32>(
+        clamp(sample_coord.x, 0, i32(dims.x) - 1),
+        clamp(sample_coord.y, 0, i32(dims.y) - 1)
+      );
+
+      // Load input features at this spatial location
+      let static_local = unpack_static_features(clamped);
+      let layer_local = unpack_layer_channels(clamped);
+
+      // Weight index calculation
+      let ky_idx = u32(ky + kernel_radius);
+      let kx_idx = u32(kx + kernel_radius);
+      let spatial_idx = ky_idx * kernel_size + kx_idx;
+
+      for (var c: u32 = 0u; c < active_out; c++) {
+        let w_base = weight_offset + c * out_stride + spatial_idx;
+        var sum = output[c];
+
+        // Accumulate: static features (always 8 channels)
+        for (var i: u32 = 0u; i < 8u; i++) {
+          sum += get_weight(w_base + i * kernel_area) * static_local[i];
+        }
+
+        // Accumulate: previous layer channels (in_channels - 8)
+        for (var i: u32 = 0u; i < prev_channels; i++) {
+          sum += get_weight(w_base + (8u + i) * kernel_area) * layer_local[i];
+        }
+
+        output[c] = sum;
+      }
+    }
+  }
+
+  // Activation
+  for (var c: u32 = 0u; c < active_out; c++) {
+    if (is_output) {
+      output[c] = clamp(output[c], 0.0, 1.0);  // Hard clamp to [0,1] (sigmoid approximation)
+    } else {
+      output[c] = max(0.0, output[c]);  // ReLU
+    }
+  }
+
+  // Blend with original on final layer
+  if (is_output) {
+    let original = textureLoad(original_input, coord, 0).rgb;
+    let result_rgb = vec3<f32>(output[0], output[1], output[2]);
+    let blended = mix(original, result_rgb, params.blend_amount);
+    output[0] = blended.r;
+    output[1] = blended.g;
+    output[2] = blended.b;
+  }
+
+  textureStore(output_tex, coord, pack_channels(output));
+}
-- 
cgit v1.2.3