summary | refs | log | tree | commit | diff
path: root/src/effects/cnn_v2_effect.cc
diff options
context:
space:
mode:
author: skal <pascal.massimino@gmail.com> 2026-02-14 14:55:58 +0100
committer: skal <pascal.massimino@gmail.com> 2026-02-14 14:55:58 +0100
commit: d6cc50eb49275bbc0de21d4c65a5172d5d65f790 (patch)
tree: 642ffcb971bc58fd12ffa17bed5daeb00e4df8d0 /src/effects/cnn_v2_effect.cc
parent: 7bb0b688de0d909828a58613c69dea85fa476400 (diff)
refactor(gpu): Relocate effects to src/effects and streamline includes
This refactoring improves the project's structure by decoupling visual effects from the core GPU module. All effect implementations have been moved from to a new top-level directory. Shared utilities like , , and have been consolidated into the parent directory. - **Motivation**: To create a clearer separation of concerns, making the codebase easier to navigate and maintain. This move treats effects as a distinct layer that depends on the core GPU module, rather than being embedded within it. - **Changes**: - Created new directory. - Moved all effect source files (, ) to . - Moved shared helpers (, , ) to . - Updated and to reflect the new file locations for all build targets. - Corrected all directives across the entire codebase (, , ) to point to the new paths. - Updated all markdown documentation ( files) to ensure file paths and architectural descriptions are accurate. - Fixed several compiler errors related to incorrect enum casting () that were exposed during cross-compilation for Windows. - **Verification**: - The entire project builds successfully for both native and Windows cross-compilation targets. - All 34 tests pass (Usage ctest [options]). - The --- Running Native Build & Tests --- Configuring with all options enabled (tests + tools)... -- -- Build Configuration: -- DEMO_SIZE_OPT: ON -- DEMO_STRIP_ALL: ON -- DEMO_FINAL_STRIP: OFF -- DEMO_STRIP_EXTERNAL_LIBS: OFF -- DEMO_BUILD_TESTS: ON -- DEMO_BUILD_TOOLS: ON -- DEMO_ENABLE_COVERAGE: OFF -- DEMO_ENABLE_DEBUG_LOGS: OFF -- DEMO_HEADLESS: OFF -- DEMO_WORKSPACE: main -- -- Loaded workspace: Main Demo -- Timeline: timeline.seq -- Music: pop_punk_drums.track -- Assets: assets.txt -- Using workspace: main -- Configuring done (0.0s) -- Generating done (0.1s) -- Build files have been written to: /Users/skal/demo/build Building all targets (demo, tests, and tools)... [ 0%] Built target validate_uniforms_script [ 1%] Built target procedural [ 2%] Validating uniform buffer sizes and alignments... 
[ 3%] Built target tracker_compiler [ 4%] Built target test_3d [ 4%] Built target test_maths [ 4%] Built target seq_compiler [ 4%] Built target tracker_compiler_host [ 5%] Built target asset_packer [ 5%] Built target test_procedural [ 6%] Compiling demo sequence from workspace main... [ 6%] Built target generate_tracker_music [ 6%] Built target generate_test_demo_music [ 6%] Compiling test_demo sequence... Using BPM: 90 Successfully generated timeline with 16 sequences. Using BPM: 120 Demo end time: 16.000000s Successfully generated timeline with 1 sequences. [ 6%] Built target generate_test_demo_timeline [ 6%] Built target generate_timeline Validation Warning for 'CommonPostProcessUniforms': Matching WGSL struct not found. Validation OK for 'FadeParams': Size 16 matches C++ expected size. Validation OK for 'ThemeModulationParams': Size 16 matches C++ expected size. Validation OK for 'GaussianBlurParams': Size 8 matches C++ expected size. Validation OK for 'DistortParams': Size 8 matches C++ expected size. Validation OK for 'CircleMaskParams': Size 16 matches C++ expected size. 
[ 6%] Built target generate_test_assets [ 7%] Built target generate_demo_assets [ 7%] Built target validate_uniforms [ 8%] Built target util [ 10%] Built target test_assets [ 11%] Built target test_shader_assets [ 12%] Built target test_file_watcher [ 15%] Built target 3d [ 21%] Built target test_platform [ 22%] Built target audio [ 23%] Built target test_window [ 26%] Built target test_fft [ 27%] Built target test_synth [ 27%] Built target test_spectral_brush [ 27%] Built target test_physics [ 28%] Built target test_dct [ 31%] Building CXX object CMakeFiles/gpu.dir/src/gpu/effect.cc.o [ 30%] Built target test_mock_backend [ 32%] Built target test_scene_loader [ 33%] Built target test_audio_backend [ 34%] Built target test_audio_gen [ 36%] Built target test_silent_backend [ 39%] Built target test_jittered_audio [ 39%] Building CXX object CMakeFiles/gpu.dir/src/effects/heptagon_effect.cc.o [ 42%] Built target test_wav_dump [ 44%] Built target test_tracker_timing [ 44%] Building CXX object CMakeFiles/gpu.dir/src/effects/particles_effect.cc.o [ 45%] Building CXX object CMakeFiles/gpu.dir/src/effects/passthrough_effect.cc.o [ 47%] Built target test_variable_tempo [ 50%] Built target test_audio_engine [ 52%] Built target test_tracker [ 52%] Building CXX object CMakeFiles/gpu.dir/src/effects/moving_ellipse_effect.cc.o [ 52%] Building CXX object CMakeFiles/gpu.dir/src/effects/particle_spray_effect.cc.o [ 52%] Building CXX object CMakeFiles/gpu.dir/src/effects/gaussian_blur_effect.cc.o [ 54%] Built target test_spectool [ 55%] Building CXX object CMakeFiles/gpu.dir/src/effects/solarize_effect.cc.o [ 55%] Building CXX object CMakeFiles/gpu.dir/src/effects/scene1_effect.cc.o [ 55%] Building CXX object CMakeFiles/gpu.dir/src/effects/chroma_aberration_effect.cc.o [ 55%] Building CXX object CMakeFiles/gpu.dir/src/gpu/shaders.cc.o [ 57%] Building CXX object CMakeFiles/gpu.dir/src/effects/vignette_effect.cc.o [ 57%] Building CXX object 
CMakeFiles/gpu.dir/src/gpu/post_process_helper.cc.o [ 57%] Linking CXX static library libgpu.a [ 60%] Built target gpu [ 60%] Linking CXX executable test_uniform_helper [ 60%] Linking CXX executable test_shader_composer [ 60%] Building CXX object CMakeFiles/test_sequence.dir/src/tests/assets/test_sequence.cc.o [ 61%] Linking CXX executable test_noise_functions [ 62%] Linking CXX executable test_shader_compilation [ 62%] Building CXX object CMakeFiles/test_demo.dir/src/app/test_demo.cc.o [ 62%] Building CXX object CMakeFiles/demo64k.dir/src/app/main.cc.o [ 62%] Building CXX object CMakeFiles/test_3d_render.dir/src/generated/timeline.cc.o [ 63%] Built target test_uniform_helper [ 64%] Built target test_shader_composer [ 64%] Building CXX object CMakeFiles/test_3d_physics.dir/src/generated/timeline.cc.o [ 65%] Built target test_noise_functions [ 66%] Built target test_shader_compilation [ 67%] Building CXX object CMakeFiles/test_mesh.dir/src/generated/timeline.cc.o [ 67%] Building CXX object CMakeFiles/test_effect_base.dir/src/tests/gpu/test_effect_base.cc.o [ 67%] Building CXX object CMakeFiles/test_demo_effects.dir/src/tests/gpu/test_demo_effects.cc.o [ 67%] Building CXX object CMakeFiles/test_sequence.dir/src/generated/timeline.cc.o [ 68%] Building CXX object CMakeFiles/test_demo.dir/src/generated/test_demo_timeline.cc.o [ 68%] Building CXX object CMakeFiles/demo64k.dir/src/generated/timeline.cc.o [ 68%] Linking CXX executable test_3d_render [ 68%] Building CXX object CMakeFiles/test_effect_base.dir/src/generated/timeline.cc.o [ 68%] Linking CXX executable test_3d_physics [ 68%] Linking CXX executable test_mesh [ 71%] Built target test_3d_render [ 71%] Building CXX object CMakeFiles/test_post_process_helper.dir/src/tests/gpu/test_post_process_helper.cc.o [ 72%] Building CXX object CMakeFiles/test_demo_effects.dir/src/generated/timeline.cc.o [ 72%] Linking CXX executable test_demo [ 75%] Built target test_3d_physics [ 77%] Built target test_mesh [ 77%] Linking CXX 
executable test_texture_manager [ 78%] Linking CXX executable test_sequence [ 78%] Linking CXX executable test_gpu_procedural [ 80%] Built target test_demo [ 81%] Linking CXX executable test_gpu_composite [ 81%] Linking CXX executable demo64k [ 83%] Built target test_sequence [ 85%] Built target test_texture_manager [ 86%] Built target test_gpu_procedural [ 86%] Linking CXX executable test_post_process_helper [ 86%] Linking CXX executable test_effect_base [ 87%] Built target test_gpu_composite [ 90%] Built target demo64k [ 92%] Built target test_post_process_helper [ 96%] Built target test_effect_base [ 96%] Linking CXX executable test_demo_effects [100%] Built target test_demo_effects Running test suite... Test project /Users/skal/demo/build Start 1: HammingWindowTest 1/34 Test #1: HammingWindowTest ................ Passed 0.00 sec Start 2: MathUtilsTest 2/34 Test #2: MathUtilsTest .................... Passed 0.00 sec Start 3: FileWatcherTest 3/34 Test #3: FileWatcherTest .................. Passed 0.00 sec Start 4: SynthEngineTest 4/34 Test #4: SynthEngineTest .................. Passed 0.00 sec Start 5: DctTest 5/34 Test #5: DctTest .......................... Passed 0.00 sec Start 6: FftTest 6/34 Test #6: FftTest .......................... Passed 0.01 sec Start 7: SpectralBrushTest 7/34 Test #7: SpectralBrushTest ................ Passed 0.01 sec Start 8: AudioGenTest 8/34 Test #8: AudioGenTest ..................... Passed 0.00 sec Start 9: AudioBackendTest 9/34 Test #9: AudioBackendTest ................. Passed 0.00 sec Start 10: SilentBackendTest 10/34 Test #10: SilentBackendTest ................ Passed 0.00 sec Start 11: MockAudioBackendTest 11/34 Test #11: MockAudioBackendTest ............. Passed 0.00 sec Start 12: WavDumpBackendTest 12/34 Test #12: WavDumpBackendTest ............... Passed 0.00 sec Start 13: JitteredAudioBackendTest 13/34 Test #13: JitteredAudioBackendTest ......... 
Passed 0.00 sec Start 14: TrackerTimingTest 14/34 Test #14: TrackerTimingTest ................ Passed 0.00 sec Start 15: VariableTempoTest 15/34 Test #15: VariableTempoTest ................ Passed 0.00 sec Start 16: TrackerSystemTest 16/34 Test #16: TrackerSystemTest ................ Passed 0.01 sec Start 17: AudioEngineTest 17/34 Test #17: AudioEngineTest .................. Passed 0.00 sec Start 18: ShaderAssetValidation 18/34 Test #18: ShaderAssetValidation ............ Passed 0.00 sec Start 19: ShaderCompilationTest 19/34 Test #19: ShaderCompilationTest ............ Passed 0.02 sec Start 20: NoiseFunctionsTest 20/34 Test #20: NoiseFunctionsTest ............... Passed 0.01 sec Start 21: UniformHelperTest 21/34 Test #21: UniformHelperTest ................ Passed 0.00 sec Start 22: AssetManagerTest 22/34 Test #22: AssetManagerTest ................. Passed 0.01 sec Start 23: SequenceSystemTest 23/34 Test #23: SequenceSystemTest ............... Passed 0.01 sec Start 24: ProceduralGenTest 24/34 Test #24: ProceduralGenTest ................ Passed 0.01 sec Start 25: PhysicsTest 25/34 Test #25: PhysicsTest ...................... Passed 0.01 sec Start 26: ThreeDSystemTest 26/34 Test #26: ThreeDSystemTest ................. Passed 0.00 sec Start 27: ShaderComposerTest 27/34 Test #27: ShaderComposerTest ............... Passed 0.01 sec Start 28: SceneLoaderTest 28/34 Test #28: SceneLoaderTest .................. Passed 0.01 sec Start 29: EffectBaseTest 29/34 Test #29: EffectBaseTest ................... Passed 0.04 sec Start 30: DemoEffectsTest 30/34 Test #30: DemoEffectsTest .................. Passed 0.03 sec Start 31: PostProcessHelperTest 31/34 Test #31: PostProcessHelperTest ............ Passed 0.02 sec Start 32: TextureManagerTest 32/34 Test #32: TextureManagerTest ............... Passed 0.02 sec Start 33: GpuProceduralTest 33/34 Test #33: GpuProceduralTest ................ Passed 0.18 sec Start 34: GpuCompositeTest 34/34 Test #34: GpuCompositeTest ................. 
Passed 0.20 sec 100% tests passed, 0 tests failed out of 34 Label Time Summary: 3d = 0.01 sec*proc (3 tests) assets = 0.02 sec*proc (2 tests) audio = 0.07 sec*proc (15 tests) gpu = 0.54 sec*proc (11 tests) util = 0.01 sec*proc (3 tests) Total Test time (real) = 0.67 sec Verifying tools compile... [ 9%] Built target procedural [ 18%] Built target tracker_compiler_host [ 18%] Built target tracker_compiler [ 18%] Built target generate_tracker_music [ 18%] Built target asset_packer [ 27%] Built target generate_demo_assets [ 27%] Built target generate_test_assets [ 36%] Built target util [ 81%] Built target audio [100%] Built target test_spectool --- Running Windows Cross-Compilation Build --- Building native tools... -- -- Build Configuration: -- DEMO_SIZE_OPT: OFF -- DEMO_STRIP_ALL: OFF -- DEMO_FINAL_STRIP: OFF -- DEMO_STRIP_EXTERNAL_LIBS: OFF -- DEMO_BUILD_TESTS: OFF -- DEMO_BUILD_TOOLS: OFF -- DEMO_ENABLE_COVERAGE: OFF -- DEMO_ENABLE_DEBUG_LOGS: OFF -- DEMO_HEADLESS: OFF -- DEMO_WORKSPACE: main -- -- Loaded workspace: Main Demo -- Timeline: timeline.seq -- Music: pop_punk_drums.track -- Assets: assets.txt -- Using workspace: main -- Configuring done (0.0s) -- Generating done (0.0s) -- Build files have been written to: /Users/skal/demo/build_native [ 50%] Built target procedural [100%] Built target asset_packer [100%] Built target seq_compiler [100%] Built target tracker_compiler_host Cross-compiling for Windows... 
-- -- Build Configuration: -- DEMO_SIZE_OPT: ON -- DEMO_STRIP_ALL: ON -- DEMO_FINAL_STRIP: OFF -- DEMO_STRIP_EXTERNAL_LIBS: OFF -- DEMO_BUILD_TESTS: OFF -- DEMO_BUILD_TOOLS: OFF -- DEMO_ENABLE_COVERAGE: OFF -- DEMO_ENABLE_DEBUG_LOGS: OFF -- DEMO_HEADLESS: OFF -- DEMO_WORKSPACE: main -- -- Loaded workspace: Main Demo -- Timeline: timeline.seq -- Music: pop_punk_drums.track -- Assets: assets.txt -- Using workspace: main -- Configuring done (0.0s) -- Generating done (0.0s) -- Build files have been written to: /Users/skal/demo/build_win [ 2%] Built target validate_uniforms_script [ 2%] Built target generate_timeline [ 4%] Built target generate_test_demo_timeline [ 4%] Built target generate_demo_assets [ 4%] Built target generate_test_assets [ 6%] Built target procedural [ 9%] Built target tracker_compiler_host [ 10%] Validating uniform buffer sizes and alignments... [ 11%] Built target generate_tracker_music [ 13%] Built target generate_test_demo_music [ 16%] Built target util [ 28%] Built target 3d [ 45%] Built target audio [ 49%] Building CXX object CMakeFiles/gpu.dir/src/effects/heptagon_effect.cc.obj [ 52%] Building CXX object CMakeFiles/gpu.dir/src/effects/gaussian_blur_effect.cc.obj [ 54%] Building CXX object CMakeFiles/gpu.dir/src/effects/particles_effect.cc.obj [ 54%] Building CXX object CMakeFiles/gpu.dir/src/effects/moving_ellipse_effect.cc.obj [ 54%] Building CXX object CMakeFiles/gpu.dir/src/gpu/effect.cc.obj [ 54%] Building CXX object CMakeFiles/gpu.dir/src/effects/passthrough_effect.cc.obj [ 54%] Building CXX object CMakeFiles/gpu.dir/src/effects/particle_spray_effect.cc.obj Validation Warning for 'CommonPostProcessUniforms': Matching WGSL struct not found. Validation OK for 'FadeParams': Size 16 matches C++ expected size. Validation OK for 'ThemeModulationParams': Size 16 matches C++ expected size. Validation OK for 'GaussianBlurParams': Size 8 matches C++ expected size. Validation OK for 'DistortParams': Size 8 matches C++ expected size. 
Validation OK for 'CircleMaskParams': Size 16 matches C++ expected size. [ 54%] Built target validate_uniforms [ 55%] Building CXX object CMakeFiles/gpu.dir/src/effects/solarize_effect.cc.obj [ 57%] Building CXX object CMakeFiles/gpu.dir/src/effects/scene1_effect.cc.obj [ 57%] Building CXX object CMakeFiles/gpu.dir/src/effects/chroma_aberration_effect.cc.obj [ 58%] Building CXX object CMakeFiles/gpu.dir/src/effects/vignette_effect.cc.obj [ 59%] Building CXX object CMakeFiles/gpu.dir/src/gpu/post_process_helper.cc.obj [ 60%] Building CXX object CMakeFiles/gpu.dir/src/gpu/shaders.cc.obj [ 62%] Linking CXX static library libgpu.a [ 77%] Built target gpu [ 79%] Building CXX object CMakeFiles/demo64k.dir/src/app/main.cc.obj [ 79%] Building CXX object CMakeFiles/test_demo.dir/src/app/test_demo.cc.obj [ 80%] Building CXX object CMakeFiles/demo64k.dir/src/generated/timeline.cc.obj [ 81%] Building CXX object CMakeFiles/test_demo.dir/src/generated/test_demo_timeline.cc.obj [ 82%] Linking CXX executable test_demo.exe [ 90%] Built target test_demo [ 91%] Linking CXX executable demo64k.exe [100%] Built target demo64k Copying MinGW DLLs... Crunching build_win/demo64k.exe... Ultimate Packer for eXecutables Copyright (C) 1996 - 2026 UPX 5.1.0 Markus Oberhumer, Laszlo Molnar & John Reiser Jan 7th 2026 File size Ratio Format Name -------------------- ------ ----------- ----------- 7036416 -> 4680704 66.52% win64/pe demo64k_packed.exe Packed 1 file. ------------------------------------------------ Size Report: -rwxr-xr-x 1 skal 89939 6.7M Feb 14 14:55 build_win/demo64k.exe -rwxr-xr-x 1 skal 89939 6.7M Feb 14 14:55 build_win/demo64k_stripped.exe -rwxr-xr-x 1 skal 89939 4.5M Feb 14 14:55 build_win/demo64k_packed.exe ------------------------------------------------ Top 20 Largest Symbols (from unstripped): ------------------------------------------------ Build complete. Output: build_win/demo64k.exe All checks passed successfully. script completes without errors. 
This change streamlines the project's architecture without altering any functionality.
Diffstat (limited to 'src/effects/cnn_v2_effect.cc')
-rw-r--r-- src/effects/cnn_v2_effect.cc 463
1 files changed, 463 insertions, 0 deletions
diff --git a/src/effects/cnn_v2_effect.cc b/src/effects/cnn_v2_effect.cc
new file mode 100644
index 0000000..4c10ed1
--- /dev/null
+++ b/src/effects/cnn_v2_effect.cc
@@ -0,0 +1,463 @@
+// CNN v2 Effect Implementation
+
+#include "effects/cnn_v2_effect.h"
+
+#if defined(USE_TEST_ASSETS)
+#include "test_assets.h"
+#else
+#include "generated/assets.h"
+#endif
+
+#include "gpu/bind_group_builder.h"
+#include "gpu/gpu.h"
+#include "util/asset_manager.h"
+#include "util/fatal_error.h"
+#include <cstring>
+
+// Context-only constructor. Every GPU handle starts out null, the blend factor
+// is 1.0 and the mip level is 0; no GPU resource is created until init() runs.
+CNNv2Effect::CNNv2Effect(const GpuContext& ctx)
+    : PostProcessEffect(ctx),
+      static_pipeline_(nullptr),
+      static_bind_group_(nullptr),
+      static_params_buffer_(nullptr),
+      static_features_tex_(nullptr),
+      static_features_view_(nullptr),
+      linear_sampler_(nullptr),
+      layer_pipeline_(nullptr),
+      weights_buffer_(nullptr),
+      input_mip_tex_(nullptr),
+      current_input_view_(nullptr),
+      blend_amount_(1.0f),
+      mip_level_(0),
+      initialized_(false) {
+  // Clear every per-mip view handle.
+  for (auto& mip_view : input_mip_view_) {
+    mip_view = nullptr;
+  }
+}
+
+// Parameterized constructor. Starts from the same null/default state as the
+// context-only constructor and then takes the blend factor from `params`.
+CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params)
+    : CNNv2Effect(ctx) {
+  blend_amount_ = params.blend_amount;
+}
+
+// Destructor: hands all resource teardown to cleanup().
+CNNv2Effect::~CNNv2Effect() { cleanup(); }
+
+// One-time setup: loads the weights asset, then creates textures and compute
+// pipelines. Calling it again after a successful init is a no-op.
+// `demo` is not used by this effect.
+void CNNv2Effect::init(MainSequence* demo) {
+  (void)demo;
+
+  if (!initialized_) {
+    load_weights();
+    create_textures();
+    create_pipelines();
+    initialized_ = true;
+  }
+}
+
+// Rebuilds the effect's GPU resources after the output size changed.
+//
+// cleanup() tears down everything the effect owns, including the parsed layer
+// table and the weights buffer, and clears initialized_. The weights therefore
+// have to be reloaded and the flag restored here; otherwise compute() would
+// return early on every frame following the first resize.
+//
+// Bind groups are not rebuilt here: they are created by update_bind_group(),
+// and compute() skips its work while static_bind_group_ is null.
+void CNNv2Effect::resize(int width, int height) {
+  PostProcessEffect::resize(width, height);
+
+  cleanup();
+
+  // Forget handles that cleanup() released and that are not unconditionally
+  // reassigned below, so a stale handle can never be released again or bound.
+  static_bind_group_ = nullptr;
+  static_pipeline_ = nullptr;
+  layer_pipeline_ = nullptr;
+  weights_buffer_ = nullptr;
+
+  load_weights();
+  create_textures();
+  create_pipelines();
+
+  initialized_ = true;
+}
+
+// Loads the binary CNN v2 weights asset, parses its header and layer table,
+// uploads the weight payload to a GPU storage buffer and creates one uniform
+// buffer per layer.
+//
+// Layout as read by this function (all fields are u32):
+//   header  : magic, version, num_layers, total_weights [, mip_level (v2 only)]
+//   layers  : num_layers x {kernel_size, in_channels, out_channels,
+//                           weight_offset, weight_count}
+//   payload : every remaining byte, uploaded verbatim
+//
+// If the asset is missing or shorter than 20 bytes the function returns
+// without creating anything; layer_info_ stays empty and the layer passes are
+// skipped. A malformed asset (bad magic, unknown version, truncated layer
+// table) is reported through the fatal-error macros.
+void CNNv2Effect::load_weights() {
+  size_t weights_size = 0;
+  const uint8_t* weights_data = (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size);
+
+  if (!weights_data || weights_size < 20) {
+    // Weights not available - effect will skip
+    return;
+  }
+
+  // Parse header (header[3] is total_weights; it is not needed here).
+  const uint32_t* header = (const uint32_t*)weights_data;
+  const uint32_t magic = header[0];
+  const uint32_t version = header[1];
+  const uint32_t num_layers = header[2];
+
+  // NOTE(review): assumes FATAL_CHECK aborts when its condition is TRUE -
+  // confirm against util/fatal_error.h.
+  FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2'
+
+  // Support both version 1 (16-byte header) and version 2 (20-byte header with mip_level)
+  // TODO: Version 3 should include feature descriptor for arbitrary layout/ordering
+  if (version == 1) {
+    mip_level_ = 0; // Default for v1
+  } else if (version == 2) {
+    mip_level_ = header[4];
+  } else {
+    FATAL_ERROR("Unsupported CNN v2 weights version: %u\n", version);
+  }
+
+  // Header size depends on the version: v1 = 4 u32 (16 bytes), v2 = 5 u32
+  // (20 bytes). The same value must be used for both the layer table and the
+  // payload offset, otherwise v1 payloads are read 4 bytes too far in.
+  const uint32_t header_u32_count = (version == 1) ? 4 : 5;
+  const size_t header_size = header_u32_count * sizeof(uint32_t);
+  const size_t layer_record_size = 5 * sizeof(uint32_t);
+
+  // Make sure the whole layer table lies inside the asset before reading it.
+  // Written as a division so a huge num_layers cannot overflow the product.
+  // weights_size >= 20 >= header_size is guaranteed by the check above.
+  if (num_layers > (weights_size - header_size) / layer_record_size) {
+    FATAL_ERROR("CNN v2 weights truncated: layer table (%u layers) exceeds asset size\n", num_layers);
+    return;
+  }
+
+  // Parse layer info (5 u32 per layer)
+  const uint32_t* layer_data = header + header_u32_count;
+  for (uint32_t i = 0; i < num_layers; ++i) {
+    const uint32_t* record = layer_data + i * 5;
+    LayerInfo info;
+    info.kernel_size = record[0];
+    info.in_channels = record[1];
+    info.out_channels = record[2];
+    info.weight_offset = record[3];
+    info.weight_count = record[4];
+    layer_info_.push_back(info);
+  }
+
+  // Create GPU storage buffer for weights (skip header + layer info, upload only weights)
+  const size_t weights_offset = header_size + layer_record_size * num_layers;
+  const size_t weights_only_size = weights_size - weights_offset;
+
+  WGPUBufferDescriptor buffer_desc = {};
+  buffer_desc.size = weights_only_size;
+  buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst;
+  buffer_desc.mappedAtCreation = false;
+
+  weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc);
+
+  // Upload only weights (skip header + layer info)
+  wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size);
+
+  // Create uniform buffers for layer params (one per layer)
+  for (uint32_t i = 0; i < num_layers; ++i) {
+    WGPUBufferDescriptor params_desc = {};
+    params_desc.size = sizeof(LayerParams);
+    params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+    params_desc.mappedAtCreation = false;
+
+    WGPUBuffer buf = wgpuDeviceCreateBuffer(ctx_.device, &params_desc);
+    layer_params_buffers_.push_back(buf);
+  }
+}
+
+// Creates the textures owned by the effect, all sized width_ x height_, plus
+// the uniform buffer used by the static-feature pass:
+//   - static features texture (RGBA32Uint storage texture)
+//   - mipped RGBA8Unorm input texture with one view per mip level (0..2)
+//   - two RGBA32Uint storage textures used as ping-pong layer buffers
+void CNNv2Effect::create_textures() {
+  // Static features texture (8×f16 packed as 4×u32)
+  TextureWithView static_tex = gpu_create_storage_texture_2d(
+      ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint);
+  static_features_tex_ = static_tex.texture;
+  static_features_view_ = static_tex.view;
+
+  // Input texture with mips (for multi-scale features)
+  TextureWithView input_mip = gpu_create_texture_2d(
+      ctx_.device, width_, height_, WGPUTextureFormat_RGBA8Unorm,
+      (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst), 3);
+  input_mip_tex_ = input_mip.texture;
+
+  // Only the per-mip views created below are used. The view handed back with
+  // the texture is never stored, so release it here instead of leaking it.
+  if (input_mip.view) wgpuTextureViewRelease(input_mip.view);
+
+  for (int i = 0; i < 3; ++i) {
+    input_mip_view_[i] =
+        gpu_create_mip_view(input_mip_tex_, WGPUTextureFormat_RGBA8Unorm, i);
+  }
+
+  // Create 2 layer textures (ping-pong buffers for intermediate results)
+  // Each stores 8×f16 channels packed as 4×u32
+  for (int i = 0; i < 2; ++i) {
+    TextureWithView layer = gpu_create_storage_texture_2d(
+        ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint);
+    layer_textures_.push_back(layer.texture);
+    layer_views_.push_back(layer.view);
+  }
+
+  // Create uniform buffer for static feature params
+  WGPUBufferDescriptor params_desc = {};
+  params_desc.size = sizeof(StaticFeatureParams);
+  params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+  params_desc.mappedAtCreation = false;
+  static_params_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &params_desc);
+}
+
+// Creates the linear sampler and the two compute pipelines used by the effect:
+// the static-feature pass and the per-layer CNN pass.
+//
+// Returns early, leaving the remaining pipeline handle(s) untouched, when a
+// shader asset is missing, when no layers were parsed by load_weights(), or
+// when the layer shader module could not be created.
+void CNNv2Effect::create_pipelines() {
+  // Sampler for bilinear interpolation: linear min/mag/mip, clamp-to-edge.
+  WGPUSamplerDescriptor smp = {};
+  smp.magFilter = WGPUFilterMode_Linear;
+  smp.minFilter = WGPUFilterMode_Linear;
+  smp.mipmapFilter = WGPUMipmapFilterMode_Linear;
+  smp.addressModeU = WGPUAddressMode_ClampToEdge;
+  smp.addressModeV = WGPUAddressMode_ClampToEdge;
+  smp.addressModeW = WGPUAddressMode_ClampToEdge;
+  smp.lodMinClamp = 0.0f;
+  smp.lodMaxClamp = 32.0f;
+  smp.maxAnisotropy = 1;
+  linear_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &smp);
+
+  // ---- Static-feature compute pipeline ----
+  size_t static_len = 0;
+  const char* static_src = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &static_len);
+  if (static_src == nullptr || static_len == 0) {
+    // Shader not available (e.g., in test mode) - skip pipeline creation
+    return;
+  }
+
+  WGPUShaderSourceWGSL static_wgsl = {};
+  static_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL;
+  static_wgsl.code = str_view(static_src);
+
+  WGPUShaderModuleDescriptor static_module_desc = {};
+  static_module_desc.nextInChain = &static_wgsl.chain;
+
+  // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output,
+  // 5=params, 6=linear_sampler
+  WGPUBindGroupLayout static_bgl =
+      BindGroupLayoutBuilder()
+          .texture(0, WGPUShaderStage_Compute)
+          .texture(1, WGPUShaderStage_Compute)
+          .texture(2, WGPUShaderStage_Compute)
+          .texture(3, WGPUShaderStage_Compute)
+          .storage_texture(4, WGPUShaderStage_Compute,
+                           WGPUTextureFormat_RGBA32Uint)
+          .uniform(5, WGPUShaderStage_Compute, sizeof(StaticFeatureParams))
+          .sampler(6, WGPUShaderStage_Compute)
+          .build(ctx_.device);
+
+  WGPUPipelineLayoutDescriptor static_layout_desc = {};
+  static_layout_desc.bindGroupLayoutCount = 1;
+  static_layout_desc.bindGroupLayouts = &static_bgl;
+  WGPUPipelineLayout static_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &static_layout_desc);
+
+  WGPUShaderModule static_module = wgpuDeviceCreateShaderModule(ctx_.device, &static_module_desc);
+
+  WGPUComputePipelineDescriptor static_pipeline_desc = {};
+  static_pipeline_desc.compute.module = static_module;
+  static_pipeline_desc.compute.entryPoint = str_view("main");
+  static_pipeline_desc.layout = static_layout;
+
+  // Drop a previously created pipeline before replacing it.
+  if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_);
+  static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &static_pipeline_desc);
+
+  // The temporary objects are released once the pipeline has been created.
+  wgpuShaderModuleRelease(static_module);
+  wgpuPipelineLayoutRelease(static_layout);
+  wgpuBindGroupLayoutRelease(static_bgl);
+
+  // ---- CNN layer compute pipeline (storage buffer version) ----
+  if (layer_info_.empty()) return;  // No weights loaded
+
+  size_t layer_len = 0;
+  const char* layer_src = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_len);
+  if (layer_src == nullptr || layer_len == 0) return;
+
+  WGPUShaderSourceWGSL layer_wgsl = {};
+  layer_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL;
+  layer_wgsl.code = str_view(layer_src);
+
+  WGPUShaderModuleDescriptor layer_module_desc = {};
+  layer_module_desc.nextInChain = &layer_wgsl.chain;
+
+  WGPUShaderModule layer_module = wgpuDeviceCreateShaderModule(ctx_.device, &layer_module_desc);
+  if (!layer_module) return;
+
+  // Bindings: 0=static_features, 1=layer_input, 2=output, 3=weights,
+  // 4=params, 5=original_input
+  WGPUBindGroupLayout layer_bgl =
+      BindGroupLayoutBuilder()
+          .uint_texture(0, WGPUShaderStage_Compute)
+          .uint_texture(1, WGPUShaderStage_Compute)
+          .storage_texture(2, WGPUShaderStage_Compute,
+                           WGPUTextureFormat_RGBA32Uint)
+          .storage(3, WGPUShaderStage_Compute)
+          .uniform(4, WGPUShaderStage_Compute, sizeof(LayerParams))
+          .texture(5, WGPUShaderStage_Compute)
+          .build(ctx_.device);
+
+  WGPUPipelineLayoutDescriptor layer_layout_desc = {};
+  layer_layout_desc.bindGroupLayoutCount = 1;
+  layer_layout_desc.bindGroupLayouts = &layer_bgl;
+  WGPUPipelineLayout layer_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_layout_desc);
+
+  WGPUComputePipelineDescriptor layer_desc = {};
+  layer_desc.compute.module = layer_module;
+  layer_desc.compute.entryPoint = str_view("main");
+  layer_desc.layout = layer_layout;
+
+  layer_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &layer_desc);
+
+  wgpuShaderModuleRelease(layer_module);
+  wgpuPipelineLayoutRelease(layer_layout);
+  wgpuBindGroupLayoutRelease(layer_bgl);
+}
+
+// Rebuilds the bind groups for a new input texture view.
+//
+// Does nothing if the static pipeline does not exist. Otherwise caches
+// `input_view`, recreates the static-feature bind group and, when the layer
+// pipeline and layer table are available, one bind group per CNN layer.
+void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
+  if (!static_pipeline_) return;
+
+  current_input_view_ = input_view;
+
+  // Drop the previous static bind group, if any.
+  if (static_bind_group_) {
+    wgpuBindGroupRelease(static_bind_group_);
+    static_bind_group_ = nullptr;
+  }
+
+  // Binding 2 falls back to the first mip view when the second one is null.
+  const WGPUTextureView second_mip_view =
+      input_mip_view_[1] ? input_mip_view_[1] : input_mip_view_[0];
+
+  // Static-feature bind group, filled in by hand (storage texture binding).
+  WGPUBindGroupEntry entries[7] = {};
+  for (uint32_t slot = 0; slot < 7; ++slot) {
+    entries[slot].binding = slot;
+  }
+  entries[0].textureView = input_view;
+  entries[1].textureView = input_mip_view_[0];
+  entries[2].textureView = second_mip_view;
+  entries[3].textureView = input_view;  // same view as binding 0
+  entries[4].textureView = static_features_view_;
+  entries[5].buffer = static_params_buffer_;
+  entries[5].size = sizeof(StaticFeatureParams);
+  entries[6].sampler = linear_sampler_;
+
+  WGPUBindGroupLayout static_layout =
+      wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0);
+  WGPUBindGroupDescriptor static_desc = {};
+  static_desc.layout = static_layout;
+  static_desc.entryCount = 7;
+  static_desc.entries = entries;
+  static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &static_desc);
+  wgpuBindGroupLayoutRelease(static_layout);
+
+  // Layer bind groups need both the layer pipeline and a parsed layer table.
+  if (layer_pipeline_ == nullptr) return;
+  if (layer_info_.empty()) return;
+
+  // Release the previous set of layer bind groups.
+  for (size_t k = 0; k < layer_bind_groups_.size(); ++k) {
+    wgpuBindGroupRelease(layer_bind_groups_[k]);
+  }
+  layer_bind_groups_.clear();
+
+  WGPUBindGroupLayout layer_layout = wgpuComputePipelineGetBindGroupLayout(layer_pipeline_, 0);
+
+  // Layer 0 reads the static features; later layers read the ping-pong
+  // texture written by the previous layer. Each layer writes to the other one.
+  const size_t layer_count = layer_info_.size();
+  for (size_t layer = 0; layer < layer_count; ++layer) {
+    const size_t read_slot = layer % 2;
+    const size_t write_slot = (layer + 1) % 2;
+
+    WGPUTextureView source_view = static_features_view_;
+    if (layer != 0) {
+      source_view = layer_views_[read_slot];
+    }
+
+    WGPUBindGroup group =
+        BindGroupBuilder()
+            .texture(0, static_features_view_)
+            .texture(1, source_view)
+            .texture(2, layer_views_[write_slot])
+            .buffer(3, weights_buffer_, wgpuBufferGetSize(weights_buffer_))
+            .buffer(4, layer_params_buffers_[layer], sizeof(LayerParams))
+            .texture(5, input_view)
+            .build(ctx_.device, layer_layout);
+
+    layer_bind_groups_.push_back(group);
+  }
+
+  wgpuBindGroupLayoutRelease(layer_layout);
+}
+
+// Records the effect's compute work into `encoder`: one pass that produces the
+// static features, followed by one pass per CNN layer.
+//
+// Returns without recording anything unless the effect is initialized and the
+// static pipeline and bind group exist. The layer passes are skipped when the
+// layer pipeline or the layer bind groups are missing.
+void CNNv2Effect::compute(WGPUCommandEncoder encoder,
+                          const CommonPostProcessUniforms& uniforms) {
+  const bool ready = initialized_ && static_pipeline_ && static_bind_group_;
+  if (!ready) return;
+
+  // Blend factor, optionally scaled by the beat phase.
+  const float effective_blend =
+      beat_modulated_ ? blend_amount_ * uniforms.beat_phase * beat_scale_
+                      : blend_amount_;
+
+  // Upload the static-feature parameters.
+  StaticFeatureParams static_params;
+  static_params.mip_level = mip_level_;
+  for (int k = 0; k < 3; ++k) {
+    static_params.padding[k] = 0;
+  }
+  wgpuQueueWriteBuffer(ctx_.queue, static_params_buffer_, 0, &static_params, sizeof(static_params));
+
+  // One workgroup per 8x8 tile, rounded up.
+  uint32_t groups_x = (width_ + 7) / 8;
+  uint32_t groups_y = (height_ + 7) / 8;
+
+  // Pass 1: static features.
+  WGPUComputePassEncoder static_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+  wgpuComputePassEncoderSetPipeline(static_pass, static_pipeline_);
+  wgpuComputePassEncoderSetBindGroup(static_pass, 0, static_bind_group_, 0, nullptr);
+  wgpuComputePassEncoderDispatchWorkgroups(static_pass, groups_x, groups_y, 1);
+  wgpuComputePassEncoderEnd(static_pass);
+  wgpuComputePassEncoderRelease(static_pass);
+
+  // CNN layer passes.
+  if (!layer_pipeline_ || layer_bind_groups_.empty()) return;
+
+  const size_t layer_count = layer_info_.size();
+  for (size_t layer = 0; layer < layer_count; ++layer) {
+    const LayerInfo& desc = layer_info_[layer];
+    const bool is_first = (layer == 0);
+    const bool is_last = (layer + 1 == layer_count);
+
+    // Each layer has its own uniform buffer.
+    LayerParams params;
+    params.kernel_size = desc.kernel_size;
+    params.in_channels = desc.in_channels;
+    params.out_channels = desc.out_channels;
+    params.weight_offset = desc.weight_offset;
+    params.is_output_layer = is_last ? 1 : 0;
+    params.blend_amount = effective_blend;
+    params.is_layer_0 = is_first ? 1 : 0;
+    wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[layer], 0, &params, sizeof(params));
+
+    WGPUComputePassEncoder cnn_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+    wgpuComputePassEncoderSetPipeline(cnn_pass, layer_pipeline_);
+    wgpuComputePassEncoderSetBindGroup(cnn_pass, 0, layer_bind_groups_[layer], 0, nullptr);
+    wgpuComputePassEncoderDispatchWorkgroups(cnn_pass, groups_x, groups_y, 1);
+    wgpuComputePassEncoderEnd(cnn_pass);
+    wgpuComputePassEncoderRelease(cnn_pass);
+  }
+}
+
+// Intentionally empty: this effect only records compute work, and rendering is
+// done by the default composite pass. Both arguments are unused.
+void CNNv2Effect::render(WGPURenderPassEncoder pass,
+                         const CommonPostProcessUniforms& uniforms) {
+  (void)uniforms;
+  (void)pass;
+}
+
+// Releases every GPU object owned by the effect, empties the per-layer
+// containers and the parsed layer table, and marks the effect uninitialized.
+//
+// Each handle is reset to nullptr right after it is released. cleanup() can
+// run more than once on the same object (resize() calls it, and so does the
+// destructor), and other code tests these handles for null before using or
+// releasing them, so a stale non-null handle would be released twice.
+void CNNv2Effect::cleanup() {
+  if (static_features_view_) {
+    wgpuTextureViewRelease(static_features_view_);
+    static_features_view_ = nullptr;
+  }
+  if (static_features_tex_) {
+    wgpuTextureRelease(static_features_tex_);
+    static_features_tex_ = nullptr;
+  }
+  if (static_bind_group_) {
+    wgpuBindGroupRelease(static_bind_group_);
+    static_bind_group_ = nullptr;
+  }
+  if (static_params_buffer_) {
+    wgpuBufferRelease(static_params_buffer_);
+    static_params_buffer_ = nullptr;
+  }
+  if (static_pipeline_) {
+    wgpuComputePipelineRelease(static_pipeline_);
+    static_pipeline_ = nullptr;
+  }
+  if (linear_sampler_) {
+    wgpuSamplerRelease(linear_sampler_);
+    linear_sampler_ = nullptr;
+  }
+
+  if (layer_pipeline_) {
+    wgpuComputePipelineRelease(layer_pipeline_);
+    layer_pipeline_ = nullptr;
+  }
+  if (weights_buffer_) {
+    wgpuBufferRelease(weights_buffer_);
+    weights_buffer_ = nullptr;
+  }
+  for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf);
+  layer_params_buffers_.clear();
+
+  for (int i = 0; i < 3; ++i) {
+    if (input_mip_view_[i]) {
+      wgpuTextureViewRelease(input_mip_view_[i]);
+      input_mip_view_[i] = nullptr;
+    }
+  }
+  if (input_mip_tex_) {
+    wgpuTextureRelease(input_mip_tex_);
+    input_mip_tex_ = nullptr;
+  }
+
+  for (auto view : layer_views_) wgpuTextureViewRelease(view);
+  for (auto tex : layer_textures_) wgpuTextureRelease(tex);
+  for (auto bg : layer_bind_groups_) wgpuBindGroupRelease(bg);
+
+  layer_views_.clear();
+  layer_textures_.clear();
+  layer_bind_groups_.clear();
+  layer_info_.clear();
+
+  initialized_ = false;
+}