diff options
| author | skal <pascal.massimino@gmail.com> | 2026-02-14 14:55:58 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-14 14:55:58 +0100 |
| commit | d6cc50eb49275bbc0de21d4c65a5172d5d65f790 (patch) | |
| tree | 642ffcb971bc58fd12ffa17bed5daeb00e4df8d0 /src/effects/cnn_v2_effect.cc | |
| parent | 7bb0b688de0d909828a58613c69dea85fa476400 (diff) | |
refactor(gpu): Relocate effects to src/effects and streamline includes
This refactoring improves the project's structure by decoupling visual effects from the core GPU module. All effect implementations have been moved from to a new top-level directory. Shared utilities like , , and have been consolidated into the parent directory.
- **Motivation**: To create a clearer separation of concerns, making the codebase easier to navigate and maintain. This move treats effects as a distinct layer that depends on the core GPU module, rather than being embedded within it.
- **Changes**:
- Created new directory.
- Moved all effect source files (, ) to .
- Moved shared helpers (, , ) to .
- Updated and to reflect the new file locations for all build targets.
- Corrected all directives across the entire codebase (, , ) to point to the new paths.
- Updated all markdown documentation ( files) to ensure file paths and architectural descriptions are accurate.
- Fixed several compiler errors related to incorrect enum casting () that were exposed during cross-compilation for Windows.
- **Verification**:
- The entire project builds successfully for both native and Windows cross-compilation targets.
- All 34 tests pass (Usage
ctest [options]).
- The --- Running Native Build & Tests ---
Configuring with all options enabled (tests + tools)...
--
-- Build Configuration:
-- DEMO_SIZE_OPT: ON
-- DEMO_STRIP_ALL: ON
-- DEMO_FINAL_STRIP: OFF
-- DEMO_STRIP_EXTERNAL_LIBS: OFF
-- DEMO_BUILD_TESTS: ON
-- DEMO_BUILD_TOOLS: ON
-- DEMO_ENABLE_COVERAGE: OFF
-- DEMO_ENABLE_DEBUG_LOGS: OFF
-- DEMO_HEADLESS: OFF
-- DEMO_WORKSPACE: main
--
-- Loaded workspace: Main Demo
-- Timeline: timeline.seq
-- Music: pop_punk_drums.track
-- Assets: assets.txt
-- Using workspace: main
-- Configuring done (0.0s)
-- Generating done (0.1s)
-- Build files have been written to: /Users/skal/demo/build
Building all targets (demo, tests, and tools)...
[ 0%] Built target validate_uniforms_script
[ 1%] Built target procedural
[ 2%] Validating uniform buffer sizes and alignments...
[ 3%] Built target tracker_compiler
[ 4%] Built target test_3d
[ 4%] Built target test_maths
[ 4%] Built target seq_compiler
[ 4%] Built target tracker_compiler_host
[ 5%] Built target asset_packer
[ 5%] Built target test_procedural
[ 6%] Compiling demo sequence from workspace main...
[ 6%] Built target generate_tracker_music
[ 6%] Built target generate_test_demo_music
[ 6%] Compiling test_demo sequence...
Using BPM: 90
Successfully generated timeline with 16 sequences.
Using BPM: 120
Demo end time: 16.000000s
Successfully generated timeline with 1 sequences.
[ 6%] Built target generate_test_demo_timeline
[ 6%] Built target generate_timeline
Validation Warning for 'CommonPostProcessUniforms': Matching WGSL struct not found.
Validation OK for 'FadeParams': Size 16 matches C++ expected size.
Validation OK for 'ThemeModulationParams': Size 16 matches C++ expected size.
Validation OK for 'GaussianBlurParams': Size 8 matches C++ expected size.
Validation OK for 'DistortParams': Size 8 matches C++ expected size.
Validation OK for 'CircleMaskParams': Size 16 matches C++ expected size.
[ 6%] Built target generate_test_assets
[ 7%] Built target generate_demo_assets
[ 7%] Built target validate_uniforms
[ 8%] Built target util
[ 10%] Built target test_assets
[ 11%] Built target test_shader_assets
[ 12%] Built target test_file_watcher
[ 15%] Built target 3d
[ 21%] Built target test_platform
[ 22%] Built target audio
[ 23%] Built target test_window
[ 26%] Built target test_fft
[ 27%] Built target test_synth
[ 27%] Built target test_spectral_brush
[ 27%] Built target test_physics
[ 28%] Built target test_dct
[ 31%] Building CXX object CMakeFiles/gpu.dir/src/gpu/effect.cc.o
[ 30%] Built target test_mock_backend
[ 32%] Built target test_scene_loader
[ 33%] Built target test_audio_backend
[ 34%] Built target test_audio_gen
[ 36%] Built target test_silent_backend
[ 39%] Built target test_jittered_audio
[ 39%] Building CXX object CMakeFiles/gpu.dir/src/effects/heptagon_effect.cc.o
[ 42%] Built target test_wav_dump
[ 44%] Built target test_tracker_timing
[ 44%] Building CXX object CMakeFiles/gpu.dir/src/effects/particles_effect.cc.o
[ 45%] Building CXX object CMakeFiles/gpu.dir/src/effects/passthrough_effect.cc.o
[ 47%] Built target test_variable_tempo
[ 50%] Built target test_audio_engine
[ 52%] Built target test_tracker
[ 52%] Building CXX object CMakeFiles/gpu.dir/src/effects/moving_ellipse_effect.cc.o
[ 52%] Building CXX object CMakeFiles/gpu.dir/src/effects/particle_spray_effect.cc.o
[ 52%] Building CXX object CMakeFiles/gpu.dir/src/effects/gaussian_blur_effect.cc.o
[ 54%] Built target test_spectool
[ 55%] Building CXX object CMakeFiles/gpu.dir/src/effects/solarize_effect.cc.o
[ 55%] Building CXX object CMakeFiles/gpu.dir/src/effects/scene1_effect.cc.o
[ 55%] Building CXX object CMakeFiles/gpu.dir/src/effects/chroma_aberration_effect.cc.o
[ 55%] Building CXX object CMakeFiles/gpu.dir/src/gpu/shaders.cc.o
[ 57%] Building CXX object CMakeFiles/gpu.dir/src/effects/vignette_effect.cc.o
[ 57%] Building CXX object CMakeFiles/gpu.dir/src/gpu/post_process_helper.cc.o
[ 57%] Linking CXX static library libgpu.a
[ 60%] Built target gpu
[ 60%] Linking CXX executable test_uniform_helper
[ 60%] Linking CXX executable test_shader_composer
[ 60%] Building CXX object CMakeFiles/test_sequence.dir/src/tests/assets/test_sequence.cc.o
[ 61%] Linking CXX executable test_noise_functions
[ 62%] Linking CXX executable test_shader_compilation
[ 62%] Building CXX object CMakeFiles/test_demo.dir/src/app/test_demo.cc.o
[ 62%] Building CXX object CMakeFiles/demo64k.dir/src/app/main.cc.o
[ 62%] Building CXX object CMakeFiles/test_3d_render.dir/src/generated/timeline.cc.o
[ 63%] Built target test_uniform_helper
[ 64%] Built target test_shader_composer
[ 64%] Building CXX object CMakeFiles/test_3d_physics.dir/src/generated/timeline.cc.o
[ 65%] Built target test_noise_functions
[ 66%] Built target test_shader_compilation
[ 67%] Building CXX object CMakeFiles/test_mesh.dir/src/generated/timeline.cc.o
[ 67%] Building CXX object CMakeFiles/test_effect_base.dir/src/tests/gpu/test_effect_base.cc.o
[ 67%] Building CXX object CMakeFiles/test_demo_effects.dir/src/tests/gpu/test_demo_effects.cc.o
[ 67%] Building CXX object CMakeFiles/test_sequence.dir/src/generated/timeline.cc.o
[ 68%] Building CXX object CMakeFiles/test_demo.dir/src/generated/test_demo_timeline.cc.o
[ 68%] Building CXX object CMakeFiles/demo64k.dir/src/generated/timeline.cc.o
[ 68%] Linking CXX executable test_3d_render
[ 68%] Building CXX object CMakeFiles/test_effect_base.dir/src/generated/timeline.cc.o
[ 68%] Linking CXX executable test_3d_physics
[ 68%] Linking CXX executable test_mesh
[ 71%] Built target test_3d_render
[ 71%] Building CXX object CMakeFiles/test_post_process_helper.dir/src/tests/gpu/test_post_process_helper.cc.o
[ 72%] Building CXX object CMakeFiles/test_demo_effects.dir/src/generated/timeline.cc.o
[ 72%] Linking CXX executable test_demo
[ 75%] Built target test_3d_physics
[ 77%] Built target test_mesh
[ 77%] Linking CXX executable test_texture_manager
[ 78%] Linking CXX executable test_sequence
[ 78%] Linking CXX executable test_gpu_procedural
[ 80%] Built target test_demo
[ 81%] Linking CXX executable test_gpu_composite
[ 81%] Linking CXX executable demo64k
[ 83%] Built target test_sequence
[ 85%] Built target test_texture_manager
[ 86%] Built target test_gpu_procedural
[ 86%] Linking CXX executable test_post_process_helper
[ 86%] Linking CXX executable test_effect_base
[ 87%] Built target test_gpu_composite
[ 90%] Built target demo64k
[ 92%] Built target test_post_process_helper
[ 96%] Built target test_effect_base
[ 96%] Linking CXX executable test_demo_effects
[100%] Built target test_demo_effects
Running test suite...
Test project /Users/skal/demo/build
Start 1: HammingWindowTest
1/34 Test #1: HammingWindowTest ................ Passed 0.00 sec
Start 2: MathUtilsTest
2/34 Test #2: MathUtilsTest .................... Passed 0.00 sec
Start 3: FileWatcherTest
3/34 Test #3: FileWatcherTest .................. Passed 0.00 sec
Start 4: SynthEngineTest
4/34 Test #4: SynthEngineTest .................. Passed 0.00 sec
Start 5: DctTest
5/34 Test #5: DctTest .......................... Passed 0.00 sec
Start 6: FftTest
6/34 Test #6: FftTest .......................... Passed 0.01 sec
Start 7: SpectralBrushTest
7/34 Test #7: SpectralBrushTest ................ Passed 0.01 sec
Start 8: AudioGenTest
8/34 Test #8: AudioGenTest ..................... Passed 0.00 sec
Start 9: AudioBackendTest
9/34 Test #9: AudioBackendTest ................. Passed 0.00 sec
Start 10: SilentBackendTest
10/34 Test #10: SilentBackendTest ................ Passed 0.00 sec
Start 11: MockAudioBackendTest
11/34 Test #11: MockAudioBackendTest ............. Passed 0.00 sec
Start 12: WavDumpBackendTest
12/34 Test #12: WavDumpBackendTest ............... Passed 0.00 sec
Start 13: JitteredAudioBackendTest
13/34 Test #13: JitteredAudioBackendTest ......... Passed 0.00 sec
Start 14: TrackerTimingTest
14/34 Test #14: TrackerTimingTest ................ Passed 0.00 sec
Start 15: VariableTempoTest
15/34 Test #15: VariableTempoTest ................ Passed 0.00 sec
Start 16: TrackerSystemTest
16/34 Test #16: TrackerSystemTest ................ Passed 0.01 sec
Start 17: AudioEngineTest
17/34 Test #17: AudioEngineTest .................. Passed 0.00 sec
Start 18: ShaderAssetValidation
18/34 Test #18: ShaderAssetValidation ............ Passed 0.00 sec
Start 19: ShaderCompilationTest
19/34 Test #19: ShaderCompilationTest ............ Passed 0.02 sec
Start 20: NoiseFunctionsTest
20/34 Test #20: NoiseFunctionsTest ............... Passed 0.01 sec
Start 21: UniformHelperTest
21/34 Test #21: UniformHelperTest ................ Passed 0.00 sec
Start 22: AssetManagerTest
22/34 Test #22: AssetManagerTest ................. Passed 0.01 sec
Start 23: SequenceSystemTest
23/34 Test #23: SequenceSystemTest ............... Passed 0.01 sec
Start 24: ProceduralGenTest
24/34 Test #24: ProceduralGenTest ................ Passed 0.01 sec
Start 25: PhysicsTest
25/34 Test #25: PhysicsTest ...................... Passed 0.01 sec
Start 26: ThreeDSystemTest
26/34 Test #26: ThreeDSystemTest ................. Passed 0.00 sec
Start 27: ShaderComposerTest
27/34 Test #27: ShaderComposerTest ............... Passed 0.01 sec
Start 28: SceneLoaderTest
28/34 Test #28: SceneLoaderTest .................. Passed 0.01 sec
Start 29: EffectBaseTest
29/34 Test #29: EffectBaseTest ................... Passed 0.04 sec
Start 30: DemoEffectsTest
30/34 Test #30: DemoEffectsTest .................. Passed 0.03 sec
Start 31: PostProcessHelperTest
31/34 Test #31: PostProcessHelperTest ............ Passed 0.02 sec
Start 32: TextureManagerTest
32/34 Test #32: TextureManagerTest ............... Passed 0.02 sec
Start 33: GpuProceduralTest
33/34 Test #33: GpuProceduralTest ................ Passed 0.18 sec
Start 34: GpuCompositeTest
34/34 Test #34: GpuCompositeTest ................. Passed 0.20 sec
100% tests passed, 0 tests failed out of 34
Label Time Summary:
3d = 0.01 sec*proc (3 tests)
assets = 0.02 sec*proc (2 tests)
audio = 0.07 sec*proc (15 tests)
gpu = 0.54 sec*proc (11 tests)
util = 0.01 sec*proc (3 tests)
Total Test time (real) = 0.67 sec
Verifying tools compile...
[ 9%] Built target procedural
[ 18%] Built target tracker_compiler_host
[ 18%] Built target tracker_compiler
[ 18%] Built target generate_tracker_music
[ 18%] Built target asset_packer
[ 27%] Built target generate_demo_assets
[ 27%] Built target generate_test_assets
[ 36%] Built target util
[ 81%] Built target audio
[100%] Built target test_spectool
--- Running Windows Cross-Compilation Build ---
Building native tools...
--
-- Build Configuration:
-- DEMO_SIZE_OPT: OFF
-- DEMO_STRIP_ALL: OFF
-- DEMO_FINAL_STRIP: OFF
-- DEMO_STRIP_EXTERNAL_LIBS: OFF
-- DEMO_BUILD_TESTS: OFF
-- DEMO_BUILD_TOOLS: OFF
-- DEMO_ENABLE_COVERAGE: OFF
-- DEMO_ENABLE_DEBUG_LOGS: OFF
-- DEMO_HEADLESS: OFF
-- DEMO_WORKSPACE: main
--
-- Loaded workspace: Main Demo
-- Timeline: timeline.seq
-- Music: pop_punk_drums.track
-- Assets: assets.txt
-- Using workspace: main
-- Configuring done (0.0s)
-- Generating done (0.0s)
-- Build files have been written to: /Users/skal/demo/build_native
[ 50%] Built target procedural
[100%] Built target asset_packer
[100%] Built target seq_compiler
[100%] Built target tracker_compiler_host
Cross-compiling for Windows...
--
-- Build Configuration:
-- DEMO_SIZE_OPT: ON
-- DEMO_STRIP_ALL: ON
-- DEMO_FINAL_STRIP: OFF
-- DEMO_STRIP_EXTERNAL_LIBS: OFF
-- DEMO_BUILD_TESTS: OFF
-- DEMO_BUILD_TOOLS: OFF
-- DEMO_ENABLE_COVERAGE: OFF
-- DEMO_ENABLE_DEBUG_LOGS: OFF
-- DEMO_HEADLESS: OFF
-- DEMO_WORKSPACE: main
--
-- Loaded workspace: Main Demo
-- Timeline: timeline.seq
-- Music: pop_punk_drums.track
-- Assets: assets.txt
-- Using workspace: main
-- Configuring done (0.0s)
-- Generating done (0.0s)
-- Build files have been written to: /Users/skal/demo/build_win
[ 2%] Built target validate_uniforms_script
[ 2%] Built target generate_timeline
[ 4%] Built target generate_test_demo_timeline
[ 4%] Built target generate_demo_assets
[ 4%] Built target generate_test_assets
[ 6%] Built target procedural
[ 9%] Built target tracker_compiler_host
[ 10%] Validating uniform buffer sizes and alignments...
[ 11%] Built target generate_tracker_music
[ 13%] Built target generate_test_demo_music
[ 16%] Built target util
[ 28%] Built target 3d
[ 45%] Built target audio
[ 49%] Building CXX object CMakeFiles/gpu.dir/src/effects/heptagon_effect.cc.obj
[ 52%] Building CXX object CMakeFiles/gpu.dir/src/effects/gaussian_blur_effect.cc.obj
[ 54%] Building CXX object CMakeFiles/gpu.dir/src/effects/particles_effect.cc.obj
[ 54%] Building CXX object CMakeFiles/gpu.dir/src/effects/moving_ellipse_effect.cc.obj
[ 54%] Building CXX object CMakeFiles/gpu.dir/src/gpu/effect.cc.obj
[ 54%] Building CXX object CMakeFiles/gpu.dir/src/effects/passthrough_effect.cc.obj
[ 54%] Building CXX object CMakeFiles/gpu.dir/src/effects/particle_spray_effect.cc.obj
Validation Warning for 'CommonPostProcessUniforms': Matching WGSL struct not found.
Validation OK for 'FadeParams': Size 16 matches C++ expected size.
Validation OK for 'ThemeModulationParams': Size 16 matches C++ expected size.
Validation OK for 'GaussianBlurParams': Size 8 matches C++ expected size.
Validation OK for 'DistortParams': Size 8 matches C++ expected size.
Validation OK for 'CircleMaskParams': Size 16 matches C++ expected size.
[ 54%] Built target validate_uniforms
[ 55%] Building CXX object CMakeFiles/gpu.dir/src/effects/solarize_effect.cc.obj
[ 57%] Building CXX object CMakeFiles/gpu.dir/src/effects/scene1_effect.cc.obj
[ 57%] Building CXX object CMakeFiles/gpu.dir/src/effects/chroma_aberration_effect.cc.obj
[ 58%] Building CXX object CMakeFiles/gpu.dir/src/effects/vignette_effect.cc.obj
[ 59%] Building CXX object CMakeFiles/gpu.dir/src/gpu/post_process_helper.cc.obj
[ 60%] Building CXX object CMakeFiles/gpu.dir/src/gpu/shaders.cc.obj
[ 62%] Linking CXX static library libgpu.a
[ 77%] Built target gpu
[ 79%] Building CXX object CMakeFiles/demo64k.dir/src/app/main.cc.obj
[ 79%] Building CXX object CMakeFiles/test_demo.dir/src/app/test_demo.cc.obj
[ 80%] Building CXX object CMakeFiles/demo64k.dir/src/generated/timeline.cc.obj
[ 81%] Building CXX object CMakeFiles/test_demo.dir/src/generated/test_demo_timeline.cc.obj
[ 82%] Linking CXX executable test_demo.exe
[ 90%] Built target test_demo
[ 91%] Linking CXX executable demo64k.exe
[100%] Built target demo64k
Copying MinGW DLLs...
Crunching build_win/demo64k.exe...
Ultimate Packer for eXecutables
Copyright (C) 1996 - 2026
UPX 5.1.0 Markus Oberhumer, Laszlo Molnar & John Reiser Jan 7th 2026
File size Ratio Format Name
-------------------- ------ ----------- -----------
7036416 -> 4680704 66.52% win64/pe demo64k_packed.exe
Packed 1 file.
------------------------------------------------
Size Report:
-rwxr-xr-x 1 skal 89939 6.7M Feb 14 14:55 build_win/demo64k.exe
-rwxr-xr-x 1 skal 89939 6.7M Feb 14 14:55 build_win/demo64k_stripped.exe
-rwxr-xr-x 1 skal 89939 4.5M Feb 14 14:55 build_win/demo64k_packed.exe
------------------------------------------------
Top 20 Largest Symbols (from unstripped):
------------------------------------------------
Build complete. Output: build_win/demo64k.exe
All checks passed successfully. script completes without errors.
This change streamlines the project's architecture without altering any functionality.
Diffstat (limited to 'src/effects/cnn_v2_effect.cc')
| -rw-r--r-- | src/effects/cnn_v2_effect.cc | 463 |
1 files changed, 463 insertions, 0 deletions
diff --git a/src/effects/cnn_v2_effect.cc b/src/effects/cnn_v2_effect.cc new file mode 100644 index 0000000..4c10ed1 --- /dev/null +++ b/src/effects/cnn_v2_effect.cc @@ -0,0 +1,463 @@ +// CNN v2 Effect Implementation + +#include "effects/cnn_v2_effect.h" + +#if defined(USE_TEST_ASSETS) +#include "test_assets.h" +#else +#include "generated/assets.h" +#endif + +#include "gpu/bind_group_builder.h" +#include "gpu/gpu.h" +#include "util/asset_manager.h" +#include "util/fatal_error.h" +#include <cstring> + +CNNv2Effect::CNNv2Effect(const GpuContext& ctx) + : PostProcessEffect(ctx), + static_pipeline_(nullptr), + static_bind_group_(nullptr), + static_params_buffer_(nullptr), + static_features_tex_(nullptr), + static_features_view_(nullptr), + linear_sampler_(nullptr), + layer_pipeline_(nullptr), + weights_buffer_(nullptr), + input_mip_tex_(nullptr), + current_input_view_(nullptr), + blend_amount_(1.0f), + mip_level_(0), + initialized_(false) { + std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); +} + +CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params) + : PostProcessEffect(ctx), + static_pipeline_(nullptr), + static_bind_group_(nullptr), + static_params_buffer_(nullptr), + static_features_tex_(nullptr), + static_features_view_(nullptr), + linear_sampler_(nullptr), + layer_pipeline_(nullptr), + weights_buffer_(nullptr), + input_mip_tex_(nullptr), + current_input_view_(nullptr), + blend_amount_(params.blend_amount), + mip_level_(0), + initialized_(false) { + std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); +} + +CNNv2Effect::~CNNv2Effect() { + cleanup(); +} + +void CNNv2Effect::init(MainSequence* demo) { + (void)demo; + if (initialized_) return; + + load_weights(); + create_textures(); + create_pipelines(); + + initialized_ = true; +} + +void CNNv2Effect::resize(int width, int height) { + PostProcessEffect::resize(width, height); + cleanup(); + create_textures(); + create_pipelines(); +} + +void CNNv2Effect::load_weights() { + // Load binary weights asset + size_t weights_size = 0; + const uint8_t* weights_data = (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size); + + if (!weights_data || weights_size < 20) { + // Weights not available - effect will skip + return; + } + + // Parse header + const uint32_t* header = (const uint32_t*)weights_data; + uint32_t magic = header[0]; + uint32_t version = header[1]; + uint32_t num_layers = header[2]; + uint32_t total_weights = header[3]; + + FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2' + + // Support both version 1 (16-byte header) and version 2 (20-byte header with mip_level) + // TODO: Version 3 should include feature descriptor for arbitrary layout/ordering + if (version == 1) { + mip_level_ = 0; // Default for v1 + } else if (version == 2) { + mip_level_ = header[4]; + } else { + FATAL_ERROR("Unsupported CNN v2 weights version: %u\n", version); + } + + // Parse layer info (20 bytes per layer) + // Offset depends on version: v1=16 bytes (4 u32), v2=20 bytes (5 u32) + const uint32_t header_u32_count = (version == 1) ? 4 : 5; + const uint32_t* layer_data = header + header_u32_count; + for (uint32_t i = 0; i < num_layers; ++i) { + LayerInfo info; + info.kernel_size = layer_data[i * 5 + 0]; + info.in_channels = layer_data[i * 5 + 1]; + info.out_channels = layer_data[i * 5 + 2]; + info.weight_offset = layer_data[i * 5 + 3]; + info.weight_count = layer_data[i * 5 + 4]; + layer_info_.push_back(info); + } + + // Create GPU storage buffer for weights (skip header + layer info, upload only weights) + size_t header_size = 20; // 5 u32 + size_t layer_info_size = 20 * num_layers; // 5 u32 per layer + size_t weights_offset = header_size + layer_info_size; + size_t weights_only_size = weights_size - weights_offset; + + WGPUBufferDescriptor buffer_desc = {}; + buffer_desc.size = weights_only_size; + buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst; + buffer_desc.mappedAtCreation = false; + + weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc); + + // Upload only weights (skip header + layer info) + wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size); + + // Create uniform buffers for layer params (one per layer) + for (uint32_t i = 0; i < num_layers; ++i) { + WGPUBufferDescriptor params_desc = {}; + params_desc.size = sizeof(LayerParams); + params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + params_desc.mappedAtCreation = false; + + WGPUBuffer buf = wgpuDeviceCreateBuffer(ctx_.device, ¶ms_desc); + layer_params_buffers_.push_back(buf); + } +} + +void CNNv2Effect::create_textures() { + // Static features texture (8×f16 packed as 4×u32) + TextureWithView static_tex = gpu_create_storage_texture_2d( + ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint); + static_features_tex_ = static_tex.texture; + static_features_view_ = static_tex.view; + + // Input texture with mips (for multi-scale features) + TextureWithView input_mip = gpu_create_texture_2d( + ctx_.device, width_, height_, WGPUTextureFormat_RGBA8Unorm, + (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst), 3); + input_mip_tex_ = input_mip.texture; + + for (int i = 0; i < 3; ++i) { + input_mip_view_[i] = + gpu_create_mip_view(input_mip_tex_, WGPUTextureFormat_RGBA8Unorm, i); + } + + // Create 2 layer textures (ping-pong buffers for intermediate results) + // Each stores 8×f16 channels packed as 4×u32 + for (int i = 0; i < 2; ++i) { + TextureWithView layer = gpu_create_storage_texture_2d( + ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint); + layer_textures_.push_back(layer.texture); + layer_views_.push_back(layer.view); + } + + // Create uniform buffer for static feature params + WGPUBufferDescriptor params_desc = {}; + params_desc.size = sizeof(StaticFeatureParams); + params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + params_desc.mappedAtCreation = false; + static_params_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, ¶ms_desc); +} + +void CNNv2Effect::create_pipelines() { + // Create linear sampler for bilinear interpolation + WGPUSamplerDescriptor sampler_desc = {}; + sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeW = WGPUAddressMode_ClampToEdge; + sampler_desc.magFilter = WGPUFilterMode_Linear; + sampler_desc.minFilter = WGPUFilterMode_Linear; + sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + sampler_desc.lodMinClamp = 0.0f; + sampler_desc.lodMaxClamp = 32.0f; + sampler_desc.maxAnisotropy = 1; + + linear_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &sampler_desc); + + // Static features compute pipeline + size_t shader_size = 0; + const char* static_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size); + + if (!static_code || shader_size == 0) { + // Shader not available (e.g., in test mode) - skip pipeline creation + return; + } + + WGPUShaderSourceWGSL wgsl_src = {}; + wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; + wgsl_src.code = str_view(static_code); + + WGPUShaderModuleDescriptor shader_desc = {}; + shader_desc.nextInChain = &wgsl_src.chain; + + // Create bind group layout for static features compute + // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params, 6=linear_sampler + WGPUBindGroupLayout static_bgl = + BindGroupLayoutBuilder() + .texture(0, WGPUShaderStage_Compute) + .texture(1, WGPUShaderStage_Compute) + .texture(2, WGPUShaderStage_Compute) + .texture(3, WGPUShaderStage_Compute) + .storage_texture(4, WGPUShaderStage_Compute, + WGPUTextureFormat_RGBA32Uint) + .uniform(5, WGPUShaderStage_Compute, sizeof(StaticFeatureParams)) + .sampler(6, WGPUShaderStage_Compute) + .build(ctx_.device); + + // Update pipeline layout + WGPUPipelineLayoutDescriptor pl_desc = {}; + pl_desc.bindGroupLayoutCount = 1; + pl_desc.bindGroupLayouts = &static_bgl; + WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc); + + // Recreate pipeline with proper layout + WGPUComputePipelineDescriptor pipeline_desc2 = {}; + pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); + pipeline_desc2.compute.entryPoint = str_view("main"); + pipeline_desc2.layout = pipeline_layout; + + if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); + static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2); + + wgpuShaderModuleRelease(pipeline_desc2.compute.module); + wgpuPipelineLayoutRelease(pipeline_layout); + wgpuBindGroupLayoutRelease(static_bgl); + + // CNN layer compute pipeline (storage buffer version) + if (layer_info_.empty()) return; // No weights loaded + + size_t layer_shader_size = 0; + const char* layer_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_shader_size); + + if (!layer_code || layer_shader_size == 0) return; + + WGPUShaderSourceWGSL layer_wgsl = {}; + layer_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL; + layer_wgsl.code = str_view(layer_code); + + WGPUShaderModuleDescriptor layer_shader_desc = {}; + layer_shader_desc.nextInChain = &layer_wgsl.chain; + + WGPUShaderModule layer_module = wgpuDeviceCreateShaderModule(ctx_.device, &layer_shader_desc); + if (!layer_module) return; + + // Create bind group layout for layer compute + // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input + WGPUBindGroupLayout layer_bgl = + BindGroupLayoutBuilder() + .uint_texture(0, WGPUShaderStage_Compute) + .uint_texture(1, WGPUShaderStage_Compute) + .storage_texture(2, WGPUShaderStage_Compute, + WGPUTextureFormat_RGBA32Uint) + .storage(3, WGPUShaderStage_Compute) + .uniform(4, WGPUShaderStage_Compute, sizeof(LayerParams)) + .texture(5, WGPUShaderStage_Compute) + .build(ctx_.device); + + WGPUPipelineLayoutDescriptor layer_pl_desc = {}; + layer_pl_desc.bindGroupLayoutCount = 1; + layer_pl_desc.bindGroupLayouts = &layer_bgl; + + WGPUPipelineLayout layer_pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_pl_desc); + + WGPUComputePipelineDescriptor layer_pipeline_desc = {}; + layer_pipeline_desc.compute.module = layer_module; + layer_pipeline_desc.compute.entryPoint = str_view("main"); + layer_pipeline_desc.layout = layer_pipeline_layout; + + layer_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &layer_pipeline_desc); + + wgpuShaderModuleRelease(layer_module); + wgpuPipelineLayoutRelease(layer_pipeline_layout); + wgpuBindGroupLayoutRelease(layer_bgl); +} + +void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { + if (!static_pipeline_) return; + + // Cache input view + current_input_view_ = input_view; + + // Release old bind group + if (static_bind_group_) { + wgpuBindGroupRelease(static_bind_group_); + static_bind_group_ = nullptr; + } + + // Create bind group for static features compute (manual for storage texture binding) + WGPUBindGroupEntry bg_entries[7] = {}; + bg_entries[0].binding = 0; + bg_entries[0].textureView = input_view; + bg_entries[1].binding = 1; + bg_entries[1].textureView = input_mip_view_[0]; + bg_entries[2].binding = 2; + bg_entries[2].textureView = + input_mip_view_[1] ? input_mip_view_[1] : input_mip_view_[0]; + bg_entries[3].binding = 3; + bg_entries[3].textureView = input_view; + bg_entries[4].binding = 4; + bg_entries[4].textureView = static_features_view_; + bg_entries[5].binding = 5; + bg_entries[5].buffer = static_params_buffer_; + bg_entries[5].size = sizeof(StaticFeatureParams); + bg_entries[6].binding = 6; + bg_entries[6].sampler = linear_sampler_; + + WGPUBindGroupLayout layout = + wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); + WGPUBindGroupDescriptor bg_desc = {}; + bg_desc.layout = layout; + bg_desc.entryCount = 7; + bg_desc.entries = bg_entries; + static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); + wgpuBindGroupLayoutRelease(layout); + + // Create layer bind groups + if (!layer_pipeline_ || layer_info_.empty()) return; + + // Release old layer bind groups + for (auto bg : layer_bind_groups_) { + wgpuBindGroupRelease(bg); + } + layer_bind_groups_.clear(); + + // Get bind group layout from layer pipeline + WGPUBindGroupLayout layer_bgl = wgpuComputePipelineGetBindGroupLayout(layer_pipeline_, 0); + + // Create bind group for each layer + for (size_t i = 0; i < layer_info_.size(); ++i) { + WGPUTextureView layer_input = + (i == 0) ? static_features_view_ : layer_views_[i % 2]; + + WGPUBindGroup layer_bg = + BindGroupBuilder() + .texture(0, static_features_view_) + .texture(1, layer_input) + .texture(2, layer_views_[(i + 1) % 2]) + .buffer(3, weights_buffer_, wgpuBufferGetSize(weights_buffer_)) + .buffer(4, layer_params_buffers_[i], sizeof(LayerParams)) + .texture(5, input_view) + .build(ctx_.device, layer_bgl); + + layer_bind_groups_.push_back(layer_bg); + } + + wgpuBindGroupLayoutRelease(layer_bgl); +} + +void CNNv2Effect::compute(WGPUCommandEncoder encoder, + const CommonPostProcessUniforms& uniforms) { + if (!initialized_ || !static_pipeline_ || !static_bind_group_) return; + + float effective_blend = blend_amount_; + if (beat_modulated_) { + effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_; + } + + // Update static feature params + StaticFeatureParams static_params; + static_params.mip_level = mip_level_; + static_params.padding[0] = 0; + static_params.padding[1] = 0; + static_params.padding[2] = 0; + wgpuQueueWriteBuffer(ctx_.queue, static_params_buffer_, 0, &static_params, sizeof(static_params)); + + // Pass 1: Compute static features + WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); + + wgpuComputePassEncoderSetPipeline(pass, static_pipeline_); + wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr); + + // Dispatch workgroups (8×8 threads per group) + uint32_t workgroups_x = (width_ + 7) / 8; + uint32_t workgroups_y = (height_ + 7) / 8; + wgpuComputePassEncoderDispatchWorkgroups(pass, workgroups_x, workgroups_y, 1); + + wgpuComputePassEncoderEnd(pass); + wgpuComputePassEncoderRelease(pass); + + // Execute CNN layer passes + if (!layer_pipeline_ || layer_bind_groups_.empty()) return; + + // Update layer params (each layer has own buffer) + for (size_t i = 0; i < layer_info_.size(); ++i) { + const LayerInfo& info = layer_info_[i]; + + LayerParams params; + params.kernel_size = info.kernel_size; + params.in_channels = info.in_channels; + params.out_channels = info.out_channels; + params.weight_offset = info.weight_offset; + params.is_output_layer = (i == layer_info_.size() - 1) ? 1 : 0; + params.blend_amount = effective_blend; + params.is_layer_0 = (i == 0) ? 1 : 0; + + wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, ¶ms, sizeof(params)); + + WGPUComputePassEncoder layer_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); + + wgpuComputePassEncoderSetPipeline(layer_pass, layer_pipeline_); + wgpuComputePassEncoderSetBindGroup(layer_pass, 0, layer_bind_groups_[i], 0, nullptr); + + wgpuComputePassEncoderDispatchWorkgroups(layer_pass, workgroups_x, workgroups_y, 1); + + wgpuComputePassEncoderEnd(layer_pass); + wgpuComputePassEncoderRelease(layer_pass); + } +} + +void CNNv2Effect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + (void)pass; + (void)uniforms; + // Compute-only effect, rendering is done by default composite pass +} + +void CNNv2Effect::cleanup() { + if (static_features_view_) wgpuTextureViewRelease(static_features_view_); + if (static_features_tex_) wgpuTextureRelease(static_features_tex_); + if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_); + if (static_params_buffer_) wgpuBufferRelease(static_params_buffer_); + if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); + if (linear_sampler_) wgpuSamplerRelease(linear_sampler_); + + if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_); + if (weights_buffer_) wgpuBufferRelease(weights_buffer_); + for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf); + layer_params_buffers_.clear(); + + for (int i = 0; i < 3; ++i) { + if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]); + } + if (input_mip_tex_) wgpuTextureRelease(input_mip_tex_); + + for (auto view : layer_views_) wgpuTextureViewRelease(view); + for (auto tex : layer_textures_) wgpuTextureRelease(tex); + for (auto bg : layer_bind_groups_) wgpuBindGroupRelease(bg); + + layer_views_.clear(); + layer_textures_.clear(); + layer_bind_groups_.clear(); + layer_info_.clear(); + + initialized_ = false; +} |
