From 8b30cadfc19647487986d14dba9ddba7908dd1d0 Mon Sep 17 00:00:00 2001 From: skal Date: Thu, 12 Feb 2026 15:10:17 +0100 Subject: test_demo: Add beat-synchronized CNN post-processing with version selection - Add --cnn-version <1|2> flag to select between CNN v1 and v2 - Implement beat_phase modulation for dynamic blend in both CNN effects - Fix CNN v2 per-layer uniform buffer sharing (each layer needs own buffer) - Fix CNN v2 y-axis orientation to match render pass convention - Add Scene1Effect as base visual layer to test_demo timeline - Reorganize CNN v2 shaders into cnn_v2/ subdirectory - Update asset paths and documentation for new shader organization Co-Authored-By: Claude Sonnet 4.5 --- src/gpu/effects/cnn_effect.cc | 9 ++++- src/gpu/effects/cnn_effect.h | 7 ++++ src/gpu/effects/cnn_v2_effect.cc | 71 +++++++++++++++++++++++++++++----------- src/gpu/effects/cnn_v2_effect.h | 16 ++++++++- src/test_demo.cc | 39 +++++++++++++++++++++- 5 files changed, 120 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/gpu/effects/cnn_effect.cc b/src/gpu/effects/cnn_effect.cc index b2305b2..83a3365 100644 --- a/src/gpu/effects/cnn_effect.cc +++ b/src/gpu/effects/cnn_effect.cc @@ -79,12 +79,19 @@ void CNNEffect::resize(int width, int height) { void CNNEffect::render(WGPURenderPassEncoder pass, const CommonPostProcessUniforms& uniforms) { - (void)uniforms; if (!bind_group_) { fprintf(stderr, "CNN render: no bind_group\n"); return; } + float effective_blend = blend_amount_; + if (beat_modulated_) { + effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_; + } + + CNNLayerParams params = {layer_index_, effective_blend, {0.0f, 0.0f}}; + params_buffer_.update(ctx_.queue, params); + wgpuRenderPassEncoderSetPipeline(pass, pipeline_); wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); diff --git a/src/gpu/effects/cnn_effect.h b/src/gpu/effects/cnn_effect.h index 1c9f0f3..3e2b7ca 100644 --- 
a/src/gpu/effects/cnn_effect.h +++ b/src/gpu/effects/cnn_effect.h @@ -34,10 +34,17 @@ class CNNEffect : public PostProcessEffect { return layer_index_ == 0; } + void set_beat_modulation(bool enabled, float scale = 1.0f) { + beat_modulated_ = enabled; + beat_scale_ = scale; + } + private: int layer_index_; int total_layers_; float blend_amount_; + bool beat_modulated_ = false; + float beat_scale_ = 1.0f; WGPUTextureView input_view_; WGPUTextureView original_view_; UniformBuffer params_buffer_; diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc index 9cb6d57..9c727ba 100644 --- a/src/gpu/effects/cnn_v2_effect.cc +++ b/src/gpu/effects/cnn_v2_effect.cc @@ -20,9 +20,24 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx) static_features_view_(nullptr), layer_pipeline_(nullptr), weights_buffer_(nullptr), - layer_params_buffer_(nullptr), input_mip_tex_(nullptr), current_input_view_(nullptr), + blend_amount_(1.0f), + initialized_(false) { + std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); +} + +CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params) + : PostProcessEffect(ctx), + static_pipeline_(nullptr), + static_bind_group_(nullptr), + static_features_tex_(nullptr), + static_features_view_(nullptr), + layer_pipeline_(nullptr), + weights_buffer_(nullptr), + input_mip_tex_(nullptr), + current_input_view_(nullptr), + blend_amount_(params.blend_amount), initialized_(false) { std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); } @@ -93,13 +108,16 @@ void CNNv2Effect::load_weights() { // Upload weights data wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data, weights_size); - // Create uniform buffer for layer params - WGPUBufferDescriptor params_desc = {}; - params_desc.size = sizeof(LayerParams); - params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; - params_desc.mappedAtCreation = false; + // Create uniform buffers for layer params (one per layer) + for (uint32_t i = 0; i < 
num_layers; ++i) { + WGPUBufferDescriptor params_desc = {}; + params_desc.size = sizeof(LayerParams); + params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + params_desc.mappedAtCreation = false; - layer_params_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &params_desc); + WGPUBuffer buf = wgpuDeviceCreateBuffer(ctx_.device, &params_desc); + layer_params_buffers_.push_back(buf); + } } void CNNv2Effect::create_textures() { @@ -284,8 +302,8 @@ void CNNv2Effect::create_pipelines() { if (!layer_module) return; // Create bind group layout for layer compute - // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params - WGPUBindGroupLayoutEntry layer_bgl_entries[5] = {}; + // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input + WGPUBindGroupLayoutEntry layer_bgl_entries[6] = {}; // Binding 0: Static features (texture) layer_bgl_entries[0].binding = 0; @@ -317,8 +335,14 @@ void CNNv2Effect::create_pipelines() { layer_bgl_entries[4].buffer.type = WGPUBufferBindingType_Uniform; layer_bgl_entries[4].buffer.minBindingSize = sizeof(LayerParams); + // Binding 5: Original input (for blending) + layer_bgl_entries[5].binding = 5; + layer_bgl_entries[5].visibility = WGPUShaderStage_Compute; + layer_bgl_entries[5].texture.sampleType = WGPUTextureSampleType_Float; + layer_bgl_entries[5].texture.viewDimension = WGPUTextureViewDimension_2D; + WGPUBindGroupLayoutDescriptor layer_bgl_desc = {}; - layer_bgl_desc.entryCount = 5; + layer_bgl_desc.entryCount = 6; layer_bgl_desc.entries = layer_bgl_entries; WGPUBindGroupLayout layer_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &layer_bgl_desc); @@ -399,7 +423,7 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { // Create bind group for each layer for (size_t i = 0; i < layer_info_.size(); ++i) { - WGPUBindGroupEntry layer_entries[5] = {}; + WGPUBindGroupEntry layer_entries[6] = {}; // Binding 0: Static features (constant) layer_entries[0].binding = 0; @@ -419,14 +443,18 
@@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { layer_entries[3].buffer = weights_buffer_; layer_entries[3].size = wgpuBufferGetSize(weights_buffer_); - // Binding 4: Layer params (will be updated per dispatch) + // Binding 4: Layer params (use dedicated buffer for this layer) layer_entries[4].binding = 4; - layer_entries[4].buffer = layer_params_buffer_; + layer_entries[4].buffer = layer_params_buffers_[i]; layer_entries[4].size = sizeof(LayerParams); + // Binding 5: Original input (for blending) + layer_entries[5].binding = 5; + layer_entries[5].textureView = input_view; + WGPUBindGroupDescriptor layer_bg_desc = {}; layer_bg_desc.layout = layer_bgl; - layer_bg_desc.entryCount = 5; + layer_bg_desc.entryCount = 6; layer_bg_desc.entries = layer_entries; WGPUBindGroup layer_bg = wgpuDeviceCreateBindGroup(ctx_.device, &layer_bg_desc); @@ -438,9 +466,13 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { void CNNv2Effect::compute(WGPUCommandEncoder encoder, const CommonPostProcessUniforms& uniforms) { - (void)uniforms; if (!initialized_ || !static_pipeline_ || !static_bind_group_) return; + float effective_blend = blend_amount_; + if (beat_modulated_) { + effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_; + } + // Pass 1: Compute static features WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); @@ -458,20 +490,20 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder, // Execute CNN layer passes if (!layer_pipeline_ || layer_bind_groups_.empty()) return; + // Update layer params (each layer has own buffer) for (size_t i = 0; i < layer_info_.size(); ++i) { const LayerInfo& info = layer_info_[i]; - // Update layer params uniform buffer LayerParams params; params.kernel_size = info.kernel_size; params.in_channels = info.in_channels; params.out_channels = info.out_channels; params.weight_offset = info.weight_offset; params.is_output_layer = (i == layer_info_.size() - 1) ? 
1 : 0; + params.blend_amount = effective_blend; - wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffer_, 0, &params, sizeof(params)); + wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params, sizeof(params)); - // Execute layer compute pass WGPUComputePassEncoder layer_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); wgpuComputePassEncoderSetPipeline(layer_pass, layer_pipeline_); @@ -499,7 +531,8 @@ void CNNv2Effect::cleanup() { if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_); if (weights_buffer_) wgpuBufferRelease(weights_buffer_); - if (layer_params_buffer_) wgpuBufferRelease(layer_params_buffer_); + for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf); + layer_params_buffers_.clear(); for (int i = 0; i < 3; ++i) { if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]); diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h index 6005cf5..4389e4f 100644 --- a/src/gpu/effects/cnn_v2_effect.h +++ b/src/gpu/effects/cnn_v2_effect.h @@ -5,9 +5,14 @@ #include "gpu/effect.h" #include +struct CNNv2EffectParams { + float blend_amount = 1.0f; +}; + class CNNv2Effect : public PostProcessEffect { public: explicit CNNv2Effect(const GpuContext& ctx); + explicit CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params); ~CNNv2Effect(); void init(MainSequence* demo) override; @@ -18,6 +23,11 @@ public: const CommonPostProcessUniforms& uniforms) override; void update_bind_group(WGPUTextureView input_view) override; + void set_beat_modulation(bool enabled, float scale = 1.0f) { + beat_modulated_ = enabled; + beat_scale_ = scale; + } + private: struct LayerInfo { uint32_t kernel_size; @@ -33,6 +43,7 @@ private: uint32_t out_channels; uint32_t weight_offset; uint32_t is_output_layer; + float blend_amount; }; void create_textures(); @@ -49,7 +60,7 @@ private: // CNN layers (storage buffer architecture) WGPUComputePipeline layer_pipeline_; // Single pipeline for all layers WGPUBuffer 
weights_buffer_; // Storage buffer for weights - WGPUBuffer layer_params_buffer_; // Uniform buffer for per-layer params + std::vector layer_params_buffers_; // Uniform buffers (one per layer) std::vector layer_info_; // Layer metadata std::vector layer_bind_groups_; // Per-layer bind groups std::vector layer_textures_; // Ping-pong buffers @@ -60,5 +71,8 @@ private: WGPUTextureView input_mip_view_[3]; WGPUTextureView current_input_view_; + float blend_amount_ = 1.0f; + bool beat_modulated_ = false; + float beat_scale_ = 1.0f; bool initialized_; }; diff --git a/src/test_demo.cc b/src/test_demo.cc index 9cbeae2..7f10c3b 100644 --- a/src/test_demo.cc +++ b/src/test_demo.cc @@ -22,6 +22,8 @@ extern void LoadTimeline(MainSequence& main_seq, const GpuContext& ctx); // Inline peak meter effect for debugging audio-visual sync #include "gpu/effects/post_process_helper.h" #include "gpu/effects/shader_composer.h" +#include "gpu/effects/cnn_effect.h" +#include "gpu/effects/cnn_v2_effect.h" class PeakMeterEffect : public PostProcessEffect { public: @@ -98,6 +100,8 @@ class PeakMeterEffect : public PostProcessEffect { } }; +static int g_cnn_version = 2; // Default to v2 + #if !defined(STRIP_ALL) static void print_usage(const char* prog_name) { printf("Usage: %s [OPTIONS]\n", prog_name); @@ -107,6 +111,7 @@ static void print_usage(const char* prog_name) { printf(" --help Show this help message and exit\n"); printf(" --fullscreen Run in fullscreen mode\n"); printf(" --resolution WxH Set window resolution (e.g., 1024x768)\n"); + printf(" --cnn-version <1|2> Select CNN version (1=v1, 2=v2, default=2)\n"); printf(" --tempo Enable tempo variation test mode\n"); printf( " (alternates between acceleration and " @@ -123,6 +128,7 @@ static void print_usage(const char* prog_name) { printf("\nExamples:\n"); printf(" %s --fullscreen\n", prog_name); printf(" %s --resolution 1024x768 --tempo\n", prog_name); + printf(" %s --cnn-version 1\n", prog_name); printf(" %s --log-peaks peaks.txt\n", 
prog_name); printf(" %s --log-peaks peaks.txt --log-peaks-fine\n", prog_name); printf("\nControls:\n"); @@ -184,6 +190,21 @@ int main(int argc, char** argv) { log_peaks_file = argv[++i]; } else if (strcmp(argv[i], "--log-peaks-fine") == 0) { log_peaks_fine = true; + } else if (strcmp(argv[i], "--cnn-version") == 0) { + if (i + 1 < argc) { + int version = atoi(argv[++i]); + if (version == 1 || version == 2) { + g_cnn_version = version; + } else { + fprintf(stderr, "Error: --cnn-version must be 1 or 2\n\n"); + print_usage(argv[0]); + return 1; + } + } else { + fprintf(stderr, "Error: --cnn-version requires argument\n\n"); + print_usage(argv[0]); + return 1; + } } else { CHECK_RETURN_BEGIN(true, 1) print_usage(argv[0]); @@ -205,9 +226,25 @@ int main(int argc, char** argv) { // Load timeline from test_demo.seq LoadTimeline(*gpu_get_main_sequence(), *gpu_get_context()); - // Add peak meter visualization effect (renders as final post-process) #if !defined(STRIP_ALL) const GpuContext* gpu_ctx = gpu_get_context(); + + // Add CNN post-processing effect based on version flag + if (g_cnn_version == 1) { + CNNEffectParams params; + params.blend_amount = 1.0f; + auto* cnn = new CNNEffect(*gpu_ctx, params); + cnn->set_beat_modulation(true, 1.0f); + gpu_add_custom_effect(cnn, 0.0f, 99999.0f, 10); + } else if (g_cnn_version == 2) { + CNNv2EffectParams params; + params.blend_amount = 1.0f; + auto* cnn = new CNNv2Effect(*gpu_ctx, params); + cnn->set_beat_modulation(true, 1.0f); + gpu_add_custom_effect(cnn, 0.0f, 99999.0f, 10); + } + + // Add peak meter visualization effect (renders as final post-process) auto* peak_meter = new PeakMeterEffect(*gpu_ctx); gpu_add_custom_effect(peak_meter, 0.0f, 99999.0f, 999); // High priority = renders last -- cgit v1.2.3