diff options
Diffstat (limited to 'src/gpu/effects/cnn_v2_effect.cc')
| -rw-r--r-- | src/gpu/effects/cnn_v2_effect.cc | 463 |
1 files changed, 0 insertions, 463 deletions
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc deleted file mode 100644 index be856a4..0000000 --- a/src/gpu/effects/cnn_v2_effect.cc +++ /dev/null @@ -1,463 +0,0 @@ -// CNN v2 Effect Implementation - -#include "gpu/effects/cnn_v2_effect.h" - -#if defined(USE_TEST_ASSETS) -#include "test_assets.h" -#else -#include "generated/assets.h" -#endif - -#include "gpu/bind_group_builder.h" -#include "gpu/gpu.h" -#include "util/asset_manager.h" -#include "util/fatal_error.h" -#include <cstring> - -CNNv2Effect::CNNv2Effect(const GpuContext& ctx) - : PostProcessEffect(ctx), - static_pipeline_(nullptr), - static_bind_group_(nullptr), - static_params_buffer_(nullptr), - static_features_tex_(nullptr), - static_features_view_(nullptr), - linear_sampler_(nullptr), - layer_pipeline_(nullptr), - weights_buffer_(nullptr), - input_mip_tex_(nullptr), - current_input_view_(nullptr), - blend_amount_(1.0f), - mip_level_(0), - initialized_(false) { - std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); -} - -CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params) - : PostProcessEffect(ctx), - static_pipeline_(nullptr), - static_bind_group_(nullptr), - static_params_buffer_(nullptr), - static_features_tex_(nullptr), - static_features_view_(nullptr), - linear_sampler_(nullptr), - layer_pipeline_(nullptr), - weights_buffer_(nullptr), - input_mip_tex_(nullptr), - current_input_view_(nullptr), - blend_amount_(params.blend_amount), - mip_level_(0), - initialized_(false) { - std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); -} - -CNNv2Effect::~CNNv2Effect() { - cleanup(); -} - -void CNNv2Effect::init(MainSequence* demo) { - (void)demo; - if (initialized_) return; - - load_weights(); - create_textures(); - create_pipelines(); - - initialized_ = true; -} - -void CNNv2Effect::resize(int width, int height) { - PostProcessEffect::resize(width, height); - cleanup(); - create_textures(); - create_pipelines(); -} - -void CNNv2Effect::load_weights() { - // Load binary weights asset - size_t weights_size = 0; - const uint8_t* weights_data = (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size); - - if (!weights_data || weights_size < 20) { - // Weights not available - effect will skip - return; - } - - // Parse header - const uint32_t* header = (const uint32_t*)weights_data; - uint32_t magic = header[0]; - uint32_t version = header[1]; - uint32_t num_layers = header[2]; - uint32_t total_weights = header[3]; - - FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2' - - // Support both version 1 (16-byte header) and version 2 (20-byte header with mip_level) - // TODO: Version 3 should include feature descriptor for arbitrary layout/ordering - if (version == 1) { - mip_level_ = 0; // Default for v1 - } else if (version == 2) { - mip_level_ = header[4]; - } else { - FATAL_ERROR("Unsupported CNN v2 weights version: %u\n", version); - } - - // Parse layer info (20 bytes per layer) - // Offset depends on version: v1=16 bytes (4 u32), v2=20 bytes (5 u32) - const uint32_t header_u32_count = (version == 1) ? 4 : 5; - const uint32_t* layer_data = header + header_u32_count; - for (uint32_t i = 0; i < num_layers; ++i) { - LayerInfo info; - info.kernel_size = layer_data[i * 5 + 0]; - info.in_channels = layer_data[i * 5 + 1]; - info.out_channels = layer_data[i * 5 + 2]; - info.weight_offset = layer_data[i * 5 + 3]; - info.weight_count = layer_data[i * 5 + 4]; - layer_info_.push_back(info); - } - - // Create GPU storage buffer for weights (skip header + layer info, upload only weights) - size_t header_size = 20; // 5 u32 - size_t layer_info_size = 20 * num_layers; // 5 u32 per layer - size_t weights_offset = header_size + layer_info_size; - size_t weights_only_size = weights_size - weights_offset; - - WGPUBufferDescriptor buffer_desc = {}; - buffer_desc.size = weights_only_size; - buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst; - buffer_desc.mappedAtCreation = false; - - weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc); - - // Upload only weights (skip header + layer info) - wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size); - - // Create uniform buffers for layer params (one per layer) - for (uint32_t i = 0; i < num_layers; ++i) { - WGPUBufferDescriptor params_desc = {}; - params_desc.size = sizeof(LayerParams); - params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; - params_desc.mappedAtCreation = false; - - WGPUBuffer buf = wgpuDeviceCreateBuffer(ctx_.device, ¶ms_desc); - layer_params_buffers_.push_back(buf); - } -} - -void CNNv2Effect::create_textures() { - // Static features texture (8×f16 packed as 4×u32) - TextureWithView static_tex = gpu_create_storage_texture_2d( - ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint); - static_features_tex_ = static_tex.texture; - static_features_view_ = static_tex.view; - - // Input texture with mips (for multi-scale features) - TextureWithView input_mip = gpu_create_texture_2d( - ctx_.device, width_, height_, WGPUTextureFormat_RGBA8Unorm, - WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst, 3); - input_mip_tex_ = input_mip.texture; - - for (int i = 0; i < 3; ++i) { - input_mip_view_[i] = - gpu_create_mip_view(input_mip_tex_, WGPUTextureFormat_RGBA8Unorm, i); - } - - // Create 2 layer textures (ping-pong buffers for intermediate results) - // Each stores 8×f16 channels packed as 4×u32 - for (int i = 0; i < 2; ++i) { - TextureWithView layer = gpu_create_storage_texture_2d( - ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint); - layer_textures_.push_back(layer.texture); - layer_views_.push_back(layer.view); - } - - // Create uniform buffer for static feature params - WGPUBufferDescriptor params_desc = {}; - params_desc.size = sizeof(StaticFeatureParams); - params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; - params_desc.mappedAtCreation = false; - static_params_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, ¶ms_desc); -} - -void CNNv2Effect::create_pipelines() { - // Create linear sampler for bilinear interpolation - WGPUSamplerDescriptor sampler_desc = {}; - sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; - sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; - sampler_desc.addressModeW = WGPUAddressMode_ClampToEdge; - sampler_desc.magFilter = WGPUFilterMode_Linear; - sampler_desc.minFilter = WGPUFilterMode_Linear; - sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear; - sampler_desc.lodMinClamp = 0.0f; - sampler_desc.lodMaxClamp = 32.0f; - sampler_desc.maxAnisotropy = 1; - - linear_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &sampler_desc); - - // Static features compute pipeline - size_t shader_size = 0; - const char* static_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size); - - if (!static_code || shader_size == 0) { - // Shader not available (e.g., in test mode) - skip pipeline creation - return; - } - - WGPUShaderSourceWGSL wgsl_src = {}; - wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_src.code = str_view(static_code); - - WGPUShaderModuleDescriptor shader_desc = {}; - shader_desc.nextInChain = &wgsl_src.chain; - - // Create bind group layout for static features compute - // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params, 6=linear_sampler - WGPUBindGroupLayout static_bgl = - BindGroupLayoutBuilder() - .texture(0, WGPUShaderStage_Compute) - .texture(1, WGPUShaderStage_Compute) - .texture(2, WGPUShaderStage_Compute) - .texture(3, WGPUShaderStage_Compute) - .storage_texture(4, WGPUShaderStage_Compute, - WGPUTextureFormat_RGBA32Uint) - .uniform(5, WGPUShaderStage_Compute, sizeof(StaticFeatureParams)) - .sampler(6, WGPUShaderStage_Compute) - .build(ctx_.device); - - // Update pipeline layout - WGPUPipelineLayoutDescriptor pl_desc = {}; - pl_desc.bindGroupLayoutCount = 1; - pl_desc.bindGroupLayouts = &static_bgl; - WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc); - - // Recreate pipeline with proper layout - WGPUComputePipelineDescriptor pipeline_desc2 = {}; - pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); - pipeline_desc2.compute.entryPoint = str_view("main"); - pipeline_desc2.layout = pipeline_layout; - - if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); - static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2); - - wgpuShaderModuleRelease(pipeline_desc2.compute.module); - wgpuPipelineLayoutRelease(pipeline_layout); - wgpuBindGroupLayoutRelease(static_bgl); - - // CNN layer compute pipeline (storage buffer version) - if (layer_info_.empty()) return; // No weights loaded - - size_t layer_shader_size = 0; - const char* layer_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_shader_size); - - if (!layer_code || layer_shader_size == 0) return; - - WGPUShaderSourceWGSL layer_wgsl = {}; - layer_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL; - layer_wgsl.code = str_view(layer_code); - - WGPUShaderModuleDescriptor layer_shader_desc = {}; - layer_shader_desc.nextInChain = &layer_wgsl.chain; - - WGPUShaderModule layer_module = wgpuDeviceCreateShaderModule(ctx_.device, &layer_shader_desc); - if (!layer_module) return; - - // Create bind group layout for layer compute - // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input - WGPUBindGroupLayout layer_bgl = - BindGroupLayoutBuilder() - .uint_texture(0, WGPUShaderStage_Compute) - .uint_texture(1, WGPUShaderStage_Compute) - .storage_texture(2, WGPUShaderStage_Compute, - WGPUTextureFormat_RGBA32Uint) - .storage(3, WGPUShaderStage_Compute) - .uniform(4, WGPUShaderStage_Compute, sizeof(LayerParams)) - .texture(5, WGPUShaderStage_Compute) - .build(ctx_.device); - - WGPUPipelineLayoutDescriptor layer_pl_desc = {}; - layer_pl_desc.bindGroupLayoutCount = 1; - layer_pl_desc.bindGroupLayouts = &layer_bgl; - - WGPUPipelineLayout layer_pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_pl_desc); - - WGPUComputePipelineDescriptor layer_pipeline_desc = {}; - layer_pipeline_desc.compute.module = layer_module; - layer_pipeline_desc.compute.entryPoint = str_view("main"); - layer_pipeline_desc.layout = layer_pipeline_layout; - - layer_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &layer_pipeline_desc); - - wgpuShaderModuleRelease(layer_module); - wgpuPipelineLayoutRelease(layer_pipeline_layout); - wgpuBindGroupLayoutRelease(layer_bgl); -} - -void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { - if (!static_pipeline_) return; - - // Cache input view - current_input_view_ = input_view; - - // Release old bind group - if (static_bind_group_) { - wgpuBindGroupRelease(static_bind_group_); - static_bind_group_ = nullptr; - } - - // Create bind group for static features compute (manual for storage texture binding) - WGPUBindGroupEntry bg_entries[7] = {}; - bg_entries[0].binding = 0; - bg_entries[0].textureView = input_view; - bg_entries[1].binding = 1; - bg_entries[1].textureView = input_mip_view_[0]; - bg_entries[2].binding = 2; - bg_entries[2].textureView = - input_mip_view_[1] ? input_mip_view_[1] : input_mip_view_[0]; - bg_entries[3].binding = 3; - bg_entries[3].textureView = input_view; - bg_entries[4].binding = 4; - bg_entries[4].textureView = static_features_view_; - bg_entries[5].binding = 5; - bg_entries[5].buffer = static_params_buffer_; - bg_entries[5].size = sizeof(StaticFeatureParams); - bg_entries[6].binding = 6; - bg_entries[6].sampler = linear_sampler_; - - WGPUBindGroupLayout layout = - wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); - WGPUBindGroupDescriptor bg_desc = {}; - bg_desc.layout = layout; - bg_desc.entryCount = 7; - bg_desc.entries = bg_entries; - static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); - wgpuBindGroupLayoutRelease(layout); - - // Create layer bind groups - if (!layer_pipeline_ || layer_info_.empty()) return; - - // Release old layer bind groups - for (auto bg : layer_bind_groups_) { - wgpuBindGroupRelease(bg); - } - layer_bind_groups_.clear(); - - // Get bind group layout from layer pipeline - WGPUBindGroupLayout layer_bgl = wgpuComputePipelineGetBindGroupLayout(layer_pipeline_, 0); - - // Create bind group for each layer - for (size_t i = 0; i < layer_info_.size(); ++i) { - WGPUTextureView layer_input = - (i == 0) ? static_features_view_ : layer_views_[i % 2]; - - WGPUBindGroup layer_bg = - BindGroupBuilder() - .texture(0, static_features_view_) - .texture(1, layer_input) - .texture(2, layer_views_[(i + 1) % 2]) - .buffer(3, weights_buffer_, wgpuBufferGetSize(weights_buffer_)) - .buffer(4, layer_params_buffers_[i], sizeof(LayerParams)) - .texture(5, input_view) - .build(ctx_.device, layer_bgl); - - layer_bind_groups_.push_back(layer_bg); - } - - wgpuBindGroupLayoutRelease(layer_bgl); -} - -void CNNv2Effect::compute(WGPUCommandEncoder encoder, - const CommonPostProcessUniforms& uniforms) { - if (!initialized_ || !static_pipeline_ || !static_bind_group_) return; - - float effective_blend = blend_amount_; - if (beat_modulated_) { - effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_; - } - - // Update static feature params - StaticFeatureParams static_params; - static_params.mip_level = mip_level_; - static_params.padding[0] = 0; - static_params.padding[1] = 0; - static_params.padding[2] = 0; - wgpuQueueWriteBuffer(ctx_.queue, static_params_buffer_, 0, &static_params, sizeof(static_params)); - - // Pass 1: Compute static features - WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); - - wgpuComputePassEncoderSetPipeline(pass, static_pipeline_); - wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr); - - // Dispatch workgroups (8×8 threads per group) - uint32_t workgroups_x = (width_ + 7) / 8; - uint32_t workgroups_y = (height_ + 7) / 8; - wgpuComputePassEncoderDispatchWorkgroups(pass, workgroups_x, workgroups_y, 1); - - wgpuComputePassEncoderEnd(pass); - wgpuComputePassEncoderRelease(pass); - - // Execute CNN layer passes - if (!layer_pipeline_ || layer_bind_groups_.empty()) return; - - // Update layer params (each layer has own buffer) - for (size_t i = 0; i < layer_info_.size(); ++i) { - const LayerInfo& info = layer_info_[i]; - - LayerParams params; - params.kernel_size = info.kernel_size; - params.in_channels = info.in_channels; - params.out_channels = info.out_channels; - params.weight_offset = info.weight_offset; - params.is_output_layer = (i == layer_info_.size() - 1) ? 1 : 0; - params.blend_amount = effective_blend; - params.is_layer_0 = (i == 0) ? 1 : 0; - - wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, ¶ms, sizeof(params)); - - WGPUComputePassEncoder layer_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); - - wgpuComputePassEncoderSetPipeline(layer_pass, layer_pipeline_); - wgpuComputePassEncoderSetBindGroup(layer_pass, 0, layer_bind_groups_[i], 0, nullptr); - - wgpuComputePassEncoderDispatchWorkgroups(layer_pass, workgroups_x, workgroups_y, 1); - - wgpuComputePassEncoderEnd(layer_pass); - wgpuComputePassEncoderRelease(layer_pass); - } -} - -void CNNv2Effect::render(WGPURenderPassEncoder pass, - const CommonPostProcessUniforms& uniforms) { - (void)pass; - (void)uniforms; - // Compute-only effect, rendering is done by default composite pass -} - -void CNNv2Effect::cleanup() { - if (static_features_view_) wgpuTextureViewRelease(static_features_view_); - if (static_features_tex_) wgpuTextureRelease(static_features_tex_); - if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_); - if (static_params_buffer_) wgpuBufferRelease(static_params_buffer_); - if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); - if (linear_sampler_) wgpuSamplerRelease(linear_sampler_); - - if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_); - if (weights_buffer_) wgpuBufferRelease(weights_buffer_); - for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf); - layer_params_buffers_.clear(); - - for (int i = 0; i < 3; ++i) { - if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]); - } - if (input_mip_tex_) wgpuTextureRelease(input_mip_tex_); - - for (auto view : layer_views_) wgpuTextureViewRelease(view); - for (auto tex : layer_textures_) wgpuTextureRelease(tex); - for (auto bg : layer_bind_groups_) wgpuBindGroupRelease(bg); - - layer_views_.clear(); - layer_textures_.clear(); - layer_bind_groups_.clear(); - layer_info_.clear(); - - initialized_ = false; -} |
