summary | refs | log | tree | commit | diff
path: root/src/effects/cnn_v2_effect.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/effects/cnn_v2_effect.cc')
-rw-r--r-- src/effects/cnn_v2_effect.cc 190
1 files changed, 112 insertions, 78 deletions
diff --git a/src/effects/cnn_v2_effect.cc b/src/effects/cnn_v2_effect.cc
index 4c10ed1..7127aae 100644
--- a/src/effects/cnn_v2_effect.cc
+++ b/src/effects/cnn_v2_effect.cc
@@ -15,38 +15,24 @@
#include <cstring>
CNNv2Effect::CNNv2Effect(const GpuContext& ctx)
- : PostProcessEffect(ctx),
- static_pipeline_(nullptr),
- static_bind_group_(nullptr),
- static_params_buffer_(nullptr),
- static_features_tex_(nullptr),
- static_features_view_(nullptr),
- linear_sampler_(nullptr),
- layer_pipeline_(nullptr),
- weights_buffer_(nullptr),
- input_mip_tex_(nullptr),
- current_input_view_(nullptr),
- blend_amount_(1.0f),
- mip_level_(0),
+ : PostProcessEffect(ctx), static_pipeline_(nullptr),
+ static_bind_group_(nullptr), static_params_buffer_(nullptr),
+ static_features_tex_(nullptr), static_features_view_(nullptr),
+ linear_sampler_(nullptr), layer_pipeline_(nullptr),
+ weights_buffer_(nullptr), input_mip_tex_(nullptr),
+ current_input_view_(nullptr), blend_amount_(1.0f), mip_level_(0),
initialized_(false) {
std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
}
CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params)
- : PostProcessEffect(ctx),
- static_pipeline_(nullptr),
- static_bind_group_(nullptr),
- static_params_buffer_(nullptr),
- static_features_tex_(nullptr),
- static_features_view_(nullptr),
- linear_sampler_(nullptr),
- layer_pipeline_(nullptr),
- weights_buffer_(nullptr),
- input_mip_tex_(nullptr),
- current_input_view_(nullptr),
- blend_amount_(params.blend_amount),
- mip_level_(0),
- initialized_(false) {
+ : PostProcessEffect(ctx), static_pipeline_(nullptr),
+ static_bind_group_(nullptr), static_params_buffer_(nullptr),
+ static_features_tex_(nullptr), static_features_view_(nullptr),
+ linear_sampler_(nullptr), layer_pipeline_(nullptr),
+ weights_buffer_(nullptr), input_mip_tex_(nullptr),
+ current_input_view_(nullptr), blend_amount_(params.blend_amount),
+ mip_level_(0), initialized_(false) {
std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
}
@@ -56,7 +42,8 @@ CNNv2Effect::~CNNv2Effect() {
void CNNv2Effect::init(MainSequence* demo) {
(void)demo;
- if (initialized_) return;
+ if (initialized_)
+ return;
load_weights();
create_textures();
@@ -75,7 +62,8 @@ void CNNv2Effect::resize(int width, int height) {
void CNNv2Effect::load_weights() {
// Load binary weights asset
size_t weights_size = 0;
- const uint8_t* weights_data = (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size);
+ const uint8_t* weights_data =
+ (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size);
if (!weights_data || weights_size < 20) {
// Weights not available - effect will skip
@@ -89,12 +77,14 @@ void CNNv2Effect::load_weights() {
uint32_t num_layers = header[2];
uint32_t total_weights = header[3];
- FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2'
+ FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2'
- // Support both version 1 (16-byte header) and version 2 (20-byte header with mip_level)
- // TODO: Version 3 should include feature descriptor for arbitrary layout/ordering
+ // Support both version 1 (16-byte header) and version 2 (20-byte header with
+ // mip_level)
+ // TODO: Version 3 should include feature descriptor for arbitrary
+ // layout/ordering
if (version == 1) {
- mip_level_ = 0; // Default for v1
+ mip_level_ = 0; // Default for v1
} else if (version == 2) {
mip_level_ = header[4];
} else {
@@ -115,9 +105,10 @@ void CNNv2Effect::load_weights() {
layer_info_.push_back(info);
}
- // Create GPU storage buffer for weights (skip header + layer info, upload only weights)
- size_t header_size = 20; // 5 u32
- size_t layer_info_size = 20 * num_layers; // 5 u32 per layer
+ // Create GPU storage buffer for weights (skip header + layer info, upload
+ // only weights)
+ size_t header_size = 20; // 5 u32
+ size_t layer_info_size = 20 * num_layers; // 5 u32 per layer
size_t weights_offset = header_size + layer_info_size;
size_t weights_only_size = weights_size - weights_offset;
@@ -129,7 +120,8 @@ void CNNv2Effect::load_weights() {
weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc);
// Upload only weights (skip header + layer info)
- wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size);
+ wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0,
+ weights_data + weights_offset, weights_only_size);
// Create uniform buffers for layer params (one per layer)
for (uint32_t i = 0; i < num_layers; ++i) {
@@ -153,7 +145,9 @@ void CNNv2Effect::create_textures() {
// Input texture with mips (for multi-scale features)
TextureWithView input_mip = gpu_create_texture_2d(
ctx_.device, width_, height_, WGPUTextureFormat_RGBA8Unorm,
- (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst), 3);
+ (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding |
+ WGPUTextureUsage_CopyDst),
+ 3);
input_mip_tex_ = input_mip.texture;
for (int i = 0; i < 3; ++i) {
@@ -195,7 +189,8 @@ void CNNv2Effect::create_pipelines() {
// Static features compute pipeline
size_t shader_size = 0;
- const char* static_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size);
+ const char* static_code =
+ (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size);
if (!static_code || shader_size == 0) {
// Shader not available (e.g., in test mode) - skip pipeline creation
@@ -210,7 +205,8 @@ void CNNv2Effect::create_pipelines() {
shader_desc.nextInChain = &wgsl_src.chain;
// Create bind group layout for static features compute
- // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params, 6=linear_sampler
+ // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output,
+ // 5=params, 6=linear_sampler
WGPUBindGroupLayout static_bgl =
BindGroupLayoutBuilder()
.texture(0, WGPUShaderStage_Compute)
@@ -227,28 +223,35 @@ void CNNv2Effect::create_pipelines() {
WGPUPipelineLayoutDescriptor pl_desc = {};
pl_desc.bindGroupLayoutCount = 1;
pl_desc.bindGroupLayouts = &static_bgl;
- WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+ WGPUPipelineLayout pipeline_layout =
+ wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
// Recreate pipeline with proper layout
WGPUComputePipelineDescriptor pipeline_desc2 = {};
- pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+ pipeline_desc2.compute.module =
+ wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
pipeline_desc2.compute.entryPoint = str_view("main");
pipeline_desc2.layout = pipeline_layout;
- if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_);
- static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2);
+ if (static_pipeline_)
+ wgpuComputePipelineRelease(static_pipeline_);
+ static_pipeline_ =
+ wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2);
wgpuShaderModuleRelease(pipeline_desc2.compute.module);
wgpuPipelineLayoutRelease(pipeline_layout);
wgpuBindGroupLayoutRelease(static_bgl);
// CNN layer compute pipeline (storage buffer version)
- if (layer_info_.empty()) return; // No weights loaded
+ if (layer_info_.empty())
+ return; // No weights loaded
size_t layer_shader_size = 0;
- const char* layer_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_shader_size);
+ const char* layer_code = (const char*)GetAsset(
+ AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_shader_size);
- if (!layer_code || layer_shader_size == 0) return;
+ if (!layer_code || layer_shader_size == 0)
+ return;
WGPUShaderSourceWGSL layer_wgsl = {};
layer_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL;
@@ -257,11 +260,14 @@ void CNNv2Effect::create_pipelines() {
WGPUShaderModuleDescriptor layer_shader_desc = {};
layer_shader_desc.nextInChain = &layer_wgsl.chain;
- WGPUShaderModule layer_module = wgpuDeviceCreateShaderModule(ctx_.device, &layer_shader_desc);
- if (!layer_module) return;
+ WGPUShaderModule layer_module =
+ wgpuDeviceCreateShaderModule(ctx_.device, &layer_shader_desc);
+ if (!layer_module)
+ return;
// Create bind group layout for layer compute
- // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input
+ // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params,
+ // 5=original_input
WGPUBindGroupLayout layer_bgl =
BindGroupLayoutBuilder()
.uint_texture(0, WGPUShaderStage_Compute)
@@ -277,14 +283,16 @@ void CNNv2Effect::create_pipelines() {
layer_pl_desc.bindGroupLayoutCount = 1;
layer_pl_desc.bindGroupLayouts = &layer_bgl;
- WGPUPipelineLayout layer_pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_pl_desc);
+ WGPUPipelineLayout layer_pipeline_layout =
+ wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_pl_desc);
WGPUComputePipelineDescriptor layer_pipeline_desc = {};
layer_pipeline_desc.compute.module = layer_module;
layer_pipeline_desc.compute.entryPoint = str_view("main");
layer_pipeline_desc.layout = layer_pipeline_layout;
- layer_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &layer_pipeline_desc);
+ layer_pipeline_ =
+ wgpuDeviceCreateComputePipeline(ctx_.device, &layer_pipeline_desc);
wgpuShaderModuleRelease(layer_module);
wgpuPipelineLayoutRelease(layer_pipeline_layout);
@@ -292,7 +300,8 @@ void CNNv2Effect::create_pipelines() {
}
void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
- if (!static_pipeline_) return;
+ if (!static_pipeline_)
+ return;
// Cache input view
current_input_view_ = input_view;
@@ -303,7 +312,8 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
static_bind_group_ = nullptr;
}
- // Create bind group for static features compute (manual for storage texture binding)
+ // Create bind group for static features compute (manual for storage texture
+ // binding)
WGPUBindGroupEntry bg_entries[7] = {};
bg_entries[0].binding = 0;
bg_entries[0].textureView = input_view;
@@ -332,7 +342,8 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
wgpuBindGroupLayoutRelease(layout);
// Create layer bind groups
- if (!layer_pipeline_ || layer_info_.empty()) return;
+ if (!layer_pipeline_ || layer_info_.empty())
+ return;
// Release old layer bind groups
for (auto bg : layer_bind_groups_) {
@@ -341,7 +352,8 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
layer_bind_groups_.clear();
// Get bind group layout from layer pipeline
- WGPUBindGroupLayout layer_bgl = wgpuComputePipelineGetBindGroupLayout(layer_pipeline_, 0);
+ WGPUBindGroupLayout layer_bgl =
+ wgpuComputePipelineGetBindGroupLayout(layer_pipeline_, 0);
// Create bind group for each layer
for (size_t i = 0; i < layer_info_.size(); ++i) {
@@ -366,7 +378,8 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
void CNNv2Effect::compute(WGPUCommandEncoder encoder,
const CommonPostProcessUniforms& uniforms) {
- if (!initialized_ || !static_pipeline_ || !static_bind_group_) return;
+ if (!initialized_ || !static_pipeline_ || !static_bind_group_)
+ return;
float effective_blend = blend_amount_;
if (beat_modulated_) {
@@ -379,10 +392,12 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
static_params.padding[0] = 0;
static_params.padding[1] = 0;
static_params.padding[2] = 0;
- wgpuQueueWriteBuffer(ctx_.queue, static_params_buffer_, 0, &static_params, sizeof(static_params));
+ wgpuQueueWriteBuffer(ctx_.queue, static_params_buffer_, 0, &static_params,
+ sizeof(static_params));
// Pass 1: Compute static features
- WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+ WGPUComputePassEncoder pass =
+ wgpuCommandEncoderBeginComputePass(encoder, nullptr);
wgpuComputePassEncoderSetPipeline(pass, static_pipeline_);
wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr);
@@ -396,7 +411,8 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
wgpuComputePassEncoderRelease(pass);
// Execute CNN layer passes
- if (!layer_pipeline_ || layer_bind_groups_.empty()) return;
+ if (!layer_pipeline_ || layer_bind_groups_.empty())
+ return;
// Update layer params (each layer has own buffer)
for (size_t i = 0; i < layer_info_.size(); ++i) {
@@ -411,14 +427,18 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
params.blend_amount = effective_blend;
params.is_layer_0 = (i == 0) ? 1 : 0;
- wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params, sizeof(params));
+ wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params,
+ sizeof(params));
- WGPUComputePassEncoder layer_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+ WGPUComputePassEncoder layer_pass =
+ wgpuCommandEncoderBeginComputePass(encoder, nullptr);
wgpuComputePassEncoderSetPipeline(layer_pass, layer_pipeline_);
- wgpuComputePassEncoderSetBindGroup(layer_pass, 0, layer_bind_groups_[i], 0, nullptr);
+ wgpuComputePassEncoderSetBindGroup(layer_pass, 0, layer_bind_groups_[i], 0,
+ nullptr);
- wgpuComputePassEncoderDispatchWorkgroups(layer_pass, workgroups_x, workgroups_y, 1);
+ wgpuComputePassEncoderDispatchWorkgroups(layer_pass, workgroups_x,
+ workgroups_y, 1);
wgpuComputePassEncoderEnd(layer_pass);
wgpuComputePassEncoderRelease(layer_pass);
@@ -433,26 +453,40 @@ void CNNv2Effect::render(WGPURenderPassEncoder pass,
}
void CNNv2Effect::cleanup() {
- if (static_features_view_) wgpuTextureViewRelease(static_features_view_);
- if (static_features_tex_) wgpuTextureRelease(static_features_tex_);
- if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_);
- if (static_params_buffer_) wgpuBufferRelease(static_params_buffer_);
- if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_);
- if (linear_sampler_) wgpuSamplerRelease(linear_sampler_);
+ if (static_features_view_)
+ wgpuTextureViewRelease(static_features_view_);
+ if (static_features_tex_)
+ wgpuTextureRelease(static_features_tex_);
+ if (static_bind_group_)
+ wgpuBindGroupRelease(static_bind_group_);
+ if (static_params_buffer_)
+ wgpuBufferRelease(static_params_buffer_);
+ if (static_pipeline_)
+ wgpuComputePipelineRelease(static_pipeline_);
+ if (linear_sampler_)
+ wgpuSamplerRelease(linear_sampler_);
- if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_);
- if (weights_buffer_) wgpuBufferRelease(weights_buffer_);
- for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf);
+ if (layer_pipeline_)
+ wgpuComputePipelineRelease(layer_pipeline_);
+ if (weights_buffer_)
+ wgpuBufferRelease(weights_buffer_);
+ for (auto buf : layer_params_buffers_)
+ wgpuBufferRelease(buf);
layer_params_buffers_.clear();
for (int i = 0; i < 3; ++i) {
- if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]);
+ if (input_mip_view_[i])
+ wgpuTextureViewRelease(input_mip_view_[i]);
}
- if (input_mip_tex_) wgpuTextureRelease(input_mip_tex_);
+ if (input_mip_tex_)
+ wgpuTextureRelease(input_mip_tex_);
- for (auto view : layer_views_) wgpuTextureViewRelease(view);
- for (auto tex : layer_textures_) wgpuTextureRelease(tex);
- for (auto bg : layer_bind_groups_) wgpuBindGroupRelease(bg);
+ for (auto view : layer_views_)
+ wgpuTextureViewRelease(view);
+ for (auto tex : layer_textures_)
+ wgpuTextureRelease(tex);
+ for (auto bg : layer_bind_groups_)
+ wgpuBindGroupRelease(bg);
layer_views_.clear();
layer_textures_.clear();