diff options
| author | skal <pascal.massimino@gmail.com> | 2026-02-12 12:08:22 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-12 12:08:22 +0100 |
| commit | 4d87a6d781c3f159d216f4cd9251e3d7bd63554f (patch) | |
| tree | 61bb4ee18b1c981cee789b215adf73860138d6c2 /src/gpu | |
| parent | 4cbf571a0087020bedf3c565483f94bc795ed4c4 (diff) | |
CNN v2: storage buffer architecture foundation
- Add binary weight format (header + layer info + packed f16)
- New export_cnn_v2_weights.py for binary weight export
- Single cnn_v2_compute.wgsl shader with storage buffer
- Load weights in CNNv2Effect::load_weights()
- Create layer compute pipeline with 5 bindings
- Fast training config: 100 epochs, 3×3 kernels, 8→4→4 channels
Next: Complete bind group creation and multi-layer compute execution
Diffstat (limited to 'src/gpu')
| -rw-r--r-- | src/gpu/effects/cnn_v2_effect.cc | 140 | ||||
| -rw-r--r-- | src/gpu/effects/cnn_v2_effect.h | 30 |
2 files changed, 161 insertions, 9 deletions
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc index b425aba..275af68 100644 --- a/src/gpu/effects/cnn_v2_effect.cc +++ b/src/gpu/effects/cnn_v2_effect.cc @@ -18,6 +18,9 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx) static_bind_group_(nullptr), static_features_tex_(nullptr), static_features_view_(nullptr), + layer_pipeline_(nullptr), + weights_buffer_(nullptr), + layer_params_buffer_(nullptr), input_mip_tex_(nullptr), current_input_view_(nullptr), initialized_(false) { @@ -32,6 +35,7 @@ void CNNv2Effect::init(MainSequence* demo) { (void)demo; if (initialized_) return; + load_weights(); create_textures(); create_pipelines(); @@ -45,6 +49,59 @@ void CNNv2Effect::resize(int width, int height) { create_pipelines(); } +void CNNv2Effect::load_weights() { + // Load binary weights asset + size_t weights_size = 0; + const uint8_t* weights_data = (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size); + + if (!weights_data || weights_size < 16) { + // Weights not available - effect will skip + return; + } + + // Parse header (16 bytes) + const uint32_t* header = (const uint32_t*)weights_data; + uint32_t magic = header[0]; + uint32_t version = header[1]; + uint32_t num_layers = header[2]; + uint32_t total_weights = header[3]; + + FATAL_CHECK(magic == 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2' + FATAL_CHECK(version == 1, "Unsupported CNN v2 weights version\n"); + + // Parse layer info (20 bytes per layer) + const uint32_t* layer_data = header + 4; + for (uint32_t i = 0; i < num_layers; ++i) { + LayerInfo info; + info.kernel_size = layer_data[i * 5 + 0]; + info.in_channels = layer_data[i * 5 + 1]; + info.out_channels = layer_data[i * 5 + 2]; + info.weight_offset = layer_data[i * 5 + 3]; + info.weight_count = layer_data[i * 5 + 4]; + layer_info_.push_back(info); + } + + // Create GPU storage buffer for weights + // Buffer contains: header + layer info + packed f16 weights (as u32) + WGPUBufferDescriptor 
buffer_desc = {}; + buffer_desc.size = weights_size; + buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst; + buffer_desc.mappedAtCreation = false; + + weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc); + + // Upload weights data + wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data, weights_size); + + // Create uniform buffer for layer params + WGPUBufferDescriptor params_desc = {}; + params_desc.size = sizeof(LayerParams); + params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + params_desc.mappedAtCreation = false; + + layer_params_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &params_desc); +} + void CNNv2Effect::create_textures() { const WGPUExtent3D size = { static_cast<uint32_t>(width_), @@ -208,8 +265,80 @@ void CNNv2Effect::create_pipelines() { wgpuPipelineLayoutRelease(pipeline_layout); wgpuBindGroupLayoutRelease(static_bgl); - // Bind group will be created in update_bind_group() - // TODO: Create layer pipelines + // CNN layer compute pipeline (storage buffer version) + if (layer_info_.empty()) return; // No weights loaded + + size_t layer_shader_size = 0; + const char* layer_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_shader_size); + + if (!layer_code || layer_shader_size == 0) return; + + WGPUShaderSourceWGSL layer_wgsl = {}; + layer_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL; + layer_wgsl.code = str_view(layer_code); + + WGPUShaderModuleDescriptor layer_shader_desc = {}; + layer_shader_desc.nextInChain = &layer_wgsl.chain; + + WGPUShaderModule layer_module = wgpuDeviceCreateShaderModule(ctx_.device, &layer_shader_desc); + if (!layer_module) return; + + // Create bind group layout for layer compute + // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params + WGPUBindGroupLayoutEntry layer_bgl_entries[5] = {}; + + // Binding 0: Static features (texture) + layer_bgl_entries[0].binding = 0; + layer_bgl_entries[0].visibility = WGPUShaderStage_Compute;
+ layer_bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Uint; + layer_bgl_entries[0].texture.viewDimension = WGPUTextureViewDimension_2D; + + // Binding 1: Layer input (texture) + layer_bgl_entries[1].binding = 1; + layer_bgl_entries[1].visibility = WGPUShaderStage_Compute; + layer_bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Uint; + layer_bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D; + + // Binding 2: Output (storage texture) + layer_bgl_entries[2].binding = 2; + layer_bgl_entries[2].visibility = WGPUShaderStage_Compute; + layer_bgl_entries[2].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + layer_bgl_entries[2].storageTexture.format = WGPUTextureFormat_RGBA32Uint; + layer_bgl_entries[2].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + + // Binding 3: Weights (storage buffer) + layer_bgl_entries[3].binding = 3; + layer_bgl_entries[3].visibility = WGPUShaderStage_Compute; + layer_bgl_entries[3].buffer.type = WGPUBufferBindingType_ReadOnlyStorage; + + // Binding 4: Layer params (uniform buffer) + layer_bgl_entries[4].binding = 4; + layer_bgl_entries[4].visibility = WGPUShaderStage_Compute; + layer_bgl_entries[4].buffer.type = WGPUBufferBindingType_Uniform; + layer_bgl_entries[4].buffer.minBindingSize = sizeof(LayerParams); + + WGPUBindGroupLayoutDescriptor layer_bgl_desc = {}; + layer_bgl_desc.entryCount = 5; + layer_bgl_desc.entries = layer_bgl_entries; + + WGPUBindGroupLayout layer_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &layer_bgl_desc); + + WGPUPipelineLayoutDescriptor layer_pl_desc = {}; + layer_pl_desc.bindGroupLayoutCount = 1; + layer_pl_desc.bindGroupLayouts = &layer_bgl; + + WGPUPipelineLayout layer_pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_pl_desc); + + WGPUComputePipelineDescriptor layer_pipeline_desc = {}; + layer_pipeline_desc.compute.module = layer_module; + layer_pipeline_desc.compute.entryPoint = str_view("main"); + layer_pipeline_desc.layout 
= layer_pipeline_layout; + + layer_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &layer_pipeline_desc); + + wgpuShaderModuleRelease(layer_module); + wgpuPipelineLayoutRelease(layer_pipeline_layout); + wgpuBindGroupLayoutRelease(layer_bgl); } void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { @@ -292,6 +421,10 @@ void CNNv2Effect::cleanup() { if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_); if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); + if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_); + if (weights_buffer_) wgpuBufferRelease(weights_buffer_); + if (layer_params_buffer_) wgpuBufferRelease(layer_params_buffer_); + for (int i = 0; i < 3; ++i) { if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]); } @@ -300,12 +433,11 @@ void CNNv2Effect::cleanup() { for (auto view : layer_views_) wgpuTextureViewRelease(view); for (auto tex : layer_textures_) wgpuTextureRelease(tex); for (auto bg : layer_bind_groups_) wgpuBindGroupRelease(bg); - for (auto pipeline : layer_pipelines_) wgpuComputePipelineRelease(pipeline); layer_views_.clear(); layer_textures_.clear(); layer_bind_groups_.clear(); - layer_pipelines_.clear(); + layer_info_.clear(); initialized_ = false; } diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h index facf4c3..6005cf5 100644 --- a/src/gpu/effects/cnn_v2_effect.h +++ b/src/gpu/effects/cnn_v2_effect.h @@ -19,8 +19,25 @@ public: void update_bind_group(WGPUTextureView input_view) override; private: + struct LayerInfo { + uint32_t kernel_size; + uint32_t in_channels; + uint32_t out_channels; + uint32_t weight_offset; + uint32_t weight_count; + }; + + struct LayerParams { + uint32_t kernel_size; + uint32_t in_channels; + uint32_t out_channels; + uint32_t weight_offset; + uint32_t is_output_layer; + }; + void create_textures(); void create_pipelines(); + void load_weights(); void cleanup(); // Static features compute @@ -29,16 +46,19 @@ private: 
WGPUTexture static_features_tex_; WGPUTextureView static_features_view_; - // CNN layers (opaque implementation) - std::vector<WGPUComputePipeline> layer_pipelines_; - std::vector<WGPUBindGroup> layer_bind_groups_; - std::vector<WGPUTexture> layer_textures_; + // CNN layers (storage buffer architecture) + WGPUComputePipeline layer_pipeline_; // Single pipeline for all layers + WGPUBuffer weights_buffer_; // Storage buffer for weights + WGPUBuffer layer_params_buffer_; // Uniform buffer for per-layer params + std::vector<LayerInfo> layer_info_; // Layer metadata + std::vector<WGPUBindGroup> layer_bind_groups_; // Per-layer bind groups + std::vector<WGPUTexture> layer_textures_; // Ping-pong buffers std::vector<WGPUTextureView> layer_views_; // Input mips WGPUTexture input_mip_tex_; WGPUTextureView input_mip_view_[3]; - WGPUTextureView current_input_view_; // Cached input from update_bind_group + WGPUTextureView current_input_view_; bool initialized_; }; |
