diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/gpu/effects/cnn_v2_effect.cc | 27 | ||||
| -rw-r--r-- | src/gpu/effects/cnn_v2_effect.h | 1 |
2 files changed, 11 insertions, 17 deletions
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc index 566686e..366a232 100644 --- a/src/gpu/effects/cnn_v2_effect.cc +++ b/src/gpu/effects/cnn_v2_effect.cc @@ -111,17 +111,21 @@ void CNNv2Effect::load_weights() { layer_info_.push_back(info); } - // Create GPU storage buffer for weights - // Buffer contains: header + layer info + packed f16 weights (as u32) + // Create GPU storage buffer for weights (skip header + layer info, upload only weights) + size_t header_size = 20; // 5 u32 + size_t layer_info_size = 20 * num_layers; // 5 u32 per layer + size_t weights_offset = header_size + layer_info_size; + size_t weights_only_size = weights_size - weights_offset; + WGPUBufferDescriptor buffer_desc = {}; - buffer_desc.size = weights_size; + buffer_desc.size = weights_only_size; buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst; buffer_desc.mappedAtCreation = false; weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc); - // Upload weights data - wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data, weights_size); + // Upload only weights (skip header + layer info) + wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size); // Create uniform buffers for layer params (one per layer) for (uint32_t i = 0; i < num_layers; ++i) { @@ -233,18 +237,6 @@ void CNNv2Effect::create_pipelines() { WGPUShaderModuleDescriptor shader_desc = {}; shader_desc.nextInChain = &wgsl_src.chain; - WGPUShaderModule static_module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); - if (!static_module) { - return; - } - - WGPUComputePipelineDescriptor pipeline_desc = {}; - pipeline_desc.compute.module = static_module; - pipeline_desc.compute.entryPoint = str_view("main"); - - static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc); - wgpuShaderModuleRelease(static_module); - // Create bind group layout for static features compute // Bindings: 
0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params WGPUBindGroupLayoutEntry bgl_entries[6] = {}; @@ -542,6 +534,7 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder, params.weight_offset = info.weight_offset; params.is_output_layer = (i == layer_info_.size() - 1) ? 1 : 0; params.blend_amount = effective_blend; + params.is_layer_0 = (i == 0) ? 1 : 0; wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params, sizeof(params)); diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h index 47dedf5..8a2e1b6 100644 --- a/src/gpu/effects/cnn_v2_effect.h +++ b/src/gpu/effects/cnn_v2_effect.h @@ -45,6 +45,7 @@ private: uint32_t weight_offset; uint32_t is_output_layer; float blend_amount; + uint32_t is_layer_0; }; struct StaticFeatureParams { |
