summary | refs | log | tree | commit | diff
path: root/src/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'src/gpu')
-rw-r--r-- src/gpu/effects/cnn_v2_effect.cc 27
-rw-r--r-- src/gpu/effects/cnn_v2_effect.h 1
2 files changed, 11 insertions, 17 deletions
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc
index 566686e..366a232 100644
--- a/src/gpu/effects/cnn_v2_effect.cc
+++ b/src/gpu/effects/cnn_v2_effect.cc
@@ -111,17 +111,21 @@ void CNNv2Effect::load_weights() {
layer_info_.push_back(info);
}
- // Create GPU storage buffer for weights
- // Buffer contains: header + layer info + packed f16 weights (as u32)
+ // Create GPU storage buffer for weights (skip header + layer info, upload only weights)
+ size_t header_size = 20; // 5 u32
+ size_t layer_info_size = 20 * num_layers; // 5 u32 per layer
+ size_t weights_offset = header_size + layer_info_size;
+ size_t weights_only_size = weights_size - weights_offset;
+
WGPUBufferDescriptor buffer_desc = {};
- buffer_desc.size = weights_size;
+ buffer_desc.size = weights_only_size;
buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst;
buffer_desc.mappedAtCreation = false;
weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc);
- // Upload weights data
- wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data, weights_size);
+ // Upload only weights (skip header + layer info)
+ wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size);
// Create uniform buffers for layer params (one per layer)
for (uint32_t i = 0; i < num_layers; ++i) {
@@ -233,18 +237,6 @@ void CNNv2Effect::create_pipelines() {
WGPUShaderModuleDescriptor shader_desc = {};
shader_desc.nextInChain = &wgsl_src.chain;
- WGPUShaderModule static_module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
- if (!static_module) {
- return;
- }
-
- WGPUComputePipelineDescriptor pipeline_desc = {};
- pipeline_desc.compute.module = static_module;
- pipeline_desc.compute.entryPoint = str_view("main");
-
- static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc);
- wgpuShaderModuleRelease(static_module);
-
// Create bind group layout for static features compute
// Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params
WGPUBindGroupLayoutEntry bgl_entries[6] = {};
@@ -542,6 +534,7 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
params.weight_offset = info.weight_offset;
params.is_output_layer = (i == layer_info_.size() - 1) ? 1 : 0;
params.blend_amount = effective_blend;
+ params.is_layer_0 = (i == 0) ? 1 : 0;
wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params, sizeof(params));
diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h
index 47dedf5..8a2e1b6 100644
--- a/src/gpu/effects/cnn_v2_effect.h
+++ b/src/gpu/effects/cnn_v2_effect.h
@@ -45,6 +45,7 @@ private:
uint32_t weight_offset;
uint32_t is_output_layer;
float blend_amount;
+ uint32_t is_layer_0;
};
struct StaticFeatureParams {