summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/gpu/effects/cnn_v2_effect.cc 14
-rw-r--r-- tools/cnn_test.cc 13
2 files changed, 18 insertions, 9 deletions
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc
index 3985723..366a232 100644
--- a/src/gpu/effects/cnn_v2_effect.cc
+++ b/src/gpu/effects/cnn_v2_effect.cc
@@ -111,17 +111,21 @@ void CNNv2Effect::load_weights() {
layer_info_.push_back(info);
}
- // Create GPU storage buffer for weights
- // Buffer contains: header + layer info + packed f16 weights (as u32)
+ // Create GPU storage buffer for weights (skip header + layer info, upload only weights)
+ size_t header_size = 20; // 5 u32
+ size_t layer_info_size = 20 * num_layers; // 5 u32 per layer
+ size_t weights_offset = header_size + layer_info_size;
+ size_t weights_only_size = weights_size - weights_offset;
+
WGPUBufferDescriptor buffer_desc = {};
- buffer_desc.size = weights_size;
+ buffer_desc.size = weights_only_size;
buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst;
buffer_desc.mappedAtCreation = false;
weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc);
- // Upload weights data
- wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data, weights_size);
+ // Upload only weights (skip header + layer info)
+ wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size);
// Create uniform buffers for layer params (one per layer)
for (uint32_t i = 0; i < num_layers; ++i) {
diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc
index 4599512..c504c3d 100644
--- a/tools/cnn_test.cc
+++ b/tools/cnn_test.cc
@@ -684,15 +684,20 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue,
info.out_channels, info.weight_count);
}
- // Create weights storage buffer
+ // Create weights storage buffer (skip header + layer info, upload only weights)
+ size_t header_size = 20; // 5 u32
+ size_t layer_info_size = 20 * layer_info.size(); // 5 u32 per layer
+ size_t weights_offset = header_size + layer_info_size;
+ size_t weights_only_size = weights_size - weights_offset;
+
WGPUBufferDescriptor weights_buffer_desc = {};
- weights_buffer_desc.size = weights_size;
+ weights_buffer_desc.size = weights_only_size;
weights_buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst;
weights_buffer_desc.mappedAtCreation = false;
WGPUBuffer weights_buffer =
wgpuDeviceCreateBuffer(device, &weights_buffer_desc);
- wgpuQueueWriteBuffer(queue, weights_buffer, 0, weights_data, weights_size);
+ wgpuQueueWriteBuffer(queue, weights_buffer, 0, weights_data + weights_offset, weights_only_size);
// Create input view
const WGPUTextureViewDescriptor view_desc = {
@@ -1051,7 +1056,7 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue,
layer_bg_entries[3].binding = 3;
layer_bg_entries[3].buffer = weights_buffer;
- layer_bg_entries[3].size = weights_size;
+ layer_bg_entries[3].size = weights_only_size;
layer_bg_entries[4].binding = 4;
layer_bg_entries[4].buffer = layer_params_buffers[i];