summary | refs | log | tree | commit | diff
path: root/src/gpu/effects/cnn_v2_effect.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/gpu/effects/cnn_v2_effect.cc')
-rw-r--r-- src/gpu/effects/cnn_v2_effect.cc 163
1 files changed, 152 insertions, 11 deletions
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc
index 04fa74e..b425aba 100644
--- a/src/gpu/effects/cnn_v2_effect.cc
+++ b/src/gpu/effects/cnn_v2_effect.cc
@@ -19,6 +19,7 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx)
static_features_tex_(nullptr),
static_features_view_(nullptr),
input_mip_tex_(nullptr),
+ current_input_view_(nullptr),
initialized_(false) {
std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
}
@@ -91,8 +92,31 @@ void CNNv2Effect::create_textures() {
input_mip_view_[i] = wgpuTextureCreateView(input_mip_tex_, &mip_view_desc);
}
- // Layer textures (placeholder - will be created based on config)
- // TODO: Create layer textures based on layer_configs_
+ // Create 2 layer textures (ping-pong buffers for intermediate results).
+ // Each texel stores 8×f16 channels packed as 4×u32 (8 × 16 bit = 4 × 32 bit), hence RGBA32Uint below.
+ for (int i = 0; i < 2; ++i) {
+ WGPUTextureDescriptor layer_desc = {};
+ layer_desc.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; // bindable both as a storage texture and as a regular texture
+ layer_desc.dimension = WGPUTextureDimension_2D;
+ layer_desc.size = size; // 'size' comes from earlier in this function, outside the lines shown in this hunk
+ layer_desc.format = WGPUTextureFormat_RGBA32Uint; // four u32 components per texel
+ layer_desc.mipLevelCount = 1;
+ layer_desc.sampleCount = 1;
+
+ WGPUTexture tex = wgpuDeviceCreateTexture(ctx_.device, &layer_desc); // NOTE(review): result is not checked before use
+ layer_textures_.push_back(tex); // NOTE(review): appended unconditionally - confirm this function never runs twice without clearing layer_textures_/layer_views_
+
+ WGPUTextureViewDescriptor view_desc = {};
+ view_desc.format = WGPUTextureFormat_RGBA32Uint; // same format as the texture itself
+ view_desc.dimension = WGPUTextureViewDimension_2D;
+ view_desc.baseMipLevel = 0;
+ view_desc.mipLevelCount = 1; // the texture has a single mip level
+ view_desc.baseArrayLayer = 0;
+ view_desc.arrayLayerCount = 1;
+
+ WGPUTextureView view = wgpuTextureCreateView(tex, &view_desc);
+ layer_views_.push_back(view); // layer_views_[k] is the view of layer_textures_[k], provided both vectors were equally sized before this loop
+ }
}
void CNNv2Effect::create_pipelines() {
@@ -124,25 +148,142 @@ void CNNv2Effect::create_pipelines() {
static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc);
wgpuShaderModuleRelease(static_module);
+ // Create an explicit bind group layout for the static-features compute pass.
+ // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output (all compute-stage only)
+ WGPUBindGroupLayoutEntry bgl_entries[5] = {}; // zero-initialised; only the fields set below differ from zero
+
+ // Binding 0: Input texture (mip 0)
+ bgl_entries[0].binding = 0;
+ bgl_entries[0].visibility = WGPUShaderStage_Compute;
+ bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Float;
+ bgl_entries[0].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+ // Binding 1: Input texture (mip 1)
+ bgl_entries[1].binding = 1;
+ bgl_entries[1].visibility = WGPUShaderStage_Compute;
+ bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float;
+ bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+ // Binding 2: Input texture (mip 2)
+ bgl_entries[2].binding = 2;
+ bgl_entries[2].visibility = WGPUShaderStage_Compute;
+ bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float;
+ bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+ // Binding 3: Depth texture
+ bgl_entries[3].binding = 3;
+ bgl_entries[3].visibility = WGPUShaderStage_Compute;
+ bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Float; // NOTE(review): declared Float, not Depth; update_bind_group() currently binds the colour input here
+ bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+ // Binding 4: Output (static features), a write-only storage texture
+ bgl_entries[4].binding = 4;
+ bgl_entries[4].visibility = WGPUShaderStage_Compute;
+ bgl_entries[4].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+ bgl_entries[4].storageTexture.format = WGPUTextureFormat_RGBA32Uint; // NOTE(review): presumably must match the format of static_features_view_ (its creation is not shown here) - confirm
+ bgl_entries[4].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+ WGPUBindGroupLayoutDescriptor bgl_desc = {};
+ bgl_desc.entryCount = 5; // must stay in sync with the size of bgl_entries
+ bgl_desc.entries = bgl_entries;
+
+ WGPUBindGroupLayout static_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+
+ // Build a pipeline layout that contains only the bind group layout above (group 0)
+ WGPUPipelineLayoutDescriptor pl_desc = {};
+ pl_desc.bindGroupLayoutCount = 1;
+ pl_desc.bindGroupLayouts = &static_bgl;
+ WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+
+ // Recreate pipeline with the explicit layout. NOTE(review): static_pipeline_ was already created just before these additions and is thrown away below; the two creations could be merged.
+ WGPUComputePipelineDescriptor pipeline_desc2 = {};
+ pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); // a second module built from shader_desc; the earlier static_module has already been released
+ pipeline_desc2.compute.entryPoint = str_view("main");
+ pipeline_desc2.layout = pipeline_layout;
+
+ if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); // drop the pipeline created earlier in this function
+ static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2); // NOTE(review): result is not checked here; callers guard on static_pipeline_ being non-null
+
+ wgpuShaderModuleRelease(pipeline_desc2.compute.module); // release this function's local handles now that the pipeline has been created
+ wgpuPipelineLayoutRelease(pipeline_layout);
+ wgpuBindGroupLayoutRelease(static_bgl); // update_bind_group() re-queries the layout from the pipeline instead of keeping this handle
+
+ // Bind group will be created in update_bind_group()
// TODO: Create layer pipelines
- // TODO: Create bind groups
}
void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
- (void)input_view;
- // TODO: Create bind groups for static features and layers
+ if (!static_pipeline_) return; // the bind group layout is queried from the pipeline below, so it must exist
+
+ // Cache input view (stored in current_input_view_; no reader is visible in this diff)
+ current_input_view_ = input_view;
+
+ // Release old bind group so repeated calls do not accumulate bind groups
+ if (static_bind_group_) {
+ wgpuBindGroupRelease(static_bind_group_);
+ static_bind_group_ = nullptr;
+ }
+
+ // Create bind group for static features compute (entries mirror bindings 0-4 declared in create_pipelines())
+ WGPUBindGroupEntry bg_entries[5] = {};
+
+ // Binding 0: Input (mip 0)
+ bg_entries[0].binding = 0;
+ bg_entries[0].textureView = input_view; // NOTE(review): input_view is not null-checked before being bound
+
+ // Binding 1: Input (mip 1)
+ bg_entries[1].binding = 1;
+ bg_entries[1].textureView = input_mip_view_[0]; // Placeholder: binds input_mip_view_[0] rather than a distinct mip-1 view
+
+ // Binding 2: Input (mip 2)
+ bg_entries[2].binding = 2;
+ bg_entries[2].textureView = input_mip_view_[0]; // Placeholder: same view as binding 1, not a distinct mip-2 view
+
+ // Binding 3: Depth (use input for now, no depth available)
+ bg_entries[3].binding = 3;
+ bg_entries[3].textureView = input_view; // same view as binding 0, so the shader's "depth" reads return colour data
+
+ // Binding 4: Output (static features)
+ bg_entries[4].binding = 4;
+ bg_entries[4].textureView = static_features_view_;
+
+ WGPUBindGroupDescriptor bg_desc = {};
+ bg_desc.layout = wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); // layout for group 0, fetched from the pipeline; released at the end of this function
+ bg_desc.entryCount = 5; // must stay in sync with the size of bg_entries
+ bg_desc.entries = bg_entries;
+
+ static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); // compute() skips its pass while this is null
+
+ wgpuBindGroupLayoutRelease(bg_desc.layout); // release the layout handle obtained above
+}
+
+void CNNv2Effect::compute(WGPUCommandEncoder encoder,
+ const CommonPostProcessUniforms& uniforms) {
+ (void)uniforms; // not used by this pass yet; the cast silences the unused-parameter warning
+ if (!initialized_ || !static_pipeline_ || !static_bind_group_) return; // nothing is recorded until the pipeline and the bind group from update_bind_group() both exist
+
+ // Pass 1: Compute static features (records one compute pass into 'encoder')
+ WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); // nullptr: no pass descriptor supplied
+
+ wgpuComputePassEncoderSetPipeline(pass, static_pipeline_);
+ wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr); // group 0, no dynamic offsets
+
+ // Dispatch workgroups (8×8 threads per group) - assumes the shader declares a matching 8×8 workgroup size; the shader is not visible here, TODO confirm
+ uint32_t workgroups_x = (width_ + 7) / 8; // round up so a width that is not a multiple of 8 is still fully covered
+ uint32_t workgroups_y = (height_ + 7) / 8; // same rounding for the height
+ wgpuComputePassEncoderDispatchWorkgroups(pass, workgroups_x, workgroups_y, 1);
+
+ wgpuComputePassEncoderEnd(pass);
+ wgpuComputePassEncoderRelease(pass); // the pass encoder handle is not needed after End
+
+ // TODO: Execute CNN layer passes
}
void CNNv2Effect::render(WGPURenderPassEncoder pass,
const CommonPostProcessUniforms& uniforms) {
(void)pass;
(void)uniforms;
- if (!initialized_) return;
-
- // TODO: Multi-pass execution
- // 1. Compute static features
- // 2. Execute CNN layers
- // 3. Composite to output
+ // Intentionally empty: this is a compute-only effect (work is recorded in compute()); rendering is done by the default composite pass, which is not part of this diff
}
void CNNv2Effect::cleanup() {