diff options
Diffstat (limited to 'src/gpu')
| -rw-r--r-- | src/gpu/effects/cnn_v2_effect.cc | 163 | ||||
| -rw-r--r-- | src/gpu/effects/cnn_v2_effect.h | 3 |
2 files changed, 155 insertions, 11 deletions
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc index 04fa74e..b425aba 100644 --- a/src/gpu/effects/cnn_v2_effect.cc +++ b/src/gpu/effects/cnn_v2_effect.cc @@ -19,6 +19,7 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx) static_features_tex_(nullptr), static_features_view_(nullptr), input_mip_tex_(nullptr), + current_input_view_(nullptr), initialized_(false) { std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); } @@ -91,8 +92,31 @@ void CNNv2Effect::create_textures() { input_mip_view_[i] = wgpuTextureCreateView(input_mip_tex_, &mip_view_desc); } - // Layer textures (placeholder - will be created based on config) - // TODO: Create layer textures based on layer_configs_ + // Create 2 layer textures (ping-pong buffers for intermediate results) + // Each stores 8×f16 channels packed as 4×u32 + for (int i = 0; i < 2; ++i) { + WGPUTextureDescriptor layer_desc = {}; + layer_desc.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; + layer_desc.dimension = WGPUTextureDimension_2D; + layer_desc.size = size; + layer_desc.format = WGPUTextureFormat_RGBA32Uint; + layer_desc.mipLevelCount = 1; + layer_desc.sampleCount = 1; + + WGPUTexture tex = wgpuDeviceCreateTexture(ctx_.device, &layer_desc); + layer_textures_.push_back(tex); + + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA32Uint; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.baseMipLevel = 0; + view_desc.mipLevelCount = 1; + view_desc.baseArrayLayer = 0; + view_desc.arrayLayerCount = 1; + + WGPUTextureView view = wgpuTextureCreateView(tex, &view_desc); + layer_views_.push_back(view); + } } void CNNv2Effect::create_pipelines() { @@ -124,25 +148,142 @@ void CNNv2Effect::create_pipelines() { static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc); wgpuShaderModuleRelease(static_module); + // Create bind group layout for static features compute + // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output + WGPUBindGroupLayoutEntry bgl_entries[5] = {}; + + // Binding 0: Input texture (mip 0) + bgl_entries[0].binding = 0; + bgl_entries[0].visibility = WGPUShaderStage_Compute; + bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[0].texture.viewDimension = WGPUTextureViewDimension_2D; + + // Binding 1: Input texture (mip 1) + bgl_entries[1].binding = 1; + bgl_entries[1].visibility = WGPUShaderStage_Compute; + bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D; + + // Binding 2: Input texture (mip 2) + bgl_entries[2].binding = 2; + bgl_entries[2].visibility = WGPUShaderStage_Compute; + bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D; + + // Binding 3: Depth texture + bgl_entries[3].binding = 3; + bgl_entries[3].visibility = WGPUShaderStage_Compute; + bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D; + + // Binding 4: Output (static features) + bgl_entries[4].binding = 4; + bgl_entries[4].visibility = WGPUShaderStage_Compute; + bgl_entries[4].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + bgl_entries[4].storageTexture.format = WGPUTextureFormat_RGBA32Uint; + bgl_entries[4].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + + WGPUBindGroupLayoutDescriptor bgl_desc = {}; + bgl_desc.entryCount = 5; + bgl_desc.entries = bgl_entries; + + WGPUBindGroupLayout static_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc); + + // Update pipeline layout + WGPUPipelineLayoutDescriptor pl_desc = {}; + pl_desc.bindGroupLayoutCount = 1; + pl_desc.bindGroupLayouts = &static_bgl; + WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc); + + // Recreate pipeline with proper layout + WGPUComputePipelineDescriptor pipeline_desc2 = {}; + pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); + pipeline_desc2.compute.entryPoint = str_view("main"); + pipeline_desc2.layout = pipeline_layout; + + if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); + static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2); + + wgpuShaderModuleRelease(pipeline_desc2.compute.module); + wgpuPipelineLayoutRelease(pipeline_layout); + wgpuBindGroupLayoutRelease(static_bgl); + + // Bind group will be created in update_bind_group() // TODO: Create layer pipelines - // TODO: Create bind groups } void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { - (void)input_view; - // TODO: Create bind groups for static features and layers + if (!static_pipeline_) return; + + // Cache input view + current_input_view_ = input_view; + + // Release old bind group + if (static_bind_group_) { + wgpuBindGroupRelease(static_bind_group_); + static_bind_group_ = nullptr; + } + + // Create bind group for static features compute + WGPUBindGroupEntry bg_entries[5] = {}; + + // Binding 0: Input (mip 0) + bg_entries[0].binding = 0; + bg_entries[0].textureView = input_view; + + // Binding 1: Input (mip 1) + bg_entries[1].binding = 1; + bg_entries[1].textureView = input_mip_view_[0]; // Use mip 0 for now + + // Binding 2: Input (mip 2) + bg_entries[2].binding = 2; + bg_entries[2].textureView = input_mip_view_[0]; // Use mip 0 for now + + // Binding 3: Depth (use input for now, no depth available) + bg_entries[3].binding = 3; + bg_entries[3].textureView = input_view; + + // Binding 4: Output (static features) + bg_entries[4].binding = 4; + bg_entries[4].textureView = static_features_view_; + + WGPUBindGroupDescriptor bg_desc = {}; + bg_desc.layout = wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); + bg_desc.entryCount = 5; + bg_desc.entries = bg_entries; + + static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); + + wgpuBindGroupLayoutRelease(bg_desc.layout); +} + +void CNNv2Effect::compute(WGPUCommandEncoder encoder, + const CommonPostProcessUniforms& uniforms) { + (void)uniforms; + if (!initialized_ || !static_pipeline_ || !static_bind_group_) return; + + // Pass 1: Compute static features + WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); + + wgpuComputePassEncoderSetPipeline(pass, static_pipeline_); + wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr); + + // Dispatch workgroups (8×8 threads per group) + uint32_t workgroups_x = (width_ + 7) / 8; + uint32_t workgroups_y = (height_ + 7) / 8; + wgpuComputePassEncoderDispatchWorkgroups(pass, workgroups_x, workgroups_y, 1); + + wgpuComputePassEncoderEnd(pass); + wgpuComputePassEncoderRelease(pass); + + // TODO: Execute CNN layer passes } void CNNv2Effect::render(WGPURenderPassEncoder pass, const CommonPostProcessUniforms& uniforms) { (void)pass; (void)uniforms; - if (!initialized_) return; - - // TODO: Multi-pass execution - // 1. Compute static features - // 2. Execute CNN layers - // 3. Composite to output + // Compute-only effect, rendering is done by default composite pass } void CNNv2Effect::cleanup() { diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h index edf301e..facf4c3 100644 --- a/src/gpu/effects/cnn_v2_effect.h +++ b/src/gpu/effects/cnn_v2_effect.h @@ -12,6 +12,8 @@ public: void init(MainSequence* demo) override; void resize(int width, int height) override; + void compute(WGPUCommandEncoder encoder, + const CommonPostProcessUniforms& uniforms) override; void render(WGPURenderPassEncoder pass, const CommonPostProcessUniforms& uniforms) override; void update_bind_group(WGPUTextureView input_view) override; @@ -36,6 +38,7 @@ private: // Input mips WGPUTexture input_mip_tex_; WGPUTextureView input_mip_view_[3]; + WGPUTextureView current_input_view_; // Cached input from update_bind_group bool initialized_; }; |
