// CNN v2 Effect Implementation #include "gpu/effects/cnn_v2_effect.h" #if defined(USE_TEST_ASSETS) #include "test_assets.h" #else #include "generated/assets.h" #endif #include "util/asset_manager.h" #include "util/fatal_error.h" #include CNNv2Effect::CNNv2Effect(const GpuContext& ctx) : PostProcessEffect(ctx), static_pipeline_(nullptr), static_bind_group_(nullptr), static_features_tex_(nullptr), static_features_view_(nullptr), input_mip_tex_(nullptr), current_input_view_(nullptr), initialized_(false) { std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); } CNNv2Effect::~CNNv2Effect() { cleanup(); } void CNNv2Effect::init(MainSequence* demo) { (void)demo; if (initialized_) return; create_textures(); create_pipelines(); initialized_ = true; } void CNNv2Effect::resize(int width, int height) { PostProcessEffect::resize(width, height); cleanup(); create_textures(); create_pipelines(); } void CNNv2Effect::create_textures() { const WGPUExtent3D size = { static_cast(width_), static_cast(height_), 1 }; // Static features texture (8×f16 packed as 4×u32) WGPUTextureDescriptor static_desc = {}; static_desc.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; static_desc.dimension = WGPUTextureDimension_2D; static_desc.size = size; static_desc.format = WGPUTextureFormat_RGBA32Uint; static_desc.mipLevelCount = 1; static_desc.sampleCount = 1; static_features_tex_ = wgpuDeviceCreateTexture(ctx_.device, &static_desc); WGPUTextureViewDescriptor view_desc = {}; view_desc.format = WGPUTextureFormat_RGBA32Uint; view_desc.dimension = WGPUTextureViewDimension_2D; view_desc.baseMipLevel = 0; view_desc.mipLevelCount = 1; view_desc.baseArrayLayer = 0; view_desc.arrayLayerCount = 1; static_features_view_ = wgpuTextureCreateView(static_features_tex_, &view_desc); // Input texture with mips (for multi-scale features) WGPUTextureDescriptor input_mip_desc = {}; input_mip_desc.usage = WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst; input_mip_desc.dimension = WGPUTextureDimension_2D; input_mip_desc.size = size; input_mip_desc.format = WGPUTextureFormat_RGBA8Unorm; input_mip_desc.mipLevelCount = 3; // Levels 0, 1, 2 input_mip_desc.sampleCount = 1; input_mip_tex_ = wgpuDeviceCreateTexture(ctx_.device, &input_mip_desc); for (int i = 0; i < 3; ++i) { WGPUTextureViewDescriptor mip_view_desc = {}; mip_view_desc.format = WGPUTextureFormat_RGBA8Unorm; mip_view_desc.dimension = WGPUTextureViewDimension_2D; mip_view_desc.baseMipLevel = i; mip_view_desc.mipLevelCount = 1; mip_view_desc.baseArrayLayer = 0; mip_view_desc.arrayLayerCount = 1; input_mip_view_[i] = wgpuTextureCreateView(input_mip_tex_, &mip_view_desc); } // Create 2 layer textures (ping-pong buffers for intermediate results) // Each stores 8×f16 channels packed as 4×u32 for (int i = 0; i < 2; ++i) { WGPUTextureDescriptor layer_desc = {}; layer_desc.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; layer_desc.dimension = WGPUTextureDimension_2D; layer_desc.size = size; layer_desc.format = WGPUTextureFormat_RGBA32Uint; layer_desc.mipLevelCount = 1; layer_desc.sampleCount = 1; WGPUTexture tex = wgpuDeviceCreateTexture(ctx_.device, &layer_desc); layer_textures_.push_back(tex); WGPUTextureViewDescriptor view_desc = {}; view_desc.format = WGPUTextureFormat_RGBA32Uint; view_desc.dimension = WGPUTextureViewDimension_2D; view_desc.baseMipLevel = 0; view_desc.mipLevelCount = 1; view_desc.baseArrayLayer = 0; view_desc.arrayLayerCount = 1; WGPUTextureView view = wgpuTextureCreateView(tex, &view_desc); layer_views_.push_back(view); } } void CNNv2Effect::create_pipelines() { // Static features compute pipeline size_t shader_size = 0; const char* static_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size); if (!static_code || shader_size == 0) { // Shader not available (e.g., in test mode) - skip pipeline creation return; } WGPUShaderSourceWGSL wgsl_src = {}; wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; wgsl_src.code = str_view(static_code); WGPUShaderModuleDescriptor shader_desc = {}; shader_desc.nextInChain = &wgsl_src.chain; WGPUShaderModule static_module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); if (!static_module) { return; } WGPUComputePipelineDescriptor pipeline_desc = {}; pipeline_desc.compute.module = static_module; pipeline_desc.compute.entryPoint = str_view("main"); static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc); wgpuShaderModuleRelease(static_module); // Create bind group layout for static features compute // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output WGPUBindGroupLayoutEntry bgl_entries[5] = {}; // Binding 0: Input texture (mip 0) bgl_entries[0].binding = 0; bgl_entries[0].visibility = WGPUShaderStage_Compute; bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Float; bgl_entries[0].texture.viewDimension = WGPUTextureViewDimension_2D; // Binding 1: Input texture (mip 1) bgl_entries[1].binding = 1; bgl_entries[1].visibility = WGPUShaderStage_Compute; bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float; bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D; // Binding 2: Input texture (mip 2) bgl_entries[2].binding = 2; bgl_entries[2].visibility = WGPUShaderStage_Compute; bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float; bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D; // Binding 3: Depth texture bgl_entries[3].binding = 3; bgl_entries[3].visibility = WGPUShaderStage_Compute; bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Float; bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D; // Binding 4: Output (static features) bgl_entries[4].binding = 4; bgl_entries[4].visibility = WGPUShaderStage_Compute; bgl_entries[4].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; bgl_entries[4].storageTexture.format = WGPUTextureFormat_RGBA32Uint; bgl_entries[4].storageTexture.viewDimension = WGPUTextureViewDimension_2D; WGPUBindGroupLayoutDescriptor bgl_desc = {}; bgl_desc.entryCount = 5; bgl_desc.entries = bgl_entries; WGPUBindGroupLayout static_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc); // Update pipeline layout WGPUPipelineLayoutDescriptor pl_desc = {}; pl_desc.bindGroupLayoutCount = 1; pl_desc.bindGroupLayouts = &static_bgl; WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc); // Recreate pipeline with proper layout WGPUComputePipelineDescriptor pipeline_desc2 = {}; pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); pipeline_desc2.compute.entryPoint = str_view("main"); pipeline_desc2.layout = pipeline_layout; if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2); wgpuShaderModuleRelease(pipeline_desc2.compute.module); wgpuPipelineLayoutRelease(pipeline_layout); wgpuBindGroupLayoutRelease(static_bgl); // Bind group will be created in update_bind_group() // TODO: Create layer pipelines } void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { if (!static_pipeline_) return; // Cache input view current_input_view_ = input_view; // Release old bind group if (static_bind_group_) { wgpuBindGroupRelease(static_bind_group_); static_bind_group_ = nullptr; } // Create bind group for static features compute WGPUBindGroupEntry bg_entries[5] = {}; // Binding 0: Input (mip 0) bg_entries[0].binding = 0; bg_entries[0].textureView = input_view; // Binding 1: Input (mip 1) bg_entries[1].binding = 1; bg_entries[1].textureView = input_mip_view_[0]; // Use mip 0 for now // Binding 2: Input (mip 2) bg_entries[2].binding = 2; bg_entries[2].textureView = input_mip_view_[0]; // Use mip 0 for now // Binding 3: Depth (use input for now, no depth available) bg_entries[3].binding = 3; bg_entries[3].textureView = input_view; // Binding 4: Output (static features) bg_entries[4].binding = 4; bg_entries[4].textureView = static_features_view_; WGPUBindGroupDescriptor bg_desc = {}; bg_desc.layout = wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); bg_desc.entryCount = 5; bg_desc.entries = bg_entries; static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); wgpuBindGroupLayoutRelease(bg_desc.layout); } void CNNv2Effect::compute(WGPUCommandEncoder encoder, const CommonPostProcessUniforms& uniforms) { (void)uniforms; if (!initialized_ || !static_pipeline_ || !static_bind_group_) return; // Pass 1: Compute static features WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); wgpuComputePassEncoderSetPipeline(pass, static_pipeline_); wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr); // Dispatch workgroups (8×8 threads per group) uint32_t workgroups_x = (width_ + 7) / 8; uint32_t workgroups_y = (height_ + 7) / 8; wgpuComputePassEncoderDispatchWorkgroups(pass, workgroups_x, workgroups_y, 1); wgpuComputePassEncoderEnd(pass); wgpuComputePassEncoderRelease(pass); // TODO: Execute CNN layer passes } void CNNv2Effect::render(WGPURenderPassEncoder pass, const CommonPostProcessUniforms& uniforms) { (void)pass; (void)uniforms; // Compute-only effect, rendering is done by default composite pass } void CNNv2Effect::cleanup() { if (static_features_view_) wgpuTextureViewRelease(static_features_view_); if (static_features_tex_) wgpuTextureRelease(static_features_tex_); if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_); if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); for (int i = 0; i < 3; ++i) { if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]); } if (input_mip_tex_) wgpuTextureRelease(input_mip_tex_); for (auto view : layer_views_) wgpuTextureViewRelease(view); for (auto tex : layer_textures_) wgpuTextureRelease(tex); for (auto bg : layer_bind_groups_) wgpuBindGroupRelease(bg); for (auto pipeline : layer_pipelines_) wgpuComputePipelineRelease(pipeline); layer_views_.clear(); layer_textures_.clear(); layer_bind_groups_.clear(); layer_pipelines_.clear(); initialized_ = false; }