From 9bb5fd64776ac8a7e4b012ac2de340ddfa09a2c9 Mon Sep 17 00:00:00 2001 From: skal Date: Mon, 9 Feb 2026 14:28:46 +0100 Subject: feat: GPU procedural Phase 4 - texture composition Multi-input composite shaders with sampler support. - Dynamic bind group layouts (N input textures + 1 sampler) - dispatch_composite() for multi-input compute dispatch - create_gpu_composite_texture() API - gen_blend.wgsl and gen_mask.wgsl shaders Guarded with #if !defined(STRIP_GPU_COMPOSITE) for easy removal. Tests: - Blend two noise textures - Mask noise with grid - Multi-stage composite (composite of composites) Size: ~830 bytes (2 shaders + dispatch logic) handoff(Claude): GPU procedural Phase 4 complete --- src/gpu/effects/shaders.cc | 10 ++ src/gpu/effects/shaders.h | 4 + src/gpu/texture_manager.cc | 238 +++++++++++++++++++++++++++++++++++++++++++-- src/gpu/texture_manager.h | 25 ++++- 4 files changed, 270 insertions(+), 7 deletions(-) (limited to 'src/gpu') diff --git a/src/gpu/effects/shaders.cc b/src/gpu/effects/shaders.cc index 6ed82d5..625c5b6 100644 --- a/src/gpu/effects/shaders.cc +++ b/src/gpu/effects/shaders.cc @@ -111,6 +111,16 @@ const char* gen_grid_compute_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_GRID); +#if !defined(STRIP_GPU_COMPOSITE) +const char* gen_blend_compute_wgsl = + + SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_BLEND); + +const char* gen_mask_compute_wgsl = + + SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_MASK); +#endif + const char* vignette_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_VIGNETTE); diff --git a/src/gpu/effects/shaders.h b/src/gpu/effects/shaders.h index a0f91da..68b8834 100644 --- a/src/gpu/effects/shaders.h +++ b/src/gpu/effects/shaders.h @@ -21,3 +21,7 @@ extern const char* vignette_shader_wgsl; extern const char* gen_noise_compute_wgsl; extern const char* gen_perlin_compute_wgsl; extern const char* gen_grid_compute_wgsl; +#if !defined(STRIP_GPU_COMPOSITE) +extern const char* gen_blend_compute_wgsl; +extern const char* 
gen_mask_compute_wgsl; +#endif diff --git a/src/gpu/texture_manager.cc b/src/gpu/texture_manager.cc index 2b83f63..7aeb67a 100644 --- a/src/gpu/texture_manager.cc +++ b/src/gpu/texture_manager.cc @@ -21,6 +21,20 @@ void TextureManager::init(WGPUDevice device, WGPUQueue queue) { device_ = device; queue_ = queue; + +#if !defined(STRIP_GPU_COMPOSITE) + // Create linear sampler for composite shaders + WGPUSamplerDescriptor sampler_desc = {}; + sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; + sampler_desc.magFilter = WGPUFilterMode_Linear; + sampler_desc.minFilter = WGPUFilterMode_Linear; + sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + sampler_desc.lodMinClamp = 0.0f; + sampler_desc.lodMaxClamp = 1.0f; + sampler_desc.maxAnisotropy = 1; + linear_sampler_ = wgpuDeviceCreateSampler(device_, &sampler_desc); +#endif } void TextureManager::shutdown() { @@ -36,6 +50,13 @@ void TextureManager::shutdown() { } } compute_pipelines_.clear(); + +#if !defined(STRIP_GPU_COMPOSITE) + if (linear_sampler_) { + wgpuSamplerRelease(linear_sampler_); + linear_sampler_ = nullptr; + } +#endif } void TextureManager::create_procedural_texture( @@ -125,7 +146,7 @@ WGPUTextureView TextureManager::get_texture_view(const std::string& name) { WGPUComputePipeline TextureManager::get_or_create_compute_pipeline( const std::string& func_name, const char* shader_code, - size_t uniform_size) { + size_t uniform_size, int num_input_textures) { auto it = compute_pipelines_.find(func_name); if (it != compute_pipelines_.end()) { return it->second.pipeline; @@ -143,22 +164,45 @@ WGPUComputePipeline TextureManager::get_or_create_compute_pipeline( WGPUShaderModule shader_module = wgpuDeviceCreateShaderModule(device_, &shader_desc); - // Bind group layout (storage texture + uniform) - WGPUBindGroupLayoutEntry bgl_entries[2] = {}; + // Dynamic bind group layout + // Binding 0: output storage texture + // Binding 1: uniform buffer + // 
Binding 2 to (2 + num_input_textures - 1): input textures + // Binding (2 + num_input_textures): sampler (if inputs > 0) + const int max_entries = 2 + num_input_textures + (num_input_textures > 0 ? 1 : 0); + std::vector<WGPUBindGroupLayoutEntry> bgl_entries(max_entries); + + // Binding 0: Output storage texture bgl_entries[0].binding = 0; bgl_entries[0].visibility = WGPUShaderStage_Compute; bgl_entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; bgl_entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm; bgl_entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + // Binding 1: Uniform buffer bgl_entries[1].binding = 1; bgl_entries[1].visibility = WGPUShaderStage_Compute; bgl_entries[1].buffer.type = WGPUBufferBindingType_Uniform; bgl_entries[1].buffer.minBindingSize = uniform_size; + // Binding 2+: Input textures + for (int i = 0; i < num_input_textures; ++i) { + bgl_entries[2 + i].binding = 2 + i; + bgl_entries[2 + i].visibility = WGPUShaderStage_Compute; + bgl_entries[2 + i].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[2 + i].texture.viewDimension = WGPUTextureViewDimension_2D; + } + + // Binding N: Sampler (if inputs exist) + if (num_input_textures > 0) { + bgl_entries[2 + num_input_textures].binding = 2 + num_input_textures; + bgl_entries[2 + num_input_textures].visibility = WGPUShaderStage_Compute; + bgl_entries[2 + num_input_textures].sampler.type = WGPUSamplerBindingType_Filtering; + } + WGPUBindGroupLayoutDescriptor bgl_desc = {}; - bgl_desc.entryCount = 2; - bgl_desc.entries = bgl_entries; + bgl_desc.entryCount = max_entries; + bgl_desc.entries = bgl_entries.data(); WGPUBindGroupLayout bind_group_layout = wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc); @@ -181,7 +225,7 @@ WGPUComputePipeline TextureManager::get_or_create_compute_pipeline( wgpuShaderModuleRelease(shader_module); // Cache pipeline - ComputePipelineInfo info = {pipeline, shader_code, uniform_size}; + ComputePipelineInfo info = {pipeline, shader_code, 
uniform_size, num_input_textures}; compute_pipelines_[func_name] = info; return pipeline; @@ -424,6 +468,188 @@ void TextureManager::create_gpu_grid_texture( #endif } +#if !defined(STRIP_GPU_COMPOSITE) +void TextureManager::dispatch_composite( + const std::string& func_name, WGPUTexture target, + const GpuProceduralParams& params, const void* uniform_data, + size_t uniform_size, const std::vector<WGPUTextureView>& input_views) { + auto it = compute_pipelines_.find(func_name); + if (it == compute_pipelines_.end()) { + return; // Pipeline not created yet + } + + WGPUComputePipeline pipeline = it->second.pipeline; + int num_inputs = (int)input_views.size(); + + // Create uniform buffer + WGPUBufferDescriptor buf_desc = {}; + buf_desc.size = uniform_size; + buf_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + buf_desc.mappedAtCreation = WGPUOptionalBool_True; + WGPUBuffer uniform_buf = wgpuDeviceCreateBuffer(device_, &buf_desc); + void* mapped = wgpuBufferGetMappedRange(uniform_buf, 0, uniform_size); + memcpy(mapped, uniform_data, uniform_size); + wgpuBufferUnmap(uniform_buf); + + // Create storage texture view + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA8Unorm; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = 1; + view_desc.arrayLayerCount = 1; + WGPUTextureView target_view = wgpuTextureCreateView(target, &view_desc); + + // Dynamic bind group + const int max_entries = 2 + num_inputs + (num_inputs > 0 ? 
1 : 0); + std::vector<WGPUBindGroupEntry> bg_entries(max_entries); + + // Binding 0: Output texture + bg_entries[0].binding = 0; + bg_entries[0].textureView = target_view; + + // Binding 1: Uniform buffer + bg_entries[1].binding = 1; + bg_entries[1].buffer = uniform_buf; + bg_entries[1].size = uniform_size; + + // Binding 2+: Input textures + for (int i = 0; i < num_inputs; ++i) { + bg_entries[2 + i].binding = 2 + i; + bg_entries[2 + i].textureView = input_views[i]; + } + + // Binding N: Sampler + if (num_inputs > 0) { + bg_entries[2 + num_inputs].binding = 2 + num_inputs; + bg_entries[2 + num_inputs].sampler = linear_sampler_; + } + + // Create bind group layout (must match pipeline) + const int layout_entries_count = 2 + num_inputs + (num_inputs > 0 ? 1 : 0); + std::vector<WGPUBindGroupLayoutEntry> bgl_entries(layout_entries_count); + + bgl_entries[0].binding = 0; + bgl_entries[0].visibility = WGPUShaderStage_Compute; + bgl_entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + bgl_entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm; + bgl_entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + + bgl_entries[1].binding = 1; + bgl_entries[1].visibility = WGPUShaderStage_Compute; + bgl_entries[1].buffer.type = WGPUBufferBindingType_Uniform; + bgl_entries[1].buffer.minBindingSize = uniform_size; + + for (int i = 0; i < num_inputs; ++i) { + bgl_entries[2 + i].binding = 2 + i; + bgl_entries[2 + i].visibility = WGPUShaderStage_Compute; + bgl_entries[2 + i].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[2 + i].texture.viewDimension = WGPUTextureViewDimension_2D; + } + + if (num_inputs > 0) { + bgl_entries[2 + num_inputs].binding = 2 + num_inputs; + bgl_entries[2 + num_inputs].visibility = WGPUShaderStage_Compute; + bgl_entries[2 + num_inputs].sampler.type = WGPUSamplerBindingType_Filtering; + } + + WGPUBindGroupLayoutDescriptor bgl_desc = {}; + bgl_desc.entryCount = layout_entries_count; + bgl_desc.entries = bgl_entries.data(); + WGPUBindGroupLayout 
bind_group_layout = + wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc); + + WGPUBindGroupDescriptor bg_desc = {}; + bg_desc.layout = bind_group_layout; + bg_desc.entryCount = max_entries; + bg_desc.entries = bg_entries.data(); + WGPUBindGroup bind_group = wgpuDeviceCreateBindGroup(device_, &bg_desc); + + // Dispatch compute + WGPUCommandEncoderDescriptor enc_desc = {}; + WGPUCommandEncoder encoder = + wgpuDeviceCreateCommandEncoder(device_, &enc_desc); + WGPUComputePassEncoder pass = + wgpuCommandEncoderBeginComputePass(encoder, nullptr); + wgpuComputePassEncoderSetPipeline(pass, pipeline); + wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr); + wgpuComputePassEncoderDispatchWorkgroups(pass, (params.width + 7) / 8, + (params.height + 7) / 8, 1); + wgpuComputePassEncoderEnd(pass); + + WGPUCommandBufferDescriptor cmd_desc = {}; + WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc); + wgpuQueueSubmit(queue_, 1, &cmd); + + // Cleanup + wgpuCommandBufferRelease(cmd); + wgpuCommandEncoderRelease(encoder); + wgpuComputePassEncoderRelease(pass); + wgpuBindGroupRelease(bind_group); + wgpuBindGroupLayoutRelease(bind_group_layout); + wgpuBufferRelease(uniform_buf); + wgpuTextureViewRelease(target_view); +} + +void TextureManager::create_gpu_composite_texture( + const std::string& name, const std::string& shader_func, + const char* shader_code, const void* uniform_data, size_t uniform_size, + int width, int height, const std::vector<std::string>& input_names) { + // Create pipeline if needed + get_or_create_compute_pipeline(shader_func, shader_code, uniform_size, + (int)input_names.size()); + + // Resolve input texture views + std::vector<WGPUTextureView> input_views; + input_views.reserve(input_names.size()); + for (const auto& input_name : input_names) { + WGPUTextureView view = get_texture_view(input_name); + if (!view) { + fprintf(stderr, "Error: Input texture not found: %s\n", + input_name.c_str()); + return; + } + input_views.push_back(view); + } + + // Create 
output texture + WGPUTextureDescriptor tex_desc = {}; + tex_desc.usage = + WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; + tex_desc.dimension = WGPUTextureDimension_2D; + tex_desc.size = {(uint32_t)width, (uint32_t)height, 1}; + tex_desc.format = WGPUTextureFormat_RGBA8Unorm; + tex_desc.mipLevelCount = 1; + tex_desc.sampleCount = 1; + WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc); + + // Dispatch composite shader + GpuProceduralParams params = {width, height, nullptr, 0}; + dispatch_composite(shader_func, texture, params, uniform_data, uniform_size, + input_views); + + // Create view + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA8Unorm; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = 1; + view_desc.arrayLayerCount = 1; + WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc); + + // Store + GpuTexture gpu_tex; + gpu_tex.texture = texture; + gpu_tex.view = view; + gpu_tex.width = width; + gpu_tex.height = height; + textures_[name] = gpu_tex; + +#if !defined(STRIP_ALL) + printf("Generated GPU composite texture: %s (%dx%d, %zu inputs)\n", + name.c_str(), width, height, input_names.size()); +#endif +} +#endif // !defined(STRIP_GPU_COMPOSITE) + #if !defined(STRIP_ALL) WGPUTextureView TextureManager::get_or_generate_gpu_texture( const std::string& name, const GpuProceduralParams& params) { diff --git a/src/gpu/texture_manager.h b/src/gpu/texture_manager.h index 63c2947..86d1f63 100644 --- a/src/gpu/texture_manager.h +++ b/src/gpu/texture_manager.h @@ -51,6 +51,17 @@ class TextureManager { void create_gpu_grid_texture(const std::string& name, const GpuProceduralParams& params); +#if !defined(STRIP_GPU_COMPOSITE) + // GPU composite generation (multi-input textures) + void create_gpu_composite_texture(const std::string& name, + const std::string& shader_func, + const char* shader_code, + const void* uniform_data, + size_t uniform_size, + int width, 
int height, + const std::vector<std::string>& input_names); +#endif + #if !defined(STRIP_ALL) // On-demand lazy generation (stripped in final builds) WGPUTextureView get_or_generate_gpu_texture(const std::string& name, @@ -65,17 +76,29 @@ class TextureManager { WGPUComputePipeline pipeline; const char* shader_code; size_t uniform_size; + int num_input_textures; }; WGPUComputePipeline get_or_create_compute_pipeline(const std::string& func_name, const char* shader_code, - size_t uniform_size); + size_t uniform_size, + int num_input_textures = 0); void dispatch_compute(const std::string& func_name, WGPUTexture target, const GpuProceduralParams& params, const void* uniform_data, size_t uniform_size); +#if !defined(STRIP_GPU_COMPOSITE) + void dispatch_composite(const std::string& func_name, WGPUTexture target, + const GpuProceduralParams& params, + const void* uniform_data, size_t uniform_size, + const std::vector<WGPUTextureView>& input_views); +#endif + WGPUDevice device_; WGPUQueue queue_; std::map<std::string, GpuTexture> textures_; std::map<std::string, ComputePipelineInfo> compute_pipelines_; +#if !defined(STRIP_GPU_COMPOSITE) + WGPUSampler linear_sampler_; +#endif }; -- cgit v1.2.3