diff options
| -rw-r--r-- | doc/GEOM_BUFFER.md | 229 | ||||
| -rw-r--r-- | src/gpu/bind_group_builder.h | 22 | ||||
| -rw-r--r-- | src/gpu/effects/circle_mask_effect.cc | 46 | ||||
| -rw-r--r-- | src/gpu/effects/cnn_v2_effect.cc | 260 | ||||
| -rw-r--r-- | src/gpu/effects/rotating_cube_effect.cc | 67 | ||||
| -rw-r--r-- | src/gpu/gpu.cc | 47 | ||||
| -rw-r--r-- | src/gpu/gpu.h | 15 |
7 files changed, 405 insertions, 281 deletions
diff --git a/doc/GEOM_BUFFER.md b/doc/GEOM_BUFFER.md new file mode 100644 index 0000000..0188125 --- /dev/null +++ b/doc/GEOM_BUFFER.md @@ -0,0 +1,229 @@ +# Geometry Buffer Design [IN PROGRESS] + +**Status:** Ideation phase +**Goal:** Efficient G-buffer for deferred rendering in 64k demo + +--- + +## Overview + +Replace direct rendering with geometry buffer accumulation for advanced post-processing and lighting. + +**Target:** 8-10 bytes/pixel, 16-bit precision + +--- + +## Buffer Elements + +### Core Attributes + +| Attribute | Channels | Precision | Source | +|-----------|----------|-----------|--------| +| Albedo (RGB) | 3 | f16 | Material/procedural | +| Roughness | 1 | u8/u16 | PBR material property | +| Metallic | 1 | u8/u16 | PBR material property | +| Normal (XYZ) | 2 | f16 | Octahedral encoding | +| Depth | 1 | f16/f32 | 1/z for precision | +| Object/Material ID | 1 | u16 | Rasterization/SDF | +| Transparency | 1 | u8/u16 | Alpha channel | + +### Optional/Derived + +| Attribute | Storage | Notes | +|-----------|---------|-------| +| Depth gradient | On-demand | Compute from depth (Sobel) | +| Laplacian | On-demand | Second derivative of depth | +| Motion vectors | 2×f16 | Screen-space XY | +| AO | 1×f16 | Ambient occlusion | + +**Key insight:** Depth derivatives cheaper to compute than store (2-4 bytes/pixel saved). 
+ +--- + +## Packing Strategies + +### Traditional Multi-Render-Target (MRT) + +``` +RT0 (RGBA16): Albedo.rgb + Roughness (packed with metallic) +RT1 (RG16): Octahedral normal (2 channels encode XYZ) +RT2 (R32F): 1/z depth (or use hardware depth buffer) +RT3 (RG16): Motion vectors XY +RT4 (R16UI): Object/Material ID +``` + +**Total:** 4-5 render targets ≈ 18-22 bytes/pixel with the formats listed (above the 8-10 bytes/pixel target) + +### Compute Shader + Storage Buffer (RECOMMENDED) + +**Advantages:** +- Custom bit-packing (not bound to RGBA formats) +- Compute derivatives in-pass (depth gradient, Laplacian) +- Cache-optimized tiling (Morton order) +- No MRT limits (store 20+ attributes) + +**Tradeoffs:** +- No hardware depth/early-Z during G-buffer generation +- Manual atomics needed when pixels are overdrawn +- Loses ROP hardware optimizations + +**Struct Layout:** +```cpp +struct GBufferPixel { + u32 packed_normal; // Octahedral 16+16 + u32 rgba_rough; // RGB8 albedo + Roughness8 (32 bits used; Metallic8 does not fit and needs separate storage) + f16 inv_z; // 1/z depth + u16 material_id; // Object/material + // Total: 12 bytes/pixel +}; + +// Compressed variant (8 bytes): +struct CompactGBuffer { + u32 normal_depth; // Oct16 normal + u16 quantized depth + u32 rgba_params; // RGB565 + Rough4 + Metal4 + Flags4 +}; +``` + +**Access Pattern:** +```wgsl +@group(0) @binding(0) var<storage, read_write> g_buffer: array<GBufferPixel>; + +fn write_gbuffer(pixel_id: u32, data: SurfaceData) { + g_buffer[pixel_id].packed_normal = pack_octahedral(data.normal); + g_buffer[pixel_id].rgba_rough = pack_rgba8(data.albedo) | (u32(data.roughness * 255.0) << 24); + g_buffer[pixel_id].inv_z = f16(1.0 / data.depth); + g_buffer[pixel_id].material_id = data.id; +} +``` + +--- + +## Normal Encoding + +**Octahedral mapping** (most efficient for 2-channel storage): +- Encodes unit sphere normal to 2D square +- 16-bit per channel = good precision +- Fast encode/decode (no trig) + +```cpp +vec2 octahedral_encode(vec3 n) { + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + vec2 p = n.z >= 0.0 ? 
n.xy : (1.0 - abs(n.yx)) * sign(n.xy); + return p * 0.5 + 0.5; // [0, 1] +} + +vec3 octahedral_decode(vec2 p) { + p = p * 2.0 - 1.0; // [-1, 1] + vec3 n = vec3(p.x, p.y, 1.0 - abs(p.x) - abs(p.y)); + float t = max(-n.z, 0.0); + n.x += n.x >= 0.0 ? -t : t; + n.y += n.y >= 0.0 ? -t : t; + return normalize(n); +} +``` + +--- + +## Depth Storage + +**1/z (inverse depth):** +- Better precision distribution (more bits near camera) +- Linear in screen space +- Matches perspective projection + +**Alternatives:** +- Logarithmic depth (even better precision) +- Hardware depth buffer (R32F, free with render targets) + +--- + +## Material Properties + +**Roughness/Metallic are NOT geometry:** +- **Source:** Texture lookups, procedural noise, or constants +- **Not bump-mapping:** Bump/normal maps perturb normals (geometry) +- **PBR properties:** Control light interaction (0=smooth/dielectric, 1=rough/metal) + +**Demoscene approach:** Procedural generation or baked constants (avoid textures). + +--- + +## Post-Processing Derivatives + +**Compute on demand** (cheaper than storing): + +```wgsl +// Depth gradient (central differences; a simplified stand-in for a full Sobel filter) +fn depth_gradient(uv: vec2f) -> vec2f { + let dx = textureLoad(depth, uv + vec2(1,0)) - textureLoad(depth, uv - vec2(1,0)); + let dy = textureLoad(depth, uv + vec2(0,1)) - textureLoad(depth, uv - vec2(0,1)); + return vec2(dx, dy) * 0.5; +} + +// Laplacian (edge detection) +fn laplacian(uv: vec2f) -> f32 { + let c = textureLoad(depth, uv); + let n = textureLoad(depth, uv + vec2(0,1)); + let s = textureLoad(depth, uv - vec2(0,1)); + let e = textureLoad(depth, uv + vec2(1,0)); + let w = textureLoad(depth, uv - vec2(1,0)); + return (n + s + e + w) - 4.0 * c; +} +``` + +--- + +## Integration with Hybrid Renderer + +**Current:** Hybrid SDF raymarching + rasterized proxy geometry +**Future:** Both write to a unified G-buffer + +```cpp +// Rasterization pass +void rasterize_geometry() { + // Vertex shader → fragment shader + // Write to G-buffer (compute or MRT) +} + 
+// SDF raymarching pass (compute) +void raymarch_sdf() { + // Per-pixel ray march + // Write to same G-buffer at hit points +} + +// Deferred lighting pass +void deferred_lighting() { + // Read G-buffer + // Apply PBR lighting, shadows, etc. +} +``` + +**Atomics handling:** Use depth test or tile-based sorting to avoid conflicts. + +--- + +## Size Budget + +**Target:** 1920×1080 @ 8 bytes/pixel = **16 MB** +**Compressed:** 1920×1080 @ 6 bytes/pixel = **12 MB** + +**Acceptable for 64k demo:** RAM usage OK, not binary size. + +--- + +## Next Steps + +1. Prototype compute shader G-buffer writer +2. Implement octahedral normal encoding +3. Test SDF + raster unified writes +4. Add deferred lighting pass +5. Validate depth derivative quality (gradient/Laplacian) +6. Optimize packing (aim for 6-8 bytes/pixel) + +--- + +## References + +- Octahedral mapping: "Survey of Efficient Representations for Independent Unit Vectors" (Meyer et al.) +- PBR theory: "Physically Based Rendering" (Pharr, Jakob, Humphreys) +- G-buffer design: "Deferred Rendering in Killzone 2" (Valient, 2007) diff --git a/src/gpu/bind_group_builder.h b/src/gpu/bind_group_builder.h index d63f6e2..abce9dc 100644 --- a/src/gpu/bind_group_builder.h +++ b/src/gpu/bind_group_builder.h @@ -55,6 +55,28 @@ public: return *this; } + BindGroupLayoutBuilder& uint_texture(uint32_t binding, WGPUShaderStageFlags vis) { + WGPUBindGroupLayoutEntry e{}; + e.binding = binding; + e.visibility = vis; + e.texture.sampleType = WGPUTextureSampleType_Uint; + e.texture.viewDimension = WGPUTextureViewDimension_2D; + entries_.push_back(e); + return *this; + } + + BindGroupLayoutBuilder& storage_texture(uint32_t binding, WGPUShaderStageFlags vis, + WGPUTextureFormat format) { + WGPUBindGroupLayoutEntry e{}; + e.binding = binding; + e.visibility = vis; + e.storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + e.storageTexture.format = format; + e.storageTexture.viewDimension = WGPUTextureViewDimension_2D; + 
entries_.push_back(e); + return *this; + } + BindGroupLayoutBuilder& sampler(uint32_t binding, WGPUShaderStageFlags vis) { WGPUBindGroupLayoutEntry e{}; e.binding = binding; diff --git a/src/gpu/effects/circle_mask_effect.cc b/src/gpu/effects/circle_mask_effect.cc index f34ffb7..dfe7d03 100644 --- a/src/gpu/effects/circle_mask_effect.cc +++ b/src/gpu/effects/circle_mask_effect.cc @@ -3,8 +3,9 @@ // Generates circular mask and renders green background outside circle. #include "gpu/effects/circle_mask_effect.h" -#include "gpu/effects/shader_composer.h" #include "generated/assets.h" +#include "gpu/bind_group_builder.h" +#include "gpu/effects/shader_composer.h" CircleMaskEffect::CircleMaskEffect(const GpuContext& ctx, float radius) : Effect(ctx), radius_(radius) { @@ -83,21 +84,14 @@ void CircleMaskEffect::init(MainSequence* demo) { wgpuDeviceCreateRenderPipeline(ctx_.device, &compute_pipeline_desc); wgpuShaderModuleRelease(compute_module); - const WGPUBindGroupEntry compute_entries[] = { - {.binding = 0, - .buffer = uniforms_.get().buffer, - .size = sizeof(CommonPostProcessUniforms)}, - {.binding = 1, - .buffer = compute_params_.get().buffer, - .size = sizeof(CircleMaskParams)}, - }; - const WGPUBindGroupDescriptor compute_bg_desc = { - .layout = wgpuRenderPipelineGetBindGroupLayout(compute_pipeline_, 0), - .entryCount = 2, - .entries = compute_entries, - }; + WGPUBindGroupLayout compute_layout = + wgpuRenderPipelineGetBindGroupLayout(compute_pipeline_, 0); compute_bind_group_ = - wgpuDeviceCreateBindGroup(ctx_.device, &compute_bg_desc); + BindGroupBuilder() + .buffer(0, uniforms_.get().buffer, sizeof(CommonPostProcessUniforms)) + .buffer(1, compute_params_.get().buffer, sizeof(CircleMaskParams)) + .build(ctx_.device, compute_layout); + wgpuBindGroupLayoutRelease(compute_layout); std::string composed_render = ShaderComposer::Get().Compose({}, render_shader); @@ -172,19 +166,15 @@ void CircleMaskEffect::resize(int width, int height) { 
wgpuBindGroupRelease(render_bind_group_); WGPUTextureView mask_view = demo_->get_auxiliary_view("circle_mask"); - const WGPUBindGroupEntry render_entries[] = { - {.binding = 0, .textureView = mask_view}, - {.binding = 1, .sampler = mask_sampler_}, - {.binding = 2, - .buffer = uniforms_.get().buffer, - .size = sizeof(CommonPostProcessUniforms)}, - }; - const WGPUBindGroupDescriptor render_bg_desc = { - .layout = wgpuRenderPipelineGetBindGroupLayout(render_pipeline_, 0), - .entryCount = 3, - .entries = render_entries, - }; - render_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &render_bg_desc); + WGPUBindGroupLayout render_layout = + wgpuRenderPipelineGetBindGroupLayout(render_pipeline_, 0); + render_bind_group_ = + BindGroupBuilder() + .texture(0, mask_view) + .sampler(1, mask_sampler_) + .buffer(2, uniforms_.get().buffer, sizeof(CommonPostProcessUniforms)) + .build(ctx_.device, render_layout); + wgpuBindGroupLayoutRelease(render_layout); } void CircleMaskEffect::compute(WGPUCommandEncoder encoder, diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc index d412154..be856a4 100644 --- a/src/gpu/effects/cnn_v2_effect.cc +++ b/src/gpu/effects/cnn_v2_effect.cc @@ -8,6 +8,8 @@ #include "generated/assets.h" #endif +#include "gpu/bind_group_builder.h" +#include "gpu/gpu.h" #include "util/asset_manager.h" #include "util/fatal_error.h" #include <cstring> @@ -142,76 +144,30 @@ void CNNv2Effect::load_weights() { } void CNNv2Effect::create_textures() { - const WGPUExtent3D size = { - static_cast<uint32_t>(width_), - static_cast<uint32_t>(height_), - 1 - }; - // Static features texture (8×f16 packed as 4×u32) - WGPUTextureDescriptor static_desc = {}; - static_desc.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; - static_desc.dimension = WGPUTextureDimension_2D; - static_desc.size = size; - static_desc.format = WGPUTextureFormat_RGBA32Uint; - static_desc.mipLevelCount = 1; - static_desc.sampleCount = 1; - 
static_features_tex_ = wgpuDeviceCreateTexture(ctx_.device, &static_desc); - - WGPUTextureViewDescriptor view_desc = {}; - view_desc.format = WGPUTextureFormat_RGBA32Uint; - view_desc.dimension = WGPUTextureViewDimension_2D; - view_desc.baseMipLevel = 0; - view_desc.mipLevelCount = 1; - view_desc.baseArrayLayer = 0; - view_desc.arrayLayerCount = 1; - static_features_view_ = wgpuTextureCreateView(static_features_tex_, &view_desc); + TextureWithView static_tex = gpu_create_storage_texture_2d( + ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint); + static_features_tex_ = static_tex.texture; + static_features_view_ = static_tex.view; // Input texture with mips (for multi-scale features) - WGPUTextureDescriptor input_mip_desc = {}; - input_mip_desc.usage = WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst; - input_mip_desc.dimension = WGPUTextureDimension_2D; - input_mip_desc.size = size; - input_mip_desc.format = WGPUTextureFormat_RGBA8Unorm; - input_mip_desc.mipLevelCount = 3; // Levels 0, 1, 2 - input_mip_desc.sampleCount = 1; - input_mip_tex_ = wgpuDeviceCreateTexture(ctx_.device, &input_mip_desc); + TextureWithView input_mip = gpu_create_texture_2d( + ctx_.device, width_, height_, WGPUTextureFormat_RGBA8Unorm, + WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst, 3); + input_mip_tex_ = input_mip.texture; for (int i = 0; i < 3; ++i) { - WGPUTextureViewDescriptor mip_view_desc = {}; - mip_view_desc.format = WGPUTextureFormat_RGBA8Unorm; - mip_view_desc.dimension = WGPUTextureViewDimension_2D; - mip_view_desc.baseMipLevel = i; - mip_view_desc.mipLevelCount = 1; - mip_view_desc.baseArrayLayer = 0; - mip_view_desc.arrayLayerCount = 1; - input_mip_view_[i] = wgpuTextureCreateView(input_mip_tex_, &mip_view_desc); + input_mip_view_[i] = + gpu_create_mip_view(input_mip_tex_, WGPUTextureFormat_RGBA8Unorm, i); } // Create 2 layer textures (ping-pong buffers for intermediate results) // Each stores 8×f16 channels packed as 4×u32 for (int i = 0; i < 2; 
++i) { - WGPUTextureDescriptor layer_desc = {}; - layer_desc.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; - layer_desc.dimension = WGPUTextureDimension_2D; - layer_desc.size = size; - layer_desc.format = WGPUTextureFormat_RGBA32Uint; - layer_desc.mipLevelCount = 1; - layer_desc.sampleCount = 1; - - WGPUTexture tex = wgpuDeviceCreateTexture(ctx_.device, &layer_desc); - layer_textures_.push_back(tex); - - WGPUTextureViewDescriptor view_desc = {}; - view_desc.format = WGPUTextureFormat_RGBA32Uint; - view_desc.dimension = WGPUTextureViewDimension_2D; - view_desc.baseMipLevel = 0; - view_desc.mipLevelCount = 1; - view_desc.baseArrayLayer = 0; - view_desc.arrayLayerCount = 1; - - WGPUTextureView view = wgpuTextureCreateView(tex, &view_desc); - layer_views_.push_back(view); + TextureWithView layer = gpu_create_storage_texture_2d( + ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint); + layer_textures_.push_back(layer.texture); + layer_views_.push_back(layer.view); } // Create uniform buffer for static feature params @@ -255,55 +211,17 @@ void CNNv2Effect::create_pipelines() { // Create bind group layout for static features compute // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params, 6=linear_sampler - WGPUBindGroupLayoutEntry bgl_entries[7] = {}; - - // Binding 0: Input texture (mip 0) - bgl_entries[0].binding = 0; - bgl_entries[0].visibility = WGPUShaderStage_Compute; - bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Float; - bgl_entries[0].texture.viewDimension = WGPUTextureViewDimension_2D; - - // Binding 1: Input texture (mip 1) - bgl_entries[1].binding = 1; - bgl_entries[1].visibility = WGPUShaderStage_Compute; - bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float; - bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D; - - // Binding 2: Input texture (mip 2) - bgl_entries[2].binding = 2; - bgl_entries[2].visibility = WGPUShaderStage_Compute; - 
bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float; - bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D; - - // Binding 3: Depth texture - bgl_entries[3].binding = 3; - bgl_entries[3].visibility = WGPUShaderStage_Compute; - bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Float; - bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D; - - // Binding 4: Output (static features) - bgl_entries[4].binding = 4; - bgl_entries[4].visibility = WGPUShaderStage_Compute; - bgl_entries[4].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; - bgl_entries[4].storageTexture.format = WGPUTextureFormat_RGBA32Uint; - bgl_entries[4].storageTexture.viewDimension = WGPUTextureViewDimension_2D; - - // Binding 5: Params (mip_level) - bgl_entries[5].binding = 5; - bgl_entries[5].visibility = WGPUShaderStage_Compute; - bgl_entries[5].buffer.type = WGPUBufferBindingType_Uniform; - bgl_entries[5].buffer.minBindingSize = sizeof(StaticFeatureParams); - - // Binding 6: Linear sampler (for bilinear interpolation) - bgl_entries[6].binding = 6; - bgl_entries[6].visibility = WGPUShaderStage_Compute; - bgl_entries[6].sampler.type = WGPUSamplerBindingType_Filtering; - - WGPUBindGroupLayoutDescriptor bgl_desc = {}; - bgl_desc.entryCount = 7; - bgl_desc.entries = bgl_entries; - - WGPUBindGroupLayout static_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc); + WGPUBindGroupLayout static_bgl = + BindGroupLayoutBuilder() + .texture(0, WGPUShaderStage_Compute) + .texture(1, WGPUShaderStage_Compute) + .texture(2, WGPUShaderStage_Compute) + .texture(3, WGPUShaderStage_Compute) + .storage_texture(4, WGPUShaderStage_Compute, + WGPUTextureFormat_RGBA32Uint) + .uniform(5, WGPUShaderStage_Compute, sizeof(StaticFeatureParams)) + .sampler(6, WGPUShaderStage_Compute) + .build(ctx_.device); // Update pipeline layout WGPUPipelineLayoutDescriptor pl_desc = {}; @@ -344,49 +262,16 @@ void CNNv2Effect::create_pipelines() { // Create bind group layout 
for layer compute // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input - WGPUBindGroupLayoutEntry layer_bgl_entries[6] = {}; - - // Binding 0: Static features (texture) - layer_bgl_entries[0].binding = 0; - layer_bgl_entries[0].visibility = WGPUShaderStage_Compute; - layer_bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Uint; - layer_bgl_entries[0].texture.viewDimension = WGPUTextureViewDimension_2D; - - // Binding 1: Layer input (texture) - layer_bgl_entries[1].binding = 1; - layer_bgl_entries[1].visibility = WGPUShaderStage_Compute; - layer_bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Uint; - layer_bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D; - - // Binding 2: Output (storage texture) - layer_bgl_entries[2].binding = 2; - layer_bgl_entries[2].visibility = WGPUShaderStage_Compute; - layer_bgl_entries[2].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; - layer_bgl_entries[2].storageTexture.format = WGPUTextureFormat_RGBA32Uint; - layer_bgl_entries[2].storageTexture.viewDimension = WGPUTextureViewDimension_2D; - - // Binding 3: Weights (storage buffer) - layer_bgl_entries[3].binding = 3; - layer_bgl_entries[3].visibility = WGPUShaderStage_Compute; - layer_bgl_entries[3].buffer.type = WGPUBufferBindingType_ReadOnlyStorage; - - // Binding 4: Layer params (uniform buffer) - layer_bgl_entries[4].binding = 4; - layer_bgl_entries[4].visibility = WGPUShaderStage_Compute; - layer_bgl_entries[4].buffer.type = WGPUBufferBindingType_Uniform; - layer_bgl_entries[4].buffer.minBindingSize = sizeof(LayerParams); - - // Binding 5: Original input (for blending) - layer_bgl_entries[5].binding = 5; - layer_bgl_entries[5].visibility = WGPUShaderStage_Compute; - layer_bgl_entries[5].texture.sampleType = WGPUTextureSampleType_Float; - layer_bgl_entries[5].texture.viewDimension = WGPUTextureViewDimension_2D; - - WGPUBindGroupLayoutDescriptor layer_bgl_desc = {}; - layer_bgl_desc.entryCount = 6; - 
layer_bgl_desc.entries = layer_bgl_entries; - - WGPUBindGroupLayout layer_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &layer_bgl_desc); + WGPUBindGroupLayout layer_bgl = + BindGroupLayoutBuilder() + .uint_texture(0, WGPUShaderStage_Compute) + .uint_texture(1, WGPUShaderStage_Compute) + .storage_texture(2, WGPUShaderStage_Compute, + WGPUTextureFormat_RGBA32Uint) + .storage(3, WGPUShaderStage_Compute) + .uniform(4, WGPUShaderStage_Compute, sizeof(LayerParams)) + .texture(5, WGPUShaderStage_Compute) + .build(ctx_.device); WGPUPipelineLayoutDescriptor layer_pl_desc = {}; layer_pl_desc.bindGroupLayoutCount = 1; @@ -418,46 +303,33 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { static_bind_group_ = nullptr; } - // Create bind group for static features compute + // Create bind group for static features compute (manual for storage texture binding) WGPUBindGroupEntry bg_entries[7] = {}; - - // Binding 0: Input (mip 0) bg_entries[0].binding = 0; bg_entries[0].textureView = input_view; - - // Binding 1: Input (mip 1) bg_entries[1].binding = 1; bg_entries[1].textureView = input_mip_view_[0]; - - // Binding 2: Input (mip 2) bg_entries[2].binding = 2; - bg_entries[2].textureView = (input_mip_view_[1]) ? input_mip_view_[1] : input_mip_view_[0]; - - // Binding 3: Depth (use input for now, no depth available) + bg_entries[2].textureView = + input_mip_view_[1] ? 
input_mip_view_[1] : input_mip_view_[0]; bg_entries[3].binding = 3; bg_entries[3].textureView = input_view; - - // Binding 4: Output (static features) bg_entries[4].binding = 4; bg_entries[4].textureView = static_features_view_; - - // Binding 5: Params bg_entries[5].binding = 5; bg_entries[5].buffer = static_params_buffer_; bg_entries[5].size = sizeof(StaticFeatureParams); - - // Binding 6: Linear sampler bg_entries[6].binding = 6; bg_entries[6].sampler = linear_sampler_; + WGPUBindGroupLayout layout = + wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); WGPUBindGroupDescriptor bg_desc = {}; - bg_desc.layout = wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); + bg_desc.layout = layout; bg_desc.entryCount = 7; bg_desc.entries = bg_entries; - static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); - - wgpuBindGroupLayoutRelease(bg_desc.layout); + wgpuBindGroupLayoutRelease(layout); // Create layer bind groups if (!layer_pipeline_ || layer_info_.empty()) return; @@ -473,41 +345,19 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { // Create bind group for each layer for (size_t i = 0; i < layer_info_.size(); ++i) { - WGPUBindGroupEntry layer_entries[6] = {}; - - // Binding 0: Static features (constant) - layer_entries[0].binding = 0; - layer_entries[0].textureView = static_features_view_; - - // Binding 1: Layer input (ping-pong: use previous layer's output) - // First layer uses static features as input, others use ping-pong buffers - layer_entries[1].binding = 1; - layer_entries[1].textureView = (i == 0) ? 
static_features_view_ : layer_views_[i % 2]; - - // Binding 2: Output texture (ping-pong) - layer_entries[2].binding = 2; - layer_entries[2].textureView = layer_views_[(i + 1) % 2]; - - // Binding 3: Weights buffer (constant) - layer_entries[3].binding = 3; - layer_entries[3].buffer = weights_buffer_; - layer_entries[3].size = wgpuBufferGetSize(weights_buffer_); - - // Binding 4: Layer params (use dedicated buffer for this layer) - layer_entries[4].binding = 4; - layer_entries[4].buffer = layer_params_buffers_[i]; - layer_entries[4].size = sizeof(LayerParams); - - // Binding 5: Original input (for blending) - layer_entries[5].binding = 5; - layer_entries[5].textureView = input_view; + WGPUTextureView layer_input = + (i == 0) ? static_features_view_ : layer_views_[i % 2]; - WGPUBindGroupDescriptor layer_bg_desc = {}; - layer_bg_desc.layout = layer_bgl; - layer_bg_desc.entryCount = 6; - layer_bg_desc.entries = layer_entries; + WGPUBindGroup layer_bg = + BindGroupBuilder() + .texture(0, static_features_view_) + .texture(1, layer_input) + .texture(2, layer_views_[(i + 1) % 2]) + .buffer(3, weights_buffer_, wgpuBufferGetSize(weights_buffer_)) + .buffer(4, layer_params_buffers_[i], sizeof(LayerParams)) + .texture(5, input_view) + .build(ctx_.device, layer_bgl); - WGPUBindGroup layer_bg = wgpuDeviceCreateBindGroup(ctx_.device, &layer_bg_desc); layer_bind_groups_.push_back(layer_bg); } diff --git a/src/gpu/effects/rotating_cube_effect.cc b/src/gpu/effects/rotating_cube_effect.cc index cd31100..96b02f1 100644 --- a/src/gpu/effects/rotating_cube_effect.cc +++ b/src/gpu/effects/rotating_cube_effect.cc @@ -4,7 +4,9 @@ #include "gpu/effects/rotating_cube_effect.h" #include "generated/assets.h" +#include "gpu/bind_group_builder.h" #include "gpu/effects/shader_composer.h" +#include "gpu/gpu.h" #include "gpu/sampler_cache.h" #include "util/asset_manager_utils.h" @@ -35,17 +37,11 @@ void RotatingCubeEffect::init(MainSequence* demo) { gpu_create_buffer(ctx_.device, 
sizeof(ObjectData), WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst); - const WGPUTextureDescriptor tex_desc = { - .usage = - WGPUTextureUsage_TextureBinding | WGPUTextureUsage_RenderAttachment, - .dimension = WGPUTextureDimension_2D, - .size = {1, 1, 1}, - .format = WGPUTextureFormat_RGBA8Unorm, - .mipLevelCount = 1, - .sampleCount = 1, - }; - noise_texture_ = wgpuDeviceCreateTexture(ctx_.device, &tex_desc); - noise_view_ = wgpuTextureCreateView(noise_texture_, nullptr); + TextureWithView noise = gpu_create_texture_2d( + ctx_.device, 1, 1, WGPUTextureFormat_RGBA8Unorm, + WGPUTextureUsage_TextureBinding | WGPUTextureUsage_RenderAttachment, 1); + noise_texture_ = noise.texture; + noise_view_ = noise.view; noise_sampler_ = SamplerCache::Get().get_or_create(ctx_.device, SamplerCache::linear()); mask_sampler_ = SamplerCache::Get().get_or_create(ctx_.device, SamplerCache::clamp()); @@ -68,45 +64,20 @@ void RotatingCubeEffect::init(MainSequence* demo) { WGPUShaderModule shader_module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); - const WGPUBindGroupLayoutEntry bgl_entries_0[] = { - {.binding = 0, - .visibility = WGPUShaderStage_Vertex | WGPUShaderStage_Fragment, - .buffer = {.type = WGPUBufferBindingType_Uniform, - .minBindingSize = sizeof(Uniforms)}}, - {.binding = 1, - .visibility = WGPUShaderStage_Vertex | WGPUShaderStage_Fragment, - .buffer = {.type = WGPUBufferBindingType_ReadOnlyStorage, - .minBindingSize = sizeof(ObjectData)}}, - {.binding = 3, - .visibility = WGPUShaderStage_Fragment, - .texture = {.sampleType = WGPUTextureSampleType_Float, - .viewDimension = WGPUTextureViewDimension_2D}}, - {.binding = 4, - .visibility = WGPUShaderStage_Fragment, - .sampler = {.type = WGPUSamplerBindingType_Filtering}}, - }; - const WGPUBindGroupLayoutDescriptor bgl_desc_0 = { - .entryCount = 4, - .entries = bgl_entries_0, - }; WGPUBindGroupLayout bgl_0 = - wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc_0); + BindGroupLayoutBuilder() + .uniform(0, 
WGPUShaderStage_Vertex | WGPUShaderStage_Fragment, + sizeof(Uniforms)) + .storage(1, WGPUShaderStage_Vertex | WGPUShaderStage_Fragment, + sizeof(ObjectData)) + .texture(3, WGPUShaderStage_Fragment) + .sampler(4, WGPUShaderStage_Fragment) + .build(ctx_.device); - const WGPUBindGroupLayoutEntry bgl_entries_1[] = { - {.binding = 0, - .visibility = WGPUShaderStage_Fragment, - .texture = {.sampleType = WGPUTextureSampleType_Float, - .viewDimension = WGPUTextureViewDimension_2D}}, - {.binding = 1, - .visibility = WGPUShaderStage_Fragment, - .sampler = {.type = WGPUSamplerBindingType_Filtering}}, - }; - const WGPUBindGroupLayoutDescriptor bgl_desc_1 = { - .entryCount = 2, - .entries = bgl_entries_1, - }; - WGPUBindGroupLayout bgl_1 = - wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc_1); + WGPUBindGroupLayout bgl_1 = BindGroupLayoutBuilder() + .texture(0, WGPUShaderStage_Fragment) + .sampler(1, WGPUShaderStage_Fragment) + .build(ctx_.device); const WGPUBindGroupLayout bgls[] = {bgl_0, bgl_1}; const WGPUPipelineLayoutDescriptor pl_desc = { diff --git a/src/gpu/gpu.cc b/src/gpu/gpu.cc index 41f5bcf..cf5d85d 100644 --- a/src/gpu/gpu.cc +++ b/src/gpu/gpu.cc @@ -51,6 +51,53 @@ GpuBuffer gpu_create_buffer(WGPUDevice device, size_t size, uint32_t usage, return {buffer, size}; } +TextureWithView gpu_create_texture_2d(WGPUDevice device, uint32_t width, + uint32_t height, WGPUTextureFormat format, + WGPUTextureUsage usage, + uint32_t mip_levels) { + WGPUTextureDescriptor desc = {}; + desc.usage = usage; + desc.dimension = WGPUTextureDimension_2D; + desc.size = {width, height, 1}; + desc.format = format; + desc.mipLevelCount = mip_levels; + desc.sampleCount = 1; + + WGPUTexture texture = wgpuDeviceCreateTexture(device, &desc); + + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = format; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = mip_levels; + view_desc.arrayLayerCount = 1; + + WGPUTextureView view = 
wgpuTextureCreateView(texture, &view_desc); + + return {texture, view}; +} + +TextureWithView gpu_create_storage_texture_2d(WGPUDevice device, uint32_t width, + uint32_t height, + WGPUTextureFormat format) { + return gpu_create_texture_2d( + device, width, height, format, + WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding, 1); +} + +WGPUTextureView gpu_create_mip_view(WGPUTexture texture, + WGPUTextureFormat format, + uint32_t mip_level) { + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = format; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.baseMipLevel = mip_level; + view_desc.mipLevelCount = 1; + view_desc.baseArrayLayer = 0; + view_desc.arrayLayerCount = 1; + + return wgpuTextureCreateView(texture, &view_desc); +} + RenderPass gpu_create_render_pass(WGPUDevice device, WGPUTextureFormat format, const char* shader_code, ResourceBinding* bindings, int num_bindings) { diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h index c7ee89f..b52d6ab 100644 --- a/src/gpu/gpu.h +++ b/src/gpu/gpu.h @@ -82,8 +82,23 @@ inline void gpu_init_color_attachment(WGPURenderPassColorAttachment& attachment, #endif } +// Texture creation helper +struct TextureWithView { + WGPUTexture texture; + WGPUTextureView view; +}; + GpuBuffer gpu_create_buffer(WGPUDevice device, size_t size, uint32_t usage, const void* data = nullptr); +TextureWithView gpu_create_texture_2d(WGPUDevice device, uint32_t width, + uint32_t height, WGPUTextureFormat format, + WGPUTextureUsage usage, + uint32_t mip_levels = 1); +TextureWithView gpu_create_storage_texture_2d(WGPUDevice device, uint32_t width, + uint32_t height, + WGPUTextureFormat format); +WGPUTextureView gpu_create_mip_view(WGPUTexture texture, + WGPUTextureFormat format, uint32_t mip_level); ComputePass gpu_create_compute_pass(WGPUDevice device, const char* shader_code, ResourceBinding* bindings, int num_bindings); |
