summaryrefslogtreecommitdiff
path: root/cnn_v3/src
diff options
context:
space:
mode:
Diffstat (limited to 'cnn_v3/src')
-rw-r--r--cnn_v3/src/gbuffer_effect.cc596
-rw-r--r--cnn_v3/src/gbuffer_effect.h79
2 files changed, 675 insertions, 0 deletions
diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc
new file mode 100644
index 0000000..fb0146e
--- /dev/null
+++ b/cnn_v3/src/gbuffer_effect.cc
@@ -0,0 +1,596 @@
+// GBufferEffect implementation
+// Rasterizes proxy geometry to MRT G-buffer, then packs into CNN v3 feature textures.
+
+#include "gbuffer_effect.h"
+#include "3d/object.h"
+#include "gpu/gpu.h"
+#include "util/fatal_error.h"
+#include "util/mini_math.h"
+#include <cstring>
+#include <vector>
+
+// Shader source (loaded from asset at runtime — declared extern by the build system)
+// For standalone use outside the asset system, the caller must ensure the WGSL
+// source strings are available. They are declared here as weak-linkable externs.
+extern const char* gbuf_raster_wgsl;
+extern const char* gbuf_pack_wgsl;
+
+// Maximum number of objects the G-buffer supports per frame.
+static const int kGBufMaxObjects = 256;
+
+// ObjectData struct that mirrors the WGSL layout in gbuf_raster.wgsl and renderer.h
+// The byte layout must match the shader side exactly; the static_assert below
+// locks the total size at 160 bytes (40 floats).
+struct GBufObjectData {
+  mat4 model;      // object-to-world transform
+  mat4 inv_model;  // precomputed inverse (world-to-object)
+  vec4 color;      // base color forwarded to the shader
+  vec4 params;  // x = object type, y = plane_distance
+};
+static_assert(sizeof(GBufObjectData) == sizeof(float) * 40,
+              "GBufObjectData must be 160 bytes");
+
+// GlobalUniforms struct mirroring renderer.h
+// Per-frame camera/scene constants; layout must match the WGSL uniform block
+// (static_assert below pins the size at 176 bytes).
+struct GBufGlobalUniforms {
+  mat4 view_proj;        // projection * view
+  mat4 inv_view_proj;    // inverse of view_proj
+  vec4 camera_pos_time;  // xyz = camera world position, w = time
+  vec4 params; // x = num_objects
+  vec2 resolution;       // render target size in pixels
+  vec2 padding;          // pad struct to a 16-byte multiple
+};
+static_assert(sizeof(GBufGlobalUniforms) == sizeof(float) * 44,
+              "GBufGlobalUniforms must be 176 bytes");
+
+// Helper: create a 1×1 placeholder texture of a given format. Note the
+// texture is NOT cleared here — callers fill the single pixel afterwards via
+// write_placeholder_pixel(); `value` only documents the intended clear value
+// at the call site.
+static WGPUTexture create_placeholder_tex(WGPUDevice device,
+                                          WGPUTextureFormat format,
+                                          float value) {
+  (void)value;  // kept for call-site readability; silences -Wunused-parameter
+  WGPUTextureDescriptor desc = {};
+  desc.usage = (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding |
+                                  WGPUTextureUsage_CopyDst);
+  desc.dimension = WGPUTextureDimension_2D;
+  desc.size = {1, 1, 1};
+  desc.format = format;
+  desc.mipLevelCount = 1;
+  desc.sampleCount = 1;
+  return wgpuDeviceCreateTexture(device, &desc);
+}
+
+// Helper: upload one RGBA32F texel to mip 0 of `tex` through the queue.
+static void write_placeholder_pixel(WGPUQueue queue, WGPUTexture tex,
+                                    float r, float g, float b, float a) {
+  const float pixel[4] = {r, g, b, a};
+
+  WGPUTexelCopyTextureInfo destination = {};
+  destination.texture = tex;
+  destination.mipLevel = 0;
+  destination.origin = {0, 0, 0};
+  destination.aspect = WGPUTextureAspect_All;
+
+  WGPUTexelCopyBufferLayout src_layout = {};
+  src_layout.offset = 0;
+  src_layout.bytesPerRow = sizeof(pixel);  // 16 bytes: 4 × f32
+  src_layout.rowsPerImage = 1;
+
+  const WGPUExtent3D one_texel = {1, 1, 1};
+  wgpuQueueWriteTexture(queue, &destination, pixel, sizeof(pixel),
+                        &src_layout, &one_texel);
+}
+
+// Build a linear-filtering sampler with clamp-to-edge addressing on U and V.
+static WGPUSampler create_bilinear_sampler(WGPUDevice device) {
+  WGPUSamplerDescriptor sampler_desc = {};
+  sampler_desc.magFilter = WGPUFilterMode_Linear;
+  sampler_desc.minFilter = WGPUFilterMode_Linear;
+  sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear;
+  sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge;
+  sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge;
+  sampler_desc.maxAnisotropy = 1;
+  return wgpuDeviceCreateSampler(device, &sampler_desc);
+}
+
+// ---- GBufferEffect ----
+
+// Constructs the effect. Sets up everything that does not depend on frame
+// state: internal node names (derived from the first output name), the
+// scene-data GPU buffers, 1×1 placeholder textures for the
+// not-yet-implemented shadow/transparency passes, and both pipelines.
+GBufferEffect::GBufferEffect(const GpuContext& ctx,
+                             const std::vector<std::string>& inputs,
+                             const std::vector<std::string>& outputs,
+                             float start_time, float end_time)
+    : Effect(ctx, inputs, outputs, start_time, end_time) {
+  // In headless builds there is no device; bail before touching the GPU.
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  // Derive internal node name prefix from the first output name.
+  const std::string& prefix = outputs.empty() ? "gbuf" : outputs[0];
+  node_albedo_ = prefix + "_albedo";
+  node_normal_mat_ = prefix + "_normal_mat";
+  node_depth_ = prefix + "_depth";
+  node_shadow_ = prefix + "_shadow";
+  node_transp_ = prefix + "_transp";
+  // Feature outputs map 1:1 onto the declared outputs when present.
+  node_feat0_ = outputs.size() > 0 ? outputs[0] : prefix + "_feat0";
+  node_feat1_ = outputs.size() > 1 ? outputs[1] : prefix + "_feat1";
+
+  // Allocate GPU buffers for scene data.
+  global_uniforms_buf_ =
+      gpu_create_buffer(ctx_.device, sizeof(GBufGlobalUniforms),
+                        WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
+
+  // Pre-size the objects storage buffer to the per-frame maximum.
+  ensure_objects_buffer(kGBufMaxObjects);
+
+  // Resolution uniform for pack shader.
+  pack_res_uniform_.init(ctx_.device);
+
+  // Placeholder shadow (1.0 = fully lit) and transp (0.0 = opaque) textures.
+  shadow_placeholder_tex_.set(
+      create_placeholder_tex(ctx_.device, WGPUTextureFormat_RGBA32Float, 1.0f));
+  write_placeholder_pixel(ctx_.queue,
+                          shadow_placeholder_tex_.get(), 1.0f, 0.0f, 0.0f, 1.0f);
+
+  transp_placeholder_tex_.set(
+      create_placeholder_tex(ctx_.device, WGPUTextureFormat_RGBA32Float, 0.0f));
+  write_placeholder_pixel(ctx_.queue,
+                          transp_placeholder_tex_.get(), 0.0f, 0.0f, 0.0f, 1.0f);
+
+  // Shared view descriptor reused for both placeholder textures.
+  WGPUTextureViewDescriptor vd = {};
+  vd.format = WGPUTextureFormat_RGBA32Float;
+  vd.dimension = WGPUTextureViewDimension_2D;
+  vd.baseMipLevel = 0;
+  vd.mipLevelCount = 1;
+  vd.baseArrayLayer = 0;
+  vd.arrayLayerCount = 1;
+  vd.aspect = WGPUTextureAspect_All;
+
+  shadow_placeholder_view_.set(
+      wgpuTextureCreateView(shadow_placeholder_tex_.get(), &vd));
+  transp_placeholder_view_.set(
+      wgpuTextureCreateView(transp_placeholder_tex_.get(), &vd));
+
+  // Pipelines are created last; both return early if shader source is
+  // unavailable (creation is then deferred).
+  create_raster_pipeline();
+  create_pack_pipeline();
+}
+
+// Registers the effect's internal G-buffer nodes, plus the two packed
+// feature output nodes when the sequence infrastructure has not already
+// declared them.
+void GBufferEffect::declare_nodes(NodeRegistry& registry) {
+  registry.declare_node(node_albedo_, NodeType::GBUF_ALBEDO, -1, -1);
+  // NOTE(review): normal_mat is declared as GBUF_ALBEDO — presumably the two
+  // share the same texture format; confirm this is intentional.
+  registry.declare_node(node_normal_mat_, NodeType::GBUF_ALBEDO, -1, -1);
+  registry.declare_node(node_depth_, NodeType::GBUF_DEPTH32, -1, -1);
+  registry.declare_node(node_shadow_, NodeType::GBUF_R8, -1, -1);
+  registry.declare_node(node_transp_, NodeType::GBUF_R8, -1, -1);
+  // feat_tex0 / feat_tex1 are the declared output_nodes_ — they get registered
+  // by the sequence infrastructure; declare them here as well if not already.
+  if (!registry.has_node(node_feat0_)) {
+    registry.declare_node(node_feat0_, NodeType::GBUF_RGBA32UINT, -1, -1);
+  }
+  if (!registry.has_node(node_feat1_)) {
+    registry.declare_node(node_feat1_, NodeType::GBUF_RGBA32UINT, -1, -1);
+  }
+}
+
+// Points the effect at the scene/camera pair to render. Both pointers are
+// borrowed (not owned); render() is a no-op while either is null.
+void GBufferEffect::set_scene(const Scene* scene, const Camera* camera) {
+  camera_ = camera;
+  scene_ = scene;
+}
+
+// Records all G-buffer work for one frame into `encoder`:
+//   Pass 1: rasterize proxy boxes into the albedo/normal MRT + depth buffer.
+//   Pass 2/3 (SDF raymarch, lighting/shadow): TODO — not recorded yet.
+//   Pass 4: compute pass that packs the G-buffer into feat0/feat1 (rgba32uint).
+// No-op until set_scene() has provided both a scene and a camera.
+void GBufferEffect::render(WGPUCommandEncoder encoder,
+                           const UniformsSequenceParams& params,
+                           NodeRegistry& nodes) {
+  if (!scene_ || !camera_) {
+    return;
+  }
+
+  // Writes global uniforms + per-object data via the queue.
+  upload_scene_data(*scene_, *camera_, params.time);
+
+  // Update resolution uniform for pack shader.
+  GBufResUniforms res_uni;
+  res_uni.resolution = params.resolution;
+  res_uni._pad0 = 0.0f;
+  res_uni._pad1 = 0.0f;
+  pack_res_uniform_.update(ctx_.queue, res_uni);
+
+  // Resolve the texture views backing our named nodes for this frame.
+  WGPUTextureView albedo_view = nodes.get_view(node_albedo_);
+  WGPUTextureView normal_mat_view = nodes.get_view(node_normal_mat_);
+  WGPUTextureView depth_view = nodes.get_view(node_depth_);
+  WGPUTextureView feat0_view = nodes.get_view(node_feat0_);
+  WGPUTextureView feat1_view = nodes.get_view(node_feat1_);
+
+  // prev_cnn: first input node if available, else dummy.
+  WGPUTextureView prev_view = nullptr;
+  if (!input_nodes_.empty()) {
+    prev_view = nodes.get_view(input_nodes_[0]);
+  }
+  if (!prev_view) {
+    prev_view = dummy_texture_view_.get();
+  }
+
+  // --- Pass 1: MRT rasterization ---
+  update_raster_bind_group(nodes);
+
+  WGPURenderPassColorAttachment color_attachments[2] = {};
+  // Attachment 0: albedo
+  color_attachments[0].view = albedo_view;
+  color_attachments[0].loadOp = WGPULoadOp_Clear;
+  color_attachments[0].storeOp = WGPUStoreOp_Store;
+  color_attachments[0].clearValue = {0.0f, 0.0f, 0.0f, 1.0f};
+  color_attachments[0].depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
+  // Attachment 1: normal_mat
+  color_attachments[1].view = normal_mat_view;
+  color_attachments[1].loadOp = WGPULoadOp_Clear;
+  color_attachments[1].storeOp = WGPUStoreOp_Store;
+  // 0.5,0.5 clear — presumably an encoded "no normal" value; confirm against
+  // the pack shader's decode.
+  color_attachments[1].clearValue = {0.5f, 0.5f, 0.0f, 0.0f};
+  color_attachments[1].depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
+
+  WGPURenderPassDepthStencilAttachment depth_attachment = {};
+  depth_attachment.view = depth_view;
+  depth_attachment.depthLoadOp = WGPULoadOp_Clear;
+  depth_attachment.depthStoreOp = WGPUStoreOp_Store;
+  depth_attachment.depthClearValue = 1.0f;  // far plane
+  depth_attachment.depthReadOnly = false;
+
+  WGPURenderPassDescriptor raster_pass_desc = {};
+  raster_pass_desc.colorAttachmentCount = 2;
+  raster_pass_desc.colorAttachments = color_attachments;
+  raster_pass_desc.depthStencilAttachment = &depth_attachment;
+
+  // Clamp object count to the storage buffer's per-frame maximum.
+  const int num_objects =
+      (int)(scene_->objects.size() < (size_t)kGBufMaxObjects
+                ? scene_->objects.size()
+                : (size_t)kGBufMaxObjects);
+
+  if (num_objects > 0 && raster_pipeline_.get() != nullptr) {
+    WGPURenderPassEncoder raster_pass =
+        wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc);
+    wgpuRenderPassEncoderSetPipeline(raster_pass, raster_pipeline_.get());
+    wgpuRenderPassEncoderSetBindGroup(raster_pass, 0,
+                                      raster_bind_group_.get(), 0, nullptr);
+    // Draw 36 vertices (proxy box) × num_objects instances.
+    wgpuRenderPassEncoderDraw(raster_pass, 36, (uint32_t)num_objects, 0, 0);
+    wgpuRenderPassEncoderEnd(raster_pass);
+    wgpuRenderPassEncoderRelease(raster_pass);
+  } else {
+    // Clear passes with no draws still need to be submitted.
+    WGPURenderPassEncoder raster_pass =
+        wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc);
+    wgpuRenderPassEncoderEnd(raster_pass);
+    wgpuRenderPassEncoderRelease(raster_pass);
+  }
+
+  // Pass 2: SDF raymarching — TODO (placeholder: shadow=1, transp=0 already set)
+  // Pass 3: Lighting/shadow — TODO
+
+  // --- Pass 4: Pack compute ---
+  // Rebuild pack bind group with current node views.
+  // Construct a temporary bilinear sampler for this pass.
+  // NOTE(review): a sampler is created and released every frame; consider
+  // caching it as a member.
+  WGPUSampler bilinear = create_bilinear_sampler(ctx_.device);
+
+  // Get texture views from nodes.
+  // shadow / transp are GBUF_R8 nodes; use their views.
+  // NOTE(review): the placeholder shadow/transp views created in the ctor are
+  // not bound here — confirm which source the pack shader should sample.
+  WGPUTextureView shadow_view = nodes.get_view(node_shadow_);
+  WGPUTextureView transp_view = nodes.get_view(node_transp_);
+
+  // Build pack bind group (bindings 0-9).
+  WGPUBindGroupEntry pack_entries[10] = {};
+  pack_entries[0].binding = 0;
+  pack_entries[0].buffer = pack_res_uniform_.get().buffer;
+  pack_entries[0].size = sizeof(GBufResUniforms);
+
+  pack_entries[1].binding = 1;
+  pack_entries[1].textureView = albedo_view;
+
+  pack_entries[2].binding = 2;
+  pack_entries[2].textureView = normal_mat_view;
+
+  pack_entries[3].binding = 3;
+  pack_entries[3].textureView = depth_view;
+
+  pack_entries[4].binding = 4;
+  pack_entries[4].textureView = shadow_view;
+
+  pack_entries[5].binding = 5;
+  pack_entries[5].textureView = transp_view;
+
+  pack_entries[6].binding = 6;
+  pack_entries[6].textureView = prev_view;
+
+  pack_entries[7].binding = 7;
+  pack_entries[7].textureView = feat0_view;
+
+  pack_entries[8].binding = 8;
+  pack_entries[8].textureView = feat1_view;
+
+  pack_entries[9].binding = 9;
+  pack_entries[9].sampler = bilinear;
+
+  WGPUBindGroupLayout pack_bgl =
+      wgpuComputePipelineGetBindGroupLayout(pack_pipeline_.get(), 0);
+
+  WGPUBindGroupDescriptor pack_bg_desc = {};
+  pack_bg_desc.layout = pack_bgl;
+  pack_bg_desc.entryCount = 10;
+  pack_bg_desc.entries = pack_entries;
+
+  WGPUBindGroup pack_bg = wgpuDeviceCreateBindGroup(ctx_.device, &pack_bg_desc);
+  wgpuBindGroupLayoutRelease(pack_bgl);
+
+  WGPUComputePassDescriptor compute_pass_desc = {};
+  WGPUComputePassEncoder compute_pass =
+      wgpuCommandEncoderBeginComputePass(encoder, &compute_pass_desc);
+  wgpuComputePassEncoderSetPipeline(compute_pass, pack_pipeline_.get());
+  wgpuComputePassEncoderSetBindGroup(compute_pass, 0, pack_bg, 0, nullptr);
+
+  // One thread per pixel with 8×8 workgroups; round up to cover the edges.
+  const uint32_t wg_x = ((uint32_t)width_ + 7u) / 8u;
+  const uint32_t wg_y = ((uint32_t)height_ + 7u) / 8u;
+  wgpuComputePassEncoderDispatchWorkgroups(compute_pass, wg_x, wg_y, 1);
+  wgpuComputePassEncoderEnd(compute_pass);
+  wgpuComputePassEncoderRelease(compute_pass);
+
+  // The command buffer holds its own references; safe to release now.
+  wgpuBindGroupRelease(pack_bg);
+  wgpuSamplerRelease(bilinear);
+}
+
+// ---- private helpers ----
+
+// Grow the objects storage buffer to hold at least `num_objects` entries.
+// No-op when current capacity suffices; otherwise the old buffer is released
+// and a fresh one allocated at exactly the requested size.
+void GBufferEffect::ensure_objects_buffer(int num_objects) {
+  if (num_objects <= objects_buf_capacity_) {
+    return;  // capacity already sufficient
+  }
+  // Drop the previous allocation before replacing it.
+  if (objects_buf_.buffer) {
+    wgpuBufferRelease(objects_buf_.buffer);
+  }
+  const size_t byte_size = (size_t)num_objects * sizeof(GBufObjectData);
+  objects_buf_ = gpu_create_buffer(
+      ctx_.device, byte_size,
+      WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst);
+  objects_buf_capacity_ = num_objects;
+}
+
+// Writes the per-frame global uniforms and per-object data to the GPU.
+// The object count is clamped to kGBufMaxObjects and the objects buffer
+// grows on demand before the upload.
+void GBufferEffect::upload_scene_data(const Scene& scene,
+                                      const Camera& camera, float time) {
+  size_t clamped = scene.objects.size();
+  if (clamped > (size_t)kGBufMaxObjects) {
+    clamped = (size_t)kGBufMaxObjects;
+  }
+  const int count = (int)clamped;
+
+  // Camera matrices → combined view-projection and its inverse.
+  const mat4 view_mat = camera.get_view_matrix();
+  const mat4 proj_mat = camera.get_projection_matrix();
+  const mat4 view_proj = proj_mat * view_mat;
+
+  GBufGlobalUniforms globals = {};
+  globals.view_proj = view_proj;
+  globals.inv_view_proj = view_proj.inverse();
+  globals.camera_pos_time = vec4(camera.position.x, camera.position.y,
+                                 camera.position.z, time);
+  globals.params = vec4((float)count, 0.0f, 0.0f, 0.0f);
+  globals.resolution = vec2((float)width_, (float)height_);
+  globals.padding = vec2(0.0f, 0.0f);
+
+  wgpuQueueWriteBuffer(ctx_.queue, global_uniforms_buf_.buffer, 0,
+                       &globals, sizeof(GBufGlobalUniforms));
+
+  if (count == 0) {
+    return;  // no objects: only globals needed this frame
+  }
+
+  ensure_objects_buffer(count);
+
+  // Stage each object in the shader-side layout, then upload in one write.
+  std::vector<GBufObjectData> staging(clamped);
+  for (size_t i = 0; i < clamped; ++i) {
+    const Object3D& obj = scene.objects[i];
+    const mat4 model = obj.get_model_matrix();
+    staging[i].model = model;
+    staging[i].inv_model = model.inverse();
+    staging[i].color = obj.color;
+    staging[i].params = vec4(0.0f, 0.0f, 0.0f, 0.0f);
+  }
+  wgpuQueueWriteBuffer(ctx_.queue, objects_buf_.buffer, 0, staging.data(),
+                       clamped * sizeof(GBufObjectData));
+}
+
+// Builds the Pass-1 MRT raster pipeline from gbuf_raster_wgsl
+// (vs_main / fs_main). Returns early — deferring creation — when the shader
+// source has not been loaded yet. Intermediate layout/module handles are
+// released once the pipeline holds them.
+void GBufferEffect::create_raster_pipeline() {
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  // Load shader source.
+  const char* src = gbuf_raster_wgsl;
+  if (!src) {
+    return;  // Asset not loaded yet; pipeline creation deferred.
+  }
+
+  WGPUShaderSourceWGSL wgsl_src = {};
+  wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+  wgsl_src.code = str_view(src);
+
+  WGPUShaderModuleDescriptor shader_desc = {};
+  shader_desc.nextInChain = &wgsl_src.chain;
+  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+
+  // Bind group layout: B0 = GlobalUniforms, B1 = ObjectsBuffer (storage read)
+  WGPUBindGroupLayoutEntry bgl_entries[2] = {};
+  bgl_entries[0].binding = 0;
+  bgl_entries[0].visibility =
+      (WGPUShaderStage)(WGPUShaderStage_Vertex | WGPUShaderStage_Fragment);
+  bgl_entries[0].buffer.type = WGPUBufferBindingType_Uniform;
+  bgl_entries[0].buffer.minBindingSize = sizeof(GBufGlobalUniforms);
+
+  bgl_entries[1].binding = 1;
+  bgl_entries[1].visibility =
+      (WGPUShaderStage)(WGPUShaderStage_Vertex | WGPUShaderStage_Fragment);
+  bgl_entries[1].buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
+  // minBindingSize for a runtime-sized array is the size of one element.
+  bgl_entries[1].buffer.minBindingSize = sizeof(GBufObjectData);
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = 2;
+  bgl_desc.entries = bgl_entries;
+  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+
+  WGPUPipelineLayoutDescriptor pl_desc = {};
+  pl_desc.bindGroupLayoutCount = 1;
+  pl_desc.bindGroupLayouts = &bgl;
+  WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+
+  // Two color targets: albedo (rgba16float) and normal_mat (rgba16float)
+  WGPUColorTargetState color_targets[2] = {};
+  color_targets[0].format = WGPUTextureFormat_RGBA16Float;
+  color_targets[0].writeMask = WGPUColorWriteMask_All;
+  color_targets[1].format = WGPUTextureFormat_RGBA16Float;
+  color_targets[1].writeMask = WGPUColorWriteMask_All;
+
+  WGPUFragmentState frag = {};
+  frag.module = shader;
+  frag.entryPoint = str_view("fs_main");
+  frag.targetCount = 2;
+  frag.targets = color_targets;
+
+  // Standard depth test: write, pass when closer.
+  WGPUDepthStencilState ds = {};
+  ds.format = WGPUTextureFormat_Depth32Float;
+  ds.depthWriteEnabled = WGPUOptionalBool_True;
+  ds.depthCompare = WGPUCompareFunction_Less;
+
+  WGPURenderPipelineDescriptor pipe_desc = {};
+  pipe_desc.layout = pl;
+  pipe_desc.vertex.module = shader;
+  pipe_desc.vertex.entryPoint = str_view("vs_main");
+  pipe_desc.fragment = &frag;
+  pipe_desc.depthStencil = &ds;
+  pipe_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
+  pipe_desc.primitive.cullMode = WGPUCullMode_Back;
+  pipe_desc.multisample.count = 1;
+  pipe_desc.multisample.mask = 0xFFFFFFFF;
+
+  raster_pipeline_.set(wgpuDeviceCreateRenderPipeline(ctx_.device, &pipe_desc));
+
+  // The pipeline keeps its own references; release our local handles.
+  wgpuPipelineLayoutRelease(pl);
+  wgpuBindGroupLayoutRelease(bgl);
+  wgpuShaderModuleRelease(shader);
+}
+
+// Builds the Pass-4 pack compute pipeline from gbuf_pack_wgsl (entry point
+// "pack_features"). The bind group layout is spelled out explicitly
+// (bindings 0-9) so render() can recreate matching bind groups each frame.
+// Returns early — deferring creation — when the shader source is missing.
+void GBufferEffect::create_pack_pipeline() {
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  const char* src = gbuf_pack_wgsl;
+  if (!src) {
+    return;  // asset not loaded yet; creation deferred
+  }
+
+  WGPUShaderSourceWGSL wgsl_src = {};
+  wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+  wgsl_src.code = str_view(src);
+
+  WGPUShaderModuleDescriptor shader_desc = {};
+  shader_desc.nextInChain = &wgsl_src.chain;
+  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+
+  // Build explicit bind group layout for bindings 0-9.
+  WGPUBindGroupLayoutEntry bgl_entries[10] = {};
+
+  // B0: resolution uniform
+  bgl_entries[0].binding = 0;
+  bgl_entries[0].visibility = WGPUShaderStage_Compute;
+  bgl_entries[0].buffer.type = WGPUBufferBindingType_Uniform;
+  bgl_entries[0].buffer.minBindingSize = sizeof(GBufResUniforms);
+
+  // B1: gbuf_albedo (texture_2d<f32>)
+  bgl_entries[1].binding = 1;
+  bgl_entries[1].visibility = WGPUShaderStage_Compute;
+  bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B2: gbuf_normal_mat (texture_2d<f32>)
+  bgl_entries[2].binding = 2;
+  bgl_entries[2].visibility = WGPUShaderStage_Compute;
+  bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B3: gbuf_depth (texture_depth_2d)
+  bgl_entries[3].binding = 3;
+  bgl_entries[3].visibility = WGPUShaderStage_Compute;
+  bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Depth;
+  bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B4: gbuf_shadow (texture_2d<f32>)
+  bgl_entries[4].binding = 4;
+  bgl_entries[4].visibility = WGPUShaderStage_Compute;
+  bgl_entries[4].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[4].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B5: gbuf_transp (texture_2d<f32>)
+  bgl_entries[5].binding = 5;
+  bgl_entries[5].visibility = WGPUShaderStage_Compute;
+  bgl_entries[5].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[5].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B6: prev_cnn (texture_2d<f32>)
+  bgl_entries[6].binding = 6;
+  bgl_entries[6].visibility = WGPUShaderStage_Compute;
+  bgl_entries[6].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[6].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B7: feat_tex0 (storage texture, write, rgba32uint)
+  bgl_entries[7].binding = 7;
+  bgl_entries[7].visibility = WGPUShaderStage_Compute;
+  bgl_entries[7].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  bgl_entries[7].storageTexture.format = WGPUTextureFormat_RGBA32Uint;
+  bgl_entries[7].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B8: feat_tex1 (storage texture, write, rgba32uint)
+  bgl_entries[8].binding = 8;
+  bgl_entries[8].visibility = WGPUShaderStage_Compute;
+  bgl_entries[8].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  bgl_entries[8].storageTexture.format = WGPUTextureFormat_RGBA32Uint;
+  bgl_entries[8].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B9: bilinear sampler
+  bgl_entries[9].binding = 9;
+  bgl_entries[9].visibility = WGPUShaderStage_Compute;
+  bgl_entries[9].sampler.type = WGPUSamplerBindingType_Filtering;
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = 10;
+  bgl_desc.entries = bgl_entries;
+  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+
+  WGPUPipelineLayoutDescriptor pl_desc = {};
+  pl_desc.bindGroupLayoutCount = 1;
+  pl_desc.bindGroupLayouts = &bgl;
+  WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+
+  WGPUComputePipelineDescriptor pipe_desc = {};
+  pipe_desc.layout = pl;
+  pipe_desc.compute.module = shader;
+  pipe_desc.compute.entryPoint = str_view("pack_features");
+
+  pack_pipeline_.set(wgpuDeviceCreateComputePipeline(ctx_.device, &pipe_desc));
+
+  // The pipeline keeps its own references; release our local handles.
+  wgpuPipelineLayoutRelease(pl);
+  wgpuBindGroupLayoutRelease(bgl);
+  wgpuShaderModuleRelease(shader);
+}
+
+// Recreates the Pass-1 bind group (group 0: globals UBO + objects storage).
+// Rebuilt every frame because the underlying resources may be reallocated.
+void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) {
+  (void)nodes;
+  // Drop last frame's bind group first.
+  raster_bind_group_.replace(nullptr);
+
+  if (raster_pipeline_.get() == nullptr) {
+    return;  // pipeline creation was deferred; nothing to bind against
+  }
+
+  WGPUBindGroupEntry bind_entries[2] = {};
+  bind_entries[0].binding = 0;
+  bind_entries[0].buffer = global_uniforms_buf_.buffer;
+  bind_entries[0].size = sizeof(GBufGlobalUniforms);
+  bind_entries[1].binding = 1;
+  bind_entries[1].buffer = objects_buf_.buffer;
+  bind_entries[1].size = (size_t)objects_buf_capacity_ * sizeof(GBufObjectData);
+
+  WGPUBindGroupLayout layout =
+      wgpuRenderPipelineGetBindGroupLayout(raster_pipeline_.get(), 0);
+
+  WGPUBindGroupDescriptor desc = {};
+  desc.layout = layout;
+  desc.entryCount = 2;
+  desc.entries = bind_entries;
+
+  raster_bind_group_.replace(wgpuDeviceCreateBindGroup(ctx_.device, &desc));
+  wgpuBindGroupLayoutRelease(layout);
+}
+
+// Intentionally a no-op: the pack bind group must reference the node views
+// that are valid at dispatch time, so render() builds it inline each frame.
+void GBufferEffect::update_pack_bind_group(NodeRegistry& nodes) {
+  static_cast<void>(nodes);
+}
diff --git a/cnn_v3/src/gbuffer_effect.h b/cnn_v3/src/gbuffer_effect.h
new file mode 100644
index 0000000..42fb0ec
--- /dev/null
+++ b/cnn_v3/src/gbuffer_effect.h
@@ -0,0 +1,79 @@
+// GBufferEffect: Multi-pass G-buffer rendering for CNN v3 input
+// Outputs: gbuf_feat0, gbuf_feat1 (packed rgba32uint feature textures, 32 bytes/pixel)
+
+#pragma once
+
+#include "3d/camera.h"
+#include "3d/scene.h"
+#include "gpu/effect.h"
+#include "gpu/sequence.h"
+#include "gpu/uniform_helper.h"
+#include "gpu/wgpu_resource.h"
+#include "util/mini_math.h"
+
+// Uniform for the pack compute shader
+// 16-byte UBO: xy = output resolution in pixels; zw pad to 16 bytes so the
+// layout matches the WGSL uniform block (size pinned by the static_assert).
+struct GBufResUniforms {
+  vec2 resolution;  // width, height in pixels
+  float _pad0;      // alignment padding only
+  float _pad1;
+};
+static_assert(sizeof(GBufResUniforms) == 16,
+              "GBufResUniforms must be 16 bytes");
+
+// Effect that rasterizes scene proxy geometry into a multi-target G-buffer
+// and packs the result into two rgba32uint feature textures consumed by the
+// CNN v3 pipeline. Scene/camera are injected via set_scene(); both are
+// borrowed pointers and rendering is skipped while either is null.
+class GBufferEffect : public Effect {
+ public:
+  // outputs[0]/outputs[1] name the packed feature textures; outputs[0] also
+  // serves as the prefix for the internal G-buffer node names.
+  GBufferEffect(const GpuContext& ctx, const std::vector<std::string>& inputs,
+                const std::vector<std::string>& outputs, float start_time,
+                float end_time);
+
+  // Registers internal G-buffer nodes (and feature outputs if absent).
+  void declare_nodes(NodeRegistry& registry) override;
+
+  // Records the raster + pack passes for one frame into `encoder`.
+  void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params,
+              NodeRegistry& nodes) override;
+
+  // Sets the (non-owned) scene and camera to render.
+  void set_scene(const Scene* scene, const Camera* camera);
+
+ private:
+  // Internal G-buffer node names
+  std::string node_albedo_;
+  std::string node_normal_mat_;
+  std::string node_depth_;
+  std::string node_shadow_;
+  std::string node_transp_;
+  std::string node_feat0_;
+  std::string node_feat1_;
+
+  // Borrowed pointers set via set_scene(); null disables rendering.
+  const Scene* scene_ = nullptr;
+  const Camera* camera_ = nullptr;
+
+  // Pass 1: MRT rasterization pipeline
+  RenderPipeline raster_pipeline_;
+  BindGroup raster_bind_group_;
+
+  // Pass 4: Pack compute pipeline
+  ComputePipeline pack_pipeline_;
+  BindGroup pack_bind_group_;
+  UniformBuffer<GBufResUniforms> pack_res_uniform_;
+
+  // Placeholder textures for shadow/transp (white/black cleared once)
+  Texture shadow_placeholder_tex_;
+  TextureView shadow_placeholder_view_;
+  Texture transp_placeholder_tex_;
+  TextureView transp_placeholder_view_;
+
+  // GPU-side object data buffers (global uniforms + objects storage)
+  // These mirror the layout expected by gbuf_raster.wgsl
+  GpuBuffer global_uniforms_buf_;
+  GpuBuffer objects_buf_;
+  int objects_buf_capacity_ = 0;  // number of ObjectData slots allocated
+
+  // Pipeline construction; both no-op if shader source is unavailable.
+  void create_raster_pipeline();
+  void create_pack_pipeline();
+
+  // Per-frame bind group maintenance.
+  void update_raster_bind_group(NodeRegistry& nodes);
+  void update_pack_bind_group(NodeRegistry& nodes);
+
+  // Per-frame uniform/object uploads and buffer growth.
+  void upload_scene_data(const Scene& scene, const Camera& camera,
+                         float time);
+  void ensure_objects_buffer(int num_objects);
+};