summaryrefslogtreecommitdiff
path: root/cnn_v3/src
diff options
context:
space:
mode:
Diffstat (limited to 'cnn_v3/src')
-rw-r--r--cnn_v3/src/gbuffer_effect.cc596
-rw-r--r--cnn_v3/src/gbuffer_effect.h79
2 files changed, 675 insertions, 0 deletions
diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc
new file mode 100644
index 0000000..fb0146e
--- /dev/null
+++ b/cnn_v3/src/gbuffer_effect.cc
@@ -0,0 +1,596 @@
+// GBufferEffect implementation
+// Rasterizes proxy geometry to MRT G-buffer, then packs into CNN v3 feature textures.
+
+#include "gbuffer_effect.h"
+#include "3d/object.h"
+#include "gpu/gpu.h"
+#include "util/fatal_error.h"
+#include "util/mini_math.h"
+#include <cstring>
+#include <vector>
+
+// Shader source (loaded from asset at runtime — declared extern by the build system)
+// For standalone use outside the asset system, the caller must ensure the WGSL
+// source strings are available. They are declared here as weak-linkable externs.
+extern const char* gbuf_raster_wgsl;
+extern const char* gbuf_pack_wgsl;
+
+// Maximum number of objects the G-buffer supports per frame.
+static const int kGBufMaxObjects = 256;
+
+// ObjectData struct that mirrors the WGSL layout in gbuf_raster.wgsl and renderer.h
+// The byte layout must match the shader side exactly; the static_assert below
+// locks the total size at 160 bytes (40 floats).
+struct GBufObjectData {
+  mat4 model;      // object-to-world transform
+  mat4 inv_model;  // precomputed inverse (world-to-object)
+  vec4 color;      // base color forwarded to the shader
+  vec4 params;  // x = object type, y = plane_distance
+};
+static_assert(sizeof(GBufObjectData) == sizeof(float) * 40,
+              "GBufObjectData must be 160 bytes");
+
+// GlobalUniforms struct mirroring renderer.h
+// Per-frame camera/scene constants; layout must match the WGSL uniform block
+// (static_assert below pins the size at 176 bytes).
+struct GBufGlobalUniforms {
+  mat4 view_proj;        // projection * view
+  mat4 inv_view_proj;    // inverse of view_proj
+  vec4 camera_pos_time;  // xyz = camera world position, w = time
+  vec4 params; // x = num_objects
+  vec2 resolution;       // render target size in pixels
+  vec2 padding;          // pad struct to a 16-byte multiple
+};
+static_assert(sizeof(GBufGlobalUniforms) == sizeof(float) * 44,
+              "GBufGlobalUniforms must be 176 bytes");
+
+// Helper: create a 1×1 placeholder texture of a given format. Note the
+// texture is NOT cleared here — callers fill the single pixel afterwards via
+// write_placeholder_pixel(); `value` only documents the intended clear value
+// at the call site.
+static WGPUTexture create_placeholder_tex(WGPUDevice device,
+                                          WGPUTextureFormat format,
+                                          float value) {
+  (void)value;  // kept for call-site readability; silences -Wunused-parameter
+  WGPUTextureDescriptor desc = {};
+  desc.usage = (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding |
+                                  WGPUTextureUsage_CopyDst);
+  desc.dimension = WGPUTextureDimension_2D;
+  desc.size = {1, 1, 1};
+  desc.format = format;
+  desc.mipLevelCount = 1;
+  desc.sampleCount = 1;
+  return wgpuDeviceCreateTexture(device, &desc);
+}
+
+// Helper: upload one RGBA32F texel to mip 0 of `tex` through the queue.
+static void write_placeholder_pixel(WGPUQueue queue, WGPUTexture tex,
+                                    float r, float g, float b, float a) {
+  const float pixel[4] = {r, g, b, a};
+
+  WGPUTexelCopyTextureInfo destination = {};
+  destination.texture = tex;
+  destination.mipLevel = 0;
+  destination.origin = {0, 0, 0};
+  destination.aspect = WGPUTextureAspect_All;
+
+  WGPUTexelCopyBufferLayout src_layout = {};
+  src_layout.offset = 0;
+  src_layout.bytesPerRow = sizeof(pixel);  // 16 bytes: 4 × f32
+  src_layout.rowsPerImage = 1;
+
+  const WGPUExtent3D one_texel = {1, 1, 1};
+  wgpuQueueWriteTexture(queue, &destination, pixel, sizeof(pixel),
+                        &src_layout, &one_texel);
+}
+
+// Build a linear-filtering sampler with clamp-to-edge addressing on U and V.
+static WGPUSampler create_bilinear_sampler(WGPUDevice device) {
+  WGPUSamplerDescriptor sampler_desc = {};
+  sampler_desc.magFilter = WGPUFilterMode_Linear;
+  sampler_desc.minFilter = WGPUFilterMode_Linear;
+  sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear;
+  sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge;
+  sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge;
+  sampler_desc.maxAnisotropy = 1;
+  return wgpuDeviceCreateSampler(device, &sampler_desc);
+}
+
+// ---- GBufferEffect ----
+
+// Constructs the effect. Sets up everything that does not depend on frame
+// state: internal node names (derived from the first output name), the
+// scene-data GPU buffers, 1×1 placeholder textures for the
+// not-yet-implemented shadow/transparency passes, and both pipelines.
+GBufferEffect::GBufferEffect(const GpuContext& ctx,
+                             const std::vector<std::string>& inputs,
+                             const std::vector<std::string>& outputs,
+                             float start_time, float end_time)
+    : Effect(ctx, inputs, outputs, start_time, end_time) {
+  // In headless builds there is no device; bail before touching the GPU.
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  // Derive internal node name prefix from the first output name.
+  const std::string& prefix = outputs.empty() ? "gbuf" : outputs[0];
+  node_albedo_ = prefix + "_albedo";
+  node_normal_mat_ = prefix + "_normal_mat";
+  node_depth_ = prefix + "_depth";
+  node_shadow_ = prefix + "_shadow";
+  node_transp_ = prefix + "_transp";
+  // Feature outputs map 1:1 onto the declared outputs when present.
+  node_feat0_ = outputs.size() > 0 ? outputs[0] : prefix + "_feat0";
+  node_feat1_ = outputs.size() > 1 ? outputs[1] : prefix + "_feat1";
+
+  // Allocate GPU buffers for scene data.
+  global_uniforms_buf_ =
+      gpu_create_buffer(ctx_.device, sizeof(GBufGlobalUniforms),
+                        WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
+
+  // Pre-size the objects storage buffer to the per-frame maximum.
+  ensure_objects_buffer(kGBufMaxObjects);
+
+  // Resolution uniform for pack shader.
+  pack_res_uniform_.init(ctx_.device);
+
+  // Placeholder shadow (1.0 = fully lit) and transp (0.0 = opaque) textures.
+  shadow_placeholder_tex_.set(
+      create_placeholder_tex(ctx_.device, WGPUTextureFormat_RGBA32Float, 1.0f));
+  write_placeholder_pixel(ctx_.queue,
+                          shadow_placeholder_tex_.get(), 1.0f, 0.0f, 0.0f, 1.0f);
+
+  transp_placeholder_tex_.set(
+      create_placeholder_tex(ctx_.device, WGPUTextureFormat_RGBA32Float, 0.0f));
+  write_placeholder_pixel(ctx_.queue,
+                          transp_placeholder_tex_.get(), 0.0f, 0.0f, 0.0f, 1.0f);
+
+  // Shared view descriptor reused for both placeholder textures.
+  WGPUTextureViewDescriptor vd = {};
+  vd.format = WGPUTextureFormat_RGBA32Float;
+  vd.dimension = WGPUTextureViewDimension_2D;
+  vd.baseMipLevel = 0;
+  vd.mipLevelCount = 1;
+  vd.baseArrayLayer = 0;
+  vd.arrayLayerCount = 1;
+  vd.aspect = WGPUTextureAspect_All;
+
+  shadow_placeholder_view_.set(
+      wgpuTextureCreateView(shadow_placeholder_tex_.get(), &vd));
+  transp_placeholder_view_.set(
+      wgpuTextureCreateView(transp_placeholder_tex_.get(), &vd));
+
+  // Pipelines are created last; both return early if shader source is
+  // unavailable (creation is then deferred).
+  create_raster_pipeline();
+  create_pack_pipeline();
+}
+
+// Registers the effect's internal G-buffer nodes, plus the two packed
+// feature output nodes when the sequence infrastructure has not already
+// declared them.
+void GBufferEffect::declare_nodes(NodeRegistry& registry) {
+  registry.declare_node(node_albedo_, NodeType::GBUF_ALBEDO, -1, -1);
+  // NOTE(review): normal_mat is declared as GBUF_ALBEDO — presumably the two
+  // share the same texture format; confirm this is intentional.
+  registry.declare_node(node_normal_mat_, NodeType::GBUF_ALBEDO, -1, -1);
+  registry.declare_node(node_depth_, NodeType::GBUF_DEPTH32, -1, -1);
+  registry.declare_node(node_shadow_, NodeType::GBUF_R8, -1, -1);
+  registry.declare_node(node_transp_, NodeType::GBUF_R8, -1, -1);
+  // feat_tex0 / feat_tex1 are the declared output_nodes_ — they get registered
+  // by the sequence infrastructure; declare them here as well if not already.
+  if (!registry.has_node(node_feat0_)) {
+    registry.declare_node(node_feat0_, NodeType::GBUF_RGBA32UINT, -1, -1);
+  }
+  if (!registry.has_node(node_feat1_)) {
+    registry.declare_node(node_feat1_, NodeType::GBUF_RGBA32UINT, -1, -1);
+  }
+}
+
+// Points the effect at the scene/camera pair to render. Both pointers are
+// borrowed (not owned); render() is a no-op while either is null.
+void GBufferEffect::set_scene(const Scene* scene, const Camera* camera) {
+  camera_ = camera;
+  scene_ = scene;
+}
+
+// Records all G-buffer work for one frame into `encoder`:
+//   Pass 1: rasterize proxy boxes into the albedo/normal MRT + depth buffer.
+//   Pass 2/3 (SDF raymarch, lighting/shadow): TODO — not recorded yet.
+//   Pass 4: compute pass that packs the G-buffer into feat0/feat1 (rgba32uint).
+// No-op until set_scene() has provided both a scene and a camera.
+void GBufferEffect::render(WGPUCommandEncoder encoder,
+                           const UniformsSequenceParams& params,
+                           NodeRegistry& nodes) {
+  if (!scene_ || !camera_) {
+    return;
+  }
+
+  // Writes global uniforms + per-object data via the queue.
+  upload_scene_data(*scene_, *camera_, params.time);
+
+  // Update resolution uniform for pack shader.
+  GBufResUniforms res_uni;
+  res_uni.resolution = params.resolution;
+  res_uni._pad0 = 0.0f;
+  res_uni._pad1 = 0.0f;
+  pack_res_uniform_.update(ctx_.queue, res_uni);
+
+  // Resolve the texture views backing our named nodes for this frame.
+  WGPUTextureView albedo_view = nodes.get_view(node_albedo_);
+  WGPUTextureView normal_mat_view = nodes.get_view(node_normal_mat_);
+  WGPUTextureView depth_view = nodes.get_view(node_depth_);
+  WGPUTextureView feat0_view = nodes.get_view(node_feat0_);
+  WGPUTextureView feat1_view = nodes.get_view(node_feat1_);
+
+  // prev_cnn: first input node if available, else dummy.
+  WGPUTextureView prev_view = nullptr;
+  if (!input_nodes_.empty()) {
+    prev_view = nodes.get_view(input_nodes_[0]);
+  }
+  if (!prev_view) {
+    prev_view = dummy_texture_view_.get();
+  }
+
+  // --- Pass 1: MRT rasterization ---
+  update_raster_bind_group(nodes);
+
+  WGPURenderPassColorAttachment color_attachments[2] = {};
+  // Attachment 0: albedo
+  color_attachments[0].view = albedo_view;
+  color_attachments[0].loadOp = WGPULoadOp_Clear;
+  color_attachments[0].storeOp = WGPUStoreOp_Store;
+  color_attachments[0].clearValue = {0.0f, 0.0f, 0.0f, 1.0f};
+  color_attachments[0].depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
+  // Attachment 1: normal_mat
+  color_attachments[1].view = normal_mat_view;
+  color_attachments[1].loadOp = WGPULoadOp_Clear;
+  color_attachments[1].storeOp = WGPUStoreOp_Store;
+  // 0.5,0.5 clear — presumably an encoded "no normal" value; confirm against
+  // the pack shader's decode.
+  color_attachments[1].clearValue = {0.5f, 0.5f, 0.0f, 0.0f};
+  color_attachments[1].depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
+
+  WGPURenderPassDepthStencilAttachment depth_attachment = {};
+  depth_attachment.view = depth_view;
+  depth_attachment.depthLoadOp = WGPULoadOp_Clear;
+  depth_attachment.depthStoreOp = WGPUStoreOp_Store;
+  depth_attachment.depthClearValue = 1.0f;  // far plane
+  depth_attachment.depthReadOnly = false;
+
+  WGPURenderPassDescriptor raster_pass_desc = {};
+  raster_pass_desc.colorAttachmentCount = 2;
+  raster_pass_desc.colorAttachments = color_attachments;
+  raster_pass_desc.depthStencilAttachment = &depth_attachment;
+
+  // Clamp object count to the storage buffer's per-frame maximum.
+  const int num_objects =
+      (int)(scene_->objects.size() < (size_t)kGBufMaxObjects
+                ? scene_->objects.size()
+                : (size_t)kGBufMaxObjects);
+
+  if (num_objects > 0 && raster_pipeline_.get() != nullptr) {
+    WGPURenderPassEncoder raster_pass =
+        wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc);
+    wgpuRenderPassEncoderSetPipeline(raster_pass, raster_pipeline_.get());
+    wgpuRenderPassEncoderSetBindGroup(raster_pass, 0,
+                                      raster_bind_group_.get(), 0, nullptr);
+    // Draw 36 vertices (proxy box) × num_objects instances.
+    wgpuRenderPassEncoderDraw(raster_pass, 36, (uint32_t)num_objects, 0, 0);
+    wgpuRenderPassEncoderEnd(raster_pass);
+    wgpuRenderPassEncoderRelease(raster_pass);
+  } else {
+    // Clear passes with no draws still need to be submitted.
+    WGPURenderPassEncoder raster_pass =
+        wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc);
+    wgpuRenderPassEncoderEnd(raster_pass);
+    wgpuRenderPassEncoderRelease(raster_pass);
+  }
+
+  // Pass 2: SDF raymarching — TODO (placeholder: shadow=1, transp=0 already set)
+  // Pass 3: Lighting/shadow — TODO
+
+  // --- Pass 4: Pack compute ---
+  // Rebuild pack bind group with current node views.
+  // Construct a temporary bilinear sampler for this pass.
+  // NOTE(review): a sampler is created and released every frame; consider
+  // caching it as a member.
+  WGPUSampler bilinear = create_bilinear_sampler(ctx_.device);
+
+  // Get texture views from nodes.
+  // shadow / transp are GBUF_R8 nodes; use their views.
+  // NOTE(review): the placeholder shadow/transp views created in the ctor are
+  // not bound here — confirm which source the pack shader should sample.
+  WGPUTextureView shadow_view = nodes.get_view(node_shadow_);
+  WGPUTextureView transp_view = nodes.get_view(node_transp_);
+
+  // Build pack bind group (bindings 0-9).
+  WGPUBindGroupEntry pack_entries[10] = {};
+  pack_entries[0].binding = 0;
+  pack_entries[0].buffer = pack_res_uniform_.get().buffer;
+  pack_entries[0].size = sizeof(GBufResUniforms);
+
+  pack_entries[1].binding = 1;
+  pack_entries[1].textureView = albedo_view;
+
+  pack_entries[2].binding = 2;
+  pack_entries[2].textureView = normal_mat_view;
+
+  pack_entries[3].binding = 3;
+  pack_entries[3].textureView = depth_view;
+
+  pack_entries[4].binding = 4;
+  pack_entries[4].textureView = shadow_view;
+
+  pack_entries[5].binding = 5;
+  pack_entries[5].textureView = transp_view;
+
+  pack_entries[6].binding = 6;
+  pack_entries[6].textureView = prev_view;
+
+  pack_entries[7].binding = 7;
+  pack_entries[7].textureView = feat0_view;
+
+  pack_entries[8].binding = 8;
+  pack_entries[8].textureView = feat1_view;
+
+  pack_entries[9].binding = 9;
+  pack_entries[9].sampler = bilinear;
+
+  WGPUBindGroupLayout pack_bgl =
+      wgpuComputePipelineGetBindGroupLayout(pack_pipeline_.get(), 0);
+
+  WGPUBindGroupDescriptor pack_bg_desc = {};
+  pack_bg_desc.layout = pack_bgl;
+  pack_bg_desc.entryCount = 10;
+  pack_bg_desc.entries = pack_entries;
+
+  WGPUBindGroup pack_bg = wgpuDeviceCreateBindGroup(ctx_.device, &pack_bg_desc);
+  wgpuBindGroupLayoutRelease(pack_bgl);
+
+  WGPUComputePassDescriptor compute_pass_desc = {};
+  WGPUComputePassEncoder compute_pass =
+      wgpuCommandEncoderBeginComputePass(encoder, &compute_pass_desc);
+  wgpuComputePassEncoderSetPipeline(compute_pass, pack_pipeline_.get());
+  wgpuComputePassEncoderSetBindGroup(compute_pass, 0, pack_bg, 0, nullptr);
+
+  // One thread per pixel with 8×8 workgroups; round up to cover the edges.
+  const uint32_t wg_x = ((uint32_t)width_ + 7u) / 8u;
+  const uint32_t wg_y = ((uint32_t)height_ + 7u) / 8u;
+  wgpuComputePassEncoderDispatchWorkgroups(compute_pass, wg_x, wg_y, 1);
+  wgpuComputePassEncoderEnd(compute_pass);
+  wgpuComputePassEncoderRelease(compute_pass);
+
+  // The command buffer holds its own references; safe to release now.
+  wgpuBindGroupRelease(pack_bg);
+  wgpuSamplerRelease(bilinear);
+}
+
+// ---- private helpers ----
+
+// Grow the objects storage buffer to hold at least `num_objects` entries.
+// No-op when current capacity suffices; otherwise the old buffer is released
+// and a fresh one allocated at exactly the requested size.
+void GBufferEffect::ensure_objects_buffer(int num_objects) {
+  if (num_objects <= objects_buf_capacity_) {
+    return;  // capacity already sufficient
+  }
+  // Drop the previous allocation before replacing it.
+  if (objects_buf_.buffer) {
+    wgpuBufferRelease(objects_buf_.buffer);
+  }
+  const size_t byte_size = (size_t)num_objects * sizeof(GBufObjectData);
+  objects_buf_ = gpu_create_buffer(
+      ctx_.device, byte_size,
+      WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst);
+  objects_buf_capacity_ = num_objects;
+}
+
+// Writes the per-frame global uniforms and per-object data to the GPU.
+// The object count is clamped to kGBufMaxObjects and the objects buffer
+// grows on demand before the upload.
+void GBufferEffect::upload_scene_data(const Scene& scene,
+                                      const Camera& camera, float time) {
+  size_t clamped = scene.objects.size();
+  if (clamped > (size_t)kGBufMaxObjects) {
+    clamped = (size_t)kGBufMaxObjects;
+  }
+  const int count = (int)clamped;
+
+  // Camera matrices → combined view-projection and its inverse.
+  const mat4 view_mat = camera.get_view_matrix();
+  const mat4 proj_mat = camera.get_projection_matrix();
+  const mat4 view_proj = proj_mat * view_mat;
+
+  GBufGlobalUniforms globals = {};
+  globals.view_proj = view_proj;
+  globals.inv_view_proj = view_proj.inverse();
+  globals.camera_pos_time = vec4(camera.position.x, camera.position.y,
+                                 camera.position.z, time);
+  globals.params = vec4((float)count, 0.0f, 0.0f, 0.0f);
+  globals.resolution = vec2((float)width_, (float)height_);
+  globals.padding = vec2(0.0f, 0.0f);
+
+  wgpuQueueWriteBuffer(ctx_.queue, global_uniforms_buf_.buffer, 0,
+                       &globals, sizeof(GBufGlobalUniforms));
+
+  if (count == 0) {
+    return;  // no objects: only globals needed this frame
+  }
+
+  ensure_objects_buffer(count);
+
+  // Stage each object in the shader-side layout, then upload in one write.
+  std::vector<GBufObjectData> staging(clamped);
+  for (size_t i = 0; i < clamped; ++i) {
+    const Object3D& obj = scene.objects[i];
+    const mat4 model = obj.get_model_matrix();
+    staging[i].model = model;
+    staging[i].inv_model = model.inverse();
+    staging[i].color = obj.color;
+    staging[i].params = vec4(0.0f, 0.0f, 0.0f, 0.0f);
+  }
+  wgpuQueueWriteBuffer(ctx_.queue, objects_buf_.buffer, 0, staging.data(),
+                       clamped * sizeof(GBufObjectData));
+}
+
+// Builds the Pass-1 MRT raster pipeline from gbuf_raster_wgsl
+// (vs_main / fs_main). Returns early — deferring creation — when the shader
+// source has not been loaded yet. Intermediate layout/module handles are
+// released once the pipeline holds them.
+void GBufferEffect::create_raster_pipeline() {
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  // Load shader source.
+  const char* src = gbuf_raster_wgsl;
+  if (!src) {
+    return;  // Asset not loaded yet; pipeline creation deferred.
+  }
+
+  WGPUShaderSourceWGSL wgsl_src = {};
+  wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+  wgsl_src.code = str_view(src);
+
+  WGPUShaderModuleDescriptor shader_desc = {};
+  shader_desc.nextInChain = &wgsl_src.chain;
+  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+
+  // Bind group layout: B0 = GlobalUniforms, B1 = ObjectsBuffer (storage read)
+  WGPUBindGroupLayoutEntry bgl_entries[2] = {};
+  bgl_entries[0].binding = 0;
+  bgl_entries[0].visibility =
+      (WGPUShaderStage)(WGPUShaderStage_Vertex | WGPUShaderStage_Fragment);
+  bgl_entries[0].buffer.type = WGPUBufferBindingType_Uniform;
+  bgl_entries[0].buffer.minBindingSize = sizeof(GBufGlobalUniforms);
+
+  bgl_entries[1].binding = 1;
+  bgl_entries[1].visibility =
+      (WGPUShaderStage)(WGPUShaderStage_Vertex | WGPUShaderStage_Fragment);
+  bgl_entries[1].buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
+  // minBindingSize for a runtime-sized array is the size of one element.
+  bgl_entries[1].buffer.minBindingSize = sizeof(GBufObjectData);
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = 2;
+  bgl_desc.entries = bgl_entries;
+  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+
+  WGPUPipelineLayoutDescriptor pl_desc = {};
+  pl_desc.bindGroupLayoutCount = 1;
+  pl_desc.bindGroupLayouts = &bgl;
+  WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+
+  // Two color targets: albedo (rgba16float) and normal_mat (rgba16float)
+  WGPUColorTargetState color_targets[2] = {};
+  color_targets[0].format = WGPUTextureFormat_RGBA16Float;
+  color_targets[0].writeMask = WGPUColorWriteMask_All;
+  color_targets[1].format = WGPUTextureFormat_RGBA16Float;
+  color_targets[1].writeMask = WGPUColorWriteMask_All;
+
+  WGPUFragmentState frag = {};
+  frag.module = shader;
+  frag.entryPoint = str_view("fs_main");
+  frag.targetCount = 2;
+  frag.targets = color_targets;
+
+  // Standard depth test: write, pass when closer.
+  WGPUDepthStencilState ds = {};
+  ds.format = WGPUTextureFormat_Depth32Float;
+  ds.depthWriteEnabled = WGPUOptionalBool_True;
+  ds.depthCompare = WGPUCompareFunction_Less;
+
+  WGPURenderPipelineDescriptor pipe_desc = {};
+  pipe_desc.layout = pl;
+  pipe_desc.vertex.module = shader;
+  pipe_desc.vertex.entryPoint = str_view("vs_main");
+  pipe_desc.fragment = &frag;
+  pipe_desc.depthStencil = &ds;
+  pipe_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
+  pipe_desc.primitive.cullMode = WGPUCullMode_Back;
+  pipe_desc.multisample.count = 1;
+  pipe_desc.multisample.mask = 0xFFFFFFFF;
+
+  raster_pipeline_.set(wgpuDeviceCreateRenderPipeline(ctx_.device, &pipe_desc));
+
+  // The pipeline keeps its own references; release our local handles.
+  wgpuPipelineLayoutRelease(pl);
+  wgpuBindGroupLayoutRelease(bgl);
+  wgpuShaderModuleRelease(shader);
+}
+
+// Builds the Pass-4 pack compute pipeline from gbuf_pack_wgsl (entry point
+// "pack_features"). The bind group layout is spelled out explicitly
+// (bindings 0-9) so render() can recreate matching bind groups each frame.
+// Returns early — deferring creation — when the shader source is missing.
+void GBufferEffect::create_pack_pipeline() {
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  const char* src = gbuf_pack_wgsl;
+  if (!src) {
+    return;  // asset not loaded yet; creation deferred
+  }
+
+  WGPUShaderSourceWGSL wgsl_src = {};
+  wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+  wgsl_src.code = str_view(src);
+
+  WGPUShaderModuleDescriptor shader_desc = {};
+  shader_desc.nextInChain = &wgsl_src.chain;
+  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+
+  // Build explicit bind group layout for bindings 0-9.
+  WGPUBindGroupLayoutEntry bgl_entries[10] = {};
+
+  // B0: resolution uniform
+  bgl_entries[0].binding = 0;
+  bgl_entries[0].visibility = WGPUShaderStage_Compute;
+  bgl_entries[0].buffer.type = WGPUBufferBindingType_Uniform;
+  bgl_entries[0].buffer.minBindingSize = sizeof(GBufResUniforms);
+
+  // B1: gbuf_albedo (texture_2d<f32>)
+  bgl_entries[1].binding = 1;
+  bgl_entries[1].visibility = WGPUShaderStage_Compute;
+  bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B2: gbuf_normal_mat (texture_2d<f32>)
+  bgl_entries[2].binding = 2;
+  bgl_entries[2].visibility = WGPUShaderStage_Compute;
+  bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B3: gbuf_depth (texture_depth_2d)
+  bgl_entries[3].binding = 3;
+  bgl_entries[3].visibility = WGPUShaderStage_Compute;
+  bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Depth;
+  bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B4: gbuf_shadow (texture_2d<f32>)
+  bgl_entries[4].binding = 4;
+  bgl_entries[4].visibility = WGPUShaderStage_Compute;
+  bgl_entries[4].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[4].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B5: gbuf_transp (texture_2d<f32>)
+  bgl_entries[5].binding = 5;
+  bgl_entries[5].visibility = WGPUShaderStage_Compute;
+  bgl_entries[5].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[5].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B6: prev_cnn (texture_2d<f32>)
+  bgl_entries[6].binding = 6;
+  bgl_entries[6].visibility = WGPUShaderStage_Compute;
+  bgl_entries[6].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[6].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B7: feat_tex0 (storage texture, write, rgba32uint)
+  bgl_entries[7].binding = 7;
+  bgl_entries[7].visibility = WGPUShaderStage_Compute;
+  bgl_entries[7].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  bgl_entries[7].storageTexture.format = WGPUTextureFormat_RGBA32Uint;
+  bgl_entries[7].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B8: feat_tex1 (storage texture, write, rgba32uint)
+  bgl_entries[8].binding = 8;
+  bgl_entries[8].visibility = WGPUShaderStage_Compute;
+  bgl_entries[8].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  bgl_entries[8].storageTexture.format = WGPUTextureFormat_RGBA32Uint;
+  bgl_entries[8].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B9: bilinear sampler
+  bgl_entries[9].binding = 9;
+  bgl_entries[9].visibility = WGPUShaderStage_Compute;
+  bgl_entries[9].sampler.type = WGPUSamplerBindingType_Filtering;
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = 10;
+  bgl_desc.entries = bgl_entries;
+  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+
+  WGPUPipelineLayoutDescriptor pl_desc = {};
+  pl_desc.bindGroupLayoutCount = 1;
+  pl_desc.bindGroupLayouts = &bgl;
+  WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+
+  WGPUComputePipelineDescriptor pipe_desc = {};
+  pipe_desc.layout = pl;
+  pipe_desc.compute.module = shader;
+  pipe_desc.compute.entryPoint = str_view("pack_features");
+
+  pack_pipeline_.set(wgpuDeviceCreateComputePipeline(ctx_.device, &pipe_desc));
+
+  // The pipeline keeps its own references; release our local handles.
+  wgpuPipelineLayoutRelease(pl);
+  wgpuBindGroupLayoutRelease(bgl);
+  wgpuShaderModuleRelease(shader);
+}
+
+// Recreates the Pass-1 bind group (group 0: globals UBO + objects storage).
+// Rebuilt every frame because the underlying resources may be reallocated.
+void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) {
+  (void)nodes;
+  // Drop last frame's bind group first.
+  raster_bind_group_.replace(nullptr);
+
+  if (raster_pipeline_.get() == nullptr) {
+    return;  // pipeline creation was deferred; nothing to bind against
+  }
+
+  WGPUBindGroupEntry bind_entries[2] = {};
+  bind_entries[0].binding = 0;
+  bind_entries[0].buffer = global_uniforms_buf_.buffer;
+  bind_entries[0].size = sizeof(GBufGlobalUniforms);
+  bind_entries[1].binding = 1;
+  bind_entries[1].buffer = objects_buf_.buffer;
+  bind_entries[1].size = (size_t)objects_buf_capacity_ * sizeof(GBufObjectData);
+
+  WGPUBindGroupLayout layout =
+      wgpuRenderPipelineGetBindGroupLayout(raster_pipeline_.get(), 0);
+
+  WGPUBindGroupDescriptor desc = {};
+  desc.layout = layout;
+  desc.entryCount = 2;
+  desc.entries = bind_entries;
+
+  raster_bind_group_.replace(wgpuDeviceCreateBindGroup(ctx_.device, &desc));
+  wgpuBindGroupLayoutRelease(layout);
+}
+
+// Intentionally a no-op: the pack bind group must reference the node views
+// that are valid at dispatch time, so render() builds it inline each frame.
+void GBufferEffect::update_pack_bind_group(NodeRegistry& nodes) {
+  static_cast<void>(nodes);
+}
diff --git a/cnn_v3/src/gbuffer_effect.h b/cnn_v3/src/gbuffer_effect.h
new file mode 100644
index 0000000..42fb0ec
--- /dev/null
+++ b/cnn_v3/src/gbuffer_effect.h
@@ -0,0 +1,79 @@
+// GBufferEffect: Multi-pass G-buffer rendering for CNN v3 input
+// Outputs: gbuf_feat0, gbuf_feat1 (packed rgba32uint feature textures, 32 bytes/pixel)
+
+#pragma once
+
+#include "3d/camera.h"
+#include "3d/scene.h"
+#include "gpu/effect.h"
+#include "gpu/sequence.h"
+#include "gpu/uniform_helper.h"
+#include "gpu/wgpu_resource.h"
+#include "util/mini_math.h"
+
+// Uniform for the pack compute shader
+// 16-byte UBO: xy = output resolution in pixels; zw pad to 16 bytes so the
+// layout matches the WGSL uniform block (size pinned by the static_assert).
+struct GBufResUniforms {
+  vec2 resolution;  // width, height in pixels
+  float _pad0;      // alignment padding only
+  float _pad1;
+};
+static_assert(sizeof(GBufResUniforms) == 16,
+              "GBufResUniforms must be 16 bytes");
+
+// Effect that rasterizes scene proxy geometry into a multi-target G-buffer
+// and packs the result into two rgba32uint feature textures consumed by the
+// CNN v3 pipeline. Scene/camera are injected via set_scene(); both are
+// borrowed pointers and rendering is skipped while either is null.
+class GBufferEffect : public Effect {
+ public:
+  // outputs[0]/outputs[1] name the packed feature textures; outputs[0] also
+  // serves as the prefix for the internal G-buffer node names.
+  GBufferEffect(const GpuContext& ctx, const std::vector<std::string>& inputs,
+                const std::vector<std::string>& outputs, float start_time,
+                float end_time);
+
+  // Registers internal G-buffer nodes (and feature outputs if absent).
+  void declare_nodes(NodeRegistry& registry) override;
+
+  // Records the raster + pack passes for one frame into `encoder`.
+  void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params,
+              NodeRegistry& nodes) override;
+
+  // Sets the (non-owned) scene and camera to render.
+  void set_scene(const Scene* scene, const Camera* camera);
+
+ private:
+  // Internal G-buffer node names
+  std::string node_albedo_;
+  std::string node_normal_mat_;
+  std::string node_depth_;
+  std::string node_shadow_;
+  std::string node_transp_;
+  std::string node_feat0_;
+  std::string node_feat1_;
+
+  // Borrowed pointers set via set_scene(); null disables rendering.
+  const Scene* scene_ = nullptr;
+  const Camera* camera_ = nullptr;
+
+  // Pass 1: MRT rasterization pipeline
+  RenderPipeline raster_pipeline_;
+  BindGroup raster_bind_group_;
+
+  // Pass 4: Pack compute pipeline
+  ComputePipeline pack_pipeline_;
+  BindGroup pack_bind_group_;
+  UniformBuffer<GBufResUniforms> pack_res_uniform_;
+
+  // Placeholder textures for shadow/transp (white/black cleared once)
+  Texture shadow_placeholder_tex_;
+  TextureView shadow_placeholder_view_;
+  Texture transp_placeholder_tex_;
+  TextureView transp_placeholder_view_;
+
+  // GPU-side object data buffers (global uniforms + objects storage)
+  // These mirror the layout expected by gbuf_raster.wgsl
+  GpuBuffer global_uniforms_buf_;
+  GpuBuffer objects_buf_;
+  int objects_buf_capacity_ = 0;  // number of ObjectData slots allocated
+
+  // Pipeline construction; both no-op if shader source is unavailable.
+  void create_raster_pipeline();
+  void create_pack_pipeline();
+
+  // Per-frame bind group maintenance.
+  void update_raster_bind_group(NodeRegistry& nodes);
+  void update_pack_bind_group(NodeRegistry& nodes);
+
+  // Per-frame uniform/object uploads and buffer growth.
+  void upload_scene_data(const Scene& scene, const Camera& camera,
+                         float time);
+  void ensure_objects_buffer(int num_objects);
+};