10 files changed, 1518 insertions, 7 deletions
diff --git a/TODO.md b/TODO.md
index 0ced5e8..e7371ca 100644
--- a/TODO.md
+++ b/TODO.md
@@ -60,20 +60,22 @@ Ongoing shader code hygiene for granular, reusable snippets.
 
 ---
 
-## Future: CNN v3 — U-Net + FiLM
+## CNN v3 — U-Net + FiLM [IN PROGRESS]
 
 U-Net architecture with FiLM conditioning. Runtime style control via beat/audio.
 Richer G-buffer input (normals, depth, material IDs). Per-pixel testability across
 PyTorch / HTML WebGPU / C++ WebGPU.
 
-**Prerequisites:** G-buffer implementation (GEOM_BUFFER.md)
 **Design:** `cnn_v3/docs/CNN_V3.md`
 
 **Phases:**
-1. G-buffer prerequisite
-2. Training infrastructure (Blender exporter + photo pipeline)
-3. WGSL shaders (enc/dec/bottleneck, deterministic ops)
-4. C++ effect class + FiLM uniform upload
+1. ✅ G-buffer: `GBufferEffect` (MRT raster + pack compute). SDF/shadow passes TODO.
+   - New NodeTypes: `GBUF_ALBEDO`, `GBUF_DEPTH32`, `GBUF_R8`, `GBUF_RGBA32UINT`
+   - Shaders: `cnn_v3/shaders/gbuf_raster.wgsl`, `gbuf_pack.wgsl`
+   - CMake integration deferred
+2. ✅ Training infrastructure: `blender_export.py`, `pack_blender_sample.py`, `pack_photo_sample.py`
+3. WGSL shaders (enc/dec/bottleneck, FiLM, deterministic ops)
+4. C++ CNNv3Effect + FiLM uniform upload
 5. Parity validation (test vectors, ≤1/255 per pixel)
 
 ## Future: CNN v2 8-bit Quantization
diff --git a/cnn_v3/shaders/gbuf_pack.wgsl b/cnn_v3/shaders/gbuf_pack.wgsl
new file mode 100644
index 0000000..71d8471
--- /dev/null
+++ b/cnn_v3/shaders/gbuf_pack.wgsl
@@ -0,0 +1,123 @@
+// G-buffer pack compute shader for CNN v3
+// Pass 4: Pack all G-buffer channels into two rgba32uint feature textures (32 bytes/pixel)
+// Output feat_tex0 holds 8×f16 geometric channels; feat_tex1 holds 12×u8 context channels.
+
+struct GBufRes {
+    resolution: vec2f,
+}
+
+@group(0) @binding(0) var<uniform> gbuf_res:      GBufRes;
+@group(0) @binding(1) var gbuf_albedo:     texture_2d<f32>;
+@group(0) @binding(2) var gbuf_normal_mat: texture_2d<f32>;
+@group(0) @binding(3) var gbuf_depth:      texture_depth_2d;
+@group(0) @binding(4) var gbuf_shadow:     texture_2d<f32>;
+@group(0) @binding(5) var gbuf_transp:     texture_2d<f32>;
+@group(0) @binding(6) var prev_cnn:        texture_2d<f32>;
+@group(0) @binding(7) var feat_tex0:       texture_storage_2d<rgba32uint, write>;
+@group(0) @binding(8) var feat_tex1:       texture_storage_2d<rgba32uint, write>;
+@group(0) @binding(9) var bilinear_sampler: sampler;
+
+// Sample depth texture at integer coordinate, clamp to borders.
+fn load_depth(coord: vec2i) -> f32 {
+    let dims = vec2i(textureDimensions(gbuf_depth));
+    let c    = clamp(coord, vec2i(0), dims - vec2i(1));
+    return textureLoad(gbuf_depth, c, 0);
+}
+
+// Box-filter albedo: average of 2×2 texels starting at top-left corner `tl`.
+fn box2(tl: vec2i) -> vec3f {
+    let a = textureLoad(gbuf_albedo, tl + vec2i(0, 0), 0).rgb;
+    let b = textureLoad(gbuf_albedo, tl + vec2i(1, 0), 0).rgb;
+    let c = textureLoad(gbuf_albedo, tl + vec2i(0, 1), 0).rgb;
+    let d = textureLoad(gbuf_albedo, tl + vec2i(1, 1), 0).rgb;
+    return (a + b + c + d) * 0.25;
+}
+
+// Box-filter albedo: average of 4×4 texels starting at top-left corner `tl`.
+fn box4(tl: vec2i) -> vec3f {
+    var acc = vec3f(0.0);
+    for (var dy: i32 = 0; dy < 4; dy++) {
+        for (var dx: i32 = 0; dx < 4; dx++) {
+            acc += textureLoad(gbuf_albedo, tl + vec2i(dx, dy), 0).rgb;
+        }
+    }
+    return acc * (1.0 / 16.0);
+}
+
+// Decode oct-normal from [0,1] storage → [-1,1] encoded xy → reconstruct z.
+fn decode_oct_normal(rg: vec2f) -> vec3f {
+    let f = rg * 2.0 - vec2f(1.0);
+    var n = vec3f(f.x, f.y, 1.0 - abs(f.x) - abs(f.y));
+    let t = max(-n.z, 0.0);
+    n.x += select(t, -t, n.x >= 0.0);
+    n.y += select(t, -t, n.y >= 0.0);
+    return normalize(n);
+}
+
+@compute @workgroup_size(8, 8)
+fn pack_features(@builtin(global_invocation_id) id: vec3u) {
+    let coord = vec2i(id.xy);
+    let dims  = vec2i(textureDimensions(gbuf_albedo));
+    if (coord.x >= dims.x || coord.y >= dims.y) { return; }
+
+    let uv = (vec2f(coord) + vec2f(0.5)) / gbuf_res.resolution;
+
+    // --- Geometric channels (high precision, f16 packed) ---
+    let albedo      = textureLoad(gbuf_albedo,     coord, 0).rgb;
+    let nm          = textureLoad(gbuf_normal_mat, coord, 0);
+    let depth_raw   = load_depth(coord);
+
+    // Finite-difference depth gradient (central difference, clamped coords)
+    let dzdx = (load_depth(coord + vec2i(1, 0)) - load_depth(coord - vec2i(1, 0))) * 0.5;
+    let dzdy = (load_depth(coord + vec2i(0, 1)) - load_depth(coord - vec2i(0, 1))) * 0.5;
+
+    // Normal: stored as oct-encoded [0,1] in RG; extract just the encoded xy for feat_tex0
+    let normal_enc = nm.rg; // already in [0,1] — decode to get the xy for CNN input
+    let n3         = decode_oct_normal(normal_enc);
+    // Store oct-encoded in [-1,1] remapped back to what CNN expects (the [-1,1] oct xy)
+    let oct_xy     = normal_enc * 2.0 - vec2f(1.0); // remap [0,1] → [-1,1]
+
+    // Texture 0: 4 u32, each = pack2x16float of two f16 values
+    // [0] albedo.r  | albedo.g
+    // [1] albedo.b  | normal.x (oct, [-1,1])
+    // [2] normal.y  | depth
+    // [3] dzdx      | dzdy
+    let t0 = vec4u(
+        pack2x16float(albedo.rg),
+        pack2x16float(vec2f(albedo.b, oct_xy.x)),
+        pack2x16float(vec2f(oct_xy.y, depth_raw)),
+        pack2x16float(vec2f(dzdx, dzdy))
+    );
+    textureStore(feat_tex0, coord, t0);
+
+    // --- Context channels (low precision, u8 packed) ---
+    let mat_id_u8 = nm.b;                                       // mat_id already in [0,1]
+    let shadow    = textureLoad(gbuf_shadow, coord, 0).r;
+    let transp    = textureLoad(gbuf_transp, coord, 0).r;
+    let prev      = textureSampleLevel(prev_cnn, bilinear_sampler, uv, 0.0).rgb;
+
+    // MIP 1: 2×2 box filter (half resolution context)
+    // Use top-left aligned 2×2 block at half-res position
+    let tl1  = coord * 2;  // this pixel's 2×2 region in full-res (mip1 is at half-res)
+    // Actually we want to sample the neighborhood around this pixel for downsampled context.
+    // mip1: sample a 2×2 box centered on the pixel in full-res coordinates
+    let tl1c = max(coord - vec2i(0), vec2i(0));
+    let mip1 = box2(tl1c);
+
+    // mip2: sample a 4×4 box
+    let tl2c = max(coord - vec2i(1), vec2i(0));
+    let mip2 = box4(tl2c);
+
+    // Texture 1: 4 u32, each = pack4x8unorm of four u8 values
+    // [0] mat_id | prev.r | prev.g | prev.b
+    // [1] mip1.r | mip1.g | mip1.b | mip2.r
+    // [2] mip2.g | mip2.b | shadow | transp
+    // [3] spare  (0)
+    let t1 = vec4u(
+        pack4x8unorm(vec4f(mat_id_u8, prev.r, prev.g, prev.b)),
+        pack4x8unorm(vec4f(mip1.r, mip1.g, mip1.b, mip2.r)),
+        pack4x8unorm(vec4f(mip2.g, mip2.b, shadow, transp)),
+        0u
+    );
+    textureStore(feat_tex1, coord, t1);
+}
diff --git a/cnn_v3/shaders/gbuf_raster.wgsl b/cnn_v3/shaders/gbuf_raster.wgsl
new file mode 100644
index 0000000..c762db2
--- /dev/null
+++ b/cnn_v3/shaders/gbuf_raster.wgsl
@@ -0,0 +1,105 @@
+// G-buffer rasterization shader for CNN v3
+// Pass 1: Proxy geometry → MRT (albedo rgba16float, normal_mat rgba16float, depth32)
+// Uses GlobalUniforms, ObjectData, ObjectsBuffer from common_uniforms.
+
+#include "common_uniforms"
+
+@group(0) @binding(0) var<uniform> globals: GlobalUniforms;
+@group(0) @binding(1) var<storage, read> object_data: ObjectsBuffer;
+
+struct VertexOutput {
+    @builtin(position)              position:       vec4f,
+    @location(0)                    world_pos:      vec3f,
+    @location(1)                    world_normal:   vec3f,
+    @location(2)                    color:          vec4f,
+    @location(3) @interpolate(flat) instance_index: u32,
+}
+
+// Octahedral encoding: maps unit normal to [-1,1]^2
+fn oct_encode(n: vec3f) -> vec2f {
+    let inv_l1 = 1.0 / (abs(n.x) + abs(n.y) + abs(n.z));
+    var p = n.xy * inv_l1;
+    // Fold lower hemisphere
+    if (n.z < 0.0) {
+        let s = vec2f(
+            select(-1.0, 1.0, p.x >= 0.0),
+            select(-1.0, 1.0, p.y >= 0.0)
+        );
+        p = (1.0 - abs(p.yx)) * s;
+    }
+    return p; // in [-1, 1]
+}
+
+@vertex
+fn vs_main(
+    @builtin(vertex_index)   vertex_index:   u32,
+    @builtin(instance_index) instance_index: u32
+) -> VertexOutput {
+    // Proxy box vertices (same as renderer_3d.wgsl)
+    var pos = array<vec3f, 36>(
+        vec3f(-1.0, -1.0,  1.0), vec3f( 1.0, -1.0,  1.0), vec3f( 1.0,  1.0,  1.0),
+        vec3f(-1.0, -1.0,  1.0), vec3f( 1.0,  1.0,  1.0), vec3f(-1.0,  1.0,  1.0),
+        vec3f(-1.0, -1.0, -1.0), vec3f(-1.0,  1.0, -1.0), vec3f( 1.0,  1.0, -1.0),
+        vec3f(-1.0, -1.0, -1.0), vec3f( 1.0,  1.0, -1.0), vec3f( 1.0, -1.0, -1.0),
+        vec3f(-1.0,  1.0, -1.0), vec3f(-1.0,  1.0,  1.0), vec3f( 1.0,  1.0,  1.0),
+        vec3f(-1.0,  1.0, -1.0), vec3f( 1.0,  1.0,  1.0), vec3f( 1.0,  1.0, -1.0),
+        vec3f(-1.0, -1.0, -1.0), vec3f( 1.0, -1.0, -1.0), vec3f( 1.0, -1.0,  1.0),
+        vec3f(-1.0, -1.0, -1.0), vec3f( 1.0, -1.0,  1.0), vec3f(-1.0, -1.0,  1.0),
+        vec3f( 1.0, -1.0, -1.0), vec3f( 1.0,  1.0, -1.0), vec3f( 1.0,  1.0,  1.0),
+        vec3f( 1.0, -1.0, -1.0), vec3f( 1.0,  1.0,  1.0), vec3f( 1.0, -1.0,  1.0),
+        vec3f(-1.0, -1.0, -1.0), vec3f(-1.0, -1.0,  1.0), vec3f(-1.0,  1.0,  1.0),
+        vec3f(-1.0, -1.0, -1.0), vec3f(-1.0,  1.0,  1.0), vec3f(-1.0,  1.0, -1.0)
+    );
+
+    // Proxy face normals (one per 2 triangles = 6 faces × 6 verts = 36)
+    var nrm = array<vec3f, 36>(
+        vec3f(0,0,1), vec3f(0,0,1), vec3f(0,0,1),
+        vec3f(0,0,1), vec3f(0,0,1), vec3f(0,0,1),
+        vec3f(0,0,-1), vec3f(0,0,-1), vec3f(0,0,-1),
+        vec3f(0,0,-1), vec3f(0,0,-1), vec3f(0,0,-1),
+        vec3f(0,1,0), vec3f(0,1,0), vec3f(0,1,0),
+        vec3f(0,1,0), vec3f(0,1,0), vec3f(0,1,0),
+        vec3f(0,-1,0), vec3f(0,-1,0), vec3f(0,-1,0),
+        vec3f(0,-1,0), vec3f(0,-1,0), vec3f(0,-1,0),
+        vec3f(1,0,0), vec3f(1,0,0), vec3f(1,0,0),
+        vec3f(1,0,0), vec3f(1,0,0), vec3f(1,0,0),
+        vec3f(-1,0,0), vec3f(-1,0,0), vec3f(-1,0,0),
+        vec3f(-1,0,0), vec3f(-1,0,0), vec3f(-1,0,0)
+    );
+
+    let obj = object_data.objects[instance_index];
+    let p = pos[vertex_index];
+    let n = nrm[vertex_index];
+
+    let world_pos    = obj.model * vec4f(p, 1.0);
+    let clip_pos     = globals.view_proj * world_pos;
+    // Transform normal by inverse-transpose (upper-left 3×3 of inv_model^T)
+    let world_normal = normalize((obj.inv_model * vec4f(n, 0.0)).xyz);
+
+    var out: VertexOutput;
+    out.position       = clip_pos;
+    out.world_pos      = world_pos.xyz;
+    out.world_normal   = world_normal;
+    out.color          = obj.color;
+    out.instance_index = instance_index;
+    return out;
+}
+
+struct GBufOutput {
+    @location(0) albedo:     vec4f,  // rgba16float: material color
+    @location(1) normal_mat: vec4f,  // rgba16float: oct-normal XY in RG, mat_id/255 in B
+}
+
+@fragment
+fn fs_main(in: VertexOutput) -> GBufOutput {
+    let obj    = object_data.objects[in.instance_index];
+    let mat_id = f32(in.instance_index) / 255.0;
+
+    // Oct-encode world normal, remap [-1,1] → [0,1] for storage
+    let oct = oct_encode(normalize(in.world_normal)) * 0.5 + vec2f(0.5);
+
+    var out: GBufOutput;
+    out.albedo     = vec4f(in.color.rgb, 1.0);
+    out.normal_mat = vec4f(oct.x, oct.y, mat_id, 0.0);
+    return out;
+}
diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc
new file mode 100644
index 0000000..fb0146e
--- /dev/null
+++ b/cnn_v3/src/gbuffer_effect.cc
@@ -0,0 +1,596 @@
+// GBufferEffect implementation
+// Rasterizes proxy geometry to MRT G-buffer, then packs into CNN v3 feature textures.
+
+#include "gbuffer_effect.h"
+#include "3d/object.h"
+#include "gpu/gpu.h"
+#include "util/fatal_error.h"
+#include "util/mini_math.h"
+#include <cstring>
+#include <vector>
+
+// Shader source (loaded from asset at runtime — declared extern by the build system)
+// For standalone use outside the asset system, the caller must ensure the WGSL
+// source strings are available.  They are declared here as weak-linkable externs.
+extern const char* gbuf_raster_wgsl;
+extern const char* gbuf_pack_wgsl;
+
+// Maximum number of objects the G-buffer supports per frame.
+static const int kGBufMaxObjects = 256;
+
+// ObjectData struct that mirrors the WGSL layout in gbuf_raster.wgsl and renderer.h
+struct GBufObjectData {
+  mat4 model;
+  mat4 inv_model;
+  vec4 color;
+  vec4 params; // x = object type, y = plane_distance
+};
+static_assert(sizeof(GBufObjectData) == sizeof(float) * 40,
+              "GBufObjectData must be 160 bytes");
+
+// GlobalUniforms struct mirroring renderer.h
+struct GBufGlobalUniforms {
+  mat4 view_proj;
+  mat4 inv_view_proj;
+  vec4 camera_pos_time;
+  vec4 params;        // x = num_objects
+  vec2 resolution;
+  vec2 padding;
+};
+static_assert(sizeof(GBufGlobalUniforms) == sizeof(float) * 44,
+              "GBufGlobalUniforms must be 176 bytes");
+
+// Helper: create a 1×1 placeholder texture of a given format cleared to `value`.
+static WGPUTexture create_placeholder_tex(WGPUDevice device,
+                                          WGPUTextureFormat format,
+                                          float value) {
+  WGPUTextureDescriptor desc = {};
+  desc.usage = (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding |
+                                  WGPUTextureUsage_CopyDst);
+  desc.dimension = WGPUTextureDimension_2D;
+  desc.size = {1, 1, 1};
+  desc.format = format;
+  desc.mipLevelCount = 1;
+  desc.sampleCount = 1;
+  WGPUTexture tex = wgpuDeviceCreateTexture(device, &desc);
+  return tex;
+}
+
+// Helper: write a single RGBA float pixel to a texture via queue.
+static void write_placeholder_pixel(WGPUQueue queue, WGPUTexture tex,
+                                    float r, float g, float b, float a) {
+  const float data[4] = {r, g, b, a};
+  WGPUTexelCopyTextureInfo dst = {};
+  dst.texture = tex;
+  dst.mipLevel = 0;
+  dst.origin = {0, 0, 0};
+  dst.aspect = WGPUTextureAspect_All;
+
+  WGPUTexelCopyBufferLayout layout = {};
+  layout.offset = 0;
+  layout.bytesPerRow = 16; // 4 × sizeof(float)
+  layout.rowsPerImage = 1;
+
+  const WGPUExtent3D extent = {1, 1, 1};
+  wgpuQueueWriteTexture(queue, &dst, data, sizeof(data), &layout, &extent);
+}
+
+// Create bilinear sampler.
+static WGPUSampler create_bilinear_sampler(WGPUDevice device) {
+  WGPUSamplerDescriptor desc = {};
+  desc.addressModeU = WGPUAddressMode_ClampToEdge;
+  desc.addressModeV = WGPUAddressMode_ClampToEdge;
+  desc.magFilter = WGPUFilterMode_Linear;
+  desc.minFilter = WGPUFilterMode_Linear;
+  desc.mipmapFilter = WGPUMipmapFilterMode_Linear;
+  desc.maxAnisotropy = 1;
+  return wgpuDeviceCreateSampler(device, &desc);
+}
+
+// ---- GBufferEffect ----
+
+GBufferEffect::GBufferEffect(const GpuContext& ctx,
+                             const std::vector<std::string>& inputs,
+                             const std::vector<std::string>& outputs,
+                             float start_time, float end_time)
+    : Effect(ctx, inputs, outputs, start_time, end_time) {
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  // Derive internal node name prefix from the first output name.
+  const std::string& prefix = outputs.empty() ? "gbuf" : outputs[0];
+  node_albedo_     = prefix + "_albedo";
+  node_normal_mat_ = prefix + "_normal_mat";
+  node_depth_      = prefix + "_depth";
+  node_shadow_     = prefix + "_shadow";
+  node_transp_     = prefix + "_transp";
+  node_feat0_      = outputs.size() > 0 ? outputs[0] : prefix + "_feat0";
+  node_feat1_      = outputs.size() > 1 ? outputs[1] : prefix + "_feat1";
+
+  // Allocate GPU buffers for scene data.
+  global_uniforms_buf_ =
+      gpu_create_buffer(ctx_.device, sizeof(GBufGlobalUniforms),
+                        WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
+
+  ensure_objects_buffer(kGBufMaxObjects);
+
+  // Resolution uniform for pack shader.
+  pack_res_uniform_.init(ctx_.device);
+
+  // Placeholder shadow (1.0 = fully lit) and transp (0.0 = opaque) textures.
+  shadow_placeholder_tex_.set(
+      create_placeholder_tex(ctx_.device, WGPUTextureFormat_RGBA32Float, 1.0f));
+  write_placeholder_pixel(ctx_.queue,
+                          shadow_placeholder_tex_.get(), 1.0f, 0.0f, 0.0f, 1.0f);
+
+  transp_placeholder_tex_.set(
+      create_placeholder_tex(ctx_.device, WGPUTextureFormat_RGBA32Float, 0.0f));
+  write_placeholder_pixel(ctx_.queue,
+                          transp_placeholder_tex_.get(), 0.0f, 0.0f, 0.0f, 1.0f);
+
+  WGPUTextureViewDescriptor vd = {};
+  vd.format = WGPUTextureFormat_RGBA32Float;
+  vd.dimension = WGPUTextureViewDimension_2D;
+  vd.baseMipLevel = 0;
+  vd.mipLevelCount = 1;
+  vd.baseArrayLayer = 0;
+  vd.arrayLayerCount = 1;
+  vd.aspect = WGPUTextureAspect_All;
+
+  shadow_placeholder_view_.set(
+      wgpuTextureCreateView(shadow_placeholder_tex_.get(), &vd));
+  transp_placeholder_view_.set(
+      wgpuTextureCreateView(transp_placeholder_tex_.get(), &vd));
+
+  create_raster_pipeline();
+  create_pack_pipeline();
+}
+
+void GBufferEffect::declare_nodes(NodeRegistry& registry) {
+  registry.declare_node(node_albedo_,     NodeType::GBUF_ALBEDO,    -1, -1);
+  registry.declare_node(node_normal_mat_, NodeType::GBUF_ALBEDO,    -1, -1);
+  registry.declare_node(node_depth_,      NodeType::GBUF_DEPTH32,   -1, -1);
+  registry.declare_node(node_shadow_,     NodeType::GBUF_R8,        -1, -1);
+  registry.declare_node(node_transp_,     NodeType::GBUF_R8,        -1, -1);
+  // feat_tex0 / feat_tex1 are the declared output_nodes_ — they get registered
+  // by the sequence infrastructure; declare them here as well if not already.
+  if (!registry.has_node(node_feat0_)) {
+    registry.declare_node(node_feat0_, NodeType::GBUF_RGBA32UINT, -1, -1);
+  }
+  if (!registry.has_node(node_feat1_)) {
+    registry.declare_node(node_feat1_, NodeType::GBUF_RGBA32UINT, -1, -1);
+  }
+}
+
+void GBufferEffect::set_scene(const Scene* scene, const Camera* camera) {
+  scene_  = scene;
+  camera_ = camera;
+}
+
+void GBufferEffect::render(WGPUCommandEncoder encoder,
+                           const UniformsSequenceParams& params,
+                           NodeRegistry& nodes) {
+  if (!scene_ || !camera_) {
+    return;
+  }
+
+  upload_scene_data(*scene_, *camera_, params.time);
+
+  // Update resolution uniform for pack shader.
+  GBufResUniforms res_uni;
+  res_uni.resolution = params.resolution;
+  res_uni._pad0 = 0.0f;
+  res_uni._pad1 = 0.0f;
+  pack_res_uniform_.update(ctx_.queue, res_uni);
+
+  WGPUTextureView albedo_view     = nodes.get_view(node_albedo_);
+  WGPUTextureView normal_mat_view = nodes.get_view(node_normal_mat_);
+  WGPUTextureView depth_view      = nodes.get_view(node_depth_);
+  WGPUTextureView feat0_view      = nodes.get_view(node_feat0_);
+  WGPUTextureView feat1_view      = nodes.get_view(node_feat1_);
+
+  // prev_cnn: first input node if available, else dummy.
+  WGPUTextureView prev_view = nullptr;
+  if (!input_nodes_.empty()) {
+    prev_view = nodes.get_view(input_nodes_[0]);
+  }
+  if (!prev_view) {
+    prev_view = dummy_texture_view_.get();
+  }
+
+  // --- Pass 1: MRT rasterization ---
+  update_raster_bind_group(nodes);
+
+  WGPURenderPassColorAttachment color_attachments[2] = {};
+  // Attachment 0: albedo
+  color_attachments[0].view = albedo_view;
+  color_attachments[0].loadOp = WGPULoadOp_Clear;
+  color_attachments[0].storeOp = WGPUStoreOp_Store;
+  color_attachments[0].clearValue = {0.0f, 0.0f, 0.0f, 1.0f};
+  color_attachments[0].depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
+  // Attachment 1: normal_mat
+  color_attachments[1].view = normal_mat_view;
+  color_attachments[1].loadOp = WGPULoadOp_Clear;
+  color_attachments[1].storeOp = WGPUStoreOp_Store;
+  color_attachments[1].clearValue = {0.5f, 0.5f, 0.0f, 0.0f};
+  color_attachments[1].depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
+
+  WGPURenderPassDepthStencilAttachment depth_attachment = {};
+  depth_attachment.view = depth_view;
+  depth_attachment.depthLoadOp = WGPULoadOp_Clear;
+  depth_attachment.depthStoreOp = WGPUStoreOp_Store;
+  depth_attachment.depthClearValue = 1.0f;
+  depth_attachment.depthReadOnly = false;
+
+  WGPURenderPassDescriptor raster_pass_desc = {};
+  raster_pass_desc.colorAttachmentCount = 2;
+  raster_pass_desc.colorAttachments = color_attachments;
+  raster_pass_desc.depthStencilAttachment = &depth_attachment;
+
+  const int num_objects =
+      (int)(scene_->objects.size() < (size_t)kGBufMaxObjects
+                ? scene_->objects.size()
+                : (size_t)kGBufMaxObjects);
+
+  if (num_objects > 0 && raster_pipeline_.get() != nullptr) {
+    WGPURenderPassEncoder raster_pass =
+        wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc);
+    wgpuRenderPassEncoderSetPipeline(raster_pass, raster_pipeline_.get());
+    wgpuRenderPassEncoderSetBindGroup(raster_pass, 0,
+                                     raster_bind_group_.get(), 0, nullptr);
+    // Draw 36 vertices (proxy box) × num_objects instances.
+    wgpuRenderPassEncoderDraw(raster_pass, 36, (uint32_t)num_objects, 0, 0);
+    wgpuRenderPassEncoderEnd(raster_pass);
+    wgpuRenderPassEncoderRelease(raster_pass);
+  } else {
+    // Clear passes with no draws still need to be submitted.
+    WGPURenderPassEncoder raster_pass =
+        wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc);
+    wgpuRenderPassEncoderEnd(raster_pass);
+    wgpuRenderPassEncoderRelease(raster_pass);
+  }
+
+  // Pass 2: SDF raymarching — TODO (placeholder: shadow=1, transp=0 already set)
+  // Pass 3: Lighting/shadow — TODO
+
+  // --- Pass 4: Pack compute ---
+  // Rebuild pack bind group with current node views.
+  // Construct a temporary bilinear sampler for this pass.
+  WGPUSampler bilinear = create_bilinear_sampler(ctx_.device);
+
+  // Get texture views from nodes.
+  // shadow / transp are GBUF_R8 nodes; use their views.
+  WGPUTextureView shadow_view = nodes.get_view(node_shadow_);
+  WGPUTextureView transp_view = nodes.get_view(node_transp_);
+
+  // Build pack bind group (bindings 0-9).
+  WGPUBindGroupEntry pack_entries[10] = {};
+  pack_entries[0].binding = 0;
+  pack_entries[0].buffer = pack_res_uniform_.get().buffer;
+  pack_entries[0].size = sizeof(GBufResUniforms);
+
+  pack_entries[1].binding = 1;
+  pack_entries[1].textureView = albedo_view;
+
+  pack_entries[2].binding = 2;
+  pack_entries[2].textureView = normal_mat_view;
+
+  pack_entries[3].binding = 3;
+  pack_entries[3].textureView = depth_view;
+
+  pack_entries[4].binding = 4;
+  pack_entries[4].textureView = shadow_view;
+
+  pack_entries[5].binding = 5;
+  pack_entries[5].textureView = transp_view;
+
+  pack_entries[6].binding = 6;
+  pack_entries[6].textureView = prev_view;
+
+  pack_entries[7].binding = 7;
+  pack_entries[7].textureView = feat0_view;
+
+  pack_entries[8].binding = 8;
+  pack_entries[8].textureView = feat1_view;
+
+  pack_entries[9].binding = 9;
+  pack_entries[9].sampler = bilinear;
+
+  WGPUBindGroupLayout pack_bgl =
+      wgpuComputePipelineGetBindGroupLayout(pack_pipeline_.get(), 0);
+
+  WGPUBindGroupDescriptor pack_bg_desc = {};
+  pack_bg_desc.layout = pack_bgl;
+  pack_bg_desc.entryCount = 10;
+  pack_bg_desc.entries = pack_entries;
+
+  WGPUBindGroup pack_bg = wgpuDeviceCreateBindGroup(ctx_.device, &pack_bg_desc);
+  wgpuBindGroupLayoutRelease(pack_bgl);
+
+  WGPUComputePassDescriptor compute_pass_desc = {};
+  WGPUComputePassEncoder compute_pass =
+      wgpuCommandEncoderBeginComputePass(encoder, &compute_pass_desc);
+  wgpuComputePassEncoderSetPipeline(compute_pass, pack_pipeline_.get());
+  wgpuComputePassEncoderSetBindGroup(compute_pass, 0, pack_bg, 0, nullptr);
+
+  const uint32_t wg_x = ((uint32_t)width_  + 7u) / 8u;
+  const uint32_t wg_y = ((uint32_t)height_ + 7u) / 8u;
+  wgpuComputePassEncoderDispatchWorkgroups(compute_pass, wg_x, wg_y, 1);
+  wgpuComputePassEncoderEnd(compute_pass);
+  wgpuComputePassEncoderRelease(compute_pass);
+
+  wgpuBindGroupRelease(pack_bg);
+  wgpuSamplerRelease(bilinear);
+}
+
+// ---- private helpers ----
+
+void GBufferEffect::ensure_objects_buffer(int num_objects) {
+  if (num_objects <= objects_buf_capacity_) {
+    return;
+  }
+  if (objects_buf_.buffer) {
+    wgpuBufferRelease(objects_buf_.buffer);
+  }
+  objects_buf_ = gpu_create_buffer(
+      ctx_.device, (size_t)num_objects * sizeof(GBufObjectData),
+      WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst);
+  objects_buf_capacity_ = num_objects;
+}
+
+void GBufferEffect::upload_scene_data(const Scene& scene,
+                                      const Camera& camera, float time) {
+  const int num_objects =
+      (int)(scene.objects.size() < (size_t)kGBufMaxObjects
+                ? scene.objects.size()
+                : (size_t)kGBufMaxObjects);
+
+  const mat4 view = camera.get_view_matrix();
+  const mat4 proj = camera.get_projection_matrix();
+  const mat4 vp   = proj * view;
+
+  GBufGlobalUniforms gu = {};
+  gu.view_proj       = vp;
+  gu.inv_view_proj   = vp.inverse();
+  gu.camera_pos_time = vec4(camera.position.x, camera.position.y,
+                            camera.position.z, time);
+  gu.params    = vec4((float)num_objects, 0.0f, 0.0f, 0.0f);
+  gu.resolution = vec2((float)width_, (float)height_);
+  gu.padding   = vec2(0.0f, 0.0f);
+
+  wgpuQueueWriteBuffer(ctx_.queue, global_uniforms_buf_.buffer, 0,
+                       &gu, sizeof(GBufGlobalUniforms));
+
+  // Upload object data.
+  if (num_objects > 0) {
+    ensure_objects_buffer(num_objects);
+    std::vector<GBufObjectData> obj_data;
+    obj_data.reserve((size_t)num_objects);
+    for (int i = 0; i < num_objects; ++i) {
+      const Object3D& obj = scene.objects[(size_t)i];
+      const mat4 m = obj.get_model_matrix();
+      GBufObjectData d;
+      d.model     = m;
+      d.inv_model = m.inverse();
+      d.color     = obj.color;
+      d.params    = vec4(0.0f, 0.0f, 0.0f, 0.0f);
+      obj_data.push_back(d);
+    }
+    wgpuQueueWriteBuffer(ctx_.queue, objects_buf_.buffer, 0,
+                         obj_data.data(),
+                         (size_t)num_objects * sizeof(GBufObjectData));
+  }
+}
+
+void GBufferEffect::create_raster_pipeline() {
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  // Load shader source.
+  const char* src = gbuf_raster_wgsl;
+  if (!src) {
+    return; // Asset not loaded yet; pipeline creation deferred.
+  }
+
+  WGPUShaderSourceWGSL wgsl_src = {};
+  wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+  wgsl_src.code = str_view(src);
+
+  WGPUShaderModuleDescriptor shader_desc = {};
+  shader_desc.nextInChain = &wgsl_src.chain;
+  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+
+  // Bind group layout: B0 = GlobalUniforms, B1 = ObjectsBuffer (storage read)
+  WGPUBindGroupLayoutEntry bgl_entries[2] = {};
+  bgl_entries[0].binding = 0;
+  bgl_entries[0].visibility =
+      (WGPUShaderStage)(WGPUShaderStage_Vertex | WGPUShaderStage_Fragment);
+  bgl_entries[0].buffer.type = WGPUBufferBindingType_Uniform;
+  bgl_entries[0].buffer.minBindingSize = sizeof(GBufGlobalUniforms);
+
+  bgl_entries[1].binding = 1;
+  bgl_entries[1].visibility =
+      (WGPUShaderStage)(WGPUShaderStage_Vertex | WGPUShaderStage_Fragment);
+  bgl_entries[1].buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
+  bgl_entries[1].buffer.minBindingSize = sizeof(GBufObjectData);
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = 2;
+  bgl_desc.entries = bgl_entries;
+  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+
+  WGPUPipelineLayoutDescriptor pl_desc = {};
+  pl_desc.bindGroupLayoutCount = 1;
+  pl_desc.bindGroupLayouts = &bgl;
+  WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+
+  // Two color targets: albedo (rgba16float) and normal_mat (rgba16float)
+  WGPUColorTargetState color_targets[2] = {};
+  color_targets[0].format = WGPUTextureFormat_RGBA16Float;
+  color_targets[0].writeMask = WGPUColorWriteMask_All;
+  color_targets[1].format = WGPUTextureFormat_RGBA16Float;
+  color_targets[1].writeMask = WGPUColorWriteMask_All;
+
+  WGPUFragmentState frag = {};
+  frag.module = shader;
+  frag.entryPoint = str_view("fs_main");
+  frag.targetCount = 2;
+  frag.targets = color_targets;
+
+  WGPUDepthStencilState ds = {};
+  ds.format = WGPUTextureFormat_Depth32Float;
+  ds.depthWriteEnabled = WGPUOptionalBool_True;
+  ds.depthCompare = WGPUCompareFunction_Less;
+
+  WGPURenderPipelineDescriptor pipe_desc = {};
+  pipe_desc.layout = pl;
+  pipe_desc.vertex.module = shader;
+  pipe_desc.vertex.entryPoint = str_view("vs_main");
+  pipe_desc.fragment = &frag;
+  pipe_desc.depthStencil = &ds;
+  pipe_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
+  pipe_desc.primitive.cullMode = WGPUCullMode_Back;
+  pipe_desc.multisample.count = 1;
+  pipe_desc.multisample.mask = 0xFFFFFFFF;
+
+  raster_pipeline_.set(wgpuDeviceCreateRenderPipeline(ctx_.device, &pipe_desc));
+
+  wgpuPipelineLayoutRelease(pl);
+  wgpuBindGroupLayoutRelease(bgl);
+  wgpuShaderModuleRelease(shader);
+}
+
+void GBufferEffect::create_pack_pipeline() {
+  HEADLESS_RETURN_IF_NULL(ctx_.device);
+
+  const char* src = gbuf_pack_wgsl;
+  if (!src) {
+    return;
+  }
+
+  WGPUShaderSourceWGSL wgsl_src = {};
+  wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+  wgsl_src.code = str_view(src);
+
+  WGPUShaderModuleDescriptor shader_desc = {};
+  shader_desc.nextInChain = &wgsl_src.chain;
+  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+
+  // Build explicit bind group layout for bindings 0-9.
+  WGPUBindGroupLayoutEntry bgl_entries[10] = {};
+
+  // B0: resolution uniform
+  bgl_entries[0].binding = 0;
+  bgl_entries[0].visibility = WGPUShaderStage_Compute;
+  bgl_entries[0].buffer.type = WGPUBufferBindingType_Uniform;
+  bgl_entries[0].buffer.minBindingSize = sizeof(GBufResUniforms);
+
+  // B1: gbuf_albedo (texture_2d<f32>)
+  bgl_entries[1].binding = 1;
+  bgl_entries[1].visibility = WGPUShaderStage_Compute;
+  bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B2: gbuf_normal_mat (texture_2d<f32>)
+  bgl_entries[2].binding = 2;
+  bgl_entries[2].visibility = WGPUShaderStage_Compute;
+  bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B3: gbuf_depth (texture_depth_2d)
+  bgl_entries[3].binding = 3;
+  bgl_entries[3].visibility = WGPUShaderStage_Compute;
+  bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Depth;
+  bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B4: gbuf_shadow (texture_2d<f32>)
+  bgl_entries[4].binding = 4;
+  bgl_entries[4].visibility = WGPUShaderStage_Compute;
+  bgl_entries[4].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[4].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B5: gbuf_transp (texture_2d<f32>)
+  bgl_entries[5].binding = 5;
+  bgl_entries[5].visibility = WGPUShaderStage_Compute;
+  bgl_entries[5].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[5].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B6: prev_cnn (texture_2d<f32>)
+  bgl_entries[6].binding = 6;
+  bgl_entries[6].visibility = WGPUShaderStage_Compute;
+  bgl_entries[6].texture.sampleType = WGPUTextureSampleType_Float;
+  bgl_entries[6].texture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B7: feat_tex0 (storage texture, write, rgba32uint)
+  bgl_entries[7].binding = 7;
+  bgl_entries[7].visibility = WGPUShaderStage_Compute;
+  bgl_entries[7].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  bgl_entries[7].storageTexture.format = WGPUTextureFormat_RGBA32Uint;
+  bgl_entries[7].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B8: feat_tex1 (storage texture, write, rgba32uint)
+  bgl_entries[8].binding = 8;
+  bgl_entries[8].visibility = WGPUShaderStage_Compute;
+  bgl_entries[8].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  bgl_entries[8].storageTexture.format = WGPUTextureFormat_RGBA32Uint;
+  bgl_entries[8].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // B9: bilinear sampler
+  bgl_entries[9].binding = 9;
+  bgl_entries[9].visibility = WGPUShaderStage_Compute;
+  bgl_entries[9].sampler.type = WGPUSamplerBindingType_Filtering;
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = 10;
+  bgl_desc.entries = bgl_entries;
+  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+
+  WGPUPipelineLayoutDescriptor pl_desc = {};
+  pl_desc.bindGroupLayoutCount = 1;
+  pl_desc.bindGroupLayouts = &bgl;
+  WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
+
+  WGPUComputePipelineDescriptor pipe_desc = {};
+  pipe_desc.layout = pl;
+  pipe_desc.compute.module = shader;
+  pipe_desc.compute.entryPoint = str_view("pack_features");
+
+  pack_pipeline_.set(wgpuDeviceCreateComputePipeline(ctx_.device, &pipe_desc));
+
+  wgpuPipelineLayoutRelease(pl);
+  wgpuBindGroupLayoutRelease(bgl);
+  wgpuShaderModuleRelease(shader);
+}
+
+void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) {
+  (void)nodes;
+  // Rebuild each frame since textures may resize.
+  raster_bind_group_.replace(nullptr);
+
+  if (raster_pipeline_.get() == nullptr) {
+    return;
+  }
+
+  WGPUBindGroupEntry entries[2] = {};
+  entries[0].binding = 0;
+  entries[0].buffer  = global_uniforms_buf_.buffer;
+  entries[0].size    = sizeof(GBufGlobalUniforms);
+
+  entries[1].binding = 1;
+  entries[1].buffer  = objects_buf_.buffer;
+  entries[1].size    = (size_t)objects_buf_capacity_ * sizeof(GBufObjectData);
+
+  WGPUBindGroupLayout bgl =
+      wgpuRenderPipelineGetBindGroupLayout(raster_pipeline_.get(), 0);
+
+  WGPUBindGroupDescriptor bg_desc = {};
+  bg_desc.layout = bgl;
+  bg_desc.entryCount = 2;
+  bg_desc.entries = entries;
+
+  raster_bind_group_.replace(wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc));
+  wgpuBindGroupLayoutRelease(bgl);
+}
+
+void GBufferEffect::update_pack_bind_group(NodeRegistry& nodes) {
+  (void)nodes;
+  // Pack bind group is rebuilt inline in render() to use current node views.
+}
diff --git a/cnn_v3/src/gbuffer_effect.h b/cnn_v3/src/gbuffer_effect.h
new file mode 100644
index 0000000..42fb0ec
--- /dev/null
+++ b/cnn_v3/src/gbuffer_effect.h
@@ -0,0 +1,79 @@
+// GBufferEffect: Multi-pass G-buffer rendering for CNN v3 input
+// Outputs: gbuf_feat0, gbuf_feat1 (packed rgba32uint feature textures, 32 bytes/pixel)
+
+#pragma once
+
+#include "3d/camera.h"
+#include "3d/scene.h"
+#include "gpu/effect.h"
+#include "gpu/sequence.h"
+#include "gpu/uniform_helper.h"
+#include "gpu/wgpu_resource.h"
+#include "util/mini_math.h"
+
+// Uniform for the pack compute shader
+struct GBufResUniforms {
+  vec2 resolution;
+  float _pad0;
+  float _pad1;
+};
+static_assert(sizeof(GBufResUniforms) == 16,
+              "GBufResUniforms must be 16 bytes");
+
+class GBufferEffect : public Effect {
+ public:
+  GBufferEffect(const GpuContext& ctx, const std::vector<std::string>& inputs,
+                const std::vector<std::string>& outputs, float start_time,
+                float end_time);
+
+  void declare_nodes(NodeRegistry& registry) override;
+
+  void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params,
+              NodeRegistry& nodes) override;
+
+  void set_scene(const Scene* scene, const Camera* camera);
+
+ private:
+  // Internal G-buffer node names
+  std::string node_albedo_;
+  std::string node_normal_mat_;
+  std::string node_depth_;
+  std::string node_shadow_;
+  std::string node_transp_;
+  std::string node_feat0_;
+  std::string node_feat1_;
+
+  const Scene*  scene_  = nullptr;
+  const Camera* camera_ = nullptr;
+
+  // Pass 1: MRT rasterization pipeline
+  RenderPipeline raster_pipeline_;
+  BindGroup      raster_bind_group_;
+
+  // Pass 4: Pack compute pipeline
+  ComputePipeline  pack_pipeline_;
+  BindGroup        pack_bind_group_;
+  UniformBuffer<GBufResUniforms> pack_res_uniform_;
+
+  // Placeholder textures for shadow/transp (white/black cleared once)
+  Texture     shadow_placeholder_tex_;
+  TextureView shadow_placeholder_view_;
+  Texture     transp_placeholder_tex_;
+  TextureView transp_placeholder_view_;
+
+  // GPU-side object data buffers (global uniforms + objects storage)
+  // These mirror the layout expected by gbuf_raster.wgsl
+  GpuBuffer global_uniforms_buf_;
+  GpuBuffer objects_buf_;
+  int       objects_buf_capacity_ = 0; // number of ObjectData slots allocated
+
+  void create_raster_pipeline();
+  void create_pack_pipeline();
+
+  void update_raster_bind_group(NodeRegistry& nodes);
+  void update_pack_bind_group(NodeRegistry& nodes);
+
+  void upload_scene_data(const Scene& scene, const Camera& camera,
+                         float time);
+  void ensure_objects_buffer(int num_objects);
+};
diff --git a/cnn_v3/training/blender_export.py b/cnn_v3/training/blender_export.py
new file mode 100644
index 0000000..63dd0e3
--- /dev/null
+++ b/cnn_v3/training/blender_export.py
@@ -0,0 +1,160 @@
+"""
+Blender export script for CNN v3 G-buffer training data.
+Configures render passes and a compositor File Output node,
+then renders the current scene to a multi-layer EXR.
+
+Usage (headless):
+    blender -b scene.blend -P blender_export.py -- --output renders/frame_###
+
+Each '#' in the output path is replaced by Blender with the frame number (zero-padded).
+The script writes one multi-layer EXR per frame containing all required passes.
+
+G-buffer pass mapping:
+    Combined  → training target RGBA (beauty)
+    DiffCol   → albedo.rgb  (pre-lighting material color)
+    Normal    → normal.xy   (world-space, oct-encode in pack_blender_sample.py)
+    Z         → depth       (view-space distance, normalize in pack step)
+    IndexOB   → mat_id      (object index, u8 / 255)
+    Shadow    → shadow      (invert: shadow=1 means fully lit)
+    Alpha     → transp.     (0=opaque, 1=clear/transparent)
+"""
+
+import sys
+import argparse
+
+import bpy
+
+
+def parse_args():
+    # Blender passes its own argv; our args follow '--'.
+    argv = sys.argv
+    if "--" in argv:
+        argv = argv[argv.index("--") + 1:]
+    else:
+        argv = []
+    parser = argparse.ArgumentParser(
+        description="Configure Blender render passes and export multi-layer EXR."
+    )
+    parser.add_argument(
+        "--output",
+        default="//renders/frame_###",
+        help="Output path prefix (use ### for frame number padding). "
+             "Default: //renders/frame_###",
+    )
+    parser.add_argument(
+        "--width",  type=int, default=640,
+        help="Render width in pixels (default: 640)"
+    )
+    parser.add_argument(
+        "--height", type=int, default=360,
+        help="Render height in pixels (default: 360)"
+    )
+    parser.add_argument(
+        "--start-frame", type=int, default=None,
+        help="First frame to render (default: scene start frame)"
+    )
+    parser.add_argument(
+        "--end-frame", type=int, default=None,
+        help="Last frame to render (default: scene end frame)"
+    )
+    return parser.parse_args(argv)
+
+
+def configure_scene(args):
+    scene = bpy.context.scene
+
+    # Render dimensions
+    scene.render.resolution_x = args.width
+    scene.render.resolution_y = args.height
+    scene.render.resolution_percentage = 100
+
+    # Frame range (optional override)
+    if args.start_frame is not None:
+        scene.frame_start = args.start_frame
+    if args.end_frame is not None:
+        scene.frame_end = args.end_frame
+
+    # Use Cycles for best multi-pass support
+    scene.render.engine = "CYCLES"
+
+    # Enable required render passes on the active view layer
+    vl = scene.view_layers["ViewLayer"]
+    vl.use_pass_combined           = True   # beauty target
+    vl.use_pass_diffuse_color      = True   # albedo
+    vl.use_pass_normal             = True   # world normals
+    vl.use_pass_z                  = True   # depth (Z)
+    vl.use_pass_object_index       = True   # mat_id
+    vl.use_pass_shadow             = True   # shadow catcher
+    # Alpha is available via the combined pass alpha channel;
+    # the compositor node below also taps it separately.
+
+    print(f"[blender_export] Render passes configured on ViewLayer '{vl.name}'.")
+    print(f"  Resolution: {args.width}x{args.height}")
+    print(f"  Frames: {scene.frame_start} – {scene.frame_end}")
+
+
+def configure_compositor(args):
+    scene = bpy.context.scene
+    scene.use_nodes = True
+    tree = scene.node_tree
+
+    # Clear all existing compositor nodes
+    tree.nodes.clear()
+
+    # Render Layers node (source of all passes)
+    rl_node = tree.nodes.new("CompositorNodeRLayers")
+    rl_node.location = (0, 0)
+
+    # File Output node — multi-layer EXR (all passes in one file)
+    out_node = tree.nodes.new("CompositorNodeOutputFile")
+    out_node.location = (600, 0)
+    out_node.format.file_format = "OPEN_EXR_MULTILAYER"
+    out_node.format.exr_codec = "ZIP"
+    out_node.base_path = args.output
+
+    # Map each render pass socket to a named layer in the EXR.
+    # Slot order matters: the first slot is created by default; we rename it
+    # and add the rest.
+    pass_sockets = [
+        ("Image",           "Combined"),    # beauty / target
+        ("Diffuse Color",   "DiffCol"),     # albedo
+        ("Normal",          "Normal"),      # world normals
+        ("Depth",           "Z"),           # view-space depth
+        ("Object Index",    "IndexOB"),     # object index
+        ("Shadow",          "Shadow"),      # shadow
+        ("Alpha",           "Alpha"),       # transparency / alpha
+    ]
+
+    # The node starts with one default slot; configure it first.
+    for i, (socket_name, layer_name) in enumerate(pass_sockets):
+        if i == 0:
+            # Rename the default slot
+            out_node.file_slots[0].path = layer_name
+        else:
+            out_node.file_slots.new(layer_name)
+
+        # Link render layer socket to file output slot
+        src_socket = rl_node.outputs.get(socket_name)
+        dst_socket = out_node.inputs[i]
+        if src_socket:
+            tree.links.new(src_socket, dst_socket)
+        else:
+            print(f"[blender_export] WARNING: pass socket '{socket_name}' "
+                  f"not found on Render Layers node. Skipping.")
+
+    print(f"[blender_export] Compositor configured. Output → {args.output}")
+    print("  Layers: " + ", ".join(ln for _, ln in pass_sockets))
+
+
+def main():
+    args = parse_args()
+    configure_scene(args)
+    configure_compositor(args)
+
+    # Trigger the render (only when running headless with -b)
+    bpy.ops.render.render(animation=True)
+    print("[blender_export] Render complete.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cnn_v3/training/pack_blender_sample.py b/cnn_v3/training/pack_blender_sample.py
new file mode 100644
index 0000000..84344c1
--- /dev/null
+++ b/cnn_v3/training/pack_blender_sample.py
@@ -0,0 +1,268 @@
+"""
+Pack a Blender multi-layer EXR into CNN v3 training sample files.
+
+Reads a multi-layer EXR produced by blender_export.py and writes separate PNG
+files per channel into an output directory, ready for the CNN v3 dataloader.
+
+Output files:
+    albedo.png    — RGB uint8  (DiffCol pass, gamma-corrected)
+    normal.png    — RG uint8   (octahedral-encoded world normal in [0,1])
+    depth.png     — R uint16   (1/(z+1) normalized to [0,1], 16-bit PNG)
+    matid.png     — R uint8    (IndexOB / 255)
+    shadow.png    — R uint8    (1 - shadow_catcher, so 255 = fully lit)
+    transp.png    — R uint8    (alpha from Combined pass, 0=opaque)
+    target.png    — RGBA uint8 (Combined beauty pass)
+
+depth_grad, mip1, mip2 are computed on-the-fly by the dataloader (not stored).
+prev = zero during training (no temporal history for static frames).
+
+Usage:
+    python3 pack_blender_sample.py --exr renders/frame_001.exr \\
+                                   --output dataset/full/sample_001/
+
+Dependencies:
+    numpy, Pillow, OpenEXR (pip install openexr)
+    — or use imageio[freeimage] as alternative EXR reader.
+"""
+
+import argparse
+import os
+import sys
+import numpy as np
+from PIL import Image
+
+
+# ---- EXR loading ----
+
+def load_exr_openexr(path: str) -> dict:
+    """Load a multi-layer EXR using the OpenEXR Python binding."""
+    import OpenEXR
+    import Imath
+
+    exr = OpenEXR.InputFile(path)
+    header = exr.header()
+    dw = header["dataWindow"]
+    width  = dw.max.x - dw.min.x + 1
+    height = dw.max.y - dw.min.y + 1
+    channels = {}
+    float_type = Imath.PixelType(Imath.PixelType.FLOAT)
+    for ch_name in header["channels"]:
+        raw = exr.channel(ch_name, float_type)
+        arr = np.frombuffer(raw, dtype=np.float32).reshape((height, width))
+        channels[ch_name] = arr
+    return channels, width, height
+
+
+def load_exr_imageio(path: str) -> dict:
+    """Load a multi-layer EXR using imageio (freeimage backend)."""
+    import imageio
+    data = imageio.imread(path, format="exr")
+    # imageio may return (H, W, C); treat as single layer
+    h, w = data.shape[:2]
+    c = data.shape[2] if data.ndim == 3 else 1
+    channels = {}
+    names = ["R", "G", "B", "A"][:c]
+    for i, n in enumerate(names):
+        channels[n] = data[:, :, i].astype(np.float32)
+    return channels, w, h
+
+
+def load_exr(path: str):
+    """Try OpenEXR first, fall back to imageio."""
+    try:
+        return load_exr_openexr(path)
+    except ImportError:
+        pass
+    try:
+        return load_exr_imageio(path)
+    except ImportError:
+        pass
+    raise ImportError(
+        "No EXR reader found. Install OpenEXR or imageio[freeimage]:\n"
+        "  pip install openexr\n"
+        "  pip install imageio[freeimage]"
+    )
+
+
+# ---- Octahedral encoding ----
+
+def oct_encode(normals: np.ndarray) -> np.ndarray:
+    """
+    Octahedral-encode world-space normals.
+
+    Args:
+        normals: (H, W, 3) float32, unit vectors.
+    Returns:
+        (H, W, 2) float32 in [0, 1] for PNG storage.
+    """
+    nx, ny, nz = normals[..., 0], normals[..., 1], normals[..., 2]
+    # L1-normalize projection onto the octahedron
+    l1 = np.abs(nx) + np.abs(ny) + np.abs(nz) + 1e-9
+    ox = nx / l1
+    oy = ny / l1
+    # Fold lower hemisphere
+    mask = nz < 0.0
+    ox_folded = np.where(mask, (1.0 - np.abs(oy)) * np.sign(ox + 1e-9), ox)
+    oy_folded = np.where(mask, (1.0 - np.abs(ox)) * np.sign(oy + 1e-9), oy)
+    # Remap [-1, 1] → [0, 1]
+    encoded = np.stack([ox_folded, oy_folded], axis=-1) * 0.5 + 0.5
+    return np.clip(encoded, 0.0, 1.0)
+
+
+# ---- Channel extraction helpers ----
+
+def get_pass_rgb(channels: dict, prefix: str) -> np.ndarray:
+    """Extract an RGB pass (prefix.R, prefix.G, prefix.B)."""
+    r = channels.get(f"{prefix}.R", channels.get("R", None))
+    g = channels.get(f"{prefix}.G", channels.get("G", None))
+    b = channels.get(f"{prefix}.B", channels.get("B", None))
+    if r is None or g is None or b is None:
+        raise KeyError(f"Could not find RGB channels for pass '{prefix}'.")
+    return np.stack([r, g, b], axis=-1)
+
+
+def get_pass_rgba(channels: dict, prefix: str) -> np.ndarray:
+    """Extract an RGBA pass."""
+    rgb = get_pass_rgb(channels, prefix)
+    a = channels.get(f"{prefix}.A", np.ones_like(rgb[..., 0]))
+    return np.concatenate([rgb, a[..., np.newaxis]], axis=-1)
+
+
+def get_pass_r(channels: dict, prefix: str, default: float = 0.0) -> np.ndarray:
+    """Extract a single-channel pass."""
+    ch = channels.get(f"{prefix}.R", channels.get(prefix, None))
+    if ch is None:
+        h, w = next(iter(channels.values())).shape[:2]
+        return np.full((h, w), default, dtype=np.float32)
+    return ch.astype(np.float32)
+
+
+def get_pass_xyz(channels: dict, prefix: str) -> np.ndarray:
+    """Extract an XYZ pass (Normal uses .X .Y .Z in Blender)."""
+    x = channels.get(f"{prefix}.X")
+    y = channels.get(f"{prefix}.Y")
+    z = channels.get(f"{prefix}.Z")
+    if x is None or y is None or z is None:
+        # Fall back to R/G/B naming
+        return get_pass_rgb(channels, prefix)
+    return np.stack([x, y, z], axis=-1)
+
+
+# ---- Main packing ----
+
+def pack_blender_sample(exr_path: str, output_dir: str) -> None:
+    os.makedirs(output_dir, exist_ok=True)
+
+    print(f"[pack_blender_sample] Loading {exr_path} …")
+    channels, width, height = load_exr(exr_path)
+    print(f"  Dimensions: {width}×{height}")
+    print(f"  Channels: {sorted(channels.keys())}")
+
+    # ---- albedo (DiffCol → RGB uint8, gamma-correct linear→sRGB) ----
+    try:
+        albedo_lin = get_pass_rgb(channels, "DiffCol")
+    except KeyError:
+        print("  WARNING: DiffCol pass not found; using zeros.")
+        albedo_lin = np.zeros((height, width, 3), dtype=np.float32)
+    # Convert linear → sRGB (approximate gamma 2.2)
+    albedo_srgb = np.clip(np.power(np.clip(albedo_lin, 0, 1), 1.0 / 2.2), 0, 1)
+    albedo_u8 = (albedo_srgb * 255.0).astype(np.uint8)
+    Image.fromarray(albedo_u8, mode="RGB").save(
+        os.path.join(output_dir, "albedo.png")
+    )
+
+    # ---- normal (Normal pass → oct-encoded RG uint8) ----
+    try:
+        # Blender world normals use .X .Y .Z channels
+        normal_xyz = get_pass_xyz(channels, "Normal")
+        # Normalize to unit length (may not be exactly unit after compression)
+        nlen = np.linalg.norm(normal_xyz, axis=-1, keepdims=True) + 1e-9
+        normal_unit = normal_xyz / nlen
+        normal_enc = oct_encode(normal_unit)       # (H, W, 2) in [0, 1]
+        normal_u8 = (normal_enc * 255.0).astype(np.uint8)
+        # Store in RGB with B=0 (unused)
+        normal_rgb = np.concatenate(
+            [normal_u8, np.zeros((height, width, 1), dtype=np.uint8)], axis=-1
+        )
+    except KeyError:
+        print("  WARNING: Normal pass not found; using zeros.")
+        normal_rgb = np.zeros((height, width, 3), dtype=np.uint8)
+    Image.fromarray(normal_rgb, mode="RGB").save(
+        os.path.join(output_dir, "normal.png")
+    )
+
+    # ---- depth (Z pass → 1/(z+1), stored as 16-bit PNG) ----
+    z_raw = get_pass_r(channels, "Z", default=0.0)
+    # 1/z style: 1/(z + 1) maps z=0→1.0, z=∞→0.0
+    depth_norm = 1.0 / (np.clip(z_raw, 0.0, None) + 1.0)
+    depth_norm = np.clip(depth_norm, 0.0, 1.0)
+    depth_u16 = (depth_norm * 65535.0).astype(np.uint16)
+    Image.fromarray(depth_u16, mode="I;16").save(
+        os.path.join(output_dir, "depth.png")
+    )
+
+    # ---- matid (IndexOB → u8) ----
+    # Blender object index is an integer; clamp to [0, 255].
+    matid_raw = get_pass_r(channels, "IndexOB", default=0.0)
+    matid_u8 = np.clip(matid_raw, 0, 255).astype(np.uint8)
+    Image.fromarray(matid_u8, mode="L").save(
+        os.path.join(output_dir, "matid.png")
+    )
+
+    # ---- shadow (Shadow pass → invert: 1=fully lit, stored u8) ----
+    # Blender Shadow pass: 1=lit, 0=shadowed.  We keep that convention
+    # (shadow=1 means fully lit), so just convert directly.
+    shadow_raw = get_pass_r(channels, "Shadow", default=1.0)
+    shadow_u8 = (np.clip(shadow_raw, 0.0, 1.0) * 255.0).astype(np.uint8)
+    Image.fromarray(shadow_u8, mode="L").save(
+        os.path.join(output_dir, "shadow.png")
+    )
+
+    # ---- transp (Alpha from Combined pass → u8, 0=opaque) ----
+    # Blender alpha: 1=opaque, 0=transparent.
+    # CNN convention: transp=0 means opaque, transp=1 means transparent.
+    # So transp = 1 - alpha.
+    try:
+        combined_rgba = get_pass_rgba(channels, "Combined")
+        alpha = combined_rgba[..., 3]
+    except KeyError:
+        alpha = np.ones((height, width), dtype=np.float32)
+    transp = 1.0 - np.clip(alpha, 0.0, 1.0)
+    transp_u8 = (transp * 255.0).astype(np.uint8)
+    Image.fromarray(transp_u8, mode="L").save(
+        os.path.join(output_dir, "transp.png")
+    )
+
+    # ---- target (Combined beauty pass → RGBA uint8, gamma-correct) ----
+    try:
+        combined_rgba = get_pass_rgba(channels, "Combined")
+        # Convert linear → sRGB for display (RGB channels only)
+        c_rgb = np.power(np.clip(combined_rgba[..., :3], 0, 1), 1.0 / 2.2)
+        c_alpha = combined_rgba[..., 3:4]
+        target_lin = np.concatenate([c_rgb, c_alpha], axis=-1)
+        target_u8 = (np.clip(target_lin, 0, 1) * 255.0).astype(np.uint8)
+    except KeyError:
+        print("  WARNING: Combined pass not found; target will be zeros.")
+        target_u8 = np.zeros((height, width, 4), dtype=np.uint8)
+    Image.fromarray(target_u8, mode="RGBA").save(
+        os.path.join(output_dir, "target.png")
+    )
+
+    print(f"[pack_blender_sample] Wrote sample to {output_dir}")
+    print("  Files: albedo.png  normal.png  depth.png  matid.png  "
+          "shadow.png  transp.png  target.png")
+    print("  Note: depth_grad, mip1, mip2 are computed on-the-fly by the dataloader.")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Pack a Blender multi-layer EXR into CNN v3 training sample files."
+    )
+    parser.add_argument("--exr",    required=True, help="Input multi-layer EXR file")
+    parser.add_argument("--output", required=True, help="Output directory for sample files")
+    args = parser.parse_args()
+    pack_blender_sample(args.exr, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cnn_v3/training/pack_photo_sample.py b/cnn_v3/training/pack_photo_sample.py
new file mode 100644
index 0000000..b2943fb
--- /dev/null
+++ b/cnn_v3/training/pack_photo_sample.py
@@ -0,0 +1,148 @@
+"""
+Pack a photo into CNN v3 simple training sample files.
+
+Converts a single RGB or RGBA photo into the CNN v3 sample layout.
+Geometric channels (normal, depth, matid) are zeroed; the network
+degrades gracefully due to channel-dropout training.
+
+Output files:
+    albedo.png    — RGB uint8   (photo RGB)
+    normal.png    — RG uint8    (zero — no geometry data)
+    depth.png     — R uint16    (zero — no depth data)
+    matid.png     — R uint8     (zero — no material data)
+    shadow.png    — R uint8     (255 = fully lit — assume unoccluded)
+    transp.png    — R uint8     (1 - alpha, or 0 if no alpha channel)
+    target.png    — RGB/RGBA    (= albedo; no ground-truth styled target)
+
+mip1 and mip2 are computed on-the-fly by the dataloader from albedo.
+prev = zero during training (no temporal history).
+
+Usage:
+    python3 pack_photo_sample.py --photo photos/img_001.png \\
+                                 --output dataset/simple/sample_001/
+
+Dependencies:
+    numpy, Pillow
+"""
+
+import argparse
+import os
+import numpy as np
+from PIL import Image
+
+
+# ---- Mip computation ----
+
+def pyrdown(img: np.ndarray) -> np.ndarray:
+    """
+    2×2 average pooling (half resolution).
+    Args:
+        img: (H, W, C) float32 in [0, 1].
+    Returns:
+        (H//2, W//2, C) float32.
+    """
+    h, w, c = img.shape
+    h2, w2 = h // 2, w // 2
+    # Crop to even dimensions
+    cropped = img[:h2 * 2, :w2 * 2, :]
+    # Reshape and average
+    return 0.25 * (
+        cropped[0::2, 0::2, :] +
+        cropped[1::2, 0::2, :] +
+        cropped[0::2, 1::2, :] +
+        cropped[1::2, 1::2, :]
+    )
+
+
+# ---- Main packing ----
+
+def pack_photo_sample(photo_path: str, output_dir: str) -> None:
+    os.makedirs(output_dir, exist_ok=True)
+
+    print(f"[pack_photo_sample] Loading {photo_path} …")
+    img = Image.open(photo_path).convert("RGBA")
+    width, height = img.size
+    print(f"  Dimensions: {width}×{height}")
+
+    img_np = np.asarray(img, dtype=np.float32) / 255.0  # (H, W, 4) in [0, 1]
+    rgb  = img_np[..., :3]   # (H, W, 3)
+    alpha = img_np[..., 3]   # (H, W)
+
+    # ---- albedo — photo RGB ----
+    albedo_u8 = (np.clip(rgb, 0, 1) * 255.0).astype(np.uint8)
+    Image.fromarray(albedo_u8, mode="RGB").save(
+        os.path.join(output_dir, "albedo.png")
+    )
+
+    # ---- normal — zero (no geometry) ----
+    normal_zeros = np.zeros((height, width, 3), dtype=np.uint8)
+    # Encode "no normal" as (0.5, 0.5) in octahedral space → (128, 128)
+    # This maps to oct = (0, 0) → reconstructed normal = (0, 0, 1) (pointing forward)
+    normal_zeros[..., 0] = 128
+    normal_zeros[..., 1] = 128
+    Image.fromarray(normal_zeros, mode="RGB").save(
+        os.path.join(output_dir, "normal.png")
+    )
+
+    # ---- depth — zero ----
+    depth_zero = np.zeros((height, width), dtype=np.uint16)
+    Image.fromarray(depth_zero, mode="I;16").save(
+        os.path.join(output_dir, "depth.png")
+    )
+
+    # ---- matid — zero ----
+    matid_zero = np.zeros((height, width), dtype=np.uint8)
+    Image.fromarray(matid_zero, mode="L").save(
+        os.path.join(output_dir, "matid.png")
+    )
+
+    # ---- shadow — 255 (fully lit, assume unoccluded) ----
+    shadow_full = np.full((height, width), 255, dtype=np.uint8)
+    Image.fromarray(shadow_full, mode="L").save(
+        os.path.join(output_dir, "shadow.png")
+    )
+
+    # ---- transp — 1 - alpha (0=opaque, 1=transparent) ----
+    # If the photo has no meaningful alpha, this is zero everywhere.
+    transp = 1.0 - np.clip(alpha, 0.0, 1.0)
+    transp_u8 = (transp * 255.0).astype(np.uint8)
+    Image.fromarray(transp_u8, mode="L").save(
+        os.path.join(output_dir, "transp.png")
+    )
+
+    # ---- target — albedo (= photo; no GT styled target) ----
+    # Store as RGBA (keep alpha for potential masking by the dataloader).
+    target_u8 = (np.clip(img_np, 0, 1) * 255.0).astype(np.uint8)
+    Image.fromarray(target_u8, mode="RGBA").save(
+        os.path.join(output_dir, "target.png")
+    )
+
+    # ---- mip1 / mip2 — informational only, not saved ----
+    # The dataloader computes mip1/mip2 on-the-fly from albedo.
+    # Verify they look reasonable here for debugging.
+    mip1 = pyrdown(rgb)
+    mip2 = pyrdown(mip1)
+    print(f"  mip1: {mip1.shape[1]}×{mip1.shape[0]}  "
+          f"mip2: {mip2.shape[1]}×{mip2.shape[0]}  (computed on-the-fly)")
+
+    print(f"[pack_photo_sample] Wrote sample to {output_dir}")
+    print("  Files: albedo.png  normal.png  depth.png  matid.png  "
+          "shadow.png  transp.png  target.png")
+    print("  Note: normal/depth/matid are zeroed (no geometry data).")
+    print("  Note: target = albedo (no ground-truth styled target).")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Pack a photo into CNN v3 simple training sample files."
+    )
+    parser.add_argument("--photo",  required=True,
+                        help="Input photo file (RGB or RGBA PNG/JPG)")
+    parser.add_argument("--output", required=True,
+                        help="Output directory for sample files")
+    args = parser.parse_args()
+    pack_photo_sample(args.photo, args.output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/gpu/sequence.cc b/src/gpu/sequence.cc
index 1e3be6c..91ca187 100644
--- a/src/gpu/sequence.cc
+++ b/src/gpu/sequence.cc
@@ -181,6 +181,30 @@ void NodeRegistry::create_texture(Node& node) {
     usage = (WGPUTextureUsage)(WGPUTextureUsage_StorageBinding |
                                WGPUTextureUsage_TextureBinding);
     break;
+  case NodeType::GBUF_ALBEDO:
+    format = WGPUTextureFormat_RGBA16Float;
+    usage = (WGPUTextureUsage)(WGPUTextureUsage_RenderAttachment |
+                               WGPUTextureUsage_TextureBinding |
+                               WGPUTextureUsage_StorageBinding |
+                               WGPUTextureUsage_CopySrc);
+    break;
+  case NodeType::GBUF_DEPTH32:
+    format = WGPUTextureFormat_Depth32Float;
+    usage = (WGPUTextureUsage)(WGPUTextureUsage_RenderAttachment |
+                               WGPUTextureUsage_TextureBinding |
+                               WGPUTextureUsage_CopySrc);
+    break;
+  case NodeType::GBUF_R8:
+    format = WGPUTextureFormat_RGBA8Unorm;
+    usage = (WGPUTextureUsage)(WGPUTextureUsage_StorageBinding |
+                               WGPUTextureUsage_TextureBinding |
+                               WGPUTextureUsage_RenderAttachment);
+    break;
+  case NodeType::GBUF_RGBA32UINT:
+    format = WGPUTextureFormat_RGBA32Uint;
+    usage = (WGPUTextureUsage)(WGPUTextureUsage_StorageBinding |
+                               WGPUTextureUsage_TextureBinding);
+    break;
   }
 
   WGPUTextureDescriptor desc = {};
@@ -201,7 +225,8 @@ void NodeRegistry::create_texture(Node& node) {
   view_desc.mipLevelCount = 1;
   view_desc.baseArrayLayer = 0;
   view_desc.arrayLayerCount = 1;
-  view_desc.aspect = (node.type == NodeType::DEPTH24)
+  view_desc.aspect = (node.type == NodeType::DEPTH24 ||
+                      node.type == NodeType::GBUF_DEPTH32)
                          ? WGPUTextureAspect_DepthOnly
                          : WGPUTextureAspect_All;
 
diff --git a/src/gpu/sequence.h b/src/gpu/sequence.h
index a515d1f..4592082 100644
--- a/src/gpu/sequence.h
+++ b/src/gpu/sequence.h
@@ -20,6 +20,11 @@ enum class NodeType {
   F16X8,
   DEPTH24,
   COMPUTE_F32,
+  // G-buffer types for CNN v3
+  GBUF_ALBEDO,    // rgba16float: RENDER_ATTACHMENT | TEXTURE_BINDING | STORAGE_BINDING | COPY_SRC
+  GBUF_DEPTH32,   // depth32float: RENDER_ATTACHMENT | TEXTURE_BINDING | COPY_SRC
+  GBUF_R8,        // rgba8unorm (4ch for compat): STORAGE_BINDING | TEXTURE_BINDING | RENDER_ATTACHMENT
+  GBUF_RGBA32UINT, // rgba32uint: STORAGE_BINDING | TEXTURE_BINDING
 };
 
 struct Node {