diff options
Diffstat (limited to 'cnn_v3/shaders')
| -rw-r--r-- | cnn_v3/shaders/gbuf_pack.wgsl   | 123 |
| -rw-r--r-- | cnn_v3/shaders/gbuf_raster.wgsl | 105 |
2 files changed, 228 insertions, 0 deletions
// File: cnn_v3/shaders/gbuf_pack.wgsl (new file)
// G-buffer pack compute shader for CNN v3.
// Pass 4: Pack all G-buffer channels into two rgba32uint feature textures
// (32 bytes/pixel). feat_tex0 holds 8×f16 geometric channels; feat_tex1 holds
// 12×u8 context channels.

struct GBufRes {
    resolution: vec2f,
}

@group(0) @binding(0) var<uniform> gbuf_res: GBufRes;
@group(0) @binding(1) var gbuf_albedo: texture_2d<f32>;
@group(0) @binding(2) var gbuf_normal_mat: texture_2d<f32>;
@group(0) @binding(3) var gbuf_depth: texture_depth_2d;
@group(0) @binding(4) var gbuf_shadow: texture_2d<f32>;
@group(0) @binding(5) var gbuf_transp: texture_2d<f32>;
@group(0) @binding(6) var prev_cnn: texture_2d<f32>;
@group(0) @binding(7) var feat_tex0: texture_storage_2d<rgba32uint, write>;
@group(0) @binding(8) var feat_tex1: texture_storage_2d<rgba32uint, write>;
@group(0) @binding(9) var bilinear_sampler: sampler;

// Fetch depth at an integer texel coordinate, clamping to the texture edge
// so out-of-range neighbor reads are safe.
fn load_depth(coord: vec2i) -> f32 {
    let extent = vec2i(textureDimensions(gbuf_depth));
    let texel = clamp(coord, vec2i(0), extent - vec2i(1));
    return textureLoad(gbuf_depth, texel, 0);
}

// Average the 2×2 albedo block whose top-left texel is `tl`.
fn box2(tl: vec2i) -> vec3f {
    let sum = textureLoad(gbuf_albedo, tl, 0).rgb
        + textureLoad(gbuf_albedo, tl + vec2i(1, 0), 0).rgb
        + textureLoad(gbuf_albedo, tl + vec2i(0, 1), 0).rgb
        + textureLoad(gbuf_albedo, tl + vec2i(1, 1), 0).rgb;
    return sum * 0.25;
}
// Average the 4×4 albedo block whose top-left texel is `tl`.
fn box4(tl: vec2i) -> vec3f {
    var acc = vec3f(0.0);
    for (var dy: i32 = 0; dy < 4; dy++) {
        for (var dx: i32 = 0; dx < 4; dx++) {
            acc += textureLoad(gbuf_albedo, tl + vec2i(dx, dy), 0).rgb;
        }
    }
    return acc * (1.0 / 16.0);
}

// Decode oct-normal from [0,1] storage → [-1,1] encoded xy → reconstruct z.
// Inverse of oct_encode in gbuf_raster.wgsl. The pack pass itself only forwards
// the encoded xy; this helper is kept for consumers needing the full 3D normal.
fn decode_oct_normal(rg: vec2f) -> vec3f {
    let f = rg * 2.0 - vec2f(1.0);
    var n = vec3f(f.x, f.y, 1.0 - abs(f.x) - abs(f.y));
    let t = max(-n.z, 0.0);
    // Fold: move the lower hemisphere back — subtract t from a positive
    // component, add it to a negative one.
    n.x += select(t, -t, n.x >= 0.0);
    n.y += select(t, -t, n.y >= 0.0);
    return normalize(n);
}

// Pack all G-buffer channels for one pixel into feat_tex0 (8×f16) and
// feat_tex1 (12×u8 + 4 spare bytes).
@compute @workgroup_size(8, 8)
fn pack_features(@builtin(global_invocation_id) id: vec3u) {
    let coord = vec2i(id.xy);
    let dims = vec2i(textureDimensions(gbuf_albedo));
    if (coord.x >= dims.x || coord.y >= dims.y) { return; }

    let uv = (vec2f(coord) + vec2f(0.5)) / gbuf_res.resolution;

    // --- Geometric channels (high precision, f16 packed) ---
    let albedo = textureLoad(gbuf_albedo, coord, 0).rgb;
    let nm = textureLoad(gbuf_normal_mat, coord, 0);
    let depth_raw = load_depth(coord);

    // Finite-difference depth gradient (central difference; load_depth clamps
    // coordinates at the borders).
    let dzdx = (load_depth(coord + vec2i(1, 0)) - load_depth(coord - vec2i(1, 0))) * 0.5;
    let dzdy = (load_depth(coord + vec2i(0, 1)) - load_depth(coord - vec2i(0, 1))) * 0.5;

    // Normal is stored oct-encoded in RG as [0,1]; the CNN consumes the raw
    // encoded xy remapped to [-1,1] — no need to reconstruct the 3D vector here.
    let oct_xy = nm.rg * 2.0 - vec2f(1.0);

    // Texture 0: 4 u32, each = pack2x16float of two f16 values
    // [0] albedo.r | albedo.g
    // [1] albedo.b | normal.x (oct, [-1,1])
    // [2] normal.y | depth
    // [3] dzdx     | dzdy
    let t0 = vec4u(
        pack2x16float(albedo.rg),
        pack2x16float(vec2f(albedo.b, oct_xy.x)),
        pack2x16float(vec2f(oct_xy.y, depth_raw)),
        pack2x16float(vec2f(dzdx, dzdy))
    );
    textureStore(feat_tex0, coord, t0);

    // --- Context channels (low precision, u8 packed) ---
    let mat_id_u8 = nm.b; // mat_id already normalized to [0,1] by the raster pass
    let shadow = textureLoad(gbuf_shadow, coord, 0).r;
    let transp = textureLoad(gbuf_transp, coord, 0).r;
    let prev = textureSampleLevel(prev_cnn, bilinear_sampler, uv, 0.0).rgb;

    // Downsampled albedo context, both windows anchored at this pixel:
    // mip1: 2×2 box with its top-left texel at the pixel itself (coord is
    //       always >= 0, so no lower clamp is needed);
    // mip2: 4×4 box shifted up-left by 1 (clamped at the border) so the pixel
    //       sits inside the window.
    let mip1 = box2(coord);
    let mip2 = box4(max(coord - vec2i(1), vec2i(0)));

    // Texture 1: 4 u32, each = pack4x8unorm of four u8 values
    // [0] mat_id | prev.r | prev.g | prev.b
    // [1] mip1.r | mip1.g | mip1.b | mip2.r
    // [2] mip2.g | mip2.b | shadow | transp
    // [3] spare (0)
    let t1 = vec4u(
        pack4x8unorm(vec4f(mat_id_u8, prev.r, prev.g, prev.b)),
        pack4x8unorm(vec4f(mip1.r, mip1.g, mip1.b, mip2.r)),
        pack4x8unorm(vec4f(mip2.g, mip2.b, shadow, transp)),
        0u
    );
    textureStore(feat_tex1, coord, t1);
}

// ============================================================================
// File: cnn_v3/shaders/gbuf_raster.wgsl (new file)
// G-buffer rasterization shader for CNN v3.
// Pass 1: Proxy geometry → MRT (albedo rgba16float, normal_mat rgba16float, depth32)
// Uses GlobalUniforms, ObjectData, ObjectsBuffer from common_uniforms.
#include "common_uniforms"

@group(0) @binding(0) var<uniform> globals: GlobalUniforms;
@group(0) @binding(1) var<storage, read> object_data: ObjectsBuffer;

struct VertexOutput {
    @builtin(position) position: vec4f,
    @location(0) world_pos: vec3f,
    @location(1) world_normal: vec3f,
    @location(2) color: vec4f,
    @location(3) @interpolate(flat) instance_index: u32,
}

// Octahedral encoding: maps a unit normal to [-1,1]^2.
fn oct_encode(n: vec3f) -> vec2f {
    let inv_l1 = 1.0 / (abs(n.x) + abs(n.y) + abs(n.z));
    var p = n.xy * inv_l1;
    // Fold the lower hemisphere over the diagonals.
    if (n.z < 0.0) {
        let s = vec2f(
            select(-1.0, 1.0, p.x >= 0.0),
            select(-1.0, 1.0, p.y >= 0.0)
        );
        p = (1.0 - abs(p.yx)) * s;
    }
    return p; // in [-1, 1]
}

// Vertex stage: expands a 36-vertex unit proxy box per instance, transforms it
// by the instance's model matrix, and forwards world-space position/normal,
// instance color, and a flat instance index to the fragment stage.
@vertex
fn vs_main(
    @builtin(vertex_index) vertex_index: u32,
    @builtin(instance_index) instance_index: u32
) -> VertexOutput {
    // Proxy box vertices (same as renderer_3d.wgsl)
    var pos = array<vec3f, 36>(
        vec3f(-1.0, -1.0,  1.0), vec3f( 1.0, -1.0,  1.0), vec3f( 1.0,  1.0,  1.0),
        vec3f(-1.0, -1.0,  1.0), vec3f( 1.0,  1.0,  1.0), vec3f(-1.0,  1.0,  1.0),
        vec3f(-1.0, -1.0, -1.0), vec3f(-1.0,  1.0, -1.0), vec3f( 1.0,  1.0, -1.0),
        vec3f(-1.0, -1.0, -1.0), vec3f( 1.0,  1.0, -1.0), vec3f( 1.0, -1.0, -1.0),
        vec3f(-1.0,  1.0, -1.0), vec3f(-1.0,  1.0,  1.0), vec3f( 1.0,  1.0,  1.0),
        vec3f(-1.0,  1.0, -1.0), vec3f( 1.0,  1.0,  1.0), vec3f( 1.0,  1.0, -1.0),
        vec3f(-1.0, -1.0, -1.0), vec3f( 1.0, -1.0, -1.0), vec3f( 1.0, -1.0,  1.0),
        vec3f(-1.0, -1.0, -1.0), vec3f( 1.0, -1.0,  1.0), vec3f(-1.0, -1.0,  1.0),
        vec3f( 1.0, -1.0, -1.0), vec3f( 1.0,  1.0, -1.0), vec3f( 1.0,  1.0,  1.0),
        vec3f( 1.0, -1.0, -1.0), vec3f( 1.0,  1.0,  1.0), vec3f( 1.0, -1.0,  1.0),
        vec3f(-1.0, -1.0, -1.0), vec3f(-1.0, -1.0,  1.0), vec3f(-1.0,  1.0,  1.0),
        vec3f(-1.0, -1.0, -1.0), vec3f(-1.0,  1.0,  1.0), vec3f(-1.0,  1.0, -1.0)
    );

    // Proxy face normals (6 faces × 6 verts = 36, constant per face)
    var nrm = array<vec3f, 36>(
        vec3f(0,0,1), vec3f(0,0,1), vec3f(0,0,1),
        vec3f(0,0,1), vec3f(0,0,1), vec3f(0,0,1),
        vec3f(0,0,-1), vec3f(0,0,-1), vec3f(0,0,-1),
        vec3f(0,0,-1), vec3f(0,0,-1), vec3f(0,0,-1),
        vec3f(0,1,0), vec3f(0,1,0), vec3f(0,1,0),
        vec3f(0,1,0), vec3f(0,1,0), vec3f(0,1,0),
        vec3f(0,-1,0), vec3f(0,-1,0), vec3f(0,-1,0),
        vec3f(0,-1,0), vec3f(0,-1,0), vec3f(0,-1,0),
        vec3f(1,0,0), vec3f(1,0,0), vec3f(1,0,0),
        vec3f(1,0,0), vec3f(1,0,0), vec3f(1,0,0),
        vec3f(-1,0,0), vec3f(-1,0,0), vec3f(-1,0,0),
        vec3f(-1,0,0), vec3f(-1,0,0), vec3f(-1,0,0)
    );

    let obj = object_data.objects[instance_index];
    let p = pos[vertex_index];
    let n = nrm[vertex_index];

    let world_pos = obj.model * vec4f(p, 1.0);
    let clip_pos = globals.view_proj * world_pos;
    // NOTE(review): this applies inv_model to the normal directly. The correct
    // normal transform is transpose(inverse(model)); the two agree only for
    // rotations / uniform scale, or if inv_model already stores the transposed
    // inverse — confirm against the common_uniforms definition.
    let world_normal = normalize((obj.inv_model * vec4f(n, 0.0)).xyz);

    var out: VertexOutput;
    out.position = clip_pos;
    out.world_pos = world_pos.xyz;
    out.world_normal = world_normal;
    out.color = obj.color;
    out.instance_index = instance_index;
    return out;
}

struct GBufOutput {
    @location(0) albedo: vec4f,     // rgba16float: material color
    @location(1) normal_mat: vec4f, // rgba16float: oct-normal XY in RG, mat_id/255 in B
}

// Fragment stage: writes interpolated color and the oct-encoded normal plus a
// per-instance material id (instance_index / 255) into the two MRT targets.
@fragment
fn fs_main(in: VertexOutput) -> GBufOutput {
    // (Removed an unused `object_data.objects[in.instance_index]` fetch — the
    // color arrives interpolated in `in.color` and mat_id derives from the
    // instance index alone.)
    let mat_id = f32(in.instance_index) / 255.0;

    // Oct-encode world normal, remap [-1,1] → [0,1] for storage.
    let oct = oct_encode(normalize(in.world_normal)) * 0.5 + vec2f(0.5);

    var out: GBufOutput;
    out.albedo = vec4f(in.color.rgb, 1.0);
    out.normal_mat = vec4f(oct.x, oct.y, mat_id, 0.0);
    return out;
}
