diff options
| author | skal <pascal.massimino@gmail.com> | 2026-03-20 08:42:07 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-03-20 08:42:07 +0100 |
| commit | f74bcd843c631f82daefe543fca7741fb5bb71f4 (patch) | |
| tree | 0983e6c36fb0f9e2b152f76437ecf91ee1fd99cb /cnn_v3 | |
| parent | a160cc797afb4291d356bdc0cbcf0f110e3ef8a9 (diff) | |
feat(cnn_v3): G-buffer phase 1 + training infrastructure
G-buffer (Phase 1):
- Add NodeTypes GBUF_ALBEDO/DEPTH32/R8/RGBA32UINT to NodeRegistry
- GBufferEffect: MRT raster pass (albedo+normal_mat+depth) + pack compute
- Shaders: gbuf_raster.wgsl (MRT), gbuf_pack.wgsl (feature packing, 32B/px)
- Shadow/SDF passes stubbed (placeholder textures), CMake integration deferred
Training infrastructure (Phase 2):
- blender_export.py: headless EXR export with all G-buffer render passes
- pack_blender_sample.py: EXR → per-channel PNGs (oct-normals, 1/z depth)
- pack_photo_sample.py: photo → zero-filled G-buffer sample layout
handoff(Gemini): G-buffer phases 3-5 remain (U-Net shaders, CNNv3Effect, parity)
Diffstat (limited to 'cnn_v3')
| -rw-r--r-- | cnn_v3/shaders/gbuf_pack.wgsl | 123 | ||||
| -rw-r--r-- | cnn_v3/shaders/gbuf_raster.wgsl | 105 | ||||
| -rw-r--r-- | cnn_v3/src/gbuffer_effect.cc | 596 | ||||
| -rw-r--r-- | cnn_v3/src/gbuffer_effect.h | 79 | ||||
| -rw-r--r-- | cnn_v3/training/blender_export.py | 160 | ||||
| -rw-r--r-- | cnn_v3/training/pack_blender_sample.py | 268 | ||||
| -rw-r--r-- | cnn_v3/training/pack_photo_sample.py | 148 |
7 files changed, 1479 insertions, 0 deletions
diff --git a/cnn_v3/shaders/gbuf_pack.wgsl b/cnn_v3/shaders/gbuf_pack.wgsl new file mode 100644 index 0000000..71d8471 --- /dev/null +++ b/cnn_v3/shaders/gbuf_pack.wgsl @@ -0,0 +1,123 @@ +// G-buffer pack compute shader for CNN v3 +// Pass 4: Pack all G-buffer channels into two rgba32uint feature textures (32 bytes/pixel) +// Output feat_tex0 holds 8×f16 geometric channels; feat_tex1 holds 12×u8 context channels. + +struct GBufRes { + resolution: vec2f, +} + +@group(0) @binding(0) var<uniform> gbuf_res: GBufRes; +@group(0) @binding(1) var gbuf_albedo: texture_2d<f32>; +@group(0) @binding(2) var gbuf_normal_mat: texture_2d<f32>; +@group(0) @binding(3) var gbuf_depth: texture_depth_2d; +@group(0) @binding(4) var gbuf_shadow: texture_2d<f32>; +@group(0) @binding(5) var gbuf_transp: texture_2d<f32>; +@group(0) @binding(6) var prev_cnn: texture_2d<f32>; +@group(0) @binding(7) var feat_tex0: texture_storage_2d<rgba32uint, write>; +@group(0) @binding(8) var feat_tex1: texture_storage_2d<rgba32uint, write>; +@group(0) @binding(9) var bilinear_sampler: sampler; + +// Sample depth texture at integer coordinate, clamp to borders. +fn load_depth(coord: vec2i) -> f32 { + let dims = vec2i(textureDimensions(gbuf_depth)); + let c = clamp(coord, vec2i(0), dims - vec2i(1)); + return textureLoad(gbuf_depth, c, 0); +} + +// Box-filter albedo: average of 2×2 texels starting at top-left corner `tl`. +fn box2(tl: vec2i) -> vec3f { + let a = textureLoad(gbuf_albedo, tl + vec2i(0, 0), 0).rgb; + let b = textureLoad(gbuf_albedo, tl + vec2i(1, 0), 0).rgb; + let c = textureLoad(gbuf_albedo, tl + vec2i(0, 1), 0).rgb; + let d = textureLoad(gbuf_albedo, tl + vec2i(1, 1), 0).rgb; + return (a + b + c + d) * 0.25; +} + +// Box-filter albedo: average of 4×4 texels starting at top-left corner `tl`. +fn box4(tl: vec2i) -> vec3f { + var acc = vec3f(0.0); + for (var dy: i32 = 0; dy < 4; dy++) { + for (var dx: i32 = 0; dx < 4; dx++) { + acc += textureLoad(gbuf_albedo, tl + vec2i(dx, dy), 0).rgb; + } + } + return acc * (1.0 / 16.0); +} + +// Decode oct-normal from [0,1] storage → [-1,1] encoded xy → reconstruct z. +fn decode_oct_normal(rg: vec2f) -> vec3f { + let f = rg * 2.0 - vec2f(1.0); + var n = vec3f(f.x, f.y, 1.0 - abs(f.x) - abs(f.y)); + let t = max(-n.z, 0.0); + n.x += select(t, -t, n.x >= 0.0); + n.y += select(t, -t, n.y >= 0.0); + return normalize(n); +} + +@compute @workgroup_size(8, 8) +fn pack_features(@builtin(global_invocation_id) id: vec3u) { + let coord = vec2i(id.xy); + let dims = vec2i(textureDimensions(gbuf_albedo)); + if (coord.x >= dims.x || coord.y >= dims.y) { return; } + + let uv = (vec2f(coord) + vec2f(0.5)) / gbuf_res.resolution; + + // --- Geometric channels (high precision, f16 packed) --- + let albedo = textureLoad(gbuf_albedo, coord, 0).rgb; + let nm = textureLoad(gbuf_normal_mat, coord, 0); + let depth_raw = load_depth(coord); + + // Finite-difference depth gradient (central difference, clamped coords) + let dzdx = (load_depth(coord + vec2i(1, 0)) - load_depth(coord - vec2i(1, 0))) * 0.5; + let dzdy = (load_depth(coord + vec2i(0, 1)) - load_depth(coord - vec2i(0, 1))) * 0.5; + + // Normal: stored as oct-encoded [0,1] in RG; extract just the encoded xy for feat_tex0 + let normal_enc = nm.rg; // already in [0,1] — decode to get the xy for CNN input + let n3 = decode_oct_normal(normal_enc); + // Store oct-encoded in [-1,1] remapped back to what CNN expects (the [-1,1] oct xy) + let oct_xy = normal_enc * 2.0 - vec2f(1.0); // remap [0,1] → [-1,1] + + // Texture 0: 4 u32, each = pack2x16float of two f16 values + // [0] albedo.r | albedo.g + // [1] albedo.b | normal.x (oct, [-1,1]) + // [2] normal.y | depth + // [3] dzdx | dzdy + let t0 = vec4u( + pack2x16float(albedo.rg), + pack2x16float(vec2f(albedo.b, oct_xy.x)), + pack2x16float(vec2f(oct_xy.y, depth_raw)), + pack2x16float(vec2f(dzdx, dzdy)) + ); + textureStore(feat_tex0, coord, t0); + + // --- Context channels (low precision, u8 packed) --- + let mat_id_u8 = nm.b; // mat_id already in [0,1] + let shadow = textureLoad(gbuf_shadow, coord, 0).r; + let transp = textureLoad(gbuf_transp, coord, 0).r; + let prev = textureSampleLevel(prev_cnn, bilinear_sampler, uv, 0.0).rgb; + + // MIP 1: 2×2 box filter (half resolution context) + // Use top-left aligned 2×2 block at half-res position + let tl1 = coord * 2; // this pixel's 2×2 region in full-res (mip1 is at half-res) + // Actually we want to sample the neighborhood around this pixel for downsampled context. + // mip1: sample a 2×2 box centered on the pixel in full-res coordinates + let tl1c = max(coord - vec2i(0), vec2i(0)); + let mip1 = box2(tl1c); + + // mip2: sample a 4×4 box + let tl2c = max(coord - vec2i(1), vec2i(0)); + let mip2 = box4(tl2c); + + // Texture 1: 4 u32, each = pack4x8unorm of four u8 values + // [0] mat_id | prev.r | prev.g | prev.b + // [1] mip1.r | mip1.g | mip1.b | mip2.r + // [2] mip2.g | mip2.b | shadow | transp + // [3] spare (0) + let t1 = vec4u( + pack4x8unorm(vec4f(mat_id_u8, prev.r, prev.g, prev.b)), + pack4x8unorm(vec4f(mip1.r, mip1.g, mip1.b, mip2.r)), + pack4x8unorm(vec4f(mip2.g, mip2.b, shadow, transp)), + 0u + ); + textureStore(feat_tex1, coord, t1); +} diff --git a/cnn_v3/shaders/gbuf_raster.wgsl b/cnn_v3/shaders/gbuf_raster.wgsl new file mode 100644 index 0000000..c762db2 --- /dev/null +++ b/cnn_v3/shaders/gbuf_raster.wgsl @@ -0,0 +1,105 @@ +// G-buffer rasterization shader for CNN v3 +// Pass 1: Proxy geometry → MRT (albedo rgba16float, normal_mat rgba16float, depth32) +// Uses GlobalUniforms, ObjectData, ObjectsBuffer from common_uniforms. + +#include "common_uniforms" + +@group(0) @binding(0) var<uniform> globals: GlobalUniforms; +@group(0) @binding(1) var<storage, read> object_data: ObjectsBuffer; + +struct VertexOutput { + @builtin(position) position: vec4f, + @location(0) world_pos: vec3f, + @location(1) world_normal: vec3f, + @location(2) color: vec4f, + @location(3) @interpolate(flat) instance_index: u32, +} + +// Octahedral encoding: maps unit normal to [-1,1]^2 +fn oct_encode(n: vec3f) -> vec2f { + let inv_l1 = 1.0 / (abs(n.x) + abs(n.y) + abs(n.z)); + var p = n.xy * inv_l1; + // Fold lower hemisphere + if (n.z < 0.0) { + let s = vec2f( + select(-1.0, 1.0, p.x >= 0.0), + select(-1.0, 1.0, p.y >= 0.0) + ); + p = (1.0 - abs(p.yx)) * s; + } + return p; // in [-1, 1] +} + +@vertex +fn vs_main( + @builtin(vertex_index) vertex_index: u32, + @builtin(instance_index) instance_index: u32 +) -> VertexOutput { + // Proxy box vertices (same as renderer_3d.wgsl) + var pos = array<vec3f, 36>( + vec3f(-1.0, -1.0, 1.0), vec3f( 1.0, -1.0, 1.0), vec3f( 1.0, 1.0, 1.0), + vec3f(-1.0, -1.0, 1.0), vec3f( 1.0, 1.0, 1.0), vec3f(-1.0, 1.0, 1.0), + vec3f(-1.0, -1.0, -1.0), vec3f(-1.0, 1.0, -1.0), vec3f( 1.0, 1.0, -1.0), + vec3f(-1.0, -1.0, -1.0), vec3f( 1.0, 1.0, -1.0), vec3f( 1.0, -1.0, -1.0), + vec3f(-1.0, 1.0, -1.0), vec3f(-1.0, 1.0, 1.0), vec3f( 1.0, 1.0, 1.0), + vec3f(-1.0, 1.0, -1.0), vec3f( 1.0, 1.0, 1.0), vec3f( 1.0, 1.0, -1.0), + vec3f(-1.0, -1.0, -1.0), vec3f( 1.0, -1.0, -1.0), vec3f( 1.0, -1.0, 1.0), + vec3f(-1.0, -1.0, -1.0), vec3f( 1.0, -1.0, 1.0), vec3f(-1.0, -1.0, 1.0), + vec3f( 1.0, -1.0, -1.0), vec3f( 1.0, 1.0, -1.0), vec3f( 1.0, 1.0, 1.0), + vec3f( 1.0, -1.0, -1.0), vec3f( 1.0, 1.0, 1.0), vec3f( 1.0, -1.0, 1.0), + vec3f(-1.0, -1.0, -1.0), vec3f(-1.0, -1.0, 1.0), vec3f(-1.0, 1.0, 1.0), + vec3f(-1.0, -1.0, -1.0), vec3f(-1.0, 1.0, 1.0), vec3f(-1.0, 1.0, -1.0) + ); + + // Proxy face normals (one per 2 triangles = 6 faces × 6 verts = 36) + var nrm = array<vec3f, 36>( + vec3f(0,0,1), vec3f(0,0,1), vec3f(0,0,1), + vec3f(0,0,1), vec3f(0,0,1), vec3f(0,0,1), + vec3f(0,0,-1), vec3f(0,0,-1), vec3f(0,0,-1), + vec3f(0,0,-1), vec3f(0,0,-1), vec3f(0,0,-1), + vec3f(0,1,0), vec3f(0,1,0), vec3f(0,1,0), + vec3f(0,1,0), vec3f(0,1,0), vec3f(0,1,0), + vec3f(0,-1,0), vec3f(0,-1,0), vec3f(0,-1,0), + vec3f(0,-1,0), vec3f(0,-1,0), vec3f(0,-1,0), + vec3f(1,0,0), vec3f(1,0,0), vec3f(1,0,0), + vec3f(1,0,0), vec3f(1,0,0), vec3f(1,0,0), + vec3f(-1,0,0), vec3f(-1,0,0), vec3f(-1,0,0), + vec3f(-1,0,0), vec3f(-1,0,0), vec3f(-1,0,0) + ); + + let obj = object_data.objects[instance_index]; + let p = pos[vertex_index]; + let n = nrm[vertex_index]; + + let world_pos = obj.model * vec4f(p, 1.0); + let clip_pos = globals.view_proj * world_pos; + // Transform normal by inverse-transpose (upper-left 3×3 of inv_model^T) + let world_normal = normalize((obj.inv_model * vec4f(n, 0.0)).xyz); + + var out: VertexOutput; + out.position = clip_pos; + out.world_pos = world_pos.xyz; + out.world_normal = world_normal; + out.color = obj.color; + out.instance_index = instance_index; + return out; +} + +struct GBufOutput { + @location(0) albedo: vec4f, // rgba16float: material color + @location(1) normal_mat: vec4f, // rgba16float: oct-normal XY in RG, mat_id/255 in B +} + +@fragment +fn fs_main(in: VertexOutput) -> GBufOutput { + let obj = object_data.objects[in.instance_index]; + let mat_id = f32(in.instance_index) / 255.0; + + // Oct-encode world normal, remap [-1,1] → [0,1] for storage + let oct = oct_encode(normalize(in.world_normal)) * 0.5 + vec2f(0.5); + + var out: GBufOutput; + out.albedo = vec4f(in.color.rgb, 1.0); + out.normal_mat = vec4f(oct.x, oct.y, mat_id, 0.0); + return out; +} diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc new file mode 100644 index 0000000..fb0146e --- /dev/null +++ b/cnn_v3/src/gbuffer_effect.cc @@ -0,0 +1,596 @@ +// GBufferEffect implementation +// Rasterizes proxy geometry to MRT G-buffer, then packs into CNN v3 feature textures. + +#include "gbuffer_effect.h" +#include "3d/object.h" +#include "gpu/gpu.h" +#include "util/fatal_error.h" +#include "util/mini_math.h" +#include <cstring> +#include <vector> + +// Shader source (loaded from asset at runtime — declared extern by the build system) +// For standalone use outside the asset system, the caller must ensure the WGSL +// source strings are available. They are declared here as weak-linkable externs. +extern const char* gbuf_raster_wgsl; +extern const char* gbuf_pack_wgsl; + +// Maximum number of objects the G-buffer supports per frame. +static const int kGBufMaxObjects = 256; + +// ObjectData struct that mirrors the WGSL layout in gbuf_raster.wgsl and renderer.h +struct GBufObjectData { + mat4 model; + mat4 inv_model; + vec4 color; + vec4 params; // x = object type, y = plane_distance +}; +static_assert(sizeof(GBufObjectData) == sizeof(float) * 40, + "GBufObjectData must be 160 bytes"); + +// GlobalUniforms struct mirroring renderer.h +struct GBufGlobalUniforms { + mat4 view_proj; + mat4 inv_view_proj; + vec4 camera_pos_time; + vec4 params; // x = num_objects + vec2 resolution; + vec2 padding; +}; +static_assert(sizeof(GBufGlobalUniforms) == sizeof(float) * 44, + "GBufGlobalUniforms must be 176 bytes"); + +// Helper: create a 1×1 placeholder texture of a given format cleared to `value`. +static WGPUTexture create_placeholder_tex(WGPUDevice device, + WGPUTextureFormat format, + float value) { + WGPUTextureDescriptor desc = {}; + desc.usage = (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding | + WGPUTextureUsage_CopyDst); + desc.dimension = WGPUTextureDimension_2D; + desc.size = {1, 1, 1}; + desc.format = format; + desc.mipLevelCount = 1; + desc.sampleCount = 1; + WGPUTexture tex = wgpuDeviceCreateTexture(device, &desc); + return tex; +} + +// Helper: write a single RGBA float pixel to a texture via queue. +static void write_placeholder_pixel(WGPUQueue queue, WGPUTexture tex, + float r, float g, float b, float a) { + const float data[4] = {r, g, b, a}; + WGPUTexelCopyTextureInfo dst = {}; + dst.texture = tex; + dst.mipLevel = 0; + dst.origin = {0, 0, 0}; + dst.aspect = WGPUTextureAspect_All; + + WGPUTexelCopyBufferLayout layout = {}; + layout.offset = 0; + layout.bytesPerRow = 16; // 4 × sizeof(float) + layout.rowsPerImage = 1; + + const WGPUExtent3D extent = {1, 1, 1}; + wgpuQueueWriteTexture(queue, &dst, data, sizeof(data), &layout, &extent); +} + +// Create bilinear sampler. +static WGPUSampler create_bilinear_sampler(WGPUDevice device) { + WGPUSamplerDescriptor desc = {}; + desc.addressModeU = WGPUAddressMode_ClampToEdge; + desc.addressModeV = WGPUAddressMode_ClampToEdge; + desc.magFilter = WGPUFilterMode_Linear; + desc.minFilter = WGPUFilterMode_Linear; + desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + desc.maxAnisotropy = 1; + return wgpuDeviceCreateSampler(device, &desc); +} + +// ---- GBufferEffect ---- + +GBufferEffect::GBufferEffect(const GpuContext& ctx, + const std::vector<std::string>& inputs, + const std::vector<std::string>& outputs, + float start_time, float end_time) + : Effect(ctx, inputs, outputs, start_time, end_time) { + HEADLESS_RETURN_IF_NULL(ctx_.device); + + // Derive internal node name prefix from the first output name. + const std::string& prefix = outputs.empty() ? "gbuf" : outputs[0]; + node_albedo_ = prefix + "_albedo"; + node_normal_mat_ = prefix + "_normal_mat"; + node_depth_ = prefix + "_depth"; + node_shadow_ = prefix + "_shadow"; + node_transp_ = prefix + "_transp"; + node_feat0_ = outputs.size() > 0 ? outputs[0] : prefix + "_feat0"; + node_feat1_ = outputs.size() > 1 ? outputs[1] : prefix + "_feat1"; + + // Allocate GPU buffers for scene data. + global_uniforms_buf_ = + gpu_create_buffer(ctx_.device, sizeof(GBufGlobalUniforms), + WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); + + ensure_objects_buffer(kGBufMaxObjects); + + // Resolution uniform for pack shader. + pack_res_uniform_.init(ctx_.device); + + // Placeholder shadow (1.0 = fully lit) and transp (0.0 = opaque) textures. + shadow_placeholder_tex_.set( + create_placeholder_tex(ctx_.device, WGPUTextureFormat_RGBA32Float, 1.0f)); + write_placeholder_pixel(ctx_.queue, + shadow_placeholder_tex_.get(), 1.0f, 0.0f, 0.0f, 1.0f); + + transp_placeholder_tex_.set( + create_placeholder_tex(ctx_.device, WGPUTextureFormat_RGBA32Float, 0.0f)); + write_placeholder_pixel(ctx_.queue, + transp_placeholder_tex_.get(), 0.0f, 0.0f, 0.0f, 1.0f); + + WGPUTextureViewDescriptor vd = {}; + vd.format = WGPUTextureFormat_RGBA32Float; + vd.dimension = WGPUTextureViewDimension_2D; + vd.baseMipLevel = 0; + vd.mipLevelCount = 1; + vd.baseArrayLayer = 0; + vd.arrayLayerCount = 1; + vd.aspect = WGPUTextureAspect_All; + + shadow_placeholder_view_.set( + wgpuTextureCreateView(shadow_placeholder_tex_.get(), &vd)); + transp_placeholder_view_.set( + wgpuTextureCreateView(transp_placeholder_tex_.get(), &vd)); + + create_raster_pipeline(); + create_pack_pipeline(); +} + +void GBufferEffect::declare_nodes(NodeRegistry& registry) { + registry.declare_node(node_albedo_, NodeType::GBUF_ALBEDO, -1, -1); + registry.declare_node(node_normal_mat_, NodeType::GBUF_ALBEDO, -1, -1); + registry.declare_node(node_depth_, NodeType::GBUF_DEPTH32, -1, -1); + registry.declare_node(node_shadow_, NodeType::GBUF_R8, -1, -1); + registry.declare_node(node_transp_, NodeType::GBUF_R8, -1, -1); + // feat_tex0 / feat_tex1 are the declared output_nodes_ — they get registered + // by the sequence infrastructure; declare them here as well if not already. + if (!registry.has_node(node_feat0_)) { + registry.declare_node(node_feat0_, NodeType::GBUF_RGBA32UINT, -1, -1); + } + if (!registry.has_node(node_feat1_)) { + registry.declare_node(node_feat1_, NodeType::GBUF_RGBA32UINT, -1, -1); + } +} + +void GBufferEffect::set_scene(const Scene* scene, const Camera* camera) { + scene_ = scene; + camera_ = camera; +} + +void GBufferEffect::render(WGPUCommandEncoder encoder, + const UniformsSequenceParams& params, + NodeRegistry& nodes) { + if (!scene_ || !camera_) { + return; + } + + upload_scene_data(*scene_, *camera_, params.time); + + // Update resolution uniform for pack shader. + GBufResUniforms res_uni; + res_uni.resolution = params.resolution; + res_uni._pad0 = 0.0f; + res_uni._pad1 = 0.0f; + pack_res_uniform_.update(ctx_.queue, res_uni); + + WGPUTextureView albedo_view = nodes.get_view(node_albedo_); + WGPUTextureView normal_mat_view = nodes.get_view(node_normal_mat_); + WGPUTextureView depth_view = nodes.get_view(node_depth_); + WGPUTextureView feat0_view = nodes.get_view(node_feat0_); + WGPUTextureView feat1_view = nodes.get_view(node_feat1_); + + // prev_cnn: first input node if available, else dummy. + WGPUTextureView prev_view = nullptr; + if (!input_nodes_.empty()) { + prev_view = nodes.get_view(input_nodes_[0]); + } + if (!prev_view) { + prev_view = dummy_texture_view_.get(); + } + + // --- Pass 1: MRT rasterization --- + update_raster_bind_group(nodes); + + WGPURenderPassColorAttachment color_attachments[2] = {}; + // Attachment 0: albedo + color_attachments[0].view = albedo_view; + color_attachments[0].loadOp = WGPULoadOp_Clear; + color_attachments[0].storeOp = WGPUStoreOp_Store; + color_attachments[0].clearValue = {0.0f, 0.0f, 0.0f, 1.0f}; + color_attachments[0].depthSlice = WGPU_DEPTH_SLICE_UNDEFINED; + // Attachment 1: normal_mat + color_attachments[1].view = normal_mat_view; + color_attachments[1].loadOp = WGPULoadOp_Clear; + color_attachments[1].storeOp = WGPUStoreOp_Store; + color_attachments[1].clearValue = {0.5f, 0.5f, 0.0f, 0.0f}; + color_attachments[1].depthSlice = WGPU_DEPTH_SLICE_UNDEFINED; + + WGPURenderPassDepthStencilAttachment depth_attachment = {}; + depth_attachment.view = depth_view; + depth_attachment.depthLoadOp = WGPULoadOp_Clear; + depth_attachment.depthStoreOp = WGPUStoreOp_Store; + depth_attachment.depthClearValue = 1.0f; + depth_attachment.depthReadOnly = false; + + WGPURenderPassDescriptor raster_pass_desc = {}; + raster_pass_desc.colorAttachmentCount = 2; + raster_pass_desc.colorAttachments = color_attachments; + raster_pass_desc.depthStencilAttachment = &depth_attachment; + + const int num_objects = + (int)(scene_->objects.size() < (size_t)kGBufMaxObjects + ? scene_->objects.size() + : (size_t)kGBufMaxObjects); + + if (num_objects > 0 && raster_pipeline_.get() != nullptr) { + WGPURenderPassEncoder raster_pass = + wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc); + wgpuRenderPassEncoderSetPipeline(raster_pass, raster_pipeline_.get()); + wgpuRenderPassEncoderSetBindGroup(raster_pass, 0, + raster_bind_group_.get(), 0, nullptr); + // Draw 36 vertices (proxy box) × num_objects instances. + wgpuRenderPassEncoderDraw(raster_pass, 36, (uint32_t)num_objects, 0, 0); + wgpuRenderPassEncoderEnd(raster_pass); + wgpuRenderPassEncoderRelease(raster_pass); + } else { + // Clear passes with no draws still need to be submitted. + WGPURenderPassEncoder raster_pass = + wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc); + wgpuRenderPassEncoderEnd(raster_pass); + wgpuRenderPassEncoderRelease(raster_pass); + } + + // Pass 2: SDF raymarching — TODO (placeholder: shadow=1, transp=0 already set) + // Pass 3: Lighting/shadow — TODO + + // --- Pass 4: Pack compute --- + // Rebuild pack bind group with current node views. + // Construct a temporary bilinear sampler for this pass. + WGPUSampler bilinear = create_bilinear_sampler(ctx_.device); + + // Get texture views from nodes. + // shadow / transp are GBUF_R8 nodes; use their views. + WGPUTextureView shadow_view = nodes.get_view(node_shadow_); + WGPUTextureView transp_view = nodes.get_view(node_transp_); + + // Build pack bind group (bindings 0-9). + WGPUBindGroupEntry pack_entries[10] = {}; + pack_entries[0].binding = 0; + pack_entries[0].buffer = pack_res_uniform_.get().buffer; + pack_entries[0].size = sizeof(GBufResUniforms); + + pack_entries[1].binding = 1; + pack_entries[1].textureView = albedo_view; + + pack_entries[2].binding = 2; + pack_entries[2].textureView = normal_mat_view; + + pack_entries[3].binding = 3; + pack_entries[3].textureView = depth_view; + + pack_entries[4].binding = 4; + pack_entries[4].textureView = shadow_view; + + pack_entries[5].binding = 5; + pack_entries[5].textureView = transp_view; + + pack_entries[6].binding = 6; + pack_entries[6].textureView = prev_view; + + pack_entries[7].binding = 7; + pack_entries[7].textureView = feat0_view; + + pack_entries[8].binding = 8; + pack_entries[8].textureView = feat1_view; + + pack_entries[9].binding = 9; + pack_entries[9].sampler = bilinear; + + WGPUBindGroupLayout pack_bgl = + wgpuComputePipelineGetBindGroupLayout(pack_pipeline_.get(), 0); + + WGPUBindGroupDescriptor pack_bg_desc = {}; + pack_bg_desc.layout = pack_bgl; + pack_bg_desc.entryCount = 10; + pack_bg_desc.entries = pack_entries; + + WGPUBindGroup pack_bg = wgpuDeviceCreateBindGroup(ctx_.device, &pack_bg_desc); + wgpuBindGroupLayoutRelease(pack_bgl); + + WGPUComputePassDescriptor compute_pass_desc = {}; + WGPUComputePassEncoder compute_pass = + wgpuCommandEncoderBeginComputePass(encoder, &compute_pass_desc); + wgpuComputePassEncoderSetPipeline(compute_pass, pack_pipeline_.get()); + wgpuComputePassEncoderSetBindGroup(compute_pass, 0, pack_bg, 0, nullptr); + + const uint32_t wg_x = ((uint32_t)width_ + 7u) / 8u; + const uint32_t wg_y = ((uint32_t)height_ + 7u) / 8u; + wgpuComputePassEncoderDispatchWorkgroups(compute_pass, wg_x, wg_y, 1); + wgpuComputePassEncoderEnd(compute_pass); + wgpuComputePassEncoderRelease(compute_pass); + + wgpuBindGroupRelease(pack_bg); + wgpuSamplerRelease(bilinear); +} + +// ---- private helpers ---- + +void GBufferEffect::ensure_objects_buffer(int num_objects) { + if (num_objects <= objects_buf_capacity_) { + return; + } + if (objects_buf_.buffer) { + wgpuBufferRelease(objects_buf_.buffer); + } + objects_buf_ = gpu_create_buffer( + ctx_.device, (size_t)num_objects * sizeof(GBufObjectData), + WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst); + objects_buf_capacity_ = num_objects; +} + +void GBufferEffect::upload_scene_data(const Scene& scene, + const Camera& camera, float time) { + const int num_objects = + (int)(scene.objects.size() < (size_t)kGBufMaxObjects + ? scene.objects.size() + : (size_t)kGBufMaxObjects); + + const mat4 view = camera.get_view_matrix(); + const mat4 proj = camera.get_projection_matrix(); + const mat4 vp = proj * view; + + GBufGlobalUniforms gu = {}; + gu.view_proj = vp; + gu.inv_view_proj = vp.inverse(); + gu.camera_pos_time = vec4(camera.position.x, camera.position.y, + camera.position.z, time); + gu.params = vec4((float)num_objects, 0.0f, 0.0f, 0.0f); + gu.resolution = vec2((float)width_, (float)height_); + gu.padding = vec2(0.0f, 0.0f); + + wgpuQueueWriteBuffer(ctx_.queue, global_uniforms_buf_.buffer, 0, + &gu, sizeof(GBufGlobalUniforms)); + + // Upload object data. + if (num_objects > 0) { + ensure_objects_buffer(num_objects); + std::vector<GBufObjectData> obj_data; + obj_data.reserve((size_t)num_objects); + for (int i = 0; i < num_objects; ++i) { + const Object3D& obj = scene.objects[(size_t)i]; + const mat4 m = obj.get_model_matrix(); + GBufObjectData d; + d.model = m; + d.inv_model = m.inverse(); + d.color = obj.color; + d.params = vec4(0.0f, 0.0f, 0.0f, 0.0f); + obj_data.push_back(d); + } + wgpuQueueWriteBuffer(ctx_.queue, objects_buf_.buffer, 0, + obj_data.data(), + (size_t)num_objects * sizeof(GBufObjectData)); + } +} + +void GBufferEffect::create_raster_pipeline() { + HEADLESS_RETURN_IF_NULL(ctx_.device); + + // Load shader source. + const char* src = gbuf_raster_wgsl; + if (!src) { + return; // Asset not loaded yet; pipeline creation deferred. + } + + WGPUShaderSourceWGSL wgsl_src = {}; + wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; + wgsl_src.code = str_view(src); + + WGPUShaderModuleDescriptor shader_desc = {}; + shader_desc.nextInChain = &wgsl_src.chain; + WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); + + // Bind group layout: B0 = GlobalUniforms, B1 = ObjectsBuffer (storage read) + WGPUBindGroupLayoutEntry bgl_entries[2] = {}; + bgl_entries[0].binding = 0; + bgl_entries[0].visibility = + (WGPUShaderStage)(WGPUShaderStage_Vertex | WGPUShaderStage_Fragment); + bgl_entries[0].buffer.type = WGPUBufferBindingType_Uniform; + bgl_entries[0].buffer.minBindingSize = sizeof(GBufGlobalUniforms); + + bgl_entries[1].binding = 1; + bgl_entries[1].visibility = + (WGPUShaderStage)(WGPUShaderStage_Vertex | WGPUShaderStage_Fragment); + bgl_entries[1].buffer.type = WGPUBufferBindingType_ReadOnlyStorage; + bgl_entries[1].buffer.minBindingSize = sizeof(GBufObjectData); + + WGPUBindGroupLayoutDescriptor bgl_desc = {}; + bgl_desc.entryCount = 2; + bgl_desc.entries = bgl_entries; + WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc); + + WGPUPipelineLayoutDescriptor pl_desc = {}; + pl_desc.bindGroupLayoutCount = 1; + pl_desc.bindGroupLayouts = &bgl; + WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc); + + // Two color targets: albedo (rgba16float) and normal_mat (rgba16float) + WGPUColorTargetState color_targets[2] = {}; + color_targets[0].format = WGPUTextureFormat_RGBA16Float; + color_targets[0].writeMask = WGPUColorWriteMask_All; + color_targets[1].format = WGPUTextureFormat_RGBA16Float; + color_targets[1].writeMask = WGPUColorWriteMask_All; + + WGPUFragmentState frag = {}; + frag.module = shader; + frag.entryPoint = str_view("fs_main"); + frag.targetCount = 2; + frag.targets = color_targets; + + WGPUDepthStencilState ds = {}; + ds.format = WGPUTextureFormat_Depth32Float; + ds.depthWriteEnabled = WGPUOptionalBool_True; + ds.depthCompare = WGPUCompareFunction_Less; + + WGPURenderPipelineDescriptor pipe_desc = {}; + pipe_desc.layout = pl; + pipe_desc.vertex.module = shader; + pipe_desc.vertex.entryPoint = str_view("vs_main"); + pipe_desc.fragment = &frag; + pipe_desc.depthStencil = &ds; + pipe_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList; + pipe_desc.primitive.cullMode = WGPUCullMode_Back; + pipe_desc.multisample.count = 1; + pipe_desc.multisample.mask = 0xFFFFFFFF; + + raster_pipeline_.set(wgpuDeviceCreateRenderPipeline(ctx_.device, &pipe_desc)); + + wgpuPipelineLayoutRelease(pl); + wgpuBindGroupLayoutRelease(bgl); + wgpuShaderModuleRelease(shader); +} + +void GBufferEffect::create_pack_pipeline() { + HEADLESS_RETURN_IF_NULL(ctx_.device); + + const char* src = gbuf_pack_wgsl; + if (!src) { + return; + } + + WGPUShaderSourceWGSL wgsl_src = {}; + wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; + wgsl_src.code = str_view(src); + + WGPUShaderModuleDescriptor shader_desc = {}; + shader_desc.nextInChain = &wgsl_src.chain; + WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); + + // Build explicit bind group layout for bindings 0-9. + WGPUBindGroupLayoutEntry bgl_entries[10] = {}; + + // B0: resolution uniform + bgl_entries[0].binding = 0; + bgl_entries[0].visibility = WGPUShaderStage_Compute; + bgl_entries[0].buffer.type = WGPUBufferBindingType_Uniform; + bgl_entries[0].buffer.minBindingSize = sizeof(GBufResUniforms); + + // B1: gbuf_albedo (texture_2d<f32>) + bgl_entries[1].binding = 1; + bgl_entries[1].visibility = WGPUShaderStage_Compute; + bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D; + + // B2: gbuf_normal_mat (texture_2d<f32>) + bgl_entries[2].binding = 2; + bgl_entries[2].visibility = WGPUShaderStage_Compute; + bgl_entries[2].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[2].texture.viewDimension = WGPUTextureViewDimension_2D; + + // B3: gbuf_depth (texture_depth_2d) + bgl_entries[3].binding = 3; + bgl_entries[3].visibility = WGPUShaderStage_Compute; + bgl_entries[3].texture.sampleType = WGPUTextureSampleType_Depth; + bgl_entries[3].texture.viewDimension = WGPUTextureViewDimension_2D; + + // B4: gbuf_shadow (texture_2d<f32>) + bgl_entries[4].binding = 4; + bgl_entries[4].visibility = WGPUShaderStage_Compute; + bgl_entries[4].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[4].texture.viewDimension = WGPUTextureViewDimension_2D; + + // B5: gbuf_transp (texture_2d<f32>) + bgl_entries[5].binding = 5; + bgl_entries[5].visibility = WGPUShaderStage_Compute; + bgl_entries[5].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[5].texture.viewDimension = WGPUTextureViewDimension_2D; + + // B6: prev_cnn (texture_2d<f32>) + bgl_entries[6].binding = 6; + bgl_entries[6].visibility = WGPUShaderStage_Compute; + bgl_entries[6].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[6].texture.viewDimension = WGPUTextureViewDimension_2D; + + // B7: feat_tex0 (storage texture, write, rgba32uint) + bgl_entries[7].binding = 7; + bgl_entries[7].visibility = WGPUShaderStage_Compute; + bgl_entries[7].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + bgl_entries[7].storageTexture.format = WGPUTextureFormat_RGBA32Uint; + bgl_entries[7].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + + // B8: feat_tex1 (storage texture, write, rgba32uint) + bgl_entries[8].binding = 8; + bgl_entries[8].visibility = WGPUShaderStage_Compute; + bgl_entries[8].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + bgl_entries[8].storageTexture.format = WGPUTextureFormat_RGBA32Uint; + bgl_entries[8].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + + // B9: bilinear sampler + bgl_entries[9].binding = 9; + bgl_entries[9].visibility = WGPUShaderStage_Compute; + bgl_entries[9].sampler.type = WGPUSamplerBindingType_Filtering; + + WGPUBindGroupLayoutDescriptor bgl_desc = {}; + bgl_desc.entryCount = 10; + bgl_desc.entries = bgl_entries; + WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc); + + WGPUPipelineLayoutDescriptor pl_desc = {}; + pl_desc.bindGroupLayoutCount = 1; + pl_desc.bindGroupLayouts = &bgl; + WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc); + + WGPUComputePipelineDescriptor pipe_desc = {}; + pipe_desc.layout = pl; + pipe_desc.compute.module = shader; + pipe_desc.compute.entryPoint = str_view("pack_features"); + + pack_pipeline_.set(wgpuDeviceCreateComputePipeline(ctx_.device, &pipe_desc)); + + wgpuPipelineLayoutRelease(pl); + wgpuBindGroupLayoutRelease(bgl); + wgpuShaderModuleRelease(shader); +} + +void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) { + (void)nodes; + // Rebuild each frame since textures may resize. + raster_bind_group_.replace(nullptr); + + if (raster_pipeline_.get() == nullptr) { + return; + } + + WGPUBindGroupEntry entries[2] = {}; + entries[0].binding = 0; + entries[0].buffer = global_uniforms_buf_.buffer; + entries[0].size = sizeof(GBufGlobalUniforms); + + entries[1].binding = 1; + entries[1].buffer = objects_buf_.buffer; + entries[1].size = (size_t)objects_buf_capacity_ * sizeof(GBufObjectData); + + WGPUBindGroupLayout bgl = + wgpuRenderPipelineGetBindGroupLayout(raster_pipeline_.get(), 0); + + WGPUBindGroupDescriptor bg_desc = {}; + bg_desc.layout = bgl; + bg_desc.entryCount = 2; + bg_desc.entries = entries; + + raster_bind_group_.replace(wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc)); + wgpuBindGroupLayoutRelease(bgl); +} + +void GBufferEffect::update_pack_bind_group(NodeRegistry& nodes) { + (void)nodes; + // Pack bind group is rebuilt inline in render() to use current node views. +} diff --git a/cnn_v3/src/gbuffer_effect.h b/cnn_v3/src/gbuffer_effect.h new file mode 100644 index 0000000..42fb0ec --- /dev/null +++ b/cnn_v3/src/gbuffer_effect.h @@ -0,0 +1,79 @@ +// GBufferEffect: Multi-pass G-buffer rendering for CNN v3 input +// Outputs: gbuf_feat0, gbuf_feat1 (packed rgba32uint feature textures, 32 bytes/pixel) + +#pragma once + +#include "3d/camera.h" +#include "3d/scene.h" +#include "gpu/effect.h" +#include "gpu/sequence.h" +#include "gpu/uniform_helper.h" +#include "gpu/wgpu_resource.h" +#include "util/mini_math.h" + +// Uniform for the pack compute shader +struct GBufResUniforms { + vec2 resolution; + float _pad0; + float _pad1; +}; +static_assert(sizeof(GBufResUniforms) == 16, + "GBufResUniforms must be 16 bytes"); + +class GBufferEffect : public Effect { + public: + GBufferEffect(const GpuContext& ctx, const std::vector<std::string>& inputs, + const std::vector<std::string>& outputs, float start_time, + float end_time); + + void declare_nodes(NodeRegistry& registry) override; + + void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params, + NodeRegistry& nodes) override; + + void set_scene(const Scene* scene, const Camera* camera); + + private: + // Internal G-buffer node names + std::string node_albedo_; + std::string node_normal_mat_; + std::string node_depth_; + std::string node_shadow_; + std::string node_transp_; + std::string node_feat0_; + std::string node_feat1_; + + const Scene* scene_ = nullptr; + const Camera* camera_ = nullptr; + + // Pass 1: MRT rasterization pipeline + RenderPipeline raster_pipeline_; + BindGroup raster_bind_group_; + + // Pass 4: Pack compute pipeline + ComputePipeline pack_pipeline_; + BindGroup pack_bind_group_; + UniformBuffer<GBufResUniforms> pack_res_uniform_; + + // Placeholder textures for shadow/transp (white/black cleared once) + Texture shadow_placeholder_tex_; + TextureView shadow_placeholder_view_; + Texture transp_placeholder_tex_; + TextureView transp_placeholder_view_; + + // GPU-side object data buffers (global uniforms + objects storage) + // These mirror the layout expected by gbuf_raster.wgsl + GpuBuffer global_uniforms_buf_; + GpuBuffer objects_buf_; + int objects_buf_capacity_ = 0; // number of ObjectData slots allocated + + void create_raster_pipeline(); + void create_pack_pipeline(); + + void update_raster_bind_group(NodeRegistry& nodes); + void update_pack_bind_group(NodeRegistry& nodes); + + void upload_scene_data(const Scene& scene, const Camera& camera, + float time); + void ensure_objects_buffer(int num_objects); +}; diff --git a/cnn_v3/training/blender_export.py b/cnn_v3/training/blender_export.py new file mode 100644 index 0000000..63dd0e3 --- /dev/null +++ b/cnn_v3/training/blender_export.py @@ -0,0 +1,160 @@ +""" +Blender export script for CNN v3 G-buffer training data. +Configures render passes and a compositor File Output node, +then renders the current scene to a multi-layer EXR. + +Usage (headless): + blender -b scene.blend -P blender_export.py -- --output renders/frame_### + +Each '#' in the output path is replaced by Blender with the frame number (zero-padded). +The script writes one multi-layer EXR per frame containing all required passes. + +G-buffer pass mapping: + Combined → training target RGBA (beauty) + DiffCol → albedo.rgb (pre-lighting material color) + Normal → normal.xy (world-space, oct-encode in pack_blender_sample.py) + Z → depth (view-space distance, normalize in pack step) + IndexOB → mat_id (object index, u8 / 255) + Shadow → shadow (invert: shadow=1 means fully lit) + Alpha → transp. (0=opaque, 1=clear/transparent) +""" + +import sys +import argparse + +import bpy + + +def parse_args(): + # Blender passes its own argv; our args follow '--'. + argv = sys.argv + if "--" in argv: + argv = argv[argv.index("--") + 1:] + else: + argv = [] + parser = argparse.ArgumentParser( + description="Configure Blender render passes and export multi-layer EXR." + ) + parser.add_argument( + "--output", + default="//renders/frame_###", + help="Output path prefix (use ### for frame number padding). " + "Default: //renders/frame_###", + ) + parser.add_argument( + "--width", type=int, default=640, + help="Render width in pixels (default: 640)" + ) + parser.add_argument( + "--height", type=int, default=360, + help="Render height in pixels (default: 360)" + ) + parser.add_argument( + "--start-frame", type=int, default=None, + help="First frame to render (default: scene start frame)" + ) + parser.add_argument( + "--end-frame", type=int, default=None, + help="Last frame to render (default: scene end frame)" + ) + return parser.parse_args(argv) + + +def configure_scene(args): + scene = bpy.context.scene + + # Render dimensions + scene.render.resolution_x = args.width + scene.render.resolution_y = args.height + scene.render.resolution_percentage = 100 + + # Frame range (optional override) + if args.start_frame is not None: + scene.frame_start = args.start_frame + if args.end_frame is not None: + scene.frame_end = args.end_frame + + # Use Cycles for best multi-pass support + scene.render.engine = "CYCLES" + + # Enable required render passes on the active view layer + vl = scene.view_layers["ViewLayer"] + vl.use_pass_combined = True # beauty target + vl.use_pass_diffuse_color = True # albedo + vl.use_pass_normal = True # world normals + vl.use_pass_z = True # depth (Z) + vl.use_pass_object_index = True # mat_id + vl.use_pass_shadow = True # shadow catcher + # Alpha is available via the combined pass alpha channel; + # the compositor node below also taps it separately. + + print(f"[blender_export] Render passes configured on ViewLayer '{vl.name}'.") + print(f" Resolution: {args.width}x{args.height}") + print(f" Frames: {scene.frame_start} – {scene.frame_end}") + + +def configure_compositor(args): + scene = bpy.context.scene + scene.use_nodes = True + tree = scene.node_tree + + # Clear all existing compositor nodes + tree.nodes.clear() + + # Render Layers node (source of all passes) + rl_node = tree.nodes.new("CompositorNodeRLayers") + rl_node.location = (0, 0) + + # File Output node — multi-layer EXR (all passes in one file) + out_node = tree.nodes.new("CompositorNodeOutputFile") + out_node.location = (600, 0) + out_node.format.file_format = "OPEN_EXR_MULTILAYER" + out_node.format.exr_codec = "ZIP" + out_node.base_path = args.output + + # Map each render pass socket to a named layer in the EXR. + # Slot order matters: the first slot is created by default; we rename it + # and add the rest. + pass_sockets = [ + ("Image", "Combined"), # beauty / target + ("Diffuse Color", "DiffCol"), # albedo + ("Normal", "Normal"), # world normals + ("Depth", "Z"), # view-space depth + ("Object Index", "IndexOB"), # object index + ("Shadow", "Shadow"), # shadow + ("Alpha", "Alpha"), # transparency / alpha + ] + + # The node starts with one default slot; configure it first. + for i, (socket_name, layer_name) in enumerate(pass_sockets): + if i == 0: + # Rename the default slot + out_node.file_slots[0].path = layer_name + else: + out_node.file_slots.new(layer_name) + + # Link render layer socket to file output slot + src_socket = rl_node.outputs.get(socket_name) + dst_socket = out_node.inputs[i] + if src_socket: + tree.links.new(src_socket, dst_socket) + else: + print(f"[blender_export] WARNING: pass socket '{socket_name}' " + f"not found on Render Layers node. Skipping.") + + print(f"[blender_export] Compositor configured. Output → {args.output}") + print(" Layers: " + ", ".join(ln for _, ln in pass_sockets)) + + +def main(): + args = parse_args() + configure_scene(args) + configure_compositor(args) + + # Trigger the render (only when running headless with -b) + bpy.ops.render.render(animation=True) + print("[blender_export] Render complete.") + + +if __name__ == "__main__": + main() diff --git a/cnn_v3/training/pack_blender_sample.py b/cnn_v3/training/pack_blender_sample.py new file mode 100644 index 0000000..84344c1 --- /dev/null +++ b/cnn_v3/training/pack_blender_sample.py @@ -0,0 +1,268 @@ +""" +Pack a Blender multi-layer EXR into CNN v3 training sample files. + +Reads a multi-layer EXR produced by blender_export.py and writes separate PNG +files per channel into an output directory, ready for the CNN v3 dataloader. + +Output files: + albedo.png — RGB uint8 (DiffCol pass, gamma-corrected) + normal.png — RG uint8 (octahedral-encoded world normal in [0,1]) + depth.png — R uint16 (1/(z+1) normalized to [0,1], 16-bit PNG) + matid.png — R uint8 (IndexOB / 255) + shadow.png — R uint8 (1 - shadow_catcher, so 255 = fully lit) + transp.png — R uint8 (alpha from Combined pass, 0=opaque) + target.png — RGBA uint8 (Combined beauty pass) + +depth_grad, mip1, mip2 are computed on-the-fly by the dataloader (not stored). +prev = zero during training (no temporal history for static frames). + +Usage: + python3 pack_blender_sample.py --exr renders/frame_001.exr \\ + --output dataset/full/sample_001/ + +Dependencies: + numpy, Pillow, OpenEXR (pip install openexr) + — or use imageio[freeimage] as alternative EXR reader. +""" + +import argparse +import os +import sys +import numpy as np +from PIL import Image + + +# ---- EXR loading ---- + +def load_exr_openexr(path: str) -> dict: + """Load a multi-layer EXR using the OpenEXR Python binding.""" + import OpenEXR + import Imath + + exr = OpenEXR.InputFile(path) + header = exr.header() + dw = header["dataWindow"] + width = dw.max.x - dw.min.x + 1 + height = dw.max.y - dw.min.y + 1 + channels = {} + float_type = Imath.PixelType(Imath.PixelType.FLOAT) + for ch_name in header["channels"]: + raw = exr.channel(ch_name, float_type) + arr = np.frombuffer(raw, dtype=np.float32).reshape((height, width)) + channels[ch_name] = arr + return channels, width, height + + +def load_exr_imageio(path: str) -> dict: + """Load a multi-layer EXR using imageio (freeimage backend).""" + import imageio + data = imageio.imread(path, format="exr") + # imageio may return (H, W, C); treat as single layer + h, w = data.shape[:2] + c = data.shape[2] if data.ndim == 3 else 1 + channels = {} + names = ["R", "G", "B", "A"][:c] + for i, n in enumerate(names): + channels[n] = data[:, :, i].astype(np.float32) + return channels, w, h + + +def load_exr(path: str): + """Try OpenEXR first, fall back to imageio.""" + try: + return load_exr_openexr(path) + except ImportError: + pass + try: + return load_exr_imageio(path) + except ImportError: + pass + raise ImportError( + "No EXR reader found. Install OpenEXR or imageio[freeimage]:\n" + " pip install openexr\n" + " pip install imageio[freeimage]" + ) + + +# ---- Octahedral encoding ---- + +def oct_encode(normals: np.ndarray) -> np.ndarray: + """ + Octahedral-encode world-space normals. + + Args: + normals: (H, W, 3) float32, unit vectors. + Returns: + (H, W, 2) float32 in [0, 1] for PNG storage. + """ + nx, ny, nz = normals[..., 0], normals[..., 1], normals[..., 2] + # L1-normalize projection onto the octahedron + l1 = np.abs(nx) + np.abs(ny) + np.abs(nz) + 1e-9 + ox = nx / l1 + oy = ny / l1 + # Fold lower hemisphere + mask = nz < 0.0 + ox_folded = np.where(mask, (1.0 - np.abs(oy)) * np.sign(ox + 1e-9), ox) + oy_folded = np.where(mask, (1.0 - np.abs(ox)) * np.sign(oy + 1e-9), oy) + # Remap [-1, 1] → [0, 1] + encoded = np.stack([ox_folded, oy_folded], axis=-1) * 0.5 + 0.5 + return np.clip(encoded, 0.0, 1.0) + + +# ---- Channel extraction helpers ---- + +def get_pass_rgb(channels: dict, prefix: str) -> np.ndarray: + """Extract an RGB pass (prefix.R, prefix.G, prefix.B).""" + r = channels.get(f"{prefix}.R", channels.get("R", None)) + g = channels.get(f"{prefix}.G", channels.get("G", None)) + b = channels.get(f"{prefix}.B", channels.get("B", None)) + if r is None or g is None or b is None: + raise KeyError(f"Could not find RGB channels for pass '{prefix}'.") + return np.stack([r, g, b], axis=-1) + + +def get_pass_rgba(channels: dict, prefix: str) -> np.ndarray: + """Extract an RGBA pass.""" + rgb = get_pass_rgb(channels, prefix) + a = channels.get(f"{prefix}.A", np.ones_like(rgb[..., 0])) + return np.concatenate([rgb, a[..., np.newaxis]], axis=-1) + + +def get_pass_r(channels: dict, prefix: str, default: float = 0.0) -> np.ndarray: + """Extract a single-channel pass.""" + ch = channels.get(f"{prefix}.R", channels.get(prefix, None)) + if ch is None: + h, w = next(iter(channels.values())).shape[:2] + return np.full((h, w), default, dtype=np.float32) + return ch.astype(np.float32) + + +def get_pass_xyz(channels: dict, prefix: str) -> np.ndarray: + """Extract an XYZ pass (Normal uses .X .Y .Z in Blender).""" + x = channels.get(f"{prefix}.X") + y = channels.get(f"{prefix}.Y") + z = channels.get(f"{prefix}.Z") + if x is None or y is None or z is None: + # Fall back to R/G/B naming + return get_pass_rgb(channels, prefix) + return np.stack([x, y, z], axis=-1) + + +# ---- Main packing ---- + +def pack_blender_sample(exr_path: str, output_dir: str) -> None: + os.makedirs(output_dir, exist_ok=True) + + print(f"[pack_blender_sample] Loading {exr_path} …") + channels, width, height = load_exr(exr_path) + print(f" Dimensions: {width}×{height}") + print(f" Channels: {sorted(channels.keys())}") + + # ---- albedo (DiffCol → RGB uint8, gamma-correct linear→sRGB) ---- + try: + albedo_lin = get_pass_rgb(channels, "DiffCol") + except KeyError: + print(" WARNING: DiffCol pass not found; using zeros.") + albedo_lin = np.zeros((height, width, 3), dtype=np.float32) + # Convert linear → sRGB (approximate gamma 2.2) + albedo_srgb = np.clip(np.power(np.clip(albedo_lin, 0, 1), 1.0 / 2.2), 0, 1) + albedo_u8 = (albedo_srgb * 255.0).astype(np.uint8) + Image.fromarray(albedo_u8, mode="RGB").save( + os.path.join(output_dir, "albedo.png") + ) + + # ---- normal (Normal pass → oct-encoded RG uint8) ---- + try: + # Blender world normals use .X .Y .Z channels + normal_xyz = get_pass_xyz(channels, "Normal") + # Normalize to unit length (may not be exactly unit after compression) + nlen = np.linalg.norm(normal_xyz, axis=-1, keepdims=True) + 1e-9 + normal_unit = normal_xyz / nlen + normal_enc = oct_encode(normal_unit) # (H, W, 2) in [0, 1] + normal_u8 = (normal_enc * 255.0).astype(np.uint8) + # Store in RGB with B=0 (unused) + normal_rgb = np.concatenate( + [normal_u8, np.zeros((height, width, 1), dtype=np.uint8)], axis=-1 + ) + except KeyError: + print(" WARNING: Normal pass not found; using zeros.") + normal_rgb = np.zeros((height, width, 3), dtype=np.uint8) + Image.fromarray(normal_rgb, mode="RGB").save( + os.path.join(output_dir, "normal.png") + ) + + # ---- depth (Z pass → 1/(z+1), stored as 16-bit PNG) ---- + z_raw = get_pass_r(channels, "Z", default=0.0) + # 1/z style: 1/(z + 1) maps z=0→1.0, z=∞→0.0 + depth_norm = 1.0 / (np.clip(z_raw, 0.0, None) + 1.0) + depth_norm = np.clip(depth_norm, 0.0, 1.0) + depth_u16 = (depth_norm * 65535.0).astype(np.uint16) + Image.fromarray(depth_u16, mode="I;16").save( + os.path.join(output_dir, "depth.png") + ) + + # ---- matid (IndexOB → u8) ---- + # Blender object index is an integer; clamp to [0, 255]. + matid_raw = get_pass_r(channels, "IndexOB", default=0.0) + matid_u8 = np.clip(matid_raw, 0, 255).astype(np.uint8) + Image.fromarray(matid_u8, mode="L").save( + os.path.join(output_dir, "matid.png") + ) + + # ---- shadow (Shadow pass → invert: 1=fully lit, stored u8) ---- + # Blender Shadow pass: 1=lit, 0=shadowed. We keep that convention + # (shadow=1 means fully lit), so just convert directly. + shadow_raw = get_pass_r(channels, "Shadow", default=1.0) + shadow_u8 = (np.clip(shadow_raw, 0.0, 1.0) * 255.0).astype(np.uint8) + Image.fromarray(shadow_u8, mode="L").save( + os.path.join(output_dir, "shadow.png") + ) + + # ---- transp (Alpha from Combined pass → u8, 0=opaque) ---- + # Blender alpha: 1=opaque, 0=transparent. + # CNN convention: transp=0 means opaque, transp=1 means transparent. + # So transp = 1 - alpha. + try: + combined_rgba = get_pass_rgba(channels, "Combined") + alpha = combined_rgba[..., 3] + except KeyError: + alpha = np.ones((height, width), dtype=np.float32) + transp = 1.0 - np.clip(alpha, 0.0, 1.0) + transp_u8 = (transp * 255.0).astype(np.uint8) + Image.fromarray(transp_u8, mode="L").save( + os.path.join(output_dir, "transp.png") + ) + + # ---- target (Combined beauty pass → RGBA uint8, gamma-correct) ---- + try: + combined_rgba = get_pass_rgba(channels, "Combined") + # Convert linear → sRGB for display (RGB channels only) + c_rgb = np.power(np.clip(combined_rgba[..., :3], 0, 1), 1.0 / 2.2) + c_alpha = combined_rgba[..., 3:4] + target_lin = np.concatenate([c_rgb, c_alpha], axis=-1) + target_u8 = (np.clip(target_lin, 0, 1) * 255.0).astype(np.uint8) + except KeyError: + print(" WARNING: Combined pass not found; target will be zeros.") + target_u8 = np.zeros((height, width, 4), dtype=np.uint8) + Image.fromarray(target_u8, mode="RGBA").save( + os.path.join(output_dir, "target.png") + ) + + print(f"[pack_blender_sample] Wrote sample to {output_dir}") + print(" Files: albedo.png normal.png depth.png matid.png " + "shadow.png transp.png target.png") + print(" Note: depth_grad, mip1, mip2 are computed on-the-fly by the dataloader.") + + +def main(): + parser = argparse.ArgumentParser( + description="Pack a Blender multi-layer EXR into CNN v3 training sample files." + ) + parser.add_argument("--exr", required=True, help="Input multi-layer EXR file") + parser.add_argument("--output", required=True, help="Output directory for sample files") + args = parser.parse_args() + pack_blender_sample(args.exr, args.output) + + +if __name__ == "__main__": + main() diff --git a/cnn_v3/training/pack_photo_sample.py b/cnn_v3/training/pack_photo_sample.py new file mode 100644 index 0000000..b2943fb --- /dev/null +++ b/cnn_v3/training/pack_photo_sample.py @@ -0,0 +1,148 @@ +""" +Pack a photo into CNN v3 simple training sample files. + +Converts a single RGB or RGBA photo into the CNN v3 sample layout. +Geometric channels (normal, depth, matid) are zeroed; the network +degrades gracefully due to channel-dropout training. + +Output files: + albedo.png — RGB uint8 (photo RGB) + normal.png — RG uint8 (zero — no geometry data) + depth.png — R uint16 (zero — no depth data) + matid.png — R uint8 (zero — no material data) + shadow.png — R uint8 (255 = fully lit — assume unoccluded) + transp.png — R uint8 (1 - alpha, or 0 if no alpha channel) + target.png — RGB/RGBA (= albedo; no ground-truth styled target) + +mip1 and mip2 are computed on-the-fly by the dataloader from albedo. +prev = zero during training (no temporal history). + +Usage: + python3 pack_photo_sample.py --photo photos/img_001.png \\ + --output dataset/simple/sample_001/ + +Dependencies: + numpy, Pillow +""" + +import argparse +import os +import numpy as np +from PIL import Image + + +# ---- Mip computation ---- + +def pyrdown(img: np.ndarray) -> np.ndarray: + """ + 2×2 average pooling (half resolution). + Args: + img: (H, W, C) float32 in [0, 1]. + Returns: + (H//2, W//2, C) float32. + """ + h, w, c = img.shape + h2, w2 = h // 2, w // 2 + # Crop to even dimensions + cropped = img[:h2 * 2, :w2 * 2, :] + # Reshape and average + return 0.25 * ( + cropped[0::2, 0::2, :] + + cropped[1::2, 0::2, :] + + cropped[0::2, 1::2, :] + + cropped[1::2, 1::2, :] + ) + + +# ---- Main packing ---- + +def pack_photo_sample(photo_path: str, output_dir: str) -> None: + os.makedirs(output_dir, exist_ok=True) + + print(f"[pack_photo_sample] Loading {photo_path} …") + img = Image.open(photo_path).convert("RGBA") + width, height = img.size + print(f" Dimensions: {width}×{height}") + + img_np = np.asarray(img, dtype=np.float32) / 255.0 # (H, W, 4) in [0, 1] + rgb = img_np[..., :3] # (H, W, 3) + alpha = img_np[..., 3] # (H, W) + + # ---- albedo — photo RGB ---- + albedo_u8 = (np.clip(rgb, 0, 1) * 255.0).astype(np.uint8) + Image.fromarray(albedo_u8, mode="RGB").save( + os.path.join(output_dir, "albedo.png") + ) + + # ---- normal — zero (no geometry) ---- + normal_zeros = np.zeros((height, width, 3), dtype=np.uint8) + # Encode "no normal" as (0.5, 0.5) in octahedral space → (128, 128) + # This maps to oct = (0, 0) → reconstructed normal = (0, 0, 1) (pointing forward) + normal_zeros[..., 0] = 128 + normal_zeros[..., 1] = 128 + Image.fromarray(normal_zeros, mode="RGB").save( + os.path.join(output_dir, "normal.png") + ) + + # ---- depth — zero ---- + depth_zero = np.zeros((height, width), dtype=np.uint16) + Image.fromarray(depth_zero, mode="I;16").save( + os.path.join(output_dir, "depth.png") + ) + + # ---- matid — zero ---- + matid_zero = np.zeros((height, width), dtype=np.uint8) + Image.fromarray(matid_zero, mode="L").save( + os.path.join(output_dir, "matid.png") + ) + + # ---- shadow — 255 (fully lit, assume unoccluded) ---- + shadow_full = np.full((height, width), 255, dtype=np.uint8) + Image.fromarray(shadow_full, mode="L").save( + os.path.join(output_dir, "shadow.png") + ) + + # ---- transp — 1 - alpha (0=opaque, 1=transparent) ---- + # If the photo has no meaningful alpha, this is zero everywhere. + transp = 1.0 - np.clip(alpha, 0.0, 1.0) + transp_u8 = (transp * 255.0).astype(np.uint8) + Image.fromarray(transp_u8, mode="L").save( + os.path.join(output_dir, "transp.png") + ) + + # ---- target — albedo (= photo; no GT styled target) ---- + # Store as RGBA (keep alpha for potential masking by the dataloader). + target_u8 = (np.clip(img_np, 0, 1) * 255.0).astype(np.uint8) + Image.fromarray(target_u8, mode="RGBA").save( + os.path.join(output_dir, "target.png") + ) + + # ---- mip1 / mip2 — informational only, not saved ---- + # The dataloader computes mip1/mip2 on-the-fly from albedo. + # Verify they look reasonable here for debugging. + mip1 = pyrdown(rgb) + mip2 = pyrdown(mip1) + print(f" mip1: {mip1.shape[1]}×{mip1.shape[0]} " + f"mip2: {mip2.shape[1]}×{mip2.shape[0]} (computed on-the-fly)") + + print(f"[pack_photo_sample] Wrote sample to {output_dir}") + print(" Files: albedo.png normal.png depth.png matid.png " + "shadow.png transp.png target.png") + print(" Note: normal/depth/matid are zeroed (no geometry data).") + print(" Note: target = albedo (no ground-truth styled target).") + + +def main(): + parser = argparse.ArgumentParser( + description="Pack a photo into CNN v3 simple training sample files." + ) + parser.add_argument("--photo", required=True, + help="Input photo file (RGB or RGBA PNG/JPG)") + parser.add_argument("--output", required=True, + help="Output directory for sample files") + args = parser.parse_args() + pack_photo_sample(args.photo, args.output) + + +if __name__ == "__main__": + main() |
