diff options
| author | skal <pascal.massimino@gmail.com> | 2026-03-20 08:42:07 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-03-20 08:42:07 +0100 |
| commit | f74bcd843c631f82daefe543fca7741fb5bb71f4 (patch) | |
| tree | 0983e6c36fb0f9e2b152f76437ecf91ee1fd99cb /cnn_v3/training | |
| parent | a160cc797afb4291d356bdc0cbcf0f110e3ef8a9 (diff) | |
feat(cnn_v3): G-buffer phase 1 + training infrastructure
G-buffer (Phase 1):
- Add NodeTypes GBUF_ALBEDO/DEPTH32/R8/RGBA32UINT to NodeRegistry
- GBufferEffect: MRT raster pass (albedo+normal_mat+depth) + pack compute
- Shaders: gbuf_raster.wgsl (MRT), gbuf_pack.wgsl (feature packing, 32B/px)
- Shadow/SDF passes stubbed (placeholder textures), CMake integration deferred
Training infrastructure (Phase 2):
- blender_export.py: headless EXR export with all G-buffer render passes
- pack_blender_sample.py: EXR → per-channel PNGs (oct-normals, 1/z depth)
- pack_photo_sample.py: photo → zero-filled G-buffer sample layout
handoff(Gemini): G-buffer phases 3-5 remain (U-Net shaders, CNNv3Effect, parity)
Diffstat (limited to 'cnn_v3/training')
| -rw-r--r-- | cnn_v3/training/blender_export.py | 160 | ||||
| -rw-r--r-- | cnn_v3/training/pack_blender_sample.py | 268 | ||||
| -rw-r--r-- | cnn_v3/training/pack_photo_sample.py | 148 |
3 files changed, 576 insertions, 0 deletions
# ==== cnn_v3/training/blender_export.py (new file in this commit) ====
"""
Blender export script for CNN v3 G-buffer training data.
Configures render passes and a compositor File Output node,
then renders the current scene to a multi-layer EXR.

Usage (headless):
    blender -b scene.blend -P blender_export.py -- --output renders/frame_###

Each '#' in the output path is replaced by Blender with the frame number (zero-padded).
The script writes one multi-layer EXR per frame containing all required passes.

G-buffer pass mapping:
    Combined  → training target RGBA (beauty)
    DiffCol   → albedo.rgb   (pre-lighting material color)
    Normal    → normal.xy    (world-space, oct-encode in pack_blender_sample.py)
    Z         → depth        (view-space distance, normalize in pack step)
    IndexOB   → mat_id       (object index, u8 / 255)
    Shadow    → shadow       (stored as-is: shadow=1 means fully lit)
    Alpha     → transp.      (transp = 1 - alpha, so 0=opaque, 1=transparent)

NOTE(review): this script uses `scene.node_tree` and the File Output node's
`file_slots` collection — presumably the pre-5.0 Blender compositor API;
confirm against the Blender version used for export.
"""

import sys
import argparse

import bpy


# (ViewLayer attribute, human-readable pass name) for every pass the packer reads.
_VIEW_LAYER_PASS_FLAGS = (
    ("use_pass_combined", "Combined"),        # beauty target
    ("use_pass_diffuse_color", "DiffCol"),    # albedo
    ("use_pass_normal", "Normal"),            # world normals
    ("use_pass_z", "Z"),                      # depth
    ("use_pass_object_index", "IndexOB"),     # mat_id
    ("use_pass_shadow", "Shadow"),            # shadow
)


def parse_args():
    """Parse the options that follow '--' on the Blender command line.

    Returns:
        argparse.Namespace with output, width, height, start_frame, end_frame.
        When no '--' separator is present, all defaults are used.
    """
    # Blender passes its own argv; our args follow '--'.
    argv = sys.argv
    if "--" in argv:
        argv = argv[argv.index("--") + 1:]
    else:
        argv = []
    parser = argparse.ArgumentParser(
        description="Configure Blender render passes and export multi-layer EXR."
    )
    parser.add_argument(
        "--output",
        default="//renders/frame_###",
        help="Output path prefix (use ### for frame number padding). "
             "Default: //renders/frame_###",
    )
    parser.add_argument(
        "--width", type=int, default=640,
        help="Render width in pixels (default: 640)"
    )
    parser.add_argument(
        "--height", type=int, default=360,
        help="Render height in pixels (default: 360)"
    )
    parser.add_argument(
        "--start-frame", type=int, default=None,
        help="First frame to render (default: scene start frame)"
    )
    parser.add_argument(
        "--end-frame", type=int, default=None,
        help="Last frame to render (default: scene end frame)"
    )
    return parser.parse_args(argv)


def configure_scene(args):
    """Apply resolution, frame range, render engine and pass flags to the scene.

    Args:
        args: namespace from parse_args().
    """
    scene = bpy.context.scene

    # Render dimensions
    scene.render.resolution_x = args.width
    scene.render.resolution_y = args.height
    scene.render.resolution_percentage = 100

    # Frame range (optional override)
    if args.start_frame is not None:
        scene.frame_start = args.start_frame
    if args.end_frame is not None:
        scene.frame_end = args.end_frame

    # Use Cycles for best multi-pass support
    scene.render.engine = "CYCLES"

    # Enable required render passes. Prefer the default-named layer, but do not
    # crash on scenes whose view layer was renamed: fall back to the first one.
    vl = scene.view_layers.get("ViewLayer")
    if vl is None:
        vl = scene.view_layers[0]
        print(f"[blender_export] WARNING: no view layer named 'ViewLayer'; "
              f"using '{vl.name}'.")

    for attr, pass_name in _VIEW_LAYER_PASS_FLAGS:
        if hasattr(vl, attr):
            setattr(vl, attr, True)
        else:
            # Same policy as the compositor step: warn and continue so the
            # remaining passes are still exported.
            print(f"[blender_export] WARNING: view layer has no '{attr}' "
                  f"({pass_name} pass unavailable in this Blender build).")
    # Alpha is available via the combined pass alpha channel;
    # the compositor node below also taps it separately.

    print(f"[blender_export] Render passes configured on ViewLayer '{vl.name}'.")
    print(f"  Resolution: {args.width}x{args.height}")
    print(f"  Frames: {scene.frame_start} – {scene.frame_end}")


def configure_compositor(args):
    """Build a Render Layers → File Output (multi-layer EXR) compositor graph.

    All existing compositor nodes are removed first.

    Args:
        args: namespace from parse_args(); args.output becomes the base path.
    """
    scene = bpy.context.scene
    scene.use_nodes = True
    tree = scene.node_tree

    # Clear all existing compositor nodes
    tree.nodes.clear()

    # Render Layers node (source of all passes)
    rl_node = tree.nodes.new("CompositorNodeRLayers")
    rl_node.location = (0, 0)

    # File Output node — multi-layer EXR (all passes in one file)
    out_node = tree.nodes.new("CompositorNodeOutputFile")
    out_node.location = (600, 0)
    out_node.format.file_format = "OPEN_EXR_MULTILAYER"
    out_node.format.exr_codec = "ZIP"
    out_node.base_path = args.output

    # (Render Layers output socket, EXR layer name) pairs.
    pass_sockets = [
        ("Image",         "Combined"),   # beauty / target
        ("Diffuse Color", "DiffCol"),    # albedo
        ("Normal",        "Normal"),     # world normals
        ("Depth",         "Z"),          # view-space depth
        ("Object Index",  "IndexOB"),    # object index
        ("Shadow",        "Shadow"),     # shadow
        ("Alpha",         "Alpha"),      # transparency / alpha
    ]

    # Drop the node's default slot and create every slot with new().
    # Renaming only `file_slots[0].path` does not rename the EXR *layer* of the
    # default slot, so the beauty pass would most likely be written as "Image"
    # instead of "Combined" (from the Blender API docs, not verified in a run).
    out_node.file_slots.clear()
    for i, (socket_name, layer_name) in enumerate(pass_sockets):
        out_node.file_slots.new(layer_name)

        # Link render layer socket to file output slot
        src_socket = rl_node.outputs.get(socket_name)
        dst_socket = out_node.inputs[i]
        if src_socket:
            tree.links.new(src_socket, dst_socket)
        else:
            print(f"[blender_export] WARNING: pass socket '{socket_name}' "
                  f"not found on Render Layers node. Skipping.")

    print(f"[blender_export] Compositor configured. Output → {args.output}")
    print("  Layers: " + ", ".join(ln for _, ln in pass_sockets))


def main():
    """Configure the scene and compositor, then render the whole frame range."""
    args = parse_args()
    configure_scene(args)
    configure_compositor(args)

    # Trigger the render (only when running headless with -b)
    bpy.ops.render.render(animation=True)
    print("[blender_export] Render complete.")


if __name__ == "__main__":
    main()


# ==== cnn_v3/training/pack_blender_sample.py (new file in this commit) ====
"""
Pack a Blender multi-layer EXR into CNN v3 training sample files.

Reads a multi-layer EXR produced by blender_export.py and writes separate PNG
files per channel into an output directory, ready for the CNN v3 dataloader.

Output files:
    albedo.png   — RGB uint8  (DiffCol pass, gamma-corrected)
    normal.png   — RG uint8   (octahedral-encoded world normal in [0,1])
    depth.png    — R uint16   (1/(z+1) normalized to [0,1], 16-bit PNG)
    matid.png    — R uint8    (IndexOB / 255)
    shadow.png   — R uint8    (Shadow pass stored as-is, so 255 = fully lit)
    transp.png   — R uint8    (1 - alpha from Combined pass, 0=opaque)
    target.png   — RGBA uint8 (Combined beauty pass)

depth_grad, mip1, mip2 are computed on-the-fly by the dataloader (not stored).
prev = zero during training (no temporal history for static frames).

Usage:
    python3 pack_blender_sample.py --exr renders/frame_001.exr \\
                                   --output dataset/full/sample_001/

Dependencies:
    numpy, Pillow, OpenEXR (pip install openexr)
    — or use imageio[freeimage] as alternative EXR reader.
"""

import argparse
import os
import sys
import numpy as np
from PIL import Image


# ---- EXR loading ----

def load_exr_openexr(path: str) -> tuple:
    """Load a multi-layer EXR using the OpenEXR Python binding.

    Returns:
        (channels, width, height) where channels maps each EXR channel name
        to an (H, W) float32 array.
    """
    import OpenEXR
    import Imath

    exr = OpenEXR.InputFile(path)
    try:
        header = exr.header()
        dw = header["dataWindow"]
        width = dw.max.x - dw.min.x + 1
        height = dw.max.y - dw.min.y + 1
        channels = {}
        float_type = Imath.PixelType(Imath.PixelType.FLOAT)
        for ch_name in header["channels"]:
            raw = exr.channel(ch_name, float_type)
            arr = np.frombuffer(raw, dtype=np.float32).reshape((height, width))
            channels[ch_name] = arr
    finally:
        # Release the file handle even if a channel fails to decode.
        exr.close()
    return channels, width, height


def load_exr_imageio(path: str) -> tuple:
    """Load an EXR using imageio (freeimage backend) as a single layer.

    Returns:
        (channels, width, height); channels holds at most the bare names
        "R", "G", "B", "A" because layer names are not available here.
    """
    import imageio
    data = np.asarray(imageio.imread(path, format="exr"))
    # imageio may return (H, W) for single-channel images; give it a channel
    # axis so the per-channel indexing below works.
    if data.ndim == 2:
        data = data[:, :, np.newaxis]
    h, w, c = data.shape[:3]
    channels = {}
    names = ["R", "G", "B", "A"][:c]
    for i, n in enumerate(names):
        channels[n] = data[:, :, i].astype(np.float32)
    return channels, w, h


def load_exr(path: str):
    """Try OpenEXR first, fall back to imageio.

    Raises:
        ImportError: if neither reader can be imported.
    """
    try:
        return load_exr_openexr(path)
    except ImportError:
        pass
    try:
        return load_exr_imageio(path)
    except ImportError:
        pass
    raise ImportError(
        "No EXR reader found. Install OpenEXR or imageio[freeimage]:\n"
        "  pip install openexr\n"
        "  pip install imageio[freeimage]"
    )


# ---- Octahedral encoding ----

def oct_encode(normals: np.ndarray) -> np.ndarray:
    """
    Octahedral-encode world-space normals.

    Args:
        normals: (H, W, 3) float32, unit vectors.
    Returns:
        (H, W, 2) float32 in [0, 1] for PNG storage.
    """
    nx, ny, nz = normals[..., 0], normals[..., 1], normals[..., 2]
    # L1-normalize projection onto the octahedron
    l1 = np.abs(nx) + np.abs(ny) + np.abs(nz) + 1e-9
    ox = nx / l1
    oy = ny / l1
    # Fold lower hemisphere; the epsilon makes sign(0) resolve to +1.
    mask = nz < 0.0
    ox_folded = np.where(mask, (1.0 - np.abs(oy)) * np.sign(ox + 1e-9), ox)
    oy_folded = np.where(mask, (1.0 - np.abs(ox)) * np.sign(oy + 1e-9), oy)
    # Remap [-1, 1] → [0, 1]
    encoded = np.stack([ox_folded, oy_folded], axis=-1) * 0.5 + 0.5
    return np.clip(encoded, 0.0, 1.0)


# ---- Channel extraction helpers ----

def get_pass_rgb(channels: dict, prefix: str) -> np.ndarray:
    """Extract an RGB pass (prefix.R, prefix.G, prefix.B).

    Falls back to the bare "R"/"G"/"B" channels (single-layer files, e.g. from
    the imageio reader) when the prefixed names are absent.

    Raises:
        KeyError: if any of the three channels cannot be found.
    """
    r = channels.get(f"{prefix}.R", channels.get("R", None))
    g = channels.get(f"{prefix}.G", channels.get("G", None))
    b = channels.get(f"{prefix}.B", channels.get("B", None))
    if r is None or g is None or b is None:
        raise KeyError(f"Could not find RGB channels for pass '{prefix}'.")
    return np.stack([r, g, b], axis=-1)


def get_pass_rgba(channels: dict, prefix: str) -> np.ndarray:
    """Extract an RGBA pass; alpha defaults to 1.0 when no alpha channel exists.

    Raises:
        KeyError: propagated from get_pass_rgb when RGB is missing.
    """
    rgb = get_pass_rgb(channels, prefix)
    # Mirror get_pass_rgb: accept a bare "A" channel from single-layer files.
    a = channels.get(f"{prefix}.A", channels.get("A", None))
    if a is None:
        a = np.ones_like(rgb[..., 0])
    return np.concatenate([rgb, a[..., np.newaxis]], axis=-1)


def get_pass_r(channels: dict, prefix: str, default: float = 0.0) -> np.ndarray:
    """Extract a single-channel pass.

    Looks for "<prefix>.R", then "<prefix>", then "<prefix>.V".
    NOTE(review): Blender's File Output node appears to name single-value
    sockets "<layer>.V" — confirm against the channel list printed by
    pack_blender_sample for a real export.

    Returns:
        (H, W) float32 array; filled with `default` (and a warning is printed)
        when the pass is not present.
    """
    for key in (f"{prefix}.R", prefix, f"{prefix}.V"):
        ch = channels.get(key)
        if ch is not None:
            return ch.astype(np.float32)
    print(f"  WARNING: {prefix} pass not found; using constant {default}.")
    # Borrow the image size from any channel that is present.
    h, w = next(iter(channels.values())).shape[:2]
    return np.full((h, w), default, dtype=np.float32)


def get_pass_xyz(channels: dict, prefix: str) -> np.ndarray:
    """Extract an XYZ pass (Normal uses .X .Y .Z in Blender).

    Raises:
        KeyError: propagated from the R/G/B fallback when nothing matches.
    """
    x = channels.get(f"{prefix}.X")
    y = channels.get(f"{prefix}.Y")
    z = channels.get(f"{prefix}.Z")
    if x is None or y is None or z is None:
        # Fall back to R/G/B naming
        return get_pass_rgb(channels, prefix)
    return np.stack([x, y, z], axis=-1)
# ---- Main packing ----

def _quantize_unit(values: np.ndarray, max_code: int, dtype) -> np.ndarray:
    """Map floats in [0, 1] to integer codes 0..max_code, rounding to nearest.

    NaNs become 0 and out-of-range values are clamped, so the integer cast is
    always well defined.
    """
    unit = np.clip(np.nan_to_num(values, nan=0.0), 0.0, 1.0)
    return np.rint(unit * max_code).astype(dtype)


def pack_blender_sample(exr_path: str, output_dir: str) -> None:
    """Convert one multi-layer EXR into the per-channel PNG sample layout.

    Args:
        exr_path: EXR file written by blender_export.py.
        output_dir: directory to create/fill with albedo, normal, depth, matid,
            shadow, transp and target PNGs.
    """
    os.makedirs(output_dir, exist_ok=True)

    print(f"[pack_blender_sample] Loading {exr_path} …")
    channels, width, height = load_exr(exr_path)
    print(f"  Dimensions: {width}×{height}")
    print(f"  Channels: {sorted(channels.keys())}")

    # ---- albedo (DiffCol → RGB uint8, gamma-correct linear→sRGB) ----
    try:
        albedo_lin = get_pass_rgb(channels, "DiffCol")
    except KeyError:
        print("  WARNING: DiffCol pass not found; using zeros.")
        albedo_lin = np.zeros((height, width, 3), dtype=np.float32)
    # Convert linear → sRGB (approximate gamma 2.2)
    albedo_srgb = np.power(np.clip(albedo_lin, 0, 1), 1.0 / 2.2)
    albedo_u8 = _quantize_unit(albedo_srgb, 255, np.uint8)
    Image.fromarray(albedo_u8, mode="RGB").save(
        os.path.join(output_dir, "albedo.png")
    )

    # ---- normal (Normal pass → oct-encoded RG uint8) ----
    try:
        # Blender world normals use .X .Y .Z channels
        normal_xyz = get_pass_xyz(channels, "Normal")
        # Normalize to unit length (may not be exactly unit after compression)
        nlen = np.linalg.norm(normal_xyz, axis=-1, keepdims=True) + 1e-9
        normal_unit = normal_xyz / nlen
        normal_enc = oct_encode(normal_unit)  # (H, W, 2) in [0, 1]
        # Round-to-nearest so the neutral code 0.5 becomes 128, the same
        # value pack_photo_sample.py writes for "no normal".
        normal_u8 = _quantize_unit(normal_enc, 255, np.uint8)
        # Store in RGB with B=0 (unused)
        normal_rgb = np.concatenate(
            [normal_u8, np.zeros((height, width, 1), dtype=np.uint8)], axis=-1
        )
    except KeyError:
        print("  WARNING: Normal pass not found; using zeros.")
        normal_rgb = np.zeros((height, width, 3), dtype=np.uint8)
    Image.fromarray(normal_rgb, mode="RGB").save(
        os.path.join(output_dir, "normal.png")
    )

    # ---- depth (Z pass → 1/(z+1), stored as 16-bit PNG) ----
    # A missing pass defaults to z=∞ so it encodes as 0, matching the
    # "zero = no depth data" layout written by pack_photo_sample.py.
    z_raw = get_pass_r(channels, "Z", default=np.inf)
    # 1/z style: 1/(z + 1) maps z=0→1.0, z=∞→0.0
    depth_norm = 1.0 / (np.clip(z_raw, 0.0, None) + 1.0)
    depth_u16 = _quantize_unit(depth_norm, 65535, np.uint16)
    Image.fromarray(depth_u16, mode="I;16").save(
        os.path.join(output_dir, "depth.png")
    )

    # ---- matid (IndexOB → u8) ----
    # Blender object index is an integer stored as float; round before the
    # cast so e.g. 2.9999 is not truncated to 2, then clamp to [0, 255].
    matid_raw = get_pass_r(channels, "IndexOB", default=0.0)
    matid_u8 = np.rint(
        np.clip(np.nan_to_num(matid_raw, nan=0.0), 0, 255)
    ).astype(np.uint8)
    Image.fromarray(matid_u8, mode="L").save(
        os.path.join(output_dir, "matid.png")
    )

    # ---- shadow (Shadow pass → stored as-is: 1=fully lit, u8) ----
    # Blender Shadow pass: 1=lit, 0=shadowed. We keep that convention
    # (shadow=1 means fully lit), so just convert directly.
    shadow_raw = get_pass_r(channels, "Shadow", default=1.0)
    shadow_u8 = _quantize_unit(shadow_raw, 255, np.uint8)
    Image.fromarray(shadow_u8, mode="L").save(
        os.path.join(output_dir, "shadow.png")
    )

    # The Combined pass feeds both transp and target; fetch it once.
    try:
        combined_rgba = get_pass_rgba(channels, "Combined")
    except KeyError:
        print("  WARNING: Combined pass not found; target will be zeros.")
        combined_rgba = None

    # ---- transp (Alpha from Combined pass → u8, 0=opaque) ----
    # Blender alpha: 1=opaque, 0=transparent.
    # CNN convention: transp=0 means opaque, transp=1 means transparent.
    # So transp = 1 - alpha.
    if combined_rgba is not None:
        alpha = combined_rgba[..., 3]
    else:
        alpha = np.ones((height, width), dtype=np.float32)
    transp = 1.0 - np.clip(alpha, 0.0, 1.0)
    transp_u8 = _quantize_unit(transp, 255, np.uint8)
    Image.fromarray(transp_u8, mode="L").save(
        os.path.join(output_dir, "transp.png")
    )

    # ---- target (Combined beauty pass → RGBA uint8, gamma-correct) ----
    if combined_rgba is not None:
        # Convert linear → sRGB for display (RGB channels only)
        c_rgb = np.power(np.clip(combined_rgba[..., :3], 0, 1), 1.0 / 2.2)
        c_alpha = combined_rgba[..., 3:4]
        target_srgb = np.concatenate([c_rgb, c_alpha], axis=-1)
        target_u8 = _quantize_unit(target_srgb, 255, np.uint8)
    else:
        target_u8 = np.zeros((height, width, 4), dtype=np.uint8)
    Image.fromarray(target_u8, mode="RGBA").save(
        os.path.join(output_dir, "target.png")
    )

    print(f"[pack_blender_sample] Wrote sample to {output_dir}")
    print("  Files: albedo.png normal.png depth.png matid.png "
          "shadow.png transp.png target.png")
    print("  Note: depth_grad, mip1, mip2 are computed on-the-fly by the dataloader.")


def main():
    """Command-line entry point for pack_blender_sample.py."""
    parser = argparse.ArgumentParser(
        description="Pack a Blender multi-layer EXR into CNN v3 training sample files."
    )
    parser.add_argument("--exr", required=True, help="Input multi-layer EXR file")
    parser.add_argument("--output", required=True, help="Output directory for sample files")
    args = parser.parse_args()
    pack_blender_sample(args.exr, args.output)


if __name__ == "__main__":
    main()


# ==== cnn_v3/training/pack_photo_sample.py (new file in this commit) ====
"""
Pack a photo into CNN v3 simple training sample files.

Converts a single RGB or RGBA photo into the CNN v3 sample layout.
Geometric channels (normal, depth, matid) carry no information; the network
degrades gracefully due to channel-dropout training.

Output files:
    albedo.png   — RGB uint8   (photo RGB)
    normal.png   — RG uint8    (neutral (128, 128) — no geometry data)
    depth.png    — R uint16    (zero — no depth data)
    matid.png    — R uint8     (zero — no material data)
    shadow.png   — R uint8     (255 = fully lit — assume unoccluded)
    transp.png   — R uint8     (1 - alpha, or 0 if no alpha channel)
    target.png   — RGBA uint8  (= photo; no ground-truth styled target)

mip1 and mip2 are computed on-the-fly by the dataloader from albedo.
prev = zero during training (no temporal history).

Usage:
    python3 pack_photo_sample.py --photo photos/img_001.png \\
                                 --output dataset/simple/sample_001/

Dependencies:
    numpy, Pillow
"""

import argparse
import os
import numpy as np
from PIL import Image


# ---- Mip computation ----

def pyrdown(img: np.ndarray) -> np.ndarray:
    """
    2×2 average pooling (half resolution).
    Args:
        img: (H, W, C) float32 in [0, 1].
    Returns:
        (H//2, W//2, C) float32.
    """
    h, w, c = img.shape
    h2, w2 = h // 2, w // 2
    # Crop to even dimensions
    cropped = img[:h2 * 2, :w2 * 2, :]
    # Sum the four pixels of every 2×2 cell and average
    return 0.25 * (
        cropped[0::2, 0::2, :] +
        cropped[1::2, 0::2, :] +
        cropped[0::2, 1::2, :] +
        cropped[1::2, 1::2, :]
    )


# ---- Main packing ----

def pack_photo_sample(photo_path: str, output_dir: str) -> None:
    """Write the CNN v3 sample layout for a plain photo.

    Args:
        photo_path: input image readable by Pillow (converted to RGBA).
        output_dir: directory to create/fill with the seven sample PNGs.
    """
    os.makedirs(output_dir, exist_ok=True)

    print(f"[pack_photo_sample] Loading {photo_path} …")
    # convert() returns a loaded copy, so the file handle can be closed here.
    with Image.open(photo_path) as src:
        img = src.convert("RGBA")
    width, height = img.size
    print(f"  Dimensions: {width}×{height}")

    # Work on the 8-bit data directly: a uint8 → float32 → uint8 round trip
    # with truncation can lose one code value.
    rgba_u8 = np.asarray(img, dtype=np.uint8)          # (H, W, 4)
    rgb_u8 = np.ascontiguousarray(rgba_u8[..., :3])    # (H, W, 3)
    alpha_u8 = rgba_u8[..., 3]                         # (H, W)

    # ---- albedo — photo RGB ----
    Image.fromarray(rgb_u8, mode="RGB").save(
        os.path.join(output_dir, "albedo.png")
    )

    # ---- normal — neutral (no geometry) ----
    normal_neutral = np.zeros((height, width, 3), dtype=np.uint8)
    # Encode "no normal" as (0.5, 0.5) in octahedral space → (128, 128)
    # This maps to oct = (0, 0) → reconstructed normal = (0, 0, 1) (pointing forward)
    normal_neutral[..., 0] = 128
    normal_neutral[..., 1] = 128
    Image.fromarray(normal_neutral, mode="RGB").save(
        os.path.join(output_dir, "normal.png")
    )

    # ---- depth — zero ----
    depth_zero = np.zeros((height, width), dtype=np.uint16)
    Image.fromarray(depth_zero, mode="I;16").save(
        os.path.join(output_dir, "depth.png")
    )

    # ---- matid — zero ----
    matid_zero = np.zeros((height, width), dtype=np.uint8)
    Image.fromarray(matid_zero, mode="L").save(
        os.path.join(output_dir, "matid.png")
    )

    # ---- shadow — 255 (fully lit, assume unoccluded) ----
    shadow_full = np.full((height, width), 255, dtype=np.uint8)
    Image.fromarray(shadow_full, mode="L").save(
        os.path.join(output_dir, "shadow.png")
    )

    # ---- transp — 1 - alpha (0=opaque, 1=transparent) ----
    # If the photo has no meaningful alpha, this is zero everywhere.
    transp_u8 = (255 - alpha_u8).astype(np.uint8)
    Image.fromarray(transp_u8, mode="L").save(
        os.path.join(output_dir, "transp.png")
    )

    # ---- target — albedo (= photo; no GT styled target) ----
    # Store as RGBA (keep alpha for potential masking by the dataloader).
    Image.fromarray(np.ascontiguousarray(rgba_u8), mode="RGBA").save(
        os.path.join(output_dir, "target.png")
    )

    # ---- mip1 / mip2 — informational only, not saved ----
    # The dataloader computes mip1/mip2 on-the-fly from albedo.
    # Only their sizes are reported here, as a debugging aid.
    rgb = rgb_u8.astype(np.float32) / 255.0
    mip1 = pyrdown(rgb)
    mip2 = pyrdown(mip1)
    print(f"  mip1: {mip1.shape[1]}×{mip1.shape[0]}  "
          f"mip2: {mip2.shape[1]}×{mip2.shape[0]}  (computed on-the-fly)")

    print(f"[pack_photo_sample] Wrote sample to {output_dir}")
    print("  Files: albedo.png normal.png depth.png matid.png "
          "shadow.png transp.png target.png")
    print("  Note: normal/depth/matid are zeroed (no geometry data).")
    print("  Note: target = albedo (no ground-truth styled target).")


def main():
    """Command-line entry point for pack_photo_sample.py."""
    parser = argparse.ArgumentParser(
        description="Pack a photo into CNN v3 simple training sample files."
    )
    parser.add_argument("--photo", required=True,
                        help="Input photo file (RGB or RGBA PNG/JPG)")
    parser.add_argument("--output", required=True,
                        help="Output directory for sample files")
    args = parser.parse_args()
    pack_photo_sample(args.photo, args.output)


if __name__ == "__main__":
    main()
