"""
Pack a photo into CNN v3 simple training sample files.

Converts a single RGB or RGBA photo into the CNN v3 sample layout.
Geometric channels (normal, depth, matid) carry no information (neutral or
zero values); the network degrades gracefully due to channel-dropout training.

Output files:
  albedo.png  — RGB uint8   (photo RGB)
  normal.png  — RGB uint8   (R = G = 128, B = 0: neutral encoded normal — no geometry data)
  depth.png   — R uint16    (zero — no depth data)
  matid.png   — R uint8     (zero — no material data)
  shadow.png  — R uint8     (255 = fully lit — assume unoccluded)
  transp.png  — R uint8     (255 - alpha, i.e. 0 if the photo has no alpha channel)
  target.png  — RGBA uint8  (styled ground truth loaded from --target)

mip1 and mip2 are computed on-the-fly by the dataloader from albedo.
prev = zero during training (no temporal history).

Usage:
  python3 pack_photo_sample.py --photo input/photo1.jpg \\
      --target target_1/photo1_out.png \\
      --output dataset/simple/sample_001/

Dependencies:
  numpy, Pillow
"""

import argparse
import os

import numpy as np
from PIL import Image


# ---- Mip computation ----

def pyrdown(img: np.ndarray) -> np.ndarray:
    """
    2×2 average pooling (half resolution).

    A trailing odd row and/or column is cropped away before pooling.

    Args:
        img: (H, W, C) float32 in [0, 1]. A plain (H, W) array is accepted too.

    Returns:
        (H//2, W//2, C) array (or (H//2, W//2) for 2-D input); float32 when
        the input is float32.
    """
    h, w = img.shape[:2]
    h2, w2 = h // 2, w // 2
    # Crop to even dimensions so every output pixel has a full 2×2 footprint.
    cropped = img[:h2 * 2, :w2 * 2]
    # Sum the four phase-shifted sub-grids and average.
    return 0.25 * (
        cropped[0::2, 0::2]
        + cropped[1::2, 0::2]
        + cropped[0::2, 1::2]
        + cropped[1::2, 1::2]
    )


# ---- Main packing ----

def pack_photo_sample(photo_path: str, target_path: str, output_dir: str) -> None:
    """
    Write one CNN v3 "simple" training sample built from a photo.

    Creates ``output_dir`` if needed and writes albedo.png, normal.png,
    depth.png, matid.png, shadow.png, transp.png and target.png into it.
    Progress is reported on stdout.

    Args:
        photo_path: Input photo (any format Pillow can open); converted to RGBA.
        target_path: Styled ground-truth image; converted to RGBA and re-saved
            as target.png.
        output_dir: Directory receiving the sample files.

    Errors raised by Pillow/OS while opening or saving images propagate
    unchanged.
    """
    os.makedirs(output_dir, exist_ok=True)

    print(f"[pack_photo_sample] Loading {photo_path} …")
    # Use a context manager so the underlying file handle is released promptly.
    with Image.open(photo_path) as photo:
        rgba_u8 = np.asarray(photo.convert("RGBA"), dtype=np.uint8)  # (H, W, 4)
    height, width = rgba_u8.shape[:2]
    print(f"  Dimensions: {width}×{height}")

    # ---- albedo — photo RGB ----
    # Take the 8-bit values straight from the decoded image. Round-tripping
    # through float and truncating with astype(uint8) can lose one level.
    albedo_u8 = np.ascontiguousarray(rgba_u8[..., :3])
    Image.fromarray(albedo_u8, mode="RGB").save(
        os.path.join(output_dir, "albedo.png")
    )

    # ---- normal — neutral (no geometry) ----
    # Encode "no normal" as (0.5, 0.5) in octahedral space → (128, 128)
    # This maps to oct = (0, 0) → reconstructed normal = (0, 0, 1) (pointing forward)
    normal_neutral = np.zeros((height, width, 3), dtype=np.uint8)
    normal_neutral[..., :2] = 128
    Image.fromarray(normal_neutral, mode="RGB").save(
        os.path.join(output_dir, "normal.png")
    )

    # ---- depth — zero ----
    depth_zero = np.zeros((height, width), dtype=np.uint16)
    Image.fromarray(depth_zero, mode="I;16").save(
        os.path.join(output_dir, "depth.png")
    )

    # ---- matid — zero ----
    matid_zero = np.zeros((height, width), dtype=np.uint8)
    Image.fromarray(matid_zero, mode="L").save(
        os.path.join(output_dir, "matid.png")
    )

    # ---- shadow — 255 (fully lit, assume unoccluded) ----
    shadow_full = np.full((height, width), 255, dtype=np.uint8)
    Image.fromarray(shadow_full, mode="L").save(
        os.path.join(output_dir, "shadow.png")
    )

    # ---- transp — 1 - alpha (0=opaque, 255=transparent) ----
    # If the photo has no meaningful alpha, this is zero everywhere.
    # Exact integer arithmetic: 255 - alpha stays inside uint8 and avoids the
    # float truncation of (1 - a/255) * 255.
    transp_u8 = 255 - rgba_u8[..., 3]
    Image.fromarray(transp_u8, mode="L").save(
        os.path.join(output_dir, "transp.png")
    )

    # ---- target — styled ground truth ----
    print(f"  Loading target {target_path} …")
    with Image.open(target_path) as target_img:
        target_u8 = np.asarray(target_img.convert("RGBA"), dtype=np.uint8)
    target_h, target_w = target_u8.shape[:2]
    if (target_w, target_h) != (width, height):
        # Best-effort: still write the sample, but make the mismatch visible.
        print(f"  WARNING: target is {target_w}×{target_h} but photo is "
              f"{width}×{height}; sample channels will not align.")
    Image.fromarray(target_u8, mode="RGBA").save(
        os.path.join(output_dir, "target.png")
    )

    # ---- mip1 / mip2 — informational only, not saved ----
    # The dataloader computes mip1/mip2 on-the-fly from albedo.
    # Compute them here only to report their sizes for debugging.
    rgb = albedo_u8.astype(np.float32) / 255.0  # (H, W, 3) in [0, 1]
    mip1 = pyrdown(rgb)
    mip2 = pyrdown(mip1)
    print(f"  mip1: {mip1.shape[1]}×{mip1.shape[0]}  "
          f"mip2: {mip2.shape[1]}×{mip2.shape[0]}  (computed on-the-fly)")

    print(f"[pack_photo_sample] Wrote sample to {output_dir}")
    print("  Files: albedo.png normal.png depth.png matid.png "
          "shadow.png transp.png target.png")
    print("  Note: normal/depth/matid are zeroed (no geometry data).")


def main() -> None:
    """Parse command-line arguments and pack a single sample."""
    parser = argparse.ArgumentParser(
        description="Pack a photo into CNN v3 simple training sample files."
    )
    parser.add_argument("--photo", required=True,
                        help="Input photo file (RGB or RGBA PNG/JPG)")
    parser.add_argument("--target", required=True,
                        help="Styled ground-truth image (PNG/JPG)")
    parser.add_argument("--output", required=True,
                        help="Output directory for sample files")
    args = parser.parse_args()
    pack_photo_sample(args.photo, args.target, args.output)


if __name__ == "__main__":
    main()