diff options
Diffstat (limited to 'cnn_v3/training/pack_photo_sample.py')
| -rw-r--r-- | cnn_v3/training/pack_photo_sample.py | 148 |
1 files changed, 148 insertions, 0 deletions
# Recovered from the diff that adds cnn_v3/training/pack_photo_sample.py
# (new file, mode 100644, blob b2943fb).
"""
Pack a photo into CNN v3 simple training sample files.

Converts a single RGB or RGBA photo into the CNN v3 sample layout.
Geometric channels (normal, depth, matid) carry placeholder values; the
network degrades gracefully due to channel-dropout training.

Output files:
  albedo.png — RGB uint8   (photo RGB)
  normal.png — RGB uint8   (R = G = 128, B = 0 — neutral placeholder, no geometry data)
  depth.png  — R uint16    (zero — no depth data)
  matid.png  — R uint8     (zero — no material data)
  shadow.png — R uint8     (255 = fully lit — assume unoccluded)
  transp.png — R uint8     (1 - alpha, or 0 if no alpha channel)
  target.png — RGBA uint8  (= photo with its alpha; no ground-truth styled target)

mip1 and mip2 are computed on-the-fly by the dataloader from albedo.
prev = zero during training (no temporal history).

Usage:
    python3 pack_photo_sample.py --photo photos/img_001.png \\
                                 --output dataset/simple/sample_001/

Dependencies:
    numpy, Pillow
"""

import argparse
import os

import numpy as np
from PIL import Image


# ---- Mip computation ----

def pyrdown(img: np.ndarray) -> np.ndarray:
    """
    2×2 average pooling (half resolution).

    Odd trailing rows/columns are dropped before pooling.

    Args:
        img: (H, W, C) float32 in [0, 1].
    Returns:
        (H//2, W//2, C) float32.
    """
    h, w, _ = img.shape
    h2, w2 = h // 2, w // 2
    # Crop to even dimensions so every output pixel has a full 2×2 footprint.
    cropped = img[:h2 * 2, :w2 * 2, :]
    # Average the four strided sub-grids (top-left, bottom-left, top-right,
    # bottom-right of each 2×2 cell).
    return 0.25 * (
        cropped[0::2, 0::2, :]
        + cropped[1::2, 0::2, :]
        + cropped[0::2, 1::2, :]
        + cropped[1::2, 1::2, :]
    )


# ---- Quantization / saving helpers ----

def _quantize_u8(values: np.ndarray) -> np.ndarray:
    """
    Map floats in [0, 1] to uint8 in [0, 255], rounding to nearest.

    A bare ``astype(np.uint8)`` truncates toward zero, so a value that float32
    arithmetic leaves just below an integer (e.g. (1 - 254/255) * 255 =
    0.99999994) would lose a whole level. Rounding first avoids that.
    """
    return np.rint(np.clip(values, 0.0, 1.0) * 255.0).astype(np.uint8)


def _save_png(array: np.ndarray, mode: str, output_dir: str, filename: str) -> None:
    """Write *array* as a PNG named *filename* in *output_dir* using PIL *mode*."""
    Image.fromarray(array, mode=mode).save(os.path.join(output_dir, filename))


# ---- Main packing ----

def pack_photo_sample(photo_path: str, output_dir: str) -> None:
    """
    Write the CNN v3 "simple" sample files for one photo.

    Creates *output_dir* if needed and writes albedo.png, normal.png,
    depth.png, matid.png, shadow.png, transp.png and target.png into it.

    Args:
        photo_path: Path of the input image; it is converted to RGBA on load.
        output_dir: Directory that receives the sample files.
    """
    os.makedirs(output_dir, exist_ok=True)

    print(f"[pack_photo_sample] Loading {photo_path} …")
    # convert() yields an independent, fully loaded image, so the source file
    # handle can be released as soon as the with-block exits.
    with Image.open(photo_path) as src:
        img = src.convert("RGBA")
    width, height = img.size
    print(f" Dimensions: {width}×{height}")

    img_np = np.asarray(img, dtype=np.float32) / 255.0  # (H, W, 4) in [0, 1]
    rgb = img_np[..., :3]    # (H, W, 3)
    alpha = img_np[..., 3]   # (H, W)

    # ---- albedo — photo RGB ----
    _save_png(_quantize_u8(rgb), "RGB", output_dir, "albedo.png")

    # ---- normal — neutral placeholder (no geometry) ----
    # Encode "no normal" as roughly (0.5, 0.5) in the first two channels
    # → (128, 128); the third channel stays 0.
    # Per the original author's note this maps to oct = (0, 0), i.e. a
    # reconstructed normal of (0, 0, 1) — the decoder is not part of this file.
    normal_placeholder = np.zeros((height, width, 3), dtype=np.uint8)
    normal_placeholder[..., 0] = 128
    normal_placeholder[..., 1] = 128
    _save_png(normal_placeholder, "RGB", output_dir, "normal.png")

    # ---- depth — zero ----
    depth_zero = np.zeros((height, width), dtype=np.uint16)
    _save_png(depth_zero, "I;16", output_dir, "depth.png")

    # ---- matid — zero ----
    matid_zero = np.zeros((height, width), dtype=np.uint8)
    _save_png(matid_zero, "L", output_dir, "matid.png")

    # ---- shadow — 255 (fully lit, assume unoccluded) ----
    shadow_full = np.full((height, width), 255, dtype=np.uint8)
    _save_png(shadow_full, "L", output_dir, "shadow.png")

    # ---- transp — 1 - alpha (0=opaque, 1=transparent) ----
    # If the photo has no meaningful alpha, this is zero everywhere.
    _save_png(_quantize_u8(1.0 - alpha), "L", output_dir, "transp.png")

    # ---- target — albedo (= photo; no GT styled target) ----
    # Store as RGBA (keep alpha for potential masking by the dataloader).
    _save_png(_quantize_u8(img_np), "RGBA", output_dir, "target.png")

    # ---- mip1 / mip2 — informational only, not saved ----
    # The dataloader computes mip1/mip2 on-the-fly from albedo.
    # Only their sizes are reported here as a debugging aid.
    mip1 = pyrdown(rgb)
    mip2 = pyrdown(mip1)
    print(f" mip1: {mip1.shape[1]}×{mip1.shape[0]} "
          f"mip2: {mip2.shape[1]}×{mip2.shape[0]} (computed on-the-fly)")

    print(f"[pack_photo_sample] Wrote sample to {output_dir}")
    print(" Files: albedo.png normal.png depth.png matid.png "
          "shadow.png transp.png target.png")
    print(" Note: normal/depth/matid are zeroed (no geometry data).")
    print(" Note: target = albedo (no ground-truth styled target).")


def main() -> None:
    """Parse --photo / --output from the command line and pack one sample."""
    parser = argparse.ArgumentParser(
        description="Pack a photo into CNN v3 simple training sample files."
    )
    parser.add_argument("--photo", required=True,
                        help="Input photo file (RGB or RGBA PNG/JPG)")
    parser.add_argument("--output", required=True,
                        help="Output directory for sample files")
    args = parser.parse_args()
    pack_photo_sample(args.photo, args.output)


if __name__ == "__main__":
    main()
