1 files changed, 148 insertions, 0 deletions
diff --git a/cnn_v3/training/pack_photo_sample.py b/cnn_v3/training/pack_photo_sample.py
new file mode 100644
index 0000000..b2943fb
--- /dev/null
+++ b/cnn_v3/training/pack_photo_sample.py
@@ -0,0 +1,148 @@
+"""
+Pack a photo into CNN v3 simple training sample files.
+
+Converts a single RGB or RGBA photo into the CNN v3 sample layout.
+Geometric channels (normal, depth, matid) are zeroed; the network
+degrades gracefully due to channel-dropout training.
+
+Output files:
+    albedo.png    — RGB uint8   (photo RGB)
+    normal.png    — RG uint8    (zero — no geometry data)
+    depth.png     — R uint16    (zero — no depth data)
+    matid.png     — R uint8     (zero — no material data)
+    shadow.png    — R uint8     (255 = fully lit — assume unoccluded)
+    transp.png    — R uint8     (1 - alpha, or 0 if no alpha channel)
+    target.png    — RGB/RGBA    (= albedo; no ground-truth styled target)
+
+mip1 and mip2 are computed on-the-fly by the dataloader from albedo.
+prev = zero during training (no temporal history).
+
+Usage:
+    python3 pack_photo_sample.py --photo photos/img_001.png \\
+                                 --output dataset/simple/sample_001/
+
+Dependencies:
+    numpy, Pillow
+"""
+
+import argparse
+import os
+import numpy as np
+from PIL import Image
+
+
+# ---- Mip computation ----
+
+def pyrdown(img: np.ndarray) -> np.ndarray:
+    """
+    2×2 average pooling (half resolution).
+    Args:
+        img: (H, W, C) float32 in [0, 1].
+    Returns:
+        (H//2, W//2, C) float32.
+    """
+    h, w, c = img.shape
+    h2, w2 = h // 2, w // 2
+    # Crop to even dimensions
+    cropped = img[:h2 * 2, :w2 * 2, :]
+    # Reshape and average
+    return 0.25 * (
+        cropped[0::2, 0::2, :] +
+        cropped[1::2, 0::2, :] +
+        cropped[0::2, 1::2, :] +
+        cropped[1::2, 1::2, :]
+    )
+
+
+# ---- Main packing ----
+
+def pack_photo_sample(photo_path: str, output_dir: str) -> None:
+    os.makedirs(output_dir, exist_ok=True)
+
+    print(f"[pack_photo_sample] Loading {photo_path} …")
+    img = Image.open(photo_path).convert("RGBA")
+    width, height = img.size
+    print(f"  Dimensions: {width}×{height}")
+
+    img_np = np.asarray(img, dtype=np.float32) / 255.0  # (H, W, 4) in [0, 1]
+    rgb  = img_np[..., :3]   # (H, W, 3)
+    alpha = img_np[..., 3]   # (H, W)
+
+    # ---- albedo — photo RGB ----
+    albedo_u8 = (np.clip(rgb, 0, 1) * 255.0).astype(np.uint8)
+    Image.fromarray(albedo_u8, mode="RGB").save(
+        os.path.join(output_dir, "albedo.png")
+    )
+
+    # ---- normal — zero (no geometry) ----
+    normal_zeros = np.zeros((height, width, 3), dtype=np.uint8)
+    # Encode "no normal" as (0.5, 0.5) in octahedral space → (128, 128)
+    # This maps to oct = (0, 0) → reconstructed normal = (0, 0, 1) (pointing forward)
+    normal_zeros[..., 0] = 128
+    normal_zeros[..., 1] = 128
+    Image.fromarray(normal_zeros, mode="RGB").save(
+        os.path.join(output_dir, "normal.png")
+    )
+
+    # ---- depth — zero ----
+    depth_zero = np.zeros((height, width), dtype=np.uint16)
+    Image.fromarray(depth_zero, mode="I;16").save(
+        os.path.join(output_dir, "depth.png")
+    )
+
+    # ---- matid — zero ----
+    matid_zero = np.zeros((height, width), dtype=np.uint8)
+    Image.fromarray(matid_zero, mode="L").save(
+        os.path.join(output_dir, "matid.png")
+    )
+
+    # ---- shadow — 255 (fully lit, assume unoccluded) ----
+    shadow_full = np.full((height, width), 255, dtype=np.uint8)
+    Image.fromarray(shadow_full, mode="L").save(
+        os.path.join(output_dir, "shadow.png")
+    )
+
+    # ---- transp — 1 - alpha (0=opaque, 1=transparent) ----
+    # If the photo has no meaningful alpha, this is zero everywhere.
+    transp = 1.0 - np.clip(alpha, 0.0, 1.0)
+    transp_u8 = (transp * 255.0).astype(np.uint8)
+    Image.fromarray(transp_u8, mode="L").save(
+        os.path.join(output_dir, "transp.png")
+    )
+
+    # ---- target — albedo (= photo; no GT styled target) ----
+    # Store as RGBA (keep alpha for potential masking by the dataloader).
+    target_u8 = (np.clip(img_np, 0, 1) * 255.0).astype(np.uint8)
+    Image.fromarray(target_u8, mode="RGBA").save(
+        os.path.join(output_dir, "target.png")
+    )
+
+    # ---- mip1 / mip2 — informational only, not saved ----
+    # The dataloader computes mip1/mip2 on-the-fly from albedo.
+    # Verify they look reasonable here for debugging.
+    mip1 = pyrdown(rgb)
+    mip2 = pyrdown(mip1)
+    print(f"  mip1: {mip1.shape[1]}×{mip1.shape[0]}  "
+          f"mip2: {mip2.shape[1]}×{mip2.shape[0]}  (computed on-the-fly)")
+
+    print(f"[pack_photo_sample] Wrote sample to {output_dir}")
+    print("  Files: albedo.png  normal.png  depth.png  matid.png  "
+          "shadow.png  transp.png  target.png")
+    print("  Note: normal/depth/matid are zeroed (no geometry data).")
+    print("  Note: target = albedo (no ground-truth styled target).")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Pack a photo into CNN v3 simple training sample files."
+    )
+    parser.add_argument("--photo",  required=True,
+                        help="Input photo file (RGB or RGBA PNG/JPG)")
+    parser.add_argument("--output", required=True,
+                        help="Output directory for sample files")
+    args = parser.parse_args()
+    pack_photo_sample(args.photo, args.output)
+
+
+if __name__ == "__main__":
+    main()