"""
Pack a photo into CNN v3 simple training sample files.

Converts a single RGB or RGBA photo into the CNN v3 sample layout.
Geometric channels carry no real data: depth and matid are zeroed and
normal is set to the neutral encoding (128, 128). The network
degrades gracefully due to channel-dropout training.

Output files:
    albedo.png    — RGB uint8   (photo RGB)
    normal.png    — RGB uint8   (R=G=128 neutral octahedral normal, B=0 — no geometry data)
    depth.png     — R uint16    (zero — no depth data)
    matid.png     — R uint8     (zero — no material data)
    shadow.png    — R uint8     (255 = fully lit — assume unoccluded)
    transp.png    — R uint8     (1 - alpha, or 0 if no alpha channel)
    target.png    — RGBA uint8  (styled ground truth loaded from --target)

mip1 and mip2 are computed on-the-fly by the dataloader from albedo.
prev = zero during training (no temporal history).

Usage:
    python3 pack_photo_sample.py --photo input/photo1.jpg \\
                                 --target target_1/photo1_out.png \\
                                 --output dataset/simple/sample_001/

Dependencies:
    numpy, Pillow
"""

import argparse
import os
import numpy as np
from PIL import Image


# ---- Mip computation ----

def pyrdown(img: np.ndarray) -> np.ndarray:
    """
    2×2 average pooling (half resolution).

    A trailing odd row and/or column is dropped so that every output pixel
    is the mean of one complete 2×2 block.

    Args:
        img: floating-point image of shape (H, W, C), or (H, W) for a
            single-channel image. Axes after the first two are carried
            through unchanged.
    Returns:
        Array of shape (H//2, W//2, ...) holding the block means; float32
        input gives float32 output.
    """
    # Only the two leading (spatial) axes matter, which lets 2-D inputs
    # through as well as (H, W, C) ones.
    h, w = img.shape[:2]
    # Crop to even dimensions
    cropped = img[: (h // 2) * 2, : (w // 2) * 2]
    # Sum the four members of each 2×2 block, then scale to the mean
    return 0.25 * (
        cropped[0::2, 0::2] +
        cropped[1::2, 0::2] +
        cropped[0::2, 1::2] +
        cropped[1::2, 1::2]
    )


# ---- Main packing ----

def pack_photo_sample(photo_path: str, target_path: str, output_dir: str) -> None:
    """
    Write the per-channel PNG files of one training sample.

    The photo supplies albedo (its RGB) and transp (255 - alpha). normal,
    depth, matid and shadow are filled with constant placeholder values
    because a plain photo carries no geometry or lighting data. The styled
    target is converted to RGBA and stored as target.png.

    Args:
        photo_path: Input photo; it is converted to RGBA on load.
        target_path: Styled ground-truth image; it is converted to RGBA.
        output_dir: Directory receiving the PNG files (created if missing).

    Errors raised while opening, decoding or writing images propagate to
    the caller unchanged.
    """
    os.makedirs(output_dir, exist_ok=True)

    print(f"[pack_photo_sample] Loading {photo_path} …")
    # convert() returns a fully loaded copy, so the source file handle can be
    # released as soon as the `with` block exits.
    with Image.open(photo_path) as photo_file:
        img = photo_file.convert("RGBA")
    width, height = img.size
    print(f"  Dimensions: {width}×{height}")

    # Stay in uint8 for everything that is written to disk: a round trip
    # through float32 followed by a truncating astype(uint8) can shift
    # values by one level.
    rgba_u8 = np.asarray(img, dtype=np.uint8)          # (H, W, 4)
    rgb_u8 = np.ascontiguousarray(rgba_u8[..., :3])    # (H, W, 3)
    alpha_u8 = rgba_u8[..., 3]                         # (H, W)

    # ---- albedo — photo RGB ----
    Image.fromarray(rgb_u8, mode="RGB").save(
        os.path.join(output_dir, "albedo.png")
    )

    # ---- normal — neutral placeholder (no geometry) ----
    normal_zeros = np.zeros((height, width, 3), dtype=np.uint8)
    # Encode "no normal" as (0.5, 0.5) in octahedral space → (128, 128)
    # This maps to oct = (0, 0) → reconstructed normal = (0, 0, 1) (pointing forward)
    normal_zeros[..., 0] = 128
    normal_zeros[..., 1] = 128
    Image.fromarray(normal_zeros, mode="RGB").save(
        os.path.join(output_dir, "normal.png")
    )

    # ---- depth — zero ----
    depth_zero = np.zeros((height, width), dtype=np.uint16)
    Image.fromarray(depth_zero, mode="I;16").save(
        os.path.join(output_dir, "depth.png")
    )

    # ---- matid — zero ----
    matid_zero = np.zeros((height, width), dtype=np.uint8)
    Image.fromarray(matid_zero, mode="L").save(
        os.path.join(output_dir, "matid.png")
    )

    # ---- shadow — 255 (fully lit, assume unoccluded) ----
    shadow_full = np.full((height, width), 255, dtype=np.uint8)
    Image.fromarray(shadow_full, mode="L").save(
        os.path.join(output_dir, "shadow.png")
    )

    # ---- transp — 1 - alpha (0=opaque, 255=transparent) ----
    # If the photo has no meaningful alpha, this is zero everywhere.
    # Integer form of (1 - alpha/255) * 255; exact, and cannot wrap because
    # alpha never exceeds 255.
    transp_u8 = np.uint8(255) - alpha_u8
    Image.fromarray(transp_u8, mode="L").save(
        os.path.join(output_dir, "transp.png")
    )

    # ---- target — styled ground truth ----
    print(f"  Loading target {target_path} …")
    with Image.open(target_path) as target_file:
        target_img = target_file.convert("RGBA")
    if target_img.size != img.size:
        # Reported rather than raised so existing pipelines keep running.
        target_w, target_h = target_img.size
        print(f"  WARNING: target is {target_w}×{target_h} but photo is "
              f"{width}×{height}; sample channels will not align")
    target_u8 = np.asarray(target_img, dtype=np.uint8)
    Image.fromarray(target_u8, mode="RGBA").save(
        os.path.join(output_dir, "target.png")
    )

    # ---- mip1 / mip2 — informational only, not saved ----
    # The dataloader computes mip1/mip2 on-the-fly from albedo.
    # Only the resulting dimensions are reported here, for debugging.
    rgb = rgb_u8.astype(np.float32) / 255.0  # (H, W, 3) in [0, 1]
    mip1 = pyrdown(rgb)
    mip2 = pyrdown(mip1)
    print(f"  mip1: {mip1.shape[1]}×{mip1.shape[0]}  "
          f"mip2: {mip2.shape[1]}×{mip2.shape[0]}  (computed on-the-fly)")

    print(f"[pack_photo_sample] Wrote sample to {output_dir}")
    print("  Files: albedo.png  normal.png  depth.png  matid.png  "
          "shadow.png  transp.png  target.png")
    print("  Note: normal/depth/matid are zeroed (no geometry data).")


def main():
    """Command-line entry point: parse the three required paths and pack one sample."""
    cli = argparse.ArgumentParser(
        description="Pack a photo into CNN v3 simple training sample files."
    )
    # Every option is mandatory; declare them from one table so the flags
    # and their help texts sit side by side.
    required_options = (
        ("--photo", "Input photo file (RGB or RGBA PNG/JPG)"),
        ("--target", "Styled ground-truth image (PNG/JPG)"),
        ("--output", "Output directory for sample files"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)

    opts = cli.parse_args()
    pack_photo_sample(opts.photo, opts.target, opts.output)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()