From fcd1ceb2168de76659091208f911d2c4d0c59ad7 Mon Sep 17 00:00:00 2001 From: skal Date: Sun, 22 Mar 2026 07:33:28 +0100 Subject: feat(cnn_v3): gen_sample tool + 7 simple training samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - pack_photo_sample.py: --target now required (no albedo fallback) - gen_sample: bash wrapper with positional args (input target output_dir) - input/photo7.jpg: copy of photo2 (second style target) - target_1: photo2_1_out→photo2_out, photo2_2_out→photo7_out - dataset/simple/sample_001..007: 7 packed photo/target pairs handoff(Gemini): training data ready; next step is train_cnn_v3.py run --- cnn_v3/training/pack_photo_sample.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'cnn_v3/training/pack_photo_sample.py') diff --git a/cnn_v3/training/pack_photo_sample.py b/cnn_v3/training/pack_photo_sample.py index b2943fb..ca80301 100644 --- a/cnn_v3/training/pack_photo_sample.py +++ b/cnn_v3/training/pack_photo_sample.py @@ -18,7 +18,8 @@ mip1 and mip2 are computed on-the-fly by the dataloader from albedo. prev = zero during training (no temporal history). Usage: - python3 pack_photo_sample.py --photo photos/img_001.png \\ + python3 pack_photo_sample.py --photo input/photo1.jpg \\ + --target target_1/photo1_out.png \\ --output dataset/simple/sample_001/ Dependencies: @@ -56,7 +57,7 @@ def pyrdown(img: np.ndarray) -> np.ndarray: # ---- Main packing ---- -def pack_photo_sample(photo_path: str, output_dir: str) -> None: +def pack_photo_sample(photo_path: str, target_path: str, output_dir: str) -> None: os.makedirs(output_dir, exist_ok=True) print(f"[pack_photo_sample] Loading {photo_path} …") @@ -110,9 +111,10 @@ def pack_photo_sample(photo_path: str, output_dir: str) -> None: os.path.join(output_dir, "transp.png") ) - # ---- target — albedo (= photo; no GT styled target) ---- - # Store as RGBA (keep alpha for potential masking by the dataloader). - target_u8 = (np.clip(img_np, 0, 1) * 255.0).astype(np.uint8) + # ---- target — styled ground truth ---- + print(f" Loading target {target_path} …") + target_img = Image.open(target_path).convert("RGBA") + target_u8 = np.asarray(target_img, dtype=np.uint8) Image.fromarray(target_u8, mode="RGBA").save( os.path.join(output_dir, "target.png") ) @@ -129,7 +131,6 @@ def pack_photo_sample(photo_path: str, output_dir: str) -> None: print(" Files: albedo.png normal.png depth.png matid.png " "shadow.png transp.png target.png") print(" Note: normal/depth/matid are zeroed (no geometry data).") - print(" Note: target = albedo (no ground-truth styled target).") def main(): @@ -138,10 +139,12 @@ def main(): ) parser.add_argument("--photo", required=True, help="Input photo file (RGB or RGBA PNG/JPG)") + parser.add_argument("--target", required=True, + help="Styled ground-truth image (PNG/JPG)") parser.add_argument("--output", required=True, help="Output directory for sample files") args = parser.parse_args() - pack_photo_sample(args.photo, args.output) + pack_photo_sample(args.photo, args.target, args.output) if __name__ == "__main__": -- cgit v1.2.3