diff options
| author | skal <pascal.massimino@gmail.com> | 2026-03-22 10:48:50 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-03-22 10:48:50 +0100 |
| commit | e323d793e76dec7576546068f9f4c7d8d8a1d34d (patch) | |
| tree | aa84934a39bcf7d65b642e5460048edd52bec83c | |
| parent | 4c7ca0f06eb180f4a83641466014865c334e8987 (diff) | |
fix(cnn_v3): resize target to albedo dims when sizes differ
target.png can have a different resolution than albedo.png in simple
samples; patch slicing into the smaller target produced 0×0 tensors,
crashing torch.stack in the DataLoader collate.
handoff(Gemini): target resized in _load_sample (LANCZOS) + note in HOW_TO_CNN §1c.
| -rw-r--r-- | cnn_v3/docs/HOW_TO_CNN.md | 1 | ||||
| -rw-r--r-- | cnn_v3/training/cnn_v3_utils.py | 8 |
2 files changed, 6 insertions, 3 deletions
diff --git a/cnn_v3/docs/HOW_TO_CNN.md b/cnn_v3/docs/HOW_TO_CNN.md
index bb6f7a7..56ee101 100644
--- a/cnn_v3/docs/HOW_TO_CNN.md
+++ b/cnn_v3/docs/HOW_TO_CNN.md
@@ -274,6 +274,7 @@ dataset/
 - If `simple/` or `full/` subdir is absent the dataloader scans the root directly
 - Minimum viable dataset: 1 sample (smoke test only); practical minimum ~50+ for training
 - You can mix Blender and photo samples in the same subdir; the dataloader treats them identically
+- `target.png` may differ in resolution from `albedo.png` — the dataloader resizes it to match albedo automatically (LANCZOS)
 
 ---
 
diff --git a/cnn_v3/training/cnn_v3_utils.py b/cnn_v3/training/cnn_v3_utils.py
index 8da276e..5b43a4d 100644
--- a/cnn_v3/training/cnn_v3_utils.py
+++ b/cnn_v3/training/cnn_v3_utils.py
@@ -273,9 +273,11 @@ class CNNv3Dataset(Dataset):
         matid = load_gray(sd / 'matid.png')
         shadow = load_gray(sd / 'shadow.png')
         transp = load_gray(sd / 'transp.png')
-        target = np.asarray(
-            Image.open(sd / 'target.png').convert('RGBA'),
-            dtype=np.float32) / 255.0
+        h, w = albedo.shape[:2]
+        target_img = Image.open(sd / 'target.png').convert('RGBA')
+        if target_img.size != (w, h):
+            target_img = target_img.resize((w, h), Image.LANCZOS)
+        target = np.asarray(target_img, dtype=np.float32) / 255.0
         return albedo, normal, depth, matid, shadow, transp, target
 
     def __getitem__(self, idx):
