From fb13e67acbc7d7dd2974a456fcb134966c47cee0 Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Fri, 27 Mar 2026 07:59:00 +0100
Subject: fix(cnn_v3): remove dec0 ReLU, load FiLM MLP at runtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs blocking training convergence:

1. dec0 ReLU before sigmoid constrained output to [0.5, 1.0]: ReLU makes
   the sigmoid input non-negative and sigmoid(0) = 0.5, so the network
   could never produce dark pixels. Removed F.relu in train_cnn_v3.py
   and max(0,…) in cnn_v3_dec0.wgsl. Test vectors regenerated.

2. set_film_params() used hardcoded heuristics instead of the trained MLP.
   Added CNNv3FilmMlp struct + load_film_mlp() to cnn_v3_effect.h/.cc.
   MLP auto-loaded from ASSET_WEIGHTS_CNN_V3_FILM_MLP at construction;
   Linear(5→16)→ReLU→Linear(16→72) runs CPU-side each frame.

36/36 tests pass. Parity max_err=4.88e-4 unchanged.

handoff(Gemini): retrain from scratch — needs ≥50 samples (currently 11).
See cnn_v3/docs/HOWTO.md §2-3.
---
 cnn_v3/training/train_cnn_v3.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'cnn_v3/training/train_cnn_v3.py')

diff --git a/cnn_v3/training/train_cnn_v3.py b/cnn_v3/training/train_cnn_v3.py
index e48f684..fa0d2e2 100644
--- a/cnn_v3/training/train_cnn_v3.py
+++ b/cnn_v3/training/train_cnn_v3.py
@@ -10,8 +10,7 @@ Architecture (enc_channels=[8,16]):
   enc1        Conv(8→16,  3×3) + FiLM + ReLU + pool2        H/2×W/2  2× rgba32uint (16ch split)
   bottleneck  Conv(16→16, 3×3, dilation=2) + ReLU           H/4×W/4  2× rgba32uint (16ch split)
   dec1        upsample×2 + cat(enc1) Conv(32→8)  + FiLM     H/2×W/2  rgba32uint (8ch)
-  dec0        upsample×2 + cat(enc0) Conv(16→4)  + FiLM     H×W      rgba16float (4ch)
-  output      sigmoid → RGBA
+  dec0        upsample×2 + cat(enc0) Conv(16→4)  + FiLM + sigmoid H×W  rgba16float (4ch)
 
 FiLM MLP: Linear(5→16) → ReLU → Linear(16→72)
   72 = 2 × (γ+β) for enc0(8) enc1(16) dec1(8) dec0(4)
@@ -93,9 +92,9 @@ class CNNv3(nn.Module):
             torch.cat([F.interpolate(x, scale_factor=2, mode='nearest'), skip1], dim=1)
         ), gd1, bd1))
 
-        x = F.relu(film_apply(self.dec0(
+        x = film_apply(self.dec0(
             torch.cat([F.interpolate(x, scale_factor=2, mode='nearest'), skip0], dim=1)
-        ), gd0, bd0))
+        ), gd0, bd0)
 
         return torch.sigmoid(x)
 
-- 
cgit v1.2.3