3 files changed, 454 insertions, 11 deletions
diff --git a/cnn_v3/tools/index.html b/cnn_v3/tools/index.html
index eba532e..26fee9b 100644
--- a/cnn_v3/tools/index.html
+++ b/cnn_v3/tools/index.html
@@ -64,9 +64,11 @@ video{display:none}
   <div class="left">
     <input type="file" id="wFile"  accept=".bin" style="display:none">
     <input type="file" id="fFile"  accept=".bin" style="display:none">
+    <input type="file" id="sFile"  webkitdirectory style="display:none" onchange="tester.loadSampleDir(this.files)">
 
     <div class="dz" id="wDrop"  onclick="document.getElementById('wFile').click()">Drop cnn_v3_weights.bin</div>
     <div class="dz" id="fDrop"  onclick="document.getElementById('fFile').click()">Drop cnn_v3_film_mlp.bin (optional)</div>
+    <button onclick="tester.preload()" style="font-size:9px;margin-top:2px">↺ Reload from workspace weights/</button>
 
     <div class="panel">
       <div class="ph" onclick="togglePanel(this)">Input Mode <span>▼</span></div>
@@ -78,6 +80,10 @@ video{display:none}
         <div id="fullHelp" style="display:none;margin-top:6px;font-size:9px;color:#555;line-height:1.6">
           Drop PNGs: *albedo*/color · *normal* · *depth* · *matid*/index · *shadow* · *transp*/alpha
         </div>
+        <div style="margin-top:8px;border-top:1px solid #333;padding-top:8px">
+          <button onclick="document.getElementById('sFile').click()" style="width:100%">↑ Load sample directory</button>
+          <div id="sampleSt" style="font-size:9px;color:#555;margin-top:3px"></div>
+        </div>
       </div>
     </div>
 
@@ -120,15 +126,29 @@ video{display:none}
       <div class="sep"></div>
       <button onclick="tester.savePNG()">Save PNG</button>
     </div>
-    <canvas id="canvas"></canvas>
+    <div style="display:flex;gap:12px;align-items:flex-start">
+      <div style="display:flex;flex-direction:column;align-items:center;gap:3px">
+        <canvas id="canvas"></canvas>
+        <span id="cnnLabel" style="font-size:9px;color:#555"></span>
+      </div>
+      <div id="targetPane" style="display:none;flex-direction:column;align-items:center;gap:3px">
+        <canvas id="targetCanvas" style="max-width:100%;max-height:100%;image-rendering:pixelated;box-shadow:0 4px 12px rgba(0,0,0,.5)"></canvas>
+        <span style="font-size:9px;color:#555">target.png</span>
+        <span id="psnrSt" style="font-size:9px;color:#4a9eff"></span>
+      </div>
+    </div>
   </div>
 
   <div class="right">
     <div class="panel" style="flex:1;display:flex;flex-direction:column;min-height:0">
       <div class="ph">Layer Visualization</div>
-      <div class="pc" id="layerViz" style="flex:1;overflow:auto">
+      <div class="pc" id="layerViz" style="flex:1;min-height:0;overflow:auto">
         <p style="color:#444;text-align:center">Load image + weights</p>
       </div>
+      <div id="chzoomWrap" style="display:none;flex-direction:column;align-items:center;justify-content:center;gap:3px;padding:6px;border-top:1px solid #333;background:#1a1a1a;flex:1;min-height:0;overflow:hidden">
+        <span id="chzoomLbl" style="font-size:9px;color:#666;flex-shrink:0"></span>
+        <canvas id="chzoom" style="image-rendering:pixelated;display:block"></canvas>
+      </div>
     </div>
   </div>
 </div>
diff --git a/cnn_v3/tools/shaders.js b/cnn_v3/tools/shaders.js
index c3e994d..f178637 100644
--- a/cnn_v3/tools/shaders.js
+++ b/cnn_v3/tools/shaders.js
@@ -223,30 +223,85 @@ const DISP_SHADER=`
 }`;
 
 // Viz f32: show one channel of rgba16float layer
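+// Uniform layout (struct Vu, 16 bytes): ch(u32) channel index · _p(u32) unused · ox(i32) · oy(i32).
+// ox/oy = texel offset added to the fragment position (top-left of the view; the result is clamped to the texture); 0,0 for full-texture vignettes.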
 const VIZ_F32=`
+struct Vu{ch:u32,_p:u32,ox:i32,oy:i32}
 @group(0) @binding(0) var t:texture_2d<f32>;
-@group(0) @binding(1) var<uniform> ch:u32;
+@group(0) @binding(1) var<uniform> u:Vu;
 @vertex fn vs(@builtin(vertex_index) i:u32)->@builtin(position) vec4f{
   var p=array<vec2f,6>(vec2f(-1.,-1.),vec2f(1.,-1.),vec2f(-1.,1.),vec2f(-1.,1.),vec2f(1.,-1.),vec2f(1.,1.));
   return vec4f(p[i],0.,1.);
 }
 @fragment fn fs(@builtin(position) pos:vec4f)->@location(0) vec4f{
-  let v=textureLoad(t,vec2i(pos.xy),0); var a=array<f32,4>(v.x,v.y,v.z,v.w);
-  let x=clamp(a[min(ch,3u)],0.,1.); return vec4f(x,x,x,1.);
+  let dim=vec2i(textureDimensions(t));
+  let tc=clamp(vec2i(i32(pos.x)+u.ox,i32(pos.y)+u.oy),vec2i(0),dim-vec2i(1));
+  let v=textureLoad(t,tc,0); var a=array<f32,4>(v.x,v.y,v.z,v.w);
+  let x=clamp(a[min(u.ch,3u)],0.,1.); return vec4f(x,x,x,1.);
 }`;
 
 // Viz u32: show one f16 channel of rgba32uint layer (8 channels packed)
 const VIZ_U32=`
+struct Vu{ch:u32,_p:u32,ox:i32,oy:i32}
 @group(0) @binding(0) var t:texture_2d<u32>;
-@group(0) @binding(1) var<uniform> ch:u32;
+@group(0) @binding(1) var<uniform> u:Vu;
 @vertex fn vs(@builtin(vertex_index) i:u32)->@builtin(position) vec4f{
   var p=array<vec2f,6>(vec2f(-1.,-1.),vec2f(1.,-1.),vec2f(-1.,1.),vec2f(-1.,1.),vec2f(1.,-1.),vec2f(1.,1.));
   return vec4f(p[i],0.,1.);
 }
 @fragment fn fs(@builtin(position) pos:vec4f)->@location(0) vec4f{
-  let t2=textureLoad(t,vec2i(pos.xy),0);
+  let dim=vec2i(textureDimensions(t));
+  let tc=clamp(vec2i(i32(pos.x)+u.ox,i32(pos.y)+u.oy),vec2i(0),dim-vec2i(1));
+  let t2=textureLoad(t,tc,0);
   let a=unpack2x16float(t2.x);let b=unpack2x16float(t2.y);
   let c=unpack2x16float(t2.z);let d=unpack2x16float(t2.w);
   var v=array<f32,8>(a.x,a.y,b.x,b.y,c.x,c.y,d.x,d.y);
-  let x=clamp(v[min(ch,7u)],0.,1.); return vec4f(x,x,x,1.);
+  let x=clamp(v[min(u.ch,7u)],0.,1.); return vec4f(x,x,x,1.);
 }`;
+
+// Full G-buffer pack: assembles feat_tex0/feat_tex1 from individual G-buffer images.
+// Bindings: albedo(0) normal(1) depth(2) matid(3) shadow(4) transp(5) f0(6) f1(7)
+// All source textures are rgba8unorm (browser-loaded images, R channel for depth/matid/shadow/transp).
+// Uses textureLoad() only (no sampler needed). Matches gbuf_pack.wgsl packing exactly. Non-albedo loads go through ldc(), which clamps to that texture's own size, so 1×1 fallbacks and size-mismatched inputs never read out of bounds.
+const FULL_PACK_SHADER=`
+@group(0) @binding(0) var albedo: texture_2d<f32>;
+@group(0) @binding(1) var normal: texture_2d<f32>;
+@group(0) @binding(2) var depth:  texture_2d<f32>;
+@group(0) @binding(3) var matid:  texture_2d<f32>;
+@group(0) @binding(4) var shadow: texture_2d<f32>;
+@group(0) @binding(5) var transp: texture_2d<f32>;
+@group(0) @binding(6) var f0:     texture_storage_2d<rgba32uint,write>;
+@group(0) @binding(7) var f1:     texture_storage_2d<rgba32uint,write>;
+fn ldc(t:texture_2d<f32>,c:vec2i)->vec4f{return textureLoad(t,clamp(c,vec2i(0),vec2i(textureDimensions(t))-vec2i(1)),0);}
+fn ld(c:vec2i,d:vec2i)->f32{return ldc(depth,clamp(c,vec2i(0),d-vec2i(1))).r;}
+fn b2(tl:vec2i,d:vec2i)->vec3f{
+  var s=vec3f(0.);
+  for(var y:i32=0;y<2;y++){for(var x:i32=0;x<2;x++){s+=textureLoad(albedo,clamp(tl+vec2i(x,y),vec2i(0),d-vec2i(1)),0).rgb;}}
+  return s*.25;}
+fn b4(tl:vec2i,d:vec2i)->vec3f{
+  var s=vec3f(0.);
+  for(var y:i32=0;y<4;y++){for(var x:i32=0;x<4;x++){s+=textureLoad(albedo,clamp(tl+vec2i(x,y),vec2i(0),d-vec2i(1)),0).rgb;}}
+  return s*(1./16.);}
+@compute @workgroup_size(8,8)
+fn main(@builtin(global_invocation_id) id:vec3u){
+  let c=vec2i(id.xy); let d=vec2i(textureDimensions(albedo));
+  if(c.x>=d.x||c.y>=d.y){return;}
+  let alb=textureLoad(albedo,c,0).rgb;
+  let oct=ldc(normal,c).rg*2.-vec2f(1.);  // [0,1] -> [-1,1]
+  let dv=ld(c,d);
+  let dzdx=(ld(c+vec2i(1,0),d)-ld(c-vec2i(1,0),d))*.5;
+  let dzdy=(ld(c+vec2i(0,1),d)-ld(c-vec2i(0,1),d))*.5;
+  textureStore(f0,c,vec4u(
+    pack2x16float(alb.rg),
+    pack2x16float(vec2f(alb.b,oct.x)),
+    pack2x16float(vec2f(oct.y,dv)),
+    pack2x16float(vec2f(dzdx,dzdy))));
+  let mid=ldc(matid,c).r;
+  let shd=ldc(shadow,c).r;
+  let trp=ldc(transp,c).r;
+  let m1=b2(c-vec2i(0),d); let m2=b4(c-vec2i(1),d);
+  textureStore(f1,c,vec4u(
+    pack4x8unorm(vec4f(mid,0.,0.,0.)),
+    pack4x8unorm(vec4f(m1.r,m1.g,m1.b,m2.r)),
+    pack4x8unorm(vec4f(m2.g,m2.b,shd,trp)),
+    0u));}`;
diff --git a/cnn_v3/tools/tester.js b/cnn_v3/tools/tester.js
index aa765a1..0412cae 100644
--- a/cnn_v3/tools/tester.js
+++ b/cnn_v3/tools/tester.js
@@ -13,6 +13,7 @@ class CNNv3Tester {
     this.image   = null;
     this.isVideo = false;
     this.viewMode= 0;   // 0=cnn 1=orig 2=diff
+    this.targetBitmap = null;  // set when a sample dir with target.png is loaded
     this.blend   = 1.0;
     this.layerTextures = {};
     this.lastResult = null;
@@ -44,9 +45,43 @@ class CNNv3Tester {
       this.format  = navigator.gpu.getPreferredCanvasFormat();
       this.linearSampler = this.device.createSampler({magFilter:'linear',minFilter:'linear',mipmapFilter:'linear'});
       this.log('WebGPU ready');
+      this.preload();
     } catch(e) { this.setStatus(`GPU error: ${e.message}`,true); }
   }
 
+  async preload() {  // best-effort fetch of the default weight files; per-file failures are logged, not thrown
+    const dir = '../../workspaces/main/weights/';
+    const targets = [
+      {url: dir+'cnn_v3_weights.bin',  isFilm: false},
+      {url: dir+'cnn_v3_film_mlp.bin', isFilm: true},
+    ];
+    for (const {url, isFilm} of targets) {
+      const name = url.split('/').pop();  // bare file name used in log and UI text
+      try {
+        const resp = await fetch(url);
+        if (!resp.ok) { this.log(`preload skip: ${name} (${resp.status})`); continue; }
+        const data = await resp.arrayBuffer();
+        if (isFilm) {
+          this.filmMlp = this.parseFilm(data);
+        } else {
+          this.weightsU32 = this.parseWeights(data); this.weightsBuffer = data;
+          if (this.weightsGPU) { this.weightsGPU.destroy(); this.weightsGPU = null; }  // drop the stale GPU copy
+        }
+        const drop = document.getElementById(isFilm ? 'fDrop' : 'wDrop');
+        drop.textContent = `✓ ${name}`; drop.classList.add('ok');
+        if (isFilm) {
+          const filmSt = document.getElementById('fSt');
+          filmSt.textContent = 'FiLM MLP loaded';
+          filmSt.style.color = '#28a745';
+        }
+        this.log(`Preloaded: ${name}`);
+      } catch(e) { this.log(`preload error (${name}): ${e.message}`, 'err'); }
+    }
+    if (!this.weightsU32) return;
+    if (this.image || this.isVideo) this.run();
+    else this.setStatus('Weights loaded — drop image/video');
+  }
+
   getDims() {
     return this.isVideo
       ? {w:this.video.videoWidth, h:this.video.videoHeight}
@@ -106,7 +141,7 @@ class CNNv3Tester {
   filmParams() {
     const I4=[1,1,1,1],Z4=[0,0,0,0],I8=[1,1,1,1,1,1,1,1],Z8=[0,0,0,0,0,0,0,0];
     if (!this.filmMlp) return {ge0:I4,be0:Z4,ge1:I8,be1:Z8,gd1:I4,bd1:Z4,gd0:I4,bd0:Z4};
-    const v=document.getElementById;
+    const v=document.getElementById.bind(document);
     const cond=[v('sBP').value,v('sBN').value,v('sAI').value,v('sP0').value,v('sP1').value].map(Number);
     const f=this.filmFwd(cond);
     return {
@@ -350,6 +385,7 @@ class CNNv3Tester {
     this.layerTextures={feat0:f0,feat1:f1,enc0:e0,enc1:e1,bn,dec1:d1,output:ot};
     this.lastResult={ot,itex:this.inputTex,uDp,dispPL:this.getDisp(),w,h};
     this.updateVizPanel();
+    this.refreshZoom();
   }
 
   destroyLayerTex(){for(const t of Object.values(this.layerTextures||{}))try{t.destroy();}catch(_){} this.layerTextures={};}
@@ -422,10 +458,19 @@ class CNNv3Tester {
     document.getElementById(`vb_${id}`)?.classList.add('act');
     const def=this.vizDefs.find(d=>d.id===id); if(!def)return;
     const grid=document.getElementById('chgrid'); grid.innerHTML='';
+    const chName = (c) => `${def.lbl} → ${def.ch[c]||'c'+c}`;
     for(let c=0;c<def.nch;c++){
       const cell=document.createElement('div'); cell.className='chcell';
       const lbl=document.createElement('div'); lbl.className='chcell-lbl'; lbl.textContent=def.ch[c]||`c${c}`;
       const cvs=document.createElement('canvas');
+      const name=chName(c);
+      cvs.title=name;
+      cvs.onclick=(e)=>{
+        const r=cvs.getBoundingClientRect();
+        const tx=Math.round(e.offsetX/r.width*tex.width);
+        const ty=Math.round(e.offsetY/r.height*tex.height);
+        tester.zoomChannel(id,c,name,tx,ty);
+      };
       cell.appendChild(lbl); cell.appendChild(cvs); grid.appendChild(cell);
     }
     const pl=def.t==='f32'?this.getVizF32():this.getVizU32();
@@ -434,8 +479,8 @@ class CNNv3Tester {
       cvs.width=tex.width; cvs.height=tex.height;
       const ctx=cvs.getContext('webgpu'); if(!ctx)continue;
       try{ctx.configure({device:this.device,format:this.format});}catch(_){continue;}
-      const chBuf=this.device.createBuffer({size:4,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST});
-      this.device.queue.writeBuffer(chBuf,0,new Uint32Array([c]));
+      const chBuf=this.device.createBuffer({size:16,usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST});
+      this.device.queue.writeBuffer(chBuf,0,new Int32Array([c,0,0,0]));
       const bg=this.device.createBindGroup({layout:pl.getBindGroupLayout(0),
         entries:[{binding:0,resource:tex.createView()},{binding:1,resource:{buffer:chBuf}}]});
       const enc=this.device.createCommandEncoder();
@@ -447,6 +492,54 @@ class CNNv3Tester {
     await this.device.queue.onSubmittedWorkDone();
   }
 
+  zoomChannel(layerId, ch, label, clickTx=0, clickTy=0) {  // 1:1 view of one layer channel in #chzoom, centred near the clicked texel
+    const def = this.vizDefs?.find(d => d.id === layerId);
+    if (!def || !this.layerTextures[layerId] || !this.device) return;
+    const wrap = document.getElementById('chzoomWrap');
+    const lbl  = document.getElementById('chzoomLbl');
+    this.activeZoom    = {layerId, ch, label, clickTx, clickTy};
+    lbl.textContent    = label;
+    wrap.style.display = 'flex';
+    // Wait for layout so clientWidth/clientHeight reflect the flex-distributed size
+    requestAnimationFrame(() => {
+      const tex = this.layerTextures[layerId]; if (!tex) return;  // re-read: a run since the call may have replaced (and destroyed) the textures
+      const dst   = document.getElementById('chzoom');
+      const pad   = 12;
+      const lblH  = lbl.offsetHeight + 6;
+      const availW = Math.max(1, wrap.clientWidth  - pad);         // never derive a canvas size <= 0
+      const availH = Math.max(1, wrap.clientHeight - pad - lblH);
+      const scale  = Math.min(1, availW / tex.width, availH / tex.height);
+      dst.width  = Math.max(1, Math.round(tex.width  * scale));
+      dst.height = Math.max(1, Math.round(tex.height * scale));
+      // Centre the view on the clicked texel, but keep the window inside the texture so edge texels are not smeared
+      const ox = Math.max(0, Math.min(tex.width  - dst.width,  clickTx - Math.floor(dst.width  / 2)));
+      const oy = Math.max(0, Math.min(tex.height - dst.height, clickTy - Math.floor(dst.height / 2)));
+      const pl = def.t === 'f32' ? this.getVizF32() : this.getVizU32();
+      const ctx = dst.getContext('webgpu');
+      try { ctx.configure({device: this.device, format: this.format}); } catch(_) { return; }
+      const uData = new ArrayBuffer(16);  // matches struct Vu: ch(u32) _p(u32) ox(i32) oy(i32)
+      const dv = new DataView(uData);
+      dv.setUint32(0, ch, true); dv.setInt32(8, ox, true); dv.setInt32(12, oy, true);
+      const chBuf = this.device.createBuffer({size:16, usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST});
+      this.device.queue.writeBuffer(chBuf, 0, uData);
+      const bg = this.device.createBindGroup({layout: pl.getBindGroupLayout(0),
+        entries:[{binding:0, resource:tex.createView()}, {binding:1, resource:{buffer:chBuf}}]});
+      const enc = this.device.createCommandEncoder();
+      const rp  = enc.beginRenderPass({colorAttachments:[{
+        view:ctx.getCurrentTexture().createView(), loadOp:'clear', storeOp:'store'}]});
+      rp.setPipeline(pl); rp.setBindGroup(0, bg); rp.draw(6); rp.end();
+      this.device.queue.submit([enc.finish()]);
+      chBuf.destroy();
+    });
+  }
+
+  refreshZoom() {
+    // Re-issue the last zoomChannel() request, if there is one, with the same arguments.
+    const zoom = this.activeZoom;
+    if (!zoom) return;
+    this.zoomChannel(zoom.layerId, zoom.ch, zoom.label, zoom.clickTx, zoom.clickTy);
+  }
+
   // ── Save PNG ─────────────────────────────────────────────────────────────
 
   async savePNG() {
@@ -491,6 +584,281 @@ class CNNv3Tester {
       return(s?-1:1)*Math.pow(2,e-15)*(1+m/1024);};
     return [f(lo),f(hi)];
   }
+
+  // ── Full G-buffer pack pipeline ───────────────────────────────────────────
+
+  getFullPack() {  // pipeline for FULL_PACK_SHADER entry point 'main', requested via this.pl under key 'fullpack' (presumably a by-name cache — confirm)
+    return this.pl('fullpack', () => this.computePL(FULL_PACK_SHADER, 'main'));
+  }
+
+  // Build a single-texel rgba8unorm texture holding the byte values (r, g, b, a), each 0-255.
+  makeFallbackTex(r, g, b, a) {
+    const {device} = this;
+    const usage = GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST;
+    const texel = device.createTexture({size: [1, 1], format: 'rgba8unorm', usage});
+    device.queue.writeTexture({texture: texel}, new Uint8Array([r, g, b, a]), {bytesPerRow: 4, rowsPerImage: 1}, [1, 1]);
+    return texel;
+  }
+
+  // Load an image File as a GPU rgba8unorm texture. Returns {tex, w, h}. The decoded bitmap is always closed, and the texture is destroyed if the upload throws.
+  async loadGpuTex(file) {
+    const bmp = await createImageBitmap(file);
+    try {
+      const w = bmp.width, h = bmp.height;
+      const tex = this.device.createTexture({size:[w,h], format:'rgba8unorm', usage: GPUTextureUsage.TEXTURE_BINDING|GPUTextureUsage.COPY_DST|GPUTextureUsage.RENDER_ATTACHMENT});
+      try { this.device.queue.copyExternalImageToTexture({source:bmp}, {texture:tex}, [w,h]); } catch(e) { tex.destroy(); throw e; }
+      return {tex, w, h};
+    } finally { bmp.close(); }
+  }
+
+  // ── Load sample directory ─────────────────────────────────────────────────
+
+  async loadSampleDir(files) {  // load a G-buffer sample dir (albedo required, other channels optional), pack it into feat0/feat1, run inference
+    if (!files || files.length === 0) return;
+    if (!this.weightsU32) { this.setStatus('Load weights first', true); return; }
+
+    this.setMode('full');
+    const st = document.getElementById('sampleSt');
+    st.textContent = 'Loading…';
+
+    // Match files by name pattern
+    const match = (pat) => {
+      for (const f of files) {
+        const n = f.name.toLowerCase();
+        if (pat.some(p => n.includes(p))) return f;
+      }
+      return null;
+    };
+
+    const fAlbedo = match(['albedo', 'color']);
+    const fNormal = match(['normal', 'nrm']);
+    const fDepth  = match(['depth']);
+    const fMatid  = match(['matid', 'index', 'mat_id']);
+    const fShadow = match(['shadow']);
+    const fTransp = match(['transp', 'alpha']);
+    const fTarget = match(['target', 'output', 'ground_truth']);
+
+    if (!fAlbedo) {
+      st.textContent = '✗ No albedo.png found';
+      this.setStatus('No albedo.png in sample dir', true);
+      return;
+    }
+
+    const srcTex = [];         // source textures; released after packing and, on any error path, in finally
+    let f0 = null, f1 = null;  // packed features; owned by runFromFeat() once stored in this.layerTextures
+    const releaseSrc = () => srcTex.splice(0).forEach(t => t.destroy());
+    const track = (t) => { srcTex.push(t); return t; };
+    const load = async (file, rgba) => track(file ? (await this.loadGpuTex(file)).tex : this.makeFallbackTex(...rgba));
+    try {
+      const t0 = performance.now();
+      // Load primary albedo to get dimensions
+      const {tex: albTex, w, h} = await this.loadGpuTex(fAlbedo);
+      track(albTex);
+      this.canvas.width = w; this.canvas.height = h;
+      this.context.configure({device:this.device, format:this.format});
+
+      // Load optional channels — fall back to neutral 1×1 textures
+      const nrmTex = await load(fNormal, [128, 128, 0, 255]);    // oct-encoded (0,0) normal
+      const dptTex = await load(fDepth,  [0, 0, 0, 255]);
+      const midTex = await load(fMatid,  [0, 0, 0, 255]);
+      const shdTex = await load(fShadow, [255, 255, 255, 255]);  // fully lit
+      const trpTex = await load(fTransp, [0, 0, 0, 255]);        // fully opaque
+
+      // Load target if present
+      if (this.targetBitmap) { this.targetBitmap.close(); this.targetBitmap = null; }
+      if (fTarget) {
+        this.targetBitmap = await createImageBitmap(fTarget);
+        this.showTarget();
+      } else {
+        document.getElementById('targetPane').style.display = 'none';
+      }
+
+      // Pack G-buffer into feat0/feat1
+      const mk = (fmt, tw, th) => this.device.createTexture({size:[tw,th], format:fmt,
+        usage:GPUTextureUsage.STORAGE_BINDING|GPUTextureUsage.TEXTURE_BINDING|GPUTextureUsage.COPY_SRC});
+      f0 = mk('rgba32uint', w, h);
+      f1 = mk('rgba32uint', w, h);
+
+      const ceil8 = (n) => Math.ceil(n/8);
+      const pl = this.getFullPack();
+      const bg = this.device.createBindGroup({layout: pl.getBindGroupLayout(0),
+        entries: [
+          {binding:0, resource: albTex.createView()},
+          {binding:1, resource: nrmTex.createView()},
+          {binding:2, resource: dptTex.createView()},
+          {binding:3, resource: midTex.createView()},
+          {binding:4, resource: shdTex.createView()},
+          {binding:5, resource: trpTex.createView()},
+          {binding:6, resource: f0.createView()},
+          {binding:7, resource: f1.createView()},
+        ]});
+
+      const enc = this.device.createCommandEncoder();
+      const cp = enc.beginComputePass();
+      cp.setPipeline(pl); cp.setBindGroup(0, bg);
+      cp.dispatchWorkgroups(ceil8(w), ceil8(h));
+      cp.end();
+      this.device.queue.submit([enc.finish()]);
+      await this.device.queue.onSubmittedWorkDone();
+      // Cleanup source textures (before inference, so they are not held during the run)
+      releaseSrc();
+      const found = [fAlbedo, fNormal, fDepth, fMatid, fShadow, fTransp]
+        .filter(Boolean).map(f => f.name).join(', ');
+      st.textContent = `✓ ${found}`;
+      this.log(`Sample packed: ${w}×${h}, ${((performance.now()-t0)).toFixed(0)}ms`);
+
+      // Run inference — runFromFeat takes ownership of f0/f1 (stored in layerTextures)
+      await this.runFromFeat(f0, f1, w, h);
+    } catch(e) {
+      st.textContent = `✗ ${e.message}`;
+      this.setStatus(`Sample error: ${e.message}`, true);
+      this.log(`Sample error: ${e.message}`, 'err');
+    } finally {
+      releaseSrc();  // no-op on the success path; frees sources when a load or the pack step threw
+      if (f0 && this.layerTextures?.feat0 !== f0) { f0.destroy(); f1?.destroy(); }  // ownership was never handed over
+    }
+  }
+
+  // Paint the loaded target bitmap onto #targetCanvas at its native size and reveal #targetPane.
+  showTarget() {
+    const bmp = this.targetBitmap;
+    if (!bmp) return;
+    const view = document.getElementById('targetCanvas');
+    view.width = bmp.width;
+    view.height = bmp.height;
+    view.getContext('2d').drawImage(bmp, 0, 0);
+    document.getElementById('targetPane').style.display = 'flex';
+  }
+
+  // Run CNN inference starting from pre-packed feat_tex0 / feat_tex1 (f0/f1, both w×h), skipping the photo-pack step.
+  // Used by loadSampleDir(). f0/f1 end up in this.layerTextures; MSE/PSNR vs. the target bitmap is shown when one is loaded.
+  async runFromFeat(f0, f1, w, h) {
+    if (!this.weightsU32 || !this.device) return;
+    const t0 = performance.now();
+    const W2=w>>1, H2=h>>1, W4=W2>>1, H4=H2>>1;
+
+    this.context.configure({device:this.device, format:this.format});
+
+    // Create a neutral "original" texture so the display shader can still
+    // render Orig/Diff modes (just black for sample mode).
+    if (this.inputTex) this.inputTex.destroy();
+    this.inputTex = this.device.createTexture({size:[w,h], format:'rgba8unorm',
+      usage:GPUTextureUsage.TEXTURE_BINDING|GPUTextureUsage.COPY_DST|GPUTextureUsage.RENDER_ATTACHMENT});
+    // Leave it cleared to black — Diff mode against target would need more work
+
+    const mk = (fmt, tw, th) => this.device.createTexture({size:[tw,th], format:fmt,
+      usage:GPUTextureUsage.STORAGE_BINDING|GPUTextureUsage.TEXTURE_BINDING|GPUTextureUsage.COPY_SRC});
+    const e0=mk('rgba16float',w,h), e1=mk('rgba32uint',W2,H2);
+    const bn=mk('rgba32uint',W4,H4), d1=mk('rgba16float',W2,H2), ot=mk('rgba16float',w,h);
+
+    if (!this.weightsGPU) {
+      this.weightsGPU = this.device.createBuffer({size:this.weightsBuffer.byteLength,
+        usage:GPUBufferUsage.STORAGE|GPUBufferUsage.COPY_DST});
+      this.device.queue.writeBuffer(this.weightsGPU, 0, this.weightsBuffer);
+    }
+    const wg = this.weightsGPU;
+    const fp = this.filmParams();
+    const wu = (data) => {
+      const b = this.device.createBuffer({size:data.byteLength, usage:GPUBufferUsage.UNIFORM|GPUBufferUsage.COPY_DST});
+      this.device.queue.writeBuffer(b, 0, data); return b;
+    };
+    const uE0=wu(this.u4(ENC0_OFF,fp.ge0,fp.be0));
+    const uE1=wu(this.u8(ENC1_OFF,fp.ge1,fp.be1));
+    const uBN=wu(this.ubn(BN_OFF));
+    const uD1=wu(this.u4(DEC1_OFF,fp.gd1,fp.bd1));
+    const uD0=wu(this.u4(DEC0_OFF,fp.gd0,fp.bd0));
+    const dispData=new ArrayBuffer(16);
+    new DataView(dispData).setFloat32(4, this.blend, true);
+    const uDp=wu(dispData);
+
+    const enc = this.device.createCommandEncoder();
+    const bg  = (pl,...entries) => this.device.createBindGroup({layout:pl.getBindGroupLayout(0),
+      entries:entries.map((r,i)=>({binding:i,resource:r}))});
+    const rv  = (t) => t.createView();
+    const cp  = (pl,bgr,wx,wy) => {const p=enc.beginComputePass();p.setPipeline(pl);p.setBindGroup(0,bgr);p.dispatchWorkgroups(wx,wy);p.end();};
+    const ceil8 = (n) => Math.ceil(n/8);
+
+    cp(this.getEnc0(), bg(this.getEnc0(), rv(f0),rv(f1),{buffer:wg},{buffer:uE0},rv(e0)), ceil8(w), ceil8(h));
+    cp(this.getEnc1(), bg(this.getEnc1(), rv(e0),{buffer:wg},{buffer:uE1},rv(e1)), ceil8(W2), ceil8(H2));
+    cp(this.getBN(),   bg(this.getBN(),   rv(e1),{buffer:wg},{buffer:uBN},rv(bn)), ceil8(W4), ceil8(H4));
+    cp(this.getDec1(), bg(this.getDec1(), rv(bn),rv(e1),{buffer:wg},{buffer:uD1},rv(d1)), ceil8(W2), ceil8(H2));
+    cp(this.getDec0(), bg(this.getDec0(), rv(d1),rv(e0),{buffer:wg},{buffer:uD0},rv(ot)), ceil8(w), ceil8(h));
+
+    const dbg = bg(this.getDisp(), rv(ot), rv(this.inputTex), {buffer:uDp});
+    const rp = enc.beginRenderPass({colorAttachments:[{
+      view:this.context.getCurrentTexture().createView(), loadOp:'clear', storeOp:'store'}]});
+    rp.setPipeline(this.getDisp()); rp.setBindGroup(0, dbg); rp.draw(6); rp.end();
+
+    this.device.queue.submit([enc.finish()]);
+    await this.device.queue.onSubmittedWorkDone();
+
+    [uE0,uE1,uBN,uD1,uD0].forEach(b => b.destroy());
+
+    // Compute PSNR against target if available; a failure is logged but does not abort the run
+    let psnrStr = '';
+    if (this.targetBitmap) {
+      this.showTarget();
+      try { psnrStr = await this.computePSNR(ot, w, h); } catch(e) { this.log(`PSNR failed: ${e.message}`, 'err'); }
+    }
+
+    this.destroyLayerTex();
+    this.layerTextures = {feat0:f0, feat1:f1, enc0:e0, enc1:e1, bn, dec1:d1, output:ot};
+    this.lastResult = {ot, itex:this.inputTex, uDp, dispPL:this.getDisp(), w, h};
+    this.updateVizPanel();
+    this.refreshZoom();
+
+    const ms = (performance.now()-t0).toFixed(1);
+    document.getElementById('cnnLabel').textContent = `CNN output  (${ms}ms)`;
+    document.getElementById('psnrSt').textContent = psnrStr;  // always written, so a stale value from an earlier sample is cleared
+    this.setStatus(`Sample: ${ms}ms · ${w}×${h}`);
+    this.log(`runFromFeat: ${ms}ms`);
+  }
+
+  // Compute MSE/PSNR between the CNN rgba16float output texture and the target bitmap (RGB only, CNN values clamped to [0,1]). Returns a display string.
+  async computePSNR(outTex, w, h) {
+    const bpr = Math.ceil(w * 8 / 256) * 256;  // 8 bytes per texel, rows padded to the 256-byte bytesPerRow alignment
+    const stg = this.device.createBuffer({size:bpr*h,
+      usage:GPUBufferUsage.COPY_DST|GPUBufferUsage.MAP_READ});
+    const cnnPx = new Float32Array(w*h*3);
+    try {  // the staging buffer is destroyed even when the copy or the mapping fails
+      const enc = this.device.createCommandEncoder();
+      enc.copyTextureToBuffer({texture:outTex}, {buffer:stg, bytesPerRow:bpr, rowsPerImage:h}, [w,h]);
+      this.device.queue.submit([enc.finish()]);
+      await stg.mapAsync(GPUMapMode.READ);
+      const raw = new DataView(stg.getMappedRange());
+      // Decode output pixels from f16, clamped to [0,1]
+      const f16 = (bits) => {
+        const s=(bits>>15)&1, e=(bits>>10)&0x1F, m=bits&0x3FF;
+        if(e===0) return s?0:m*Math.pow(2,-24);  // zero/subnormal: m·2^-24 (negative values clamp to 0)
+        if(e===31) return s?0:1;                 // inf/NaN: sign bit set -> 0, otherwise 1
+        return Math.max(0,Math.min(1,(s?-1:1)*Math.pow(2,e-15)*(1+m/1024)));
+      };
+      for (let y=0;y<h;y++) for (let x=0;x<w;x++) {
+        const src=y*bpr+x*8, pi=(y*w+x)*3;
+        cnnPx[pi]  = f16(raw.getUint16(src,   true));
+        cnnPx[pi+1]= f16(raw.getUint16(src+2, true));
+        cnnPx[pi+2]= f16(raw.getUint16(src+4, true));
+      }
+    } finally { stg.destroy(); }  // destroy() also unmaps the buffer
+    // Read target pixels via offscreen canvas (the bitmap is drawn scaled to w×h)
+    const oc = document.createElement('canvas');
+    oc.width = w; oc.height = h;
+    const ctx2d = oc.getContext('2d');
+    ctx2d.drawImage(this.targetBitmap, 0, 0, w, h);
+    const tgtData = ctx2d.getImageData(0, 0, w, h).data;
+
+    let mse = 0;
+    const n = w * h * 3;
+    for (let i=0; i<w*h; i++) {
+      const dr = cnnPx[i*3]   - tgtData[i*4]  /255;
+      const dg = cnnPx[i*3+1] - tgtData[i*4+1]/255;
+      const db = cnnPx[i*3+2] - tgtData[i*4+2]/255;
+      mse += dr*dr + dg*dg + db*db;
+    }
+    mse /= n;
+    const psnr = mse > 0 ? (10 * Math.log10(1 / mse)).toFixed(2) : '∞';
+    return `MSE=${mse.toFixed(5)}  PSNR=${psnr}dB`;
+  }
 }
 
 // ── UI helpers ───────────────────────────────────────────────────────────────