3 files changed, 57 insertions, 12 deletions
diff --git a/cnn_v3/docs/HOWTO.md b/cnn_v3/docs/HOWTO.md
index a1a5707..48b5d68 100644
--- a/cnn_v3/docs/HOWTO.md
+++ b/cnn_v3/docs/HOWTO.md
@@ -79,7 +79,7 @@ Each frame, `GBufferEffect::render()` executes:
 3. **Pass 3 — Transparency** — TODO (deferred; transp=0 for opaque scenes)
 
 4. **Pass 4 — Pack compute** (`gbuf_pack.wgsl`) ✅
-   - Reads all G-buffer textures + `prev_cnn` input
+   - Reads all G-buffer textures + persistent `prev_cnn` texture
    - Writes `feat_tex0` + `feat_tex1` (rgba32uint, 20 channels, 32 bytes/pixel)
    - Shadow / transp nodes cleared to 1.0 / 0.0 via zero-draw render passes
      until Pass 2/3 are implemented.
@@ -93,6 +93,23 @@ outputs[0]  → feat_tex0   (rgba32uint: albedo.rgb, normal.xy, depth, depth_gra
 outputs[1]  → feat_tex1   (rgba32uint: mat_id, prev.rgb, mip1.rgb, mip2.rgb, dif, transp)
 ```
 
+### Temporal feedback (prev.rgb)
+
+`GBufferEffect` owns a persistent internal node `<prefix>_prev` (rgba8unorm, `U8X4_NORM`).
+At the end of each frame, `post_render()` GPU-copies the CNN effect's output into it; the next
+frame's pack shader then reads it as `prev_cnn` (binding 6).
+
+**To wire temporal feedback**, call once after constructing the effects:
+```cpp
+gbuf->set_cnn_output_node("cnn_v3_out");  // name of CNNv3Effect's output node
+```
+
+Frame 0 behaviour: `_prev` is zeroed on allocation → `prev.rgb = 0`, matching the training
+convention (static frames use zero history).
+
+The copy uses `wgpuCommandEncoderCopyTextureToTexture` (no extra render pass overhead).
+Both textures must be `rgba8unorm` — the CNN output sink (`U8X4_NORM`) satisfies this.
+
 ---
 
 ## 1b. GBufferEffect — Implementation Plan (Pass 2: SDF Shadow)
@@ -299,8 +316,13 @@ This ensures the network works for both full G-buffer and photo-only inputs.
 ```seq
 # BPM 120
 SEQUENCE 0 0 "Scene with CNN v3"
-  EFFECT + GBufferEffect prev_cnn -> gbuf_feat0 gbuf_feat1  0 60
-  EFFECT + CNNv3Effect   gbuf_feat0 gbuf_feat1 -> sink       0 60
+  EFFECT + GBufferEffect source -> gbuf_feat0 gbuf_feat1  0 60
+  EFFECT + CNNv3Effect   gbuf_feat0 gbuf_feat1 -> sink    0 60
+```
+
+After constructing the effects, wire temporal feedback:
+```cpp
+gbuf_effect->set_cnn_output_node("sink");  // or whichever node receives CNN output
 ```
 
 FiLM parameters uploaded each frame:
@@ -459,8 +481,9 @@ GBufViewEffect(const GpuContext& ctx,
 
 ```cpp
 auto gbuf  = std::make_shared<GBufferEffect>(ctx,
-    std::vector<std::string>{"prev_cnn"},
+    std::vector<std::string>{},                            // no external inputs
     std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"}, 0.0f, 60.0f);
+gbuf->set_cnn_output_node("cnn_out");  // wire temporal feedback after CNN is constructed
 auto gview = std::make_shared<GBufViewEffect>(ctx,
     std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"},
     std::vector<std::string>{"gbuf_view_out"}, 0.0f, 60.0f);
diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc
index 25fef4c..512843c 100644
--- a/cnn_v3/src/gbuffer_effect.cc
+++ b/cnn_v3/src/gbuffer_effect.cc
@@ -61,6 +61,7 @@ GBufferEffect::GBufferEffect(const GpuContext& ctx,
   node_depth_      = prefix + "_depth";
   node_shadow_     = prefix + "_shadow";
   node_transp_     = prefix + "_transp";
+  node_prev_tex_   = prefix + "_prev";
   // Allocate GPU buffers for scene data.
   global_uniforms_buf_ =
       gpu_create_buffer(ctx_.device, sizeof(GBufGlobalUniforms),
@@ -95,6 +96,7 @@ void GBufferEffect::declare_nodes(NodeRegistry& registry) {
   if (!registry.has_node(output_nodes_[1])) {
     registry.declare_node(output_nodes_[1], NodeType::GBUF_RGBA32UINT, -1, -1);
   }
+  registry.declare_node(node_prev_tex_, NodeType::U8X4_NORM, -1, -1);
 }
 
 void GBufferEffect::set_scene() {
@@ -230,14 +232,9 @@ void GBufferEffect::render(WGPUCommandEncoder encoder,
   WGPUTextureView feat0_view      = nodes.get_view(output_nodes_[0]);
   WGPUTextureView feat1_view      = nodes.get_view(output_nodes_[1]);
 
-  // prev_cnn: first input node if available, else dummy.
-  WGPUTextureView prev_view = nullptr;
-  if (!input_nodes_.empty()) {
-    prev_view = nodes.get_view(input_nodes_[0]);
-  }
-  if (!prev_view) {
-    prev_view = dummy_texture_view_.get();
-  }
+  // node_prev_tex_ is updated by post_render() at the end of each frame.
+  // On frame 0 it is zero (NodeRegistry zeroes new textures) — correct default.
+  WGPUTextureView prev_view = nodes.get_view(node_prev_tex_);
 
   // --- Pass 1: MRT rasterization ---
   update_raster_bind_group(nodes);
@@ -776,3 +773,15 @@ void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) {
   wgpuBindGroupLayoutRelease(bgl);
 }
 
+void GBufferEffect::post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) {  // Records a copy of the CNN output node into node_prev_tex_.
+  if (cnn_output_node_.empty() || !nodes.has_node(cnn_output_node_)) return;  // feedback not wired, or node not registered: leave node_prev_tex_ untouched
+  WGPUTexelCopyTextureInfo src = {};  // zero-initialized; only texture and mipLevel are set explicitly
+  src.texture  = nodes.get_texture(cnn_output_node_);
+  src.mipLevel = 0;
+  WGPUTexelCopyTextureInfo dst = {};  // zero-initialized, same as src
+  dst.texture  = nodes.get_texture(node_prev_tex_);
+  dst.mipLevel = 0;
+  WGPUExtent3D extent = {(uint32_t)width_, (uint32_t)height_, 1};  // width_ x height_ texels, depth 1
+  wgpuCommandEncoderCopyTextureToTexture(encoder, &src, &dst, &extent);  // NOTE(review): presumably needs CopySrc/CopyDst usage and matching formats on both textures — confirm
+}
+
diff --git a/cnn_v3/src/gbuffer_effect.h b/cnn_v3/src/gbuffer_effect.h
index 13d394d..8e777f8 100644
--- a/cnn_v3/src/gbuffer_effect.h
+++ b/cnn_v3/src/gbuffer_effect.h
@@ -46,6 +46,9 @@ class GBufferEffect : public Effect {
   void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params,
               NodeRegistry& nodes) override;
 
+  // Copies cnn_output_node_ → node_prev_tex_ after all effects have rendered.
+  void post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) override;
+
   // Populate the internal scene with ~20 rotating cubes and a few pumping
   // spheres.  Must be called once before the first render().
   void set_scene();
@@ -68,6 +71,16 @@ class GBufferEffect : public Effect {
   std::string node_depth_;
   std::string node_shadow_;
   std::string node_transp_;
+  std::string node_prev_tex_;    // persistent prev-frame CNN output (rgba8unorm)
+
+  // Name of the CNN effect's output node; set by caller before first render.
+  // When non-empty, the CNN output is copied into node_prev_tex_ each frame.
+  std::string cnn_output_node_;
+
+ public:
+  void set_cnn_output_node(const std::string& name) { cnn_output_node_ = name; }  // stores the name only; an empty name leaves the feedback copy disabled
+
+ private:
 
   // Owned scene and camera — populated by set_scene()
   Scene  scene_;