diff options
| -rw-r--r-- | cnn_v3/docs/HOWTO.md | 31 |
| -rw-r--r-- | cnn_v3/src/gbuffer_effect.cc | 25 |
| -rw-r--r-- | cnn_v3/src/gbuffer_effect.h | 13 |
| -rw-r--r-- | doc/SEQUENCE.md | 81 |
| -rw-r--r-- | src/gpu/effect.h | 7 |
| -rw-r--r-- | src/gpu/sequence.cc | 5 |
6 files changed, 144 insertions, 18 deletions
diff --git a/cnn_v3/docs/HOWTO.md b/cnn_v3/docs/HOWTO.md index a1a5707..48b5d68 100644 --- a/cnn_v3/docs/HOWTO.md +++ b/cnn_v3/docs/HOWTO.md @@ -79,7 +79,7 @@ Each frame, `GBufferEffect::render()` executes: 3. **Pass 3 — Transparency** — TODO (deferred; transp=0 for opaque scenes) 4. **Pass 4 — Pack compute** (`gbuf_pack.wgsl`) ✅ - - Reads all G-buffer textures + `prev_cnn` input + - Reads all G-buffer textures + persistent `prev_cnn` texture - Writes `feat_tex0` + `feat_tex1` (rgba32uint, 20 channels, 32 bytes/pixel) - Shadow / transp nodes cleared to 1.0 / 0.0 via zero-draw render passes until Pass 2/3 are implemented. @@ -93,6 +93,23 @@ outputs[0] → feat_tex0 (rgba32uint: albedo.rgb, normal.xy, depth, depth_gra outputs[1] → feat_tex1 (rgba32uint: mat_id, prev.rgb, mip1.rgb, mip2.rgb, dif, transp) ``` +### Temporal feedback (prev.rgb) + +`GBufferEffect` owns a persistent internal node `<prefix>_prev` (rgba8unorm, `U8X4_NORM`). +Each frame it is GPU-copied from the CNN effect's output before Pass 1 runs, then bound as +`prev_cnn` in the pack shader (binding 6). + +**To wire temporal feedback**, call once after constructing the effects: +```cpp +gbuf->set_cnn_output_node("cnn_v3_out"); // name of CNNv3Effect's output node +``` + +Frame 0 behaviour: `_prev` is zeroed on allocation → `prev.rgb = 0`, matching the training +convention (static frames use zero history). + +The copy uses `wgpuCommandEncoderCopyTextureToTexture` (no extra render pass overhead). +Both textures must be `rgba8unorm` — the CNN output sink (`U8X4_NORM`) satisfies this. + --- ## 1b. GBufferEffect — Implementation Plan (Pass 2: SDF Shadow) @@ -299,8 +316,13 @@ This ensures the network works for both full G-buffer and photo-only inputs. 
```seq # BPM 120 SEQUENCE 0 0 "Scene with CNN v3" - EFFECT + GBufferEffect prev_cnn -> gbuf_feat0 gbuf_feat1 0 60 - EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> sink 0 60 + EFFECT + GBufferEffect source -> gbuf_feat0 gbuf_feat1 0 60 + EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> sink 0 60 +``` + +After constructing the effects, wire temporal feedback: +```cpp +gbuf_effect->set_cnn_output_node("sink"); // or whichever node receives CNN output ``` FiLM parameters uploaded each frame: @@ -459,8 +481,9 @@ GBufViewEffect(const GpuContext& ctx, ```cpp auto gbuf = std::make_shared<GBufferEffect>(ctx, - std::vector<std::string>{"prev_cnn"}, + std::vector<std::string>{}, // no external inputs std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"}, 0.0f, 60.0f); +gbuf->set_cnn_output_node("cnn_out"); // wire temporal feedback after CNN is constructed auto gview = std::make_shared<GBufViewEffect>(ctx, std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"}, std::vector<std::string>{"gbuf_view_out"}, 0.0f, 60.0f); diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc index 25fef4c..512843c 100644 --- a/cnn_v3/src/gbuffer_effect.cc +++ b/cnn_v3/src/gbuffer_effect.cc @@ -61,6 +61,7 @@ GBufferEffect::GBufferEffect(const GpuContext& ctx, node_depth_ = prefix + "_depth"; node_shadow_ = prefix + "_shadow"; node_transp_ = prefix + "_transp"; + node_prev_tex_ = prefix + "_prev"; // Allocate GPU buffers for scene data. 
global_uniforms_buf_ = gpu_create_buffer(ctx_.device, sizeof(GBufGlobalUniforms), @@ -95,6 +96,7 @@ void GBufferEffect::declare_nodes(NodeRegistry& registry) { if (!registry.has_node(output_nodes_[1])) { registry.declare_node(output_nodes_[1], NodeType::GBUF_RGBA32UINT, -1, -1); } + registry.declare_node(node_prev_tex_, NodeType::U8X4_NORM, -1, -1); } void GBufferEffect::set_scene() { @@ -230,14 +232,9 @@ void GBufferEffect::render(WGPUCommandEncoder encoder, WGPUTextureView feat0_view = nodes.get_view(output_nodes_[0]); WGPUTextureView feat1_view = nodes.get_view(output_nodes_[1]); - // prev_cnn: first input node if available, else dummy. - WGPUTextureView prev_view = nullptr; - if (!input_nodes_.empty()) { - prev_view = nodes.get_view(input_nodes_[0]); - } - if (!prev_view) { - prev_view = dummy_texture_view_.get(); - } + // node_prev_tex_ is updated by post_render() at the end of each frame. + // On frame 0 it is zero (NodeRegistry zeroes new textures) — correct default. + WGPUTextureView prev_view = nodes.get_view(node_prev_tex_); // --- Pass 1: MRT rasterization --- update_raster_bind_group(nodes); @@ -776,3 +773,15 @@ void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) { wgpuBindGroupLayoutRelease(bgl); } +void GBufferEffect::post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) { + if (cnn_output_node_.empty() || !nodes.has_node(cnn_output_node_)) return; + WGPUTexelCopyTextureInfo src = {}; + src.texture = nodes.get_texture(cnn_output_node_); + src.mipLevel = 0; + WGPUTexelCopyTextureInfo dst = {}; + dst.texture = nodes.get_texture(node_prev_tex_); + dst.mipLevel = 0; + WGPUExtent3D extent = {(uint32_t)width_, (uint32_t)height_, 1}; + wgpuCommandEncoderCopyTextureToTexture(encoder, &src, &dst, &extent); +} + diff --git a/cnn_v3/src/gbuffer_effect.h b/cnn_v3/src/gbuffer_effect.h index 13d394d..8e777f8 100644 --- a/cnn_v3/src/gbuffer_effect.h +++ b/cnn_v3/src/gbuffer_effect.h @@ -46,6 +46,9 @@ class GBufferEffect : public Effect { void 
render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params, NodeRegistry& nodes) override; + // Copies cnn_output_node_ → node_prev_tex_ after all effects have rendered. + void post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) override; + // Populate the internal scene with ~20 rotating cubes and a few pumping // spheres. Must be called once before the first render(). void set_scene(); @@ -68,6 +71,16 @@ class GBufferEffect : public Effect { std::string node_depth_; std::string node_shadow_; std::string node_transp_; + std::string node_prev_tex_; // persistent prev-frame CNN output (rgba8unorm) + + // Name of the CNN effect's output node; set by caller before first render. + // When non-empty, the CNN output is copied into node_prev_tex_ each frame. + std::string cnn_output_node_; + + public: + void set_cnn_output_node(const std::string& name) { cnn_output_node_ = name; } + + private: // Owned scene and camera — populated by set_scene() Scene scene_; diff --git a/doc/SEQUENCE.md b/doc/SEQUENCE.md index 202bf09..411e9d4 100644 --- a/doc/SEQUENCE.md +++ b/doc/SEQUENCE.md @@ -91,21 +91,90 @@ class Effect { std::vector<std::string> input_nodes_; std::vector<std::string> output_nodes_; - virtual void declare_nodes(NodeRegistry& registry) {} // Optional temp nodes + // Optional: declare internal nodes (depth buffers, intermediate textures). + virtual void declare_nodes(NodeRegistry& registry) {} + + // Required: render this effect for the current frame. virtual void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params, NodeRegistry& nodes) = 0; + + // Optional: called after ALL effects in the sequence have rendered. + // Use for end-of-frame bookkeeping, e.g. copying temporal feedback buffers. + // Default implementation is a no-op. + virtual void post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) {} }; ``` +### Frame execution order + +Each frame, `Sequence::render_effects()` runs two passes over the DAG: + +1. 
**Render pass** — `dispatch_render()` on every effect in topological order +2. **Post-render pass** — `post_render()` on every effect in the same order + +This ordering guarantees that by the time any `post_render()` runs, all output +textures for the frame are fully written. It is safe to read any node's texture +in `post_render()`. + +### Temporal feedback pattern + +DAG-based sequences cannot express read-after-write cycles within a single frame. +Use `post_render()` + a persistent internal node to implement temporal feedback +(e.g. CNN prev-frame input): + +```cpp +class MyEffect : public Effect { + std::string node_prev_; // internal persistent texture + std::string source_node_; // node to capture at end of frame + + public: + void set_source_node(const std::string& n) { source_node_ = n; } + + void declare_nodes(NodeRegistry& reg) override { + reg.declare_node(node_prev_, NodeType::U8X4_NORM, -1, -1); + } + + void render(...) override { + // Read node_prev_ — contains source_node_ output from the *previous* frame. + WGPUTextureView prev = nodes.get_view(node_prev_); + // ... use prev + } + + void post_render(WGPUCommandEncoder enc, NodeRegistry& nodes) override { + if (source_node_.empty() || !nodes.has_node(source_node_)) return; + // Copy this frame's output into node_prev_ for next frame. + WGPUTexelCopyTextureInfo src = {.texture = nodes.get_texture(source_node_)}; + WGPUTexelCopyTextureInfo dst = {.texture = nodes.get_texture(node_prev_)}; + WGPUExtent3D ext = {(uint32_t)width_, (uint32_t)height_, 1}; + wgpuCommandEncoderCopyTextureToTexture(enc, &src, &dst, &ext); + } +}; +``` + +**Why not `input_nodes_[0]` / ping-pong as prev?** The ping-pong alias makes +`source` equal to last frame's `sink` only when the effect is the first in the +sequence and no post-CNN effects overwrite `sink`. `post_render()` is +unconditionally correct regardless of sequence structure. 
+ +**Current user**: `GBufferEffect` uses this pattern for `prev.rgb` (CNN temporal +feedback). Call `gbuf->set_cnn_output_node("cnn_out_node")` once at setup. + ### Node System **Types**: Match WGSL texture formats -- `U8X4_NORM`: RGBA8Unorm (default for source/sink/intermediate) -- `F32X4`: RGBA32Float (HDR, compute outputs) -- `F16X8`: 8-channel float16 (G-buffer normals/vectors) -- `DEPTH24`: Depth24Plus (3D rendering) -- `COMPUTE_F32`: Storage buffer (non-texture compute data) +- `U8X4_NORM`: RGBA8Unorm — default for source/sink/intermediate; `COPY_SRC|COPY_DST` +- `F32X4`: RGBA32Float — HDR, compute outputs +- `F16X8`: 8-channel float16 — G-buffer normals/vectors +- `DEPTH24`: Depth24Plus — 3D rendering +- `COMPUTE_F32`: Storage buffer — non-texture compute data +- `GBUF_ALBEDO`: RGBA16Float — G-buffer albedo/normal MRT; `RENDER_ATTACHMENT|TEXTURE_BINDING|STORAGE_BINDING|COPY_SRC` +- `GBUF_DEPTH32`: Depth32Float — G-buffer depth; `RENDER_ATTACHMENT|TEXTURE_BINDING|COPY_SRC` +- `GBUF_R8`: RGBA8Unorm — G-buffer single-channel (shadow, transp); `STORAGE_BINDING|TEXTURE_BINDING|RENDER_ATTACHMENT` +- `GBUF_RGBA32UINT`: RGBA32Uint — packed feature textures (CNN v3 feat_tex0/1); `STORAGE_BINDING|TEXTURE_BINDING` + +**`COPY_SRC|COPY_DST`** is required on any node used with `wgpuCommandEncoderCopyTextureToTexture`. +`U8X4_NORM` has both; use it for temporal feedback dest nodes. **Aliasing**: Compiler detects ping-pong patterns (Effect i writes A reads B, Effect i+1 writes B reads A) and aliases nodes to same backing texture. diff --git a/src/gpu/effect.h b/src/gpu/effect.h index 8055783..566faba 100644 --- a/src/gpu/effect.h +++ b/src/gpu/effect.h @@ -34,6 +34,13 @@ class Effect { const UniformsSequenceParams& params, NodeRegistry& nodes) = 0; + // Called after ALL effects in the sequence have rendered for this frame. + // Use for end-of-frame bookkeeping (e.g. copying temporal feedback buffers). 
+ virtual void post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) { + (void)encoder; + (void)nodes; + } + virtual void resize(int width, int height) { width_ = width; height_ = height; diff --git a/src/gpu/sequence.cc b/src/gpu/sequence.cc index 91ca187..78647b2 100644 --- a/src/gpu/sequence.cc +++ b/src/gpu/sequence.cc @@ -269,6 +269,11 @@ void Sequence::render_effects(WGPUCommandEncoder encoder) { for (const auto& dag_node : effect_dag_) { dag_node.effect->dispatch_render(encoder, params_, nodes_); } + // End-of-frame hook: allows effects to persist data for the next frame + // (e.g. temporal feedback copies) after all rendering is done. + for (const auto& dag_node : effect_dag_) { + dag_node.effect->post_render(encoder, nodes_); + } } void Sequence::resize(int width, int height) { |
