diff options
| -rw-r--r-- | cnn_v3/docs/HOWTO.md | 31 |
| -rw-r--r-- | cnn_v3/src/gbuffer_effect.cc | 25 |
| -rw-r--r-- | cnn_v3/src/gbuffer_effect.h | 13 |
| -rw-r--r-- | doc/SEQUENCE.md | 81 |
| -rw-r--r-- | src/gpu/effect.h | 7 |
| -rw-r--r-- | src/gpu/sequence.cc | 5 |
6 files changed, 144 insertions, 18 deletions
diff --git a/cnn_v3/docs/HOWTO.md b/cnn_v3/docs/HOWTO.md index a1a5707..48b5d68 100644 --- a/cnn_v3/docs/HOWTO.md +++ b/cnn_v3/docs/HOWTO.md @@ -79,7 +79,7 @@ Each frame, `GBufferEffect::render()` executes: 3. **Pass 3 — Transparency** — TODO (deferred; transp=0 for opaque scenes) 4. **Pass 4 — Pack compute** (`gbuf_pack.wgsl`) ✅ - - Reads all G-buffer textures + `prev_cnn` input + - Reads all G-buffer textures + persistent `prev_cnn` texture - Writes `feat_tex0` + `feat_tex1` (rgba32uint, 20 channels, 32 bytes/pixel) - Shadow / transp nodes cleared to 1.0 / 0.0 via zero-draw render passes until Pass 2/3 are implemented. @@ -93,6 +93,23 @@ outputs[0] → feat_tex0 (rgba32uint: albedo.rgb, normal.xy, depth, depth_gra outputs[1] → feat_tex1 (rgba32uint: mat_id, prev.rgb, mip1.rgb, mip2.rgb, dif, transp) ``` +### Temporal feedback (prev.rgb) + +`GBufferEffect` owns a persistent internal node `<prefix>_prev` (rgba8unorm, `U8X4_NORM`). +Each frame it is GPU-copied from the CNN effect's output before Pass 1 runs, then bound as +`prev_cnn` in the pack shader (binding 6). + +**To wire temporal feedback**, call once after constructing the effects: +```cpp +gbuf->set_cnn_output_node("cnn_v3_out"); // name of CNNv3Effect's output node +``` + +Frame 0 behaviour: `_prev` is zeroed on allocation → `prev.rgb = 0`, matching the training +convention (static frames use zero history). + +The copy uses `wgpuCommandEncoderCopyTextureToTexture` (no extra render pass overhead). +Both textures must be `rgba8unorm` — the CNN output sink (`U8X4_NORM`) satisfies this. + --- ## 1b. GBufferEffect — Implementation Plan (Pass 2: SDF Shadow) @@ -299,8 +316,13 @@ This ensures the network works for both full G-buffer and photo-only inputs. 
```seq # BPM 120 SEQUENCE 0 0 "Scene with CNN v3" - EFFECT + GBufferEffect prev_cnn -> gbuf_feat0 gbuf_feat1 0 60 - EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> sink 0 60 + EFFECT + GBufferEffect source -> gbuf_feat0 gbuf_feat1 0 60 + EFFECT + CNNv3Effect gbuf_feat0 gbuf_feat1 -> sink 0 60 +``` + +After constructing the effects, wire temporal feedback: +```cpp +gbuf_effect->set_cnn_output_node("sink"); // or whichever node receives CNN output ``` FiLM parameters uploaded each frame: @@ -459,8 +481,9 @@ GBufViewEffect(const GpuContext& ctx, ```cpp auto gbuf = std::make_shared<GBufferEffect>(ctx, - std::vector<std::string>{"prev_cnn"}, + std::vector<std::string>{}, // no external inputs std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"}, 0.0f, 60.0f); +gbuf->set_cnn_output_node("cnn_out"); // wire temporal feedback after CNN is constructed auto gview = std::make_shared<GBufViewEffect>(ctx, std::vector<std::string>{"gbuf_feat0", "gbuf_feat1"}, std::vector<std::string>{"gbuf_view_out"}, 0.0f, 60.0f); diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc index 25fef4c..512843c 100644 --- a/cnn_v3/src/gbuffer_effect.cc +++ b/cnn_v3/src/gbuffer_effect.cc @@ -61,6 +61,7 @@ GBufferEffect::GBufferEffect(const GpuContext& ctx, node_depth_ = prefix + "_depth"; node_shadow_ = prefix + "_shadow"; node_transp_ = prefix + "_transp"; + node_prev_tex_ = prefix + "_prev"; // Allocate GPU buffers for scene data. 
global_uniforms_buf_ = gpu_create_buffer(ctx_.device, sizeof(GBufGlobalUniforms), @@ -95,6 +96,7 @@ void GBufferEffect::declare_nodes(NodeRegistry& registry) { if (!registry.has_node(output_nodes_[1])) { registry.declare_node(output_nodes_[1], NodeType::GBUF_RGBA32UINT, -1, -1); } + registry.declare_node(node_prev_tex_, NodeType::U8X4_NORM, -1, -1); } void GBufferEffect::set_scene() { @@ -230,14 +232,9 @@ void GBufferEffect::render(WGPUCommandEncoder encoder, WGPUTextureView feat0_view = nodes.get_view(output_nodes_[0]); WGPUTextureView feat1_view = nodes.get_view(output_nodes_[1]); - // prev_cnn: first input node if available, else dummy. - WGPUTextureView prev_view = nullptr; - if (!input_nodes_.empty()) { - prev_view = nodes.get_view(input_nodes_[0]); - } - if (!prev_view) { - prev_view = dummy_texture_view_.get(); - } + // node_prev_tex_ is updated by post_render() at the end of each frame. + // On frame 0 it is zero (NodeRegistry zeroes new textures) — correct default. + WGPUTextureView prev_view = nodes.get_view(node_prev_tex_); // --- Pass 1: MRT rasterization --- update_raster_bind_group(nodes); @@ -776,3 +773,15 @@ void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) { wgpuBindGroupLayoutRelease(bgl); } +void GBufferEffect::post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) { + if (cnn_output_node_.empty() || !nodes.has_node(cnn_output_node_)) return; + WGPUTexelCopyTextureInfo src = {}; + src.texture = nodes.get_texture(cnn_output_node_); + src.mipLevel = 0; + WGPUTexelCopyTextureInfo dst = {}; + dst.texture = nodes.get_texture(node_prev_tex_); + dst.mipLevel = 0; + WGPUExtent3D extent = {(uint32_t)width_, (uint32_t)height_, 1}; + wgpuCommandEncoderCopyTextureToTexture(encoder, &src, &dst, &extent); +} + diff --git a/cnn_v3/src/gbuffer_effect.h b/cnn_v3/src/gbuffer_effect.h index 13d394d..8e777f8 100644 --- a/cnn_v3/src/gbuffer_effect.h +++ b/cnn_v3/src/gbuffer_effect.h @@ -46,6 +46,9 @@ class GBufferEffect : public Effect { void 
render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params, NodeRegistry& nodes) override; + // Copies cnn_output_node_ → node_prev_tex_ after all effects have rendered. + void post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) override; + // Populate the internal scene with ~20 rotating cubes and a few pumping // spheres. Must be called once before the first render(). void set_scene(); @@ -68,6 +71,16 @@ class GBufferEffect : public Effect { std::string node_depth_; std::string node_shadow_; std::string node_transp_; + std::string node_prev_tex_; // persistent prev-frame CNN output (rgba8unorm) + + // Name of the CNN effect's output node; set by caller before first render. + // When non-empty, the CNN output is copied into node_prev_tex_ each frame. + std::string cnn_output_node_; + + public: + void set_cnn_output_node(const std::string& name) { cnn_output_node_ = name; } + + private: // Owned scene and camera — populated by set_scene() Scene scene_; diff --git a/doc/SEQUENCE.md b/doc/SEQUENCE.md index 202bf09..411e9d4 100644 --- a/doc/SEQUENCE.md +++ b/doc/SEQUENCE.md @@ -91,21 +91,90 @@ class Effect { std::vector<std::string> input_nodes_; std::vector<std::string> output_nodes_; - virtual void declare_nodes(NodeRegistry& registry) {} // Optional temp nodes + // Optional: declare internal nodes (depth buffers, intermediate textures). + virtual void declare_nodes(NodeRegistry& registry) {} + + // Required: render this effect for the current frame. virtual void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params, NodeRegistry& nodes) = 0; + + // Optional: called after ALL effects in the sequence have rendered. + // Use for end-of-frame bookkeeping, e.g. copying temporal feedback buffers. + // Default implementation is a no-op. + virtual void post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) {} }; ``` +### Frame execution order + +Each frame, `Sequence::render_effects()` runs two passes over the DAG: + +1. 
**Render pass** — `dispatch_render()` on every effect in topological order +2. **Post-render pass** — `post_render()` on every effect in the same order + +This ordering guarantees that by the time any `post_render()` runs, all output +textures for the frame are fully written. It is safe to read any node's texture +in `post_render()`. + +### Temporal feedback pattern + +DAG-based sequences cannot express read-after-write cycles within a single frame. +Use `post_render()` + a persistent internal node to implement temporal feedback +(e.g. CNN prev-frame input): + +```cpp +class MyEffect : public Effect { + std::string node_prev_; // internal persistent texture + std::string source_node_; // node to capture at end of frame + + public: + void set_source_node(const std::string& n) { source_node_ = n; } + + void declare_nodes(NodeRegistry& reg) override { + reg.declare_node(node_prev_, NodeType::U8X4_NORM, -1, -1); + } + + void render(...) override { + // Read node_prev_ — contains source_node_ output from the *previous* frame. + WGPUTextureView prev = nodes.get_view(node_prev_); + // ... use prev + } + + void post_render(WGPUCommandEncoder enc, NodeRegistry& nodes) override { + if (source_node_.empty() || !nodes.has_node(source_node_)) return; + // Copy this frame's output into node_prev_ for next frame. + WGPUTexelCopyTextureInfo src = {.texture = nodes.get_texture(source_node_)}; + WGPUTexelCopyTextureInfo dst = {.texture = nodes.get_texture(node_prev_)}; + WGPUExtent3D ext = {(uint32_t)width_, (uint32_t)height_, 1}; + wgpuCommandEncoderCopyTextureToTexture(enc, &src, &dst, &ext); + } +}; +``` + +**Why not `input_nodes_[0]` / ping-pong as prev?** The ping-pong alias makes +`source` equal to last frame's `sink` only when the effect is the first in the +sequence and no post-CNN effects overwrite `sink`. `post_render()` is +unconditionally correct regardless of sequence structure. 
+ +**Current user**: `GBufferEffect` uses this pattern for `prev.rgb` (CNN temporal +feedback). Call `gbuf->set_cnn_output_node("cnn_out_node")` once at setup. + ### Node System **Types**: Match WGSL texture formats -- `U8X4_NORM`: RGBA8Unorm (default for source/sink/intermediate) -- `F32X4`: RGBA32Float (HDR, compute outputs) -- `F16X8`: 8-channel float16 (G-buffer normals/vectors) -- `DEPTH24`: Depth24Plus (3D rendering) -- `COMPUTE_F32`: Storage buffer (non-texture compute data) +- `U8X4_NORM`: RGBA8Unorm — default for source/sink/intermediate; `COPY_SRC|COPY_DST` +- `F32X4`: RGBA32Float — HDR, compute outputs +- `F16X8`: 8-channel float16 — G-buffer normals/vectors +- `DEPTH24`: Depth24Plus — 3D rendering +- `COMPUTE_F32`: Storage buffer — non-texture compute data +- `GBUF_ALBEDO`: RGBA16Float — G-buffer albedo/normal MRT; `RENDER_ATTACHMENT|TEXTURE_BINDING|STORAGE_BINDING|COPY_SRC` +- `GBUF_DEPTH32`: Depth32Float — G-buffer depth; `RENDER_ATTACHMENT|TEXTURE_BINDING|COPY_SRC` +- `GBUF_R8`: RGBA8Unorm — G-buffer single-channel (shadow, transp); `STORAGE_BINDING|TEXTURE_BINDING|RENDER_ATTACHMENT` +- `GBUF_RGBA32UINT`: RGBA32Uint — packed feature textures (CNN v3 feat_tex0/1); `STORAGE_BINDING|TEXTURE_BINDING` + +**`COPY_SRC|COPY_DST`** is required on any node used with `wgpuCommandEncoderCopyTextureToTexture`. +`U8X4_NORM` has both; use it for temporal feedback dest nodes. **Aliasing**: Compiler detects ping-pong patterns (Effect i writes A reads B, Effect i+1 writes B reads A) and aliases nodes to same backing texture. diff --git a/src/gpu/effect.h b/src/gpu/effect.h index 8055783..566faba 100644 --- a/src/gpu/effect.h +++ b/src/gpu/effect.h @@ -34,6 +34,13 @@ class Effect { const UniformsSequenceParams& params, NodeRegistry& nodes) = 0; + // Called after ALL effects in the sequence have rendered for this frame. + // Use for end-of-frame bookkeeping (e.g. copying temporal feedback buffers). 
+ virtual void post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) { + (void)encoder; + (void)nodes; + } + virtual void resize(int width, int height) { width_ = width; height_ = height; diff --git a/src/gpu/sequence.cc b/src/gpu/sequence.cc index 91ca187..78647b2 100644 --- a/src/gpu/sequence.cc +++ b/src/gpu/sequence.cc @@ -269,6 +269,11 @@ void Sequence::render_effects(WGPUCommandEncoder encoder) { for (const auto& dag_node : effect_dag_) { dag_node.effect->dispatch_render(encoder, params_, nodes_); } + // End-of-frame hook: allows effects to persist data for the next frame + // (e.g. temporal feedback copies) after all rendering is done. + for (const auto& dag_node : effect_dag_) { + dag_node.effect->post_render(encoder, nodes_); + } } void Sequence::resize(int width, int height) { |
