summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: skal <pascal.massimino@gmail.com> 2026-02-12 15:10:17 +0100
committer: skal <pascal.massimino@gmail.com> 2026-02-12 15:10:17 +0100
commit: 8b30cadfc19647487986d14dba9ddba7908dd1d0 (patch)
tree: f865b42945f72bfc480e2c2a6849127bf56d1a59
parent: 1effb125973ac0948de3015be1d53ae72463858b (diff)
test_demo: Add beat-synchronized CNN post-processing with version selection

- Add --cnn-version <1|2> flag to select between CNN v1 and v2
- Implement beat_phase modulation for dynamic blend in both CNN effects
- Fix CNN v2 per-layer uniform buffer sharing (each layer needs own buffer)
- Fix CNN v2 y-axis orientation to match render pass convention
- Add Scene1Effect as base visual layer to test_demo timeline
- Reorganize CNN v2 shaders into cnn_v2/ subdirectory
- Update asset paths and documentation for new shader organization

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
-rw-r--r-- assets/final/demo_assets.txt | 5
-rw-r--r-- assets/test_demo.seq | 1
-rw-r--r-- doc/CNN_V2.md | 16
-rw-r--r-- src/gpu/effects/cnn_effect.cc | 9
-rw-r--r-- src/gpu/effects/cnn_effect.h | 7
-rw-r--r-- src/gpu/effects/cnn_v2_effect.cc | 71
-rw-r--r-- src/gpu/effects/cnn_v2_effect.h | 16
-rw-r--r-- src/test_demo.cc | 39
-rw-r--r-- tools/seq_compiler.cc | 15
-rwxr-xr-x training/export_cnn_v2_shader.py | 2
-rwxr-xr-x training/export_cnn_v2_weights.py | 2
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl (renamed from workspaces/main/shaders/cnn_v2_compute.wgsl) | 12
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_layer_0.wgsl (renamed from workspaces/main/shaders/cnn_v2_layer_0.wgsl) | 0
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_layer_1.wgsl (renamed from workspaces/main/shaders/cnn_v2_layer_1.wgsl) | 0
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_layer_2.wgsl (renamed from workspaces/main/shaders/cnn_v2_layer_2.wgsl) | 0
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_layer_template.wgsl (renamed from workspaces/main/shaders/cnn_v2_layer_template.wgsl) | 0
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl (renamed from workspaces/main/shaders/cnn_v2_static.wgsl) | 4
17 files changed, 165 insertions, 34 deletions
diff --git a/assets/final/demo_assets.txt b/assets/final/demo_assets.txt
index 96f86f9..6db6e48 100644
--- a/assets/final/demo_assets.txt
+++ b/assets/final/demo_assets.txt
@@ -60,3 +60,8 @@ SHADER_COMPUTE_GEN_MASK, NONE, shaders/compute/gen_mask.wgsl, "GPU Mask Composit
CIRCLE_MASK_COMPUTE_SHADER, NONE, shaders/circle_mask_compute.wgsl, "Circle mask compute shader"
CIRCLE_MASK_RENDER_SHADER, NONE, shaders/circle_mask_render.wgsl, "Circle mask render shader"
MASKED_CUBE_SHADER, NONE, shaders/masked_cube.wgsl, "Masked cube shader"
+
+# --- CNN v2 Post-Processing ---
+SHADER_CNN_V2_STATIC, NONE, shaders/cnn_v2/cnn_v2_static.wgsl, "CNN v2 Static Features Shader"
+SHADER_CNN_V2_COMPUTE, NONE, shaders/cnn_v2/cnn_v2_compute.wgsl, "CNN v2 Compute Shader"
+WEIGHTS_CNN_V2, NONE, cnn_v2_weights.bin, "CNN v2 Weights Binary"
diff --git a/assets/test_demo.seq b/assets/test_demo.seq
index 6dc26ca..ae0301f 100644
--- a/assets/test_demo.seq
+++ b/assets/test_demo.seq
@@ -2,6 +2,7 @@
# BPM 120 (set in test_demo.track)
SEQUENCE 0.0 0 "Main Loop"
+ EFFECT + Scene1Effect 0.0 16.0
EFFECT + FlashEffect 0.0 16.0
END_DEMO 32b
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md
index 9407934..09d0841 100644
--- a/doc/CNN_V2.md
+++ b/doc/CNN_V2.md
@@ -299,7 +299,7 @@ torch.save({
2. Extract layer configs (kernels, channels)
3. Quantize weights to float16: `weights_f16 = weights_f32.astype(np.float16)`
4. Generate WGSL shader per layer
-5. Write to `workspaces/<workspace>/shaders/cnn_v2_*.wgsl`
+5. Write to `workspaces/<workspace>/shaders/cnn_v2/cnn_v2_*.wgsl`
**Example Generated Shader:**
@@ -402,7 +402,7 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
### Phase 1: Shaders (Core Infrastructure)
-- [ ] `workspaces/main/shaders/cnn_v2_static.wgsl` - Static features compute
+- [ ] `workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl` - Static features compute
- [ ] RGBD sampling from framebuffer
- [ ] UV coordinate calculation
- [ ] sin(10\*uv.x) computation
@@ -410,7 +410,7 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
- [ ] Float16 packing via `pack2x16float()`
- [ ] Output to `texture_storage_2d<rgba32uint>`
-- [ ] `workspaces/main/shaders/cnn_v2_layer_template.wgsl` - Layer template
+- [ ] `workspaces/main/shaders/cnn_v2/cnn_v2_layer_template.wgsl` - Layer template
- [ ] Static features unpacking
- [ ] Previous layer unpacking (8×f16)
- [ ] Convolution implementation (1×1, 3×3, 5×5)
@@ -492,10 +492,10 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
```
# Shaders (generated by export script)
-workspaces/main/shaders/cnn_v2_static.wgsl # Static features compute
-workspaces/main/shaders/cnn_v2_layer_0.wgsl # Input layer (generated)
-workspaces/main/shaders/cnn_v2_layer_1.wgsl # Inner layer (generated)
-workspaces/main/shaders/cnn_v2_layer_2.wgsl # Output layer (generated)
+workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl # Static features compute
+workspaces/main/shaders/cnn_v2/cnn_v2_layer_0.wgsl # Input layer (generated)
+workspaces/main/shaders/cnn_v2/cnn_v2_layer_1.wgsl # Inner layer (generated)
+workspaces/main/shaders/cnn_v2/cnn_v2_layer_2.wgsl # Output layer (generated)
# C++ implementation
src/gpu/effects/cnn_v2_effect.h # Effect class header
@@ -531,7 +531,7 @@ TODO.md # Add CNN v2 task
```
training/train_cnn.py # Original training
src/gpu/effects/cnn_effect.* # Original effect
-workspaces/main/shaders/cnn_*.wgsl # Original shaders
+workspaces/main/shaders/cnn_*.wgsl # Original v1 shaders
```
---
diff --git a/src/gpu/effects/cnn_effect.cc b/src/gpu/effects/cnn_effect.cc
index b2305b2..83a3365 100644
--- a/src/gpu/effects/cnn_effect.cc
+++ b/src/gpu/effects/cnn_effect.cc
@@ -79,12 +79,19 @@ void CNNEffect::resize(int width, int height) {
void CNNEffect::render(WGPURenderPassEncoder pass,
const CommonPostProcessUniforms& uniforms) {
- (void)uniforms;
if (!bind_group_) {
fprintf(stderr, "CNN render: no bind_group\n");
return;
}
+ float effective_blend = blend_amount_;
+ if (beat_modulated_) {
+ effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_;
+ }
+
+ CNNLayerParams params = {layer_index_, effective_blend, {0.0f, 0.0f}};
+ params_buffer_.update(ctx_.queue, params);
+
wgpuRenderPassEncoderSetPipeline(pass, pipeline_);
wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr);
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
diff --git a/src/gpu/effects/cnn_effect.h b/src/gpu/effects/cnn_effect.h
index 1c9f0f3..3e2b7ca 100644
--- a/src/gpu/effects/cnn_effect.h
+++ b/src/gpu/effects/cnn_effect.h
@@ -34,10 +34,17 @@ class CNNEffect : public PostProcessEffect {
return layer_index_ == 0;
}
+ void set_beat_modulation(bool enabled, float scale = 1.0f) {
+ beat_modulated_ = enabled;
+ beat_scale_ = scale;
+ }
+
private:
int layer_index_;
int total_layers_;
float blend_amount_;
+ bool beat_modulated_ = false;
+ float beat_scale_ = 1.0f;
WGPUTextureView input_view_;
WGPUTextureView original_view_;
UniformBuffer<CNNLayerParams> params_buffer_;
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc
index 9cb6d57..9c727ba 100644
--- a/src/gpu/effects/cnn_v2_effect.cc
+++ b/src/gpu/effects/cnn_v2_effect.cc
@@ -20,9 +20,24 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx)
static_features_view_(nullptr),
layer_pipeline_(nullptr),
weights_buffer_(nullptr),
- layer_params_buffer_(nullptr),
input_mip_tex_(nullptr),
current_input_view_(nullptr),
+ blend_amount_(1.0f),
+ initialized_(false) {
+ std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
+}
+
+CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params)
+ : PostProcessEffect(ctx),
+ static_pipeline_(nullptr),
+ static_bind_group_(nullptr),
+ static_features_tex_(nullptr),
+ static_features_view_(nullptr),
+ layer_pipeline_(nullptr),
+ weights_buffer_(nullptr),
+ input_mip_tex_(nullptr),
+ current_input_view_(nullptr),
+ blend_amount_(params.blend_amount),
initialized_(false) {
std::memset(input_mip_view_, 0, sizeof(input_mip_view_));
}
@@ -93,13 +108,16 @@ void CNNv2Effect::load_weights() {
// Upload weights data
wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data, weights_size);
- // Create uniform buffer for layer params
- WGPUBufferDescriptor params_desc = {};
- params_desc.size = sizeof(LayerParams);
- params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
- params_desc.mappedAtCreation = false;
+ // Create uniform buffers for layer params (one per layer)
+ for (uint32_t i = 0; i < num_layers; ++i) {
+ WGPUBufferDescriptor params_desc = {};
+ params_desc.size = sizeof(LayerParams);
+ params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+ params_desc.mappedAtCreation = false;
- layer_params_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &params_desc);
+ WGPUBuffer buf = wgpuDeviceCreateBuffer(ctx_.device, &params_desc);
+ layer_params_buffers_.push_back(buf);
+ }
}
void CNNv2Effect::create_textures() {
@@ -284,8 +302,8 @@ void CNNv2Effect::create_pipelines() {
if (!layer_module) return;
// Create bind group layout for layer compute
- // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params
- WGPUBindGroupLayoutEntry layer_bgl_entries[5] = {};
+ // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input
+ WGPUBindGroupLayoutEntry layer_bgl_entries[6] = {};
// Binding 0: Static features (texture)
layer_bgl_entries[0].binding = 0;
@@ -317,8 +335,14 @@ void CNNv2Effect::create_pipelines() {
layer_bgl_entries[4].buffer.type = WGPUBufferBindingType_Uniform;
layer_bgl_entries[4].buffer.minBindingSize = sizeof(LayerParams);
+ // Binding 5: Original input (for blending)
+ layer_bgl_entries[5].binding = 5;
+ layer_bgl_entries[5].visibility = WGPUShaderStage_Compute;
+ layer_bgl_entries[5].texture.sampleType = WGPUTextureSampleType_Float;
+ layer_bgl_entries[5].texture.viewDimension = WGPUTextureViewDimension_2D;
+
WGPUBindGroupLayoutDescriptor layer_bgl_desc = {};
- layer_bgl_desc.entryCount = 5;
+ layer_bgl_desc.entryCount = 6;
layer_bgl_desc.entries = layer_bgl_entries;
WGPUBindGroupLayout layer_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &layer_bgl_desc);
@@ -399,7 +423,7 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
// Create bind group for each layer
for (size_t i = 0; i < layer_info_.size(); ++i) {
- WGPUBindGroupEntry layer_entries[5] = {};
+ WGPUBindGroupEntry layer_entries[6] = {};
// Binding 0: Static features (constant)
layer_entries[0].binding = 0;
@@ -419,14 +443,18 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
layer_entries[3].buffer = weights_buffer_;
layer_entries[3].size = wgpuBufferGetSize(weights_buffer_);
- // Binding 4: Layer params (will be updated per dispatch)
+ // Binding 4: Layer params (use dedicated buffer for this layer)
layer_entries[4].binding = 4;
- layer_entries[4].buffer = layer_params_buffer_;
+ layer_entries[4].buffer = layer_params_buffers_[i];
layer_entries[4].size = sizeof(LayerParams);
+ // Binding 5: Original input (for blending)
+ layer_entries[5].binding = 5;
+ layer_entries[5].textureView = input_view;
+
WGPUBindGroupDescriptor layer_bg_desc = {};
layer_bg_desc.layout = layer_bgl;
- layer_bg_desc.entryCount = 5;
+ layer_bg_desc.entryCount = 6;
layer_bg_desc.entries = layer_entries;
WGPUBindGroup layer_bg = wgpuDeviceCreateBindGroup(ctx_.device, &layer_bg_desc);
@@ -438,9 +466,13 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
void CNNv2Effect::compute(WGPUCommandEncoder encoder,
const CommonPostProcessUniforms& uniforms) {
- (void)uniforms;
if (!initialized_ || !static_pipeline_ || !static_bind_group_) return;
+ float effective_blend = blend_amount_;
+ if (beat_modulated_) {
+ effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_;
+ }
+
// Pass 1: Compute static features
WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
@@ -458,20 +490,20 @@ void CNNv2Effect::compute(WGPUCommandEncoder encoder,
// Execute CNN layer passes
if (!layer_pipeline_ || layer_bind_groups_.empty()) return;
+ // Update layer params (each layer has own buffer)
for (size_t i = 0; i < layer_info_.size(); ++i) {
const LayerInfo& info = layer_info_[i];
- // Update layer params uniform buffer
LayerParams params;
params.kernel_size = info.kernel_size;
params.in_channels = info.in_channels;
params.out_channels = info.out_channels;
params.weight_offset = info.weight_offset;
params.is_output_layer = (i == layer_info_.size() - 1) ? 1 : 0;
+ params.blend_amount = effective_blend;
- wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffer_, 0, &params, sizeof(params));
+ wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, &params, sizeof(params));
- // Execute layer compute pass
WGPUComputePassEncoder layer_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr);
wgpuComputePassEncoderSetPipeline(layer_pass, layer_pipeline_);
@@ -499,7 +531,8 @@ void CNNv2Effect::cleanup() {
if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_);
if (weights_buffer_) wgpuBufferRelease(weights_buffer_);
- if (layer_params_buffer_) wgpuBufferRelease(layer_params_buffer_);
+ for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf);
+ layer_params_buffers_.clear();
for (int i = 0; i < 3; ++i) {
if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]);
diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h
index 6005cf5..4389e4f 100644
--- a/src/gpu/effects/cnn_v2_effect.h
+++ b/src/gpu/effects/cnn_v2_effect.h
@@ -5,9 +5,14 @@
#include "gpu/effect.h"
#include <vector>
+struct CNNv2EffectParams {
+ float blend_amount = 1.0f;
+};
+
class CNNv2Effect : public PostProcessEffect {
public:
explicit CNNv2Effect(const GpuContext& ctx);
+ explicit CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params);
~CNNv2Effect();
void init(MainSequence* demo) override;
@@ -18,6 +23,11 @@ public:
const CommonPostProcessUniforms& uniforms) override;
void update_bind_group(WGPUTextureView input_view) override;
+ void set_beat_modulation(bool enabled, float scale = 1.0f) {
+ beat_modulated_ = enabled;
+ beat_scale_ = scale;
+ }
+
private:
struct LayerInfo {
uint32_t kernel_size;
@@ -33,6 +43,7 @@ private:
uint32_t out_channels;
uint32_t weight_offset;
uint32_t is_output_layer;
+ float blend_amount;
};
void create_textures();
@@ -49,7 +60,7 @@ private:
// CNN layers (storage buffer architecture)
WGPUComputePipeline layer_pipeline_; // Single pipeline for all layers
WGPUBuffer weights_buffer_; // Storage buffer for weights
- WGPUBuffer layer_params_buffer_; // Uniform buffer for per-layer params
+ std::vector<WGPUBuffer> layer_params_buffers_; // Uniform buffers (one per layer)
std::vector<LayerInfo> layer_info_; // Layer metadata
std::vector<WGPUBindGroup> layer_bind_groups_; // Per-layer bind groups
std::vector<WGPUTexture> layer_textures_; // Ping-pong buffers
@@ -60,5 +71,8 @@ private:
WGPUTextureView input_mip_view_[3];
WGPUTextureView current_input_view_;
+ float blend_amount_ = 1.0f;
+ bool beat_modulated_ = false;
+ float beat_scale_ = 1.0f;
bool initialized_;
};
diff --git a/src/test_demo.cc b/src/test_demo.cc
index 9cbeae2..7f10c3b 100644
--- a/src/test_demo.cc
+++ b/src/test_demo.cc
@@ -22,6 +22,8 @@ extern void LoadTimeline(MainSequence& main_seq, const GpuContext& ctx);
// Inline peak meter effect for debugging audio-visual sync
#include "gpu/effects/post_process_helper.h"
#include "gpu/effects/shader_composer.h"
+#include "gpu/effects/cnn_effect.h"
+#include "gpu/effects/cnn_v2_effect.h"
class PeakMeterEffect : public PostProcessEffect {
public:
@@ -98,6 +100,8 @@ class PeakMeterEffect : public PostProcessEffect {
}
};
+static int g_cnn_version = 2; // Default to v2
+
#if !defined(STRIP_ALL)
static void print_usage(const char* prog_name) {
printf("Usage: %s [OPTIONS]\n", prog_name);
@@ -107,6 +111,7 @@ static void print_usage(const char* prog_name) {
printf(" --help Show this help message and exit\n");
printf(" --fullscreen Run in fullscreen mode\n");
printf(" --resolution WxH Set window resolution (e.g., 1024x768)\n");
+ printf(" --cnn-version <1|2> Select CNN version (1=v1, 2=v2, default=2)\n");
printf(" --tempo Enable tempo variation test mode\n");
printf(
" (alternates between acceleration and "
@@ -123,6 +128,7 @@ static void print_usage(const char* prog_name) {
printf("\nExamples:\n");
printf(" %s --fullscreen\n", prog_name);
printf(" %s --resolution 1024x768 --tempo\n", prog_name);
+ printf(" %s --cnn-version 1\n", prog_name);
printf(" %s --log-peaks peaks.txt\n", prog_name);
printf(" %s --log-peaks peaks.txt --log-peaks-fine\n", prog_name);
printf("\nControls:\n");
@@ -184,6 +190,21 @@ int main(int argc, char** argv) {
log_peaks_file = argv[++i];
} else if (strcmp(argv[i], "--log-peaks-fine") == 0) {
log_peaks_fine = true;
+ } else if (strcmp(argv[i], "--cnn-version") == 0) {
+ if (i + 1 < argc) {
+ int version = atoi(argv[++i]);
+ if (version == 1 || version == 2) {
+ g_cnn_version = version;
+ } else {
+ fprintf(stderr, "Error: --cnn-version must be 1 or 2\n\n");
+ print_usage(argv[0]);
+ return 1;
+ }
+ } else {
+ fprintf(stderr, "Error: --cnn-version requires argument\n\n");
+ print_usage(argv[0]);
+ return 1;
+ }
} else {
CHECK_RETURN_BEGIN(true, 1)
print_usage(argv[0]);
@@ -205,9 +226,25 @@ int main(int argc, char** argv) {
// Load timeline from test_demo.seq
LoadTimeline(*gpu_get_main_sequence(), *gpu_get_context());
- // Add peak meter visualization effect (renders as final post-process)
#if !defined(STRIP_ALL)
const GpuContext* gpu_ctx = gpu_get_context();
+
+ // Add CNN post-processing effect based on version flag
+ if (g_cnn_version == 1) {
+ CNNEffectParams params;
+ params.blend_amount = 1.0f;
+ auto* cnn = new CNNEffect(*gpu_ctx, params);
+ cnn->set_beat_modulation(true, 1.0f);
+ gpu_add_custom_effect(cnn, 0.0f, 99999.0f, 10);
+ } else if (g_cnn_version == 2) {
+ CNNv2EffectParams params;
+ params.blend_amount = 1.0f;
+ auto* cnn = new CNNv2Effect(*gpu_ctx, params);
+ cnn->set_beat_modulation(true, 1.0f);
+ gpu_add_custom_effect(cnn, 0.0f, 99999.0f, 10);
+ }
+
+ // Add peak meter visualization effect (renders as final post-process)
auto* peak_meter = new PeakMeterEffect(*gpu_ctx);
gpu_add_custom_effect(peak_meter, 0.0f, 99999.0f,
999); // High priority = renders last
diff --git a/tools/seq_compiler.cc b/tools/seq_compiler.cc
index 069122a..daf1294 100644
--- a/tools/seq_compiler.cc
+++ b/tools/seq_compiler.cc
@@ -1109,6 +1109,21 @@ int main(int argc, char* argv[]) {
<< ");\n";
out_file << " }\n";
}
+ } else if (!eff.params.empty() && eff.class_name == "CNNv2Effect") {
+ // Generate parameter struct initialization for CNNv2Effect
+ out_file << " {\n";
+ out_file << " CNNv2EffectParams p;\n";
+
+ for (const auto& [key, value] : eff.params) {
+ if (key == "blend") {
+ out_file << " p.blend_amount = " << value << "f;\n";
+ }
+ }
+
+ out_file << " seq->add_effect(std::make_shared<"
+ << eff.class_name << ">(ctx, p), " << eff.start << "f, "
+ << eff.end << "f, " << eff.priority << ");\n";
+ out_file << " }\n";
} else {
// No parameters or unsupported effect - use default constructor
out_file << " seq->add_effect(std::make_shared<" << eff.class_name
diff --git a/training/export_cnn_v2_shader.py b/training/export_cnn_v2_shader.py
index 3c53ce2..add28d2 100755
--- a/training/export_cnn_v2_shader.py
+++ b/training/export_cnn_v2_shader.py
@@ -144,7 +144,7 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {{
}}
"""
- output_path = Path(output_dir) / f"cnn_v2_layer_{layer_idx}.wgsl"
+ output_path = Path(output_dir) / "cnn_v2" / f"cnn_v2_layer_{layer_idx}.wgsl"
output_path.write_text(shader_code)
print(f" → {output_path}")
diff --git a/training/export_cnn_v2_weights.py b/training/export_cnn_v2_weights.py
index 723f572..d8c7c10 100755
--- a/training/export_cnn_v2_weights.py
+++ b/training/export_cnn_v2_weights.py
@@ -248,7 +248,7 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
}
"""
- output_path = Path(output_dir) / "cnn_v2_compute.wgsl"
+ output_path = Path(output_dir) / "cnn_v2" / "cnn_v2_compute.wgsl"
output_path.write_text(shader_code)
print(f" → {output_path}")
diff --git a/workspaces/main/shaders/cnn_v2_compute.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
index b19a692..1e1704d 100644
--- a/workspaces/main/shaders/cnn_v2_compute.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_compute.wgsl
@@ -9,6 +9,7 @@ struct LayerParams {
out_channels: u32,
weight_offset: u32, // Offset in f16 units
is_output_layer: u32, // 1 if final layer (sigmoid), 0 otherwise (relu)
+ blend_amount: f32, // [0,1] blend with original
}
@group(0) @binding(0) var static_features: texture_2d<u32>; // 8-channel static features
@@ -16,6 +17,7 @@ struct LayerParams {
@group(0) @binding(2) var output_tex: texture_storage_2d<rgba32uint, write>; // Current layer output
@group(0) @binding(3) var<storage, read> weights_buffer: array<u32>; // Packed f16 weights
@group(0) @binding(4) var<uniform> params: LayerParams;
+@group(0) @binding(5) var original_input: texture_2d<f32>; // Original RGB input for blending
fn unpack_static_features(coord: vec2<i32>) -> array<f32, 8> {
let packed = textureLoad(static_features, coord, 0);
@@ -133,5 +135,15 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
output[c] = 0.0;
}
+ // Blend with original on final layer
+ if (is_output) {
+ let original = textureLoad(original_input, coord, 0).rgb;
+ let result_rgb = vec3<f32>(output[0], output[1], output[2]);
+ let blended = mix(original, result_rgb, params.blend_amount);
+ output[0] = blended.r;
+ output[1] = blended.g;
+ output[2] = blended.b;
+ }
+
textureStore(output_tex, coord, pack_channels(output));
}
diff --git a/workspaces/main/shaders/cnn_v2_layer_0.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_0.wgsl
index 8e14957..8e14957 100644
--- a/workspaces/main/shaders/cnn_v2_layer_0.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_0.wgsl
diff --git a/workspaces/main/shaders/cnn_v2_layer_1.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_1.wgsl
index f490d13..f490d13 100644
--- a/workspaces/main/shaders/cnn_v2_layer_1.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_1.wgsl
diff --git a/workspaces/main/shaders/cnn_v2_layer_2.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_2.wgsl
index 2f9836a..2f9836a 100644
--- a/workspaces/main/shaders/cnn_v2_layer_2.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_2.wgsl
diff --git a/workspaces/main/shaders/cnn_v2_layer_template.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_template.wgsl
index 1bf6819..1bf6819 100644
--- a/workspaces/main/shaders/cnn_v2_layer_template.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_layer_template.wgsl
diff --git a/workspaces/main/shaders/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
index c3a2de7..dd07f19 100644
--- a/workspaces/main/shaders/cnn_v2_static.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
@@ -25,9 +25,9 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
// Sample depth
let d = textureLoad(depth_tex, coord, 0).r;
- // UV coordinates (normalized [0,1])
+ // UV coordinates (normalized [0,1], bottom-left origin)
let uv_x = f32(coord.x) / f32(dims.x);
- let uv_y = f32(coord.y) / f32(dims.y);
+ let uv_y = 1.0 - (f32(coord.y) / f32(dims.y));
// Multi-frequency position encoding
let sin10_x = sin(10.0 * uv_x);