diff options
Diffstat (limited to 'src/effects')
30 files changed, 2295 insertions, 0 deletions
diff --git a/src/effects/chroma_aberration_effect.cc b/src/effects/chroma_aberration_effect.cc new file mode 100644 index 0000000..a096f5b --- /dev/null +++ b/src/effects/chroma_aberration_effect.cc @@ -0,0 +1,38 @@ +// This file is part of the 64k demo project. +// It implements the ChromaAberrationEffect with parameterization. + +#include "gpu/demo_effects.h" +#include "gpu/post_process_helper.h" +#include "gpu/gpu.h" + +// --- ChromaAberrationEffect --- + +// Backward compatibility constructor (delegates to parameterized constructor) +ChromaAberrationEffect::ChromaAberrationEffect(const GpuContext& ctx) + : ChromaAberrationEffect(ctx, ChromaAberrationParams{}) { +} + +// Parameterized constructor +ChromaAberrationEffect::ChromaAberrationEffect( + const GpuContext& ctx, const ChromaAberrationParams& params) + : PostProcessEffect(ctx), params_(params) { + pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, + chroma_aberration_shader_wgsl); + params_buffer_.init(ctx_.device); +} + +void ChromaAberrationEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + // Update uniforms with current state and parameters + uniforms_.update(ctx_.queue, uniforms); + params_buffer_.update(ctx_.queue, params_); + + wgpuRenderPassEncoderSetPipeline(pass, pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} + +void ChromaAberrationEffect::update_bind_group(WGPUTextureView input_view) { + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, + uniforms_.get(), params_buffer_.get()); +} diff --git a/src/effects/circle_mask_effect.cc b/src/effects/circle_mask_effect.cc new file mode 100644 index 0000000..63c8f68 --- /dev/null +++ b/src/effects/circle_mask_effect.cc @@ -0,0 +1,219 @@ +// This file is part of the 64k demo project. +// It implements CircleMaskEffect for auxiliary texture masking demonstration. 
+// Generates circular mask and renders green background outside circle. + +#include "effects/circle_mask_effect.h" +#include "generated/assets.h" +#include "gpu/bind_group_builder.h" +#include "gpu/shader_composer.h" + +CircleMaskEffect::CircleMaskEffect(const GpuContext& ctx, float radius) + : Effect(ctx), radius_(radius) { +} + +CircleMaskEffect::~CircleMaskEffect() { + if (mask_sampler_) + wgpuSamplerRelease(mask_sampler_); + if (render_bind_group_) + wgpuBindGroupRelease(render_bind_group_); + if (render_pipeline_) + wgpuRenderPipelineRelease(render_pipeline_); + if (compute_bind_group_) + wgpuBindGroupRelease(compute_bind_group_); + if (compute_pipeline_) + wgpuRenderPipelineRelease(compute_pipeline_); +} + +void CircleMaskEffect::init(MainSequence* demo) { + demo_ = demo; + + // Register auxiliary texture (width_/height_ set by resize() before init()) + demo_->register_auxiliary_texture("circle_mask", width_, height_); + + compute_params_.init(ctx_.device); + + // Initialize uniforms BEFORE bind group creation + uniforms_.update(ctx_.queue, get_common_uniforms()); + + WGPUSamplerDescriptor sampler_desc = {}; + sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; + sampler_desc.magFilter = WGPUFilterMode_Linear; + sampler_desc.minFilter = WGPUFilterMode_Linear; + sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + sampler_desc.maxAnisotropy = 1; + mask_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &sampler_desc); + + size_t compute_size, render_size; + const char* compute_shader = (const char*)GetAsset( + AssetId::ASSET_CIRCLE_MASK_COMPUTE_SHADER, &compute_size); + const char* render_shader = (const char*)GetAsset( + AssetId::ASSET_CIRCLE_MASK_RENDER_SHADER, &render_size); + + // Compose shaders to resolve #include directives + std::string composed_compute = ShaderComposer::Get().Compose({}, compute_shader); + + WGPUShaderSourceWGSL compute_wgsl = {}; + compute_wgsl.chain.sType = 
WGPUSType_ShaderSourceWGSL; + compute_wgsl.code = str_view(composed_compute.c_str()); + + WGPUShaderModuleDescriptor compute_desc = {}; + compute_desc.nextInChain = &compute_wgsl.chain; + WGPUShaderModule compute_module = + wgpuDeviceCreateShaderModule(ctx_.device, &compute_desc); + + const WGPUColorTargetState compute_target = { + .format = ctx_.format, // Match auxiliary texture format + .writeMask = WGPUColorWriteMask_All, + }; + WGPUFragmentState compute_frag = {}; + compute_frag.module = compute_module; + compute_frag.entryPoint = str_view("fs_main"); + compute_frag.targetCount = 1; + compute_frag.targets = &compute_target; + WGPURenderPipelineDescriptor compute_pipeline_desc = {}; + compute_pipeline_desc.label = label_view("CircleMaskEffect_compute"); + compute_pipeline_desc.vertex.module = compute_module; + compute_pipeline_desc.vertex.entryPoint = str_view("vs_main"); + compute_pipeline_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList; + compute_pipeline_desc.primitive.cullMode = WGPUCullMode_None; + compute_pipeline_desc.multisample.count = 1; + compute_pipeline_desc.multisample.mask = 0xFFFFFFFF; + compute_pipeline_desc.fragment = &compute_frag; + compute_pipeline_ = + wgpuDeviceCreateRenderPipeline(ctx_.device, &compute_pipeline_desc); + wgpuShaderModuleRelease(compute_module); + + WGPUBindGroupLayout compute_layout = + wgpuRenderPipelineGetBindGroupLayout(compute_pipeline_, 0); + compute_bind_group_ = + BindGroupBuilder() + .buffer(0, uniforms_.get().buffer, sizeof(CommonPostProcessUniforms)) + .buffer(1, compute_params_.get().buffer, sizeof(CircleMaskParams)) + .build(ctx_.device, compute_layout); + wgpuBindGroupLayoutRelease(compute_layout); + + std::string composed_render = ShaderComposer::Get().Compose({}, render_shader); + + WGPUShaderSourceWGSL render_wgsl = {}; + render_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL; + render_wgsl.code = str_view(composed_render.c_str()); + + WGPUShaderModuleDescriptor render_desc = {}; + 
render_desc.nextInChain = &render_wgsl.chain; + WGPUShaderModule render_module = + wgpuDeviceCreateShaderModule(ctx_.device, &render_desc); + + const WGPUColorTargetState render_target = { + .format = ctx_.format, + .writeMask = WGPUColorWriteMask_All, + }; + WGPUFragmentState render_frag = {}; + render_frag.module = render_module; + render_frag.entryPoint = str_view("fs_main"); + render_frag.targetCount = 1; + render_frag.targets = &render_target; + const WGPUDepthStencilState depth_stencil = { + .format = WGPUTextureFormat_Depth24Plus, + .depthWriteEnabled = WGPUOptionalBool_False, // Don't write depth + .depthCompare = WGPUCompareFunction_Always, // Always pass + }; + + WGPURenderPipelineDescriptor render_pipeline_desc = {}; + render_pipeline_desc.label = label_view("CircleMaskEffect_render"); + render_pipeline_desc.vertex.module = render_module; + render_pipeline_desc.vertex.entryPoint = str_view("vs_main"); + render_pipeline_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList; + render_pipeline_desc.primitive.cullMode = WGPUCullMode_None; + render_pipeline_desc.depthStencil = &depth_stencil; + render_pipeline_desc.multisample.count = 1; + render_pipeline_desc.multisample.mask = 0xFFFFFFFF; + render_pipeline_desc.fragment = &render_frag; + render_pipeline_ = + wgpuDeviceCreateRenderPipeline(ctx_.device, &render_pipeline_desc); + wgpuShaderModuleRelease(render_module); + + WGPUTextureView mask_view = demo_->get_auxiliary_view("circle_mask"); + const WGPUBindGroupEntry render_entries[] = { + {.binding = 0, .textureView = mask_view}, + {.binding = 1, .sampler = mask_sampler_}, + {.binding = 2, + .buffer = uniforms_.get().buffer, + .size = sizeof(CommonPostProcessUniforms)}, + }; + const WGPUBindGroupDescriptor render_bg_desc = { + .layout = wgpuRenderPipelineGetBindGroupLayout(render_pipeline_, 0), + .entryCount = 3, + .entries = render_entries, + }; + render_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &render_bg_desc); +} + +void 
CircleMaskEffect::resize(int width, int height) { + if (width == width_ && height == height_) + return; + + Effect::resize(width, height); + + if (!demo_) + return; + + // Resize auxiliary texture + demo_->resize_auxiliary_texture("circle_mask", width, height); + + // Recreate render bind group with new texture view + if (render_bind_group_) + wgpuBindGroupRelease(render_bind_group_); + + WGPUTextureView mask_view = demo_->get_auxiliary_view("circle_mask"); + WGPUBindGroupLayout render_layout = + wgpuRenderPipelineGetBindGroupLayout(render_pipeline_, 0); + render_bind_group_ = + BindGroupBuilder() + .texture(0, mask_view) + .sampler(1, mask_sampler_) + .buffer(2, uniforms_.get().buffer, sizeof(CommonPostProcessUniforms)) + .build(ctx_.device, render_layout); + wgpuBindGroupLayoutRelease(render_layout); +} + +void CircleMaskEffect::compute(WGPUCommandEncoder encoder, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + + const CircleMaskParams params = { + .radius = radius_, + }; + compute_params_.update(ctx_.queue, params); + + WGPUTextureView mask_view = demo_->get_auxiliary_view("circle_mask"); + WGPURenderPassColorAttachment color_attachment = {}; + color_attachment.view = mask_view; + color_attachment.loadOp = WGPULoadOp_Clear; + color_attachment.storeOp = WGPUStoreOp_Store; + color_attachment.clearValue = {0.0, 0.0, 0.0, 1.0}; +#if !defined(DEMO_CROSS_COMPILE_WIN32) + color_attachment.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED; +#endif + + WGPURenderPassDescriptor pass_desc = {}; + pass_desc.colorAttachmentCount = 1; + pass_desc.colorAttachments = &color_attachment; + + WGPURenderPassEncoder pass = + wgpuCommandEncoderBeginRenderPass(encoder, &pass_desc); + wgpuRenderPassEncoderSetPipeline(pass, compute_pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, compute_bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); + wgpuRenderPassEncoderEnd(pass); + wgpuRenderPassEncoderRelease(pass); +} + +void 
CircleMaskEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + + wgpuRenderPassEncoderSetPipeline(pass, render_pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, render_bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} diff --git a/src/effects/circle_mask_effect.h b/src/effects/circle_mask_effect.h new file mode 100644 index 0000000..53cc1bc --- /dev/null +++ b/src/effects/circle_mask_effect.h @@ -0,0 +1,44 @@ +// This file is part of the 64k demo project. +// It defines the CircleMaskEffect class for masking system demonstration. +// Creates a circular mask and renders green outside the circle. + +#ifndef CIRCLE_MASK_EFFECT_H_ +#define CIRCLE_MASK_EFFECT_H_ + +#include "gpu/effect.h" +#include "gpu/post_process_helper.h" +#include "gpu/uniform_helper.h" + +class CircleMaskEffect : public Effect { + public: + CircleMaskEffect(const GpuContext& ctx, float radius = 0.4f); + ~CircleMaskEffect() override; + + void init(MainSequence* demo) override; + void resize(int width, int height) override; + void compute(WGPUCommandEncoder encoder, + const CommonPostProcessUniforms& uniforms) override; + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + + private: + struct CircleMaskParams { + float radius; + float _pad[3]; + }; + static_assert(sizeof(CircleMaskParams) == 16, + "CircleMaskParams must be 16 bytes for WGSL alignment"); + + MainSequence* demo_ = nullptr; + float radius_; + + WGPURenderPipeline compute_pipeline_ = nullptr; + WGPUBindGroup compute_bind_group_ = nullptr; + UniformBuffer<CircleMaskParams> compute_params_; + + WGPURenderPipeline render_pipeline_ = nullptr; + WGPUBindGroup render_bind_group_ = nullptr; + WGPUSampler mask_sampler_ = nullptr; +}; + +#endif /* CIRCLE_MASK_EFFECT_H_ */ diff --git a/src/effects/cnn_effect.cc b/src/effects/cnn_effect.cc new file mode 100644 index 0000000..4475180 --- 
/dev/null +++ b/src/effects/cnn_effect.cc @@ -0,0 +1,126 @@ +// CNN post-processing effect implementation +// Neural network-based stylization with modular WGSL + +#include "effects/cnn_effect.h" +#include "gpu/post_process_helper.h" +#include "gpu/shaders.h" +#include "gpu/shader_composer.h" +#include "gpu/effect.h" +#include "gpu/bind_group_builder.h" +#include "gpu/sampler_cache.h" +#include "gpu/pipeline_builder.h" + +// Create custom pipeline with 5 bindings (includes original texture) +static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device, + WGPUTextureFormat format, + const char* shader_code) { + WGPUBindGroupLayout bgl = BindGroupLayoutBuilder() + .sampler(0, WGPUShaderStage_Fragment) + .texture(1, WGPUShaderStage_Fragment) + .uniform(2, WGPUShaderStage_Vertex | WGPUShaderStage_Fragment) + .uniform(3, WGPUShaderStage_Fragment) + .texture(4, WGPUShaderStage_Fragment) + .build(device); + + WGPURenderPipeline pipeline = RenderPipelineBuilder(device) + .shader(shader_code) + .bind_group_layout(bgl) + .format(format) + .build(); + + wgpuBindGroupLayoutRelease(bgl); + return pipeline; +} + +CNNEffect::CNNEffect(const GpuContext& ctx) + : PostProcessEffect(ctx), layer_index_(0), total_layers_(1), + blend_amount_(1.0f), input_view_(nullptr), original_view_(nullptr), + bind_group_(nullptr) { + pipeline_ = create_cnn_pipeline(ctx_.device, ctx_.format, + cnn_layer_shader_wgsl); +} + +CNNEffect::CNNEffect(const GpuContext& ctx, const CNNEffectParams& params) + : PostProcessEffect(ctx), layer_index_(params.layer_index), + total_layers_(params.total_layers), blend_amount_(params.blend_amount), + input_view_(nullptr), original_view_(nullptr), bind_group_(nullptr) { + pipeline_ = create_cnn_pipeline(ctx_.device, ctx_.format, + cnn_layer_shader_wgsl); +} + +void CNNEffect::init(MainSequence* demo) { + PostProcessEffect::init(demo); + demo_ = demo; + params_buffer_.init(ctx_.device); + + // Register auxiliary texture for layer 0 (width_/height_ set by resize()) + 
if (layer_index_ == 0) { + demo_->register_auxiliary_texture("captured_frame", width_, height_); + } + + // Initialize uniforms BEFORE any bind group creation + uniforms_.update(ctx_.queue, get_common_uniforms()); + + CNNLayerParams params = {layer_index_, blend_amount_, {0.0f, 0.0f}}; + params_buffer_.update(ctx_.queue, params); +} + +void CNNEffect::resize(int width, int height) { + if (width == width_ && height == height_) + return; + + PostProcessEffect::resize(width, height); + + // Only layer 0 owns the captured_frame texture + if (layer_index_ == 0 && demo_) { + demo_->resize_auxiliary_texture("captured_frame", width, height); + } +} + +void CNNEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + if (!bind_group_) { + fprintf(stderr, "CNN render: no bind_group\n"); + return; + } + + float effective_blend = blend_amount_; + if (beat_modulated_) { + effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_; + } + + CNNLayerParams params = {layer_index_, effective_blend, {0.0f, 0.0f}}; + params_buffer_.update(ctx_.queue, params); + + wgpuRenderPassEncoderSetPipeline(pass, pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} + +void CNNEffect::update_bind_group(WGPUTextureView input_view) { + input_view_ = input_view; + + // Update common uniforms (CRITICAL for UV calculation!) 
+ uniforms_.update(ctx_.queue, get_common_uniforms()); + + // All layers: get captured frame (original input from layer 0) + if (demo_) { + original_view_ = demo_->get_auxiliary_view("captured_frame"); + } + + // Create bind group with original texture + if (bind_group_) + wgpuBindGroupRelease(bind_group_); + + WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline_, 0); + // Use clamp (not repeat) to match PyTorch Conv2d zero-padding behavior + WGPUSampler sampler = SamplerCache::Get().get_or_create(ctx_.device, SamplerCache::clamp()); + + bind_group_ = BindGroupBuilder() + .sampler(0, sampler) + .texture(1, input_view_) + .buffer(2, uniforms_.get().buffer, uniforms_.get().size) + .buffer(3, params_buffer_.get().buffer, params_buffer_.get().size) + .texture(4, original_view_ ? original_view_ : input_view_) + .build(ctx_.device, bgl); +} diff --git a/src/effects/cnn_effect.h b/src/effects/cnn_effect.h new file mode 100644 index 0000000..3e2b7ca --- /dev/null +++ b/src/effects/cnn_effect.h @@ -0,0 +1,53 @@ +// CNN post-processing effect header +// Multi-layer neural network stylization + +#pragma once +#include "gpu/effect.h" +#include "gpu/uniform_helper.h" + +struct CNNLayerParams { + int layer_index; + float blend_amount; // Blend: mix(input, output, blend_amount) + float _pad[2]; +}; +static_assert(sizeof(CNNLayerParams) == 16); + +struct CNNEffectParams { + int layer_index = 0; // Which layer to render (0-based) + int total_layers = 1; // Total number of layers in the CNN + float blend_amount = 1.0f; // Final blend with original input +}; + +class CNNEffect : public PostProcessEffect { + public: + explicit CNNEffect(const GpuContext& ctx); + explicit CNNEffect(const GpuContext& ctx, const CNNEffectParams& params); + + void init(MainSequence* demo) override; + void resize(int width, int height) override; + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + void update_bind_group(WGPUTextureView 
input_view) override; + + // Layer 0 needs framebuffer capture for original input + bool needs_framebuffer_capture() const override { + return layer_index_ == 0; + } + + void set_beat_modulation(bool enabled, float scale = 1.0f) { + beat_modulated_ = enabled; + beat_scale_ = scale; + } + + private: + int layer_index_; + int total_layers_; + float blend_amount_; + bool beat_modulated_ = false; + float beat_scale_ = 1.0f; + WGPUTextureView input_view_; + WGPUTextureView original_view_; + UniformBuffer<CNNLayerParams> params_buffer_; + WGPUBindGroup bind_group_; + MainSequence* demo_ = nullptr; +}; diff --git a/src/effects/cnn_v2_effect.cc b/src/effects/cnn_v2_effect.cc new file mode 100644 index 0000000..4c10ed1 --- /dev/null +++ b/src/effects/cnn_v2_effect.cc @@ -0,0 +1,463 @@ +// CNN v2 Effect Implementation + +#include "effects/cnn_v2_effect.h" + +#if defined(USE_TEST_ASSETS) +#include "test_assets.h" +#else +#include "generated/assets.h" +#endif + +#include "gpu/bind_group_builder.h" +#include "gpu/gpu.h" +#include "util/asset_manager.h" +#include "util/fatal_error.h" +#include <cstring> + +CNNv2Effect::CNNv2Effect(const GpuContext& ctx) + : PostProcessEffect(ctx), + static_pipeline_(nullptr), + static_bind_group_(nullptr), + static_params_buffer_(nullptr), + static_features_tex_(nullptr), + static_features_view_(nullptr), + linear_sampler_(nullptr), + layer_pipeline_(nullptr), + weights_buffer_(nullptr), + input_mip_tex_(nullptr), + current_input_view_(nullptr), + blend_amount_(1.0f), + mip_level_(0), + initialized_(false) { + std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); +} + +CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params) + : PostProcessEffect(ctx), + static_pipeline_(nullptr), + static_bind_group_(nullptr), + static_params_buffer_(nullptr), + static_features_tex_(nullptr), + static_features_view_(nullptr), + linear_sampler_(nullptr), + layer_pipeline_(nullptr), + weights_buffer_(nullptr), + 
input_mip_tex_(nullptr), + current_input_view_(nullptr), + blend_amount_(params.blend_amount), + mip_level_(0), + initialized_(false) { + std::memset(input_mip_view_, 0, sizeof(input_mip_view_)); +} + +CNNv2Effect::~CNNv2Effect() { + cleanup(); +} + +void CNNv2Effect::init(MainSequence* demo) { + (void)demo; + if (initialized_) return; + + load_weights(); + create_textures(); + create_pipelines(); + + initialized_ = true; +} + +void CNNv2Effect::resize(int width, int height) { + PostProcessEffect::resize(width, height); + cleanup(); + create_textures(); + create_pipelines(); +} + +void CNNv2Effect::load_weights() { + // Load binary weights asset + size_t weights_size = 0; + const uint8_t* weights_data = (const uint8_t*)GetAsset(AssetId::ASSET_WEIGHTS_CNN_V2, &weights_size); + + if (!weights_data || weights_size < 20) { + // Weights not available - effect will skip + return; + } + + // Parse header + const uint32_t* header = (const uint32_t*)weights_data; + uint32_t magic = header[0]; + uint32_t version = header[1]; + uint32_t num_layers = header[2]; + uint32_t total_weights = header[3]; + + FATAL_CHECK(magic != 0x324e4e43, "Invalid CNN v2 weights magic\n"); // 'CNN2' + + // Support both version 1 (16-byte header) and version 2 (20-byte header with mip_level) + // TODO: Version 3 should include feature descriptor for arbitrary layout/ordering + if (version == 1) { + mip_level_ = 0; // Default for v1 + } else if (version == 2) { + mip_level_ = header[4]; + } else { + FATAL_ERROR("Unsupported CNN v2 weights version: %u\n", version); + } + + // Parse layer info (20 bytes per layer) + // Offset depends on version: v1=16 bytes (4 u32), v2=20 bytes (5 u32) + const uint32_t header_u32_count = (version == 1) ? 
4 : 5; + const uint32_t* layer_data = header + header_u32_count; + for (uint32_t i = 0; i < num_layers; ++i) { + LayerInfo info; + info.kernel_size = layer_data[i * 5 + 0]; + info.in_channels = layer_data[i * 5 + 1]; + info.out_channels = layer_data[i * 5 + 2]; + info.weight_offset = layer_data[i * 5 + 3]; + info.weight_count = layer_data[i * 5 + 4]; + layer_info_.push_back(info); + } + + // Create GPU storage buffer for weights (skip header + layer info, upload only weights) + size_t header_size = 20; // 5 u32 + size_t layer_info_size = 20 * num_layers; // 5 u32 per layer + size_t weights_offset = header_size + layer_info_size; + size_t weights_only_size = weights_size - weights_offset; + + WGPUBufferDescriptor buffer_desc = {}; + buffer_desc.size = weights_only_size; + buffer_desc.usage = WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst; + buffer_desc.mappedAtCreation = false; + + weights_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, &buffer_desc); + + // Upload only weights (skip header + layer info) + wgpuQueueWriteBuffer(ctx_.queue, weights_buffer_, 0, weights_data + weights_offset, weights_only_size); + + // Create uniform buffers for layer params (one per layer) + for (uint32_t i = 0; i < num_layers; ++i) { + WGPUBufferDescriptor params_desc = {}; + params_desc.size = sizeof(LayerParams); + params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + params_desc.mappedAtCreation = false; + + WGPUBuffer buf = wgpuDeviceCreateBuffer(ctx_.device, ¶ms_desc); + layer_params_buffers_.push_back(buf); + } +} + +void CNNv2Effect::create_textures() { + // Static features texture (8×f16 packed as 4×u32) + TextureWithView static_tex = gpu_create_storage_texture_2d( + ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint); + static_features_tex_ = static_tex.texture; + static_features_view_ = static_tex.view; + + // Input texture with mips (for multi-scale features) + TextureWithView input_mip = gpu_create_texture_2d( + ctx_.device, width_, height_, 
WGPUTextureFormat_RGBA8Unorm, + (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst), 3); + input_mip_tex_ = input_mip.texture; + + for (int i = 0; i < 3; ++i) { + input_mip_view_[i] = + gpu_create_mip_view(input_mip_tex_, WGPUTextureFormat_RGBA8Unorm, i); + } + + // Create 2 layer textures (ping-pong buffers for intermediate results) + // Each stores 8×f16 channels packed as 4×u32 + for (int i = 0; i < 2; ++i) { + TextureWithView layer = gpu_create_storage_texture_2d( + ctx_.device, width_, height_, WGPUTextureFormat_RGBA32Uint); + layer_textures_.push_back(layer.texture); + layer_views_.push_back(layer.view); + } + + // Create uniform buffer for static feature params + WGPUBufferDescriptor params_desc = {}; + params_desc.size = sizeof(StaticFeatureParams); + params_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + params_desc.mappedAtCreation = false; + static_params_buffer_ = wgpuDeviceCreateBuffer(ctx_.device, ¶ms_desc); +} + +void CNNv2Effect::create_pipelines() { + // Create linear sampler for bilinear interpolation + WGPUSamplerDescriptor sampler_desc = {}; + sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeW = WGPUAddressMode_ClampToEdge; + sampler_desc.magFilter = WGPUFilterMode_Linear; + sampler_desc.minFilter = WGPUFilterMode_Linear; + sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + sampler_desc.lodMinClamp = 0.0f; + sampler_desc.lodMaxClamp = 32.0f; + sampler_desc.maxAnisotropy = 1; + + linear_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &sampler_desc); + + // Static features compute pipeline + size_t shader_size = 0; + const char* static_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size); + + if (!static_code || shader_size == 0) { + // Shader not available (e.g., in test mode) - skip pipeline creation + return; + } + + WGPUShaderSourceWGSL wgsl_src = {}; + wgsl_src.chain.sType 
= WGPUSType_ShaderSourceWGSL; + wgsl_src.code = str_view(static_code); + + WGPUShaderModuleDescriptor shader_desc = {}; + shader_desc.nextInChain = &wgsl_src.chain; + + // Create bind group layout for static features compute + // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params, 6=linear_sampler + WGPUBindGroupLayout static_bgl = + BindGroupLayoutBuilder() + .texture(0, WGPUShaderStage_Compute) + .texture(1, WGPUShaderStage_Compute) + .texture(2, WGPUShaderStage_Compute) + .texture(3, WGPUShaderStage_Compute) + .storage_texture(4, WGPUShaderStage_Compute, + WGPUTextureFormat_RGBA32Uint) + .uniform(5, WGPUShaderStage_Compute, sizeof(StaticFeatureParams)) + .sampler(6, WGPUShaderStage_Compute) + .build(ctx_.device); + + // Update pipeline layout + WGPUPipelineLayoutDescriptor pl_desc = {}; + pl_desc.bindGroupLayoutCount = 1; + pl_desc.bindGroupLayouts = &static_bgl; + WGPUPipelineLayout pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc); + + // Recreate pipeline with proper layout + WGPUComputePipelineDescriptor pipeline_desc2 = {}; + pipeline_desc2.compute.module = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); + pipeline_desc2.compute.entryPoint = str_view("main"); + pipeline_desc2.layout = pipeline_layout; + + if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); + static_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &pipeline_desc2); + + wgpuShaderModuleRelease(pipeline_desc2.compute.module); + wgpuPipelineLayoutRelease(pipeline_layout); + wgpuBindGroupLayoutRelease(static_bgl); + + // CNN layer compute pipeline (storage buffer version) + if (layer_info_.empty()) return; // No weights loaded + + size_t layer_shader_size = 0; + const char* layer_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_COMPUTE, &layer_shader_size); + + if (!layer_code || layer_shader_size == 0) return; + + WGPUShaderSourceWGSL layer_wgsl = {}; + layer_wgsl.chain.sType = 
WGPUSType_ShaderSourceWGSL; + layer_wgsl.code = str_view(layer_code); + + WGPUShaderModuleDescriptor layer_shader_desc = {}; + layer_shader_desc.nextInChain = &layer_wgsl.chain; + + WGPUShaderModule layer_module = wgpuDeviceCreateShaderModule(ctx_.device, &layer_shader_desc); + if (!layer_module) return; + + // Create bind group layout for layer compute + // 0=static_features, 1=layer_input, 2=output, 3=weights, 4=params, 5=original_input + WGPUBindGroupLayout layer_bgl = + BindGroupLayoutBuilder() + .uint_texture(0, WGPUShaderStage_Compute) + .uint_texture(1, WGPUShaderStage_Compute) + .storage_texture(2, WGPUShaderStage_Compute, + WGPUTextureFormat_RGBA32Uint) + .storage(3, WGPUShaderStage_Compute) + .uniform(4, WGPUShaderStage_Compute, sizeof(LayerParams)) + .texture(5, WGPUShaderStage_Compute) + .build(ctx_.device); + + WGPUPipelineLayoutDescriptor layer_pl_desc = {}; + layer_pl_desc.bindGroupLayoutCount = 1; + layer_pl_desc.bindGroupLayouts = &layer_bgl; + + WGPUPipelineLayout layer_pipeline_layout = wgpuDeviceCreatePipelineLayout(ctx_.device, &layer_pl_desc); + + WGPUComputePipelineDescriptor layer_pipeline_desc = {}; + layer_pipeline_desc.compute.module = layer_module; + layer_pipeline_desc.compute.entryPoint = str_view("main"); + layer_pipeline_desc.layout = layer_pipeline_layout; + + layer_pipeline_ = wgpuDeviceCreateComputePipeline(ctx_.device, &layer_pipeline_desc); + + wgpuShaderModuleRelease(layer_module); + wgpuPipelineLayoutRelease(layer_pipeline_layout); + wgpuBindGroupLayoutRelease(layer_bgl); +} + +void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { + if (!static_pipeline_) return; + + // Cache input view + current_input_view_ = input_view; + + // Release old bind group + if (static_bind_group_) { + wgpuBindGroupRelease(static_bind_group_); + static_bind_group_ = nullptr; + } + + // Create bind group for static features compute (manual for storage texture binding) + WGPUBindGroupEntry bg_entries[7] = {}; + bg_entries[0].binding = 0; 
+ bg_entries[0].textureView = input_view; + bg_entries[1].binding = 1; + bg_entries[1].textureView = input_mip_view_[0]; + bg_entries[2].binding = 2; + bg_entries[2].textureView = + input_mip_view_[1] ? input_mip_view_[1] : input_mip_view_[0]; + bg_entries[3].binding = 3; + bg_entries[3].textureView = input_view; + bg_entries[4].binding = 4; + bg_entries[4].textureView = static_features_view_; + bg_entries[5].binding = 5; + bg_entries[5].buffer = static_params_buffer_; + bg_entries[5].size = sizeof(StaticFeatureParams); + bg_entries[6].binding = 6; + bg_entries[6].sampler = linear_sampler_; + + WGPUBindGroupLayout layout = + wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); + WGPUBindGroupDescriptor bg_desc = {}; + bg_desc.layout = layout; + bg_desc.entryCount = 7; + bg_desc.entries = bg_entries; + static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); + wgpuBindGroupLayoutRelease(layout); + + // Create layer bind groups + if (!layer_pipeline_ || layer_info_.empty()) return; + + // Release old layer bind groups + for (auto bg : layer_bind_groups_) { + wgpuBindGroupRelease(bg); + } + layer_bind_groups_.clear(); + + // Get bind group layout from layer pipeline + WGPUBindGroupLayout layer_bgl = wgpuComputePipelineGetBindGroupLayout(layer_pipeline_, 0); + + // Create bind group for each layer + for (size_t i = 0; i < layer_info_.size(); ++i) { + WGPUTextureView layer_input = + (i == 0) ? 
static_features_view_ : layer_views_[i % 2]; + + WGPUBindGroup layer_bg = + BindGroupBuilder() + .texture(0, static_features_view_) + .texture(1, layer_input) + .texture(2, layer_views_[(i + 1) % 2]) + .buffer(3, weights_buffer_, wgpuBufferGetSize(weights_buffer_)) + .buffer(4, layer_params_buffers_[i], sizeof(LayerParams)) + .texture(5, input_view) + .build(ctx_.device, layer_bgl); + + layer_bind_groups_.push_back(layer_bg); + } + + wgpuBindGroupLayoutRelease(layer_bgl); +} + +void CNNv2Effect::compute(WGPUCommandEncoder encoder, + const CommonPostProcessUniforms& uniforms) { + if (!initialized_ || !static_pipeline_ || !static_bind_group_) return; + + float effective_blend = blend_amount_; + if (beat_modulated_) { + effective_blend = blend_amount_ * uniforms.beat_phase * beat_scale_; + } + + // Update static feature params + StaticFeatureParams static_params; + static_params.mip_level = mip_level_; + static_params.padding[0] = 0; + static_params.padding[1] = 0; + static_params.padding[2] = 0; + wgpuQueueWriteBuffer(ctx_.queue, static_params_buffer_, 0, &static_params, sizeof(static_params)); + + // Pass 1: Compute static features + WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); + + wgpuComputePassEncoderSetPipeline(pass, static_pipeline_); + wgpuComputePassEncoderSetBindGroup(pass, 0, static_bind_group_, 0, nullptr); + + // Dispatch workgroups (8×8 threads per group) + uint32_t workgroups_x = (width_ + 7) / 8; + uint32_t workgroups_y = (height_ + 7) / 8; + wgpuComputePassEncoderDispatchWorkgroups(pass, workgroups_x, workgroups_y, 1); + + wgpuComputePassEncoderEnd(pass); + wgpuComputePassEncoderRelease(pass); + + // Execute CNN layer passes + if (!layer_pipeline_ || layer_bind_groups_.empty()) return; + + // Update layer params (each layer has own buffer) + for (size_t i = 0; i < layer_info_.size(); ++i) { + const LayerInfo& info = layer_info_[i]; + + LayerParams params; + params.kernel_size = info.kernel_size; + 
params.in_channels = info.in_channels; + params.out_channels = info.out_channels; + params.weight_offset = info.weight_offset; + params.is_output_layer = (i == layer_info_.size() - 1) ? 1 : 0; + params.blend_amount = effective_blend; + params.is_layer_0 = (i == 0) ? 1 : 0; + + wgpuQueueWriteBuffer(ctx_.queue, layer_params_buffers_[i], 0, ¶ms, sizeof(params)); + + WGPUComputePassEncoder layer_pass = wgpuCommandEncoderBeginComputePass(encoder, nullptr); + + wgpuComputePassEncoderSetPipeline(layer_pass, layer_pipeline_); + wgpuComputePassEncoderSetBindGroup(layer_pass, 0, layer_bind_groups_[i], 0, nullptr); + + wgpuComputePassEncoderDispatchWorkgroups(layer_pass, workgroups_x, workgroups_y, 1); + + wgpuComputePassEncoderEnd(layer_pass); + wgpuComputePassEncoderRelease(layer_pass); + } +} + +void CNNv2Effect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + (void)pass; + (void)uniforms; + // Compute-only effect, rendering is done by default composite pass +} + +void CNNv2Effect::cleanup() { + if (static_features_view_) wgpuTextureViewRelease(static_features_view_); + if (static_features_tex_) wgpuTextureRelease(static_features_tex_); + if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_); + if (static_params_buffer_) wgpuBufferRelease(static_params_buffer_); + if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); + if (linear_sampler_) wgpuSamplerRelease(linear_sampler_); + + if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_); + if (weights_buffer_) wgpuBufferRelease(weights_buffer_); + for (auto buf : layer_params_buffers_) wgpuBufferRelease(buf); + layer_params_buffers_.clear(); + + for (int i = 0; i < 3; ++i) { + if (input_mip_view_[i]) wgpuTextureViewRelease(input_mip_view_[i]); + } + if (input_mip_tex_) wgpuTextureRelease(input_mip_tex_); + + for (auto view : layer_views_) wgpuTextureViewRelease(view); + for (auto tex : layer_textures_) wgpuTextureRelease(tex); + for (auto bg : 
layer_bind_groups_) wgpuBindGroupRelease(bg); + + layer_views_.clear(); + layer_textures_.clear(); + layer_bind_groups_.clear(); + layer_info_.clear(); + + initialized_ = false; +} diff --git a/src/effects/cnn_v2_effect.h b/src/effects/cnn_v2_effect.h new file mode 100644 index 0000000..d530d3b --- /dev/null +++ b/src/effects/cnn_v2_effect.h @@ -0,0 +1,88 @@ +// CNN v2 Effect - Parametric Static Features +// Multi-pass post-processing with 7D feature input +// Supports per-layer kernel sizes (e.g., 1×1, 3×3, 5×5) + +#pragma once +#include "gpu/effect.h" +#include <vector> + +struct CNNv2EffectParams { + float blend_amount = 1.0f; +}; + +class CNNv2Effect : public PostProcessEffect { +public: + explicit CNNv2Effect(const GpuContext& ctx); + explicit CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params); + ~CNNv2Effect(); + + void init(MainSequence* demo) override; + void resize(int width, int height) override; + void compute(WGPUCommandEncoder encoder, + const CommonPostProcessUniforms& uniforms) override; + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + void update_bind_group(WGPUTextureView input_view) override; + + void set_beat_modulation(bool enabled, float scale = 1.0f) { + beat_modulated_ = enabled; + beat_scale_ = scale; + } + +private: + struct LayerInfo { + uint32_t kernel_size; + uint32_t in_channels; + uint32_t out_channels; + uint32_t weight_offset; + uint32_t weight_count; + }; + + struct LayerParams { + uint32_t kernel_size; + uint32_t in_channels; + uint32_t out_channels; + uint32_t weight_offset; + uint32_t is_output_layer; + float blend_amount; + uint32_t is_layer_0; + }; + + struct StaticFeatureParams { + uint32_t mip_level; + uint32_t padding[3]; + }; + + void create_textures(); + void create_pipelines(); + void load_weights(); + void cleanup(); + + // Static features compute + WGPUComputePipeline static_pipeline_; + WGPUBindGroup static_bind_group_; + WGPUBuffer 
static_params_buffer_; + WGPUTexture static_features_tex_; + WGPUTextureView static_features_view_; + WGPUSampler linear_sampler_; + + // CNN layers (storage buffer architecture) + WGPUComputePipeline layer_pipeline_; // Single pipeline for all layers + WGPUBuffer weights_buffer_; // Storage buffer for weights + std::vector<WGPUBuffer> layer_params_buffers_; // Uniform buffers (one per layer) + std::vector<LayerInfo> layer_info_; // Layer metadata + std::vector<WGPUBindGroup> layer_bind_groups_; // Per-layer bind groups + std::vector<WGPUTexture> layer_textures_; // Ping-pong buffers + std::vector<WGPUTextureView> layer_views_; + + // Input mips + WGPUTexture input_mip_tex_; + WGPUTextureView input_mip_view_[3]; + WGPUTextureView current_input_view_; + + float blend_amount_ = 1.0f; + bool beat_modulated_ = false; + float beat_scale_ = 1.0f; + uint32_t mip_level_ = 0; + bool initialized_; +}; diff --git a/src/effects/distort_effect.cc b/src/effects/distort_effect.cc new file mode 100644 index 0000000..97622b2 --- /dev/null +++ b/src/effects/distort_effect.cc @@ -0,0 +1,36 @@ +// This file is part of the 64k demo project. +// It implements the DistortEffect. 
+ +#include "gpu/demo_effects.h" +#include "gpu/gpu.h" + +// --- DistortEffect --- +DistortEffect::DistortEffect(const GpuContext& ctx) + : DistortEffect(ctx, DistortParams()) { +} + +DistortEffect::DistortEffect(const GpuContext& ctx, const DistortParams& params) + : PostProcessEffect(ctx), params_(params) { + params_buffer_.init(ctx_.device); + pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, + distort_shader_wgsl); +} + +void DistortEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + // Populate CommonPostProcessUniforms + uniforms_.update(ctx_.queue, uniforms); + + // Populate DistortParams + const DistortParams distort_p = { + .strength = params_.strength, + .speed = params_.speed, + }; + params_buffer_.update(ctx_.queue, distort_p); + + PostProcessEffect::render(pass, uniforms); +} + +void DistortEffect::update_bind_group(WGPUTextureView v) { + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(), params_buffer_); +}
\ No newline at end of file diff --git a/src/effects/fade_effect.cc b/src/effects/fade_effect.cc new file mode 100644 index 0000000..fd2af69 --- /dev/null +++ b/src/effects/fade_effect.cc @@ -0,0 +1,98 @@ +// This file is part of the 64k demo project. +// It implements the FadeEffect - fades to/from black based on time. + +#include "effects/fade_effect.h" +#include "gpu/post_process_helper.h" +#include <cmath> + +struct FadeParams { + float fade_amount; + float _pad[3]; +}; +static_assert(sizeof(FadeParams) == 16, "FadeParams must be 16 bytes for WGSL alignment"); + +FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { + const char* shader_code = R"( + struct VertexOutput { + @builtin(position) position: vec4<f32>, + @location(0) uv: vec2<f32>, + }; + + struct CommonUniforms { + resolution: vec2<f32>, + _pad0: f32, + _pad1: f32, + aspect_ratio: f32, + time: f32, + beat: f32, + audio_intensity: f32, + }; + + struct FadeParams { + fade_amount: f32, + _pad0: f32, + _pad1: f32, + _pad2: f32, + }; + + @group(0) @binding(0) var inputSampler: sampler; + @group(0) @binding(1) var inputTexture: texture_2d<f32>; + @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; + @group(0) @binding(3) var<uniform> params: FadeParams; + + @vertex + fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput { + var output: VertexOutput; + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, -1.0), + vec2<f32>(3.0, -1.0), + vec2<f32>(-1.0, 3.0) + ); + output.position = vec4<f32>(pos[vertexIndex], 0.0, 1.0); + output.uv = pos[vertexIndex] * 0.5 + 0.5; + return output; + } + + @fragment + fn fs_main(input: VertexOutput) -> @location(0) vec4<f32> { + let color = textureSample(inputTexture, inputSampler, input.uv); + // Fade to black: 0.0 = black, 1.0 = full color + return vec4<f32>(color.rgb * params.fade_amount, color.a); + } + )"; + + pipeline_ = + create_post_process_pipeline(ctx_.device, ctx_.format, shader_code); + params_buffer_ = gpu_create_buffer( + 
ctx_.device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); +} + +void FadeEffect::update_bind_group(WGPUTextureView input_view) { + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, + uniforms_.get(), params_buffer_); +} + +void FadeEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + + // Example fade pattern: fade in at start, fade out at end + // Customize this based on your needs + float fade_amount = 1.0f; + if (uniforms.time < 2.0f) { + // Fade in from black over first 2 seconds + fade_amount = uniforms.time / 2.0f; + } else if (uniforms.time > 36.0f) { + // Fade out to black after 36 seconds + fade_amount = 1.0f - ((uniforms.time - 36.0f) / 4.0f); + fade_amount = fmaxf(fade_amount, 0.0f); + } + + FadeParams params = {fade_amount, {0.0f, 0.0f, 0.0f}}; + wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, ¶ms, + sizeof(params)); + + wgpuRenderPassEncoderSetPipeline(pass, pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} diff --git a/src/effects/fade_effect.h b/src/effects/fade_effect.h new file mode 100644 index 0000000..8cd7006 --- /dev/null +++ b/src/effects/fade_effect.h @@ -0,0 +1,20 @@ +// This file is part of the 64k demo project. +// It declares the FadeEffect - fades to/from black. 
+ +#pragma once + +#include "gpu/effect.h" +#include "gpu/post_process_helper.h" +#include "gpu/gpu.h" +#include "gpu/uniform_helper.h" + +class FadeEffect : public PostProcessEffect { + public: + FadeEffect(const GpuContext& ctx); + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + void update_bind_group(WGPUTextureView input_view) override; + + private: + GpuBuffer params_buffer_; +}; diff --git a/src/effects/flash_cube_effect.cc b/src/effects/flash_cube_effect.cc new file mode 100644 index 0000000..29e9897 --- /dev/null +++ b/src/effects/flash_cube_effect.cc @@ -0,0 +1,104 @@ +// This file is part of the 64k demo project. +// It implements the FlashCubeEffect - a flashing background cube with Perlin +// noise. + +#include "effects/flash_cube_effect.h" +#include "generated/assets.h" +#include "util/asset_manager_utils.h" +#include <cmath> +#include <iostream> + +FlashCubeEffect::FlashCubeEffect(const GpuContext& ctx) : Effect(ctx) { +} + +void FlashCubeEffect::resize(int width, int height) { + if (width == width_ && height == height_) + return; + + Effect::resize(width, height); + + if (!initialized_) + return; + + renderer_.resize(width_, height_); +} + +void FlashCubeEffect::init(MainSequence* demo) { + (void)demo; + WGPUTextureFormat format = demo->gpu_ctx.format; + + renderer_.init(ctx_.device, ctx_.queue, ctx_.format); + renderer_.resize(width_, height_); + initialized_ = true; + + // Texture Manager + texture_manager_.init(ctx_.device, ctx_.queue); + + // Load Perlin noise texture + TextureAsset noise_tex = GetTextureAsset(AssetId::ASSET_NOISE_TEX); + if (noise_tex.pixels && noise_tex.width == 256 && noise_tex.height == 256) { + texture_manager_.create_texture("noise", noise_tex.width, noise_tex.height, + noise_tex.pixels); + renderer_.set_noise_texture(texture_manager_.get_texture_view("noise")); + } else { + std::cerr << "Failed to load NOISE_TEX asset for FlashCubeEffect." 
+ << std::endl; + } + + // Create a very large background cube + // Scale and distance ensure it's clearly behind foreground objects + scene_.clear(); + Object3D cube(ObjectType::BOX); + cube.position = vec3(0, 0, 0); + cube.scale = vec3(100.0f, 100.0f, 100.0f); // Much larger cube + cube.color = vec4(0.3f, 0.3f, 0.5f, 1.0f); // Dark blue base color + scene_.add_object(cube); +} + +void FlashCubeEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + // Detect beat changes for flash trigger (using intensity as proxy for beat + // hits) Intensity spikes on beats, so we can use it to trigger flashes + if (uniforms.audio_intensity > 0.5f && + flash_intensity_ < 0.3f) { // High intensity + flash cooled down + flash_intensity_ = 1.0f; // Trigger full flash + } + + // Exponential decay of flash + flash_intensity_ *= 0.90f; // Slower fade for more visible effect + + // Always have base brightness, add flash on top + float base_brightness = 0.2f; + float flash_boost = + base_brightness + flash_intensity_ * 0.8f; // 0.2 to 1.0 range + + scene_.objects[0].color = + vec4(0.4f * flash_boost, // Reddish tint + 0.6f * flash_boost, // More green + 1.0f * flash_boost, // Strong blue for background feel + 1.0f); + + // Slowly rotate the cube for visual interest + scene_.objects[0].rotation = + quat::from_axis(vec3(0.3f, 1, 0.2f), uniforms.time * 0.05f); + + // Position camera OUTSIDE the cube looking at it from a distance + // This way we see the cube as a background element + float cam_distance = 150.0f; // Much farther to ensure it's behind everything + float orbit_angle = uniforms.time * 0.1f; + + camera_.set_look_at( + vec3(std::sin(orbit_angle) * cam_distance, + std::cos(orbit_angle * 0.3f) * 30.0f, + std::cos(orbit_angle) * cam_distance), // Camera orbits around + vec3(0, 0, 0), // Look at cube center + vec3(0, 1, 0)); + + camera_.aspect_ratio = uniforms.aspect_ratio; + // Extend far plane to accommodate distant camera position (150 units + 
cube + // size) + camera_.far_plane = 300.0f; + + // Draw the cube + renderer_.draw(pass, scene_, camera_, uniforms.time); +} diff --git a/src/effects/flash_cube_effect.h b/src/effects/flash_cube_effect.h new file mode 100644 index 0000000..df30b5b --- /dev/null +++ b/src/effects/flash_cube_effect.h @@ -0,0 +1,28 @@ +// This file is part of the 64k demo project. +// It implements a flashing cube effect with Perlin noise texture. +// The cube is large and we're inside it, flashing in sync with the beat. + +#pragma once +#include "3d/camera.h" +#include "3d/renderer.h" +#include "3d/scene.h" +#include "gpu/effect.h" +#include "gpu/texture_manager.h" + +class FlashCubeEffect : public Effect { + public: + FlashCubeEffect(const GpuContext& ctx); + void init(MainSequence* demo) override; + void resize(int width, int height) override; + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + + private: + Renderer3D renderer_; + TextureManager texture_manager_; + Scene scene_; + Camera camera_; + float last_beat_ = 0.0f; + float flash_intensity_ = 0.0f; + bool initialized_ = false; +}; diff --git a/src/effects/flash_effect.cc b/src/effects/flash_effect.cc new file mode 100644 index 0000000..235412d --- /dev/null +++ b/src/effects/flash_effect.cc @@ -0,0 +1,94 @@ +// This file is part of the 64k demo project. +// It implements the FlashEffect - brief flash on u.beat hits. +// Now supports parameterized color with per-frame animation. 
+ +#include "effects/flash_effect.h" +#include "gpu/post_process_helper.h" +#include <cmath> + +// Backward compatibility constructor (delegates to parameterized constructor) +FlashEffect::FlashEffect(const GpuContext& ctx) + : FlashEffect(ctx, FlashEffectParams{}) { +} + +// Parameterized constructor +FlashEffect::FlashEffect(const GpuContext& ctx, const FlashEffectParams& params) + : PostProcessEffect(ctx), params_(params) { + const char* shader_code = R"( + struct VertexOutput { + @builtin(position) position: vec4<f32>, + @location(0) uv: vec2<f32>, + }; + + struct Uniforms { + flash_intensity: f32, + audio_intensity: f32, + flash_color: vec3<f32>, // Parameterized color + _pad: f32, + }; + + @group(0) @binding(0) var inputSampler: sampler; + @group(0) @binding(1) var inputTexture: texture_2d<f32>; + @group(0) @binding(2) var<uniform> uniforms: Uniforms; + + @vertex + fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput { + var output: VertexOutput; + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, -1.0), + vec2<f32>(3.0, -1.0), + vec2<f32>(-1.0, 3.0) + ); + output.position = vec4<f32>(pos[vertexIndex], 0.0, 1.0); + output.uv = pos[vertexIndex] * 0.5 + 0.5; + return output; + } + + @fragment + fn fs_main(input: VertexOutput) -> @location(0) vec4<f32> { + let color = textureSample(inputTexture, inputSampler, input.uv); + // Use parameterized flash color instead of hardcoded white + var flashed = mix(color.rgb, uniforms.flash_color, uniforms.flash_intensity); + return vec4<f32>(flashed, color.a); + } + )"; + + pipeline_ = + create_post_process_pipeline(ctx_.device, ctx_.format, shader_code); + flash_uniforms_.init(ctx_.device); +} + +void FlashEffect::update_bind_group(WGPUTextureView input_view) { + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, + flash_uniforms_.get(), {}); +} + +void FlashEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + // Trigger flash based on configured 
threshold + if (uniforms.audio_intensity > params_.trigger_threshold && flash_intensity_ < 0.2f) { + flash_intensity_ = 0.8f; // Trigger flash + } + + // Decay based on configured rate + flash_intensity_ *= params_.decay_rate; + + // *** PER-FRAME PARAMETER COMPUTATION *** + // Animate color based on time and beat + const float r = params_.color[0] * (0.5f + 0.5f * sinf(uniforms.time * 0.5f)); + const float g = params_.color[1] * (0.5f + 0.5f * cosf(uniforms.time * 0.7f)); + const float b = params_.color[2] * (1.0f + 0.3f * uniforms.beat_phase); + + // Update uniforms with computed (animated) values + const FlashUniforms u = { + .flash_intensity = flash_intensity_, + .intensity = uniforms.audio_intensity, + ._pad1 = {0.0f, 0.0f}, // Padding for vec3 alignment + .color = {r, g, b}, // Time-dependent, computed every frame + ._pad2 = 0.0f}; + flash_uniforms_.update(ctx_.queue, u); + + wgpuRenderPassEncoderSetPipeline(pass, pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} diff --git a/src/effects/flash_effect.h b/src/effects/flash_effect.h new file mode 100644 index 0000000..1ac75a4 --- /dev/null +++ b/src/effects/flash_effect.h @@ -0,0 +1,45 @@ +// This file is part of the 64k demo project. +// It declares the FlashEffect - brief white flash on beat hits. + +#pragma once + +#include "gpu/effect.h" +#include "gpu/gpu.h" +#include "gpu/uniform_helper.h" + +// Parameters for FlashEffect (set at construction time) +struct FlashEffectParams { + float color[3] = {1.0f, 1.0f, 1.0f}; // Default: white + float decay_rate = 0.98f; // Default: fast decay + float trigger_threshold = 0.7f; // Default: trigger on strong beats +}; + +// Uniform data sent to GPU shader +// IMPORTANT: Must match WGSL struct layout with proper alignment +// vec3<f32> in WGSL has 16-byte alignment, not 12-byte! 
+struct FlashUniforms { + float flash_intensity; // offset 0 + float intensity; // offset 4 + float _pad1[2]; // offset 8-15 (padding for vec3 alignment) + float color[3]; // offset 16-27 (vec3 aligned to 16 bytes) + float _pad2; // offset 28-31 +}; +static_assert(sizeof(FlashUniforms) == 32, + "FlashUniforms must be 32 bytes for WGSL alignment"); + +class FlashEffect : public PostProcessEffect { + public: + // Backward compatibility constructor (uses default params) + FlashEffect(const GpuContext& ctx); + // New parameterized constructor + FlashEffect(const GpuContext& ctx, const FlashEffectParams& params); + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + void update_bind_group(WGPUTextureView input_view) override; + + private: + FlashEffectParams params_; + UniformBuffer<FlashUniforms> flash_uniforms_; + UniformBuffer<FlashEffectParams> params_buffer_; + float flash_intensity_ = 0.0f; +}; diff --git a/src/effects/gaussian_blur_effect.cc b/src/effects/gaussian_blur_effect.cc new file mode 100644 index 0000000..0b4beae --- /dev/null +++ b/src/effects/gaussian_blur_effect.cc @@ -0,0 +1,38 @@ +// This file is part of the 64k demo project. +// It implements the GaussianBlurEffect with parameterization. 
+ +#include "gpu/demo_effects.h" +#include "gpu/post_process_helper.h" +#include "gpu/gpu.h" + +// --- GaussianBlurEffect --- + +// Backward compatibility constructor (delegates to parameterized constructor) +GaussianBlurEffect::GaussianBlurEffect(const GpuContext& ctx) + : GaussianBlurEffect(ctx, GaussianBlurParams{}) { +} + +// Parameterized constructor +GaussianBlurEffect::GaussianBlurEffect(const GpuContext& ctx, + const GaussianBlurParams& params) + : PostProcessEffect(ctx), params_(params) { + pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, + gaussian_blur_shader_wgsl); + params_buffer_.init(ctx_.device); +} + +void GaussianBlurEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + // Update uniforms with current state and parameters + uniforms_.update(ctx_.queue, uniforms); + params_buffer_.update(ctx_.queue, params_); + + wgpuRenderPassEncoderSetPipeline(pass, pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} + +void GaussianBlurEffect::update_bind_group(WGPUTextureView input_view) { + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, + uniforms_.get(), params_buffer_.get()); +} diff --git a/src/effects/heptagon_effect.cc b/src/effects/heptagon_effect.cc new file mode 100644 index 0000000..724eabb --- /dev/null +++ b/src/effects/heptagon_effect.cc @@ -0,0 +1,22 @@ +// This file is part of the 64k demo project. +// It implements the HeptagonEffect. 
+ +#include "gpu/demo_effects.h" +#include "gpu/gpu.h" +#include "util/mini_math.h" + +// --- HeptagonEffect --- +HeptagonEffect::HeptagonEffect(const GpuContext& ctx) : Effect(ctx) { + // uniforms_ is initialized by Effect base class + ResourceBinding bindings[] = {{uniforms_.get(), WGPUBufferBindingType_Uniform}}; + pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, main_shader_wgsl, + bindings, 1); + pass_.vertex_count = 21; +} +void HeptagonEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline); + wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, pass_.vertex_count, 1, 0, 0); +} diff --git a/src/effects/hybrid_3d_effect.cc b/src/effects/hybrid_3d_effect.cc new file mode 100644 index 0000000..1cd773d --- /dev/null +++ b/src/effects/hybrid_3d_effect.cc @@ -0,0 +1,147 @@ +// This file is part of the 64k demo project. +// It implements the Hybrid3DEffect. 
+ +#include "effects/hybrid_3d_effect.h" +#include "generated/assets.h" +#include "util/asset_manager_utils.h" +#include <cassert> +#include <cmath> +#include <iostream> + +Hybrid3DEffect::Hybrid3DEffect(const GpuContext& ctx) : Effect(ctx) { +} + +void Hybrid3DEffect::resize(int width, int height) { + if (width == width_ && height == height_) + return; + + Effect::resize(width, height); + + if (!initialized_) + return; + + renderer_.resize(width_, height_); +} + +void Hybrid3DEffect::init(MainSequence* demo) { + (void)demo; + WGPUTextureFormat format = + demo->gpu_ctx.format; // Get current format from MainSequence (might be + // different than constructor if resized) + + renderer_.init(ctx_.device, ctx_.queue, ctx_.format); + renderer_.resize(width_, height_); + initialized_ = true; + + // Texture Manager + texture_manager_.init(ctx_.device, ctx_.queue); + + // Load Noise Asset + TextureAsset noise_tex = GetTextureAsset(AssetId::ASSET_NOISE_TEX); + if (noise_tex.pixels && noise_tex.width == 256 && noise_tex.height == 256) { + texture_manager_.create_texture("noise", noise_tex.width, noise_tex.height, + noise_tex.pixels); + renderer_.set_noise_texture(texture_manager_.get_texture_view("noise")); + } else { + std::cerr << "Failed to load NOISE_TEX asset." 
<< std::endl; + } + + // Setup Scene + scene_.clear(); + Object3D center(ObjectType::BOX); // Use BOX for bumps + center.position = vec3(0, 0, 0); + center.color = vec4(1, 0, 0, 1); + scene_.add_object(center); + + for (int i = 0; i < 8; ++i) { + ObjectType type = ObjectType::SPHERE; + if (i % 3 == 1) + type = ObjectType::TORUS; + if (i % 3 == 2) + type = ObjectType::BOX; + + Object3D obj(type); + + float angle = (i / 8.0f) * 6.28318f; + + obj.position = vec3(std::cos(angle) * 4.0f, 0, std::sin(angle) * 4.0f); + + obj.scale = vec3(0.7f, 0.7f, 0.7f); // Increased scale by 40% + + if (type == ObjectType::SPHERE) + obj.color = vec4(0, 1, 0, 1); + + else if (type == ObjectType::TORUS) + obj.color = vec4(0, 0.5, 1, 1); + else + obj.color = vec4(1, 1, 0, 1); + + scene_.add_object(obj); + } +} + +// Cubic ease-in/out function for non-linear motion + +static float ease_in_out_cubic(float t) { + t *= 2.0f; + + if (t < 1.0f) { + return 0.5f * t * t * t; + } + + t -= 2.0f; + + return 0.5f * (t * t * t + 2.0f); +} + +void Hybrid3DEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + // Animate Objects + + for (size_t i = 1; i < scene_.objects.size(); ++i) { + scene_.objects[i].rotation = + quat::from_axis(vec3(0, 1, 0), uniforms.time * 2.0f + i); + + scene_.objects[i].position.y = std::sin(uniforms.time * 3.0f + i) * 1.5f; + } + + // Camera jumps every other pattern (2 seconds) for dramatic effect + int pattern_num = (int)(uniforms.time / 2.0f); + int camera_preset = pattern_num % 4; // Cycle through 4 different angles + + vec3 cam_pos, cam_target; + + switch (camera_preset) { + case 0: // High angle, orbiting + { + float angle = uniforms.time * 0.5f; + cam_pos = vec3(std::sin(angle) * 12.0f, 8.0f, std::cos(angle) * 12.0f); + cam_target = vec3(0, 0, 0); + } break; + case 1: // Low angle, close-up + { + float angle = uniforms.time * 0.3f + 1.57f; // Offset angle + cam_pos = vec3(std::sin(angle) * 6.0f, 2.0f, std::cos(angle) * 6.0f); + 
cam_target = vec3(0, 1, 0); + } break; + case 2: // Side view, sweeping + { + float sweep = std::sin(uniforms.time * 0.4f) * 10.0f; + cam_pos = vec3(sweep, 5.0f, 8.0f); + cam_target = vec3(0, 0, 0); + } break; + case 3: // Top-down, rotating + { + float angle = uniforms.time * 0.6f; + cam_pos = vec3(std::sin(angle) * 5.0f, 12.0f, std::cos(angle) * 5.0f); + cam_target = vec3(0, 0, 0); + } break; + } + + camera_.set_look_at(cam_pos, cam_target, vec3(0, 1, 0)); + camera_.aspect_ratio = uniforms.aspect_ratio; + + // Draw + + renderer_.draw(pass, scene_, camera_, uniforms.time); +} diff --git a/src/effects/hybrid_3d_effect.h b/src/effects/hybrid_3d_effect.h new file mode 100644 index 0000000..818b65c --- /dev/null +++ b/src/effects/hybrid_3d_effect.h @@ -0,0 +1,29 @@ +// This file is part of the 64k demo project. +// It defines the Hybrid3DEffect, integrating the 3D renderer into the demo +// timeline. + +#pragma once + +#include "3d/camera.h" +#include "3d/renderer.h" +#include "3d/scene.h" +#include "gpu/effect.h" +#include "gpu/texture_manager.h" + +class Hybrid3DEffect : public Effect { + public: + Hybrid3DEffect(const GpuContext& ctx); + virtual ~Hybrid3DEffect() override = default; + + void init(MainSequence* demo) override; + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + void resize(int width, int height) override; + + private: + Renderer3D renderer_; + TextureManager texture_manager_; + Scene scene_; + Camera camera_; + bool initialized_ = false; +}; diff --git a/src/effects/moving_ellipse_effect.cc b/src/effects/moving_ellipse_effect.cc new file mode 100644 index 0000000..f595de9 --- /dev/null +++ b/src/effects/moving_ellipse_effect.cc @@ -0,0 +1,22 @@ +// This file is part of the 64k demo project. +// It implements the MovingEllipseEffect. 
+ +#include "gpu/demo_effects.h" +#include "gpu/post_process_helper.h" +#include "gpu/gpu.h" + +// --- MovingEllipseEffect --- +MovingEllipseEffect::MovingEllipseEffect(const GpuContext& ctx) : Effect(ctx) { + // uniforms_ is initialized by Effect base class + ResourceBinding bindings[] = {{uniforms_.get(), WGPUBufferBindingType_Uniform}}; + pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, ellipse_shader_wgsl, + bindings, 1); + pass_.vertex_count = 3; +} +void MovingEllipseEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline); + wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} diff --git a/src/effects/particle_spray_effect.cc b/src/effects/particle_spray_effect.cc new file mode 100644 index 0000000..e250f5a --- /dev/null +++ b/src/effects/particle_spray_effect.cc @@ -0,0 +1,48 @@ +// This file is part of the 64k demo project. +// It implements the ParticleSprayEffect. 
+ +#include "gpu/demo_effects.h" +#include "gpu/post_process_helper.h" +#include "gpu/gpu.h" +#include <vector> + +// --- ParticleSprayEffect --- +ParticleSprayEffect::ParticleSprayEffect(const GpuContext& ctx) : Effect(ctx) { + std::vector<Particle> init_p(NUM_PARTICLES); + for (Particle& p : init_p) + p.pos[3] = 0.0f; + particles_buffer_ = gpu_create_buffer( + ctx_.device, sizeof(Particle) * NUM_PARTICLES, + WGPUBufferUsage_Storage | WGPUBufferUsage_Vertex, init_p.data()); + ResourceBinding cb[] = {{particles_buffer_, WGPUBufferBindingType_Storage}, + {uniforms_.get(), WGPUBufferBindingType_Uniform}}; + compute_pass_ = + gpu_create_compute_pass(ctx_.device, particle_spray_compute_wgsl, cb, 2); + compute_pass_.workgroup_size_x = (NUM_PARTICLES + 63) / 64; + ResourceBinding rb[] = { + {particles_buffer_, WGPUBufferBindingType_ReadOnlyStorage}, + {uniforms_.get(), WGPUBufferBindingType_Uniform}}; + render_pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, + particle_render_wgsl, rb, 2); + render_pass_.vertex_count = 6; + render_pass_.instance_count = NUM_PARTICLES; +} +void ParticleSprayEffect::compute(WGPUCommandEncoder e, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(e, nullptr); + wgpuComputePassEncoderSetPipeline(pass, compute_pass_.pipeline); + wgpuComputePassEncoderSetBindGroup(pass, 0, compute_pass_.bind_group, 0, + nullptr); + wgpuComputePassEncoderDispatchWorkgroups(pass, compute_pass_.workgroup_size_x, + 1, 1); + wgpuComputePassEncoderEnd(pass); +} +void ParticleSprayEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + (void)uniforms; + wgpuRenderPassEncoderSetPipeline(pass, render_pass_.pipeline); + wgpuRenderPassEncoderSetBindGroup(pass, 0, render_pass_.bind_group, 0, + nullptr); + wgpuRenderPassEncoderDraw(pass, 6, NUM_PARTICLES, 0, 0); +} diff --git a/src/effects/particles_effect.cc 
b/src/effects/particles_effect.cc new file mode 100644 index 0000000..5762637 --- /dev/null +++ b/src/effects/particles_effect.cc @@ -0,0 +1,47 @@ +// This file is part of the 64k demo project. +// It implements the ParticlesEffect. + +#include "gpu/demo_effects.h" +#include "gpu/post_process_helper.h" +#include "gpu/gpu.h" +#include <vector> + +// --- ParticlesEffect --- +ParticlesEffect::ParticlesEffect(const GpuContext& ctx) : Effect(ctx) { + std::vector<Particle> init_p(NUM_PARTICLES); + particles_buffer_ = gpu_create_buffer( + ctx_.device, sizeof(Particle) * NUM_PARTICLES, + WGPUBufferUsage_Storage | WGPUBufferUsage_Vertex, init_p.data()); + ResourceBinding cb[] = {{particles_buffer_, WGPUBufferBindingType_Storage}, + {uniforms_.get(), WGPUBufferBindingType_Uniform}}; + compute_pass_ = + gpu_create_compute_pass(ctx_.device, particle_compute_wgsl, cb, 2); + compute_pass_.workgroup_size_x = (NUM_PARTICLES + 63) / 64; + ResourceBinding rb[] = { + {particles_buffer_, WGPUBufferBindingType_ReadOnlyStorage}, + {uniforms_.get(), WGPUBufferBindingType_Uniform}}; + render_pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, + particle_render_wgsl, rb, 2); + render_pass_.vertex_count = 6; + render_pass_.instance_count = NUM_PARTICLES; +} +void ParticlesEffect::compute(WGPUCommandEncoder e, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + WGPUComputePassEncoder pass = wgpuCommandEncoderBeginComputePass(e, nullptr); + wgpuComputePassEncoderSetPipeline(pass, compute_pass_.pipeline); + wgpuComputePassEncoderSetBindGroup(pass, 0, compute_pass_.bind_group, 0, + nullptr); + wgpuComputePassEncoderDispatchWorkgroups(pass, compute_pass_.workgroup_size_x, + 1, 1); + wgpuComputePassEncoderEnd(pass); +} +void ParticlesEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + (void)uniforms; + wgpuRenderPassEncoderSetPipeline(pass, render_pass_.pipeline); + wgpuRenderPassEncoderSetBindGroup(pass, 0, 
render_pass_.bind_group, 0, + nullptr); + wgpuRenderPassEncoderDraw(pass, render_pass_.vertex_count, + render_pass_.instance_count, 0, 0); +} diff --git a/src/effects/passthrough_effect.cc b/src/effects/passthrough_effect.cc new file mode 100644 index 0000000..aedb387 --- /dev/null +++ b/src/effects/passthrough_effect.cc @@ -0,0 +1,17 @@ +// This file is part of the 64k demo project. +// It implements the PassthroughEffect. + +#include "gpu/demo_effects.h" +#include "gpu/gpu.h" + +// --- PassthroughEffect --- +PassthroughEffect::PassthroughEffect(const GpuContext& ctx) + : PostProcessEffect(ctx) { + pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, + passthrough_shader_wgsl); +} +void PassthroughEffect::update_bind_group(WGPUTextureView input_view) { + uniforms_.update(ctx_.queue, get_common_uniforms()); + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, + uniforms_.get(), {}); +} diff --git a/src/effects/rotating_cube_effect.cc b/src/effects/rotating_cube_effect.cc new file mode 100644 index 0000000..a42feaa --- /dev/null +++ b/src/effects/rotating_cube_effect.cc @@ -0,0 +1,200 @@ +// This file is part of the 64k demo project. +// It implements RotatingCubeEffect for bump-mapped rotating cube rendering. +// Uses auxiliary texture masking to render only inside a circular region. 
#include "effects/rotating_cube_effect.h"
#include "generated/assets.h"
#include "gpu/bind_group_builder.h"
#include "gpu/shader_composer.h"
#include "gpu/gpu.h"
#include "gpu/sampler_cache.h"
#include "util/asset_manager_utils.h"

// Construction is deliberately empty: all GPU resources are created in
// init(), which needs the MainSequence for the auxiliary mask texture.
RotatingCubeEffect::RotatingCubeEffect(const GpuContext& ctx) : Effect(ctx) {
}

// Releases raw WebGPU handles in reverse order of acquisition.
RotatingCubeEffect::~RotatingCubeEffect() {
  // Samplers owned by SamplerCache - don't release
  if (noise_view_)
    wgpuTextureViewRelease(noise_view_);
  if (noise_texture_)
    wgpuTextureRelease(noise_texture_);
  if (bind_group_1_)
    wgpuBindGroupRelease(bind_group_1_);
  if (bind_group_0_)
    wgpuBindGroupRelease(bind_group_0_);
  if (pipeline_)
    wgpuRenderPipelineRelease(pipeline_);
}

// One-time setup: buffers, placeholder noise texture, composed shader,
// pipeline, and the two bind groups (scene data + circle mask).
void RotatingCubeEffect::init(MainSequence* demo) {
  demo_ = demo;  // non-owning; MainSequence outlives this effect

  uniform_buffer_ =
      gpu_create_buffer(ctx_.device, sizeof(Uniforms),
                        WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
  object_buffer_ =
      gpu_create_buffer(ctx_.device, sizeof(ObjectData),
                        WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst);

  // 1x1 placeholder noise texture; never written here, so it samples as a
  // constant (presumably enough for the shader's fallback path — TODO
  // confirm against the masked-cube shader asset).
  TextureWithView noise = gpu_create_texture_2d(
      ctx_.device, 1, 1, WGPUTextureFormat_RGBA8Unorm,
      (WGPUTextureUsage)(WGPUTextureUsage_TextureBinding | WGPUTextureUsage_RenderAttachment), 1);
  noise_texture_ = noise.texture;
  noise_view_ = noise.view;

  // Cached samplers: the cache owns them (see destructor).
  noise_sampler_ = SamplerCache::Get().get_or_create(ctx_.device, SamplerCache::linear());
  mask_sampler_ = SamplerCache::Get().get_or_create(ctx_.device, SamplerCache::clamp());

  // Load the WGSL source from the packed asset blob and resolve its
  // composition points (linear scene query variant).
  size_t shader_size;
  const char* shader_code =
      (const char*)GetAsset(AssetId::ASSET_MASKED_CUBE_SHADER, &shader_size);

  ShaderComposer::CompositionMap composition_map;
  composition_map["render/scene_query_mode"] = "render/scene_query_linear";
  // Stored in a member so the c_str() passed to WebGPU below stays valid.
  composed_shader_ = ShaderComposer::Get().Compose(
      {}, std::string(shader_code, shader_size), composition_map);

  WGPUShaderSourceWGSL wgsl_src = {};
  wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
  wgsl_src.code = str_view(composed_shader_.c_str());

  WGPUShaderModuleDescriptor shader_desc = {};
  shader_desc.nextInChain = &wgsl_src.chain;
  WGPUShaderModule shader_module =
      wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);

  // Group 0: per-frame uniforms, per-object storage, noise texture+sampler.
  // Binding 2 is skipped — presumably reserved in the shader layout;
  // verify against the masked-cube WGSL.
  WGPUBindGroupLayout bgl_0 =
      BindGroupLayoutBuilder()
          .uniform(0, WGPUShaderStage_Vertex | WGPUShaderStage_Fragment,
                   sizeof(Uniforms))
          .storage(1, WGPUShaderStage_Vertex | WGPUShaderStage_Fragment,
                   sizeof(ObjectData))
          .texture(3, WGPUShaderStage_Fragment)
          .sampler(4, WGPUShaderStage_Fragment)
          .build(ctx_.device);

  // Group 1: the circular mask texture + clamped sampler.
  WGPUBindGroupLayout bgl_1 = BindGroupLayoutBuilder()
                                  .texture(0, WGPUShaderStage_Fragment)
                                  .sampler(1, WGPUShaderStage_Fragment)
                                  .build(ctx_.device);

  const WGPUBindGroupLayout bgls[] = {bgl_0, bgl_1};
  const WGPUPipelineLayoutDescriptor pl_desc = {
      .bindGroupLayoutCount = 2,
      .bindGroupLayouts = bgls,
  };
  WGPUPipelineLayout pipeline_layout =
      wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);

  const WGPUColorTargetState color_target = {
      .format = ctx_.format,
      .writeMask = WGPUColorWriteMask_All,
  };

  // Depth test+write enabled; the pass this draws into must therefore
  // provide a Depth24Plus attachment.
  const WGPUDepthStencilState depth_stencil = {
      .format = WGPUTextureFormat_Depth24Plus,
      .depthWriteEnabled = WGPUOptionalBool_True,
      .depthCompare = WGPUCompareFunction_Less,
  };

  WGPUFragmentState fragment = {};
  fragment.module = shader_module;
  fragment.entryPoint = str_view("fs_main");
  fragment.targetCount = 1;
  fragment.targets = &color_target;

  WGPURenderPipelineDescriptor pipeline_desc = {};
  pipeline_desc.layout = pipeline_layout;
  pipeline_desc.vertex.module = shader_module;
  pipeline_desc.vertex.entryPoint = str_view("vs_main");
  pipeline_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
  pipeline_desc.primitive.cullMode = WGPUCullMode_None;
  pipeline_desc.depthStencil = &depth_stencil;
  pipeline_desc.multisample.count = 1;
  pipeline_desc.multisample.mask = 0xFFFFFFFF;
  pipeline_desc.fragment = &fragment;

  pipeline_ = wgpuDeviceCreateRenderPipeline(ctx_.device, &pipeline_desc);
  // Module and layout are owned by the pipeline from here on.
  wgpuShaderModuleRelease(shader_module);
  wgpuPipelineLayoutRelease(pipeline_layout);

  const WGPUBindGroupEntry entries_0[] = {
      {.binding = 0,
       .buffer = uniform_buffer_.buffer,
       .size = sizeof(Uniforms)},
      {.binding = 1,
       .buffer = object_buffer_.buffer,
       .size = sizeof(ObjectData)},
      {.binding = 3, .textureView = noise_view_},
      {.binding = 4, .sampler = noise_sampler_},
  };

  const WGPUBindGroupDescriptor bg_desc_0 = {
      .layout = bgl_0,
      .entryCount = 4,
      .entries = entries_0,
  };
  bind_group_0_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc_0);
  wgpuBindGroupLayoutRelease(bgl_0);

  // The mask view is produced elsewhere in the demo and looked up by name;
  // it is not owned here.
  WGPUTextureView mask_view = demo_->get_auxiliary_view("circle_mask");
  const WGPUBindGroupEntry entries_1[] = {
      {.binding = 0, .textureView = mask_view},
      {.binding = 1, .sampler = mask_sampler_},
  };

  const WGPUBindGroupDescriptor bg_desc_1 = {
      .layout = bgl_1,
      .entryCount = 2,
      .entries = entries_1,
  };
  bind_group_1_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc_1);
  wgpuBindGroupLayoutRelease(bgl_1);
}

// Per-frame: advances the rotation, rebuilds the matrices, uploads both
// uniform blocks, and draws the 36-vertex cube (12 triangles).
void RotatingCubeEffect::render(WGPURenderPassEncoder pass,
                                const CommonPostProcessUniforms& u) {
  // NOTE(review): fixed per-call increment assumes ~60 fps (0.016 s/frame);
  // animation speed varies with frame rate — consider deriving from u.time.
  rotation_ += 0.016f * 1.5f;

  const vec3 camera_pos = vec3(0, 0, 5);
  const vec3 target = vec3(0, 0, 0);
  const vec3 up = vec3(0, 1, 0);

  const mat4 view = mat4::look_at(camera_pos, target, up);
  const float fov = 60.0f * 3.14159f / 180.0f;  // 60 degrees in radians
  const mat4 proj = mat4::perspective(fov, u.aspect_ratio, 0.1f, 100.0f);
  const mat4 view_proj = proj * view;

  // Model transform: translate * rotate * scale (applied right-to-left).
  const quat rot = quat::from_axis(vec3(0.3f, 1.0f, 0.2f), rotation_);
  const mat4 T = mat4::translate(vec3(0, 0, 0));
  const mat4 R = rot.to_mat();
  const mat4 S = mat4::scale(vec3(1.5f, 1.5f, 1.5f));
  const mat4 model = T * R * S;
  const mat4 inv_model = model.inverse();

  const Uniforms uniforms = {
      .view_proj = view_proj,
      .inv_view_proj = view_proj.inverse(),
      // Camera position in xyz, demo time packed into w.
      .camera_pos_time = vec4(camera_pos.x, camera_pos.y, camera_pos.z, u.time),
      .params = vec4(1.0f, 0.0f, 0.0f, 0.0f),
      .resolution = u.resolution,
  };

  const ObjectData obj_data = {
      .model = model,
      .inv_model = inv_model,
      .color = vec4(0.8f, 0.4f, 0.2f, 1.0f),
      .params = vec4(1.0f, 0.0f, 0.0f, 0.0f),
  };

  wgpuQueueWriteBuffer(ctx_.queue, uniform_buffer_.buffer, 0, &uniforms,
                       sizeof(Uniforms));
  wgpuQueueWriteBuffer(ctx_.queue, object_buffer_.buffer, 0, &obj_data,
                       sizeof(ObjectData));

  wgpuRenderPassEncoderSetPipeline(pass, pipeline_);
  wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_0_, 0, nullptr);
  wgpuRenderPassEncoderSetBindGroup(pass, 1, bind_group_1_, 0, nullptr);
  wgpuRenderPassEncoderDraw(pass, 36, 1, 0, 0);
}
+ +#ifndef ROTATING_CUBE_EFFECT_H_ +#define ROTATING_CUBE_EFFECT_H_ + +#include "gpu/effect.h" +#include "gpu/gpu.h" +#include "util/mini_math.h" +#include <string> + +class RotatingCubeEffect : public Effect { + public: + RotatingCubeEffect(const GpuContext& ctx); + ~RotatingCubeEffect() override; + + void init(MainSequence* demo) override; + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + + private: + struct Uniforms { + mat4 view_proj; + mat4 inv_view_proj; + vec4 camera_pos_time; + vec4 params; + vec2 resolution; + vec2 padding; + }; + + struct ObjectData { + mat4 model; + mat4 inv_model; + vec4 color; + vec4 params; + }; + + MainSequence* demo_ = nullptr; + WGPURenderPipeline pipeline_ = nullptr; + WGPUBindGroup bind_group_0_ = nullptr; + WGPUBindGroup bind_group_1_ = nullptr; + GpuBuffer uniform_buffer_; + GpuBuffer object_buffer_; + WGPUTexture noise_texture_ = nullptr; + WGPUTextureView noise_view_ = nullptr; + WGPUSampler noise_sampler_ = nullptr; + WGPUSampler mask_sampler_ = nullptr; + float rotation_ = 0.0f; + + // Store composed shader to keep it alive for WebGPU + std::string composed_shader_; +}; + +#endif /* ROTATING_CUBE_EFFECT_H_ */ diff --git a/src/effects/scene1_effect.cc b/src/effects/scene1_effect.cc new file mode 100644 index 0000000..c75e511 --- /dev/null +++ b/src/effects/scene1_effect.cc @@ -0,0 +1,20 @@ +// This file is part of the 64k demo project. 
+// Scene1 effect - ShaderToy conversion (raymarching scene) + +#include "gpu/demo_effects.h" +#include "gpu/gpu.h" + +Scene1Effect::Scene1Effect(const GpuContext& ctx) : Effect(ctx) { + ResourceBinding bindings[] = {{uniforms_.get(), WGPUBufferBindingType_Uniform}}; + pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, scene1_shader_wgsl, + bindings, 1); + pass_.vertex_count = 3; +} + +void Scene1Effect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline); + wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, pass_.vertex_count, 1, 0, 0); +} diff --git a/src/effects/scene1_effect.h b/src/effects/scene1_effect.h new file mode 100644 index 0000000..190ffa9 --- /dev/null +++ b/src/effects/scene1_effect.h @@ -0,0 +1,19 @@ +// This file is part of the 64k demo project. +// Scene1 effect - ShaderToy conversion (raymarching scene) + +#ifndef SCENE1_EFFECT_H_ +#define SCENE1_EFFECT_H_ + +#include "gpu/effect.h" + +class Scene1Effect : public Effect { + public: + Scene1Effect(const GpuContext& ctx); + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + + private: + RenderPass pass_; +}; + +#endif /* SCENE1_EFFECT_H_ */ diff --git a/src/effects/solarize_effect.cc b/src/effects/solarize_effect.cc new file mode 100644 index 0000000..cdb9354 --- /dev/null +++ b/src/effects/solarize_effect.cc @@ -0,0 +1,20 @@ +// This file is part of the 64k demo project. +// It implements the SolarizeEffect. 
+ +#include "gpu/demo_effects.h" +#include "gpu/gpu.h" + +// --- SolarizeEffect --- +SolarizeEffect::SolarizeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { + pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, + solarize_shader_wgsl); +} +void SolarizeEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + PostProcessEffect::render(pass, uniforms); +} +void SolarizeEffect::update_bind_group(WGPUTextureView v) { + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(), + {}); +} diff --git a/src/effects/theme_modulation_effect.cc b/src/effects/theme_modulation_effect.cc new file mode 100644 index 0000000..1c81d79 --- /dev/null +++ b/src/effects/theme_modulation_effect.cc @@ -0,0 +1,105 @@ +// This file is part of the 64k demo project. +// It implements theme modulation (bright/dark alternation). + +#include "effects/theme_modulation_effect.h" +#include "gpu/post_process_helper.h" +#include "gpu/shaders.h" +#include <cmath> + +struct ThemeModulationParams { + float theme_brightness; + float _pad[3]; +}; +static_assert(sizeof(ThemeModulationParams) == 16, "ThemeModulationParams must be 16 bytes for WGSL alignment"); + +ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx) + : PostProcessEffect(ctx) { + const char* shader_code = R"( + struct VertexOutput { + @builtin(position) position: vec4<f32>, + @location(0) uv: vec2<f32>, + }; + + struct CommonUniforms { + resolution: vec2<f32>, + _pad0: f32, + _pad1: f32, + aspect_ratio: f32, + time: f32, + beat: f32, + audio_intensity: f32, + }; + + struct ThemeModulationParams { + theme_brightness: f32, + _pad0: f32, + _pad1: f32, + _pad2: f32, + }; + + @group(0) @binding(0) var inputSampler: sampler; + @group(0) @binding(1) var inputTexture: texture_2d<f32>; + @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; + @group(0) @binding(3) var<uniform> params: ThemeModulationParams; + 
+ @vertex + fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput { + var output: VertexOutput; + // Large triangle trick for fullscreen coverage + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, -1.0), + vec2<f32>(3.0, -1.0), + vec2<f32>(-1.0, 3.0) + ); + output.position = vec4<f32>(pos[vertexIndex], 0.0, 1.0); + output.uv = pos[vertexIndex] * 0.5 + 0.5; + return output; + } + + @fragment + fn fs_main(input: VertexOutput) -> @location(0) vec4<f32> { + let color = textureSample(inputTexture, inputSampler, input.uv); + // Apply theme brightness modulation + return vec4<f32>(color.rgb * params.theme_brightness, color.a); + } + )"; + + pipeline_ = + create_post_process_pipeline(ctx_.device, ctx_.format, shader_code); + + params_buffer_ = gpu_create_buffer( + ctx_.device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); +} + +void ThemeModulationEffect::update_bind_group(WGPUTextureView input_view) { + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, + uniforms_.get(), params_buffer_); +} + +void ThemeModulationEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + + // Alternate between bright and dark every 4 seconds (2 pattern changes) + // Music patterns change every 2 seconds at 120 BPM + float cycle_time = fmodf(uniforms.time, 8.0f); // 8 second cycle (4 patterns) + bool is_dark_section = (cycle_time >= 4.0f); // Dark for second half + + // Smooth transition between themes using a sine wave + float transition = + (std::sin(uniforms.time * 3.14159f / 4.0f) + 1.0f) * 0.5f; // 0.0 to 1.0 + float bright_value = 1.0f; + float dark_value = 0.35f; + float theme_brightness = + bright_value + (dark_value - bright_value) * transition; + + // Update params buffer + ThemeModulationParams params = {theme_brightness, {0.0f, 0.0f, 0.0f}}; + wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, ¶ms, + sizeof(params)); + + // Render + 
wgpuRenderPassEncoderSetPipeline(pass, pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} diff --git a/src/effects/theme_modulation_effect.h b/src/effects/theme_modulation_effect.h new file mode 100644 index 0000000..e4d4e0a --- /dev/null +++ b/src/effects/theme_modulation_effect.h @@ -0,0 +1,20 @@ +// This file is part of the 64k demo project. +// It implements a theme modulation effect that alternates between bright and +// dark. Pattern changes every 2 seconds, so we alternate every 4 seconds (2 +// patterns). + +#pragma once +#include "gpu/effect.h" +#include "gpu/post_process_helper.h" +#include "gpu/uniform_helper.h" + +class ThemeModulationEffect : public PostProcessEffect { + public: + ThemeModulationEffect(const GpuContext& ctx); + void render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) override; + void update_bind_group(WGPUTextureView input_view) override; + + private: + GpuBuffer params_buffer_; +}; diff --git a/src/effects/vignette_effect.cc b/src/effects/vignette_effect.cc new file mode 100644 index 0000000..0e5f68f --- /dev/null +++ b/src/effects/vignette_effect.cc @@ -0,0 +1,30 @@ +// This file is part of the 64k demo project. +// It implements the VignetteEffect. 
+ +#include "gpu/demo_effects.h" +#include "gpu/post_process_helper.h" +#include "gpu/gpu.h" + +VignetteEffect::VignetteEffect(const GpuContext& ctx) + : VignetteEffect(ctx, VignetteParams()) { +} + +VignetteEffect::VignetteEffect(const GpuContext& ctx, + const VignetteParams& params) + : PostProcessEffect(ctx), params_(params) { + params_buffer_.init(ctx_.device); + pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, + vignette_shader_wgsl); +} + +void VignetteEffect::render(WGPURenderPassEncoder pass, + const CommonPostProcessUniforms& uniforms) { + uniforms_.update(ctx_.queue, uniforms); + params_buffer_.update(ctx_.queue, params_); + PostProcessEffect::render(pass, uniforms); +} + +void VignetteEffect::update_bind_group(WGPUTextureView v) { + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(), + params_buffer_.get()); +} |
