8 files changed, 377 insertions, 341 deletions
diff --git a/cnn_v3/src/cnn_v3_effect.cc b/cnn_v3/src/cnn_v3_effect.cc
index e576ceb..fa1716f 100644
--- a/cnn_v3/src/cnn_v3_effect.cc
+++ b/cnn_v3/src/cnn_v3_effect.cc
@@ -22,18 +22,23 @@
 // Format: Conv(IN→OUT, KxK) has OUT*IN*K*K weights + OUT biases
 // Layout: OIHW order (out × in × kH × kW), biases appended
 // ---------------------------------------------------------------------------
-static const uint32_t kEnc0Weights  = 20 * 8  * 9 + 8;   // Conv(20→8,  3×3)+bias = 1448
-static const uint32_t kEnc1Weights  = 8  * 16 * 9 + 16;  // Conv(8→16,  3×3)+bias = 1168
-static const uint32_t kBnWeights    = 16 * 16 * 9 + 16;  // Conv(16→16, 3×3,dil=2)+bias = 2320
-static const uint32_t kDec1Weights  = 32 * 8  * 9 + 8;   // Conv(32→8,  3×3)+bias = 2312
-static const uint32_t kDec0Weights  = 16 * 4  * 9 + 4;   // Conv(16→4,  3×3)+bias = 580
+static const uint32_t kEnc0Weights =
+    20 * 8 * 9 + 8; // Conv(20→8,  3×3)+bias = 1448
+static const uint32_t kEnc1Weights =
+    8 * 16 * 9 + 16; // Conv(8→16,  3×3)+bias = 1168
+static const uint32_t kBnWeights =
+    16 * 16 * 9 + 16; // Conv(16→16, 3×3,dil=2)+bias = 2320
+static const uint32_t kDec1Weights =
+    32 * 8 * 9 + 8; // Conv(32→8,  3×3)+bias = 2312
+static const uint32_t kDec0Weights =
+    16 * 4 * 9 + 4; // Conv(16→4,  3×3)+bias = 580
 
-static const uint32_t kEnc0Offset  = 0;
-static const uint32_t kEnc1Offset  = kEnc0Offset  + kEnc0Weights;
-static const uint32_t kBnOffset    = kEnc1Offset  + kEnc1Weights;
-static const uint32_t kDec1Offset  = kBnOffset    + kBnWeights;
-static const uint32_t kDec0Offset  = kDec1Offset  + kDec1Weights;
-static const uint32_t kTotalF16    = kDec0Offset  + kDec0Weights;
+static const uint32_t kEnc0Offset = 0;
+static const uint32_t kEnc1Offset = kEnc0Offset + kEnc0Weights;
+static const uint32_t kBnOffset = kEnc1Offset + kEnc1Weights;
+static const uint32_t kDec1Offset = kBnOffset + kBnWeights;
+static const uint32_t kDec0Offset = kDec1Offset + kDec1Weights;
+static const uint32_t kTotalF16 = kDec0Offset + kDec0Weights;
 // = 1448 + 1168 + 2320 + 2312 + 580 = 7828 f16
 
 static const uint32_t kWeightsBufBytes = ((kTotalF16 + 1) / 2) * 4;
@@ -57,7 +62,7 @@ static WGPUShaderModule make_shader(WGPUDevice device, const char* wgsl) {
 
   WGPUShaderSourceWGSL src = {};
   src.chain.sType = WGPUSType_ShaderSourceWGSL;
-  src.code        = str_view(composed.c_str());
+  src.code = str_view(composed.c_str());
 
   WGPUShaderModuleDescriptor desc = {};
   desc.nextInChain = &src.chain;
@@ -69,7 +74,7 @@ static WGPUBindGroupLayout make_bgl(WGPUDevice device,
                                     uint32_t count) {
   WGPUBindGroupLayoutDescriptor desc = {};
   desc.entryCount = count;
-  desc.entries    = entries;
+  desc.entries = entries;
   return wgpuDeviceCreateBindGroupLayout(device, &desc);
 }
 
@@ -79,14 +84,15 @@ static WGPUComputePipeline make_compute_pipeline(WGPUDevice device,
                                                  WGPUBindGroupLayout bgl) {
   WGPUPipelineLayoutDescriptor pl_desc = {};
   pl_desc.bindGroupLayoutCount = 1;
-  pl_desc.bindGroupLayouts     = &bgl;
+  pl_desc.bindGroupLayouts = &bgl;
   WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(device, &pl_desc);
 
   WGPUComputePipelineDescriptor pipe_desc = {};
-  pipe_desc.layout                = pl;
-  pipe_desc.compute.module        = shader;
-  pipe_desc.compute.entryPoint    = str_view(entry);
-  WGPUComputePipeline pipe = wgpuDeviceCreateComputePipeline(device, &pipe_desc);
+  pipe_desc.layout = pl;
+  pipe_desc.compute.module = shader;
+  pipe_desc.compute.entryPoint = str_view(entry);
+  WGPUComputePipeline pipe =
+      wgpuDeviceCreateComputePipeline(device, &pipe_desc);
 
   wgpuPipelineLayoutRelease(pl);
   return pipe;
@@ -95,36 +101,36 @@ static WGPUComputePipeline make_compute_pipeline(WGPUDevice device,
 // BGL entry helpers
 static WGPUBindGroupLayoutEntry bgl_uint_tex(uint32_t binding) {
   WGPUBindGroupLayoutEntry e = {};
-  e.binding                    = binding;
-  e.visibility                 = WGPUShaderStage_Compute;
-  e.texture.sampleType         = WGPUTextureSampleType_Uint;
-  e.texture.viewDimension      = WGPUTextureViewDimension_2D;
+  e.binding = binding;
+  e.visibility = WGPUShaderStage_Compute;
+  e.texture.sampleType = WGPUTextureSampleType_Uint;
+  e.texture.viewDimension = WGPUTextureViewDimension_2D;
   return e;
 }
 static WGPUBindGroupLayoutEntry bgl_storage_buf(uint32_t binding) {
   WGPUBindGroupLayoutEntry e = {};
-  e.binding          = binding;
-  e.visibility       = WGPUShaderStage_Compute;
-  e.buffer.type      = WGPUBufferBindingType_ReadOnlyStorage;
+  e.binding = binding;
+  e.visibility = WGPUShaderStage_Compute; // visible to compute shaders only
+  e.buffer.type = WGPUBufferBindingType_ReadOnlyStorage; // read-only binding
   return e;
 }
 static WGPUBindGroupLayoutEntry bgl_uniform_buf(uint32_t binding,
                                                 uint64_t min_size) {
   WGPUBindGroupLayoutEntry e = {};
-  e.binding                 = binding;
-  e.visibility              = WGPUShaderStage_Compute;
-  e.buffer.type             = WGPUBufferBindingType_Uniform;
-  e.buffer.minBindingSize   = min_size;
+  e.binding = binding;
+  e.visibility = WGPUShaderStage_Compute;
+  e.buffer.type = WGPUBufferBindingType_Uniform;
+  e.buffer.minBindingSize = min_size;
   return e;
 }
-static WGPUBindGroupLayoutEntry bgl_storage_tex_write(
-    uint32_t binding, WGPUTextureFormat fmt) {
+static WGPUBindGroupLayoutEntry bgl_storage_tex_write(uint32_t binding,
+                                                      WGPUTextureFormat fmt) {
   WGPUBindGroupLayoutEntry e = {};
-  e.binding                          = binding;
-  e.visibility                       = WGPUShaderStage_Compute;
-  e.storageTexture.access            = WGPUStorageTextureAccess_WriteOnly;
-  e.storageTexture.format            = fmt;
-  e.storageTexture.viewDimension     = WGPUTextureViewDimension_2D;
+  e.binding = binding;
+  e.visibility = WGPUShaderStage_Compute;
+  e.storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  e.storageTexture.format = fmt;
+  e.storageTexture.viewDimension = WGPUTextureViewDimension_2D;
   return e;
 }
 
@@ -141,16 +147,16 @@ CNNv3Effect::CNNv3Effect(const GpuContext& ctx,
 
   const std::string& prefix =
       outputs.empty() ? std::string("cnn_v3") : outputs[0];
-  node_enc0_    = prefix + "_enc0";
+  node_enc0_ = prefix + "_enc0";
   node_enc1_lo_ = prefix + "_enc1_lo";
   node_enc1_hi_ = prefix + "_enc1_hi";
-  node_bn_lo_   = prefix + "_bn_lo";
-  node_bn_hi_   = prefix + "_bn_hi";
-  node_dec1_    = prefix + "_dec1";
+  node_bn_lo_ = prefix + "_bn_lo";
+  node_bn_hi_ = prefix + "_bn_hi";
+  node_dec1_ = prefix + "_dec1";
 
-  weights_buf_ = gpu_create_buffer(
-      ctx_.device, kWeightsBufBytes,
-      WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst);
+  weights_buf_ =
+      gpu_create_buffer(ctx_.device, kWeightsBufBytes,
+                        WGPUBufferUsage_Storage | WGPUBufferUsage_CopyDst);
 
   enc0_params_buf_.init(ctx_.device);
   enc1_params_buf_.init(ctx_.device);
@@ -166,7 +172,9 @@ CNNv3Effect::CNNv3Effect(const GpuContext& ctx,
   }
 
   enc1_params_.weight_offset = kEnc1Offset;
-  for (int i = 0; i < 16; ++i) { enc1_params_.gamma[i] = 1.0f; }
+  for (int i = 0; i < 16; ++i) {
+    enc1_params_.gamma[i] = 1.0f;
+  }
 
   bn_params_.weight_offset = kBnOffset;
 
@@ -177,7 +185,9 @@ CNNv3Effect::CNNv3Effect(const GpuContext& ctx,
   }
 
   dec0_params_.weight_offset = kDec0Offset;
-  for (int i = 0; i < 4; ++i) { dec0_params_.gamma[i] = 1.0f; }
+  for (int i = 0; i < 4; ++i) {
+    dec0_params_.gamma[i] = 1.0f;
+  }
 
   create_pipelines();
 
@@ -205,15 +215,15 @@ void CNNv3Effect::declare_nodes(NodeRegistry& registry) {
   const int H = registry.default_height();
 
   // enc0: rgba32uint full-res (8ch packed f16)
-  registry.declare_node(node_enc0_,    NodeType::GBUF_RGBA32UINT, W,     H);
+  registry.declare_node(node_enc0_, NodeType::GBUF_RGBA32UINT, W, H);
   // enc1: two rgba32uint half-res (8ch each = 16ch total)
   registry.declare_node(node_enc1_lo_, NodeType::GBUF_RGBA32UINT, W / 2, H / 2);
   registry.declare_node(node_enc1_hi_, NodeType::GBUF_RGBA32UINT, W / 2, H / 2);
   // bottleneck: two rgba32uint quarter-res (8ch each = 16ch total)
-  registry.declare_node(node_bn_lo_,   NodeType::GBUF_RGBA32UINT, W / 4, H / 4);
-  registry.declare_node(node_bn_hi_,   NodeType::GBUF_RGBA32UINT, W / 4, H / 4);
+  registry.declare_node(node_bn_lo_, NodeType::GBUF_RGBA32UINT, W / 4, H / 4);
+  registry.declare_node(node_bn_hi_, NodeType::GBUF_RGBA32UINT, W / 4, H / 4);
   // dec1: rgba32uint half-res (8ch packed f16)
-  registry.declare_node(node_dec1_,    NodeType::GBUF_RGBA32UINT, W / 2, H / 2);
+  registry.declare_node(node_dec1_, NodeType::GBUF_RGBA32UINT, W / 2, H / 2);
   // output_nodes_[0]: rgba16float full-res — declared externally by caller
 }
 
@@ -222,12 +232,13 @@ void CNNv3Effect::declare_nodes(NodeRegistry& registry) {
 // ---------------------------------------------------------------------------
 
 void CNNv3Effect::upload_weights(WGPUQueue queue, const void* data,
-                                  uint32_t size_bytes) {
+                                 uint32_t size_bytes) { // NOTE(review): size_bytes is written as-is; no check against kWeightsBufBytes here
   wgpuQueueWriteBuffer(queue, weights_buf_.buffer, 0, data, size_bytes);
 }
 
 void CNNv3Effect::load_film_mlp(const void* data, uint32_t size_bytes) {
-  if (size_bytes != sizeof(CNNv3FilmMlp)) return;
+  if (size_bytes != sizeof(CNNv3FilmMlp))
+    return; // size mismatch: mlp_ and mlp_loaded_ are left unchanged
   memcpy(&mlp_, data, sizeof(CNNv3FilmMlp));
   mlp_loaded_ = true;
 }
@@ -246,16 +257,19 @@ void CNNv3Effect::set_film_params(const CNNv3FiLMParams& fp) {
   float h[16];
   for (int j = 0; j < 16; ++j) {
     float s = mlp_.l0_b[j];
-    for (int i = 0; i < 5; ++i) s += mlp_.l0_w[j * 5 + i] * cond[i];
+    for (int i = 0; i < 5; ++i)
+      s += mlp_.l0_w[j * 5 + i] * cond[i];
     h[j] = s > 0.f ? s : 0.f;
   }
 
   // Layer 1: Linear(16→72)
-  // Output split: g_enc0(8)|b_enc0(8)|g_enc1(16)|b_enc1(16)|g_dec1(8)|b_dec1(8)|g_dec0(4)|b_dec0(4)
+  // Output split:
+  // g_enc0(8)|b_enc0(8)|g_enc1(16)|b_enc1(16)|g_dec1(8)|b_dec1(8)|g_dec0(4)|b_dec0(4)
   float film[72];
   for (int j = 0; j < 72; ++j) {
     float s = mlp_.l1_b[j];
-    for (int i = 0; i < 16; ++i) s += mlp_.l1_w[j * 16 + i] * h[i];
+    for (int i = 0; i < 16; ++i)
+      s += mlp_.l1_w[j * 16 + i] * h[i];
     film[j] = s;
   }
 
@@ -270,9 +284,11 @@ void CNNv3Effect::set_film_params(const CNNv3FiLMParams& fp) {
     enc0_params_.beta_hi[i] = p[i + 4];
   }
   p += 8;
-  for (int i = 0; i < 16; ++i) enc1_params_.gamma[i] = p[i];
+  for (int i = 0; i < 16; ++i)
+    enc1_params_.gamma[i] = p[i];
   p += 16;
-  for (int i = 0; i < 16; ++i) enc1_params_.beta[i] = p[i];
+  for (int i = 0; i < 16; ++i)
+    enc1_params_.beta[i] = p[i];
   p += 16;
   for (int i = 0; i < 4; ++i) {
     dec1_params_.gamma_lo[i] = p[i];
@@ -284,9 +300,11 @@ void CNNv3Effect::set_film_params(const CNNv3FiLMParams& fp) {
     dec1_params_.beta_hi[i] = p[i + 4];
   }
   p += 8;
-  for (int i = 0; i < 4; ++i) dec0_params_.gamma[i] = p[i];
+  for (int i = 0; i < 4; ++i)
+    dec0_params_.gamma[i] = p[i];
   p += 4;
-  for (int i = 0; i < 4; ++i) dec0_params_.beta[i]  = p[i];
+  for (int i = 0; i < 4; ++i)
+    dec0_params_.beta[i] = p[i];
 }
 
 // ---------------------------------------------------------------------------
@@ -307,27 +325,24 @@ void CNNv3Effect::render(WGPUCommandEncoder encoder,
   const int W = (int)params.resolution.x;
   const int H = (int)params.resolution.y;
 
-  auto dispatch = [&](WGPUComputePipeline pipe, WGPUBindGroup bg,
-                      int w, int h) {
+  auto dispatch = [&](WGPUComputePipeline pipe, WGPUBindGroup bg, int w,
+                      int h) { // records one compute pass on `encoder` per call
     WGPUComputePassDescriptor pass_desc = {};
     WGPUComputePassEncoder pass =
         wgpuCommandEncoderBeginComputePass(encoder, &pass_desc);
     wgpuComputePassEncoderSetPipeline(pass, pipe);
     wgpuComputePassEncoderSetBindGroup(pass, 0, bg, 0, nullptr);
-    wgpuComputePassEncoderDispatchWorkgroups(
-        pass,
-        (uint32_t)((w + 7) / 8),
-        (uint32_t)((h + 7) / 8),
-        1);
+    wgpuComputePassEncoderDispatchWorkgroups(pass, (uint32_t)((w + 7) / 8),
+                                             (uint32_t)((h + 7) / 8), 1); // w/8 and h/8 rounded up; assumes 8x8 workgroups in the shaders — TODO confirm
     wgpuComputePassEncoderEnd(pass);
     wgpuComputePassEncoderRelease(pass);
   };
 
-  dispatch(enc0_pipeline_.get(), enc0_bg_.get(),  W,     H);
-  dispatch(enc1_pipeline_.get(), enc1_bg_.get(),  W / 2, H / 2);
-  dispatch(bn_pipeline_.get(),   bn_bg_.get(),    W / 4, H / 4);
-  dispatch(dec1_pipeline_.get(), dec1_bg_.get(),  W / 2, H / 2);
-  dispatch(dec0_pipeline_.get(), dec0_bg_.get(),  W,     H);
+  dispatch(enc0_pipeline_.get(), enc0_bg_.get(), W, H);
+  dispatch(enc1_pipeline_.get(), enc1_bg_.get(), W / 2, H / 2);
+  dispatch(bn_pipeline_.get(), bn_bg_.get(), W / 4, H / 4);
+  dispatch(dec1_pipeline_.get(), dec1_bg_.get(), W / 2, H / 2);
+  dispatch(dec0_pipeline_.get(), dec0_bg_.get(), W, H);
 }
 
 // ---------------------------------------------------------------------------
@@ -443,40 +458,39 @@ void CNNv3Effect::create_pipelines() {
 static void bg_tex(WGPUBindGroupEntry& e, uint32_t binding,
                    WGPUTextureView view) {
   e = {};
-  e.binding     = binding;
+  e.binding = binding;
   e.textureView = view;
 }
 static void bg_buf(WGPUBindGroupEntry& e, uint32_t binding, WGPUBuffer buf,
                    uint64_t size) {
   e = {};
   e.binding = binding;
-  e.buffer  = buf;
-  e.size    = size;
+  e.buffer = buf;
+  e.size = size;
 }
 
 void CNNv3Effect::update_bind_groups(NodeRegistry& nodes) {
   WGPUDevice dev = ctx_.device;
 
-  WGPUTextureView feat0_view   = nodes.get_view(input_nodes_[0]);
-  WGPUTextureView feat1_view   = nodes.get_view(input_nodes_[1]);
-  WGPUTextureView enc0_view    = nodes.get_view(node_enc0_);
+  WGPUTextureView feat0_view = nodes.get_view(input_nodes_[0]);
+  WGPUTextureView feat1_view = nodes.get_view(input_nodes_[1]);
+  WGPUTextureView enc0_view = nodes.get_view(node_enc0_);
   WGPUTextureView enc1_lo_view = nodes.get_view(node_enc1_lo_);
   WGPUTextureView enc1_hi_view = nodes.get_view(node_enc1_hi_);
-  WGPUTextureView bn_lo_view   = nodes.get_view(node_bn_lo_);
-  WGPUTextureView bn_hi_view   = nodes.get_view(node_bn_hi_);
-  WGPUTextureView dec1_view    = nodes.get_view(node_dec1_);
-  WGPUTextureView out_view     = nodes.get_view(output_nodes_[0]);
+  WGPUTextureView bn_lo_view = nodes.get_view(node_bn_lo_);
+  WGPUTextureView bn_hi_view = nodes.get_view(node_bn_hi_);
+  WGPUTextureView dec1_view = nodes.get_view(node_dec1_);
+  WGPUTextureView out_view = nodes.get_view(output_nodes_[0]);
 
   WGPUBuffer wb = weights_buf_.buffer;
 
   auto make_bg = [&](WGPUComputePipeline pipe, WGPUBindGroupEntry* e,
                      uint32_t count) -> WGPUBindGroup {
-    WGPUBindGroupLayout bgl =
-        wgpuComputePipelineGetBindGroupLayout(pipe, 0);
+    WGPUBindGroupLayout bgl = wgpuComputePipelineGetBindGroupLayout(pipe, 0);
     WGPUBindGroupDescriptor desc = {};
-    desc.layout     = bgl;
+    desc.layout = bgl;
     desc.entryCount = count;
-    desc.entries    = e;
+    desc.entries = e;
     WGPUBindGroup bg = wgpuDeviceCreateBindGroup(dev, &desc);
     wgpuBindGroupLayoutRelease(bgl);
     return bg;
@@ -504,7 +518,8 @@ void CNNv3Effect::update_bind_groups(NodeRegistry& nodes) {
     enc1_bg_.replace(make_bg(enc1_pipeline_.get(), e, 5));
   }
 
-  // bottleneck: enc1_lo(B0), enc1_hi(B1), weights(B2), params(B3), bn_lo(B4), bn_hi(B5)
+  // bottleneck: enc1_lo(B0), enc1_hi(B1), weights(B2), params(B3), bn_lo(B4),
+  // bn_hi(B5)
   {
     WGPUBindGroupEntry e[6] = {};
     bg_tex(e[0], 0, enc1_lo_view);
diff --git a/cnn_v3/src/cnn_v3_effect.h b/cnn_v3/src/cnn_v3_effect.h
index 589680c..ac0166f 100644
--- a/cnn_v3/src/cnn_v3_effect.h
+++ b/cnn_v3/src/cnn_v3_effect.h
@@ -38,12 +38,12 @@
 //   offset 80: beta_hi  (vec4f)
 //   total: 96 bytes
 struct CnnV3Params8ch {
-  uint32_t weight_offset;  // offset 0
-  uint32_t _pad[7];        // offsets 4-31
-  float gamma_lo[4];       // offset 32
-  float gamma_hi[4];       // offset 48
-  float beta_lo[4];        // offset 64
-  float beta_hi[4];        // offset 80
+  uint32_t weight_offset; // offset 0; presumably an f16 element index into the weights buffer — confirm
+  uint32_t _pad[7];       // offsets 4-31 (pads the header to 32 bytes)
+  float gamma_lo[4];      // offset 32
+  float gamma_hi[4];      // offset 48
+  float beta_lo[4];       // offset 64
+  float beta_hi[4];       // offset 80
 };
 static_assert(sizeof(CnnV3Params8ch) == 96, "CnnV3Params8ch must be 96 bytes");
 
@@ -56,12 +56,13 @@ static_assert(sizeof(CnnV3Params8ch) == 96, "CnnV3Params8ch must be 96 bytes");
 //   offset 96: beta_0..3  (4x vec4f = 64 bytes)
 //   total: 160 bytes
 struct CnnV3Params16ch {
-  uint32_t weight_offset;  // offset 0
-  uint32_t _pad[7];        // offsets 4-31
-  float gamma[16];         // offsets 32-95
-  float beta[16];          // offsets 96-159
+  uint32_t weight_offset; // offset 0; set from kEnc1Offset (a count of f16 weights)
+  uint32_t _pad[7];       // offsets 4-31 (pads the header to 32 bytes)
+  float gamma[16];        // offsets 32-95
+  float beta[16];         // offsets 96-159
 };
-static_assert(sizeof(CnnV3Params16ch) == 160, "CnnV3Params16ch must be 160 bytes");
+static_assert(sizeof(CnnV3Params16ch) == 160,
+              "CnnV3Params16ch must be 160 bytes");
 
 // dec0: 4-channel FiLM
 //
@@ -72,10 +73,10 @@ static_assert(sizeof(CnnV3Params16ch) == 160, "CnnV3Params16ch must be 160 bytes
 //   offset 48: beta  (vec4f)
 //   total: 64 bytes
 struct CnnV3Params4ch {
-  uint32_t weight_offset;  // offset 0
-  uint32_t _pad[7];        // offsets 4-31
-  float gamma[4];          // offset 32
-  float beta[4];           // offset 48
+  uint32_t weight_offset; // offset 0; set from kDec0Offset (a count of f16 weights)
+  uint32_t _pad[7];       // offsets 4-31 (pads the header to 32 bytes)
+  float gamma[4];         // offset 32
+  float beta[4];          // offset 48
 };
 static_assert(sizeof(CnnV3Params4ch) == 64, "CnnV3Params4ch must be 64 bytes");
 
@@ -90,20 +91,20 @@ static_assert(sizeof(CnnV3ParamsBn) == 16, "CnnV3ParamsBn must be 16 bytes");
 // FiLM conditioning inputs (CPU-side, uploaded via set_film_params each frame)
 // ---------------------------------------------------------------------------
 struct CNNv3FiLMParams {
-  float beat_phase      = 0.0f;  // 0-1 within current beat
-  float beat_norm       = 0.0f;  // beat_time / 8.0, normalized 8-beat cycle
-  float audio_intensity = 0.0f;  // peak audio level 0-1
-  float style_p0        = 0.0f;  // user-defined style param
-  float style_p1        = 0.0f;  // user-defined style param
+  float beat_phase = 0.0f;      // 0-1 within current beat
+  float beat_norm = 0.0f;       // beat_time / 8.0, normalized 8-beat cycle
+  float audio_intensity = 0.0f; // peak audio level 0-1
+  float style_p0 = 0.0f;        // user-defined style param
+  float style_p1 = 0.0f;        // user-defined style param
 };
 
 // FiLM MLP weights: Linear(5→16)→ReLU→Linear(16→72).
 // Loaded from cnn_v3_film_mlp.bin (1320 f32 = 5280 bytes).
 // Layout: l0_w(80) | l0_b(16) | l1_w(1152) | l1_b(72), all row-major f32.
 struct CNNv3FilmMlp {
-  float l0_w[16 * 5];   // (16, 5) row-major
+  float l0_w[16 * 5]; // (16, 5) row-major
   float l0_b[16];
-  float l1_w[72 * 16];  // (72, 16) row-major
+  float l1_w[72 * 16]; // (72, 16) row-major
   float l1_b[72];
 };
 static_assert(sizeof(CNNv3FilmMlp) == 1320 * 4, "CNNv3FilmMlp size mismatch");
@@ -153,21 +154,21 @@ class CNNv3Effect : public Effect {
   BindGroup dec0_bg_;
 
   // Params uniform buffers (one per pass)
-  UniformBuffer<CnnV3Params8ch>  enc0_params_buf_;
+  UniformBuffer<CnnV3Params8ch> enc0_params_buf_;
   UniformBuffer<CnnV3Params16ch> enc1_params_buf_;
-  UniformBuffer<CnnV3ParamsBn>   bn_params_buf_;
-  UniformBuffer<CnnV3Params8ch>  dec1_params_buf_;
-  UniformBuffer<CnnV3Params4ch>  dec0_params_buf_;
+  UniformBuffer<CnnV3ParamsBn> bn_params_buf_;
+  UniformBuffer<CnnV3Params8ch> dec1_params_buf_;
+  UniformBuffer<CnnV3Params4ch> dec0_params_buf_;
 
   // Shared packed-f16 weights (storage buffer, read-only in all shaders)
   GpuBuffer weights_buf_;
 
   // Per-pass params shadow (updated by set_film_params, uploaded in render)
-  CnnV3Params8ch  enc0_params_{};
+  CnnV3Params8ch enc0_params_{};
   CnnV3Params16ch enc1_params_{};
-  CnnV3ParamsBn   bn_params_{};
-  CnnV3Params8ch  dec1_params_{};
-  CnnV3Params4ch  dec0_params_{};
+  CnnV3ParamsBn bn_params_{};
+  CnnV3Params8ch dec1_params_{};
+  CnnV3Params4ch dec0_params_{};
 
   void create_pipelines();
   void update_bind_groups(NodeRegistry& nodes);
diff --git a/cnn_v3/src/gbuf_deferred_effect.cc b/cnn_v3/src/gbuf_deferred_effect.cc
index de6bd29..561f660 100644
--- a/cnn_v3/src/gbuf_deferred_effect.cc
+++ b/cnn_v3/src/gbuf_deferred_effect.cc
@@ -1,4 +1,5 @@
-// GBufDeferredEffect — simple deferred render: albedo * shadow from packed G-buffer.
+// GBufDeferredEffect — simple deferred render: albedo * shadow from packed
+// G-buffer.
 
 #include "gbuf_deferred_effect.h"
 #include "gpu/gpu.h"
@@ -10,22 +11,24 @@ extern const char* gbuf_deferred_wgsl;
 struct GBufDeferredUniforms {
   float resolution[2];
 };
-static_assert(sizeof(GBufDeferredUniforms) == 8, "GBufDeferredUniforms must be 8 bytes");
+static_assert(sizeof(GBufDeferredUniforms) == 8,
+              "GBufDeferredUniforms must be 8 bytes");
 
 static WGPUBindGroupLayoutEntry bgl_uint_tex(uint32_t binding) {
   WGPUBindGroupLayoutEntry e = {};
-  e.binding               = binding;
-  e.visibility            = WGPUShaderStage_Fragment;
-  e.texture.sampleType    = WGPUTextureSampleType_Uint;
+  e.binding = binding;
+  e.visibility = WGPUShaderStage_Fragment;
+  e.texture.sampleType = WGPUTextureSampleType_Uint;
   e.texture.viewDimension = WGPUTextureViewDimension_2D;
   return e;
 }
 
-static WGPUBindGroupLayoutEntry bgl_uniform(uint32_t binding, uint64_t min_size) {
+static WGPUBindGroupLayoutEntry bgl_uniform(uint32_t binding,
+                                            uint64_t min_size) {
   WGPUBindGroupLayoutEntry e = {};
-  e.binding               = binding;
-  e.visibility            = WGPUShaderStage_Fragment;
-  e.buffer.type           = WGPUBufferBindingType_Uniform;
+  e.binding = binding;
+  e.visibility = WGPUShaderStage_Fragment;
+  e.buffer.type = WGPUBufferBindingType_Uniform;
   e.buffer.minBindingSize = min_size;
   return e;
 }
@@ -44,40 +47,43 @@ GBufDeferredEffect::GBufDeferredEffect(const GpuContext& ctx,
   };
   WGPUBindGroupLayoutDescriptor bgl_desc = {};
   bgl_desc.entryCount = 3;
-  bgl_desc.entries    = entries;
-  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+  bgl_desc.entries = entries;
+  WGPUBindGroupLayout bgl =
+      wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
 
   WGPUPipelineLayoutDescriptor pl_desc = {};
   pl_desc.bindGroupLayoutCount = 1;
-  pl_desc.bindGroupLayouts     = &bgl;
+  pl_desc.bindGroupLayouts = &bgl;
   WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
 
   WGPUShaderSourceWGSL wgsl_src = {};
   wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
-  const std::string composed = ShaderComposer::Get().Compose({}, gbuf_deferred_wgsl);
-  wgsl_src.code        = str_view(composed.c_str());
+  const std::string composed =
+      ShaderComposer::Get().Compose({}, gbuf_deferred_wgsl);
+  wgsl_src.code = str_view(composed.c_str());
   WGPUShaderModuleDescriptor shader_desc = {};
   shader_desc.nextInChain = &wgsl_src.chain;
-  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+  WGPUShaderModule shader =
+      wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
 
   WGPUColorTargetState target = {};
-  target.format    = WGPUTextureFormat_RGBA8Unorm;
+  target.format = WGPUTextureFormat_RGBA8Unorm;
   target.writeMask = WGPUColorWriteMask_All;
 
   WGPUFragmentState frag = {};
-  frag.module      = shader;
-  frag.entryPoint  = str_view("fs_main");
+  frag.module = shader;
+  frag.entryPoint = str_view("fs_main");
   frag.targetCount = 1;
-  frag.targets     = &target;
+  frag.targets = &target;
 
   WGPURenderPipelineDescriptor pipe_desc = {};
-  pipe_desc.layout               = pl;
-  pipe_desc.vertex.module        = shader;
-  pipe_desc.vertex.entryPoint    = str_view("vs_main");
-  pipe_desc.fragment             = &frag;
-  pipe_desc.primitive.topology   = WGPUPrimitiveTopology_TriangleList;
-  pipe_desc.multisample.count    = 1;
-  pipe_desc.multisample.mask     = UINT32_MAX;
+  pipe_desc.layout = pl;
+  pipe_desc.vertex.module = shader;
+  pipe_desc.vertex.entryPoint = str_view("vs_main");
+  pipe_desc.fragment = &frag;
+  pipe_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
+  pipe_desc.multisample.count = 1;
+  pipe_desc.multisample.mask = UINT32_MAX;
 
   pipeline_.set(wgpuDeviceCreateRenderPipeline(ctx_.device, &pipe_desc));
 
@@ -89,46 +95,47 @@ GBufDeferredEffect::GBufDeferredEffect(const GpuContext& ctx,
 void GBufDeferredEffect::render(WGPUCommandEncoder encoder,
                                 const UniformsSequenceParams& params,
                                 NodeRegistry& nodes) {
-  WGPUTextureView feat0_view  = nodes.get_view(input_nodes_[0]);
-  WGPUTextureView feat1_view  = nodes.get_view(input_nodes_[1]);
+  WGPUTextureView feat0_view = nodes.get_view(input_nodes_[0]);
+  WGPUTextureView feat1_view = nodes.get_view(input_nodes_[1]);
   WGPUTextureView output_view = nodes.get_view(output_nodes_[0]);
 
-  // Upload resolution uniform into the base class uniforms buffer (first 8 bytes).
+  // Upload resolution uniform into the base class uniforms buffer (first 8
+  // bytes).
   GBufDeferredUniforms u;
   u.resolution[0] = params.resolution.x;
   u.resolution[1] = params.resolution.y;
-  wgpuQueueWriteBuffer(ctx_.queue, uniforms_buffer_.get().buffer, 0,
-                       &u, sizeof(u));
+  wgpuQueueWriteBuffer(ctx_.queue, uniforms_buffer_.get().buffer, 0, &u,
+                       sizeof(u));
 
   WGPUBindGroupLayout bgl =
       wgpuRenderPipelineGetBindGroupLayout(pipeline_.get(), 0);
 
   WGPUBindGroupEntry bg_entries[3] = {};
-  bg_entries[0].binding     = 0;
+  bg_entries[0].binding = 0;
   bg_entries[0].textureView = feat0_view;
-  bg_entries[1].binding     = 1;
+  bg_entries[1].binding = 1;
   bg_entries[1].textureView = feat1_view;
-  bg_entries[2].binding     = 2;
-  bg_entries[2].buffer      = uniforms_buffer_.get().buffer;
-  bg_entries[2].size        = sizeof(GBufDeferredUniforms);
+  bg_entries[2].binding = 2;
+  bg_entries[2].buffer = uniforms_buffer_.get().buffer;
+  bg_entries[2].size = sizeof(GBufDeferredUniforms);
 
   WGPUBindGroupDescriptor bg_desc = {};
-  bg_desc.layout     = bgl;
+  bg_desc.layout = bgl;
   bg_desc.entryCount = 3;
-  bg_desc.entries    = bg_entries;
+  bg_desc.entries = bg_entries;
   bind_group_.replace(wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc));
   wgpuBindGroupLayoutRelease(bgl);
 
   WGPURenderPassColorAttachment color_att = {};
-  color_att.view       = output_view;
-  color_att.loadOp     = WGPULoadOp_Clear;
-  color_att.storeOp    = WGPUStoreOp_Store;
+  color_att.view = output_view;
+  color_att.loadOp = WGPULoadOp_Clear;
+  color_att.storeOp = WGPUStoreOp_Store;
   color_att.clearValue = {0.0f, 0.0f, 0.0f, 1.0f};
   color_att.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
 
   WGPURenderPassDescriptor pass_desc = {};
   pass_desc.colorAttachmentCount = 1;
-  pass_desc.colorAttachments     = &color_att;
+  pass_desc.colorAttachments = &color_att;
 
   WGPURenderPassEncoder pass =
       wgpuCommandEncoderBeginRenderPass(encoder, &pass_desc);
diff --git a/cnn_v3/src/gbuf_deferred_effect.h b/cnn_v3/src/gbuf_deferred_effect.h
index 4daf13d..d0368ff 100644
--- a/cnn_v3/src/gbuf_deferred_effect.h
+++ b/cnn_v3/src/gbuf_deferred_effect.h
@@ -1,5 +1,6 @@
 // GBufDeferredEffect — simple deferred render from packed G-buffer.
-// Inputs: feat_tex0, feat_tex1 (rgba32uint). Output: albedo * shadow (rgba8unorm).
+// Inputs: feat_tex0, feat_tex1 (rgba32uint). Output: albedo * shadow
+// (rgba8unorm).
 
 #pragma once
 #include "gpu/effect.h"
@@ -10,11 +11,10 @@ class GBufDeferredEffect : public Effect {
  public:
   GBufDeferredEffect(const GpuContext& ctx,
                      const std::vector<std::string>& inputs,
-                     const std::vector<std::string>& outputs,
-                     float start_time, float end_time);
+                     const std::vector<std::string>& outputs, float start_time,
+                     float end_time);
 
-  void render(WGPUCommandEncoder encoder,
-              const UniformsSequenceParams& params,
+  void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params,
               NodeRegistry& nodes) override;
 
  private:
diff --git a/cnn_v3/src/gbuf_view_effect.cc b/cnn_v3/src/gbuf_view_effect.cc
index ccf80b0..4c23322 100644
--- a/cnn_v3/src/gbuf_view_effect.cc
+++ b/cnn_v3/src/gbuf_view_effect.cc
@@ -19,28 +19,28 @@ extern const char* gbuf_view_wgsl;
 // BGL entry: texture_2d<u32> read binding (fragment stage)
 static WGPUBindGroupLayoutEntry bgl_uint_tex_frag(uint32_t binding) {
   WGPUBindGroupLayoutEntry e = {};
-  e.binding               = binding;
-  e.visibility            = WGPUShaderStage_Fragment;
-  e.texture.sampleType    = WGPUTextureSampleType_Uint;
+  e.binding = binding;
+  e.visibility = WGPUShaderStage_Fragment;
+  e.texture.sampleType = WGPUTextureSampleType_Uint;
   e.texture.viewDimension = WGPUTextureViewDimension_2D;
   return e;
 }
 
 // BGL entry: uniform buffer (fragment stage)
 static WGPUBindGroupLayoutEntry bgl_uniform_frag(uint32_t binding,
-                                                  uint64_t min_size) {
+                                                 uint64_t min_size) {
   WGPUBindGroupLayoutEntry e = {};
-  e.binding               = binding;
-  e.visibility            = WGPUShaderStage_Fragment;
-  e.buffer.type           = WGPUBufferBindingType_Uniform;
+  e.binding = binding;
+  e.visibility = WGPUShaderStage_Fragment;
+  e.buffer.type = WGPUBufferBindingType_Uniform;
   e.buffer.minBindingSize = min_size;
   return e;
 }
 
 GBufViewEffect::GBufViewEffect(const GpuContext& ctx,
-                                const std::vector<std::string>& inputs,
-                                const std::vector<std::string>& outputs,
-                                float start_time, float end_time)
+                               const std::vector<std::string>& inputs,
+                               const std::vector<std::string>& outputs,
+                               float start_time, float end_time)
     : Effect(ctx, inputs, outputs, start_time, end_time) {
   HEADLESS_RETURN_IF_NULL(ctx_.device);
 
@@ -48,24 +48,26 @@ GBufViewEffect::GBufViewEffect(const GpuContext& ctx,
   WGPUBindGroupLayoutEntry entries[3] = {
       bgl_uint_tex_frag(0),
       bgl_uint_tex_frag(1),
-      bgl_uniform_frag(2, 8),  // only resolution (vec2f = 8 bytes) is read
+      bgl_uniform_frag(2, 8), // only resolution (vec2f = 8 bytes) is read
   };
   WGPUBindGroupLayoutDescriptor bgl_desc = {};
   bgl_desc.entryCount = 3;
-  bgl_desc.entries    = entries;
-  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+  bgl_desc.entries = entries;
+  WGPUBindGroupLayout bgl =
+      wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
 
   // Pipeline layout
   WGPUPipelineLayoutDescriptor pl_desc = {};
   pl_desc.bindGroupLayoutCount = 1;
-  pl_desc.bindGroupLayouts     = &bgl;
+  pl_desc.bindGroupLayouts = &bgl;
   WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(ctx_.device, &pl_desc);
 
   // Shader module
   WGPUShaderSourceWGSL wgsl_src = {};
   wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
-  const std::string composed = ShaderComposer::Get().Compose({}, gbuf_view_wgsl);
-  wgsl_src.code        = str_view(composed.c_str());
+  const std::string composed =
+      ShaderComposer::Get().Compose({}, gbuf_view_wgsl);
+  wgsl_src.code = str_view(composed.c_str());
   WGPUShaderModuleDescriptor shader_desc = {};
   shader_desc.nextInChain = &wgsl_src.chain;
   WGPUShaderModule shader =
@@ -73,23 +75,23 @@ GBufViewEffect::GBufViewEffect(const GpuContext& ctx,
 
   // Render pipeline
   WGPUColorTargetState target = {};
-  target.format    = WGPUTextureFormat_RGBA8Unorm;
+  target.format = WGPUTextureFormat_RGBA8Unorm;
   target.writeMask = WGPUColorWriteMask_All;
 
   WGPUFragmentState frag = {};
-  frag.module      = shader;
-  frag.entryPoint  = str_view("fs_main");
+  frag.module = shader;
+  frag.entryPoint = str_view("fs_main");
   frag.targetCount = 1;
-  frag.targets     = &target;
+  frag.targets = &target;
 
   WGPURenderPipelineDescriptor pipe_desc = {};
-  pipe_desc.layout               = pl;
-  pipe_desc.vertex.module        = shader;
-  pipe_desc.vertex.entryPoint    = str_view("vs_main");
-  pipe_desc.fragment             = &frag;
-  pipe_desc.primitive.topology   = WGPUPrimitiveTopology_TriangleList;
-  pipe_desc.multisample.count    = 1;
-  pipe_desc.multisample.mask     = UINT32_MAX;
+  pipe_desc.layout = pl;
+  pipe_desc.vertex.module = shader;
+  pipe_desc.vertex.entryPoint = str_view("vs_main");
+  pipe_desc.fragment = &frag;
+  pipe_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
+  pipe_desc.multisample.count = 1;
+  pipe_desc.multisample.mask = UINT32_MAX;
 
   pipeline_.set(wgpuDeviceCreateRenderPipeline(ctx_.device, &pipe_desc));
 
@@ -99,10 +101,10 @@ GBufViewEffect::GBufViewEffect(const GpuContext& ctx,
 }
 
 void GBufViewEffect::render(WGPUCommandEncoder encoder,
-                             const UniformsSequenceParams& params,
-                             NodeRegistry& nodes) {
-  WGPUTextureView feat0_view  = nodes.get_view(input_nodes_[0]);
-  WGPUTextureView feat1_view  = nodes.get_view(input_nodes_[1]);
+                            const UniformsSequenceParams& params,
+                            NodeRegistry& nodes) {
+  WGPUTextureView feat0_view = nodes.get_view(input_nodes_[0]);
+  WGPUTextureView feat1_view = nodes.get_view(input_nodes_[1]);
   WGPUTextureView output_view = nodes.get_view(output_nodes_[0]);
 
   // Rebuild bind group (views may change with ping-pong or resize)
@@ -110,31 +112,31 @@ void GBufViewEffect::render(WGPUCommandEncoder encoder,
       wgpuRenderPipelineGetBindGroupLayout(pipeline_.get(), 0);
 
   WGPUBindGroupEntry bg_entries[3] = {};
-  bg_entries[0].binding     = 0;
+  bg_entries[0].binding = 0;
   bg_entries[0].textureView = feat0_view;
-  bg_entries[1].binding     = 1;
+  bg_entries[1].binding = 1;
   bg_entries[1].textureView = feat1_view;
-  bg_entries[2].binding     = 2;
-  bg_entries[2].buffer      = uniforms_buffer_.get().buffer;
-  bg_entries[2].size        = sizeof(UniformsSequenceParams);
+  bg_entries[2].binding = 2;
+  bg_entries[2].buffer = uniforms_buffer_.get().buffer;
+  bg_entries[2].size = sizeof(UniformsSequenceParams);
 
   WGPUBindGroupDescriptor bg_desc = {};
-  bg_desc.layout     = bgl;
+  bg_desc.layout = bgl;
   bg_desc.entryCount = 3;
-  bg_desc.entries    = bg_entries;
+  bg_desc.entries = bg_entries;
   bind_group_.replace(wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc));
   wgpuBindGroupLayoutRelease(bgl);
 
   WGPURenderPassColorAttachment color_att = {};
-  color_att.view         = output_view;
-  color_att.loadOp       = WGPULoadOp_Clear;
-  color_att.storeOp      = WGPUStoreOp_Store;
-  color_att.clearValue   = {0.0f, 0.0f, 0.0f, 1.0f};
-  color_att.depthSlice   = WGPU_DEPTH_SLICE_UNDEFINED;
+  color_att.view = output_view;
+  color_att.loadOp = WGPULoadOp_Clear;
+  color_att.storeOp = WGPUStoreOp_Store;
+  color_att.clearValue = {0.0f, 0.0f, 0.0f, 1.0f};
+  color_att.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
 
   WGPURenderPassDescriptor pass_desc = {};
   pass_desc.colorAttachmentCount = 1;
-  pass_desc.colorAttachments     = &color_att;
+  pass_desc.colorAttachments = &color_att;
 
   WGPURenderPassEncoder pass =
       wgpuCommandEncoderBeginRenderPass(encoder, &pass_desc);
diff --git a/cnn_v3/src/gbuf_view_effect.h b/cnn_v3/src/gbuf_view_effect.h
index d4d8139..a8fd5c9 100644
--- a/cnn_v3/src/gbuf_view_effect.h
+++ b/cnn_v3/src/gbuf_view_effect.h
@@ -1,6 +1,7 @@
 // GBufViewEffect: Visualizes G-buffer feature textures as a 4×5 channel grid.
-// Inputs: feat_tex0 (rgba32uint, ch 0-7 f16), feat_tex1 (rgba32uint, ch 8-19 unorm8)
-// Output: rgba8unorm tiled channel visualization (downscaled 4× per channel)
+// Inputs: feat_tex0 (rgba32uint, ch 0-7 f16), feat_tex1 (rgba32uint, ch 8-19
+// unorm8)
+// Output: rgba8unorm tiled channel visualization (downscaled 4× per channel)
 
 #pragma once
 
@@ -10,16 +11,14 @@
 
 class GBufViewEffect : public Effect {
  public:
-  GBufViewEffect(const GpuContext& ctx,
-                 const std::vector<std::string>& inputs,
-                 const std::vector<std::string>& outputs,
-                 float start_time, float end_time);
+  GBufViewEffect(const GpuContext& ctx, const std::vector<std::string>& inputs,
+                 const std::vector<std::string>& outputs, float start_time,
+                 float end_time);
 
-  void render(WGPUCommandEncoder encoder,
-              const UniformsSequenceParams& params,
+  void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params,
               NodeRegistry& nodes) override;
 
  private:
   RenderPipeline pipeline_;
-  BindGroup       bind_group_;
+  BindGroup bind_group_;
 };
diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc
index 82ad8b1..395c8bc 100644
--- a/cnn_v3/src/gbuffer_effect.cc
+++ b/cnn_v3/src/gbuffer_effect.cc
@@ -1,5 +1,6 @@
 // GBufferEffect implementation
-// Rasterizes proxy geometry to MRT G-buffer, then packs into CNN v3 feature textures.
+// Rasterizes proxy geometry to MRT G-buffer, then packs into CNN v3 feature
+// textures.
 
 #include "gbuffer_effect.h"
 #include "3d/object.h"
@@ -10,9 +11,10 @@
 #include <cstring>
 #include <vector>
 
-// Shader source (loaded from asset at runtime — declared extern by the build system)
-// For standalone use outside the asset system, the caller must ensure the WGSL
-// source strings are available.  They are declared here as weak-linkable externs.
+// Shader source (loaded from asset at runtime — declared extern by the build
+// system). For standalone use outside the asset system, the caller must ensure
+// the WGSL source strings are available.  They are declared here as
+// weak-linkable externs.
 extern const char* gbuf_raster_wgsl;
 extern const char* gbuf_shadow_wgsl;
 extern const char* gbuf_pack_wgsl;
@@ -20,7 +22,8 @@ extern const char* gbuf_pack_wgsl;
 // Maximum number of objects the G-buffer supports per frame.
 static const int kGBufMaxObjects = 256;
 
-// ObjectData struct that mirrors the WGSL layout in gbuf_raster.wgsl and renderer.h
+// ObjectData struct that mirrors the WGSL layout in gbuf_raster.wgsl and
+// renderer.h
 struct GBufObjectData {
   mat4 model;
   mat4 inv_model;
@@ -38,7 +41,7 @@ struct GBufGlobalUniforms {
   mat4 view_proj;
   mat4 inv_view_proj;
   vec4 camera_pos_time;
-  vec4 params;        // x = num_objects
+  vec4 params; // x = num_objects
   vec2 resolution;
   vec2 padding;
 };
@@ -56,12 +59,12 @@ GBufferEffect::GBufferEffect(const GpuContext& ctx,
 
   // Derive internal node name prefix from the first output name.
   const std::string& prefix = outputs.empty() ? "gbuf" : outputs[0];
-  node_albedo_     = prefix + "_albedo";
+  node_albedo_ = prefix + "_albedo";
   node_normal_mat_ = prefix + "_normal_mat";
-  node_depth_      = prefix + "_depth";
-  node_shadow_     = prefix + "_shadow";
-  node_transp_     = prefix + "_transp";
-  node_prev_tex_   = prefix + "_prev";
+  node_depth_ = prefix + "_depth";
+  node_shadow_ = prefix + "_shadow";
+  node_transp_ = prefix + "_transp";
+  node_prev_tex_ = prefix + "_prev";
   // Allocate GPU buffers for scene data.
   global_uniforms_buf_ =
       gpu_create_buffer(ctx_.device, sizeof(GBufGlobalUniforms),
@@ -83,11 +86,11 @@ GBufferEffect::GBufferEffect(const GpuContext& ctx,
 }
 
 void GBufferEffect::declare_nodes(NodeRegistry& registry) {
-  registry.declare_node(node_albedo_,     NodeType::GBUF_ALBEDO,    -1, -1);
-  registry.declare_node(node_normal_mat_, NodeType::GBUF_ALBEDO,    -1, -1);
-  registry.declare_node(node_depth_,      NodeType::GBUF_DEPTH32,   -1, -1);
-  registry.declare_node(node_shadow_,     NodeType::GBUF_R8,        -1, -1);
-  registry.declare_node(node_transp_,     NodeType::GBUF_R8,        -1, -1);
+  registry.declare_node(node_albedo_, NodeType::GBUF_ALBEDO, -1, -1);
+  registry.declare_node(node_normal_mat_, NodeType::GBUF_ALBEDO, -1, -1);
+  registry.declare_node(node_depth_, NodeType::GBUF_DEPTH32, -1, -1);
+  registry.declare_node(node_shadow_, NodeType::GBUF_R8, -1, -1);
+  registry.declare_node(node_transp_, NodeType::GBUF_R8, -1, -1);
   // feat_tex0 / feat_tex1 are the declared output_nodes_ — they get registered
   // by the sequence infrastructure; declare them here as well if not already.
   if (!registry.has_node(output_nodes_[0])) {
@@ -96,7 +99,8 @@ void GBufferEffect::declare_nodes(NodeRegistry& registry) {
   if (!registry.has_node(output_nodes_[1])) {
     registry.declare_node(output_nodes_[1], NodeType::GBUF_RGBA32UINT, -1, -1);
   }
-  // F16X8 = Rgba16Float with CopySrc|CopyDst — matches CNNv3Effect output format.
+  // F16X8 = Rgba16Float with CopySrc|CopyDst — matches CNNv3Effect output
+  // format.
   registry.declare_node(node_prev_tex_, NodeType::F16X8, -1, -1);
 }
 
@@ -111,7 +115,7 @@ void GBufferEffect::set_scene() {
     seed ^= seed << 13;
     seed ^= seed >> 17;
     seed ^= seed << 5;
-    return (float)(seed >> 8) / 16777216.0f;  // [0, 1)
+    return (float)(seed >> 8) / 16777216.0f; // [0, 1)
   };
   auto rrange = [&](float lo, float hi) { return lo + rnd() * (hi - lo); };
 
@@ -120,8 +124,8 @@ void GBufferEffect::set_scene() {
   {
     Object3D obj(ObjectType::BOX);
     obj.position = vec3(1.0f, 0.0f, 0.0f);
-    obj.scale    = vec3(0.6f, 0.6f, 0.6f);
-    obj.color    = vec4(0.9f, 0.5f, 0.3f, 1.0f);
+    obj.scale = vec3(0.6f, 0.6f, 0.6f);
+    obj.color = vec4(0.9f, 0.5f, 0.3f, 1.0f);
     scene_.add_object(obj);
     cube_anims_.push_back({{0.0f, 1.0f, 0.0f}, 0.0f});
   }
@@ -129,8 +133,8 @@ void GBufferEffect::set_scene() {
     Object3D obj(ObjectType::SPHERE);
     obj.position = vec3(-1.0f, 0.0f, 0.0f);
     const float r = 0.9f;
-    obj.scale    = vec3(r, r, r);
-    obj.color    = vec4(0.3f, 0.6f, 0.9f, 1.0f);
+    obj.scale = vec3(r, r, r);
+    obj.color = vec4(0.3f, 0.6f, 0.9f, 1.0f);
     const int idx = (int)scene_.objects.size();
     scene_.add_object(obj);
     sphere_anims_.push_back({idx, r});
@@ -141,36 +145,35 @@ void GBufferEffect::set_scene() {
     Object3D obj(ObjectType::SPHERE);
     obj.position = vec3(0.0f, 2.2f, 0.0f);
     const float r = 0.6f;
-    obj.scale    = vec3(r, r, r);
-    obj.color    = vec4(0.9f, 0.8f, 0.2f, 1.0f);
+    obj.scale = vec3(r, r, r);
+    obj.color = vec4(0.9f, 0.8f, 0.2f, 1.0f);
     const int idx = (int)scene_.objects.size();
     scene_.add_object(obj);
     sphere_anims_.push_back({idx, r});
   }
 
   // Camera: above and in front of the scene, looking at origin.
-  camera_.set_look_at(vec3(0.0f, 2.5f, 6.0f),
-                      vec3(0.0f, 0.0f, 0.0f),
+  camera_.set_look_at(vec3(0.0f, 2.5f, 6.0f), vec3(0.0f, 0.0f, 0.0f),
                       vec3(0.0f, 1.0f, 0.0f));
-  camera_.fov_y_rad  = 0.7854f;  // 45°
+  camera_.fov_y_rad = 0.7854f; // 45°
   camera_.near_plane = 0.1f;
-  camera_.far_plane  = 20.0f;
+  camera_.far_plane = 20.0f;
   // aspect_ratio is updated each frame from params.resolution.
 
   scene_ready_ = true;
 }
 
 static void clear_r8_node(WGPUCommandEncoder encoder, WGPUTextureView view,
-                           float value) {
+                          float value) {
   WGPURenderPassColorAttachment att = {};
-  att.view       = view;
-  att.loadOp     = WGPULoadOp_Clear;
-  att.storeOp    = WGPUStoreOp_Store;
+  att.view = view;
+  att.loadOp = WGPULoadOp_Clear;
+  att.storeOp = WGPUStoreOp_Store;
   att.clearValue = {value, value, value, value};
   att.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
   WGPURenderPassDescriptor pd = {};
   pd.colorAttachmentCount = 1;
-  pd.colorAttachments     = &att;
+  pd.colorAttachments = &att;
   WGPURenderPassEncoder p = wgpuCommandEncoderBeginRenderPass(encoder, &pd);
   wgpuRenderPassEncoderEnd(p);
   wgpuRenderPassEncoderRelease(p);
@@ -190,8 +193,7 @@ void GBufferEffect::render(WGPUCommandEncoder encoder,
   const float angle = params.time * 0.3f;
   const float R = 6.0f;
   camera_.set_look_at(vec3(R * sinf(angle), 2.5f, R * cosf(angle)),
-                      vec3(0.0f, 0.0f, 0.0f),
-                      vec3(0.0f, 1.0f, 0.0f));
+                      vec3(0.0f, 0.0f, 0.0f), vec3(0.0f, 1.0f, 0.0f));
 
   // Animate cubes: axis-angle rotation driven by physical time.
   for (int i = 0; i < (int)cube_anims_.size(); ++i) {
@@ -210,8 +212,8 @@ void GBufferEffect::render(WGPUCommandEncoder encoder,
     GBufLightsUniforms lu = {};
     lu.params = vec4(1.0f, 0.0f, 0.0f, 0.0f);
     // Key: warm sun, upper-right-front.
-    lu.lights[0].direction = vec4(0.408f, 0.816f, 0.408f, 0.0f);  // norm(1,2,1)
-    lu.lights[0].color     = vec4(1.00f, 0.92f, 0.78f, 1.0f);
+    lu.lights[0].direction = vec4(0.408f, 0.816f, 0.408f, 0.0f); // norm(1,2,1)
+    lu.lights[0].color = vec4(1.00f, 0.92f, 0.78f, 1.0f);
     // Fill: cool sky, upper-left-back. (disabled for debugging)
     // lu.lights[1].direction = vec4(-0.577f, 0.577f, -0.577f, 0.0f);
     // lu.lights[1].color     = vec4(0.40f, 0.45f, 0.80f, 0.4f);
@@ -227,11 +229,11 @@ void GBufferEffect::render(WGPUCommandEncoder encoder,
   res_uni._pad1 = 0.0f;
   pack_res_uniform_.update(ctx_.queue, res_uni);
 
-  WGPUTextureView albedo_view     = nodes.get_view(node_albedo_);
+  WGPUTextureView albedo_view = nodes.get_view(node_albedo_);
   WGPUTextureView normal_mat_view = nodes.get_view(node_normal_mat_);
-  WGPUTextureView depth_view      = nodes.get_view(node_depth_);
-  WGPUTextureView feat0_view      = nodes.get_view(output_nodes_[0]);
-  WGPUTextureView feat1_view      = nodes.get_view(output_nodes_[1]);
+  WGPUTextureView depth_view = nodes.get_view(node_depth_);
+  WGPUTextureView feat0_view = nodes.get_view(output_nodes_[0]);
+  WGPUTextureView feat1_view = nodes.get_view(output_nodes_[1]);
 
   // node_prev_tex_ is updated by post_render() at the end of each frame.
   // On frame 0 it is zero (NodeRegistry zeroes new textures) — correct default.
@@ -266,17 +268,16 @@ void GBufferEffect::render(WGPUCommandEncoder encoder,
   raster_pass_desc.colorAttachments = color_attachments;
   raster_pass_desc.depthStencilAttachment = &depth_attachment;
 
-  const int num_objects =
-      (int)(scene_.objects.size() < (size_t)kGBufMaxObjects
-                ? scene_.objects.size()
-                : (size_t)kGBufMaxObjects);
+  const int num_objects = (int)(scene_.objects.size() < (size_t)kGBufMaxObjects
+                                    ? scene_.objects.size()
+                                    : (size_t)kGBufMaxObjects);
 
   if (num_objects > 0 && raster_pipeline_.get() != nullptr) {
     WGPURenderPassEncoder raster_pass =
         wgpuCommandEncoderBeginRenderPass(encoder, &raster_pass_desc);
     wgpuRenderPassEncoderSetPipeline(raster_pass, raster_pipeline_.get());
-    wgpuRenderPassEncoderSetBindGroup(raster_pass, 0,
-                                     raster_bind_group_.get(), 0, nullptr);
+    wgpuRenderPassEncoderSetBindGroup(raster_pass, 0, raster_bind_group_.get(),
+                                      0, nullptr);
     // Draw 36 vertices (proxy box) × num_objects instances.
     wgpuRenderPassEncoderDraw(raster_pass, 36, (uint32_t)num_objects, 0, 0);
     wgpuRenderPassEncoderEnd(raster_pass);
@@ -293,45 +294,46 @@ void GBufferEffect::render(WGPUCommandEncoder encoder,
   if (shadow_pipeline_.get() != nullptr) {
     WGPUBindGroupEntry shadow_entries[5] = {};
     shadow_entries[0].binding = 0;
-    shadow_entries[0].buffer  = global_uniforms_buf_.buffer;
-    shadow_entries[0].size    = sizeof(GBufGlobalUniforms);
+    shadow_entries[0].buffer = global_uniforms_buf_.buffer;
+    shadow_entries[0].size = sizeof(GBufGlobalUniforms);
 
     shadow_entries[1].binding = 1;
-    shadow_entries[1].buffer  = objects_buf_.buffer;
-    shadow_entries[1].size    = (size_t)objects_buf_capacity_ * sizeof(GBufObjectData);
+    shadow_entries[1].buffer = objects_buf_.buffer;
+    shadow_entries[1].size =
+        (size_t)objects_buf_capacity_ * sizeof(GBufObjectData);
 
-    shadow_entries[2].binding     = 2;
+    shadow_entries[2].binding = 2;
     shadow_entries[2].textureView = depth_view;
 
     shadow_entries[3].binding = 3;
-    shadow_entries[3].buffer  = lights_uniform_.get().buffer;
-    shadow_entries[3].size    = sizeof(GBufLightsUniforms);
+    shadow_entries[3].buffer = lights_uniform_.get().buffer;
+    shadow_entries[3].size = sizeof(GBufLightsUniforms);
 
-    shadow_entries[4].binding     = 4;
+    shadow_entries[4].binding = 4;
     shadow_entries[4].textureView = normal_mat_view;
 
     WGPUBindGroupLayout shadow_bgl =
         wgpuRenderPipelineGetBindGroupLayout(shadow_pipeline_.get(), 0);
 
     WGPUBindGroupDescriptor shadow_bg_desc = {};
-    shadow_bg_desc.layout     = shadow_bgl;
+    shadow_bg_desc.layout = shadow_bgl;
     shadow_bg_desc.entryCount = 5;
-    shadow_bg_desc.entries    = shadow_entries;
+    shadow_bg_desc.entries = shadow_entries;
 
     WGPUBindGroup shadow_bg =
         wgpuDeviceCreateBindGroup(ctx_.device, &shadow_bg_desc);
     wgpuBindGroupLayoutRelease(shadow_bgl);
 
     WGPURenderPassColorAttachment shadow_att = {};
-    shadow_att.view       = nodes.get_view(node_shadow_);
-    shadow_att.loadOp     = WGPULoadOp_Clear;
-    shadow_att.storeOp    = WGPUStoreOp_Store;
+    shadow_att.view = nodes.get_view(node_shadow_);
+    shadow_att.loadOp = WGPULoadOp_Clear;
+    shadow_att.storeOp = WGPUStoreOp_Store;
     shadow_att.clearValue = {1.0f, 1.0f, 1.0f, 1.0f};
     shadow_att.depthSlice = WGPU_DEPTH_SLICE_UNDEFINED;
 
     WGPURenderPassDescriptor shadow_pass_desc = {};
     shadow_pass_desc.colorAttachmentCount = 1;
-    shadow_pass_desc.colorAttachments     = &shadow_att;
+    shadow_pass_desc.colorAttachments = &shadow_att;
 
     WGPURenderPassEncoder shadow_pass =
         wgpuCommandEncoderBeginRenderPass(encoder, &shadow_pass_desc);
@@ -408,7 +410,7 @@ void GBufferEffect::render(WGPUCommandEncoder encoder,
   wgpuComputePassEncoderSetPipeline(compute_pass, pack_pipeline_.get());
   wgpuComputePassEncoderSetBindGroup(compute_pass, 0, pack_bg, 0, nullptr);
 
-  const uint32_t wg_x = ((uint32_t)width_  + 7u) / 8u;
+  const uint32_t wg_x = ((uint32_t)width_ + 7u) / 8u;
   const uint32_t wg_y = ((uint32_t)height_ + 7u) / 8u;
   wgpuComputePassEncoderDispatchWorkgroups(compute_pass, wg_x, wg_y, 1);
   wgpuComputePassEncoderEnd(compute_pass);
@@ -433,29 +435,29 @@ void GBufferEffect::ensure_objects_buffer(int num_objects) {
   objects_buf_capacity_ = num_objects;
 }
 
-void GBufferEffect::upload_scene_data(const Scene& scene,
-                                      const Camera& camera, float time) {
-  const int num_objects =
-      (int)(scene.objects.size() < (size_t)kGBufMaxObjects
-                ? scene.objects.size()
-                : (size_t)kGBufMaxObjects);
+void GBufferEffect::upload_scene_data(const Scene& scene, const Camera& camera,
+                                      float time) {
+  const int num_objects = (int)(scene.objects.size() < (size_t)kGBufMaxObjects
+                                    ? scene.objects.size()
+                                    : (size_t)kGBufMaxObjects);
 
   const mat4 view = camera.get_view_matrix();
   mat4 proj = camera.get_projection_matrix();
-  proj.m[5] = -proj.m[5];  // undo post-process Y flip: G-buffer uses integer reads
-  const mat4 vp   = proj * view;
+  proj.m[5] =
+      -proj.m[5]; // undo post-process Y flip: G-buffer uses integer reads
+  const mat4 vp = proj * view;
 
   GBufGlobalUniforms gu = {};
-  gu.view_proj       = vp;
-  gu.inv_view_proj   = vp.inverse();
-  gu.camera_pos_time = vec4(camera.position.x, camera.position.y,
-                            camera.position.z, time);
-  gu.params    = vec4((float)num_objects, 0.0f, 0.0f, 0.0f);
+  gu.view_proj = vp;
+  gu.inv_view_proj = vp.inverse();
+  gu.camera_pos_time =
+      vec4(camera.position.x, camera.position.y, camera.position.z, time);
+  gu.params = vec4((float)num_objects, 0.0f, 0.0f, 0.0f);
   gu.resolution = vec2((float)width_, (float)height_);
-  gu.padding   = vec2(0.0f, 0.0f);
+  gu.padding = vec2(0.0f, 0.0f);
 
-  wgpuQueueWriteBuffer(ctx_.queue, global_uniforms_buf_.buffer, 0,
-                       &gu, sizeof(GBufGlobalUniforms));
+  wgpuQueueWriteBuffer(ctx_.queue, global_uniforms_buf_.buffer, 0, &gu,
+                       sizeof(GBufGlobalUniforms));
 
   // Upload object data (no per-frame heap alloc — reuse s_obj_staging).
   if (num_objects > 0) {
@@ -463,13 +465,12 @@ void GBufferEffect::upload_scene_data(const Scene& scene,
     for (int i = 0; i < num_objects; ++i) {
       const Object3D& obj = scene.objects[(size_t)i];
       const mat4 m = obj.get_model_matrix();
-      s_obj_staging[i].model     = m;
+      s_obj_staging[i].model = m;
       s_obj_staging[i].inv_model = m.inverse();
-      s_obj_staging[i].color     = obj.color;
-      s_obj_staging[i].params    = vec4((float)(int)obj.type, 0.0f, 0.0f, 0.0f);
+      s_obj_staging[i].color = obj.color;
+      s_obj_staging[i].params = vec4((float)(int)obj.type, 0.0f, 0.0f, 0.0f);
     }
-    wgpuQueueWriteBuffer(ctx_.queue, objects_buf_.buffer, 0,
-                         s_obj_staging,
+    wgpuQueueWriteBuffer(ctx_.queue, objects_buf_.buffer, 0, s_obj_staging,
                          (size_t)num_objects * sizeof(GBufObjectData));
   }
 }
@@ -483,8 +484,7 @@ void GBufferEffect::create_raster_pipeline() {
     return; // Asset not loaded yet; pipeline creation deferred.
   }
 
-  const std::string composed =
-      ShaderComposer::Get().Compose({}, src);
+  const std::string composed = ShaderComposer::Get().Compose({}, src);
 
   WGPUShaderSourceWGSL wgsl_src = {};
   wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
@@ -492,7 +492,8 @@ void GBufferEffect::create_raster_pipeline() {
 
   WGPUShaderModuleDescriptor shader_desc = {};
   shader_desc.nextInChain = &wgsl_src.chain;
-  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+  WGPUShaderModule shader =
+      wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
 
   // Bind group layout: B0 = GlobalUniforms, B1 = ObjectsBuffer (storage read)
   WGPUBindGroupLayoutEntry bgl_entries[2] = {};
@@ -511,7 +512,8 @@ void GBufferEffect::create_raster_pipeline() {
   WGPUBindGroupLayoutDescriptor bgl_desc = {};
   bgl_desc.entryCount = 2;
   bgl_desc.entries = bgl_entries;
-  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+  WGPUBindGroupLayout bgl =
+      wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
 
   WGPUPipelineLayoutDescriptor pl_desc = {};
   pl_desc.bindGroupLayoutCount = 1;
@@ -542,9 +544,9 @@ void GBufferEffect::create_raster_pipeline() {
   pipe_desc.vertex.entryPoint = str_view("vs_main");
   pipe_desc.fragment = &frag;
   pipe_desc.depthStencil = &ds;
-  pipe_desc.primitive.topology  = WGPUPrimitiveTopology_TriangleList;
-  pipe_desc.primitive.cullMode  = WGPUCullMode_Back;
-  pipe_desc.primitive.frontFace = WGPUFrontFace_CCW;  // standard (no Y flip)
+  pipe_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList;
+  pipe_desc.primitive.cullMode = WGPUCullMode_Back;
+  pipe_desc.primitive.frontFace = WGPUFrontFace_CCW; // standard (no Y flip)
   pipe_desc.multisample.count = 1;
   pipe_desc.multisample.mask = 0xFFFFFFFF;
 
@@ -571,9 +573,11 @@ void GBufferEffect::create_shadow_pipeline() {
 
   WGPUShaderModuleDescriptor shader_desc = {};
   shader_desc.nextInChain = &wgsl_src.chain;
-  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+  WGPUShaderModule shader =
+      wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
 
-  // BGL: B0=GlobalUniforms, B1=ObjectsBuffer, B2=texture_depth_2d, B3=GBufLightsUniforms
+  // BGL: B0=GlobalUniforms, B1=ObjectsBuffer, B2=texture_depth_2d,
+  // B3=GBufLightsUniforms, B4=normal_mat texture
   WGPUBindGroupLayoutEntry bgl_entries[5] = {};
 
   bgl_entries[0].binding = 0;
@@ -605,7 +609,8 @@ void GBufferEffect::create_shadow_pipeline() {
   WGPUBindGroupLayoutDescriptor bgl_desc = {};
   bgl_desc.entryCount = 5;
   bgl_desc.entries = bgl_entries;
-  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+  WGPUBindGroupLayout bgl =
+      wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
 
   WGPUPipelineLayoutDescriptor pl_desc = {};
   pl_desc.bindGroupLayoutCount = 1;
@@ -656,7 +661,8 @@ void GBufferEffect::create_pack_pipeline() {
 
   WGPUShaderModuleDescriptor shader_desc = {};
   shader_desc.nextInChain = &wgsl_src.chain;
-  WGPUShaderModule shader = wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+  WGPUShaderModule shader =
+      wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
 
   // Build explicit bind group layout for bindings 0-9.
   WGPUBindGroupLayoutEntry bgl_entries[10] = {};
@@ -725,7 +731,8 @@ void GBufferEffect::create_pack_pipeline() {
   WGPUBindGroupLayoutDescriptor bgl_desc = {};
   bgl_desc.entryCount = 10;
   bgl_desc.entries = bgl_entries;
-  WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
+  WGPUBindGroupLayout bgl =
+      wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
 
   WGPUPipelineLayoutDescriptor pl_desc = {};
   pl_desc.bindGroupLayoutCount = 1;
@@ -755,12 +762,12 @@ void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) {
 
   WGPUBindGroupEntry entries[2] = {};
   entries[0].binding = 0;
-  entries[0].buffer  = global_uniforms_buf_.buffer;
-  entries[0].size    = sizeof(GBufGlobalUniforms);
+  entries[0].buffer = global_uniforms_buf_.buffer;
+  entries[0].size = sizeof(GBufGlobalUniforms);
 
   entries[1].binding = 1;
-  entries[1].buffer  = objects_buf_.buffer;
-  entries[1].size    = (size_t)objects_buf_capacity_ * sizeof(GBufObjectData);
+  entries[1].buffer = objects_buf_.buffer;
+  entries[1].size = (size_t)objects_buf_capacity_ * sizeof(GBufObjectData);
 
   WGPUBindGroupLayout bgl =
       wgpuRenderPipelineGetBindGroupLayout(raster_pipeline_.get(), 0);
@@ -777,20 +784,23 @@ void GBufferEffect::update_raster_bind_group(NodeRegistry& nodes) {
 void GBufferEffect::wire_dag(const std::vector<EffectDAGNode>& dag) {
   const std::string out = find_downstream_output(dag);
   // "sink" is an external view (no owned texture) — not a valid copy source.
-  if (out != "sink") cnn_output_node_ = out;
+  if (out != "sink")
+    cnn_output_node_ = out;
 }
 
-void GBufferEffect::post_render(WGPUCommandEncoder encoder, NodeRegistry& nodes) {
-  if (cnn_output_node_.empty() || !nodes.has_node(cnn_output_node_)) return;
+void GBufferEffect::post_render(WGPUCommandEncoder encoder,
+                                NodeRegistry& nodes) {
+  if (cnn_output_node_.empty() || !nodes.has_node(cnn_output_node_))
+    return;
   WGPUTexture src_tex = nodes.get_texture(cnn_output_node_);
-  if (!src_tex) return;  // external view (e.g. sink) — no owned texture to copy
+  if (!src_tex)
+    return; // external view (e.g. sink) — no owned texture to copy
   WGPUTexelCopyTextureInfo src = {};
-  src.texture  = src_tex;
+  src.texture = src_tex;
   src.mipLevel = 0;
   WGPUTexelCopyTextureInfo dst = {};
-  dst.texture  = nodes.get_texture(node_prev_tex_);
+  dst.texture = nodes.get_texture(node_prev_tex_);
   dst.mipLevel = 0;
   WGPUExtent3D extent = {(uint32_t)width_, (uint32_t)height_, 1};
   wgpuCommandEncoderCopyTextureToTexture(encoder, &src, &dst, &extent);
 }
-
diff --git a/cnn_v3/src/gbuffer_effect.h b/cnn_v3/src/gbuffer_effect.h
index 76d4347..260444a 100644
--- a/cnn_v3/src/gbuffer_effect.h
+++ b/cnn_v3/src/gbuffer_effect.h
@@ -1,5 +1,6 @@
 // GBufferEffect: Multi-pass G-buffer rendering for CNN v3 input
-// Outputs: gbuf_feat0, gbuf_feat1 (packed rgba32uint feature textures, 32 bytes/pixel)
+// Outputs: gbuf_feat0, gbuf_feat1 (packed rgba32uint feature textures, 32
+// bytes/pixel)
 
 #pragma once
 
@@ -21,16 +22,17 @@ struct GBufResUniforms {
 static_assert(sizeof(GBufResUniforms) == 16,
               "GBufResUniforms must be 16 bytes");
 
-// Single directional light: direction points *toward* the light source (world space).
+// Single directional light: direction points *toward* the light source (world
+// space).
 struct GBufLight {
-  vec4 direction;  // xyz = normalized direction toward light, w = unused
-  vec4 color;      // rgb = color, a = intensity
+  vec4 direction; // xyz = normalized direction toward light, w = unused
+  vec4 color;     // rgb = color, a = intensity
 };
 static_assert(sizeof(GBufLight) == 32, "GBufLight must be 32 bytes");
 
 struct GBufLightsUniforms {
   GBufLight lights[2];
-  vec4      params;  // x = num_lights
+  vec4 params; // x = num_lights
 };
 static_assert(sizeof(GBufLightsUniforms) == 80,
               "GBufLightsUniforms must be 80 bytes");
@@ -60,12 +62,12 @@ class GBufferEffect : public Effect {
  private:
   // Per-cube animation state (axis-angle rotation)
   struct CubeAnim {
-    vec3  axis;
-    float speed;  // radians/second, may be negative
+    vec3 axis;
+    float speed; // radians/second, may be negative
   };
   // Per-sphere animation state (radius driven by audio_intensity)
   struct SphereAnim {
-    int   obj_idx;     // index into scene_.objects
+    int obj_idx; // index into scene_.objects
     float base_radius;
   };
 
@@ -75,41 +77,42 @@ class GBufferEffect : public Effect {
   std::string node_depth_;
   std::string node_shadow_;
   std::string node_transp_;
-  std::string node_prev_tex_;    // persistent prev-frame CNN output (rgba8unorm)
+  std::string node_prev_tex_; // persistent prev-frame CNN output (rgba16float)
 
   // Name of the CNN effect's output node; set by caller before first render.
   // When non-empty, the CNN output is copied into node_prev_tex_ each frame.
   std::string cnn_output_node_;
 
  public:
-  void set_cnn_output_node(const std::string& name) { cnn_output_node_ = name; }
+  void set_cnn_output_node(const std::string& name) {
+    cnn_output_node_ = name;
+  }
 
  private:
-
   // Owned scene and camera — populated by set_scene()
-  Scene  scene_;
+  Scene scene_;
   Camera camera_;
-  bool   scene_ready_ = false;
+  bool scene_ready_ = false;
 
-  std::vector<CubeAnim>  cube_anims_;
+  std::vector<CubeAnim> cube_anims_;
   std::vector<SphereAnim> sphere_anims_;
 
   // Pass 1: MRT rasterization pipeline
   RenderPipeline raster_pipeline_;
-  BindGroup      raster_bind_group_;
+  BindGroup raster_bind_group_;
 
   // Pass 2: SDF shadow pipeline
   RenderPipeline shadow_pipeline_;
 
   // Pass 4: Pack compute pipeline
-  ComputePipeline  pack_pipeline_;
-  UniformBuffer<GBufResUniforms>    pack_res_uniform_;
+  ComputePipeline pack_pipeline_;
+  UniformBuffer<GBufResUniforms> pack_res_uniform_;
   UniformBuffer<GBufLightsUniforms> lights_uniform_;
 
   // GPU-side object data buffers (global uniforms + objects storage)
   GpuBuffer global_uniforms_buf_;
   GpuBuffer objects_buf_;
-  int       objects_buf_capacity_ = 0;
+  int objects_buf_capacity_ = 0;
 
   void create_raster_pipeline();
   void create_shadow_pipeline();
@@ -117,7 +120,6 @@ class GBufferEffect : public Effect {
 
   void update_raster_bind_group(NodeRegistry& nodes);
 
-  void upload_scene_data(const Scene& scene, const Camera& camera,
-                         float time);
+  void upload_scene_data(const Scene& scene, const Camera& camera, float time);
   void ensure_objects_buffer(int num_objects);
 };