1 files changed, 586 insertions, 0 deletions
diff --git a/src/gpu/texture_manager.cc b/src/gpu/texture_manager.cc
index 0c30c94..dfa6315 100644
--- a/src/gpu/texture_manager.cc
+++ b/src/gpu/texture_manager.cc
@@ -2,7 +2,10 @@
 // It implements the TextureManager.
 
 #include "gpu/texture_manager.h"
+#include "gpu/effects/shader_composer.h"
+#include "platform/platform.h"
 #include <cstdio>
+#include <cstring>
 #include <vector>
 
 #if defined(DEMO_CROSS_COMPILE_WIN32)
@@ -26,6 +29,22 @@ void TextureManager::shutdown() {
     wgpuTextureRelease(pair.second.texture);
   }
   textures_.clear();
+
+  for (auto& pair : compute_pipelines_) {
+    if (pair.second.pipeline) {
+      wgpuComputePipelineRelease(pair.second.pipeline);
+    }
+  }
+  compute_pipelines_.clear();
+
+#if !defined(STRIP_GPU_COMPOSITE)
+  for (auto& pair : samplers_) {
+    if (pair.second) {
+      wgpuSamplerRelease(pair.second);
+    }
+  }
+  samplers_.clear();
+#endif
 }
 
 void TextureManager::create_procedural_texture(
@@ -112,3 +131,570 @@ WGPUTextureView TextureManager::get_texture_view(const std::string& name) {
   }
   return nullptr;
 }
+
+// Returns the compute pipeline cached under |func_name|, building and caching
+// it on first use. On a cache hit the remaining arguments are ignored.
+//
+// The pipeline has one bind group (group 0):
+//   binding 0        write-only RGBA8Unorm 2D storage texture (the output)
+//   binding 1        uniform buffer with minBindingSize = |uniform_size|
+//   binding 2..N+1   float-sampled 2D input textures
+//   binding N+2      filtering sampler, present only when N > 0
+// where N is |num_input_textures|.
+//
+// |shader_code| is passed through ShaderComposer::Compose() and compiled with
+// the compute entry point "main".
+//
+// Returns nullptr and caches nothing when |shader_code| is null,
+// |num_input_textures| is negative, or a WebGPU create call hands back a null
+// handle, so a failed build is retried on the next call instead of being
+// remembered.
+WGPUComputePipeline TextureManager::get_or_create_compute_pipeline(
+    const std::string& func_name, const char* shader_code,
+    size_t uniform_size, int num_input_textures) {
+  const auto cached = compute_pipelines_.find(func_name);
+  if (cached != compute_pipelines_.end()) {
+    return cached->second.pipeline;
+  }
+
+  // A negative count would wrap to an enormous vector size below.
+  if (shader_code == nullptr || num_input_textures < 0) {
+    return nullptr;
+  }
+
+  // |composed| must outlive shader module creation: the WGSL descriptor only
+  // borrows its characters.
+  const std::string composed = ShaderComposer::Get().Compose({}, shader_code);
+
+  WGPUShaderSourceWGSL wgsl_source = {};
+  wgsl_source.chain.sType = WGPUSType_ShaderSourceWGSL;
+  wgsl_source.code = str_view(composed.c_str());
+  WGPUShaderModuleDescriptor module_desc = {};
+  module_desc.nextInChain = &wgsl_source.chain;
+  WGPUShaderModule shader_module =
+      wgpuDeviceCreateShaderModule(device_, &module_desc);
+  if (!shader_module) {
+    return nullptr;
+  }
+
+  const size_t texture_count = static_cast<size_t>(num_input_textures);
+  const bool has_sampler = texture_count > 0;
+  // Value-initialised, so every field not set below stays zero.
+  std::vector<WGPUBindGroupLayoutEntry> entries(2 + texture_count +
+                                                (has_sampler ? 1 : 0));
+
+  // Binding 0: output storage texture.
+  entries[0].binding = 0;
+  entries[0].visibility = WGPUShaderStage_Compute;
+  entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+  entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm;
+  entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+  // Binding 1: uniform buffer.
+  entries[1].binding = 1;
+  entries[1].visibility = WGPUShaderStage_Compute;
+  entries[1].buffer.type = WGPUBufferBindingType_Uniform;
+  entries[1].buffer.minBindingSize = uniform_size;
+
+  // Bindings 2..: input textures.
+  for (size_t i = 0; i < texture_count; ++i) {
+    WGPUBindGroupLayoutEntry& entry = entries[2 + i];
+    entry.binding = static_cast<uint32_t>(2 + i);
+    entry.visibility = WGPUShaderStage_Compute;
+    entry.texture.sampleType = WGPUTextureSampleType_Float;
+    entry.texture.viewDimension = WGPUTextureViewDimension_2D;
+  }
+
+  // Last binding: sampler, only when there is at least one input texture.
+  if (has_sampler) {
+    WGPUBindGroupLayoutEntry& entry = entries.back();
+    entry.binding = static_cast<uint32_t>(2 + texture_count);
+    entry.visibility = WGPUShaderStage_Compute;
+    entry.sampler.type = WGPUSamplerBindingType_Filtering;
+  }
+
+  WGPUBindGroupLayoutDescriptor bgl_desc = {};
+  bgl_desc.entryCount = entries.size();
+  bgl_desc.entries = entries.data();
+  WGPUBindGroupLayout bind_group_layout =
+      wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc);
+
+  WGPUPipelineLayout pipeline_layout = nullptr;
+  if (bind_group_layout) {
+    WGPUPipelineLayoutDescriptor pl_desc = {};
+    pl_desc.bindGroupLayoutCount = 1;
+    pl_desc.bindGroupLayouts = &bind_group_layout;
+    pipeline_layout = wgpuDeviceCreatePipelineLayout(device_, &pl_desc);
+  }
+
+  WGPUComputePipeline pipeline = nullptr;
+  if (pipeline_layout) {
+    WGPUComputePipelineDescriptor pipeline_desc = {};
+    pipeline_desc.layout = pipeline_layout;
+    pipeline_desc.compute.module = shader_module;
+    pipeline_desc.compute.entryPoint = str_view("main");
+    pipeline = wgpuDeviceCreateComputePipeline(device_, &pipeline_desc);
+  }
+
+  // The intermediates are no longer needed once the pipeline exists (or has
+  // failed to build); only release the handles that were actually created.
+  if (pipeline_layout) {
+    wgpuPipelineLayoutRelease(pipeline_layout);
+  }
+  if (bind_group_layout) {
+    wgpuBindGroupLayoutRelease(bind_group_layout);
+  }
+  wgpuShaderModuleRelease(shader_module);
+
+  if (!pipeline) {
+    return nullptr;
+  }
+
+  // Cache the pipeline together with the arguments it was built from.
+  ComputePipelineInfo info = {pipeline, shader_code, uniform_size,
+                              num_input_textures};
+  compute_pipelines_[func_name] = info;
+
+  return pipeline;
+}
+
+// Runs the cached compute pipeline |func_name| once, writing into |target|.
+//
+// Copies |uniform_size| bytes from |uniform_data| into a temporary uniform
+// buffer, binds a view of |target| at binding 0 and that buffer at binding 1,
+// dispatches ceil(width / 8) x ceil(height / 8) x 1 workgroups (dimensions
+// taken from |params|) and submits the work to queue_. Every temporary object
+// created here is released before returning.
+//
+// Does nothing when the pipeline has not been created through
+// get_or_create_compute_pipeline(), or when an argument is unusable (null
+// target or data, zero uniform size, non-positive dimensions).
+void TextureManager::dispatch_compute(const std::string& func_name,
+                                      WGPUTexture target,
+                                      const GpuProceduralParams& params,
+                                      const void* uniform_data,
+                                      size_t uniform_size) {
+  const auto entry = compute_pipelines_.find(func_name);
+  if (entry == compute_pipelines_.end() || !entry->second.pipeline) {
+    return;  // Pipeline not created yet
+  }
+  if (!target || uniform_data == nullptr || uniform_size == 0 ||
+      params.width <= 0 || params.height <= 0) {
+    return;
+  }
+
+  WGPUComputePipeline pipeline = entry->second.pipeline;
+
+  // Uniform buffer, filled while mapped at creation.
+  WGPUBufferDescriptor buffer_desc = {};
+  buffer_desc.size = uniform_size;
+  buffer_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+  buffer_desc.mappedAtCreation = WGPUOptionalBool_True;
+  WGPUBuffer uniform_buf = wgpuDeviceCreateBuffer(device_, &buffer_desc);
+  if (!uniform_buf) {
+    return;
+  }
+  void* mapped = wgpuBufferGetMappedRange(uniform_buf, 0, uniform_size);
+  if (mapped == nullptr) {
+    // Nothing to copy into; bail out instead of writing through null.
+    wgpuBufferRelease(uniform_buf);
+    return;
+  }
+  memcpy(mapped, uniform_data, uniform_size);
+  wgpuBufferUnmap(uniform_buf);
+
+  // View of the output storage texture.
+  WGPUTextureViewDescriptor view_desc = {};
+  view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  view_desc.dimension = WGPUTextureViewDimension_2D;
+  view_desc.mipLevelCount = 1;
+  view_desc.arrayLayerCount = 1;
+  WGPUTextureView target_view = wgpuTextureCreateView(target, &view_desc);
+  if (!target_view) {
+    wgpuBufferRelease(uniform_buf);
+    return;
+  }
+
+  // Ask the pipeline for its own group-0 layout rather than rebuilding one by
+  // hand, so the bind group cannot drift from what the pipeline was built
+  // with.
+  WGPUBindGroupLayout bind_group_layout =
+      wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
+
+  WGPUBindGroup bind_group = nullptr;
+  if (bind_group_layout) {
+    WGPUBindGroupEntry bindings[2] = {};
+    bindings[0].binding = 0;
+    bindings[0].textureView = target_view;
+    bindings[1].binding = 1;
+    bindings[1].buffer = uniform_buf;
+    bindings[1].size = uniform_size;
+
+    WGPUBindGroupDescriptor bg_desc = {};
+    bg_desc.layout = bind_group_layout;
+    bg_desc.entryCount = 2;
+    bg_desc.entries = bindings;
+    bind_group = wgpuDeviceCreateBindGroup(device_, &bg_desc);
+  }
+
+  if (bind_group) {
+    // One workgroup per 8x8 tile, rounded up.
+    // NOTE(review): presumably matches the shader's workgroup size - confirm.
+    const uint32_t groups_x = static_cast<uint32_t>((params.width + 7) / 8);
+    const uint32_t groups_y = static_cast<uint32_t>((params.height + 7) / 8);
+
+    WGPUCommandEncoderDescriptor enc_desc = {};
+    WGPUCommandEncoder encoder =
+        wgpuDeviceCreateCommandEncoder(device_, &enc_desc);
+    WGPUComputePassEncoder pass =
+        wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+    wgpuComputePassEncoderSetPipeline(pass, pipeline);
+    wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
+    wgpuComputePassEncoderDispatchWorkgroups(pass, groups_x, groups_y, 1);
+    wgpuComputePassEncoderEnd(pass);
+
+    WGPUCommandBufferDescriptor cmd_desc = {};
+    WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc);
+    wgpuQueueSubmit(queue_, 1, &cmd);
+
+    wgpuCommandBufferRelease(cmd);
+    wgpuCommandEncoderRelease(encoder);
+    wgpuComputePassEncoderRelease(pass);
+    wgpuBindGroupRelease(bind_group);
+  }
+
+  // Cleanup of everything created above.
+  if (bind_group_layout) {
+    wgpuBindGroupLayoutRelease(bind_group_layout);
+  }
+  wgpuBufferRelease(uniform_buf);
+  wgpuTextureViewRelease(target_view);
+}
+
+// Generates a |params.width| x |params.height| RGBA8Unorm texture with the
+// "gen_noise" compute shader and stores it, with a 2D view, in textures_
+// under |name|.
+//
+// params.params[0] is used as the seed and params.params[1] as the frequency.
+// A value that is not supplied (null array or too few entries) falls back to
+// seed 0 and frequency 1 instead of being read out of bounds.
+//
+// NOTE(review): an existing entry called |name| is overwritten without its
+// texture or view being released here - confirm whether callers can reuse
+// names.
+void TextureManager::create_gpu_noise_texture(
+    const std::string& name, const GpuProceduralParams& params) {
+  // Uniform block handed to the shader.
+  // NOTE(review): presumably mirrors the WGSL uniform struct - confirm.
+  struct NoiseParams {
+    uint32_t width;
+    uint32_t height;
+    float seed;
+    float frequency;
+  };
+  static_assert(sizeof(NoiseParams) == 16,
+                "NoiseParams must stay 16 bytes to match the pipeline");
+
+  extern const char* gen_noise_compute_wgsl;
+  get_or_create_compute_pipeline("gen_noise", gen_noise_compute_wgsl,
+                                 sizeof(NoiseParams));
+
+  // Reads an optional float parameter, guarding the index and the pointer.
+  const auto param_or = [&params](int index, float fallback) {
+    const bool present =
+        params.params != nullptr && index < params.num_params;
+    return present ? params.params[index] : fallback;
+  };
+
+  const uint32_t tex_width = static_cast<uint32_t>(params.width);
+  const uint32_t tex_height = static_cast<uint32_t>(params.height);
+
+  WGPUTextureDescriptor tex_desc = {};
+  tex_desc.usage =
+      WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+  tex_desc.dimension = WGPUTextureDimension_2D;
+  tex_desc.size = {tex_width, tex_height, 1};
+  tex_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  tex_desc.mipLevelCount = 1;
+  tex_desc.sampleCount = 1;
+  WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc);
+
+  NoiseParams uniforms = {tex_width, tex_height, param_or(0, 0.0f),
+                          param_or(1, 1.0f)};
+  dispatch_compute("gen_noise", texture, params, &uniforms,
+                   sizeof(NoiseParams));
+
+  WGPUTextureViewDescriptor view_desc = {};
+  view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  view_desc.dimension = WGPUTextureViewDimension_2D;
+  view_desc.mipLevelCount = 1;
+  view_desc.arrayLayerCount = 1;
+  WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc);
+
+  GpuTexture gpu_tex;
+  gpu_tex.texture = texture;
+  gpu_tex.view = view;
+  gpu_tex.width = params.width;
+  gpu_tex.height = params.height;
+  textures_[name] = gpu_tex;
+
+#if !defined(STRIP_ALL)
+  printf("Generated GPU noise texture: %s (%dx%d)\n", name.c_str(),
+         params.width, params.height);
+#endif
+}
+
+// Generates a |params.width| x |params.height| RGBA8Unorm texture with the
+// "gen_perlin" compute shader and stores it, with a 2D view, in textures_
+// under |name|.
+//
+// Optional values are read from params.params by index:
+//   [0] seed (fallback 0)         [1] frequency (fallback 1)
+//   [2] amplitude (fallback 1)    [3] amplitude decay (fallback 0.5)
+//   [4] octave count, truncated to an unsigned integer (fallback 4)
+// A missing value (null array or too few entries) uses its fallback instead
+// of being read out of bounds.
+//
+// NOTE(review): an existing entry called |name| is overwritten without its
+// texture or view being released here - confirm whether callers can reuse
+// names.
+void TextureManager::create_gpu_perlin_texture(
+    const std::string& name, const GpuProceduralParams& params) {
+  // Uniform block handed to the shader; _pad0 rounds it up to 32 bytes.
+  // NOTE(review): presumably mirrors the WGSL uniform struct - confirm.
+  struct PerlinParams {
+    uint32_t width;
+    uint32_t height;
+    float seed;
+    float frequency;
+    float amplitude;
+    float amplitude_decay;
+    uint32_t octaves;
+    float _pad0;
+  };
+  static_assert(sizeof(PerlinParams) == 32,
+                "PerlinParams must stay 32 bytes to match the pipeline");
+
+  extern const char* gen_perlin_compute_wgsl;
+  get_or_create_compute_pipeline("gen_perlin", gen_perlin_compute_wgsl,
+                                 sizeof(PerlinParams));
+
+  // Reads an optional float parameter, guarding the index and the pointer.
+  const auto param_or = [&params](int index, float fallback) {
+    const bool present =
+        params.params != nullptr && index < params.num_params;
+    return present ? params.params[index] : fallback;
+  };
+
+  const uint32_t tex_width = static_cast<uint32_t>(params.width);
+  const uint32_t tex_height = static_cast<uint32_t>(params.height);
+
+  WGPUTextureDescriptor tex_desc = {};
+  tex_desc.usage =
+      WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+  tex_desc.dimension = WGPUTextureDimension_2D;
+  tex_desc.size = {tex_width, tex_height, 1};
+  tex_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  tex_desc.mipLevelCount = 1;
+  tex_desc.sampleCount = 1;
+  WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc);
+
+  PerlinParams uniforms = {
+      tex_width,
+      tex_height,
+      param_or(0, 0.0f),
+      param_or(1, 1.0f),
+      param_or(2, 1.0f),
+      param_or(3, 0.5f),
+      static_cast<uint32_t>(param_or(4, 4.0f)),
+      0.0f};
+  dispatch_compute("gen_perlin", texture, params, &uniforms,
+                   sizeof(PerlinParams));
+
+  WGPUTextureViewDescriptor view_desc = {};
+  view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  view_desc.dimension = WGPUTextureViewDimension_2D;
+  view_desc.mipLevelCount = 1;
+  view_desc.arrayLayerCount = 1;
+  WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc);
+
+  GpuTexture gpu_tex;
+  gpu_tex.texture = texture;
+  gpu_tex.view = view;
+  gpu_tex.width = params.width;
+  gpu_tex.height = params.height;
+  textures_[name] = gpu_tex;
+
+#if !defined(STRIP_ALL)
+  printf("Generated GPU perlin texture: %s (%dx%d)\n", name.c_str(),
+         params.width, params.height);
+#endif
+}
+
+// Generates a |params.width| x |params.height| RGBA8Unorm texture with the
+// "gen_grid" compute shader and stores it, with a 2D view, in textures_
+// under |name|.
+//
+// params.params[0] is the grid size (default 32) and params.params[1] the
+// line thickness (default 2); both are truncated to unsigned integers and a
+// default applies when num_params does not cover the index.
+void TextureManager::create_gpu_grid_texture(
+    const std::string& name, const GpuProceduralParams& params) {
+  // Uniform block handed to the shader (four 32-bit values, 16 bytes).
+  struct GridParams {
+    uint32_t width;
+    uint32_t height;
+    uint32_t grid_size;
+    uint32_t thickness;
+  };
+
+  extern const char* gen_grid_compute_wgsl;
+  get_or_create_compute_pipeline("gen_grid", gen_grid_compute_wgsl, 16);
+
+  const uint32_t tex_width = static_cast<uint32_t>(params.width);
+  const uint32_t tex_height = static_cast<uint32_t>(params.height);
+
+  // Output texture: written as a storage texture, later sampled.
+  WGPUTextureDescriptor texture_desc = {};
+  texture_desc.dimension = WGPUTextureDimension_2D;
+  texture_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  texture_desc.size = {tex_width, tex_height, 1};
+  texture_desc.mipLevelCount = 1;
+  texture_desc.sampleCount = 1;
+  texture_desc.usage =
+      WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+  WGPUTexture grid_texture = wgpuDeviceCreateTexture(device_, &texture_desc);
+
+  // Optional parameters with their defaults.
+  uint32_t grid_size = 32u;
+  if (params.num_params > 0) {
+    grid_size = static_cast<uint32_t>(params.params[0]);
+  }
+  uint32_t thickness = 2u;
+  if (params.num_params > 1) {
+    thickness = static_cast<uint32_t>(params.params[1]);
+  }
+
+  GridParams uniforms = {tex_width, tex_height, grid_size, thickness};
+  dispatch_compute("gen_grid", grid_texture, params, &uniforms,
+                   sizeof(GridParams));
+
+  WGPUTextureViewDescriptor texture_view_desc = {};
+  texture_view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  texture_view_desc.dimension = WGPUTextureViewDimension_2D;
+  texture_view_desc.mipLevelCount = 1;
+  texture_view_desc.arrayLayerCount = 1;
+  WGPUTextureView grid_view =
+      wgpuTextureCreateView(grid_texture, &texture_view_desc);
+
+  // Register the result under |name|.
+  GpuTexture record;
+  record.texture = grid_texture;
+  record.view = grid_view;
+  record.width = params.width;
+  record.height = params.height;
+  textures_[name] = record;
+
+#if !defined(STRIP_ALL)
+  printf("Generated GPU grid texture: %s (%dx%d)\n", name.c_str(),
+         params.width, params.height);
+#endif
+}
+
+#if !defined(STRIP_GPU_COMPOSITE)
+// Returns the sampler cached for |type|, creating and caching it on first
+// use.
+//
+// Every sampler uses an LOD clamp of [0, 1] and maxAnisotropy 1. |type|
+// selects the U/V address mode (clamp-to-edge or repeat) and whether the
+// mag/min/mipmap filters are all linear or all nearest. A value outside the
+// four handled enumerators leaves those fields zero-initialised.
+WGPUSampler TextureManager::get_or_create_sampler(SamplerType type) {
+  const auto cached = samplers_.find(type);
+  if (cached != samplers_.end()) {
+    return cached->second;
+  }
+
+  WGPUSamplerDescriptor sampler_desc = {};
+  sampler_desc.lodMinClamp = 0.0f;
+  sampler_desc.lodMaxClamp = 1.0f;
+  sampler_desc.maxAnisotropy = 1;
+
+  // Applies one address mode to U and V and one filter to mag and min.
+  const auto configure = [&sampler_desc](WGPUAddressMode address,
+                                         WGPUFilterMode filter,
+                                         WGPUMipmapFilterMode mip_filter) {
+    sampler_desc.addressModeU = address;
+    sampler_desc.addressModeV = address;
+    sampler_desc.magFilter = filter;
+    sampler_desc.minFilter = filter;
+    sampler_desc.mipmapFilter = mip_filter;
+  };
+
+  switch (type) {
+  case SamplerType::LinearClamp:
+    configure(WGPUAddressMode_ClampToEdge, WGPUFilterMode_Linear,
+              WGPUMipmapFilterMode_Linear);
+    break;
+  case SamplerType::LinearRepeat:
+    configure(WGPUAddressMode_Repeat, WGPUFilterMode_Linear,
+              WGPUMipmapFilterMode_Linear);
+    break;
+  case SamplerType::NearestClamp:
+    configure(WGPUAddressMode_ClampToEdge, WGPUFilterMode_Nearest,
+              WGPUMipmapFilterMode_Nearest);
+    break;
+  case SamplerType::NearestRepeat:
+    configure(WGPUAddressMode_Repeat, WGPUFilterMode_Nearest,
+              WGPUMipmapFilterMode_Nearest);
+    break;
+  }
+
+  WGPUSampler created = wgpuDeviceCreateSampler(device_, &sampler_desc);
+  samplers_[type] = created;
+  return created;
+}
+
+// Runs the cached compute pipeline |func_name| once, reading |input_views|
+// and writing into |target|.
+//
+// Bind group 0 is filled as:
+//   binding 0        view of |target| (output storage texture)
+//   binding 1        temporary uniform buffer holding |uniform_size| bytes
+//                    copied from |uniform_data|
+//   binding 2..N+1   |input_views|, in order
+//   binding N+2      sampler for |sampler_type|, only when N > 0
+// where N is input_views.size(). The dispatch covers
+// ceil(width / 8) x ceil(height / 8) x 1 workgroups (dimensions taken from
+// |params|) and is submitted to queue_. Every temporary object created here
+// is released before returning; the sampler stays owned by the sampler cache.
+//
+// Does nothing when the pipeline has not been created through
+// get_or_create_compute_pipeline(), or when an argument is unusable (null
+// target or data, zero uniform size, non-positive dimensions).
+void TextureManager::dispatch_composite(
+    const std::string& func_name, WGPUTexture target,
+    const GpuProceduralParams& params, const void* uniform_data,
+    size_t uniform_size, const std::vector<WGPUTextureView>& input_views,
+    SamplerType sampler_type) {
+  const auto entry = compute_pipelines_.find(func_name);
+  if (entry == compute_pipelines_.end() || !entry->second.pipeline) {
+    return;  // Pipeline not created yet
+  }
+  if (!target || uniform_data == nullptr || uniform_size == 0 ||
+      params.width <= 0 || params.height <= 0) {
+    return;
+  }
+
+  WGPUComputePipeline pipeline = entry->second.pipeline;
+  const size_t input_count = input_views.size();
+
+  // Uniform buffer, filled while mapped at creation.
+  WGPUBufferDescriptor buffer_desc = {};
+  buffer_desc.size = uniform_size;
+  buffer_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+  buffer_desc.mappedAtCreation = WGPUOptionalBool_True;
+  WGPUBuffer uniform_buf = wgpuDeviceCreateBuffer(device_, &buffer_desc);
+  if (!uniform_buf) {
+    return;
+  }
+  void* mapped = wgpuBufferGetMappedRange(uniform_buf, 0, uniform_size);
+  if (mapped == nullptr) {
+    // Nothing to copy into; bail out instead of writing through null.
+    wgpuBufferRelease(uniform_buf);
+    return;
+  }
+  memcpy(mapped, uniform_data, uniform_size);
+  wgpuBufferUnmap(uniform_buf);
+
+  // View of the output storage texture.
+  WGPUTextureViewDescriptor view_desc = {};
+  view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  view_desc.dimension = WGPUTextureViewDimension_2D;
+  view_desc.mipLevelCount = 1;
+  view_desc.arrayLayerCount = 1;
+  WGPUTextureView target_view = wgpuTextureCreateView(target, &view_desc);
+  if (!target_view) {
+    wgpuBufferRelease(uniform_buf);
+    return;
+  }
+
+  // Value-initialised, so every field not set below stays zero.
+  std::vector<WGPUBindGroupEntry> bindings(2 + input_count +
+                                           (input_count > 0 ? 1 : 0));
+
+  // Binding 0: output texture.
+  bindings[0].binding = 0;
+  bindings[0].textureView = target_view;
+
+  // Binding 1: uniform buffer.
+  bindings[1].binding = 1;
+  bindings[1].buffer = uniform_buf;
+  bindings[1].size = uniform_size;
+
+  // Bindings 2..: input textures.
+  for (size_t i = 0; i < input_count; ++i) {
+    bindings[2 + i].binding = static_cast<uint32_t>(2 + i);
+    bindings[2 + i].textureView = input_views[i];
+  }
+
+  // Last binding: sampler, only when there is at least one input.
+  if (input_count > 0) {
+    bindings.back().binding = static_cast<uint32_t>(2 + input_count);
+    bindings.back().sampler = get_or_create_sampler(sampler_type);
+  }
+
+  // Ask the pipeline for its own group-0 layout rather than rebuilding one by
+  // hand, so the bind group cannot drift from what the pipeline was built
+  // with.
+  WGPUBindGroupLayout bind_group_layout =
+      wgpuComputePipelineGetBindGroupLayout(pipeline, 0);
+
+  WGPUBindGroup bind_group = nullptr;
+  if (bind_group_layout) {
+    WGPUBindGroupDescriptor bg_desc = {};
+    bg_desc.layout = bind_group_layout;
+    bg_desc.entryCount = bindings.size();
+    bg_desc.entries = bindings.data();
+    bind_group = wgpuDeviceCreateBindGroup(device_, &bg_desc);
+  }
+
+  if (bind_group) {
+    // One workgroup per 8x8 tile, rounded up.
+    // NOTE(review): presumably matches the shader's workgroup size - confirm.
+    const uint32_t groups_x = static_cast<uint32_t>((params.width + 7) / 8);
+    const uint32_t groups_y = static_cast<uint32_t>((params.height + 7) / 8);
+
+    WGPUCommandEncoderDescriptor enc_desc = {};
+    WGPUCommandEncoder encoder =
+        wgpuDeviceCreateCommandEncoder(device_, &enc_desc);
+    WGPUComputePassEncoder pass =
+        wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+    wgpuComputePassEncoderSetPipeline(pass, pipeline);
+    wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
+    wgpuComputePassEncoderDispatchWorkgroups(pass, groups_x, groups_y, 1);
+    wgpuComputePassEncoderEnd(pass);
+
+    WGPUCommandBufferDescriptor cmd_desc = {};
+    WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc);
+    wgpuQueueSubmit(queue_, 1, &cmd);
+
+    wgpuCommandBufferRelease(cmd);
+    wgpuCommandEncoderRelease(encoder);
+    wgpuComputePassEncoderRelease(pass);
+    wgpuBindGroupRelease(bind_group);
+  }
+
+  // Cleanup of everything created above.
+  if (bind_group_layout) {
+    wgpuBindGroupLayoutRelease(bind_group_layout);
+  }
+  wgpuBufferRelease(uniform_buf);
+  wgpuTextureViewRelease(target_view);
+}
+
+// Builds a |width| x |height| RGBA8Unorm texture by running the compute
+// shader |shader_func| over the already-registered textures named in
+// |input_names|, then stores the result, with a 2D view, in textures_ under
+// |name|.
+//
+// The pipeline for |shader_func| is created from |shader_code| if it is not
+// cached yet. |uniform_data| / |uniform_size| are forwarded to the dispatch
+// and |sampler| selects the sampler bound after the inputs.
+//
+// If any input name has no texture view, an error is written to stderr and
+// nothing is created or stored (the pipeline may already have been cached).
+void TextureManager::create_gpu_composite_texture(
+    const std::string& name, const std::string& shader_func,
+    const char* shader_code, const void* uniform_data, size_t uniform_size,
+    int width, int height, const std::vector<std::string>& input_names,
+    SamplerType sampler) {
+  // Make sure the pipeline exists before anything else.
+  const int input_count = static_cast<int>(input_names.size());
+  get_or_create_compute_pipeline(shader_func, shader_code, uniform_size,
+                                 input_count);
+
+  // Look up the view of every named input; give up on the first miss.
+  std::vector<WGPUTextureView> resolved_inputs;
+  resolved_inputs.reserve(input_names.size());
+  for (size_t i = 0; i < input_names.size(); ++i) {
+    const std::string& wanted = input_names[i];
+    WGPUTextureView found = get_texture_view(wanted);
+    if (found) {
+      resolved_inputs.push_back(found);
+      continue;
+    }
+    fprintf(stderr, "Error: Input texture not found: %s\n",
+            wanted.c_str());
+    return;
+  }
+
+  // Output texture: written as a storage texture, later sampled.
+  WGPUTextureDescriptor output_desc = {};
+  output_desc.dimension = WGPUTextureDimension_2D;
+  output_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  output_desc.size = {static_cast<uint32_t>(width),
+                      static_cast<uint32_t>(height), 1};
+  output_desc.mipLevelCount = 1;
+  output_desc.sampleCount = 1;
+  output_desc.usage =
+      WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+  WGPUTexture output_texture = wgpuDeviceCreateTexture(device_, &output_desc);
+
+  // Only the dimensions are carried in the params; no float parameters.
+  GpuProceduralParams dispatch_params = {width, height, nullptr, 0};
+  dispatch_composite(shader_func, output_texture, dispatch_params,
+                     uniform_data, uniform_size, resolved_inputs, sampler);
+
+  WGPUTextureViewDescriptor output_view_desc = {};
+  output_view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+  output_view_desc.dimension = WGPUTextureViewDimension_2D;
+  output_view_desc.mipLevelCount = 1;
+  output_view_desc.arrayLayerCount = 1;
+  WGPUTextureView output_view =
+      wgpuTextureCreateView(output_texture, &output_view_desc);
+
+  // Register the result under |name|.
+  GpuTexture record;
+  record.texture = output_texture;
+  record.view = output_view;
+  record.width = width;
+  record.height = height;
+  textures_[name] = record;
+
+#if !defined(STRIP_ALL)
+  printf("Generated GPU composite texture: %s (%dx%d, %zu inputs)\n",
+         name.c_str(), width, height, input_names.size());
+#endif
+}
+#endif  // !defined(STRIP_GPU_COMPOSITE)
+
+#if !defined(STRIP_ALL)
+// Returns the view of the texture registered under |name|. When no such
+// texture exists yet, a GPU noise texture is generated from |params| via
+// create_gpu_noise_texture() and its view is returned.
+WGPUTextureView TextureManager::get_or_generate_gpu_texture(
+    const std::string& name, const GpuProceduralParams& params) {
+  const auto cached = textures_.find(name);
+  if (cached == textures_.end()) {
+    create_gpu_noise_texture(name, params);
+    return textures_[name].view;
+  }
+  return cached->second.view;
+}
+#endif