diff options
| -rw-r--r-- | cmake/DemoSourceLists.cmake | 1 | ||||
| -rw-r--r-- | cnn_v3/src/gbuffer_effect.cc | 2 | ||||
| -rw-r--r-- | src/3d/object.h | 5 | ||||
| -rw-r--r-- | src/3d/physics.cc | 2 | ||||
| -rw-r--r-- | src/3d/renderer.cc | 2 | ||||
| -rw-r--r-- | src/3d/renderer.h | 1 | ||||
| -rw-r--r-- | src/3d/renderer_draw.cc | 20 | ||||
| -rw-r--r-- | src/audio/fft.cc | 30 | ||||
| -rw-r--r-- | src/effects/scene2_effect.cc | 34 | ||||
| -rw-r--r-- | src/effects/scene2_effect.h | 19 | ||||
| -rw-r--r-- | src/gpu/texture_manager.cc | 154 | ||||
| -rw-r--r-- | src/gpu/texture_manager.h | 7 | ||||
| -rw-r--r-- | src/gpu/wgsl_effect.cc | 8 | ||||
| -rw-r--r-- | src/gpu/wgsl_effect.h | 5 |
14 files changed, 100 insertions, 190 deletions
diff --git a/cmake/DemoSourceLists.cmake b/cmake/DemoSourceLists.cmake index ef297f8..e80c46a 100644 --- a/cmake/DemoSourceLists.cmake +++ b/cmake/DemoSourceLists.cmake @@ -39,7 +39,6 @@ set(COMMON_GPU_EFFECTS src/effects/hybrid3_d_effect.cc src/effects/peak_meter_effect.cc src/effects/scene1_effect.cc - src/effects/scene2_effect.cc cnn_v3/src/gbuffer_effect.cc cnn_v3/src/cnn_v3_effect.cc cnn_v3/src/gbuf_view_effect.cc diff --git a/cnn_v3/src/gbuffer_effect.cc b/cnn_v3/src/gbuffer_effect.cc index 6815154..82ad8b1 100644 --- a/cnn_v3/src/gbuffer_effect.cc +++ b/cnn_v3/src/gbuffer_effect.cc @@ -118,7 +118,7 @@ void GBufferEffect::set_scene() { // 2 large cubes. // 2 large static cubes for shadow debugging. { - Object3D obj(ObjectType::CUBE); + Object3D obj(ObjectType::BOX); obj.position = vec3(1.0f, 0.0f, 0.0f); obj.scale = vec3(0.6f, 0.6f, 0.6f); obj.color = vec4(0.9f, 0.5f, 0.3f, 1.0f); diff --git a/src/3d/object.h b/src/3d/object.h index a8eb98c..e2cd15a 100644 --- a/src/3d/object.h +++ b/src/3d/object.h @@ -9,7 +9,7 @@ #include <memory> // For std::shared_ptr enum class ObjectType { - CUBE, + CUBE, // Legacy alias for BOX (value 0, kept for binary scene compat) SPHERE, PLANE, TORUS, @@ -17,6 +17,7 @@ enum class ObjectType { SKYBOX, MESH // Add more SDF types here + // NOTE: CUBE == BOX semantically. Use BOX for new code. }; struct BoundingVolume { @@ -46,7 +47,7 @@ class Object3D { std::shared_ptr<void> shared_user_data; // For tool-specific data managed with shared ownership - Object3D(ObjectType t = ObjectType::CUBE) + Object3D(ObjectType t = ObjectType::BOX) : position(0, 0, 0), rotation(0, 0, 0, 1), scale(1, 1, 1), type(t), color(1, 1, 1, 1), velocity(0, 0, 0), mass(1.0f), restitution(0.5f), is_static(false), mesh_asset_id((AssetId)0), local_extent(1, 1, 1), diff --git a/src/3d/physics.cc b/src/3d/physics.cc index db27e95..788246f 100644 --- a/src/3d/physics.cc +++ b/src/3d/physics.cc @@ -16,7 +16,7 @@ float PhysicsSystem::sample_sdf(const Object3D& obj, vec3 world_p) { float d = 1000.0f; if (obj.type == ObjectType::SPHERE) { d = q.len() - 1.0f; - } else if (obj.type == ObjectType::BOX || obj.type == ObjectType::CUBE) { + } else if (obj.type == ObjectType::BOX || obj.type == ObjectType::CUBE) { // CUBE is legacy alias d = sdf::sdBox(q, vec3(1.0f, 1.0f, 1.0f)); } else if (obj.type == ObjectType::TORUS) { d = sdf::sdTorus(q, vec2(1.0f, 0.4f)); diff --git a/src/3d/renderer.cc b/src/3d/renderer.cc index 7f2f800..e3e8ce7 100644 --- a/src/3d/renderer.cc +++ b/src/3d/renderer.cc @@ -108,10 +108,12 @@ void Renderer3D::resize(int width, int height) { void Renderer3D::set_noise_texture(WGPUTextureView noise_view) { noise_texture_view_ = noise_view; + bind_group_dirty_ = true; } void Renderer3D::set_sky_texture(WGPUTextureView sky_view) { sky_texture_view_ = sky_view; + bind_group_dirty_ = true; } void Renderer3D::add_debug_aabb(const vec3& min, const vec3& max, diff --git a/src/3d/renderer.h b/src/3d/renderer.h index 8f933b5..21192f3 100644 --- a/src/3d/renderer.h +++ b/src/3d/renderer.h @@ -125,6 +125,7 @@ class Renderer3D { BVH cpu_bvh_; // Keep a CPU-side copy for building/uploading bool bvh_enabled_ = true; bool direct_render_ = false; // true = render to surface (no post-process flip) + bool bind_group_dirty_ = true; // Recreate bind group when textures change std::map<AssetId, MeshGpuData> mesh_cache_; const MeshGpuData* temp_mesh_override_ = nullptr; // HACK for test_mesh tool diff --git a/src/3d/renderer_draw.cc b/src/3d/renderer_draw.cc index dca7113..929f261 100644 --- a/src/3d/renderer_draw.cc +++ b/src/3d/renderer_draw.cc @@ -27,8 +27,8 @@ void Renderer3D::update_uniforms(const Scene& scene, const Camera& camera, float type_id = 0.0f; switch (obj.type) { case ObjectType::SPHERE: type_id = 1.0f; break; + case ObjectType::CUBE: // fallthrough (legacy alias for BOX) case ObjectType::BOX: type_id = 2.0f; break; - case ObjectType::CUBE: type_id = 2.0f; break; // CUBE is same as BOX case ObjectType::TORUS: type_id = 3.0f; break; case ObjectType::PLANE: type_id = 4.0f; break; case ObjectType::MESH: type_id = 5.0f; break; @@ -63,7 +63,11 @@ void Renderer3D::draw(WGPURenderPassEncoder pass, const Scene& scene, const Camera& camera, float time) { update_uniforms(scene, camera, time); - // Lazy Bind Group creation + // Recreate bind group only when textures or BVH mode change + if (!bind_group_dirty_) { + // Skip bind group rebuild — reuse existing + } else { + bind_group_dirty_ = false; if (bind_group_) wgpuBindGroupRelease(bind_group_); @@ -115,11 +119,9 @@ void Renderer3D::draw(WGPURenderPassEncoder pass, const Scene& scene, bg_entries.push_back(e); } - // Select the correct pipeline and bind group layout - WGPURenderPipeline current_pipeline = - bvh_enabled_ ? pipeline_ : pipeline_no_bvh_; + WGPURenderPipeline cp = bvh_enabled_ ? pipeline_ : pipeline_no_bvh_; WGPUBindGroupLayout current_layout = - wgpuRenderPipelineGetBindGroupLayout(current_pipeline, 0); + wgpuRenderPipelineGetBindGroupLayout(cp, 0); WGPUBindGroupDescriptor bg_desc = {}; bg_desc.layout = current_layout; @@ -129,10 +131,10 @@ void Renderer3D::draw(WGPURenderPassEncoder pass, const Scene& scene, bind_group_ = wgpuDeviceCreateBindGroup(device_, &bg_desc); wgpuBindGroupLayoutRelease(current_layout); + } // end dirty block - wgpuRenderPassEncoderSetPipeline(pass, current_pipeline); - - wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + WGPURenderPipeline current_pipeline = + bvh_enabled_ ? pipeline_ : pipeline_no_bvh_; uint32_t instance_count = (uint32_t)std::min((size_t)kMaxObjects, scene.objects.size()); diff --git a/src/audio/fft.cc b/src/audio/fft.cc index 7523b42..982f35f 100644 --- a/src/audio/fft.cc +++ b/src/audio/fft.cc @@ -7,6 +7,10 @@ #include <cmath> #include <cstring> +// Max supported FFT size for stack-allocated temp buffers. +// All callers use N <= 512 (DCT_SIZE). imdct needs 2*N = 1024. +static const size_t kMaxFFTSize = 1024; + // Bit-reversal permutation (in-place) // Reorders array elements by reversing their binary indices static void bit_reverse_permute(float* real, float* imag, size_t N) { @@ -102,9 +106,9 @@ void fft_inverse(float* real, float* imag, size_t N) { void dct_fft(const float* input, float* output, size_t N) { const float PI = 3.14159265358979323846f; - // Allocate temporary arrays for N-point FFT - float* real = new float[N]; - float* imag = new float[N]; + // Stack-allocated temp arrays (N <= kMaxFFTSize) + float real[kMaxFFTSize]; + float imag[kMaxFFTSize]; // Reorder input: even indices first, then odd indices reversed // [x[0], x[2], x[4], ...] followed by [x[N-1], x[N-3], x[N-5], ...] @@ -135,9 +139,6 @@ void dct_fft(const float* input, float* output, size_t N) { output[k] = dct_value * sqrtf(2.0f / N); } } - - delete[] real; - delete[] imag; } // IMDCT via FFT @@ -149,8 +150,9 @@ void imdct_fft(const float* input, float* output, size_t N) { const float PI = 3.14159265358979323846f; const size_t M = 2 * N; // output length - float* real = new float[M]; - float* imag = new float[M]; + // Stack-allocated temp arrays (M = 2*N <= kMaxFFTSize) + float real[kMaxFFTSize]; + float imag[kMaxFFTSize]; // Pre-multiply X[k] by exp(-j*pi*(2k+1)/(4N)), build 2N complex FFT input // via standard IMDCT-via-FFT algorithm (N-point complex FFT) @@ -180,9 +182,6 @@ void imdct_fft(const float* input, float* output, size_t N) { const float angle = -PI * (2.0f * n + 1.0f) / (4.0f * N); output[n] = gain * (real[n] * cosf(angle) - imag[n] * sinf(angle)); } - - delete[] real; - delete[] imag; } // IDCT (DCT-III) via FFT - inverse of the DCT-II reordering method @@ -190,9 +189,9 @@ void imdct_fft(const float* input, float* output, size_t N) { void idct_fft(const float* input, float* output, size_t N) { const float PI = 3.14159265358979323846f; - // Allocate temporary arrays for N-point FFT - float* real = new float[N]; - float* imag = new float[N]; + // Stack-allocated temp arrays (N <= kMaxFFTSize) + float real[kMaxFFTSize]; + float imag[kMaxFFTSize]; // Prepare FFT input with inverse phase correction // FFT[k] = DCT[k] * exp(+j*pi*k/(2*N)) / normalization @@ -226,7 +225,4 @@ void idct_fft(const float* input, float* output, size_t N) { output[2 * i] = real[i]; // Even positions output[2 * i + 1] = real[N - 1 - i]; // Odd positions (reversed) } - - delete[] real; - delete[] imag; } diff --git a/src/effects/scene2_effect.cc b/src/effects/scene2_effect.cc deleted file mode 100644 index 92e5ecd..0000000 --- a/src/effects/scene2_effect.cc +++ /dev/null @@ -1,34 +0,0 @@ -// This file is part of the 64k demo project. -// Scene2 effect - ShaderToy conversion (scene) -// Generated by convert_shadertoy.py - -#include "effects/scene2_effect.h" -#include "effects/shaders.h" -#include "gpu/gpu.h" -#include "gpu/post_process_helper.h" -#include "util/fatal_error.h" - -Scene2Effect::Scene2Effect(const GpuContext& ctx, - const std::vector<std::string>& inputs, - const std::vector<std::string>& outputs, - float start_time, float end_time) - : Effect(ctx, inputs, outputs, start_time, end_time) { - HEADLESS_RETURN_IF_NULL(ctx_.device); - create_nearest_sampler(); - create_dummy_scene_texture(); - pipeline_.set(create_post_process_pipeline( - ctx_.device, WGPUTextureFormat_RGBA8Unorm, scene2_shader_wgsl)); -} - -void Scene2Effect::render(WGPUCommandEncoder encoder, - const UniformsSequenceParams& params, - NodeRegistry& nodes) { - WGPUTextureView output_view = nodes.get_view(output_nodes_[0]); - - // uniforms_buffer_ auto-updated by base class dispatch_render() - pp_update_bind_group(ctx_.device, pipeline_.get(), bind_group_.get_address(), - dummy_texture_view_.get(), uniforms_buffer_.get(), - {nullptr, 0}); - - run_fullscreen_pass(encoder, pipeline_.get(), bind_group_.get(), output_view); -} diff --git a/src/effects/scene2_effect.h b/src/effects/scene2_effect.h index da4cf7e..0e26fc3 100644 --- a/src/effects/scene2_effect.h +++ b/src/effects/scene2_effect.h @@ -4,19 +4,14 @@ #pragma once -#include "gpu/effect.h" -#include "gpu/wgpu_resource.h" +#include "effects/shaders.h" +#include "gpu/wgsl_effect.h" -class Scene2Effect : public Effect { - public: +struct Scene2Effect : public WgslEffect { Scene2Effect(const GpuContext& ctx, const std::vector<std::string>& inputs, const std::vector<std::string>& outputs, float start_time, - float end_time); - - void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params, - NodeRegistry& nodes) override; - - private: - RenderPipeline pipeline_; - BindGroup bind_group_; + float end_time) + : WgslEffect(ctx, inputs, outputs, start_time, end_time, + scene2_shader_wgsl, WGPULoadOp_Clear, {}, + WgslSamplerType::Nearest) {} }; diff --git a/src/gpu/texture_manager.cc b/src/gpu/texture_manager.cc index bdeb508..20e215d 100644 --- a/src/gpu/texture_manager.cc +++ b/src/gpu/texture_manager.cc @@ -285,142 +285,76 @@ void TextureManager::dispatch_compute(const std::string& func_name, wgpuTextureViewRelease(target_view); } -void TextureManager::create_gpu_noise_texture( - const std::string& name, const GpuProceduralParams& params) { - extern const char* gen_noise_compute_wgsl; - get_or_create_compute_pipeline("gen_noise", gen_noise_compute_wgsl, 16); - - WGPUTextureDescriptor tex_desc = {}; - tex_desc.usage = - WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; - tex_desc.dimension = WGPUTextureDimension_2D; - tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1}; - tex_desc.format = WGPUTextureFormat_RGBA8Unorm; - tex_desc.mipLevelCount = 1; - tex_desc.sampleCount = 1; - WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc); +void TextureManager::create_gpu_procedural( + const std::string& name, const std::string& func_name, + const char* shader_code, const GpuProceduralParams& params, + const void* uniform_data, size_t uniform_size) { + get_or_create_compute_pipeline(func_name, shader_code, uniform_size); - struct NoiseParams { - uint32_t width; - uint32_t height; - float seed; - float frequency; - }; - NoiseParams uniforms = {(uint32_t)params.width, (uint32_t)params.height, - params.params[0], params.params[1]}; - dispatch_compute("gen_noise", texture, params, &uniforms, - sizeof(NoiseParams)); + TextureWithView tv = gpu_create_storage_texture_2d( + device_, (uint32_t)params.width, (uint32_t)params.height, + WGPUTextureFormat_RGBA8Unorm); - WGPUTextureView view = - gpu_create_texture_view_2d(texture, WGPUTextureFormat_RGBA8Unorm); + dispatch_compute(func_name, tv.texture, params, uniform_data, uniform_size); GpuTexture gpu_tex; - gpu_tex.texture = texture; - gpu_tex.view = view; + gpu_tex.texture = tv.texture; + gpu_tex.view = tv.view; gpu_tex.width = params.width; gpu_tex.height = params.height; textures_[name] = gpu_tex; #if !defined(STRIP_ALL) - printf("Generated GPU noise texture: %s (%dx%d)\n", name.c_str(), - params.width, params.height); + printf("Generated GPU %s texture: %s (%dx%d)\n", func_name.c_str(), + name.c_str(), params.width, params.height); #endif } +void TextureManager::create_gpu_noise_texture( + const std::string& name, const GpuProceduralParams& params) { + extern const char* gen_noise_compute_wgsl; + struct NoiseParams { + uint32_t width, height; + float seed, frequency; + }; + NoiseParams u = {(uint32_t)params.width, (uint32_t)params.height, + params.params[0], params.params[1]}; + create_gpu_procedural(name, "gen_noise", gen_noise_compute_wgsl, params, &u, + sizeof(u)); +} + void TextureManager::create_gpu_perlin_texture( const std::string& name, const GpuProceduralParams& params) { extern const char* gen_perlin_compute_wgsl; - get_or_create_compute_pipeline("gen_perlin", gen_perlin_compute_wgsl, 32); - - WGPUTextureDescriptor tex_desc = {}; - tex_desc.usage = - WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; - tex_desc.dimension = WGPUTextureDimension_2D; - tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1}; - tex_desc.format = WGPUTextureFormat_RGBA8Unorm; - tex_desc.mipLevelCount = 1; - tex_desc.sampleCount = 1; - WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc); - struct PerlinParams { - uint32_t width; - uint32_t height; - float seed; - float frequency; - float amplitude; - float amplitude_decay; + uint32_t width, height; + float seed, frequency, amplitude, amplitude_decay; uint32_t octaves; float _pad0; }; - PerlinParams uniforms = {(uint32_t)params.width, - (uint32_t)params.height, - params.params[0], - params.params[1], - params.num_params > 2 ? params.params[2] : 1.0f, - params.num_params > 3 ? params.params[3] : 0.5f, - params.num_params > 4 ? (uint32_t)params.params[4] - : 4u, - 0.0f}; - dispatch_compute("gen_perlin", texture, params, &uniforms, - sizeof(PerlinParams)); - - WGPUTextureView view = - gpu_create_texture_view_2d(texture, WGPUTextureFormat_RGBA8Unorm); - - GpuTexture gpu_tex; - gpu_tex.texture = texture; - gpu_tex.view = view; - gpu_tex.width = params.width; - gpu_tex.height = params.height; - textures_[name] = gpu_tex; - -#if !defined(STRIP_ALL) - printf("Generated GPU perlin texture: %s (%dx%d)\n", name.c_str(), - params.width, params.height); -#endif + PerlinParams u = {(uint32_t)params.width, + (uint32_t)params.height, + params.params[0], + params.params[1], + params.num_params > 2 ? params.params[2] : 1.0f, + params.num_params > 3 ? params.params[3] : 0.5f, + params.num_params > 4 ? (uint32_t)params.params[4] : 4u, + 0.0f}; + create_gpu_procedural(name, "gen_perlin", gen_perlin_compute_wgsl, params, &u, + sizeof(u)); } void TextureManager::create_gpu_grid_texture( const std::string& name, const GpuProceduralParams& params) { extern const char* gen_grid_compute_wgsl; - get_or_create_compute_pipeline("gen_grid", gen_grid_compute_wgsl, 16); - - WGPUTextureDescriptor tex_desc = {}; - tex_desc.usage = - WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; - tex_desc.dimension = WGPUTextureDimension_2D; - tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1}; - tex_desc.format = WGPUTextureFormat_RGBA8Unorm; - tex_desc.mipLevelCount = 1; - tex_desc.sampleCount = 1; - WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc); - struct GridParams { - uint32_t width; - uint32_t height; - uint32_t grid_size; - uint32_t thickness; + uint32_t width, height, grid_size, thickness; }; - GridParams uniforms = { - (uint32_t)params.width, (uint32_t)params.height, - params.num_params > 0 ? (uint32_t)params.params[0] : 32u, - params.num_params > 1 ? (uint32_t)params.params[1] : 2u}; - dispatch_compute("gen_grid", texture, params, &uniforms, sizeof(GridParams)); - - WGPUTextureView view = - gpu_create_texture_view_2d(texture, WGPUTextureFormat_RGBA8Unorm); - - GpuTexture gpu_tex; - gpu_tex.texture = texture; - gpu_tex.view = view; - gpu_tex.width = params.width; - gpu_tex.height = params.height; - textures_[name] = gpu_tex; - -#if !defined(STRIP_ALL) - printf("Generated GPU grid texture: %s (%dx%d)\n", name.c_str(), params.width, - params.height); -#endif + GridParams u = {(uint32_t)params.width, (uint32_t)params.height, + params.num_params > 0 ? (uint32_t)params.params[0] : 32u, + params.num_params > 1 ? (uint32_t)params.params[1] : 2u}; + create_gpu_procedural(name, "gen_grid", gen_grid_compute_wgsl, params, &u, + sizeof(u)); } #if !defined(STRIP_GPU_COMPOSITE) diff --git a/src/gpu/texture_manager.h b/src/gpu/texture_manager.h index ec30c7b..a5462ae 100644 --- a/src/gpu/texture_manager.h +++ b/src/gpu/texture_manager.h @@ -93,6 +93,13 @@ class TextureManager { const GpuProceduralParams& params, const void* uniform_data, size_t uniform_size); + // Shared helper: create pipeline + storage texture + dispatch + store result + void create_gpu_procedural(const std::string& name, + const std::string& func_name, + const char* shader_code, + const GpuProceduralParams& params, + const void* uniform_data, size_t uniform_size); + #if !defined(STRIP_GPU_COMPOSITE) void dispatch_composite(const std::string& func_name, WGPUTexture target, const GpuProceduralParams& params, diff --git a/src/gpu/wgsl_effect.cc b/src/gpu/wgsl_effect.cc index 1cb0ecb..4f658a5 100644 --- a/src/gpu/wgsl_effect.cc +++ b/src/gpu/wgsl_effect.cc @@ -10,13 +10,17 @@ WgslEffect::WgslEffect(const GpuContext& ctx, const std::vector<std::string>& outputs, float start_time, float end_time, const char* shader_code, WGPULoadOp load_op, - WgslEffectParams initial_params) + WgslEffectParams initial_params, + WgslSamplerType sampler_type) : Effect(ctx, inputs, outputs, start_time, end_time), effect_params(initial_params), load_op_(load_op) { HEADLESS_RETURN_IF_NULL(ctx_.device); - create_linear_sampler(); + if (sampler_type == WgslSamplerType::Nearest) + create_nearest_sampler(); + else + create_linear_sampler(); params_buffer_.init(ctx_.device); pipeline_.set(create_post_process_pipeline(ctx_.device, diff --git a/src/gpu/wgsl_effect.h b/src/gpu/wgsl_effect.h index 062f885..f487ef7 100644 --- a/src/gpu/wgsl_effect.h +++ b/src/gpu/wgsl_effect.h @@ -16,6 +16,8 @@ struct WgslEffectParams { }; static_assert(sizeof(WgslEffectParams) == 32, "WgslEffectParams must be 32 bytes"); +enum class WgslSamplerType { Linear, Nearest }; + class WgslEffect : public Effect { public: // Mutate per-frame for dynamic parameter modulation. @@ -25,7 +27,8 @@ class WgslEffect : public Effect { const std::vector<std::string>& outputs, float start_time, float end_time, const char* shader_code, WGPULoadOp load_op = WGPULoadOp_Clear, - WgslEffectParams initial_params = {}); + WgslEffectParams initial_params = {}, + WgslSamplerType sampler_type = WgslSamplerType::Linear); void render(WGPUCommandEncoder encoder, const UniformsSequenceParams& params, NodeRegistry& nodes) override; |
