summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CLAUDE.md6
-rw-r--r--CMakeLists.txt86
-rw-r--r--GEMINI.md15
-rw-r--r--PROJECT_CONTEXT.md140
-rw-r--r--TODO.md324
-rw-r--r--assets/demo.seq7
-rw-r--r--assets/final/demo_assets.txt33
-rw-r--r--assets/final/music/BASS_GUITAR_FEEL.spec (renamed from assets/final/BASS_GUITAR_FEEL.spec)bin51216 -> 51216 bytes
-rw-r--r--assets/final/music/BASS_SYNTH_1.spec (renamed from assets/final/BASS_SYNTH_1.spec)bin32784 -> 32784 bytes
-rw-r--r--assets/final/music/CRASH_DMX.spec (renamed from assets/final/CRASH_DMX.spec)bin217104 -> 217104 bytes
-rw-r--r--assets/final/music/HIHAT_CLOSED_DMX.spec (renamed from assets/final/HIHAT_CLOSED_DMX.spec)bin20496 -> 20496 bytes
-rw-r--r--assets/final/music/HIHAT_CLOSED_DUFF.spec (renamed from assets/final/HIHAT_CLOSED_DUFF.spec)bin12304 -> 12304 bytes
-rw-r--r--assets/final/music/HIHAT_CLOSED_ER_1.spec (renamed from assets/final/HIHAT_CLOSED_ER_1.spec)bin16400 -> 16400 bytes
-rw-r--r--assets/final/music/KICK_606.spec (renamed from assets/final/KICK_606.spec)bin139280 -> 139280 bytes
-rw-r--r--assets/final/music/KICK_90S_2.spec (renamed from assets/final/KICK_90S_2.spec)bin30736 -> 30736 bytes
-rw-r--r--assets/final/music/RIDE_CUP_1.spec (renamed from assets/final/RIDE_CUP_1.spec)bin3577872 -> 3577872 bytes
-rw-r--r--assets/final/music/SNARE_808.spec (renamed from assets/final/SNARE_808.spec)bin120848 -> 120848 bytes
-rw-r--r--assets/final/music/SNARE_909_TUNE_8.spec (renamed from assets/final/SNARE_909_TUNE_8.spec)bin43024 -> 43024 bytes
-rw-r--r--assets/final/music/SNARE_BLUE_ROOM.spec (renamed from assets/final/SNARE_BLUE_ROOM.spec)bin43024 -> 43024 bytes
-rw-r--r--assets/final/music/SPLASH_GROUNDED.spec (renamed from assets/final/SPLASH_GROUNDED.spec)bin57360 -> 57360 bytes
-rw-r--r--assets/final/music/SYNTH_BASS_DISTORT.spec (renamed from assets/final/SYNTH_BASS_DISTORT.spec)bin32784 -> 32784 bytes
-rw-r--r--assets/final/shaders/chroma_aberration.wgsl14
-rw-r--r--assets/final/shaders/circle_mask_compute.wgsl14
-rw-r--r--assets/final/shaders/circle_mask_render.wgsl10
-rw-r--r--assets/final/shaders/compute/gen_blend.wgsl29
-rw-r--r--assets/final/shaders/compute/gen_grid.wgsl24
-rw-r--r--assets/final/shaders/compute/gen_mask.wgsl27
-rw-r--r--assets/final/shaders/compute/gen_noise.wgsl26
-rw-r--r--assets/final/shaders/compute/gen_perlin.wgsl44
-rw-r--r--assets/final/shaders/distort.wgsl14
-rw-r--r--assets/final/shaders/ellipse.wgsl10
-rw-r--r--assets/final/shaders/gaussian_blur.wgsl14
-rw-r--r--assets/final/shaders/main_shader.wgsl10
-rw-r--r--assets/final/shaders/particle_compute.wgsl10
-rw-r--r--assets/final/shaders/particle_render.wgsl10
-rw-r--r--assets/final/shaders/particle_spray_compute.wgsl10
-rw-r--r--assets/final/shaders/passthrough.wgsl10
-rw-r--r--assets/final/shaders/solarize.wgsl10
-rw-r--r--assets/final/shaders/vignette.wgsl14
-rw-r--r--assets/final/test_demo_assets.txt6
-rw-r--r--doc/ARCHITECTURE.md60
-rw-r--r--doc/BACKLOG.md197
-rw-r--r--doc/CODING_STYLE.md109
-rw-r--r--doc/COMPLETED.md29
-rw-r--r--doc/CONTRIBUTING.md63
-rw-r--r--doc/GPU_PROCEDURAL_PHASE4.md70
-rw-r--r--doc/HOWTO.md90
-rw-r--r--doc/RECIPE.md202
-rw-r--r--doc/TOOLS_REFERENCE.md89
-rw-r--r--doc/UNIFORM_BUFFER_GUIDELINES.md106
-rwxr-xr-xscripts/gen_spectrograms.sh2
-rw-r--r--src/3d/visual_debug.cc11
-rw-r--r--src/audio/audio.cc6
-rw-r--r--src/audio/audio.h3
-rw-r--r--src/gpu/demo_effects.h55
-rw-r--r--src/gpu/effect.h5
-rw-r--r--src/gpu/effects/chroma_aberration_effect.cc1
-rw-r--r--src/gpu/effects/circle_mask_effect.cc24
-rw-r--r--src/gpu/effects/circle_mask_effect.h8
-rw-r--r--src/gpu/effects/distort_effect.cc26
-rw-r--r--src/gpu/effects/fade_effect.cc19
-rw-r--r--src/gpu/effects/fade_effect.h3
-rw-r--r--src/gpu/effects/flash_cube_effect.h2
-rw-r--r--src/gpu/effects/gaussian_blur_effect.cc1
-rw-r--r--src/gpu/effects/heptagon_effect.cc24
-rw-r--r--src/gpu/effects/moving_ellipse_effect.cc9
-rw-r--r--src/gpu/effects/particle_spray_effect.cc1
-rw-r--r--src/gpu/effects/particles_effect.cc1
-rw-r--r--src/gpu/effects/passthrough_effect.cc1
-rw-r--r--src/gpu/effects/post_process_helper.cc8
-rw-r--r--src/gpu/effects/post_process_helper.h8
-rw-r--r--src/gpu/effects/shaders.cc22
-rw-r--r--src/gpu/effects/shaders.h7
-rw-r--r--src/gpu/effects/solarize_effect.cc5
-rw-r--r--src/gpu/effects/theme_modulation_effect.cc19
-rw-r--r--src/gpu/effects/theme_modulation_effect.h3
-rw-r--r--src/gpu/effects/vignette_effect.cc5
-rw-r--r--src/gpu/gpu.cc11
-rw-r--r--src/gpu/texture_manager.cc586
-rw-r--r--src/gpu/texture_manager.h68
-rw-r--r--src/gpu/uniform_helper.h1
-rw-r--r--src/main.cc8
-rw-r--r--src/test_demo.cc39
-rw-r--r--src/tests/test_3d_render.cc39
-rw-r--r--src/tests/test_demo_effects.cc3
-rw-r--r--src/tests/test_effect_base.cc3
-rw-r--r--src/tests/test_gpu_composite.cc124
-rw-r--r--src/tests/test_gpu_procedural.cc117
-rw-r--r--src/tests/test_post_process_helper.cc8
-rw-r--r--src/tests/test_shader_compilation.cc7
-rw-r--r--src/util/asset_manager.h1
-rw-r--r--tools/asset_packer.cc63
-rw-r--r--tools/timeline_editor/index.html6
-rw-r--r--tools/validate_uniforms.py178
94 files changed, 2634 insertions, 839 deletions
diff --git a/CLAUDE.md b/CLAUDE.md
index de04208..3d41cb7 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -32,6 +32,12 @@
#
# Testing & Tools:
# doc/test_demo_README.md - test_demo tool documentation
+#
+# Architecture & Reference:
+# doc/ARCHITECTURE.md - Detailed system architecture
+# doc/CODING_STYLE.md - Code style examples
+# doc/BACKLOG.md - Untriaged future goals
+# doc/TOOLS_REFERENCE.md - Developer tools reference
# ============================================
# TIER 4: HISTORICAL ARCHIVE (Load Rarely)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2f939bc..fb6beef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -593,6 +593,22 @@ if(DEMO_BUILD_TESTS)
target_link_libraries(test_texture_manager PRIVATE 3d gpu audio procedural util ${DEMO_LIBS})
add_dependencies(test_texture_manager generate_demo_assets)
+ # GPU Procedural Texture Test
+ add_demo_test(test_gpu_procedural GpuProceduralTest
+ src/tests/test_gpu_procedural.cc
+ ${PLATFORM_SOURCES}
+ ${GEN_DEMO_CC})
+ target_link_libraries(test_gpu_procedural PRIVATE 3d gpu audio procedural util ${DEMO_LIBS})
+ add_dependencies(test_gpu_procedural generate_demo_assets)
+
+ # GPU Composite Texture Test (Phase 4)
+ add_demo_test(test_gpu_composite GpuCompositeTest
+ src/tests/test_gpu_composite.cc
+ ${PLATFORM_SOURCES}
+ ${GEN_DEMO_CC})
+ target_link_libraries(test_gpu_composite PRIVATE 3d gpu audio procedural util ${DEMO_LIBS})
+ add_dependencies(test_gpu_composite generate_demo_assets)
+
# Gantt chart output test (bash script)
add_test(
NAME GanttOutputTest
@@ -612,46 +628,42 @@ if(DEMO_BUILD_TESTS)
)
endif()
-#-- - Extra Tools -- -
-if(DEMO_BUILD_TOOLS OR DEMO_BUILD_TESTS)
- add_demo_executable(spectool tools/spectool.cc ${PLATFORM_SOURCES} ${GEN_DEMO_CC} ${GENERATED_MUSIC_DATA_CC})
- target_compile_definitions(spectool PRIVATE DEMO_BUILD_TOOLS)
- target_link_libraries(spectool PRIVATE audio util procedural ${DEMO_LIBS})
- add_dependencies(spectool generate_tracker_music generate_demo_assets)
+# Sub-task 7: Integrate validation tool into CI/build system
- add_executable(specview tools/specview.cc)
+# Ensure the Python validation script is available
+add_custom_target(validate_uniforms_script ALL DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/tools/validate_uniforms.py)
- add_demo_executable(specplay tools/specplay.cc ${PLATFORM_SOURCES} ${GEN_DEMO_CC} ${GENERATED_MUSIC_DATA_CC})
- target_link_libraries(specplay PRIVATE audio util ${DEMO_LIBS})
- add_dependencies(specplay generate_demo_assets)
-endif()
+# Find all WGSL files recursively in src/gpu
+file(GLOB WGSL_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/*.wgsl ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/**/*.wgsl)
-#-- - Global Target Configuration -- -
-# NOTE: "final" target moved to line ~329 (FINAL_STRIP build)
-# Old "final" target (gen_assets + crunch_demo) removed - run scripts manually
+# List of C++ files containing uniform struct definitions and shader code
+# Add more C++ files here if new effects with structs are added.
+set(VALIDATION_CPP_FILES
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/heptagon_effect.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/post_process_helper.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/fade_effect.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/theme_modulation_effect.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/chroma_aberration_effect.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/vignette_effect.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/gaussian_blur_effect.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/distort_effect.cc
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/demo_effects.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/circle_mask_effect.h
+)
-add_custom_target(pack_source
- COMMAND tar -czf demo_all.tgz --exclude=.git --exclude=build* --exclude=.gemini* --exclude=*.tgz --exclude=*.zip --exclude=.DS_Store .
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+# Add custom command to run the validator
+# It depends on the script itself, WGSL files, and the C++ files being validated.
+# Outputting a flag file to signal completion.
+set(VALIDATION_FLAG ${CMAKE_CURRENT_BINARY_DIR}/uniform_validation_complete.flag)
+add_custom_command(
+ OUTPUT ${VALIDATION_FLAG}
+ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/tools/validate_uniforms.py ${VALIDATION_FLAG}
+ COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/tools/validate_uniforms.py ${CMAKE_CURRENT_SOURCE_DIR}/assets/final/shaders ${VALIDATION_CPP_FILES}
+ DEPENDS validate_uniforms_script ${WGSL_FILES} ${VALIDATION_CPP_FILES}
+ COMMENT "Validating uniform buffer sizes and alignments..."
)
-#-- - Configuration Summary -- -
-message(STATUS "")
-message(STATUS "═══════════════════════════════════════════════════════════")
-message(STATUS " 64k Demo Project - Configuration Summary")
-message(STATUS "═══════════════════════════════════════════════════════════")
-message(STATUS "")
-message(STATUS "Build Options:")
-message(STATUS " DEMO_SIZE_OPT: ${DEMO_SIZE_OPT}")
-message(STATUS " DEMO_STRIP_ALL: ${DEMO_STRIP_ALL}")
-message(STATUS " DEMO_BUILD_TESTS: ${DEMO_BUILD_TESTS}")
-message(STATUS " DEMO_BUILD_TOOLS: ${DEMO_BUILD_TOOLS}")
-message(STATUS " DEMO_ENABLE_COVERAGE: ${DEMO_ENABLE_COVERAGE}")
-message(STATUS " DEMO_ENABLE_DEBUG_LOGS: ${DEMO_ENABLE_DEBUG_LOGS}")
-message(STATUS " DEMO_ALL_OPTIONS: ${DEMO_ALL_OPTIONS}")
-message(STATUS "")
-message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}")
-message(STATUS "C++ Compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
-message(STATUS "")
-message(STATUS "═══════════════════════════════════════════════════════════")
-message(STATUS "")
+# Add custom target that depends on the validation output flag
+add_custom_target(validate_uniforms ALL DEPENDS ${VALIDATION_FLAG})
+
+
diff --git a/GEMINI.md b/GEMINI.md
index a9de297..6fa2692 100644
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -33,6 +33,12 @@
#
# Testing & Tools:
# @doc/test_demo_README.md - test_demo tool documentation
+#
+# Architecture & Reference:
+# @doc/ARCHITECTURE.md - Detailed system architecture
+# @doc/CODING_STYLE.md - Code style examples
+# @doc/BACKLOG.md - Untriaged future goals
+# @doc/TOOLS_REFERENCE.md - Developer tools reference
# ============================================
# TIER 4: HISTORICAL ARCHIVE (Load Rarely)
@@ -104,14 +110,15 @@ IMPORTANT:
</artifact_trail>
<recent_actions>
- - Finished debugging and fixing the `DemoEffectsTest` SEGFAULT.
- - Confirmed that all 33 tests are passing.
- - Updated `GEMINI.md` to reflect the successful completion of Task #74 and set the stage for Task #75.
+ - Completed Task #75: WGSL Uniform Buffer Validation & Consolidation.
+ - Standardized uniform usage across effects (Distort, Fade, ThemeModulation, CircleMask).
+ - Created and integrated `tools/validate_uniforms.py` into the build system.
+ - Added `doc/UNIFORM_BUFFER_GUIDELINES.md` and updated `CONTRIBUTING.md`.
</recent_actions>
<task_state>
1. [COMPLETED] Task #74: Fix `DemoEffectsTest` SEGFAULT.
- 2. [IN PROGRESS] Task #75: WGSL Uniform Buffer Validation & Consolidation.
+ 2. [COMPLETED] Task #75: WGSL Uniform Buffer Validation & Consolidation.
3. [PAUSED] Task #5: Spectral Brush Editor.
4. [PAUSED] Task #18: 3D System Enhancements.
</task_state>
diff --git a/PROJECT_CONTEXT.md b/PROJECT_CONTEXT.md
index 636f339..181bffc 100644
--- a/PROJECT_CONTEXT.md
+++ b/PROJECT_CONTEXT.md
@@ -1,154 +1,90 @@
# 64k Demo Project
-Goal:
+## Goal
- Produce a <=64k native demo binary
- Same C++ codebase for Windows, macOS, Linux
-Graphics:
+## Graphics
- WebGPU via wgpu-native
- WGSL shaders
- Hybrid rendering: Rasterized proxy geometry + SDF raymarching
-Audio:
+## Audio
- 32 kHz, 16-bit stereo
- Procedurally generated samples
- Real-time additive synthesis from spectrograms (IDCT)
- Variable tempo system with music time abstraction
- Event-based pattern triggering for dynamic tempo scaling
- Modifiable Loops and Patterns, w/ script to generate them (like a Tracker)
-- Unified AudioEngine for lifecycle management (eliminates initialization fragility)
+- Unified AudioEngine for lifecycle management
-Constraints:
+## Constraints
- Size-sensitive
- Minimal dependencies
- Explicit control over all allocations
-Style:
+## Style
- Demoscene
- No engine abstractions
---
-## Project Roadmap
-**Note:** For detailed history of recently completed milestones, see `COMPLETED.md`.
+## Current Status
-### Current Status
-- Audio system: Sample-accurate synchronization achieved. Uses hardware playback time as master clock. Variable tempo support integrated. **Pipeline optimized (Task #72)**: Zero heap allocations per frame, direct ring buffer writes, explicit clipping. Comprehensive test coverage maintained.
+- Audio system: Sample-accurate synchronization. Hardware playback time as master clock. Variable tempo support. Pipeline optimized (Task #72): Zero heap allocations per frame, direct ring buffer writes. Comprehensive test coverage.
- Build system: Optimized with proper asset dependency tracking
-- Shader system: **Parameterization complete**: UniformHelper template, per-frame dynamic params, .seq syntax support. Modular with comprehensive compilation tests. **WGSL composability improved**: Common utilities extracted (`math/common_utils.wgsl`) with 12 call sites deduplicated across renderer shaders.
-- 3D rendering: Hybrid SDF/rasterization with BVH acceleration and binary scene loader. **Object data loading and parsing pipeline enhanced for primitives (e.g., plane_distance).**
-- Asset pipeline: Blender export script and binary scene ingestion supported
-- Error handling: **Dual macro system**: `FATAL_XXX` for programming errors (abort), `CHECK_RETURN` for recoverable errors (graceful return). Messages stripped in STRIP_ALL builds.
-- Testing: **32/33 tests passing (97%)** - Uniform buffer alignment fixed (Task #74). DemoEffectsTest fails due to wgpu_native library bug (not project code).
+- Shader system: Parameterization complete (UniformHelper, .seq syntax). Modular with compilation tests. WGSL composability improved (`math/common_utils.wgsl`).
+- 3D rendering: Hybrid SDF/rasterization with BVH acceleration and binary scene loader. Object data loading pipeline enhanced.
+- Asset pipeline: Blender export script and binary scene ingestion
+- Error handling: Dual macro system (`FATAL_XXX` for programming errors, `CHECK_RETURN` for recoverable errors)
+- Testing: **32/33 tests passing (97%)** - DemoEffectsTest fails due to wgpu_native library bug
---
+
## Next Up
-- **Task #5: Spectral Brush Editor** [IN PROGRESS - February 6, 2026]
- - Create web-based tool for procedurally tracing audio spectrograms
+- **Task #5: Spectral Brush Editor** [IN PROGRESS]
+ - Web-based tool for procedurally tracing audio spectrograms
- Replace large .spec assets with tiny C++ code (50-100× compression)
- - Phase 1: C++ runtime (`spectral_brush.h/cc` - Bezier curves + Gaussian profiles)
- - Phase 2: Editor UI (HTML/JS canvas, dual-layer visualization, keyboard shortcuts)
- - Phase 3: File I/O (load .wav/.spec, export procedural_params.txt + C++ code)
- - See `doc/SPECTRAL_BRUSH_EDITOR.md` for complete design
-
-- **Task #72: Audio Pipeline Streamlining** [COMPLETED - February 8, 2026]
- - ✅ Optimize data flow: Zero heap allocations per frame achieved
- - ✅ Direct additive mixing: Ring buffer two-phase write API
- - ✅ Precision: float32 internal pipeline with explicit clipping
+ - See TODO.md and `doc/SPECTRAL_BRUSH_EDITOR.md`
- **Visuals & Content**
- - [ ] **Task #52: Procedural SDF Font**: Minimal bezier/spline set for [A-Z, 0-9] and SDF rendering.
- - [ ] **Task #53: Particles Shader Polish**: Improve visual quality of particles.
- - [ ] **Task #55: SDF Random Planes Intersection**: Implement `sdPolyhedron` (crystal/gem shapes) via plane intersection.
+ - Task #52: Procedural SDF Font
+ - Task #53: Particles Shader Polish
+ - Task #55: SDF Random Planes Intersection
- **Tooling & Optimization**
- - [ ] **Task #54: Tracy Integration**: Integrate Tracy debugger for performance profiling.
- - [x] **Task #39: Visual Debugging System**: Implemented wireframe primitives (Sphere, Cone, Cross, Trajectory) for debugging.
+ - Task #54: Tracy Integration
---
+
## Design Docs Quick Reference
For detailed documentation, use Read tool to load specific docs:
-- **doc/TRACKER.md**: Audio pattern system with unit-less timing (1 unit = 4 beats). Text-based music score compiled to C++ runtime.
-- **doc/3D.md**: Hybrid SDF raymarching with BVH acceleration and Position Based Dynamics physics.
-- **doc/ASSET_SYSTEM.md**: Build-time asset packer with 16-byte alignment, enum-based O(1) retrieval, procedural generation support.
-- **doc/BUILD.md**: Multi-platform builds (Debug/STRIP_ALL/FINAL_STRIP), cross-compilation, size reporting.
-- **doc/SPECTRAL_BRUSH_EDITOR.md**: Web tool for tracing spectrograms with Bezier curves (50-100× compression).
-- **doc/SEQUENCE.md**: .seq timeline format with BPM notation, priority modifiers, Gantt visualization.
-- **doc/MASKING_SYSTEM.md**: Auxiliary texture registry for inter-effect screen-space partitioning.
-- **doc/SCENE_FORMAT.md**: Binary scene format (SCN1) with object transforms, physics, mesh references.
-- **doc/test_demo_README.md**: 16s audio/visual sync test tool with tempo variation and peak logging.
-- **doc/CONTEXT_MAINTENANCE.md**: Context hygiene protocol (archive to COMPLETED.md monthly, keep Tier 1 files lean).
+- **doc/TRACKER.md**: Audio pattern system with unit-less timing
+- **doc/3D.md**: Hybrid SDF raymarching with BVH acceleration
+- **doc/ASSET_SYSTEM.md**: Build-time asset packer with 16-byte alignment
+- **doc/BUILD.md**: Multi-platform builds (Debug/STRIP_ALL/FINAL_STRIP)
+- **doc/SPECTRAL_BRUSH_EDITOR.md**: Web tool for tracing spectrograms
+- **doc/SEQUENCE.md**: .seq timeline format with BPM notation
+- **doc/MASKING_SYSTEM.md**: Auxiliary texture registry
+- **doc/SCENE_FORMAT.md**: Binary scene format (SCN1)
+- **doc/test_demo_README.md**: 16s audio/visual sync test tool
+- **doc/CONTEXT_MAINTENANCE.md**: Context hygiene protocol
---
-## Future Goals
-- **Task #36: Blender Exporter**: Create script to export scenes to internal binary format. (Deprioritized)
-- **Task #21: Shader Optimization**
- - [ ] Use macros or code generation to factorize common WGSL code (normals, bump, lighting).
- - [ ] Implement Tri-planar mapping for better procedural textures.
-- [ ] **Task #18-B: GPU BVH & Shadows**: Optimize scene queries with a GPU-based BVH.
-- **Phase 2: Advanced Size Optimization**
- - [ ] **Task #22: Windows Native Platform**: Replace GLFW with minimal native Windows API.
- - [ ] **Task #28: Spectrogram Quantization**: Quantize spectrograms to logarithmic frequency and uint16_t.
- - [ ] **Task #35: CRT Replacement**: Investigation and implementation of CRT-free entry point.
-
----
-*For a detailed list of all completed tasks, see the git history.*
## Recently Completed (February 2026)
-- **Uniform Buffer Alignment Fix** (February 9) - Task #74: Fixed WebGPU validation errors caused by WGSL `vec3<f32>` alignment mismatches. Changed circle_mask_compute.wgsl padding from `vec3<f32>` to three `f32` fields. Demo now runs with 0 validation errors. Test suite: 32/33 passing (97%).
-
-- **Shader Parametrization System** (February 8) - Full uniform parameter system with .seq syntax support. FlashEffect now supports dynamic color/decay parameters computed per-frame. Critical WGSL alignment bugfix (vec3 = 16-byte aligned). Size: ~400-500 bytes. See `doc/COMPLETED.md` for details.
+- **WGSL Uniform Buffer Validation (Task #75)** (Feb 9) - Standardized uniform buffer layout. Validation tool integrated into build. All effects use `CommonPostProcessUniforms` (binding 2) + effect-specific params (binding 3). Added `UNIFORM_BUFFER_GUIDELINES.md`.
-- **Extended Shader Parametrization** (February 8) - Task #73 (2/4 effects complete):
- - ChromaAberrationEffect: Added offset_scale and angle parameters (diagonal/vertical aberration modes)
- - GaussianBlurEffect: Added strength parameter (configurable blur radius)
- - Both effects follow FlashEffect pattern (UniformHelper, params struct, .seq syntax)
- - Size: ~200-300 bytes per effect
+- **Uniform Buffer Alignment (Task #74)** (Feb 9) - Fixed WGSL `vec3<f32>` alignment issues. Demo runs with 0 validation errors.
-- **WGSL Shader Composability** - Extracted common utilities to `math/common_utils.wgsl`:
- - `transform_normal()` - 2 call sites (renderer_3d, mesh_render)
- - `spherical_uv()` / `spherical_uv_from_dir()` - 8 call sites (renderer_3d, skybox)
- - `grid_pattern()` - 2 call sites (renderer_3d)
- - Size savings: ~200 bytes net
+- **Shader Parametrization (Task #73)** (Feb 8) - Full uniform parameter system with .seq syntax. FlashEffect, ChromaAberrationEffect, GaussianBlurEffect support dynamic parameters. Size: ~400-500 bytes.
-- **Test Suite Optimization** - JitteredAudioBackendTest: 3.5s → 0.07s (50x speedup)
- - Reduced test duration and sleep times
- - Full CI suite now <1 second
-
-- **CHECK_RETURN Macro System** - Error handling for recoverable errors:
- - `CHECK_RETURN_IF()` - Simple validation with return
- - `CHECK_RETURN_BEGIN/END` - Complex validation with cleanup
- - `WARN_IF()` - Non-fatal warnings
- - Applied to 5 call sites (asset_manager, test_demo)
- - Size impact: ~500 bytes saved in STRIP_ALL builds
-
-## Architectural Overview
-
-### Hybrid 3D Renderer
-- **Core Idea**: Uses standard rasterization to draw proxy hulls (boxes), then raymarches inside the fragment shader to find the exact SDF surface.
-- **Transforms**: Uses `inv_model` matrices to perform all raymarching in local object space, handling rotation and non-uniform scaling correctly.
-- **Shadows**: Instance-based shadow casting with self-shadowing prevention (`skip_idx`).
-
-### Sequence & Effect System
-- **Effect**: Abstract base for visual elements. Supports `compute` and `render` phases.
-- **Sequence**: Timeline of effects with start/end times.
-- **MainSequence**: Top-level coordinator and framebuffer manager.
-- **seq_compiler**: Transpiles `assets/demo.seq` into C++ `timeline.cc`.
+---
-### Asset & Build System
-- **asset_packer**: Embeds binary assets (like `.spec` files) into C++ arrays.
-- **Runtime Manager**: O(1) retrieval with lazy procedural generation support.
-- **Automation**: `gen_assets.sh`, `build_win.sh`, and `check_all.sh` for multi-platform validation.
+For detailed architecture, see `doc/ARCHITECTURE.md`.
-### Audio Engine
-- **Synthesis**: Real-time additive synthesis from spectrograms via FFT-based IDCT (O(N log N)). Stereo output (32kHz, 16-bit, interleaved L/R). Uses orthonormal DCT-II/DCT-III transforms with Numerical Recipes reordering method.
-- **Variable Tempo**: Music time abstraction with configurable tempo_scale. Tempo changes don't affect pitch.
-- **Event-Based Tracker**: Individual TrackerEvents trigger as separate voices with dynamic beat calculation. Notes within patterns respect tempo scaling.
-- **Backend Abstraction**: `AudioBackend` interface with `MiniaudioBackend` (production), `MockAudioBackend` (testing), and `WavDumpBackend` (offline rendering).
-- **Dynamic Updates**: Double-buffered spectrograms for live thread-safe updates.
-- **Procedural Library**: Melodies and spectral filters (noise, comb) generated at runtime.
-- **Pattern System**: TrackerPatterns contain lists of TrackerEvents (beat, sample_id, volume, pan). Events trigger individually based on elapsed music time. \ No newline at end of file
+For completed tasks history, see `doc/COMPLETED.md` and git history.
diff --git a/TODO.md b/TODO.md
index 4b5819b..10f0661 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,153 +2,39 @@
This file tracks prioritized tasks with detailed attack plans.
-**Note:** For a history of recently completed tasks, see `COMPLETED.md`.
-
-## Recently Completed (February 9, 2026)
-
-- [x] **Uniform Buffer Alignment (Task #74)**: Fixed WGSL struct alignment issues across multiple shaders:
- - `circle_mask_compute.wgsl`: Changed `_pad: vec3<f32>` to three `f32` fields
- - `fade_effect.cc`: Changed EffectParams padding from `vec3<f32>` to `_pad0/1/2: f32`
- - `theme_modulation_effect.cc`: Same padding fix for EffectParams
- - Fixed ODR violation in `demo_effects.h` (incomplete FadeEffect forward declaration)
- - Renamed shadowing `uniforms_` members to `common_uniforms_`/`flash_uniforms_`
- - Result: demo64k runs without crashes, 33/33 tests passing (100%)
-
-## Previously Completed (February 8, 2026)
-
-- [x] **Shader Parametrization System**: Full uniform parameter system with .seq syntax support. FlashEffect now supports color/decay parameters with per-frame animation. See `COMPLETED.md` for details.
-- [x] **ChromaAberrationEffect Parametrization**: Added offset_scale and angle parameters. Supports diagonal and vertical aberration modes via .seq syntax.
-- [x] **GaussianBlurEffect Parametrization**: Added strength parameter. Replaces hardcoded blur radius with configurable value.
-
----
-
-## Priority 1: Uniform Buffer Alignment (Task #74) [COMPLETED - February 9, 2026]
-
-**Goal**: Fix WebGPU uniform buffer size/padding/alignment mismatches between C++ structs and WGSL shaders.
-
-**Root Cause**: WGSL `vec3<f32>` has 16-byte alignment (not 12), causing struct padding mismatches. Using `vec3<f32>` for padding fields created unpredictable struct sizes.
-
-**Fixes Applied**:
-- `circle_mask_compute.wgsl`: Changed `_pad: vec3<f32>` to three separate `f32` fields
- - Before: 24+ bytes in WGSL, 16 bytes in C++
- - After: 16 bytes in both
-- Verified all shaders use individual `f32` fields for padding (no `vec3` in padding)
-
-**Results**:
-- ✅ demo64k: Runs with **0 WebGPU validation errors**
-- ✅ Test suite: **32/33 tests passing (97%)**
-- ❌ DemoEffectsTest: SEGFAULT in wgpu_native library (unrelated to alignment fixes)
-
-**Key Lesson**: Never use `vec3<f32>` for padding in WGSL uniform structs. Always use individual `f32` fields to ensure predictable alignment.
-
----
-
-## Priority 1: WGSL Uniform Buffer Validation & Consolidation (Task #75)
-
-**Goal**: Prevent alignment bugs by consolidating uniform buffer patterns and creating automated validation.
-
-**Background**: Recent bugs (Task #74) revealed WGSL `vec3<f32>` alignment issues causing 16-byte padding where 12 bytes expected. Need systematic approach to prevent recurrence.
-
-**Attack Plan**:
-
-### Phase 1: Audit & Document (1-2 hours)
-- [ ] **1.1**: Audit all WGSL shaders for uniform struct definitions
- - List all uniform structs, their sizes, and padding strategies
- - Identify inconsistencies (vec3 padding vs individual f32 fields)
- - Document in `doc/UNIFORM_BUFFER_GUIDELINES.md`
-- [ ] **1.2**: Audit C++ struct definitions (CommonPostProcessUniforms, etc.)
- - Verify static_assert size checks exist for all uniform structs
- - Check for missing size validation
-
-### Phase 2: Consolidation (2-3 hours)
-- [ ] **2.1**: Standardize on CommonUniforms pattern
- - All post-process effects should use CommonPostProcessUniforms for binding 2
- - Effect-specific params at binding 3 (16 or 32 bytes, properly padded)
-- [ ] **2.2**: Eliminate `vec3<f32>` in padding fields
- - Replace all `_pad: vec3<f32>` with `_pad0/1/2: f32`
- - Apply to: FadeEffect, ThemeModulationEffect, any other effects
-- [ ] **2.3**: Add C++ wrapper structs with static_assert
- - Every WGSL uniform struct should have matching C++ struct
- - All structs require `static_assert(sizeof(...) == EXPECTED_SIZE)`
-
-### Phase 3: Validation Tool (3-4 hours)
-- [ ] **3.1**: Create `tools/validate_uniforms.py`
- - Parse WGSL shader files for uniform struct definitions
- - Calculate expected size using WGSL alignment rules:
- - `f32`: 4-byte aligned
- - `vec2<f32>`: 8-byte aligned
- - `vec3<f32>`: **16-byte aligned** (not 12!)
- - `vec4<f32>`: 16-byte aligned
- - Struct size: rounded to largest member alignment
-- [ ] **3.2**: Parse C++ headers for matching structs
- - Extract `sizeof()` from static_assert statements
- - Match WGSL struct names to C++ struct names
-- [ ] **3.3**: Report mismatches
- - Exit non-zero if C++ size != WGSL size
- - Print detailed alignment breakdown for debugging
-- [ ] **3.4**: Integrate into CI/build system
- - Add CMake custom command to run validation
- - Fail build if validation fails (development builds only)
- - Add to `scripts/check_all.sh`
-
-### Phase 4: Documentation (1 hour)
-- [ ] **4.1**: Write `doc/UNIFORM_BUFFER_GUIDELINES.md`
- - Explain WGSL alignment rules (with examples)
- - Document standard patterns (CommonUniforms, effect params)
- - Show correct padding techniques
- - Add examples of common mistakes
-- [ ] **4.2**: Update CONTRIBUTING.md
- - Add "Uniform Buffer Checklist" section
- - Require validation tool passes before commit
-
-**Size Impact**: Negligible (consolidation may save 50-100 bytes)
-
-**Priority**: High (prevents entire class of subtle bugs)
-
-**Dependencies**: None
+**Note:** For completed tasks, see `doc/COMPLETED.md`.
---
## Priority 1: Spectral Brush Editor (Task #5) [IN PROGRESS]
-**Goal:** Create a web-based tool for procedurally tracing audio spectrograms. Replaces large `.spec` binary assets with tiny procedural C++ code (50-100× compression).
+**Goal:** Web-based tool for procedurally tracing audio spectrograms. Replaces large `.spec` binary assets with tiny procedural C++ code (50-100× compression).
**Design Document:** See `doc/SPECTRAL_BRUSH_EDITOR.md` for complete architecture.
-**Core Concept: "Spectral Brush"**
-- **Central Curve** (Bezier): Traces time-frequency path through spectrogram
-- **Vertical Profile**: Shapes "brush stroke" around curve (Gaussian, Decaying Sinusoid, Noise)
+**Core Concept:** Bezier curves trace time-frequency paths. Gaussian profiles shape "brush strokes" around curves.
-**Workflow:**
-```
-.wav → Load in editor → Trace with Bezier curves → Export procedural_params.txt + C++ code
-```
+**Workflow:** `.wav` → Load in editor → Trace with Bezier curves → Export `procedural_params.txt` + C++ code
### Phase 1: C++ Runtime (Foundation)
-- [ ] **Files:** `src/audio/spectral_brush.h`, `src/audio/spectral_brush.cc`
+- [ ] Files: `src/audio/spectral_brush.h`, `src/audio/spectral_brush.cc`
- [ ] Define API (`ProfileType`, `draw_bezier_curve()`, `evaluate_profile()`)
- [ ] Implement linear Bezier interpolation
- [ ] Implement Gaussian profile evaluation
-- [ ] Implement home-brew deterministic RNG (for future noise support)
+- [ ] Implement home-brew deterministic RNG
- [ ] Add unit tests (`src/tests/test_spectral_brush.cc`)
- [ ] **Deliverable:** Compiles, tests pass
### Phase 2: Editor Core
-- [ ] **Files:** `tools/spectral_editor/index.html`, `script.js`, `style.css`, `dct.js` (reuse from old editor)
+- [ ] Files: `tools/spectral_editor/index.html`, `script.js`, `style.css`, `dct.js`
- [ ] HTML structure (canvas, controls, file input)
- [ ] Canvas rendering (dual-layer: reference + procedural)
-- [ ] Bezier curve editor (click to place, drag to adjust, delete control points)
+- [ ] Bezier curve editor (click, drag, delete control points)
- [ ] Profile controls (Gaussian sigma slider)
- [ ] Real-time spectrogram rendering
- [ ] Audio playback (IDCT → Web Audio API)
-- [ ] Undo/Redo system (action history with snapshots)
-- [ ] **Keyboard shortcuts:**
- - Key '1': Play procedural sound
- - Key '2': Play original .wav
- - Space: Play/pause
- - Ctrl+Z: Undo
- - Ctrl+Shift+Z: Redo
- - Delete: Remove control point
+- [ ] Undo/Redo system
+- [ ] Keyboard shortcuts (1=play procedural, 2=play original, Space, Ctrl+Z, Delete)
- [ ] **Deliverable:** Interactive editor, can trace .wav files
### Phase 3: File I/O
@@ -164,178 +50,74 @@ This file tracks prioritized tasks with detailed attack plans.
- [ ] Decaying sinusoid profile (metallic sounds)
- [ ] Noise profile (textured sounds)
- [ ] Composite profiles (add/subtract/multiply)
-- [ ] Multi-dimensional Bezier ({freq, amplitude, decay, ...})
-- [ ] Frequency snapping (snap to musical notes)
-- [ ] Generic `gen_from_params()` code generation
-**Design Decisions:**
-- Linear Bezier interpolation (Phase 1), cubic later
-- Soft parameter limits in UI (not enforced)
-- Home-brew RNG (small, deterministic)
-- Single function per sound (generic loader later)
-- Start with Bezier + Gaussian only
+**Design Decisions:** Linear Bezier (Phase 1), cubic later. Soft parameter limits. Home-brew RNG. Single function per sound initially.
**Size Impact:** 50-100× compression (5 KB .spec → ~100 bytes C++ code)
---
## Priority 2: 3D System Enhancements (Task #18)
-**Goal:** Establish a pipeline for importing complex 3D scenes to replace hardcoded geometry. **Progress:** C++ pipeline for loading and processing object-specific data (like plane_distance) is now in place. Shader integration for SDFs is pending.
+**Goal:** Establish pipeline for importing complex 3D scenes to replace hardcoded geometry.
-## Priority 3: WGSL Modularization (Task #50) [RECURRENT]
+**Progress:** C++ pipeline for loading object-specific data (plane_distance) is in place. Shader integration for SDFs pending.
-**Goal**: Refactor `ShaderComposer` and WGSL assets to support granular, reusable snippets and `#include` directives. This is an ongoing task to maintain shader code hygiene as new features are added.
+---
+## Priority 3: WGSL Modularization (Task #50) [RECURRENT]
+**Goal:** Refactor `ShaderComposer` and WGSL assets to support granular, reusable snippets. Ongoing task for shader code hygiene.
-## Phase 2: Size Optimization (Final Goal)
+### Sub-task: Split common_uniforms.wgsl (Low Priority)
+**Current:** `common_uniforms.wgsl` contains 4 structs (CommonUniforms, GlobalUniforms, ObjectData, ObjectsBuffer)
-- [ ] **Task #34: Full STL Removal**: Replace all remaining `std::vector`, `std::map`, and `std::string` usage with custom minimal containers or C-style arrays to allow for CRT replacement. (Minimal Priority - deferred to end).
+**Goal:** Split into separate files:
+- `common_uniforms/common.wgsl` - CommonUniforms only
+- `common_uniforms/global.wgsl` - GlobalUniforms only
+- `common_uniforms/object.wgsl` - ObjectData + ObjectsBuffer
-- [ ] **Task #22: Windows Native Platform**: Replace GLFW with direct Win32 API calls for the final 64k push.
+**Benefit:** Shaders only include what they need, reducing compiled size
-- [ ] **Task #28: Spectrogram Quantization**: Research optimal frequency bin distribution and implement quantization.
+**Impact:** Minimal (most shaders only use CommonUniforms)
-- [ ] **Task #35: CRT Replacement**: investigation and implementation of CRT-free entry point.
+**Priority:** Low (nice-to-have)
-## Future Goals & Ideas (Untriaged)
+### Sub-task: Type-safe shader composition (Low Priority)
+**Problem:** Recurrent error of forgetting `ShaderComposer::Get().Compose({}, code)` and using raw `code` directly. Runtime error only (crashes demo, tests may pass).
-### Audio Tools
-- [ ] **Task #64: specplay Enhancements**: Extend audio analysis tool with new features
- - **Priority 1**: Spectral visualization (ASCII art), waveform display, frequency analysis, dynamic range
- - **Priority 2**: Diff mode (compare .wav vs .spec), batch mode (CSV report, find clipping)
- - **Priority 3**: WAV export (.spec → .wav), normalization
- - **Priority 4**: Spectral envelope, harmonic analysis, onset detection
- - **Priority 5**: Interactive mode (seek, loop, volume control)
- - See `tools/specplay_README.md` for detailed feature list
+**Solution:** Use strong typing to make it compile-time error:
+```cpp
+class ComposedShader {
+ private:
+ std::string code_;
+ friend class ShaderComposer;
+ explicit ComposedShader(std::string code) : code_(std::move(code)) {}
+ public:
+ const char* c_str() const { return code_.c_str(); }
+};
+```
+
+**Changes:**
+- `ShaderComposer::Compose()` returns `ComposedShader` instead of `std::string`
+- All shader creation functions take `const ComposedShader&` instead of `const char*`
+- Cannot pass raw string to shader functions (compile error)
-- [ ] **Task #65: Data-Driven Tempo Control**: Move tempo variation from code to data files
- - **Current**: `g_tempo_scale` is hardcoded in `main.cc` with manual animation curves
- - **Goal**: Define tempo curves in `.seq` or `.track` files for data-driven tempo control
- - **Approach A**: Add TEMPO directive to `.seq` format
- - Example: `TEMPO 0.0 1.0`, `TEMPO 10.0 2.0`, `TEMPO 20.0 1.0` (time, scale pairs)
- - seq_compiler generates tempo curve array in timeline.cc
- - **Approach B**: Add tempo column to music.track
- - Each pattern trigger can specify tempo_scale override
- - tracker_compiler generates tempo events in music_data.cc
- - **Benefits**: Non-programmers can edit tempo, easier iteration, version control friendly
- - **Priority**: Low (current hardcoded approach works, but less flexible)
+**Benefits:** Impossible to forget composition (type mismatch). Self-documenting API. Compile-time error.
-- [ ] **Task #67: DCT/FFT Performance Benchmarking**: Add timing measurements to audio tests
- - **Goal**: Compare performance of different DCT/IDCT implementations
- - **Location**: Add timing code to `test_dct.cc` or `test_fft.cc`
- - **Measurements**:
- - Reference IDCT/FDCT (naive O(N²) implementation)
- - FFT-based DCT/IDCT (current O(N log N) implementation)
- - Future x86_64 SIMD-optimized versions (when implemented)
- - **Output Format**:
- - Average time per transform (microseconds)
- - Throughput (transforms per second)
- - Speedup factor vs reference implementation
- - **Test Sizes**: DCT_SIZE=512 (production), plus 128, 256, 1024 for scaling analysis
- - **Implementation**:
- - Use `std::chrono::high_resolution_clock` for timing
- - Run each test 1000+ iterations to reduce noise
- - Report min/avg/max times
- - Guard with `#if !defined(STRIP_ALL)` to avoid production overhead
- - **Benefits**: Quantify FFT speedup, validate SIMD optimizations, identify regressions
- - **Priority**: Very Low (nice-to-have for future optimization work)
+**Trade-offs:** More verbose code. Small overhead (extra std::string copy, negligible).
-- [ ] **Task #69: Convert Audio Pipeline to Clipped Int16**: Use clipped int16 for all audio processing
- - **Current**: Audio pipeline uses float32 throughout (generation, mixing, synthesis, output)
- - **Goal**: Convert to clipped int16 for faster/easier processing and reduced memory footprint
- - **Rationale**:
- - Simpler arithmetic (no float operations)
- - Smaller memory footprint (2 bytes vs 4 bytes per sample)
- - Hardware-native format (most audio devices use int16)
- - Eliminates float→int16 conversion at output stage
- - Natural clipping behavior (overflow wraps/clips automatically)
- - **Scope**:
- - Output path: Definitely convert (backends, WAV dump)
- - Synthesis: Consider keeping float32 for quality (IDCT produces float)
- - Mixing: Could use int16 with proper overflow handling
- - Asset storage: Already int16 in .spec files
- - **Implementation Phases**:
- 1. **Phase 1: Output Only** (Minimal change, ~50 lines)
- - Convert `synth_render()` output from float to int16
- - Update `MiniaudioBackend` and `WavDumpBackend` to accept int16
- - Keep all internal processing as float
- - **Benefit**: Eliminates final conversion step
- 2. **Phase 2: Mixing Stage** (Moderate change, ~200 lines)
- - Convert voice mixing to int16 arithmetic
- - Add saturation/clipping logic
- - Keep IDCT output as float, convert after synthesis
- - **Benefit**: Faster mixing, reduced memory bandwidth
- 3. **Phase 3: Full Pipeline** (Large change, ~500+ lines)
- - Convert spectrograms from float to int16 storage
- - Modify IDCT to output int16 directly
- - All synthesis in int16
- - **Benefit**: Maximum size reduction and performance
- - **Trade-offs**:
- - Quality loss: 16-bit resolution vs 32-bit float precision
- - Dynamic range: Limited to [-32768, 32767]
- - Clipping: Must handle overflow carefully in mixing stage
- - Code complexity: Saturation arithmetic more complex than float
- - **Testing Requirements**:
- - Verify no audible quality degradation
- - Ensure clipping behavior matches float version
- - Check mixing overflow doesn't cause artifacts
- - Validate WAV dumps bit-identical to hardware output
- - **Size Impact**:
- - Phase 1: Negligible (~50 bytes)
- - Phase 2: Small reduction (~100-200 bytes, faster code)
- - Phase 3: Large reduction (50% memory, ~1-2KB code savings)
- - **Priority**: Low (final optimization, after size budget is tight)
- - **Notes**:
- - This is a FINAL optimization task, only if 64k budget requires it
- - Quality must be validated - may not be worth the trade-off
- - Consider keeping float for procedural generation quality
+**Priority:** Low (recurrent but rare, easy to catch in testing)
-### Developer Tools
-- [ ] **Task #66: External Asset Loading for Debugging**: mmap() asset files instead of embedded data
- - **Current**: All assets embedded in `assets_data.cc` (regenerate on every asset change)
- - **Goal**: Load assets from external files in debug builds for faster iteration
- - **Scope**: macOS only, non-STRIP_ALL builds only
- - **Implementation**:
- - Add `DEMO_ENABLE_EXTERNAL_ASSETS` CMake option
- - Modify `GetAsset()` to check for external file first (e.g., `assets/final/<name>`)
- - Use `mmap()` to map file into memory (replaces `uint8_t asset[]` array)
- - Fallback to embedded data if file not found
- - **Benefits**: Edit shaders/assets without regenerating assets_data.cc (~10s rebuild)
- - **Trade-offs**: Adds runtime file I/O, only useful during development
- - **Priority**: Low (current workflow acceptable, but nice-to-have for rapid iteration)
+---
-### Visual Effects
-- [ ] **Task #73: Extend Shader Parametrization** [IN PROGRESS - 2/4 complete]
- - **Goal**: Extend uniform parameter system to ChromaAberrationEffect, GaussianBlurEffect, DistortEffect, SolarizeEffect
- - **Pattern**: Follow FlashEffect implementation (UniformHelper, params struct, .seq syntax)
- - **Completed**: ChromaAberrationEffect (offset_scale, angle), GaussianBlurEffect (strength)
- - **Remaining**: DistortEffect, SolarizeEffect
- - **Priority**: Medium (quality-of-life improvement for artists)
- - **Estimated Impact**: ~200-300 bytes per effect
-- [ ] **Task #52: Procedural SDF Font**: Minimal bezier/spline set for [A-Z, 0-9] and SDF rendering.
-- [ ] **Task #55: SDF Random Planes Intersection**: Implement `sdPolyhedron` (crystal/gem shapes) via plane intersection.
-- [ ] **Task #54: Tracy Integration**: Integrate Tracy debugger for performance profiling.
-- [ ] **Task #58: Advanced Shader Factorization**: Further factorize WGSL code into smaller, reusable snippets.
-- [ ] **Task #59: Comprehensive RNG Library**: Add WGSL snippets for float/vec2/vec3 noise (Perlin, Gyroid, etc.) and random number generators.
-- [ ] **Task #60: OOP Refactoring**: Investigate if more C++ code can be made object-oriented without size penalty (vs functional style).
-- [ ] **Task #61: GPU Procedural Generation**: Implement system to generate procedural data (textures, geometry) on GPU and read back to CPU.
-- [ ] **Task #62: Physics Engine Enhancements (PBD & Rotation)**:
- - [ ] **Task #62.1: Quaternion Rotation**: Implement quaternion-based rotation for `Object3D` and incorporate angular momentum into physics.
- - [ ] **Task #62.2: Position Based Dynamics (PBD)**: Refactor solver to re-evaluate velocity after resolving all collisions and constraints.
-- [ ] **Task #63: Refactor large files**: Split `src/gpu/gpu.cc`, `src/3d/visual_debug.cc` and `src/gpu/effect.cc` into sub-functionalities. (`src/3d/renderer.cc` was also over 500 lines and was taken care of in the past)
+## Phase 2: Size Optimization (Final Goal)
-### Performance Optimization
-- [ ] **Task #70: SIMD x86_64 Implementation**: Implement critical functions using intrinsics for x86_64 platforms.
- - **Goal**: Optimize hot paths for audio and procedural generation.
- - **Scope**:
- - IDCT/FDCT transforms
- - Audio mixing and voice synthesis
- - CPU-side procedural texture/geometry generation
- - **Constraint**: Non-critical; fallback to generic C++ must be maintained.
- - **Priority**: Very Low
+- [ ] **Task #34: Full STL Removal** - Replace remaining `std::vector`, `std::map`, `std::string` with custom containers
+- [ ] **Task #22: Windows Native Platform** - Replace GLFW with Win32 API
+- [ ] **Task #28: Spectrogram Quantization** - Research optimal frequency distribution
+- [ ] **Task #35: CRT Replacement** - Investigation and implementation of CRT-free entry
---
-## Future Goals \ No newline at end of file
+For untriaged future goals and ideas, see `doc/BACKLOG.md`.
diff --git a/assets/demo.seq b/assets/demo.seq
index 0dfb108..a8717f3 100644
--- a/assets/demo.seq
+++ b/assets/demo.seq
@@ -30,9 +30,10 @@ SEQUENCE 0b 0
EFFECT + VignetteEffect 0 6 radius=0.6 softness=0.1
SEQUENCE 2.0 0
- EFFECT + CircleMaskEffect 0.0 2.0 0.35 # Priority 0 (mask generator, radius 0.35)
- EFFECT + RotatingCubeEffect 0.0 2.0 # Priority 1 (renders inside circle)
- EFFECT + GaussianBlurEffect 0.0 2.0 strength=2.0 # Priority 2 (post-process blur)
+ EFFECT + CircleMaskEffect 0.0 4.0 0.50 # Priority 0 mask generator
+ EFFECT + RotatingCubeEffect 0.0 4.0 # Priority 1 (renders inside circle)
+ EFFECT + GaussianBlurEffect 1.0 2.0 strength=1.0
+ EFFECT + GaussianBlurEffect 3.0 4.0 strength=2.0
SEQUENCE 4b 0
EFFECT - FlashCubeEffect 0.1 3. # Priority -1
diff --git a/assets/final/demo_assets.txt b/assets/final/demo_assets.txt
index 05eee17..96f86f9 100644
--- a/assets/final/demo_assets.txt
+++ b/assets/final/demo_assets.txt
@@ -1,22 +1,22 @@
# Asset Name, Compression Type, Filename/Placeholder, Description
# --- Drum & Percussion Samples ---
-KICK_1, NONE, KICK_606.spec, "606 Kick"
-KICK_2, NONE, KICK_90S_2.spec, "90s Kick"
-SNARE_1, NONE, SNARE_808.spec, "808 Snare"
-SNARE_2, NONE, SNARE_909_TUNE_8.spec, "909 Snare"
-SNARE_3, NONE, SNARE_BLUE_ROOM.spec, "Snare Blue Room"
-HIHAT_1, NONE, HIHAT_CLOSED_DMX.spec, "DMX Closed Hi-hat"
-HIHAT_2, NONE, HIHAT_CLOSED_DUFF.spec, "Duff Closed Hi-hat"
-HIHAT_3, NONE, HIHAT_CLOSED_ER_1.spec, "ER-1 Closed Hi-hat"
-CRASH_1, NONE, CRASH_DMX.spec, "DMX Crash"
-RIDE_1, NONE, RIDE_CUP_1.spec, "Ride Cymbal"
-SPLASH_1, NONE, SPLASH_GROUNDED.spec, "Splash Cymbal"
+KICK_1, NONE, music/KICK_606.spec, "606 Kick"
+KICK_2, NONE, music/KICK_90S_2.spec, "90s Kick"
+SNARE_1, NONE, music/SNARE_808.spec, "808 Snare"
+SNARE_2, NONE, music/SNARE_909_TUNE_8.spec, "909 Snare"
+SNARE_3, NONE, music/SNARE_BLUE_ROOM.spec, "Snare Blue Room"
+HIHAT_1, NONE, music/HIHAT_CLOSED_DMX.spec, "DMX Closed Hi-hat"
+HIHAT_2, NONE, music/HIHAT_CLOSED_DUFF.spec, "Duff Closed Hi-hat"
+HIHAT_3, NONE, music/HIHAT_CLOSED_ER_1.spec, "ER-1 Closed Hi-hat"
+CRASH_1, NONE, music/CRASH_DMX.spec, "DMX Crash"
+RIDE_1, NONE, music/RIDE_CUP_1.spec, "Ride Cymbal"
+SPLASH_1, NONE, music/SPLASH_GROUNDED.spec, "Splash Cymbal"
# --- Melodic Samples ---
-BASS_1, NONE, BASS_GUITAR_FEEL.spec, "Bass Guitar"
-BASS_2, NONE, BASS_SYNTH_1.spec, "Synth Bass 1"
-BASS_3, NONE, SYNTH_BASS_DISTORT.spec, "Distorted Synth Bass"
+BASS_1, NONE, music/BASS_GUITAR_FEEL.spec, "Bass Guitar"
+BASS_2, NONE, music/BASS_SYNTH_1.spec, "Synth Bass 1"
+BASS_3, NONE, music/SYNTH_BASS_DISTORT.spec, "Distorted Synth Bass"
# --- Procedural Textures ---
NOISE_TEX, PROC(gen_noise, 1234, 16), _, "Procedural noise texture for bump mapping"
@@ -52,6 +52,11 @@ SHADER_MESH, NONE, shaders/mesh_render.wgsl, "Mesh Rasterization Shader"
MESH_CUBE, NONE, test_mesh.obj, "A simple cube mesh"
DODECAHEDRON, NONE, dodecahedron.obj, "A dodecahedron mesh"
SHADER_VIGNETTE, NONE, shaders/vignette.wgsl, "Vignette Shader"
+SHADER_COMPUTE_GEN_NOISE, NONE, shaders/compute/gen_noise.wgsl, "GPU Noise Compute Shader"
+SHADER_COMPUTE_GEN_PERLIN, NONE, shaders/compute/gen_perlin.wgsl, "GPU Perlin Noise Compute Shader"
+SHADER_COMPUTE_GEN_GRID, NONE, shaders/compute/gen_grid.wgsl, "GPU Grid Compute Shader"
+SHADER_COMPUTE_GEN_BLEND, NONE, shaders/compute/gen_blend.wgsl, "GPU Blend Composite Shader"
+SHADER_COMPUTE_GEN_MASK, NONE, shaders/compute/gen_mask.wgsl, "GPU Mask Composite Shader"
CIRCLE_MASK_COMPUTE_SHADER, NONE, shaders/circle_mask_compute.wgsl, "Circle mask compute shader"
CIRCLE_MASK_RENDER_SHADER, NONE, shaders/circle_mask_render.wgsl, "Circle mask render shader"
MASKED_CUBE_SHADER, NONE, shaders/masked_cube.wgsl, "Masked cube shader"
diff --git a/assets/final/BASS_GUITAR_FEEL.spec b/assets/final/music/BASS_GUITAR_FEEL.spec
index 54f49a6..54f49a6 100644
--- a/assets/final/BASS_GUITAR_FEEL.spec
+++ b/assets/final/music/BASS_GUITAR_FEEL.spec
Binary files differ
diff --git a/assets/final/BASS_SYNTH_1.spec b/assets/final/music/BASS_SYNTH_1.spec
index 33bc0a0..33bc0a0 100644
--- a/assets/final/BASS_SYNTH_1.spec
+++ b/assets/final/music/BASS_SYNTH_1.spec
Binary files differ
diff --git a/assets/final/CRASH_DMX.spec b/assets/final/music/CRASH_DMX.spec
index 45ee52d..45ee52d 100644
--- a/assets/final/CRASH_DMX.spec
+++ b/assets/final/music/CRASH_DMX.spec
Binary files differ
diff --git a/assets/final/HIHAT_CLOSED_DMX.spec b/assets/final/music/HIHAT_CLOSED_DMX.spec
index 8fce1d2..8fce1d2 100644
--- a/assets/final/HIHAT_CLOSED_DMX.spec
+++ b/assets/final/music/HIHAT_CLOSED_DMX.spec
Binary files differ
diff --git a/assets/final/HIHAT_CLOSED_DUFF.spec b/assets/final/music/HIHAT_CLOSED_DUFF.spec
index f738271..f738271 100644
--- a/assets/final/HIHAT_CLOSED_DUFF.spec
+++ b/assets/final/music/HIHAT_CLOSED_DUFF.spec
Binary files differ
diff --git a/assets/final/HIHAT_CLOSED_ER_1.spec b/assets/final/music/HIHAT_CLOSED_ER_1.spec
index bb03f5e..bb03f5e 100644
--- a/assets/final/HIHAT_CLOSED_ER_1.spec
+++ b/assets/final/music/HIHAT_CLOSED_ER_1.spec
Binary files differ
diff --git a/assets/final/KICK_606.spec b/assets/final/music/KICK_606.spec
index 10af84a..10af84a 100644
--- a/assets/final/KICK_606.spec
+++ b/assets/final/music/KICK_606.spec
Binary files differ
diff --git a/assets/final/KICK_90S_2.spec b/assets/final/music/KICK_90S_2.spec
index 126409e..126409e 100644
--- a/assets/final/KICK_90S_2.spec
+++ b/assets/final/music/KICK_90S_2.spec
Binary files differ
diff --git a/assets/final/RIDE_CUP_1.spec b/assets/final/music/RIDE_CUP_1.spec
index 78867c1..78867c1 100644
--- a/assets/final/RIDE_CUP_1.spec
+++ b/assets/final/music/RIDE_CUP_1.spec
Binary files differ
diff --git a/assets/final/SNARE_808.spec b/assets/final/music/SNARE_808.spec
index 2923c3c..2923c3c 100644
--- a/assets/final/SNARE_808.spec
+++ b/assets/final/music/SNARE_808.spec
Binary files differ
diff --git a/assets/final/SNARE_909_TUNE_8.spec b/assets/final/music/SNARE_909_TUNE_8.spec
index 4693c05..4693c05 100644
--- a/assets/final/SNARE_909_TUNE_8.spec
+++ b/assets/final/music/SNARE_909_TUNE_8.spec
Binary files differ
diff --git a/assets/final/SNARE_BLUE_ROOM.spec b/assets/final/music/SNARE_BLUE_ROOM.spec
index b24baa7..b24baa7 100644
--- a/assets/final/SNARE_BLUE_ROOM.spec
+++ b/assets/final/music/SNARE_BLUE_ROOM.spec
Binary files differ
diff --git a/assets/final/SPLASH_GROUNDED.spec b/assets/final/music/SPLASH_GROUNDED.spec
index a919be4..a919be4 100644
--- a/assets/final/SPLASH_GROUNDED.spec
+++ b/assets/final/music/SPLASH_GROUNDED.spec
Binary files differ
diff --git a/assets/final/SYNTH_BASS_DISTORT.spec b/assets/final/music/SYNTH_BASS_DISTORT.spec
index 33bc0a0..33bc0a0 100644
--- a/assets/final/SYNTH_BASS_DISTORT.spec
+++ b/assets/final/music/SYNTH_BASS_DISTORT.spec
Binary files differ
diff --git a/assets/final/shaders/chroma_aberration.wgsl b/assets/final/shaders/chroma_aberration.wgsl
index bad3624..6c942b7 100644
--- a/assets/final/shaders/chroma_aberration.wgsl
+++ b/assets/final/shaders/chroma_aberration.wgsl
@@ -1,22 +1,14 @@
@group(0) @binding(0) var smplr: sampler;
@group(0) @binding(1) var txt: texture_2d<f32>;
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
-struct EffectParams {
+#include "common_uniforms"
+struct ChromaAberrationParams {
offset_scale: f32,
angle: f32,
};
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
-@group(0) @binding(3) var<uniform> params: EffectParams;
+@group(0) @binding(3) var<uniform> params: ChromaAberrationParams;
@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> {
var pos = array<vec2<f32>, 3>(
diff --git a/assets/final/shaders/circle_mask_compute.wgsl b/assets/final/shaders/circle_mask_compute.wgsl
index 1ed6c1e..484d3dd 100644
--- a/assets/final/shaders/circle_mask_compute.wgsl
+++ b/assets/final/shaders/circle_mask_compute.wgsl
@@ -1,16 +1,8 @@
// Circle mask compute shader
// Generates a circular mask (1.0 inside, 0.0 outside)
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
-struct EffectParams {
+#include "common_uniforms"
+struct CircleMaskParams {
radius: f32,
_pad0: f32,
_pad1: f32,
@@ -18,7 +10,7 @@ struct EffectParams {
};
@group(0) @binding(0) var<uniform> uniforms: CommonUniforms;
-@group(0) @binding(1) var<uniform> params: EffectParams;
+@group(0) @binding(1) var<uniform> params: CircleMaskParams;
struct VSOutput {
@builtin(position) position: vec4<f32>,
diff --git a/assets/final/shaders/circle_mask_render.wgsl b/assets/final/shaders/circle_mask_render.wgsl
index ce98f9c..cfa002e 100644
--- a/assets/final/shaders/circle_mask_render.wgsl
+++ b/assets/final/shaders/circle_mask_render.wgsl
@@ -4,15 +4,7 @@
@group(0) @binding(0) var mask_tex: texture_2d<f32>;
@group(0) @binding(1) var mask_sampler: sampler;
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
+#include "common_uniforms"
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
diff --git a/assets/final/shaders/compute/gen_blend.wgsl b/assets/final/shaders/compute/gen_blend.wgsl
new file mode 100644
index 0000000..9fc9e1e
--- /dev/null
+++ b/assets/final/shaders/compute/gen_blend.wgsl
@@ -0,0 +1,29 @@
+// This file is part of the 64k demo project.
+// GPU composite shader: Blend two textures.
+
+struct BlendParams {
+ width: u32,
+ height: u32,
+ blend_factor: f32,
+ _pad0: f32,
+}
+
+@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>;
+@group(0) @binding(1) var<uniform> params: BlendParams;
+@group(0) @binding(2) var input_a: texture_2d<f32>;
+@group(0) @binding(3) var input_b: texture_2d<f32>;
+@group(0) @binding(4) var tex_sampler: sampler;
+
+@compute @workgroup_size(8, 8, 1)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+ if (id.x >= params.width || id.y >= params.height) { return; }
+
+ let uv = vec2<f32>(f32(id.x) / f32(params.width),
+ f32(id.y) / f32(params.height));
+
+ let color_a = textureSampleLevel(input_a, tex_sampler, uv, 0.0);
+ let color_b = textureSampleLevel(input_b, tex_sampler, uv, 0.0);
+ let blended = mix(color_a, color_b, params.blend_factor);
+
+ textureStore(output_tex, id.xy, blended);
+}
diff --git a/assets/final/shaders/compute/gen_grid.wgsl b/assets/final/shaders/compute/gen_grid.wgsl
new file mode 100644
index 0000000..cc5e189
--- /dev/null
+++ b/assets/final/shaders/compute/gen_grid.wgsl
@@ -0,0 +1,24 @@
+// GPU procedural grid pattern generator.
+// Simple grid lines with configurable spacing and thickness.
+
+struct GridParams {
+ width: u32,
+ height: u32,
+ grid_size: u32,
+ thickness: u32,
+}
+
+@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>;
+@group(0) @binding(1) var<uniform> params: GridParams;
+
+@compute @workgroup_size(8, 8, 1)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+ if (id.x >= params.width || id.y >= params.height) { return; }
+
+ let on_line = (id.x % params.grid_size) < params.thickness ||
+ (id.y % params.grid_size) < params.thickness;
+
+ let val = select(0.0, 1.0, on_line);
+
+ textureStore(output_tex, id.xy, vec4<f32>(val, val, val, 1.0));
+}
diff --git a/assets/final/shaders/compute/gen_mask.wgsl b/assets/final/shaders/compute/gen_mask.wgsl
new file mode 100644
index 0000000..1ce9f52
--- /dev/null
+++ b/assets/final/shaders/compute/gen_mask.wgsl
@@ -0,0 +1,27 @@
+// This file is part of the 64k demo project.
+// GPU composite shader: Multiply texture A by texture B (masking).
+
+struct MaskParams {
+ width: u32,
+ height: u32,
+}
+
+@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>;
+@group(0) @binding(1) var<uniform> params: MaskParams;
+@group(0) @binding(2) var input_a: texture_2d<f32>;
+@group(0) @binding(3) var input_b: texture_2d<f32>;
+@group(0) @binding(4) var tex_sampler: sampler;
+
+@compute @workgroup_size(8, 8, 1)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+ if (id.x >= params.width || id.y >= params.height) { return; }
+
+ let uv = vec2<f32>(f32(id.x) / f32(params.width),
+ f32(id.y) / f32(params.height));
+
+ let color_a = textureSampleLevel(input_a, tex_sampler, uv, 0.0);
+ let mask_b = textureSampleLevel(input_b, tex_sampler, uv, 0.0);
+ let masked = color_a * mask_b;
+
+ textureStore(output_tex, id.xy, masked);
+}
diff --git a/assets/final/shaders/compute/gen_noise.wgsl b/assets/final/shaders/compute/gen_noise.wgsl
new file mode 100644
index 0000000..5c0babd
--- /dev/null
+++ b/assets/final/shaders/compute/gen_noise.wgsl
@@ -0,0 +1,26 @@
+// GPU procedural noise texture generator.
+// Uses compute shader for parallel texture generation.
+
+#include "math/noise"
+
+struct NoiseParams {
+ width: u32,
+ height: u32,
+ seed: f32,
+ frequency: f32,
+}
+
+@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>;
+@group(0) @binding(1) var<uniform> params: NoiseParams;
+
+@compute @workgroup_size(8, 8, 1)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+ if (id.x >= params.width || id.y >= params.height) { return; }
+
+ let uv = vec2<f32>(f32(id.x) / f32(params.width),
+ f32(id.y) / f32(params.height));
+ let p = uv * params.frequency + params.seed;
+ let noise = noise_2d(p);
+
+ textureStore(output_tex, id.xy, vec4<f32>(noise, noise, noise, 1.0));
+}
diff --git a/assets/final/shaders/compute/gen_perlin.wgsl b/assets/final/shaders/compute/gen_perlin.wgsl
new file mode 100644
index 0000000..73816d6
--- /dev/null
+++ b/assets/final/shaders/compute/gen_perlin.wgsl
@@ -0,0 +1,44 @@
+// GPU procedural Perlin noise texture generator.
+// Fractional Brownian Motion using value noise.
+
+#include "math/noise"
+
+struct PerlinParams {
+ width: u32,
+ height: u32,
+ seed: f32,
+ frequency: f32,
+ amplitude: f32,
+ amplitude_decay: f32,
+ octaves: u32,
+ _pad0: f32, // Padding for alignment
+}
+
+@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>;
+@group(0) @binding(1) var<uniform> params: PerlinParams;
+
+@compute @workgroup_size(8, 8, 1)
+fn main(@builtin(global_invocation_id) id: vec3<u32>) {
+ if (id.x >= params.width || id.y >= params.height) { return; }
+
+ let uv = vec2<f32>(f32(id.x) / f32(params.width),
+ f32(id.y) / f32(params.height));
+
+ var value = 0.0;
+ var amplitude = params.amplitude;
+ var frequency = params.frequency;
+ var total_amp = 0.0;
+
+ for (var o: u32 = 0u; o < params.octaves; o++) {
+ let p = uv * frequency + params.seed;
+ value += noise_2d(p) * amplitude;
+ total_amp += amplitude;
+ frequency *= 2.0;
+ amplitude *= params.amplitude_decay;
+ }
+
+ value /= total_amp;
+ let clamped = clamp(value, 0.0, 1.0);
+
+ textureStore(output_tex, id.xy, vec4<f32>(clamped, clamped, clamped, 1.0));
+}
diff --git a/assets/final/shaders/distort.wgsl b/assets/final/shaders/distort.wgsl
index cca01c4..5d35129 100644
--- a/assets/final/shaders/distort.wgsl
+++ b/assets/final/shaders/distort.wgsl
@@ -1,15 +1,15 @@
@group(0) @binding(0) var smplr: sampler;
@group(0) @binding(1) var txt: texture_2d<f32>;
-struct CommonUniforms {
- resolution: vec2<f32>,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
+#include "common_uniforms"
+
+struct DistortParams {
+ strength: f32,
+ speed: f32,
};
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
+@group(0) @binding(3) var<uniform> params: DistortParams;
@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> {
var pos = array<vec2<f32>, 3>(
@@ -22,6 +22,6 @@ struct CommonUniforms {
@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> {
let uv = p.xy / uniforms.resolution;
- let dist = 0.1 * uniforms.audio_intensity * sin(uv.y * 20.0 + uniforms.time * 5.0);
+ let dist = params.strength * uniforms.audio_intensity * sin(uv.y * 20.0 + uniforms.time * params.speed * 5.0);
return textureSample(txt, smplr, uv + vec2<f32>(dist, 0.0));
}
diff --git a/assets/final/shaders/ellipse.wgsl b/assets/final/shaders/ellipse.wgsl
index 9c6b0d9..05dfcfc 100644
--- a/assets/final/shaders/ellipse.wgsl
+++ b/assets/final/shaders/ellipse.wgsl
@@ -1,12 +1,4 @@
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
+#include "common_uniforms"
@group(0) @binding(0) var<uniform> uniforms: CommonUniforms;
diff --git a/assets/final/shaders/gaussian_blur.wgsl b/assets/final/shaders/gaussian_blur.wgsl
index 3b87b10..02156f7 100644
--- a/assets/final/shaders/gaussian_blur.wgsl
+++ b/assets/final/shaders/gaussian_blur.wgsl
@@ -1,22 +1,14 @@
@group(0) @binding(0) var smplr: sampler;
@group(0) @binding(1) var txt: texture_2d<f32>;
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
-struct EffectParams {
+#include "common_uniforms"
+struct GaussianBlurParams {
strength: f32,
_pad: f32,
};
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
-@group(0) @binding(3) var<uniform> params: EffectParams;
+@group(0) @binding(3) var<uniform> params: GaussianBlurParams;
@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> {
var pos = array<vec2<f32>, 3>(
diff --git a/assets/final/shaders/main_shader.wgsl b/assets/final/shaders/main_shader.wgsl
index 7155a6d..ab0278c 100644
--- a/assets/final/shaders/main_shader.wgsl
+++ b/assets/final/shaders/main_shader.wgsl
@@ -1,12 +1,4 @@
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
+#include "common_uniforms"
@group(0) @binding(0) var<uniform> uniforms: CommonUniforms;
diff --git a/assets/final/shaders/particle_compute.wgsl b/assets/final/shaders/particle_compute.wgsl
index 38a95e1..ae513c8 100644
--- a/assets/final/shaders/particle_compute.wgsl
+++ b/assets/final/shaders/particle_compute.wgsl
@@ -5,15 +5,7 @@ struct Particle {
color: vec4<f32>,
};
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
+#include "common_uniforms"
@group(0) @binding(0) var<storage, read_write> particles: array<Particle>;
@group(0) @binding(1) var<uniform> uniforms: CommonUniforms;
diff --git a/assets/final/shaders/particle_render.wgsl b/assets/final/shaders/particle_render.wgsl
index 9030a3a..6a2b636 100644
--- a/assets/final/shaders/particle_render.wgsl
+++ b/assets/final/shaders/particle_render.wgsl
@@ -5,15 +5,7 @@ struct Particle {
color: vec4<f32>,
};
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
+#include "common_uniforms"
@group(0) @binding(0) var<storage, read> particles: array<Particle>;
@group(0) @binding(1) var<uniform> uniforms: CommonUniforms;
diff --git a/assets/final/shaders/particle_spray_compute.wgsl b/assets/final/shaders/particle_spray_compute.wgsl
index b165971..a4041f2 100644
--- a/assets/final/shaders/particle_spray_compute.wgsl
+++ b/assets/final/shaders/particle_spray_compute.wgsl
@@ -5,15 +5,7 @@ struct Particle {
color: vec4<f32>,
};
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
+#include "common_uniforms"
@group(0) @binding(0) var<storage, read_write> particles: array<Particle>;
@group(0) @binding(1) var<uniform> uniforms: CommonUniforms;
diff --git a/assets/final/shaders/passthrough.wgsl b/assets/final/shaders/passthrough.wgsl
index dfdacf4..266e231 100644
--- a/assets/final/shaders/passthrough.wgsl
+++ b/assets/final/shaders/passthrough.wgsl
@@ -1,15 +1,7 @@
@group(0) @binding(0) var smplr: sampler;
@group(0) @binding(1) var txt: texture_2d<f32>;
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
+#include "common_uniforms"
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> {
diff --git a/assets/final/shaders/solarize.wgsl b/assets/final/shaders/solarize.wgsl
index 645fb9a..de15dfc 100644
--- a/assets/final/shaders/solarize.wgsl
+++ b/assets/final/shaders/solarize.wgsl
@@ -1,15 +1,7 @@
@group(0) @binding(0) var smplr: sampler;
@group(0) @binding(1) var txt: texture_2d<f32>;
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};
+#include "common_uniforms"
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
diff --git a/assets/final/shaders/vignette.wgsl b/assets/final/shaders/vignette.wgsl
index 4b096d7..b129883 100644
--- a/assets/final/shaders/vignette.wgsl
+++ b/assets/final/shaders/vignette.wgsl
@@ -1,20 +1,14 @@
@group(0) @binding(0) var input_sampler: sampler;
@group(0) @binding(1) var input_tex: texture_2d<f32>;
-struct CommonUniforms {
- resolution: vec2<f32>,
- _pad0: f32,
- _pad1: f32,
- aspect_ratio: f32,
- time: f32,
- beat: f32,
- audio_intensity: f32,
-};struct EffectParams {
+#include "common_uniforms"
+
+struct VignetteParams {
radius: f32,
softness: f32,
};
@group(0) @binding(2) var<uniform> common_uniforms: CommonUniforms;
-@group(0) @binding(3) var<uniform> params: EffectParams;
+@group(0) @binding(3) var<uniform> params: VignetteParams;
@vertex
fn vs_main(@builtin(vertex_index) vertex_idx: u32) -> @builtin(position) vec4<f32> {
diff --git a/assets/final/test_demo_assets.txt b/assets/final/test_demo_assets.txt
index d679237..dec8625 100644
--- a/assets/final/test_demo_assets.txt
+++ b/assets/final/test_demo_assets.txt
@@ -1,3 +1,3 @@
-KICK_1, NONE, KICK_606.spec, "606 Kick"
-SNARE_1, NONE, SNARE_808.spec, "808 Snare"
-CRASH_1, NONE, CRASH_DMX.spec, "DMX Crash"
+KICK_1, NONE, music/KICK_606.spec, "606 Kick"
+SNARE_1, NONE, music/SNARE_808.spec, "808 Snare"
+CRASH_1, NONE, music/CRASH_DMX.spec, "DMX Crash"
diff --git a/doc/ARCHITECTURE.md b/doc/ARCHITECTURE.md
new file mode 100644
index 0000000..1a32300
--- /dev/null
+++ b/doc/ARCHITECTURE.md
@@ -0,0 +1,60 @@
+# Architectural Overview
+
+Detailed system architecture for the 64k demo project.
+
+---
+
+## Hybrid 3D Renderer
+
+**Core Idea**: Uses standard rasterization to draw proxy hulls (boxes), then raymarches inside the fragment shader to find the exact SDF surface.
+
+**Transforms**: Uses `inv_model` matrices to perform all raymarching in local object space, handling rotation and non-uniform scaling correctly.
+
+**Shadows**: Instance-based shadow casting with self-shadowing prevention (`skip_idx`).
+
+---
+
+## Sequence & Effect System
+
+**Effect**: Abstract base for visual elements. Supports `compute` and `render` phases.
+
+**Sequence**: Timeline of effects with start/end times.
+
+**MainSequence**: Top-level coordinator and framebuffer manager.
+
+**seq_compiler**: Transpiles `assets/demo.seq` into C++ `timeline.cc`.
+
+---
+
+## Asset & Build System
+
+**asset_packer**: Embeds binary assets (like `.spec` files) into C++ arrays.
+
+**Runtime Manager**: O(1) retrieval with lazy procedural generation support.
+
+**Automation**: `gen_assets.sh`, `build_win.sh`, and `check_all.sh` for multi-platform validation.
+
+---
+
+## Audio Engine
+
+### Synthesis
+Real-time additive synthesis from spectrograms via FFT-based IDCT (O(N log N)). Stereo output (32kHz, 16-bit, interleaved L/R). Uses orthonormal DCT-II/DCT-III transforms with Numerical Recipes reordering method.
+
+### Variable Tempo
+Music time abstraction with configurable tempo_scale. Tempo changes don't affect pitch.
+
+### Event-Based Tracker
+Individual TrackerEvents trigger as separate voices with dynamic beat calculation. Notes within patterns respect tempo scaling.
+
+### Backend Abstraction
+`AudioBackend` interface with `MiniaudioBackend` (production), `MockAudioBackend` (testing), and `WavDumpBackend` (offline rendering).
+
+### Dynamic Updates
+Double-buffered spectrograms for live thread-safe updates.
+
+### Procedural Library
+Melodies and spectral filters (noise, comb) generated at runtime.
+
+### Pattern System
+TrackerPatterns contain lists of TrackerEvents (beat, sample_id, volume, pan). Events trigger individually based on elapsed music time.
diff --git a/doc/BACKLOG.md b/doc/BACKLOG.md
new file mode 100644
index 0000000..403ecc9
--- /dev/null
+++ b/doc/BACKLOG.md
@@ -0,0 +1,197 @@
+# Future Goals & Ideas (Untriaged)
+
+This file contains low-priority tasks and ideas that have not yet been triaged for active development.
+
+---
+
+## Audio Tools
+
+### Task #64: specplay Enhancements
+Extend audio analysis tool with new features:
+- **Priority 1**: Spectral visualization (ASCII art), waveform display, frequency analysis, dynamic range
+- **Priority 2**: Diff mode (compare .wav vs .spec), batch mode (CSV report, find clipping)
+- **Priority 3**: WAV export (.spec → .wav), normalization
+- **Priority 4**: Spectral envelope, harmonic analysis, onset detection
+- **Priority 5**: Interactive mode (seek, loop, volume control)
+
+See `tools/specplay_README.md` for detailed feature list.
+
+### Task #65: Data-Driven Tempo Control
+Move tempo variation from code to data files.
+
+**Current**: `g_tempo_scale` is hardcoded in `main.cc` with manual animation curves
+
+**Goal**: Define tempo curves in `.seq` or `.track` files
+
+**Approach A**: Add TEMPO directive to `.seq` format
+- Example: `TEMPO 0.0 1.0`, `TEMPO 10.0 2.0`, `TEMPO 20.0 1.0`
+- seq_compiler generates tempo curve array in timeline.cc
+
+**Approach B**: Add tempo column to music.track
+- Each pattern trigger can specify tempo_scale override
+- tracker_compiler generates tempo events in music_data.cc
+
+**Benefits**: Non-programmers can edit tempo, easier iteration, version control friendly
+
+**Priority**: Low (current approach works)
+
+### Task #67: DCT/FFT Performance Benchmarking
+Add timing measurements to audio tests.
+
+**Goal**: Compare performance of different DCT/IDCT implementations
+
+**Location**: Add timing code to `test_dct.cc` or `test_fft.cc`
+
+**Measurements**:
+- Reference IDCT/FDCT (naive O(N²))
+- FFT-based DCT/IDCT (current O(N log N))
+- Future x86_64 SIMD-optimized versions
+
+**Output Format**:
+- Average time per transform (microseconds)
+- Throughput (transforms per second)
+- Speedup factor vs reference
+
+**Test Sizes**: DCT_SIZE=512 (production), plus 128, 256, 1024 for scaling
+
+**Implementation**:
+- Use `std::chrono::high_resolution_clock`
+- Run 1000+ iterations to reduce noise
+- Report min/avg/max times
+- Guard with `#if !defined(STRIP_ALL)`
+
+**Priority**: Very Low (nice-to-have)
+
+### Task #69: Convert Audio Pipeline to Clipped Int16
+Use clipped int16 for all audio processing.
+
+**Current**: Float32 throughout (generation, mixing, synthesis, output)
+
+**Goal**: Convert to int16 for faster processing and reduced memory
+
+**Rationale**:
+- Simpler arithmetic (no float operations)
+- Smaller memory footprint (2 bytes vs 4 bytes)
+- Hardware-native format (most audio devices use int16)
+- Eliminates float→int16 conversion at output
+- Natural clipping behavior
+
+**Scope**:
+- Output path: Definitely convert (backends, WAV dump)
+- Synthesis: Consider keeping float32 for quality
+- Mixing: Could use int16 with overflow handling
+- Asset storage: Already int16 in .spec files
+
+**Implementation Phases**:
+1. **Phase 1: Output Only** (~50 lines) - Convert `synth_render()` output to int16
+2. **Phase 2: Mixing Stage** (~200 lines) - Convert voice mixing to int16 arithmetic
+3. **Phase 3: Full Pipeline** (~500+ lines) - Convert spectrograms to int16 storage
+
+**Trade-offs**:
+- Quality loss: 16-bit vs 32-bit float precision
+- Dynamic range: Limited to [-32768, 32767]
+- Clipping: Must handle overflow carefully
+- Code complexity: Saturation arithmetic
+
+**Testing Requirements**:
+- Verify no audible quality degradation
+- Ensure clipping behavior matches float version
+- Check mixing overflow doesn't cause artifacts
+- Validate WAV dumps bit-identical
+
+**Size Impact**:
+- Phase 1: Negligible (~50 bytes)
+- Phase 2: ~100-200 bytes
+- Phase 3: 50% memory, ~1-2KB code savings
+
+**Priority**: Low (final optimization only if 64k budget requires it)
+
+**Notes**: Quality must be validated - may not be worth trade-off
+
+---
+
+## Developer Tools
+
+### Task #66: External Asset Loading for Debugging
+mmap() asset files instead of embedded data.
+
+**Current**: All assets embedded in `assets_data.cc` (regenerate on every change)
+
+**Goal**: Load assets from external files in debug builds for faster iteration
+
+**Scope**: macOS only, non-STRIP_ALL builds only
+
+**Implementation**:
+- Add `DEMO_ENABLE_EXTERNAL_ASSETS` CMake option
+- Modify `GetAsset()` to check for external file first (e.g., `assets/final/<name>`)
+- Use `mmap()` to map file into memory
+- Fallback to embedded data if file not found
+
+**Benefits**: Edit shaders/assets without regenerating assets_data.cc (~10s rebuild)
+
+**Trade-offs**: Adds runtime file I/O, only useful during development
+
+**Priority**: Low (current workflow acceptable)
+
+---
+
+## Visual Effects
+
+### Task #73: Extend Shader Parametrization [IN PROGRESS - 2/4 complete]
+Extend uniform parameter system to remaining effects.
+
+**Goal**: Add parametrization to DistortEffect, SolarizeEffect
+
+**Pattern**: Follow FlashEffect implementation (UniformHelper, params struct, .seq syntax)
+
+**Completed**: ChromaAberrationEffect (offset_scale, angle), GaussianBlurEffect (strength)
+
+**Priority**: Medium (quality-of-life for artists)
+
+**Estimated Impact**: ~200-300 bytes per effect
+
+### Task #52: Procedural SDF Font
+Minimal bezier/spline set for [A-Z, 0-9] and SDF rendering.
+
+### Task #55: SDF Random Planes Intersection
+Implement `sdPolyhedron` (crystal/gem shapes) via plane intersection.
+
+### Task #54: Tracy Integration
+Integrate Tracy debugger for performance profiling.
+
+### Task #58: Advanced Shader Factorization
+Further factorize WGSL code into smaller, reusable snippets.
+
+### Task #59: Comprehensive RNG Library
+Add WGSL snippets for float/vec2/vec3 noise (Perlin, Gyroid, etc.) and random number generators.
+
+### Task #60: OOP Refactoring
+Investigate if more C++ code can be made object-oriented without size penalty (vs functional style).
+
+### Task #61: GPU Procedural Generation
+Implement system to generate procedural data (textures, geometry) on GPU and read back to CPU.
+
+### Task #62: Physics Engine Enhancements (PBD & Rotation)
+- **Task #62.1**: Quaternion rotation for `Object3D` with angular momentum
+- **Task #62.2**: Position Based Dynamics (PBD) - Re-evaluate velocity after resolving collisions/constraints
+
+### Task #63: Refactor Large Files
+Split `src/gpu/gpu.cc`, `src/3d/visual_debug.cc` and `src/gpu/effect.cc` into sub-functionalities.
+
+---
+
+## Performance Optimization
+
+### Task #70: SIMD x86_64 Implementation
+Implement critical functions using intrinsics for x86_64 platforms.
+
+**Goal**: Optimize hot paths for audio and procedural generation
+
+**Scope**:
+- IDCT/FDCT transforms
+- Audio mixing and voice synthesis
+- CPU-side procedural texture/geometry generation
+
+**Constraint**: Non-critical; fallback to generic C++ must be maintained
+
+**Priority**: Very Low
diff --git a/doc/CODING_STYLE.md b/doc/CODING_STYLE.md
new file mode 100644
index 0000000..533cffb
--- /dev/null
+++ b/doc/CODING_STYLE.md
@@ -0,0 +1,109 @@
+# Coding Style Examples
+
+Detailed examples for the project's C++ coding style.
+
+---
+
+## Core Rules Examples
+
+### Const Placement
+```cpp
+const T* name // Correct
+const T *name // Wrong
+```
+
+### Pre-Increment
+```cpp
+++x // Correct
+x++ // Wrong (except when postfix needed)
+```
+
+### Operator Spacing
+```cpp
+x = (a + b) * c; // Correct - spaces around all operators
+x=(a+b)*c; // Wrong - no spaces
+```
+
+### No Auto (except complex iterators)
+```cpp
+int count = get_count(); // Correct
+auto count = get_count(); // Wrong
+
+for (auto it = map.begin(); ...) // OK - complex iterator type
+```
+
+### No C++ Casts
+```cpp
+(int)value // Correct
+static_cast<int>(value) // Wrong
+```
+
+---
+
+## Preprocessor Style
+
+```cpp
+#if defined(MY_TAG)
+ // code here
+#endif /* defined(MY_TAG) */
+```
+
+Always use `defined()` and closing comment.
+
+---
+
+## Struct Initialization
+
+### Good
+```cpp
+const WGPUDescriptor desc = {
+ .format = g_format,
+ .dimension = WGPUTextureViewDimension_2D,
+};
+```
+
+### Bad
+```cpp
+WGPUDescriptor desc = {};
+desc.format = g_format;
+desc.dimension = WGPUTextureViewDimension_2D;
+```
+
+Use designated initializers, not field-by-field assignment.
+
+---
+
+## Class Keywords Indentation
+
+```cpp
+class MyClass {
+ public: // 1 space indent
+ void foo();
+
+ private: // 1 space indent
+ int field_;
+};
+```
+
+---
+
+## Comments
+
+### Function Comments
+```cpp
+// Initializes the audio engine with default settings.
+void audio_init() {
+ ...
+}
+```
+
+One-line comment for non-obvious functions.
+
+### File Headers
+```cpp
+// demo64k - 64 kilobyte demo
+// src/audio/synth.cc
+// Audio synthesis engine
+```
+
+Three-line header for all source files.
diff --git a/doc/COMPLETED.md b/doc/COMPLETED.md
index a3c173d..49cfbe9 100644
--- a/doc/COMPLETED.md
+++ b/doc/COMPLETED.md
@@ -29,7 +29,34 @@ Detailed historical documents have been moved to `doc/archive/` for reference:
Use `read @doc/archive/FILENAME.md` to access archived documents.
-## Recently Completed (February 8, 2026)
+## Recently Completed (February 9, 2026)
+
+- [x] **WGSL Uniform Buffer Validation & Consolidation (Task #75)**
+ - **Goal**: Standardize uniform buffer usage across all post-process effects and add validation tooling
+ - **Implementation**:
+ - Refactored `DistortEffect` and others to use `CommonPostProcessUniforms` (binding 2) + `EffectParams` (binding 3)
+ - Created `tools/validate_uniforms.py` to parse C++ and WGSL (including embedded strings) and verify size/alignment
+ - Added validation step to CMake build system
+ - Renamed generic `EffectParams` to specific names (`FadeParams`, `CircleMaskParams`, etc.) in WGSL and C++
+ - Added `doc/UNIFORM_BUFFER_GUIDELINES.md` and updated `CONTRIBUTING.md`
+ - **Result**: Consistent binding layout across all effects, automatic validation on build
+
+- [x] **Uniform Buffer Alignment (Task #74)**
+ - **Goal**: Fix WGSL struct alignment issues causing validation errors and crashes
+ - **Implementation**:
+ - `circle_mask_compute.wgsl`: Changed `_pad: vec3<f32>` to three `f32` fields for correct 16-byte alignment
+ - `fade_effect.cc`: Changed EffectParams padding from `vec3<f32>` to `_pad0/1/2: f32`
+ - `theme_modulation_effect.cc`: Same padding fix for EffectParams
+ - Fixed ODR violation in `demo_effects.h` (incomplete FadeEffect forward declaration)
+ - Renamed shadowing `uniforms_` members to `common_uniforms_`/`flash_uniforms_`
+ - **Result**: demo64k runs without crashes, 32/33 tests passing (97%), 0 WebGPU validation errors
+
+- [x] **Fix test_demo Black Screen**
+ - **Issue**: `test_demo` showed black screen because it failed to load its timeline sequence (`assets/test_demo.seq`)
+ - **Fix**: Added missing `LoadTimeline` call in `src/test_demo.cc`
+ - **Result**: `FlashEffect` and `PeakMeterEffect` now render correctly
+
+## Previously Completed (February 8, 2026)
- [x] **Shader Parametrization System (Task #73 Phase 0)** (February 8, 2026)
- **Goal**: Enable per-frame dynamic parameters for shaders and effects via uniform buffers and .seq syntax
diff --git a/doc/CONTRIBUTING.md b/doc/CONTRIBUTING.md
index 3a09dbc..de6378a 100644
--- a/doc/CONTRIBUTING.md
+++ b/doc/CONTRIBUTING.md
@@ -1,5 +1,7 @@
# Contributing Guidelines
+---
+
## Commit Policy
### Verify Before Committing
@@ -8,7 +10,6 @@
```bash
./scripts/check_all.sh
```
-Runs tests, builds tools, cross-compiles Windows.
**Manual:**
```bash
@@ -26,18 +27,9 @@ cd build && ctest --output-on-failure
cmake -S . -B build_debug_check -DDEMO_ENABLE_DEBUG_LOGS=ON
cmake --build build_debug_check -j4
```
-Must compile without errors.
**Debug macros** (`src/util/debug.h`):
-- `DEBUG_LOG_AUDIO`, `DEBUG_LOG_RING_BUFFER`, `DEBUG_LOG_TRACKER`
-- `DEBUG_LOG_SYNTH`, `DEBUG_LOG_3D`, `DEBUG_LOG_ASSETS`, `DEBUG_LOG_GPU`
-
-Example:
-```cpp
-#if defined(DEBUG_LOG_AUDIO)
- DEBUG_AUDIO("[CALLBACK #%d] frames=%d\n", ++count, frames);
-#endif
-```
+- `DEBUG_LOG_AUDIO`, `DEBUG_LOG_RING_BUFFER`, `DEBUG_LOG_TRACKER`, `DEBUG_LOG_SYNTH`, `DEBUG_LOG_3D`, `DEBUG_LOG_ASSETS`, `DEBUG_LOG_GPU`
### Code Formatting
```bash
@@ -50,6 +42,8 @@ Never format `third_party/`.
- 3-line header comment
- Max 500 lines (split if larger)
+---
+
## Coding Style
### Core Rules
@@ -61,36 +55,9 @@ Never format `third_party/`.
- No `auto` (except complex iterators)
- No C++ casts (`static_cast`, `reinterpret_cast`)
-### Preprocessor
-```cpp
-#if defined(MY_TAG)
- ...
-#endif /* defined(MY_TAG) */
-```
+See `doc/CODING_STYLE.md` for detailed examples.
-### Struct Initialization
-```cpp
-// Good
-const WGPUDescriptor desc = {
- .format = g_format,
- .dimension = WGPUTextureViewDimension_2D,
-};
-
-// Bad
-WGPUDescriptor desc = {};
-desc.format = g_format;
-desc.dimension = WGPUTextureViewDimension_2D;
-```
-
-### Class Keywords
-```cpp
- private: // 1 space indent
- int field_;
-```
-
-### Comments
-- 1-line comment for non-obvious functions
-- 3-line header for all source files
+---
## Development Protocols
@@ -170,4 +137,18 @@ After hierarchy changes (moving files, renaming), verify:
./scripts/gen_coverage_report.sh
```
-Update scripts with hardcoded paths.
+---
+
+## Uniform Buffer Checklist
+
+To ensure consistency and prevent alignment-related issues:
+
+1. **Define WGSL Structs:** Pay attention to type alignment (`f32`, `vec2`, `vec3`, `vec4`) and use explicit padding where necessary.
+2. **Mirror in C++:** Create corresponding C++ structs that mirror WGSL definitions.
+3. **`static_assert` for Size:** Every C++ struct must have a `static_assert` verifying size matches WGSL.
+4. **Standard Bindings:**
+ - **Binding 2:** Always use `CommonPostProcessUniforms` for per-frame data (resolution, time, beat).
+ - **Binding 3:** Use effect-specific parameter structs for unique data.
+5. **Shader Consistency:** Ensure WGSL shaders correctly declare uniforms at specified bindings.
+6. **Validation Script:** Run `tools/validate_uniforms.py` to catch discrepancies.
+7. **Documentation:** Refer to `doc/UNIFORM_BUFFER_GUIDELINES.md` for detailed alignment rules.
diff --git a/doc/GPU_PROCEDURAL_PHASE4.md b/doc/GPU_PROCEDURAL_PHASE4.md
new file mode 100644
index 0000000..4cfc271
--- /dev/null
+++ b/doc/GPU_PROCEDURAL_PHASE4.md
@@ -0,0 +1,70 @@
+# GPU Procedural Phase 4: Texture Composition
+
+**Status:** ✅ Complete
+
+## Implementation
+
+Multi-input composite shaders with configurable sampler support.
+
+### API
+
+```cpp
+enum class SamplerType {
+ LinearClamp, LinearRepeat, NearestClamp, NearestRepeat
+};
+
+void create_gpu_composite_texture(
+ const std::string& name,
+ const std::string& shader_func,
+ const char* shader_code,
+ const void* uniform_data,
+ size_t uniform_size,
+ int width, int height,
+ const std::vector<std::string>& input_names,
+ SamplerType sampler = SamplerType::LinearClamp);
+```
+
+### Shaders
+
+**gen_blend.wgsl** - Blend two textures with lerp factor:
+- Bindings: output (0), uniform (1), input_a (2), input_b (3), sampler (4)
+- Uniform: `{u32 width, height; f32 blend_factor, _pad0}`
+
+**gen_mask.wgsl** - Multiply textures (masking):
+- Bindings: output (0), uniform (1), input_a (2), input_b (3), sampler (4)
+- Uniform: `{u32 width, height}`
+
+### Usage
+
+```cpp
+extern const char* gen_blend_compute_wgsl;
+
+struct { uint32_t width, height; float blend_factor, _pad0; } uni = {256, 256, 0.5f, 0.0f};
+
+tex_mgr.create_gpu_composite_texture(
+ "blended", "gen_blend", gen_blend_compute_wgsl,
+ &uni, sizeof(uni), 256, 256,
+ {"noise_a", "noise_b"},
+ SamplerType::LinearClamp);
+```
+
+### Features
+
+- **Dynamic bind groups:** N input textures + 1 sampler
+- **Lazy sampler creation:** Map-based cache, 4 preset types
+- **Multi-stage composition:** Composite of composites supported
+- **Guarded with `#if !defined(STRIP_GPU_COMPOSITE)`**
+
+### Size Impact
+
+- Code: ~460 lines added
+- Compressed: ~830 bytes (2 shaders + dispatch logic)
+
+### Tests
+
+`test_gpu_composite.cc`:
+- Blend two noise textures
+- Mask noise with grid
+- Multi-stage composite (composite of composites)
+
+All 35 tests passing.
diff --git a/doc/HOWTO.md b/doc/HOWTO.md
index 967b554..876d7dc 100644
--- a/doc/HOWTO.md
+++ b/doc/HOWTO.md
@@ -2,6 +2,8 @@
Common commands for building and testing.
+---
+
## Building
### Debug Build
@@ -11,10 +13,7 @@ cmake --build build -j4
./build/demo64k
```
-Options:
-- `--fullscreen`: Run in fullscreen
-- `--resolution WxH`: Set window size (e.g., 1024x768)
-- `--seek TIME`: Jump to timestamp (debug builds only)
+Options: `--fullscreen`, `--resolution WxH`, `--seek TIME` (debug only)
Keyboard: `Esc` (exit), `F` (toggle fullscreen)
@@ -45,27 +44,34 @@ cmake --build build_final -j4
- STRIP_ALL: Full checks, no debug (~64k target)
- FINAL_STRIP: No checks, no debug (absolute minimum)
-### Developer Build
+### Developer Build (Tests + Tools)
```bash
-cmake -S . -B build -DDEMO_ALL_OPTIONS=ON
+cmake -S . -B build -DDEMO_BUILD_TESTS=ON -DDEMO_BUILD_TOOLS=ON
cmake --build build -j4
```
-Enables tests, tools, size optimizations.
+
+**Note:** `DEMO_ALL_OPTIONS=ON` enables tests, tools, AND `STRIP_ALL`, which removes debug-only code. Use selective flags for debugging.
+
+---
## Build System
-**Dependency Tracking**: CMake tracks 42 demo + 17 test assets. Editing shaders/audio auto-triggers rebuild.
+**Dependency Tracking:** CMake tracks 42 demo + 17 test assets. Editing shaders/audio auto-triggers rebuild.
-**Header Organization**:
+**Header Organization:**
- `asset_manager_dcl.h`: Forward declarations
- `asset_manager.h`: Core API (GetAsset/DropAsset)
- `asset_manager_utils.h`: Typed helpers
+---
+
## Git Clone
```bash
git clone ssh://git@51.38.51.127/~/demo.git
```
+---
+
## Audio System
### AudioEngine API
@@ -90,10 +96,7 @@ audio_shutdown();
- `seek(time)`: Jump to timestamp (debug only)
**Direct Synth APIs** (performance-critical):
-- `synth_register_spectrogram()`: Register samples
-- `synth_trigger_voice()`: Trigger playback
-- `synth_get_output_peak()`: Get audio level
-- `synth_render()`: Low-level rendering
+- `synth_register_spectrogram()`, `synth_trigger_voice()`, `synth_get_output_peak()`, `synth_render()`
**Testing:**
```cpp
@@ -103,6 +106,8 @@ engine.update(1.0f);
engine.shutdown();
```
+---
+
## Auxiliary Texture Masking
Share textures between effects:
@@ -116,6 +121,8 @@ WGPUTextureView view = demo_->get_auxiliary_view("mask_name");
```
See `doc/MASKING_SYSTEM.md` for details.
+---
+
## Demo Timeline
Edit `assets/demo.seq`:
@@ -125,6 +132,8 @@ SEQUENCE 0.0 0
```
Rebuild to update timeline.
+---
+
## Testing
**Run all tests:**
@@ -140,56 +149,7 @@ cd build && ctest
- `SynthEngineTest`: Audio synthesis
- `SequenceSystemTest`: Timeline logic
-## Code Coverage (macOS)
-```bash
-brew install lcov
-./scripts/gen_coverage_report.sh [target_dir]
-```
-
-## Tools
-
-### Windows Cross-Compilation
-```bash
-./scripts/fetch_win_deps.sh
-./scripts/build_win.sh
-./scripts/run_win.sh
-```
-
-### spectool (Audio Analysis)
-```bash
-cmake -S . -B build -DDEMO_BUILD_TOOLS=ON
-cmake --build build -j4
-
-# Analyze
-./build/spectool analyze input.wav output.spec
-
-# Play
-./build/spectool play input.spec
-```
-
-### specview (Visualization)
-```bash
-./build/specview input.spec
-```
-
-### specplay (Diagnostic)
-```bash
-./build/specplay input.spec
-# or
-./build/specplay input.wav
-```
-Output: Peak, RMS, clipping detection.
-
-### Submodule Updates
-```bash
-cd third_party/wgpu-native
-git fetch
-git checkout trunk
-git reset --hard origin/trunk
-cd ../..
-git add third_party/wgpu-native
-git commit -m "chore: Update wgpu-native"
-```
+---
## Asset Management
@@ -216,3 +176,7 @@ const uint8_t* data = GetAsset(AssetId::KICK_1, &size);
```
Build system auto-runs `asset_packer` when asset lists change.
+
+---
+
+For developer tools reference (spectool, Windows cross-compilation, code coverage), see `doc/TOOLS_REFERENCE.md`.
diff --git a/doc/RECIPE.md b/doc/RECIPE.md
new file mode 100644
index 0000000..6404391
--- /dev/null
+++ b/doc/RECIPE.md
@@ -0,0 +1,202 @@
+# Recipe: Common Patterns
+
+Quick reference for implementing common patterns in the demo codebase.
+
+## Runtime Shader Composition
+
+Use `ShaderComposer` to dynamically assemble shaders from snippets.
+
+**Pattern:**
+```cpp
+#include "gpu/effects/shader_composer.h"
+#include "generated/assets.h"
+
+// 1. Load base shader template from asset
+size_t shader_size;
+const char* shader_code =
+ (const char*)GetAsset(AssetId::MY_SHADER_TEMPLATE, &shader_size);
+
+// 2. Define substitutions for dynamic parts
+ShaderComposer::CompositionMap composition_map;
+composition_map["placeholder_name"] = "actual_snippet_name";
+composition_map["fragment_main"] = "plasma_shader"; // Example
+
+// 3. Compose final shader
+std::string composed_shader = ShaderComposer::Get().Compose(
+ {}, // Optional: explicit dependencies
+ std::string(shader_code, shader_size),
+ composition_map);
+
+// 4. Create shader module
+WGPUShaderSourceWGSL wgsl_src = {};
+wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+wgsl_src.code = str_view(composed_shader.c_str());
+
+WGPUShaderModuleDescriptor shader_desc = {};
+shader_desc.nextInChain = &wgsl_src.chain;
+WGPUShaderModule shader_module =
+ wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc);
+```
+
+**Base shader template (WGSL asset):**
+```wgsl
+// Common bindings
+@group(0) @binding(0) var<uniform> uniforms: CommonUniforms;
+@group(0) @binding(1) var tex_sampler: sampler;
+
+// Placeholder for dynamic fragment code
+#include "fragment_main"
+
+@fragment
+fn fs_main(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> {
+ return compute_color(uv); // Implemented by included snippet
+}
+```
+
+**Register snippets at startup:**
+```cpp
+ShaderComposer::Get().RegisterSnippet("plasma_shader", R"(
+fn compute_color(uv: vec2<f32>) -> vec4<f32> {
+ let t = uniforms.time;
+ return vec4(sin(uv.x * 10.0 + t), cos(uv.y * 10.0 + t), 0.5, 1.0);
+}
+)");
+
+ShaderComposer::Get().RegisterSnippet("tunnel_shader", R"(
+fn compute_color(uv: vec2<f32>) -> vec4<f32> {
+ let r = length(uv - vec2(0.5));
+ return vec4(vec3(1.0 / r), 1.0);
+}
+)");
+```
+
+**Example usage:** `src/gpu/effects/rotating_cube_effect.cc:72-75`
+
+## QuadEffect with Auxiliary Textures
+
+Full-screen quad effect with access to previous framebuffer + side textures.
+
+**Binding layout:**
+```
+@group(0) @binding(0) - Previous framebuffer texture
+@group(0) @binding(1) - Sampler
+@group(0) @binding(2) - CommonPostProcessUniforms
+@group(0) @binding(3) - Effect-specific params
+@group(0) @binding(4+) - Auxiliary textures (optional)
+```
+
+**Access auxiliary texture:**
+```cpp
+// In effect init()
+WGPUTextureView aux_view = demo_->get_auxiliary_view("mask_name");
+
+// Bind to binding 4
+const WGPUBindGroupEntry entries[] = {
+ {.binding = 0, .textureView = prev_frame_view},
+ {.binding = 1, .sampler = sampler},
+ {.binding = 2, .buffer = common_uniforms},
+ {.binding = 3, .buffer = effect_params},
+ {.binding = 4, .textureView = aux_view}, // Side texture
+};
+```
+
+**WGSL shader:**
+```wgsl
+@group(0) @binding(0) var prev_frame: texture_2d<f32>;
+@group(0) @binding(1) var tex_sampler: sampler;
+@group(0) @binding(2) var<uniform> common: CommonPostProcessUniforms;
+@group(0) @binding(3) var<uniform> params: EffectParams;
+@group(0) @binding(4) var aux_texture: texture_2d<f32>;
+
+@fragment
+fn fs_main(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> {
+ let prev = textureSample(prev_frame, tex_sampler, uv);
+ let mask = textureSample(aux_texture, tex_sampler, uv);
+ return mix(prev, compute_effect(uv), mask.r);
+}
+```
+
+## Dynamic Effect Parameters
+
+Use `UniformHelper` for .seq-controllable parameters.
+
+**C++ param struct:**
+```cpp
+struct MyEffectParams {
+ float strength;
+ float speed;
+ float _pad0;
+ float _pad1;
+};
+static_assert(sizeof(MyEffectParams) == 16);
+
+class MyEffect : public Effect {
+ private:
+ UniformHelper<MyEffectParams> params_;
+};
+```
+
+**Effect init:**
+```cpp
+void MyEffect::init(MainSequence* demo) {
+ params_.init(ctx_.device);
+ params_.get().strength = 1.0f;
+ params_.get().speed = 2.0f;
+}
+```
+
+**Update per frame:**
+```cpp
+void MyEffect::render(WGPUTextureView prev, WGPUTextureView target,
+ float beat, const EffectParams* ep) {
+ params_.apply_optional(ep); // Updates from .seq
+ params_.upload(ctx_.queue);
+ // ... render pass
+}
+```
+
+**.seq syntax:**
+```
+EFFECT MyEffect 0.0 10.0 strength=0.5 speed=3.0
+EFFECT MyEffect 10.0 20.0 strength=2.0 # speed keeps previous value
+```
+
+**Example:** `src/gpu/effects/flash_effect.cc`, `src/gpu/effects/chroma_aberration_effect.cc`
+
+## Uniform Buffer Alignment
+
+**WGSL padding rules:**
+- `vec3<f32>` requires 16-byte alignment (use padding or switch to `vec4`)
+- Use three `f32` fields instead of single `vec3` when possible
+
+**Correct patterns:**
+```cpp
+// Option 1: Explicit padding
+struct MyUniforms {
+ vec3<f32> color;
+ f32 _pad0;
+ vec2<f32> offset;
+ f32 _pad1;
+ f32 _pad2;
+};
+
+// Option 2: Avoid vec3
+struct MyUniforms {
+ f32 color_r;
+ f32 color_g;
+ f32 color_b;
+ f32 intensity;
+ vec2<f32> offset;
+ f32 _pad0;
+ f32 _pad1;
+};
+```
+
+**Verification:**
+```cpp
+static_assert(sizeof(MyUniforms) == EXPECTED_SIZE);
+```
+
+**Validation:** Run `tools/validate_uniforms.py` before commit.
+
+**Reference:** `doc/UNIFORM_BUFFER_GUIDELINES.md`
diff --git a/doc/TOOLS_REFERENCE.md b/doc/TOOLS_REFERENCE.md
new file mode 100644
index 0000000..61412a9
--- /dev/null
+++ b/doc/TOOLS_REFERENCE.md
@@ -0,0 +1,89 @@
+# Developer Tools Reference
+
+Comprehensive reference for all developer tools in the project.
+
+---
+
+## Windows Cross-Compilation
+
+```bash
+# Fetch dependencies
+./scripts/fetch_win_deps.sh
+
+# Build Windows binary
+./scripts/build_win.sh
+
+# Run with Wine
+./scripts/run_win.sh
+```
+
+---
+
+## spectool (Audio Analysis)
+
+```bash
+# Build
+cmake -S . -B build -DDEMO_BUILD_TOOLS=ON
+cmake --build build -j4
+
+# Analyze WAV → .spec
+./build/spectool analyze input.wav output.spec
+
+# Play .spec file
+./build/spectool play input.spec
+```
+
+---
+
+## specview (Visualization)
+
+```bash
+# View spectrogram
+./build/specview input.spec
+```
+
+Displays spectrogram visualization.
+
+---
+
+## specplay (Diagnostic)
+
+```bash
+# Analyze .spec file
+./build/specplay input.spec
+
+# Or analyze .wav file
+./build/specplay input.wav
+```
+
+Output: Peak, RMS, clipping detection.
+
+---
+
+## Code Coverage (macOS)
+
+```bash
+# Install lcov
+brew install lcov
+
+# Generate coverage report
+./scripts/gen_coverage_report.sh [target_dir]
+```
+
+Creates HTML coverage report.
+
+---
+
+## Submodule Updates
+
+```bash
+cd third_party/wgpu-native
+git fetch
+git checkout trunk
+git reset --hard origin/trunk
+cd ../..
+git add third_party/wgpu-native
+git commit -m "chore: Update wgpu-native"
+```
+
+Updates wgpu-native to latest trunk.
diff --git a/doc/UNIFORM_BUFFER_GUIDELINES.md b/doc/UNIFORM_BUFFER_GUIDELINES.md
new file mode 100644
index 0000000..ac02223
--- /dev/null
+++ b/doc/UNIFORM_BUFFER_GUIDELINES.md
@@ -0,0 +1,106 @@
+# WGSL Uniform Buffer Guidelines
+
+This document outlines the rules and best practices for defining and using uniform buffers in WGSL shaders within this project, focusing on alignment, size, and consistency.
+
+## WGSL Alignment Rules
+
+Understanding WGSL's memory layout rules is crucial for correct uniform buffer implementation. The following are the general alignment requirements for common WGSL types:
+
+- `f32`: 4-byte alignment.
+- `vec2<f32>`: 8-byte alignment (4 bytes per component * 2 components = 8 bytes).
+- `vec3<f32>`: 16-byte alignment (4 bytes per component * 3 components = 12 bytes, padded to 16).
+- `vec4<f32>`: 16-byte alignment (4 bytes per component * 4 components = 16 bytes).
+- `array<T, N>`: The alignment of an array is typically the alignment of its base type `T`.
+
+Structs are padded to the alignment of their largest member. Any trailing space in a struct is also padded to match the maximum alignment of any member within the struct.
+
+## Standard Uniform Buffer Pattern
+
+To maintain consistency and facilitate efficient rendering, a standard pattern for uniform buffer usage is established:
+
+- **Binding 0 & 1:** Reserved for Sampler and Texture access (handled by `pp_update_bind_group`).
+- **Binding 2:** **Common Uniforms** (`CommonPostProcessUniforms` or similar). This buffer should contain frequently used data like resolution, aspect ratio, time, beat, and audio intensity.
+- **Binding 3:** **Effect-Specific Parameters**. This buffer holds parameters unique to a particular effect (e.g., `strength`, `speed`, `fade_amount`).
+
+This pattern ensures that common data is shared efficiently across effects, while effect-specific data remains isolated.
+
+## Defining Uniform Structs
+
+### WGSL Definitions
+
+When defining uniform structs in WGSL, adhere to the following:
+
+- **Explicit Padding:** Use padding fields (`_pad0`, `_pad1`, etc.) where necessary to ensure correct alignment, especially when mixing types of different alignment requirements (e.g., `vec2<f32>` followed by `f32`s).
+- **Use `vec2<f32>` for 8-byte padding:** If you need 8 bytes of padding, use `_pad0: vec2<f32>` instead of `_pad0: f32, _pad1: f32` for potentially better clarity and to leverage WGSL's type system.
+- **Minimize Padding:** Only add padding where required by alignment rules to reduce memory usage.
+
+**Example (CommonPostProcessUniforms / HeptagonUniforms):**
+
+```wgsl
+struct CommonUniforms {
+ resolution: vec2<f32>,
+ _pad0: vec2<f32>, // 8 bytes padding to align subsequent members
+ aspect_ratio: f32,
+ time: f32,
+ beat: f32,
+ audio_intensity: f32,
+};
+// Expected size: 32 bytes
+```
+
+**Example (EffectParams with f32 members):**
+
+```wgsl
+struct EffectParams {
+ parameter1: f32,
+ parameter2: f32,
+ // ... more parameters ...
+};
+// Expected size: 8 bytes (if only two f32s)
+```
+
+### C++ Definitions and Validation
+
+For every WGSL uniform struct, a corresponding C++ struct must exist. This C++ struct must include a `static_assert` to verify its size and alignment matches the WGSL definition.
+
+- **Mirror WGSL Structure:** The C++ struct should mirror the WGSL struct's member order and types as closely as possible to ensure accurate size calculation.
+- **`static_assert`:** Always include `static_assert(sizeof(MyStruct) == EXPECTED_SIZE, "MyStruct must be EXPECTED_SIZE bytes for WGSL alignment");`.
+- **Use `float` for `f32`:** Use `float` for `f32` in C++.
+- **Use `vec2<f32>` mapping:** If WGSL uses `vec2<f32>`, map it to an equivalent C++ type that occupies 8 bytes, typically `float[2]` or a `struct Vec2 { float x, y; }` if more complex type handling is needed.
+- **Padding:** C++ padding rules can differ from WGSL. Pay close attention to `static_assert` for validation.
+
+**Example (C++ CommonPostProcessUniforms):**
+
+```cpp
+struct CommonPostProcessUniforms {
+ vec2 resolution; // 8 bytes
+ float _pad[2]; // 8 bytes padding (matches vec2<f32> in WGSL)
+ float aspect_ratio; // 4 bytes
+ float time; // 4 bytes
+ float beat; // 4 bytes
+ float audio_intensity; // 4 bytes
+};
+static_assert(sizeof(CommonPostProcessUniforms) == 32,
+ "CommonPostProcessUniforms must be 32 bytes for WGSL alignment");
+```
+
+**Example (C++ GaussianBlurParams):**
+
+```cpp
+struct GaussianBlurParams {
+ float strength = 2.0f;
+ float _pad = 0.0f;
+};
+static_assert(sizeof(GaussianBlurParams) == 8,
+ "GaussianBlurParams must be 8 bytes for WGSL alignment");
+```
+
+## Handling Common Pitfalls
+
+- **`vec3<f32>` Padding:** Avoid using `vec3<f32>` for padding in WGSL, as it has a 16-byte alignment. If padding is needed, use `vec2<f32>` for 8 bytes or individual `f32`s for 4-byte alignment.
+- **C++ vs. WGSL Alignment:** Always rely on `static_assert` in C++ and verify against WGSL alignment rules. C++ padding rules might differ, and the `static_assert` is the ultimate arbiter.
+- **Unmatched Structs:** Ensure every WGSL uniform struct has a corresponding C++ struct with a matching `static_assert`.
+
+## Validation Tool
+
+The `tools/validate_uniforms.py` script is integrated into the build system. It automatically checks for inconsistencies between WGSL and C++ uniform struct definitions and reports any size mismatches. Ensure this script passes for all new or modified uniform definitions.
diff --git a/scripts/gen_spectrograms.sh b/scripts/gen_spectrograms.sh
index a5c1510..3213787 100755
--- a/scripts/gen_spectrograms.sh
+++ b/scripts/gen_spectrograms.sh
@@ -7,7 +7,7 @@ set -euo pipefail
# --- Configuration ---
PROJECT_ROOT=$(git rev-parse --show-toplevel)
SOURCE_DIR="${PROJECT_ROOT}/assets/originals"
-DEST_DIR="${PROJECT_ROOT}/assets/final"
+DEST_DIR="${PROJECT_ROOT}/assets/final/music"
SPECTOOL_PATH="${PROJECT_ROOT}/build/spectool"
TEMP_WAV_DIR=$(mktemp -d)
diff --git a/src/3d/visual_debug.cc b/src/3d/visual_debug.cc
index 77311f6..cd4ccce 100644
--- a/src/3d/visual_debug.cc
+++ b/src/3d/visual_debug.cc
@@ -26,7 +26,7 @@ void VisualDebug::init(WGPUDevice device, WGPUTextureFormat format) {
WGPUBufferDescriptor ub_desc = {};
ub_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
- ub_desc.size = sizeof(mat4);
+ ub_desc.size = sizeof(GlobalUniforms);
uniform_buffer_ = wgpuDeviceCreateBuffer(device_, &ub_desc);
}
@@ -340,9 +340,12 @@ void VisualDebug::add_trajectory(const std::vector<vec3>& points,
}
void VisualDebug::update_buffers(const mat4& view_proj) {
- // Update Uniforms
+ // Update Uniforms - fill entire GlobalUniforms structure
+ GlobalUniforms uniforms = {};
+ uniforms.view_proj = view_proj;
+ // Other fields zeroed (not used by visual debug shader)
wgpuQueueWriteBuffer(wgpuDeviceGetQueue(device_), uniform_buffer_, 0,
- &view_proj, sizeof(mat4));
+ &uniforms, sizeof(GlobalUniforms));
// Update Vertices
size_t required_size = lines_.size() * 2 * sizeof(float) * 6;
@@ -385,7 +388,7 @@ void VisualDebug::update_buffers(const mat4& view_proj) {
WGPUBindGroupEntry bg_entry = {};
bg_entry.binding = 0;
bg_entry.buffer = uniform_buffer_;
- bg_entry.size = sizeof(mat4);
+ bg_entry.size = sizeof(GlobalUniforms);
WGPUBindGroupDescriptor bg_desc = {};
bg_desc.layout = bind_group_layout_;
diff --git a/src/audio/audio.cc b/src/audio/audio.cc
index 2f485a6..c5bd3d9 100644
--- a/src/audio/audio.cc
+++ b/src/audio/audio.cc
@@ -65,9 +65,11 @@ void audio_start() {
g_audio_backend->start();
}
-void audio_render_ahead(float music_time, float dt) {
+void audio_render_ahead(float music_time, float dt, float target_fill) {
// Target: maintain look-ahead buffer
- const float target_lookahead = (float)RING_BUFFER_LOOKAHEAD_MS / 1000.0f;
+ const float target_lookahead = (target_fill < 0.0f)
+ ? (float)RING_BUFFER_LOOKAHEAD_MS / 1000.0f
+ : target_fill;
// Render in small chunks to keep synth time synchronized with tracker
// Chunk size: one frame's worth of audio (~16.6ms @ 60fps)
diff --git a/src/audio/audio.h b/src/audio/audio.h
index e063a57..778d312 100644
--- a/src/audio/audio.h
+++ b/src/audio/audio.h
@@ -24,7 +24,8 @@ void audio_init();
void audio_start(); // Starts the audio device callback
// Ring buffer audio rendering (main thread fills buffer)
-void audio_render_ahead(float music_time, float dt);
+// target_fill: Target buffer fill time in seconds (default: RING_BUFFER_LOOKAHEAD_MS/1000)
+void audio_render_ahead(float music_time, float dt, float target_fill = -1.0f);
// Get current playback time (in seconds) based on samples consumed
// This is the ring buffer READ position (what's being played NOW)
diff --git a/src/gpu/demo_effects.h b/src/gpu/demo_effects.h
index 54bf657..ff7e017 100644
--- a/src/gpu/demo_effects.h
+++ b/src/gpu/demo_effects.h
@@ -7,12 +7,14 @@
#include "3d/scene.h"
#include "effect.h"
#include "gpu/effects/circle_mask_effect.h"
-#include "gpu/effects/fade_effect.h" // FadeEffect with full definition
+#include "gpu/effects/fade_effect.h" // FadeEffect with full definition
#include "gpu/effects/flash_effect.h" // FlashEffect with params support
#include "gpu/effects/post_process_helper.h"
#include "gpu/effects/rotating_cube_effect.h"
#include "gpu/effects/shaders.h"
#include "gpu/effects/theme_modulation_effect.h" // ThemeModulationEffect with full definition
+#include "gpu/effects/hybrid_3d_effect.h"
+#include "gpu/effects/flash_cube_effect.h"
#include "gpu/gpu.h"
#include "gpu/texture_manager.h"
#include "gpu/uniform_helper.h"
@@ -49,7 +51,6 @@ class ParticlesEffect : public Effect {
ComputePass compute_pass_;
RenderPass render_pass_;
GpuBuffer particles_buffer_;
- UniformBuffer<CommonPostProcessUniforms> uniforms_;
};
class PassthroughEffect : public PostProcessEffect {
@@ -58,7 +59,6 @@ class PassthroughEffect : public PostProcessEffect {
void update_bind_group(WGPUTextureView input_view) override;
private:
- UniformBuffer<CommonPostProcessUniforms> uniforms_;
};
class MovingEllipseEffect : public Effect {
@@ -83,7 +83,6 @@ class ParticleSprayEffect : public Effect {
ComputePass compute_pass_;
RenderPass render_pass_;
GpuBuffer particles_buffer_;
- UniformBuffer<CommonPostProcessUniforms> uniforms_;
};
// Parameters for GaussianBlurEffect (set at construction time)
@@ -106,7 +105,6 @@ class GaussianBlurEffect : public PostProcessEffect {
private:
GaussianBlurParams params_;
- UniformBuffer<CommonPostProcessUniforms> uniforms_;
UniformBuffer<GaussianBlurParams> params_buffer_;
};
@@ -118,7 +116,6 @@ class SolarizeEffect : public PostProcessEffect {
void update_bind_group(WGPUTextureView input_view) override;
private:
- UniformBuffer<CommonPostProcessUniforms> uniforms_;
};
// Parameters for VignetteEffect
@@ -137,7 +134,6 @@ class VignetteEffect : public PostProcessEffect {
private:
VignetteParams params_;
- UniformBuffer<CommonPostProcessUniforms> uniforms_;
UniformBuffer<VignetteParams> params_buffer_;
};
@@ -160,48 +156,33 @@ class ChromaAberrationEffect : public PostProcessEffect {
private:
ChromaAberrationParams params_;
- UniformBuffer<CommonPostProcessUniforms> uniforms_;
UniformBuffer<ChromaAberrationParams> params_buffer_;
};
-class Hybrid3DEffect : public Effect {
- public:
- Hybrid3DEffect(const GpuContext& ctx);
- void init(MainSequence* demo) override;
- void render(WGPURenderPassEncoder pass, float time, float beat,
- float intensity, float aspect_ratio) override;
-
- private:
- Renderer3D renderer_;
- TextureManager texture_manager_;
- Scene scene_;
- Camera camera_;
- int width_ = 1280;
- int height_ = 720;
+// Parameters for DistortEffect
+struct DistortParams {
+ float strength = 0.01f; // Default distortion strength
+ float speed = 1.0f; // Default distortion speed
};
+static_assert(sizeof(DistortParams) == 8, "DistortParams must be 8 bytes for WGSL alignment");
-class FlashCubeEffect : public Effect {
+class DistortEffect : public PostProcessEffect {
public:
- FlashCubeEffect(const GpuContext& ctx);
- void init(MainSequence* demo) override;
- void resize(int width, int height) override;
+ DistortEffect(const GpuContext& ctx);
+ DistortEffect(const GpuContext& ctx, const DistortParams& params);
void render(WGPURenderPassEncoder pass, float time, float beat,
float intensity, float aspect_ratio) override;
+ void update_bind_group(WGPUTextureView input_view) override;
private:
- Renderer3D renderer_;
- TextureManager texture_manager_;
- Scene scene_;
- Camera camera_;
- int width_ = 1280;
- int height_ = 720;
- float last_beat_;
- float flash_intensity_;
+ DistortParams params_;
+ UniformBuffer<DistortParams> params_buffer_;
};
-// ThemeModulationEffect now defined in gpu/effects/theme_modulation_effect.h (included above)
-// FadeEffect now defined in gpu/effects/fade_effect.h (included above)
-// FlashEffect now defined in gpu/effects/flash_effect.h (included above)
+// ThemeModulationEffect now defined in gpu/effects/theme_modulation_effect.h
+// (included above) FadeEffect now defined in gpu/effects/fade_effect.h
+// (included above) FlashEffect now defined in gpu/effects/flash_effect.h
+// (included above)
// Auto-generated functions
void LoadTimeline(MainSequence& main_seq, const GpuContext& ctx);
diff --git a/src/gpu/effect.h b/src/gpu/effect.h
index 6fdb0f4..8f35f3c 100644
--- a/src/gpu/effect.h
+++ b/src/gpu/effect.h
@@ -1,5 +1,7 @@
#pragma once
#include "gpu/gpu.h"
+#include "gpu/effects/post_process_helper.h"
+#include "gpu/uniform_helper.h"
#include <algorithm>
#include <map>
#include <memory>
@@ -12,6 +14,7 @@ class PostProcessEffect;
class Effect {
public:
Effect(const GpuContext& ctx) : ctx_(ctx) {
+ uniforms_.init(ctx.device);
}
virtual ~Effect() = default;
virtual void init(MainSequence* demo) {
@@ -43,7 +46,7 @@ class Effect {
protected:
const GpuContext& ctx_;
- GpuBuffer uniforms_;
+ UniformBuffer<CommonPostProcessUniforms> uniforms_;
int width_ = 1280;
int height_ = 720;
};
diff --git a/src/gpu/effects/chroma_aberration_effect.cc b/src/gpu/effects/chroma_aberration_effect.cc
index 7f41153..af3acc5 100644
--- a/src/gpu/effects/chroma_aberration_effect.cc
+++ b/src/gpu/effects/chroma_aberration_effect.cc
@@ -18,7 +18,6 @@ ChromaAberrationEffect::ChromaAberrationEffect(
: PostProcessEffect(ctx), params_(params) {
pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format,
chroma_aberration_shader_wgsl);
- uniforms_.init(ctx_.device);
params_buffer_.init(ctx_.device);
}
diff --git a/src/gpu/effects/circle_mask_effect.cc b/src/gpu/effects/circle_mask_effect.cc
index 5b71086..ca80cf9 100644
--- a/src/gpu/effects/circle_mask_effect.cc
+++ b/src/gpu/effects/circle_mask_effect.cc
@@ -3,6 +3,7 @@
// Generates circular mask and renders green background outside circle.
#include "gpu/effects/circle_mask_effect.h"
+#include "gpu/effects/shader_composer.h"
#include "generated/assets.h"
CircleMaskEffect::CircleMaskEffect(const GpuContext& ctx, float radius)
@@ -30,9 +31,7 @@ void CircleMaskEffect::init(MainSequence* demo) {
demo_->register_auxiliary_texture("circle_mask", width, height);
- compute_uniforms_.init(ctx_.device);
compute_params_.init(ctx_.device);
- render_uniforms_.init(ctx_.device);
WGPUSamplerDescriptor sampler_desc = {};
sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge;
@@ -49,9 +48,12 @@ void CircleMaskEffect::init(MainSequence* demo) {
const char* render_shader = (const char*)GetAsset(
AssetId::ASSET_CIRCLE_MASK_RENDER_SHADER, &render_size);
+ // Compose shaders to resolve #include directives
+ std::string composed_compute = ShaderComposer::Get().Compose({}, compute_shader);
+
WGPUShaderSourceWGSL compute_wgsl = {};
compute_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL;
- compute_wgsl.code = str_view(compute_shader);
+ compute_wgsl.code = str_view(composed_compute.c_str());
WGPUShaderModuleDescriptor compute_desc = {};
compute_desc.nextInChain = &compute_wgsl.chain;
@@ -82,11 +84,11 @@ void CircleMaskEffect::init(MainSequence* demo) {
const WGPUBindGroupEntry compute_entries[] = {
{.binding = 0,
- .buffer = compute_uniforms_.get().buffer,
+ .buffer = uniforms_.get().buffer,
.size = sizeof(CommonPostProcessUniforms)},
{.binding = 1,
.buffer = compute_params_.get().buffer,
- .size = sizeof(EffectParams)},
+ .size = sizeof(CircleMaskParams)},
};
const WGPUBindGroupDescriptor compute_bg_desc = {
.layout = wgpuRenderPipelineGetBindGroupLayout(compute_pipeline_, 0),
@@ -96,9 +98,11 @@ void CircleMaskEffect::init(MainSequence* demo) {
compute_bind_group_ =
wgpuDeviceCreateBindGroup(ctx_.device, &compute_bg_desc);
+ std::string composed_render = ShaderComposer::Get().Compose({}, render_shader);
+
WGPUShaderSourceWGSL render_wgsl = {};
render_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL;
- render_wgsl.code = str_view(render_shader);
+ render_wgsl.code = str_view(composed_render.c_str());
WGPUShaderModuleDescriptor render_desc = {};
render_desc.nextInChain = &render_wgsl.chain;
@@ -139,7 +143,7 @@ void CircleMaskEffect::init(MainSequence* demo) {
{.binding = 0, .textureView = mask_view},
{.binding = 1, .sampler = mask_sampler_},
{.binding = 2,
- .buffer = render_uniforms_.get().buffer,
+ .buffer = uniforms_.get().buffer,
.size = sizeof(CommonPostProcessUniforms)},
};
const WGPUBindGroupDescriptor render_bg_desc = {
@@ -160,9 +164,9 @@ void CircleMaskEffect::compute(WGPUCommandEncoder encoder, float time,
.beat = beat,
.audio_intensity = intensity,
};
- compute_uniforms_.update(ctx_.queue, uniforms);
+ uniforms_.update(ctx_.queue, uniforms);
- const EffectParams params = {
+ const CircleMaskParams params = {
.radius = radius_,
};
compute_params_.update(ctx_.queue, params);
@@ -199,7 +203,7 @@ void CircleMaskEffect::render(WGPURenderPassEncoder pass, float time,
.beat = beat,
.audio_intensity = intensity,
};
- render_uniforms_.update(ctx_.queue, uniforms);
+ uniforms_.update(ctx_.queue, uniforms);
wgpuRenderPassEncoderSetPipeline(pass, render_pipeline_);
wgpuRenderPassEncoderSetBindGroup(pass, 0, render_bind_group_, 0, nullptr);
diff --git a/src/gpu/effects/circle_mask_effect.h b/src/gpu/effects/circle_mask_effect.h
index ac44210..2ddbb11 100644
--- a/src/gpu/effects/circle_mask_effect.h
+++ b/src/gpu/effects/circle_mask_effect.h
@@ -21,23 +21,23 @@ class CircleMaskEffect : public Effect {
float intensity, float aspect_ratio) override;
private:
- struct EffectParams {
+ struct CircleMaskParams {
float radius;
float _pad[3];
};
+ static_assert(sizeof(CircleMaskParams) == 16,
+ "CircleMaskParams must be 16 bytes for WGSL alignment");
MainSequence* demo_ = nullptr;
float radius_;
WGPURenderPipeline compute_pipeline_ = nullptr;
WGPUBindGroup compute_bind_group_ = nullptr;
- UniformBuffer<CommonPostProcessUniforms> compute_uniforms_;
- UniformBuffer<EffectParams> compute_params_;
+ UniformBuffer<CircleMaskParams> compute_params_;
WGPURenderPipeline render_pipeline_ = nullptr;
WGPUBindGroup render_bind_group_ = nullptr;
WGPUSampler mask_sampler_ = nullptr;
- UniformBuffer<CommonPostProcessUniforms> render_uniforms_;
};
#endif /* CIRCLE_MASK_EFFECT_H_ */
diff --git a/src/gpu/effects/distort_effect.cc b/src/gpu/effects/distort_effect.cc
index d11dfd7..52a8ec7 100644
--- a/src/gpu/effects/distort_effect.cc
+++ b/src/gpu/effects/distort_effect.cc
@@ -9,31 +9,35 @@ DistortEffect::DistortEffect(const GpuContext& ctx)
: DistortEffect(ctx, DistortParams()) {
}
-DistortEffect::DistEffect(const GpuContext& ctx, const DistortParams& params)
+DistortEffect::DistortEffect(const GpuContext& ctx, const DistortParams& params)
: PostProcessEffect(ctx), params_(params) {
- uniforms_ =
- gpu_create_buffer(ctx_.device, sizeof(DistortUniforms),
- WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
+ params_buffer_.init(ctx_.device);
pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format,
distort_shader_wgsl);
}
void DistortEffect::render(WGPURenderPassEncoder pass, float t, float b,
float i, float a) {
- DistortUniforms u = {
+ // Populate CommonPostProcessUniforms
+ const CommonPostProcessUniforms common_u = {
+ .resolution = {(float)width_, (float)height_},
+ .aspect_ratio = a,
.time = t,
.beat = b,
- .intensity = i,
- .aspect_ratio = a,
- .width = (float)width_,
- .height = (float)height_,
+ .audio_intensity = i,
+ };
+ uniforms_.update(ctx_.queue, common_u);
+
+ // Populate DistortParams
+ const DistortParams distort_p = {
.strength = params_.strength,
.speed = params_.speed,
};
- wgpuQueueWriteBuffer(ctx_.queue, uniforms_.buffer, 0, &u, sizeof(u));
+ params_buffer_.update(ctx_.queue, distort_p);
+
PostProcessEffect::render(pass, t, b, i, a);
}
void DistortEffect::update_bind_group(WGPUTextureView v) {
- pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, {}, uniforms_);
+ pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(), params_buffer_);
} \ No newline at end of file
diff --git a/src/gpu/effects/fade_effect.cc b/src/gpu/effects/fade_effect.cc
index 3efc583..39b54e0 100644
--- a/src/gpu/effects/fade_effect.cc
+++ b/src/gpu/effects/fade_effect.cc
@@ -5,6 +5,12 @@
#include "gpu/effects/post_process_helper.h"
#include <cmath>
+struct FadeParams {
+ float fade_amount;
+ float _pad[3];
+};
+static_assert(sizeof(FadeParams) == 16, "FadeParams must be 16 bytes for WGSL alignment");
+
FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) {
const char* shader_code = R"(
struct VertexOutput {
@@ -22,7 +28,7 @@ FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) {
audio_intensity: f32,
};
- struct EffectParams {
+ struct FadeParams {
fade_amount: f32,
_pad0: f32,
_pad1: f32,
@@ -32,7 +38,7 @@ FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) {
@group(0) @binding(0) var inputSampler: sampler;
@group(0) @binding(1) var inputTexture: texture_2d<f32>;
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
- @group(0) @binding(3) var<uniform> params: EffectParams;
+ @group(0) @binding(3) var<uniform> params: FadeParams;
@vertex
fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
@@ -57,14 +63,13 @@ FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) {
pipeline_ =
create_post_process_pipeline(ctx_.device, ctx_.format, shader_code);
- common_uniforms_.init(ctx_.device);
params_buffer_ = gpu_create_buffer(
ctx_.device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
}
void FadeEffect::update_bind_group(WGPUTextureView input_view) {
pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view,
- common_uniforms_.get(), params_buffer_);
+ uniforms_.get(), params_buffer_);
}
void FadeEffect::render(WGPURenderPassEncoder pass, float time, float beat,
@@ -76,7 +81,7 @@ void FadeEffect::render(WGPURenderPassEncoder pass, float time, float beat,
.beat = beat,
.audio_intensity = intensity,
};
- common_uniforms_.update(ctx_.queue, u);
+ uniforms_.update(ctx_.queue, u);
// Example fade pattern: fade in at start, fade out at end
// Customize this based on your needs
@@ -90,8 +95,8 @@ void FadeEffect::render(WGPURenderPassEncoder pass, float time, float beat,
fade_amount = fmaxf(fade_amount, 0.0f);
}
- float params[4] = {fade_amount, 0.0f, 0.0f, 0.0f};
- wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, params,
+ FadeParams params = {fade_amount, {0.0f, 0.0f, 0.0f}};
+ wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, &params,
sizeof(params));
wgpuRenderPassEncoderSetPipeline(pass, pipeline_);
diff --git a/src/gpu/effects/fade_effect.h b/src/gpu/effects/fade_effect.h
index 22b8f76..178c360 100644
--- a/src/gpu/effects/fade_effect.h
+++ b/src/gpu/effects/fade_effect.h
@@ -4,9 +4,9 @@
#pragma once
#include "gpu/effect.h"
+#include "gpu/effects/post_process_helper.h"
#include "gpu/gpu.h"
#include "gpu/uniform_helper.h"
-#include "gpu/effects/post_process_helper.h"
class FadeEffect : public PostProcessEffect {
public:
@@ -16,6 +16,5 @@ class FadeEffect : public PostProcessEffect {
void update_bind_group(WGPUTextureView input_view) override;
private:
- UniformBuffer<CommonPostProcessUniforms> common_uniforms_;
GpuBuffer params_buffer_;
};
diff --git a/src/gpu/effects/flash_cube_effect.h b/src/gpu/effects/flash_cube_effect.h
index 7089af2..5faeb00 100644
--- a/src/gpu/effects/flash_cube_effect.h
+++ b/src/gpu/effects/flash_cube_effect.h
@@ -22,8 +22,6 @@ class FlashCubeEffect : public Effect {
TextureManager texture_manager_;
Scene scene_;
Camera camera_;
- int width_ = 1280;
- int height_ = 720;
float last_beat_ = 0.0f;
float flash_intensity_ = 0.0f;
};
diff --git a/src/gpu/effects/gaussian_blur_effect.cc b/src/gpu/effects/gaussian_blur_effect.cc
index 0cc4821..697be88 100644
--- a/src/gpu/effects/gaussian_blur_effect.cc
+++ b/src/gpu/effects/gaussian_blur_effect.cc
@@ -18,7 +18,6 @@ GaussianBlurEffect::GaussianBlurEffect(const GpuContext& ctx,
: PostProcessEffect(ctx), params_(params) {
pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format,
gaussian_blur_shader_wgsl);
- uniforms_.init(ctx_.device);
params_buffer_.init(ctx_.device);
}
diff --git a/src/gpu/effects/heptagon_effect.cc b/src/gpu/effects/heptagon_effect.cc
index b77ec53..7b0702d 100644
--- a/src/gpu/effects/heptagon_effect.cc
+++ b/src/gpu/effects/heptagon_effect.cc
@@ -5,39 +5,25 @@
#include "gpu/gpu.h"
#include "util/mini_math.h"
-// Match CommonUniforms struct from main_shader.wgsl.
-// Padded to 32 bytes for WGSL alignment rules.
-struct HeptagonUniforms {
- vec2 resolution; // 8 bytes
- float _pad0[2]; // 8 bytes padding to align next float
- float aspect_ratio; // 4 bytes
- float time; // 4 bytes
- float beat; // 4 bytes
- float audio_intensity; // 4 bytes
-};
-static_assert(sizeof(HeptagonUniforms) == 32,
- "HeptagonUniforms must be 32 bytes for WGSL alignment");
-
// --- HeptagonEffect ---
HeptagonEffect::HeptagonEffect(const GpuContext& ctx) : Effect(ctx) {
- uniforms_ =
- gpu_create_buffer(ctx_.device, sizeof(HeptagonUniforms),
- WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
- ResourceBinding bindings[] = {{uniforms_, WGPUBufferBindingType_Uniform}};
+ // uniforms_ is initialized by Effect base class
+ ResourceBinding bindings[] = {{uniforms_.get(), WGPUBufferBindingType_Uniform}};
pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, main_shader_wgsl,
bindings, 1);
pass_.vertex_count = 21;
}
void HeptagonEffect::render(WGPURenderPassEncoder pass, float t, float b,
float i, float a) {
- HeptagonUniforms u = {
+ CommonPostProcessUniforms u = {
.resolution = {(float)width_, (float)height_},
+ ._pad = {0.0f, 0.0f},
.aspect_ratio = a,
.time = t,
.beat = b,
.audio_intensity = i,
};
- wgpuQueueWriteBuffer(ctx_.queue, uniforms_.buffer, 0, &u, sizeof(u));
+ uniforms_.update(ctx_.queue, u);
wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline);
wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr);
wgpuRenderPassEncoderDraw(pass, pass_.vertex_count, 1, 0, 0);
diff --git a/src/gpu/effects/moving_ellipse_effect.cc b/src/gpu/effects/moving_ellipse_effect.cc
index 945f807..9866f20 100644
--- a/src/gpu/effects/moving_ellipse_effect.cc
+++ b/src/gpu/effects/moving_ellipse_effect.cc
@@ -7,10 +7,8 @@
// --- MovingEllipseEffect ---
MovingEllipseEffect::MovingEllipseEffect(const GpuContext& ctx) : Effect(ctx) {
- uniforms_ =
- gpu_create_buffer(ctx_.device, sizeof(CommonPostProcessUniforms),
- WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
- ResourceBinding bindings[] = {{uniforms_, WGPUBufferBindingType_Uniform}};
+ // uniforms_ is initialized by Effect base class
+ ResourceBinding bindings[] = {{uniforms_.get(), WGPUBufferBindingType_Uniform}};
pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, ellipse_shader_wgsl,
bindings, 1);
pass_.vertex_count = 3;
@@ -19,12 +17,13 @@ void MovingEllipseEffect::render(WGPURenderPassEncoder pass, float t, float b,
float i, float a) {
const CommonPostProcessUniforms u = {
.resolution = {(float)width_, (float)height_},
+ ._pad = {0.0f, 0.0f},
.aspect_ratio = a,
.time = t,
.beat = b,
.audio_intensity = i,
};
- wgpuQueueWriteBuffer(ctx_.queue, uniforms_.buffer, 0, &u, sizeof(u));
+ uniforms_.update(ctx_.queue, u);
wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline);
wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr);
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
diff --git a/src/gpu/effects/particle_spray_effect.cc b/src/gpu/effects/particle_spray_effect.cc
index 3fd2590..a435884 100644
--- a/src/gpu/effects/particle_spray_effect.cc
+++ b/src/gpu/effects/particle_spray_effect.cc
@@ -8,7 +8,6 @@
// --- ParticleSprayEffect ---
ParticleSprayEffect::ParticleSprayEffect(const GpuContext& ctx) : Effect(ctx) {
- uniforms_.init(ctx_.device);
std::vector<Particle> init_p(NUM_PARTICLES);
for (Particle& p : init_p)
p.pos[3] = 0.0f;
diff --git a/src/gpu/effects/particles_effect.cc b/src/gpu/effects/particles_effect.cc
index 01f90a5..cd0df74 100644
--- a/src/gpu/effects/particles_effect.cc
+++ b/src/gpu/effects/particles_effect.cc
@@ -8,7 +8,6 @@
// --- ParticlesEffect ---
ParticlesEffect::ParticlesEffect(const GpuContext& ctx) : Effect(ctx) {
- uniforms_.init(ctx_.device);
std::vector<Particle> init_p(NUM_PARTICLES);
particles_buffer_ = gpu_create_buffer(
ctx_.device, sizeof(Particle) * NUM_PARTICLES,
diff --git a/src/gpu/effects/passthrough_effect.cc b/src/gpu/effects/passthrough_effect.cc
index 93cf948..01d557a 100644
--- a/src/gpu/effects/passthrough_effect.cc
+++ b/src/gpu/effects/passthrough_effect.cc
@@ -7,7 +7,6 @@
// --- PassthroughEffect ---
PassthroughEffect::PassthroughEffect(const GpuContext& ctx)
: PostProcessEffect(ctx) {
- uniforms_.init(ctx_.device);
pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format,
passthrough_shader_wgsl);
}
diff --git a/src/gpu/effects/post_process_helper.cc b/src/gpu/effects/post_process_helper.cc
index 74e052d..e99467f 100644
--- a/src/gpu/effects/post_process_helper.cc
+++ b/src/gpu/effects/post_process_helper.cc
@@ -4,16 +4,19 @@
#include "post_process_helper.h"
#include "../demo_effects.h"
#include "gpu/gpu.h"
+#include "gpu/effects/shader_composer.h"
#include <cstring>
// Helper to create a standard post-processing pipeline
WGPURenderPipeline create_post_process_pipeline(WGPUDevice device,
WGPUTextureFormat format,
const char* shader_code) {
+ std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code);
+
WGPUShaderModuleDescriptor shader_desc = {};
WGPUShaderSourceWGSL wgsl_src = {};
wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
- wgsl_src.code = str_view(shader_code);
+ wgsl_src.code = str_view(composed_shader.c_str());
shader_desc.nextInChain = &wgsl_src.chain;
WGPUShaderModule shader_module =
wgpuDeviceCreateShaderModule(device, &shader_desc);
@@ -94,7 +97,8 @@ void pp_update_bind_group(WGPUDevice device, WGPURenderPipeline pipeline,
bge[2].buffer = uniforms.buffer;
bge[2].size = uniforms.size;
bge[3].binding = PP_BINDING_EFFECT_PARAMS;
- bge[3].buffer = effect_params.buffer ? effect_params.buffer : g_dummy_buffer.buffer;
+ bge[3].buffer =
+ effect_params.buffer ? effect_params.buffer : g_dummy_buffer.buffer;
bge[3].size = effect_params.buffer ? effect_params.size : g_dummy_buffer.size;
WGPUBindGroupDescriptor bgd = {
.layout = bgl, .entryCount = 4, .entries = bge};
diff --git a/src/gpu/effects/post_process_helper.h b/src/gpu/effects/post_process_helper.h
index 77b184f..23cde0e 100644
--- a/src/gpu/effects/post_process_helper.h
+++ b/src/gpu/effects/post_process_helper.h
@@ -19,10 +19,10 @@ static_assert(sizeof(CommonPostProcessUniforms) == 32,
"CommonPostProcessUniforms must be 32 bytes for WGSL alignment");
// Standard post-process bind group layout (group 0):
-#define PP_BINDING_SAMPLER 0 // Sampler for input texture
-#define PP_BINDING_TEXTURE 1 // Input texture (previous render pass)
-#define PP_BINDING_UNIFORMS 2 // Custom uniforms buffer
-#define PP_BINDING_EFFECT_PARAMS 3 // Effect-specific parameters
+#define PP_BINDING_SAMPLER 0 // Sampler for input texture
+#define PP_BINDING_TEXTURE 1 // Input texture (previous render pass)
+#define PP_BINDING_UNIFORMS 2 // Custom uniforms buffer
+#define PP_BINDING_EFFECT_PARAMS 3 // Effect-specific parameters
// Helper to create a standard post-processing pipeline
// Uniforms are accessible to both vertex and fragment shaders
diff --git a/src/gpu/effects/shaders.cc b/src/gpu/effects/shaders.cc
index 2e1cfe5..625c5b6 100644
--- a/src/gpu/effects/shaders.cc
+++ b/src/gpu/effects/shaders.cc
@@ -99,6 +99,28 @@ const char* chroma_aberration_shader_wgsl =
SafeGetAsset(AssetId::ASSET_SHADER_CHROMA_ABERRATION);
+const char* gen_noise_compute_wgsl =
+
+ SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_NOISE);
+
+const char* gen_perlin_compute_wgsl =
+
+ SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_PERLIN);
+
+const char* gen_grid_compute_wgsl =
+
+ SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_GRID);
+
+#if !defined(STRIP_GPU_COMPOSITE)
+const char* gen_blend_compute_wgsl =
+
+ SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_BLEND);
+
+const char* gen_mask_compute_wgsl =
+
+ SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_MASK);
+#endif
+
const char* vignette_shader_wgsl =
SafeGetAsset(AssetId::ASSET_SHADER_VIGNETTE);
diff --git a/src/gpu/effects/shaders.h b/src/gpu/effects/shaders.h
index 50b4f32..68b8834 100644
--- a/src/gpu/effects/shaders.h
+++ b/src/gpu/effects/shaders.h
@@ -18,3 +18,10 @@ extern const char* solarize_shader_wgsl;
extern const char* distort_shader_wgsl;
extern const char* chroma_aberration_shader_wgsl;
extern const char* vignette_shader_wgsl;
+extern const char* gen_noise_compute_wgsl;
+extern const char* gen_perlin_compute_wgsl;
+extern const char* gen_grid_compute_wgsl;
+#if !defined(STRIP_GPU_COMPOSITE)
+extern const char* gen_blend_compute_wgsl;
+extern const char* gen_mask_compute_wgsl;
+#endif
diff --git a/src/gpu/effects/solarize_effect.cc b/src/gpu/effects/solarize_effect.cc
index d74d708..4f47218 100644
--- a/src/gpu/effects/solarize_effect.cc
+++ b/src/gpu/effects/solarize_effect.cc
@@ -6,7 +6,6 @@
// --- SolarizeEffect ---
SolarizeEffect::SolarizeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) {
- uniforms_.init(ctx.device);
pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format,
solarize_shader_wgsl);
}
@@ -23,6 +22,6 @@ void SolarizeEffect::render(WGPURenderPassEncoder pass, float t, float b,
PostProcessEffect::render(pass, t, b, i, a);
}
void SolarizeEffect::update_bind_group(WGPUTextureView v) {
- pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v,
- uniforms_.get(), {});
+ pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(),
+ {});
}
diff --git a/src/gpu/effects/theme_modulation_effect.cc b/src/gpu/effects/theme_modulation_effect.cc
index f9ae636..b1eff90 100644
--- a/src/gpu/effects/theme_modulation_effect.cc
+++ b/src/gpu/effects/theme_modulation_effect.cc
@@ -6,6 +6,12 @@
#include "gpu/effects/shaders.h"
#include <cmath>
+struct ThemeModulationParams {
+ float theme_brightness;
+ float _pad[3];
+};
+static_assert(sizeof(ThemeModulationParams) == 16, "ThemeModulationParams must be 16 bytes for WGSL alignment");
+
ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx)
: PostProcessEffect(ctx) {
const char* shader_code = R"(
@@ -24,7 +30,7 @@ ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx)
audio_intensity: f32,
};
- struct EffectParams {
+ struct ThemeModulationParams {
theme_brightness: f32,
_pad0: f32,
_pad1: f32,
@@ -34,7 +40,7 @@ ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx)
@group(0) @binding(0) var inputSampler: sampler;
@group(0) @binding(1) var inputTexture: texture_2d<f32>;
@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;
- @group(0) @binding(3) var<uniform> params: EffectParams;
+ @group(0) @binding(3) var<uniform> params: ThemeModulationParams;
@vertex
fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
@@ -61,14 +67,13 @@ ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx)
pipeline_ =
create_post_process_pipeline(ctx_.device, ctx_.format, shader_code);
- common_uniforms_.init(ctx_.device);
params_buffer_ = gpu_create_buffer(
ctx_.device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
}
void ThemeModulationEffect::update_bind_group(WGPUTextureView input_view) {
pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view,
- common_uniforms_.get(), params_buffer_);
+ uniforms_.get(), params_buffer_);
}
void ThemeModulationEffect::render(WGPURenderPassEncoder pass, float time,
@@ -81,7 +86,7 @@ void ThemeModulationEffect::render(WGPURenderPassEncoder pass, float time,
.beat = beat,
.audio_intensity = intensity,
};
- common_uniforms_.update(ctx_.queue, u);
+ uniforms_.update(ctx_.queue, u);
// Alternate between bright and dark every 4 seconds (2 pattern changes)
// Music patterns change every 2 seconds at 120 BPM
@@ -97,8 +102,8 @@ void ThemeModulationEffect::render(WGPURenderPassEncoder pass, float time,
bright_value + (dark_value - bright_value) * transition;
// Update params buffer
- float params[4] = {theme_brightness, 0.0f, 0.0f, 0.0f};
- wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, params,
+ ThemeModulationParams params = {theme_brightness, {0.0f, 0.0f, 0.0f}};
+ wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, &params,
sizeof(params));
// Render
diff --git a/src/gpu/effects/theme_modulation_effect.h b/src/gpu/effects/theme_modulation_effect.h
index 107529b..713347b 100644
--- a/src/gpu/effects/theme_modulation_effect.h
+++ b/src/gpu/effects/theme_modulation_effect.h
@@ -5,8 +5,8 @@
#pragma once
#include "gpu/effect.h"
-#include "gpu/uniform_helper.h"
#include "gpu/effects/post_process_helper.h"
+#include "gpu/uniform_helper.h"
class ThemeModulationEffect : public PostProcessEffect {
public:
@@ -16,6 +16,5 @@ class ThemeModulationEffect : public PostProcessEffect {
void update_bind_group(WGPUTextureView input_view) override;
private:
- UniformBuffer<CommonPostProcessUniforms> common_uniforms_;
GpuBuffer params_buffer_;
};
diff --git a/src/gpu/effects/vignette_effect.cc b/src/gpu/effects/vignette_effect.cc
index a4967dd..bba0372 100644
--- a/src/gpu/effects/vignette_effect.cc
+++ b/src/gpu/effects/vignette_effect.cc
@@ -12,7 +12,6 @@ VignetteEffect::VignetteEffect(const GpuContext& ctx)
VignetteEffect::VignetteEffect(const GpuContext& ctx,
const VignetteParams& params)
: PostProcessEffect(ctx), params_(params) {
- uniforms_.init(ctx_.device);
params_buffer_.init(ctx_.device);
pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format,
vignette_shader_wgsl);
@@ -33,6 +32,6 @@ void VignetteEffect::render(WGPURenderPassEncoder pass, float t, float b,
}
void VignetteEffect::update_bind_group(WGPUTextureView v) {
- pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v,
- uniforms_.get(), params_buffer_.get());
+ pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(),
+ params_buffer_.get());
}
diff --git a/src/gpu/gpu.cc b/src/gpu/gpu.cc
index fde241d..e89a2f0 100644
--- a/src/gpu/gpu.cc
+++ b/src/gpu/gpu.cc
@@ -5,6 +5,7 @@
#include "gpu.h"
#include "effect.h"
#include "gpu/effects/shaders.h"
+#include "gpu/effects/shader_composer.h"
#include "platform/platform.h"
#include <cassert>
@@ -55,10 +56,13 @@ RenderPass gpu_create_render_pass(WGPUDevice device, WGPUTextureFormat format,
ResourceBinding* bindings, int num_bindings) {
RenderPass pass = {};
+ // Compose shader to resolve #include directives
+ std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code);
+
// Create Shader Module
WGPUShaderSourceWGSL wgsl_src = {};
wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
- wgsl_src.code = str_view(shader_code);
+ wgsl_src.code = str_view(composed_shader.c_str());
WGPUShaderModuleDescriptor shader_desc = {};
shader_desc.nextInChain = &wgsl_src.chain;
WGPUShaderModule shader_module =
@@ -156,9 +160,12 @@ ComputePass gpu_create_compute_pass(WGPUDevice device, const char* shader_code,
int num_bindings) {
ComputePass pass = {};
+ // Compose shader to resolve #include directives
+ std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code);
+
WGPUShaderSourceWGSL wgsl_src = {};
wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
- wgsl_src.code = str_view(shader_code);
+ wgsl_src.code = str_view(composed_shader.c_str());
WGPUShaderModuleDescriptor shader_desc = {};
shader_desc.nextInChain = &wgsl_src.chain;
WGPUShaderModule shader_module =
diff --git a/src/gpu/texture_manager.cc b/src/gpu/texture_manager.cc
index 0c30c94..dfa6315 100644
--- a/src/gpu/texture_manager.cc
+++ b/src/gpu/texture_manager.cc
@@ -2,7 +2,10 @@
// It implements the TextureManager.
#include "gpu/texture_manager.h"
+#include "gpu/effects/shader_composer.h"
+#include "platform/platform.h"
#include <cstdio>
+#include <cstring>
#include <vector>
#if defined(DEMO_CROSS_COMPILE_WIN32)
@@ -26,6 +29,22 @@ void TextureManager::shutdown() {
wgpuTextureRelease(pair.second.texture);
}
textures_.clear();
+
+ for (auto& pair : compute_pipelines_) {
+ if (pair.second.pipeline) {
+ wgpuComputePipelineRelease(pair.second.pipeline);
+ }
+ }
+ compute_pipelines_.clear();
+
+#if !defined(STRIP_GPU_COMPOSITE)
+ for (auto& pair : samplers_) {
+ if (pair.second) {
+ wgpuSamplerRelease(pair.second);
+ }
+ }
+ samplers_.clear();
+#endif
}
void TextureManager::create_procedural_texture(
@@ -112,3 +131,570 @@ WGPUTextureView TextureManager::get_texture_view(const std::string& name) {
}
return nullptr;
}
+
+WGPUComputePipeline TextureManager::get_or_create_compute_pipeline(
+ const std::string& func_name, const char* shader_code,
+ size_t uniform_size, int num_input_textures) {
+ auto it = compute_pipelines_.find(func_name);
+ if (it != compute_pipelines_.end()) {
+ return it->second.pipeline;
+ }
+
+ // Create new pipeline
+ ShaderComposer& composer = ShaderComposer::Get();
+ std::string resolved_shader = composer.Compose({}, shader_code);
+
+ WGPUShaderSourceWGSL wgsl_src = {};
+ wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL;
+ wgsl_src.code = str_view(resolved_shader.c_str());
+ WGPUShaderModuleDescriptor shader_desc = {};
+ shader_desc.nextInChain = &wgsl_src.chain;
+ WGPUShaderModule shader_module =
+ wgpuDeviceCreateShaderModule(device_, &shader_desc);
+
+ // Dynamic bind group layout
+ // Binding 0: output storage texture
+ // Binding 1: uniform buffer
+ // Binding 2 to (2 + num_input_textures - 1): input textures
+ // Binding (2 + num_input_textures): sampler (if inputs > 0)
+ const int max_entries = 2 + num_input_textures + (num_input_textures > 0 ? 1 : 0);
+ std::vector<WGPUBindGroupLayoutEntry> bgl_entries(max_entries);
+
+ // Binding 0: Output storage texture
+ bgl_entries[0].binding = 0;
+ bgl_entries[0].visibility = WGPUShaderStage_Compute;
+ bgl_entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+ bgl_entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm;
+ bgl_entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+ // Binding 1: Uniform buffer
+ bgl_entries[1].binding = 1;
+ bgl_entries[1].visibility = WGPUShaderStage_Compute;
+ bgl_entries[1].buffer.type = WGPUBufferBindingType_Uniform;
+ bgl_entries[1].buffer.minBindingSize = uniform_size;
+
+ // Binding 2+: Input textures
+ for (int i = 0; i < num_input_textures; ++i) {
+ bgl_entries[2 + i].binding = 2 + i;
+ bgl_entries[2 + i].visibility = WGPUShaderStage_Compute;
+ bgl_entries[2 + i].texture.sampleType = WGPUTextureSampleType_Float;
+ bgl_entries[2 + i].texture.viewDimension = WGPUTextureViewDimension_2D;
+ }
+
+ // Binding N: Sampler (if inputs exist)
+ if (num_input_textures > 0) {
+ bgl_entries[2 + num_input_textures].binding = 2 + num_input_textures;
+ bgl_entries[2 + num_input_textures].visibility = WGPUShaderStage_Compute;
+ bgl_entries[2 + num_input_textures].sampler.type = WGPUSamplerBindingType_Filtering;
+ }
+
+ WGPUBindGroupLayoutDescriptor bgl_desc = {};
+ bgl_desc.entryCount = max_entries;
+ bgl_desc.entries = bgl_entries.data();
+ WGPUBindGroupLayout bind_group_layout =
+ wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc);
+
+ WGPUPipelineLayoutDescriptor pl_desc = {};
+ pl_desc.bindGroupLayoutCount = 1;
+ pl_desc.bindGroupLayouts = &bind_group_layout;
+ WGPUPipelineLayout pipeline_layout =
+ wgpuDeviceCreatePipelineLayout(device_, &pl_desc);
+
+ WGPUComputePipelineDescriptor pipeline_desc = {};
+ pipeline_desc.layout = pipeline_layout;
+ pipeline_desc.compute.module = shader_module;
+ pipeline_desc.compute.entryPoint = str_view("main");
+
+ WGPUComputePipeline pipeline =
+ wgpuDeviceCreateComputePipeline(device_, &pipeline_desc);
+
+ wgpuPipelineLayoutRelease(pipeline_layout);
+ wgpuBindGroupLayoutRelease(bind_group_layout);
+ wgpuShaderModuleRelease(shader_module);
+
+ // Cache pipeline
+ ComputePipelineInfo info = {pipeline, shader_code, uniform_size, num_input_textures};
+ compute_pipelines_[func_name] = info;
+
+ return pipeline;
+}
+
+void TextureManager::dispatch_compute(const std::string& func_name,
+ WGPUTexture target,
+ const GpuProceduralParams& params,
+ const void* uniform_data,
+ size_t uniform_size) {
+ auto it = compute_pipelines_.find(func_name);
+ if (it == compute_pipelines_.end()) {
+ return; // Pipeline not created yet
+ }
+
+ WGPUComputePipeline pipeline = it->second.pipeline;
+
+ // Create uniform buffer
+ WGPUBufferDescriptor buf_desc = {};
+ buf_desc.size = uniform_size;
+ buf_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+ buf_desc.mappedAtCreation = WGPUOptionalBool_True;
+ WGPUBuffer uniform_buf = wgpuDeviceCreateBuffer(device_, &buf_desc);
+ void* mapped = wgpuBufferGetMappedRange(uniform_buf, 0, uniform_size);
+ memcpy(mapped, uniform_data, uniform_size);
+ wgpuBufferUnmap(uniform_buf);
+
+ // Create storage texture view
+ WGPUTextureViewDescriptor view_desc = {};
+ view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ view_desc.dimension = WGPUTextureViewDimension_2D;
+ view_desc.mipLevelCount = 1;
+ view_desc.arrayLayerCount = 1;
+ WGPUTextureView target_view = wgpuTextureCreateView(target, &view_desc);
+
+ // Create bind group layout entries (must match pipeline)
+ WGPUBindGroupLayoutEntry bgl_entries[2] = {};
+ bgl_entries[0].binding = 0;
+ bgl_entries[0].visibility = WGPUShaderStage_Compute;
+ bgl_entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+ bgl_entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm;
+ bgl_entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+ bgl_entries[1].binding = 1;
+ bgl_entries[1].visibility = WGPUShaderStage_Compute;
+ bgl_entries[1].buffer.type = WGPUBufferBindingType_Uniform;
+ bgl_entries[1].buffer.minBindingSize = uniform_size;
+
+ WGPUBindGroupLayoutDescriptor bgl_desc = {};
+ bgl_desc.entryCount = 2;
+ bgl_desc.entries = bgl_entries;
+ WGPUBindGroupLayout bind_group_layout =
+ wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc);
+
+ // Create bind group
+ WGPUBindGroupEntry bg_entries[2] = {};
+ bg_entries[0].binding = 0;
+ bg_entries[0].textureView = target_view;
+ bg_entries[1].binding = 1;
+ bg_entries[1].buffer = uniform_buf;
+ bg_entries[1].size = uniform_size;
+
+ WGPUBindGroupDescriptor bg_desc = {};
+ bg_desc.layout = bind_group_layout;
+ bg_desc.entryCount = 2;
+ bg_desc.entries = bg_entries;
+ WGPUBindGroup bind_group = wgpuDeviceCreateBindGroup(device_, &bg_desc);
+
+ // Dispatch compute
+ WGPUCommandEncoderDescriptor enc_desc = {};
+ WGPUCommandEncoder encoder =
+ wgpuDeviceCreateCommandEncoder(device_, &enc_desc);
+ WGPUComputePassEncoder pass =
+ wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+ wgpuComputePassEncoderSetPipeline(pass, pipeline);
+ wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
+ wgpuComputePassEncoderDispatchWorkgroups(pass, (params.width + 7) / 8,
+ (params.height + 7) / 8, 1);
+ wgpuComputePassEncoderEnd(pass);
+
+ WGPUCommandBufferDescriptor cmd_desc = {};
+ WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc);
+ wgpuQueueSubmit(queue_, 1, &cmd);
+
+ // Cleanup
+ wgpuCommandBufferRelease(cmd);
+ wgpuCommandEncoderRelease(encoder);
+ wgpuComputePassEncoderRelease(pass);
+ wgpuBindGroupRelease(bind_group);
+ wgpuBindGroupLayoutRelease(bind_group_layout);
+ wgpuBufferRelease(uniform_buf);
+ wgpuTextureViewRelease(target_view);
+}
+
+void TextureManager::create_gpu_noise_texture(
+ const std::string& name, const GpuProceduralParams& params) {
+ extern const char* gen_noise_compute_wgsl;
+ get_or_create_compute_pipeline("gen_noise", gen_noise_compute_wgsl, 16);
+
+ WGPUTextureDescriptor tex_desc = {};
+ tex_desc.usage =
+ WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+ tex_desc.dimension = WGPUTextureDimension_2D;
+ tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1};
+ tex_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ tex_desc.mipLevelCount = 1;
+ tex_desc.sampleCount = 1;
+ WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc);
+
+ struct NoiseParams {
+ uint32_t width;
+ uint32_t height;
+ float seed;
+ float frequency;
+ };
+ NoiseParams uniforms = {(uint32_t)params.width, (uint32_t)params.height,
+ params.params[0], params.params[1]};
+ dispatch_compute("gen_noise", texture, params, &uniforms, sizeof(NoiseParams));
+
+ WGPUTextureViewDescriptor view_desc = {};
+ view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ view_desc.dimension = WGPUTextureViewDimension_2D;
+ view_desc.mipLevelCount = 1;
+ view_desc.arrayLayerCount = 1;
+ WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc);
+
+ GpuTexture gpu_tex;
+ gpu_tex.texture = texture;
+ gpu_tex.view = view;
+ gpu_tex.width = params.width;
+ gpu_tex.height = params.height;
+ textures_[name] = gpu_tex;
+
+#if !defined(STRIP_ALL)
+ printf("Generated GPU noise texture: %s (%dx%d)\n", name.c_str(),
+ params.width, params.height);
+#endif
+}
+
+void TextureManager::create_gpu_perlin_texture(
+ const std::string& name, const GpuProceduralParams& params) {
+ extern const char* gen_perlin_compute_wgsl;
+ get_or_create_compute_pipeline("gen_perlin", gen_perlin_compute_wgsl, 32);
+
+ WGPUTextureDescriptor tex_desc = {};
+ tex_desc.usage =
+ WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+ tex_desc.dimension = WGPUTextureDimension_2D;
+ tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1};
+ tex_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ tex_desc.mipLevelCount = 1;
+ tex_desc.sampleCount = 1;
+ WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc);
+
+ struct PerlinParams {
+ uint32_t width;
+ uint32_t height;
+ float seed;
+ float frequency;
+ float amplitude;
+ float amplitude_decay;
+ uint32_t octaves;
+ float _pad0;
+ };
+ PerlinParams uniforms = {
+ (uint32_t)params.width,
+ (uint32_t)params.height,
+ params.params[0],
+ params.params[1],
+ params.num_params > 2 ? params.params[2] : 1.0f,
+ params.num_params > 3 ? params.params[3] : 0.5f,
+ params.num_params > 4 ? (uint32_t)params.params[4] : 4u,
+ 0.0f};
+ dispatch_compute("gen_perlin", texture, params, &uniforms,
+ sizeof(PerlinParams));
+
+ WGPUTextureViewDescriptor view_desc = {};
+ view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ view_desc.dimension = WGPUTextureViewDimension_2D;
+ view_desc.mipLevelCount = 1;
+ view_desc.arrayLayerCount = 1;
+ WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc);
+
+ GpuTexture gpu_tex;
+ gpu_tex.texture = texture;
+ gpu_tex.view = view;
+ gpu_tex.width = params.width;
+ gpu_tex.height = params.height;
+ textures_[name] = gpu_tex;
+
+#if !defined(STRIP_ALL)
+ printf("Generated GPU perlin texture: %s (%dx%d)\n", name.c_str(),
+ params.width, params.height);
+#endif
+}
+
+void TextureManager::create_gpu_grid_texture(
+ const std::string& name, const GpuProceduralParams& params) {
+ extern const char* gen_grid_compute_wgsl;
+ get_or_create_compute_pipeline("gen_grid", gen_grid_compute_wgsl, 16);
+
+ WGPUTextureDescriptor tex_desc = {};
+ tex_desc.usage =
+ WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+ tex_desc.dimension = WGPUTextureDimension_2D;
+ tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1};
+ tex_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ tex_desc.mipLevelCount = 1;
+ tex_desc.sampleCount = 1;
+ WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc);
+
+ struct GridParams {
+ uint32_t width;
+ uint32_t height;
+ uint32_t grid_size;
+ uint32_t thickness;
+ };
+ GridParams uniforms = {
+ (uint32_t)params.width, (uint32_t)params.height,
+ params.num_params > 0 ? (uint32_t)params.params[0] : 32u,
+ params.num_params > 1 ? (uint32_t)params.params[1] : 2u};
+ dispatch_compute("gen_grid", texture, params, &uniforms, sizeof(GridParams));
+
+ WGPUTextureViewDescriptor view_desc = {};
+ view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ view_desc.dimension = WGPUTextureViewDimension_2D;
+ view_desc.mipLevelCount = 1;
+ view_desc.arrayLayerCount = 1;
+ WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc);
+
+ GpuTexture gpu_tex;
+ gpu_tex.texture = texture;
+ gpu_tex.view = view;
+ gpu_tex.width = params.width;
+ gpu_tex.height = params.height;
+ textures_[name] = gpu_tex;
+
+#if !defined(STRIP_ALL)
+ printf("Generated GPU grid texture: %s (%dx%d)\n", name.c_str(),
+ params.width, params.height);
+#endif
+}
+
+#if !defined(STRIP_GPU_COMPOSITE)
+WGPUSampler TextureManager::get_or_create_sampler(SamplerType type) {
+ auto it = samplers_.find(type);
+ if (it != samplers_.end()) {
+ return it->second;
+ }
+
+ WGPUSamplerDescriptor desc = {};
+ desc.lodMinClamp = 0.0f;
+ desc.lodMaxClamp = 1.0f;
+ desc.maxAnisotropy = 1;
+
+ switch (type) {
+ case SamplerType::LinearClamp:
+ desc.addressModeU = WGPUAddressMode_ClampToEdge;
+ desc.addressModeV = WGPUAddressMode_ClampToEdge;
+ desc.magFilter = WGPUFilterMode_Linear;
+ desc.minFilter = WGPUFilterMode_Linear;
+ desc.mipmapFilter = WGPUMipmapFilterMode_Linear;
+ break;
+ case SamplerType::LinearRepeat:
+ desc.addressModeU = WGPUAddressMode_Repeat;
+ desc.addressModeV = WGPUAddressMode_Repeat;
+ desc.magFilter = WGPUFilterMode_Linear;
+ desc.minFilter = WGPUFilterMode_Linear;
+ desc.mipmapFilter = WGPUMipmapFilterMode_Linear;
+ break;
+ case SamplerType::NearestClamp:
+ desc.addressModeU = WGPUAddressMode_ClampToEdge;
+ desc.addressModeV = WGPUAddressMode_ClampToEdge;
+ desc.magFilter = WGPUFilterMode_Nearest;
+ desc.minFilter = WGPUFilterMode_Nearest;
+ desc.mipmapFilter = WGPUMipmapFilterMode_Nearest;
+ break;
+ case SamplerType::NearestRepeat:
+ desc.addressModeU = WGPUAddressMode_Repeat;
+ desc.addressModeV = WGPUAddressMode_Repeat;
+ desc.magFilter = WGPUFilterMode_Nearest;
+ desc.minFilter = WGPUFilterMode_Nearest;
+ desc.mipmapFilter = WGPUMipmapFilterMode_Nearest;
+ break;
+ }
+
+ WGPUSampler sampler = wgpuDeviceCreateSampler(device_, &desc);
+ samplers_[type] = sampler;
+ return sampler;
+}
+
+void TextureManager::dispatch_composite(
+ const std::string& func_name, WGPUTexture target,
+ const GpuProceduralParams& params, const void* uniform_data,
+ size_t uniform_size, const std::vector<WGPUTextureView>& input_views,
+ SamplerType sampler_type) {
+ auto it = compute_pipelines_.find(func_name);
+ if (it == compute_pipelines_.end()) {
+ return; // Pipeline not created yet
+ }
+
+ WGPUComputePipeline pipeline = it->second.pipeline;
+ int num_inputs = (int)input_views.size();
+
+ // Create uniform buffer
+ WGPUBufferDescriptor buf_desc = {};
+ buf_desc.size = uniform_size;
+ buf_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst;
+ buf_desc.mappedAtCreation = WGPUOptionalBool_True;
+ WGPUBuffer uniform_buf = wgpuDeviceCreateBuffer(device_, &buf_desc);
+ void* mapped = wgpuBufferGetMappedRange(uniform_buf, 0, uniform_size);
+ memcpy(mapped, uniform_data, uniform_size);
+ wgpuBufferUnmap(uniform_buf);
+
+ // Create storage texture view
+ WGPUTextureViewDescriptor view_desc = {};
+ view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ view_desc.dimension = WGPUTextureViewDimension_2D;
+ view_desc.mipLevelCount = 1;
+ view_desc.arrayLayerCount = 1;
+ WGPUTextureView target_view = wgpuTextureCreateView(target, &view_desc);
+
+ // Dynamic bind group
+ const int max_entries = 2 + num_inputs + (num_inputs > 0 ? 1 : 0);
+ std::vector<WGPUBindGroupEntry> bg_entries(max_entries);
+
+ // Binding 0: Output texture
+ bg_entries[0].binding = 0;
+ bg_entries[0].textureView = target_view;
+
+ // Binding 1: Uniform buffer
+ bg_entries[1].binding = 1;
+ bg_entries[1].buffer = uniform_buf;
+ bg_entries[1].size = uniform_size;
+
+ // Binding 2+: Input textures
+ for (int i = 0; i < num_inputs; ++i) {
+ bg_entries[2 + i].binding = 2 + i;
+ bg_entries[2 + i].textureView = input_views[i];
+ }
+
+ // Binding N: Sampler
+ if (num_inputs > 0) {
+ bg_entries[2 + num_inputs].binding = 2 + num_inputs;
+ bg_entries[2 + num_inputs].sampler = get_or_create_sampler(sampler_type);
+ }
+
+ // Create bind group layout (must match pipeline)
+ const int layout_entries_count = 2 + num_inputs + (num_inputs > 0 ? 1 : 0);
+ std::vector<WGPUBindGroupLayoutEntry> bgl_entries(layout_entries_count);
+
+ bgl_entries[0].binding = 0;
+ bgl_entries[0].visibility = WGPUShaderStage_Compute;
+ bgl_entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly;
+ bgl_entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm;
+ bgl_entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D;
+
+ bgl_entries[1].binding = 1;
+ bgl_entries[1].visibility = WGPUShaderStage_Compute;
+ bgl_entries[1].buffer.type = WGPUBufferBindingType_Uniform;
+ bgl_entries[1].buffer.minBindingSize = uniform_size;
+
+ for (int i = 0; i < num_inputs; ++i) {
+ bgl_entries[2 + i].binding = 2 + i;
+ bgl_entries[2 + i].visibility = WGPUShaderStage_Compute;
+ bgl_entries[2 + i].texture.sampleType = WGPUTextureSampleType_Float;
+ bgl_entries[2 + i].texture.viewDimension = WGPUTextureViewDimension_2D;
+ }
+
+ if (num_inputs > 0) {
+ bgl_entries[2 + num_inputs].binding = 2 + num_inputs;
+ bgl_entries[2 + num_inputs].visibility = WGPUShaderStage_Compute;
+ bgl_entries[2 + num_inputs].sampler.type = WGPUSamplerBindingType_Filtering;
+ }
+
+ WGPUBindGroupLayoutDescriptor bgl_desc = {};
+ bgl_desc.entryCount = layout_entries_count;
+ bgl_desc.entries = bgl_entries.data();
+ WGPUBindGroupLayout bind_group_layout =
+ wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc);
+
+ WGPUBindGroupDescriptor bg_desc = {};
+ bg_desc.layout = bind_group_layout;
+ bg_desc.entryCount = max_entries;
+ bg_desc.entries = bg_entries.data();
+ WGPUBindGroup bind_group = wgpuDeviceCreateBindGroup(device_, &bg_desc);
+
+ // Dispatch compute
+ WGPUCommandEncoderDescriptor enc_desc = {};
+ WGPUCommandEncoder encoder =
+ wgpuDeviceCreateCommandEncoder(device_, &enc_desc);
+ WGPUComputePassEncoder pass =
+ wgpuCommandEncoderBeginComputePass(encoder, nullptr);
+ wgpuComputePassEncoderSetPipeline(pass, pipeline);
+ wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
+ wgpuComputePassEncoderDispatchWorkgroups(pass, (params.width + 7) / 8,
+ (params.height + 7) / 8, 1);
+ wgpuComputePassEncoderEnd(pass);
+
+ WGPUCommandBufferDescriptor cmd_desc = {};
+ WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc);
+ wgpuQueueSubmit(queue_, 1, &cmd);
+
+ // Cleanup
+ wgpuCommandBufferRelease(cmd);
+ wgpuCommandEncoderRelease(encoder);
+ wgpuComputePassEncoderRelease(pass);
+ wgpuBindGroupRelease(bind_group);
+ wgpuBindGroupLayoutRelease(bind_group_layout);
+ wgpuBufferRelease(uniform_buf);
+ wgpuTextureViewRelease(target_view);
+}
+
+void TextureManager::create_gpu_composite_texture(
+ const std::string& name, const std::string& shader_func,
+ const char* shader_code, const void* uniform_data, size_t uniform_size,
+ int width, int height, const std::vector<std::string>& input_names,
+ SamplerType sampler) {
+ // Create pipeline if needed
+ get_or_create_compute_pipeline(shader_func, shader_code, uniform_size,
+ (int)input_names.size());
+
+ // Resolve input texture views
+ std::vector<WGPUTextureView> input_views;
+ input_views.reserve(input_names.size());
+ for (const auto& input_name : input_names) {
+ WGPUTextureView view = get_texture_view(input_name);
+ if (!view) {
+ fprintf(stderr, "Error: Input texture not found: %s\n",
+ input_name.c_str());
+ return;
+ }
+ input_views.push_back(view);
+ }
+
+ // Create output texture
+ WGPUTextureDescriptor tex_desc = {};
+ tex_desc.usage =
+ WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding;
+ tex_desc.dimension = WGPUTextureDimension_2D;
+ tex_desc.size = {(uint32_t)width, (uint32_t)height, 1};
+ tex_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ tex_desc.mipLevelCount = 1;
+ tex_desc.sampleCount = 1;
+ WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc);
+
+ // Dispatch composite shader
+ GpuProceduralParams params = {width, height, nullptr, 0};
+ dispatch_composite(shader_func, texture, params, uniform_data, uniform_size,
+ input_views, sampler);
+
+ // Create view
+ WGPUTextureViewDescriptor view_desc = {};
+ view_desc.format = WGPUTextureFormat_RGBA8Unorm;
+ view_desc.dimension = WGPUTextureViewDimension_2D;
+ view_desc.mipLevelCount = 1;
+ view_desc.arrayLayerCount = 1;
+ WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc);
+
+ // Store
+ GpuTexture gpu_tex;
+ gpu_tex.texture = texture;
+ gpu_tex.view = view;
+ gpu_tex.width = width;
+ gpu_tex.height = height;
+ textures_[name] = gpu_tex;
+
+#if !defined(STRIP_ALL)
+ printf("Generated GPU composite texture: %s (%dx%d, %zu inputs)\n",
+ name.c_str(), width, height, input_names.size());
+#endif
+}
+#endif // !defined(STRIP_GPU_COMPOSITE)
+
+#if !defined(STRIP_ALL)
+WGPUTextureView TextureManager::get_or_generate_gpu_texture(
+ const std::string& name, const GpuProceduralParams& params) {
+ auto it = textures_.find(name);
+ if (it != textures_.end()) {
+ return it->second.view;
+ }
+ create_gpu_noise_texture(name, params);
+ return textures_[name].view;
+}
+#endif
diff --git a/src/gpu/texture_manager.h b/src/gpu/texture_manager.h
index 23fdbe8..5a2b9f8 100644
--- a/src/gpu/texture_manager.h
+++ b/src/gpu/texture_manager.h
@@ -23,6 +23,13 @@ struct GpuTexture {
int height;
};
+struct GpuProceduralParams {
+ int width;
+ int height;
+ const float* params;
+ int num_params;
+};
+
class TextureManager {
public:
void init(WGPUDevice device, WGPUQueue queue);
@@ -36,11 +43,72 @@ class TextureManager {
void create_texture(const std::string& name, int width, int height,
const uint8_t* data);
+ // GPU procedural generation
+ void create_gpu_noise_texture(const std::string& name,
+ const GpuProceduralParams& params);
+ void create_gpu_perlin_texture(const std::string& name,
+ const GpuProceduralParams& params);
+ void create_gpu_grid_texture(const std::string& name,
+ const GpuProceduralParams& params);
+
+#if !defined(STRIP_GPU_COMPOSITE)
+ enum class SamplerType {
+ LinearClamp,
+ LinearRepeat,
+ NearestClamp,
+ NearestRepeat
+ };
+
+ // GPU composite generation (multi-input textures)
+ void create_gpu_composite_texture(const std::string& name,
+ const std::string& shader_func,
+ const char* shader_code,
+ const void* uniform_data,
+ size_t uniform_size,
+ int width, int height,
+ const std::vector<std::string>& input_names,
+ SamplerType sampler = SamplerType::LinearClamp);
+#endif
+
+#if !defined(STRIP_ALL)
+ // On-demand lazy generation (stripped in final builds)
+ WGPUTextureView get_or_generate_gpu_texture(const std::string& name,
+ const GpuProceduralParams& params);
+#endif
+
// Retrieves a texture view by name (returns nullptr if not found)
WGPUTextureView get_texture_view(const std::string& name);
private:
+ struct ComputePipelineInfo {
+ WGPUComputePipeline pipeline;
+ const char* shader_code;
+ size_t uniform_size;
+ int num_input_textures;
+ };
+
+ WGPUComputePipeline get_or_create_compute_pipeline(const std::string& func_name,
+ const char* shader_code,
+ size_t uniform_size,
+ int num_input_textures = 0);
+ void dispatch_compute(const std::string& func_name, WGPUTexture target,
+ const GpuProceduralParams& params, const void* uniform_data,
+ size_t uniform_size);
+
+#if !defined(STRIP_GPU_COMPOSITE)
+ void dispatch_composite(const std::string& func_name, WGPUTexture target,
+ const GpuProceduralParams& params,
+ const void* uniform_data, size_t uniform_size,
+ const std::vector<WGPUTextureView>& input_views,
+ SamplerType sampler_type);
+#endif
+
WGPUDevice device_;
WGPUQueue queue_;
std::map<std::string, GpuTexture> textures_;
+ std::map<std::string, ComputePipelineInfo> compute_pipelines_;
+#if !defined(STRIP_GPU_COMPOSITE)
+ WGPUSampler get_or_create_sampler(SamplerType type);
+ std::map<SamplerType, WGPUSampler> samplers_;
+#endif
};
diff --git a/src/gpu/uniform_helper.h b/src/gpu/uniform_helper.h
index 151153f..8556c98 100644
--- a/src/gpu/uniform_helper.h
+++ b/src/gpu/uniform_helper.h
@@ -5,7 +5,6 @@
#pragma once
#include "gpu/gpu.h"
-#include <cstring>
// Generic uniform buffer helper
// Usage:
diff --git a/src/main.cc b/src/main.cc
index 4c44a78..59001fb 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -160,13 +160,9 @@ int main(int argc, char** argv) {
}
#endif /* !defined(STRIP_ALL) */
- // PRE-FILL: Fill ring buffer with initial 200ms before starting audio device
- // This prevents underrun on first callback
- g_audio_engine.update(g_music_time, 1.0f / 60.0f);
- audio_render_ahead(g_music_time,
- 1.0f / 60.0f); // Fill buffer with lookahead
+ // Pre-fill using same pattern as main loop (100ms)
+ fill_audio_buffer(0.1f, 0.0);
- // Start audio (or render to WAV file)
audio_start();
g_last_audio_time = audio_get_playback_time(); // Initialize after start
diff --git a/src/test_demo.cc b/src/test_demo.cc
index a438bbc..b8e9381 100644
--- a/src/test_demo.cc
+++ b/src/test_demo.cc
@@ -32,15 +32,23 @@ class PeakMeterEffect : public PostProcessEffect {
};
struct Uniforms {
- peak_value: f32,
+ resolution: vec2<f32>,
_pad0: f32,
_pad1: f32,
- _pad2: f32,
+ aspect_ratio: f32,
+ time: f32,
+ beat: f32,
+ audio_intensity: f32,
+ };
+
+ struct EffectParams {
+ unused: f32,
};
@group(0) @binding(0) var inputSampler: sampler;
@group(0) @binding(1) var inputTexture: texture_2d<f32>;
@group(0) @binding(2) var<uniform> uniforms: Uniforms;
+ @group(0) @binding(3) var<uniform> params: EffectParams;
@vertex
fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
@@ -69,7 +77,7 @@ class PeakMeterEffect : public PostProcessEffect {
// Optimization: Return bar color early (avoids texture sampling for ~5% of pixels)
if (in_bar_y && in_bar_x) {
let uv_x = (input.uv.x - bar_x_min) / (bar_x_max - bar_x_min);
- let factor = step(uv_x, uniforms.peak_value);
+ let factor = step(uv_x, uniforms.audio_intensity);
return mix(vec4<f32>(0.0, 0.0, 0.0, 1.0), vec4<f32>(1.0, 0.0, 0.0,1.0), factor);
}
@@ -80,24 +88,26 @@ class PeakMeterEffect : public PostProcessEffect {
pipeline_ =
create_post_process_pipeline(ctx_.device, ctx_.format, shader_code);
- uniforms_ = gpu_create_buffer(
- ctx_.device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
}
void update_bind_group(WGPUTextureView input_view) {
pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view,
- uniforms_, {});
+ uniforms_.get(), {});
}
void render(WGPURenderPassEncoder pass, float time, float beat,
float peak_value, float aspect_ratio) {
(void)time;
(void)beat;
- (void)aspect_ratio;
- float uniforms[4] = {peak_value, 0.0f, 0.0f, 0.0f};
- wgpuQueueWriteBuffer(ctx_.queue, uniforms_.buffer, 0, uniforms,
- sizeof(uniforms));
+ CommonPostProcessUniforms u = {
+ .resolution = {(float)width_, (float)height_},
+ .aspect_ratio = aspect_ratio,
+ .time = time,
+ .beat = beat,
+ .audio_intensity = peak_value,
+ };
+ uniforms_.update(ctx_.queue, u);
wgpuRenderPassEncoderSetPipeline(pass, pipeline_);
wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr);
@@ -209,6 +219,9 @@ int main(int argc, char** argv) {
platform_state = platform_init(fullscreen_enabled, width, height);
gpu_init(&platform_state);
+ // Load timeline from test_demo.seq
+ LoadTimeline(*gpu_get_main_sequence(), *gpu_get_context());
+
// Add peak meter visualization effect (renders as final post-process)
#if !defined(STRIP_ALL)
const GpuContext* gpu_ctx = gpu_get_context();
@@ -253,9 +266,9 @@ int main(int argc, char** argv) {
audio_render_ahead(g_music_time, audio_dt * g_tempo_scale);
};
- // Pre-fill audio buffer
- g_audio_engine.update(g_music_time, 1.0f / 60.0f);
- audio_render_ahead(g_music_time, 1.0f / 60.0f);
+ // Pre-fill using same pattern as main loop (100ms)
+ fill_audio_buffer(0.1f, 0.0);
+
audio_start();
g_last_audio_time = audio_get_playback_time();
diff --git a/src/tests/test_3d_render.cc b/src/tests/test_3d_render.cc
index fa13a43..eee46ba 100644
--- a/src/tests/test_3d_render.cc
+++ b/src/tests/test_3d_render.cc
@@ -220,25 +220,36 @@ int main(int argc, char** argv) {
g_renderer.resize(platform_state.width, platform_state.height);
g_textures.init(g_device, g_queue);
- ProceduralTextureDef noise_def;
- noise_def.width = 256;
- noise_def.height = 256;
- noise_def.gen_func = gen_periodic_noise;
- noise_def.params.push_back(1234.0f);
- noise_def.params.push_back(16.0f);
- g_textures.create_procedural_texture("noise", noise_def);
+ // GPU Noise texture (replaces CPU procedural)
+ GpuProceduralParams noise_params = {};
+ noise_params.width = 256;
+ noise_params.height = 256;
+ float noise_vals[2] = {1234.0f, 16.0f};
+ noise_params.params = noise_vals;
+ noise_params.num_params = 2;
+ g_textures.create_gpu_noise_texture("noise", noise_params);
g_renderer.set_noise_texture(g_textures.get_texture_view("noise"));
- ProceduralTextureDef sky_def;
- sky_def.width = 512;
- sky_def.height = 256;
- sky_def.gen_func = procedural::gen_perlin;
- sky_def.params = {42.0f, 4.0f, 1.0f, 0.5f, 6.0f};
- g_textures.create_procedural_texture("sky", sky_def);
-
+ // GPU Perlin texture for sky (replaces CPU procedural)
+ GpuProceduralParams sky_params = {};
+ sky_params.width = 512;
+ sky_params.height = 256;
+ float sky_vals[5] = {42.0f, 4.0f, 1.0f, 0.5f, 6.0f};
+ sky_params.params = sky_vals;
+ sky_params.num_params = 5;
+ g_textures.create_gpu_perlin_texture("sky", sky_params);
g_renderer.set_sky_texture(g_textures.get_texture_view("sky"));
+ // GPU Grid texture (new!)
+ GpuProceduralParams grid_params = {};
+ grid_params.width = 256;
+ grid_params.height = 256;
+ float grid_vals[2] = {32.0f, 2.0f}; // grid_size, thickness
+ grid_params.params = grid_vals;
+ grid_params.num_params = 2;
+ g_textures.create_gpu_grid_texture("grid", grid_params);
+
setup_scene();
g_camera.position = vec3(0, 5, 10);
diff --git a/src/tests/test_demo_effects.cc b/src/tests/test_demo_effects.cc
index d0163c2..0d2b09a 100644
--- a/src/tests/test_demo_effects.cc
+++ b/src/tests/test_demo_effects.cc
@@ -197,6 +197,9 @@ static void test_effect_type_classification() {
int main() {
fprintf(stdout, "=== Demo Effects Tests ===\n");
+ extern void InitShaderComposer();
+ InitShaderComposer();
+
test_post_process_effects();
test_scene_effects();
test_effect_type_classification();
diff --git a/src/tests/test_effect_base.cc b/src/tests/test_effect_base.cc
index e280e05..612e9da 100644
--- a/src/tests/test_effect_base.cc
+++ b/src/tests/test_effect_base.cc
@@ -249,6 +249,9 @@ static void test_pixel_helpers() {
int main() {
fprintf(stdout, "=== Effect Base Tests ===\n");
+ extern void InitShaderComposer();
+ InitShaderComposer();
+
test_webgpu_fixture();
test_offscreen_render_target();
test_effect_construction();
diff --git a/src/tests/test_gpu_composite.cc b/src/tests/test_gpu_composite.cc
new file mode 100644
index 0000000..e5ac788
--- /dev/null
+++ b/src/tests/test_gpu_composite.cc
@@ -0,0 +1,124 @@
+// This file is part of the 64k demo project.
+// Tests GPU composite texture generation (Phase 4).
+
+#include "gpu/gpu.h"
+#include "gpu/texture_manager.h"
+#include "platform/platform.h"
+#include <cstdint>
+#include <cstdio>
+#include <vector>
+
+#if !defined(STRIP_GPU_COMPOSITE)
+
+int main() {
+ printf("GPU Composite Test: Starting...\n");
+
+ // Initialize GPU
+ PlatformState platform = platform_init(false, 256, 256);
+ if (!platform.window) {
+ fprintf(stderr, "Error: Failed to create window\n");
+ return 1;
+ }
+
+ gpu_init(&platform);
+ const GpuContext* ctx = gpu_get_context();
+
+ extern void InitShaderComposer();
+ InitShaderComposer();
+
+ TextureManager tex_mgr;
+ tex_mgr.init(ctx->device, ctx->queue);
+
+ // Create base textures
+ float noise_params_a[2] = {1234.0f, 4.0f};
+ GpuProceduralParams noise_a = {256, 256, noise_params_a, 2};
+ tex_mgr.create_gpu_noise_texture("noise_a", noise_a);
+
+ float noise_params_b[2] = {5678.0f, 8.0f};
+ GpuProceduralParams noise_b = {256, 256, noise_params_b, 2};
+ tex_mgr.create_gpu_noise_texture("noise_b", noise_b);
+
+ float grid_params[2] = {32.0f, 2.0f};
+ GpuProceduralParams grid = {256, 256, grid_params, 2};
+ tex_mgr.create_gpu_grid_texture("grid", grid);
+
+ printf("SUCCESS: Base textures created (noise_a, noise_b, grid)\n");
+
+ // Test blend composite
+ extern const char* gen_blend_compute_wgsl;
+ struct {
+ uint32_t width, height;
+ float blend_factor, _pad0;
+ } blend_uni = {256, 256, 0.5f, 0.0f};
+
+ std::vector<std::string> blend_inputs = {"noise_a", "noise_b"};
+ tex_mgr.create_gpu_composite_texture("blended", "gen_blend",
+ gen_blend_compute_wgsl, &blend_uni,
+ sizeof(blend_uni), 256, 256, blend_inputs);
+
+ WGPUTextureView blended_view = tex_mgr.get_texture_view("blended");
+ if (!blended_view) {
+ fprintf(stderr, "Error: Blended texture not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+ printf("SUCCESS: Blend composite created (noise_a + noise_b)\n");
+
+ // Test mask composite
+ extern const char* gen_mask_compute_wgsl;
+ struct {
+ uint32_t width, height;
+ } mask_uni = {256, 256};
+
+ std::vector<std::string> mask_inputs = {"noise_a", "grid"};
+ tex_mgr.create_gpu_composite_texture("masked", "gen_mask", gen_mask_compute_wgsl,
+ &mask_uni, sizeof(mask_uni), 256, 256,
+ mask_inputs);
+
+ WGPUTextureView masked_view = tex_mgr.get_texture_view("masked");
+ if (!masked_view) {
+ fprintf(stderr, "Error: Masked texture not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+ printf("SUCCESS: Mask composite created (noise_a * grid)\n");
+
+ // Test multi-stage composite (composite of composite)
+ struct {
+ uint32_t width, height;
+ float blend_factor, _pad0;
+ } blend2_uni = {256, 256, 0.7f, 0.0f};
+
+ std::vector<std::string> blend2_inputs = {"blended", "masked"};
+ tex_mgr.create_gpu_composite_texture("final", "gen_blend",
+ gen_blend_compute_wgsl, &blend2_uni,
+ sizeof(blend2_uni), 256, 256, blend2_inputs);
+
+ WGPUTextureView final_view = tex_mgr.get_texture_view("final");
+ if (!final_view) {
+ fprintf(stderr, "Error: Multi-stage composite not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+ printf("SUCCESS: Multi-stage composite (composite of composites)\n");
+
+ // Cleanup
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ platform_shutdown(&platform);
+
+ printf("All GPU composite tests passed!\n");
+ return 0;
+}
+
+#else
+
+int main() {
+ printf("GPU Composite Test: SKIPPED (STRIP_GPU_COMPOSITE defined)\n");
+ return 0;
+}
+
+#endif
diff --git a/src/tests/test_gpu_procedural.cc b/src/tests/test_gpu_procedural.cc
new file mode 100644
index 0000000..f1bade0
--- /dev/null
+++ b/src/tests/test_gpu_procedural.cc
@@ -0,0 +1,117 @@
+// This file is part of the 64k demo project.
+// Tests GPU procedural texture generation.
+
+#include "gpu/gpu.h"
+#include "gpu/texture_manager.h"
+#include "platform/platform.h"
+#include <cstdio>
+
+int main() {
+ printf("GPU Procedural Test: Starting...\n");
+
+ // Minimal GPU initialization for testing
+ PlatformState platform = platform_init(false, 256, 256);
+ if (!platform.window) {
+ fprintf(stderr, "Error: Failed to create window\n");
+ return 1;
+ }
+
+ gpu_init(&platform);
+ const GpuContext* ctx = gpu_get_context();
+
+ // Initialize shader composer (needed for #include resolution)
+ extern void InitShaderComposer();
+ InitShaderComposer();
+
+ // Create TextureManager
+ TextureManager tex_mgr;
+ tex_mgr.init(ctx->device, ctx->queue);
+
+ // Test GPU noise generation
+ GpuProceduralParams params = {};
+ params.width = 256;
+ params.height = 256;
+ float proc_params[2] = {0.0f, 4.0f}; // seed, frequency
+ params.params = proc_params;
+ params.num_params = 2;
+
+ tex_mgr.create_gpu_noise_texture("test_noise", params);
+
+ // Verify texture exists
+ WGPUTextureView view = tex_mgr.get_texture_view("test_noise");
+ if (!view) {
+ fprintf(stderr, "Error: GPU noise texture not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+ printf("SUCCESS: GPU noise texture created (256x256)\n");
+
+ // Test pipeline caching (create second noise texture)
+ tex_mgr.create_gpu_noise_texture("test_noise_2", params);
+ WGPUTextureView view2 = tex_mgr.get_texture_view("test_noise_2");
+ if (!view2) {
+ fprintf(stderr, "Error: Second GPU noise texture not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+ printf("SUCCESS: Pipeline caching works (second noise texture)\n");
+
+ // Test GPU perlin generation
+ float perlin_params[5] = {42.0f, 4.0f, 1.0f, 0.5f, 6.0f};
+ GpuProceduralParams perlin = {512, 256, perlin_params, 5};
+ tex_mgr.create_gpu_perlin_texture("test_perlin", perlin);
+ WGPUTextureView perlin_view = tex_mgr.get_texture_view("test_perlin");
+ if (!perlin_view) {
+ fprintf(stderr, "Error: GPU perlin texture not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+ printf("SUCCESS: GPU perlin texture created (512x256)\n");
+
+ // Test GPU grid generation
+ float grid_params[2] = {32.0f, 2.0f};
+ GpuProceduralParams grid = {256, 256, grid_params, 2};
+ tex_mgr.create_gpu_grid_texture("test_grid", grid);
+ WGPUTextureView grid_view = tex_mgr.get_texture_view("test_grid");
+ if (!grid_view) {
+ fprintf(stderr, "Error: GPU grid texture not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+ printf("SUCCESS: GPU grid texture created (256x256)\n");
+
+ // Test multiple pipelines coexist
+ printf("SUCCESS: All three GPU generators work (unified pipeline system)\n");
+
+ // Test variable-size textures
+ float noise_small[2] = {999.0f, 8.0f};
+ GpuProceduralParams small = {128, 64, noise_small, 2};
+ tex_mgr.create_gpu_noise_texture("noise_128x64", small);
+ if (!tex_mgr.get_texture_view("noise_128x64")) {
+ fprintf(stderr, "Error: Variable-size texture (128x64) not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+
+ float noise_large[2] = {777.0f, 2.0f};
+ GpuProceduralParams large = {1024, 512, noise_large, 2};
+ tex_mgr.create_gpu_noise_texture("noise_1024x512", large);
+ if (!tex_mgr.get_texture_view("noise_1024x512")) {
+ fprintf(stderr, "Error: Variable-size texture (1024x512) not created\n");
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ return 1;
+ }
+ printf("SUCCESS: Variable-size textures work (128x64, 1024x512)\n");
+
+ // Cleanup
+ tex_mgr.shutdown();
+ gpu_shutdown();
+ platform_shutdown(&platform);
+ return 0;
+}
diff --git a/src/tests/test_post_process_helper.cc b/src/tests/test_post_process_helper.cc
index 104bbc3..36d193e 100644
--- a/src/tests/test_post_process_helper.cc
+++ b/src/tests/test_post_process_helper.cc
@@ -182,14 +182,14 @@ static void test_bind_group_update() {
// Create initial bind group
WGPUBindGroup bind_group = nullptr;
- pp_update_bind_group(fixture.device(), pipeline, &bind_group, view1,
- uniforms, dummy_effect_params_buffer);
+ pp_update_bind_group(fixture.device(), pipeline, &bind_group, view1, uniforms,
+ dummy_effect_params_buffer);
assert(bind_group != nullptr && "Initial bind group should be created");
fprintf(stdout, " ✓ Initial bind group created\n");
// Update bind group (should release old and create new)
- pp_update_bind_group(fixture.device(), pipeline, &bind_group, view2,
- uniforms, dummy_effect_params_buffer);
+ pp_update_bind_group(fixture.device(), pipeline, &bind_group, view2, uniforms,
+ dummy_effect_params_buffer);
assert(bind_group != nullptr && "Updated bind group should be created");
fprintf(stdout, " ✓ Bind group updated successfully\n");
diff --git a/src/tests/test_shader_compilation.cc b/src/tests/test_shader_compilation.cc
index e2c0adc..a322e8a 100644
--- a/src/tests/test_shader_compilation.cc
+++ b/src/tests/test_shader_compilation.cc
@@ -115,16 +115,19 @@ static bool test_shader_compilation(const char* name, const char* shader_code) {
return true; // Not a failure, just skipped
}
+ // Compose shader to resolve #include directives
+ std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code);
+
#if defined(DEMO_CROSS_COMPILE_WIN32)
WGPUShaderModuleWGSLDescriptor wgsl_desc = {};
wgsl_desc.chain.sType = WGPUSType_ShaderModuleWGSLDescriptor;
- wgsl_desc.code = shader_code;
+ wgsl_desc.code = composed_shader.c_str();
WGPUShaderModuleDescriptor shader_desc = {};
shader_desc.nextInChain = (const WGPUChainedStruct*)&wgsl_desc.chain;
#else
WGPUShaderSourceWGSL wgsl_desc = {};
wgsl_desc.chain.sType = WGPUSType_ShaderSourceWGSL;
- wgsl_desc.code = str_view(shader_code);
+ wgsl_desc.code = str_view(composed_shader.c_str());
WGPUShaderModuleDescriptor shader_desc = {};
shader_desc.nextInChain = (const WGPUChainedStruct*)&wgsl_desc.chain;
#endif
diff --git a/src/util/asset_manager.h b/src/util/asset_manager.h
index 1e0638c..168bfca 100644
--- a/src/util/asset_manager.h
+++ b/src/util/asset_manager.h
@@ -10,6 +10,7 @@ struct AssetRecord {
size_t size; // Size of the asset data
bool is_procedural; // True if data was dynamically allocated by a procedural
// generator
+ bool is_gpu_procedural; // True if GPU compute shader generates texture
const char* proc_func_name_str; // Name of procedural generation function
// (string literal)
const float* proc_params; // Parameters for procedural generation (static,
diff --git a/tools/asset_packer.cc b/tools/asset_packer.cc
index 0d26cf6..4aaa0e7 100644
--- a/tools/asset_packer.cc
+++ b/tools/asset_packer.cc
@@ -52,6 +52,7 @@ struct AssetBuildInfo {
std::string name;
std::string filename; // Original filename for static assets
bool is_procedural;
+ bool is_gpu_procedural;
std::string proc_func_name; // Function name string
std::vector<float> proc_params; // Parameters for procedural function
@@ -182,9 +183,64 @@ int main(int argc, char* argv[]) {
info.params_array_name = "ASSET_PROC_PARAMS_" + info.name;
info.func_name_str_name = "ASSET_PROC_FUNC_STR_" + info.name;
info.is_procedural = false;
+ info.is_gpu_procedural = false;
- if (compression_type_str.rfind("PROC(", 0) == 0) {
+ if (compression_type_str.rfind("PROC_GPU(", 0) == 0) {
info.is_procedural = true;
+ info.is_gpu_procedural = true;
+ size_t open_paren = compression_type_str.find('(');
+ size_t close_paren = compression_type_str.rfind(')');
+ if (open_paren == std::string::npos ||
+ close_paren == std::string::npos) {
+ fprintf(stderr,
+ "Error: Invalid PROC_GPU() syntax for asset: %s, string: [%s]\n",
+ info.name.c_str(), compression_type_str.c_str());
+ return 1;
+ }
+ std::string func_and_params_str = compression_type_str.substr(
+ open_paren + 1, close_paren - open_paren - 1);
+
+ size_t params_start = func_and_params_str.find(',');
+ if (params_start != std::string::npos) {
+ std::string params_str = func_and_params_str.substr(params_start + 1);
+ info.proc_func_name = func_and_params_str.substr(0, params_start);
+
+ size_t current_pos = 0;
+ while (current_pos < params_str.length()) {
+ size_t comma_pos = params_str.find(',', current_pos);
+ std::string param_val_str =
+ (comma_pos == std::string::npos)
+ ? params_str.substr(current_pos)
+ : params_str.substr(current_pos, comma_pos - current_pos);
+ param_val_str.erase(0, param_val_str.find_first_not_of(" \t\r\n"));
+ param_val_str.erase(param_val_str.find_last_not_of(" \t\r\n") + 1);
+ try {
+ info.proc_params.push_back(std::stof(param_val_str));
+ } catch (...) {
+ fprintf(stderr, "Error: Invalid proc param for %s: %s\n",
+ info.name.c_str(), param_val_str.c_str());
+ return 1;
+ }
+ if (comma_pos == std::string::npos)
+ break;
+ current_pos = comma_pos + 1;
+ }
+ } else {
+ info.proc_func_name = func_and_params_str;
+ }
+
+ // Validate GPU procedural function name
+ if (info.proc_func_name != "gen_noise" &&
+ info.proc_func_name != "gen_perlin" &&
+ info.proc_func_name != "gen_grid") {
+ fprintf(stderr,
+ "Error: PROC_GPU only supports gen_noise, gen_perlin, gen_grid, got: %s for asset: %s\n",
+ info.proc_func_name.c_str(), info.name.c_str());
+ return 1;
+ }
+ } else if (compression_type_str.rfind("PROC(", 0) == 0) {
+ info.is_procedural = true;
+ info.is_gpu_procedural = false;
size_t open_paren = compression_type_str.find('(');
size_t close_paren = compression_type_str.rfind(')');
if (open_paren == std::string::npos ||
@@ -500,12 +556,13 @@ int main(int argc, char* argv[]) {
for (const auto& info : asset_build_infos) {
fprintf(assets_data_cc_file, " { ");
if (info.is_procedural) {
- fprintf(assets_data_cc_file, "nullptr, 0, true, %s, %s, %zu",
+ fprintf(assets_data_cc_file, "nullptr, 0, true, %s, %s, %s, %zu",
+ info.is_gpu_procedural ? "true" : "false",
info.func_name_str_name.c_str(), info.params_array_name.c_str(),
info.proc_params.size());
} else {
fprintf(assets_data_cc_file,
- "%s, ASSET_SIZE_%s, false, nullptr, nullptr, 0",
+ "%s, ASSET_SIZE_%s, false, false, nullptr, nullptr, 0",
info.data_array_name.c_str(), info.name.c_str());
}
fprintf(assets_data_cc_file, " },\n");
diff --git a/tools/timeline_editor/index.html b/tools/timeline_editor/index.html
index 074b711..db71beb 100644
--- a/tools/timeline_editor/index.html
+++ b/tools/timeline_editor/index.html
@@ -601,7 +601,11 @@
const modifier = effect.priorityModifier || '+';
output += ` EFFECT ${modifier} ${effect.className} ${effect.startTime.toFixed(2)} ${effect.endTime.toFixed(2)}`;
if (effect.args) {
- output += ` ${effect.args}`;
+ // Strip priority comments from args
+ const cleanArgs = effect.args.replace(/\s*#\s*Priority:\s*\d+/i, '').trim();
+ if (cleanArgs) {
+ output += ` ${cleanArgs}`;
+ }
}
output += '\n';
}
diff --git a/tools/validate_uniforms.py b/tools/validate_uniforms.py
new file mode 100644
index 0000000..40d1b0f
--- /dev/null
+++ b/tools/validate_uniforms.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+
+import sys
+import re
+import os
+
+# WGSL alignment rules (simplified for common types)
+WGSL_ALIGNMENT = {
+ "f32": 4,
+ "vec2<f32>": 8,
+ "vec3<f32>": 16,
+ "vec4<f32>": 16,
+ # Add other types as needed (e.g., u32, i32, mat4x4<f32>)
+}
+
+def get_wgsl_type_size_and_alignment(type_name):
+ type_name = type_name.strip()
+ if type_name in WGSL_ALIGNMENT:
+ return WGSL_ALIGNMENT[type_name], WGSL_ALIGNMENT[type_name]
+ # Handle arrays, e.g., array<f32, 5>
+ if type_name.startswith("array"):
+ match = re.search(r"array<([\w<>, ]+)>", type_name)
+ if match:
+ inner_type = match.group(1).split(",")[0].strip()
+ # For simplicity, assume scalar array doesn't change alignment of base type
+ return get_wgsl_type_size_and_alignment(inner_type)
+ # Handle structs recursively (simplified, assumes no nested structs for now)
+ return 0, 0 # Unknown or complex type
+
+def parse_wgsl_struct(wgsl_content):
+ structs = {}
+ # Regex to find struct definitions: struct StructName { ... }
+ struct_matches = re.finditer(r"struct\s+(\w+)\s*\{\s*(.*?)\s*\}", wgsl_content, re.DOTALL)
+ for struct_match in struct_matches:
+ struct_name = struct_match.group(1)
+ members_content = struct_match.group(2)
+ members = []
+ # Regex to find members: member_name: member_type
+ # Adjusted regex to handle types with brackets and spaces, and comments.
+ # CHANGED: \s to [ \t] to avoid consuming newlines
+ member_matches = re.finditer(r"(\w+)\s*:\s*([\w<>,\[\] \t]+)(?:\s*//.*)?", members_content)
+ for member_match in member_matches:
+ member_name = member_match.group(1)
+ member_type = member_match.group(2).strip()
+ if member_type.endswith(','):
+ member_type = member_type[:-1].strip()
+ members.append((member_name, member_type))
+ structs[struct_name] = members
+ # print(f"DEBUG: Parsed WGSL struct '{struct_name}' with members: {members}")
+ return structs
+
+def find_embedded_wgsl_in_cpp(cpp_content):
+ # Regex to find raw string literals R"(...)" which often contain WGSL
+ wgsl_blocks = []
+ matches = re.finditer(r'R"\((.*?)\)"', cpp_content, re.DOTALL)
+ for match in matches:
+ wgsl_blocks.append(match.group(1))
+ return wgsl_blocks
+
+def calculate_wgsl_struct_size(struct_name, struct_members):
+ total_size = 0
+ max_alignment = 0
+ members_info = []
+
+ for member_name, member_type in struct_members:
+ size, alignment = get_wgsl_type_size_and_alignment(member_type)
+ if size == 0: # If type is unknown or complex, we can't reliably calculate
+ # print(f"Warning: Unknown or complex WGSL type '{member_type}' for member '{member_name}'. Cannot reliably calculate size.", file=sys.stderr)
+ return 0, 0
+ members_info.append((member_name, member_type, size, alignment))
+ max_alignment = max(max_alignment, alignment)
+
+ current_offset = 0
+ for member_name, member_type, size, alignment in members_info:
+ # Align current offset to the alignment of the current member
+ current_offset = (current_offset + alignment - 1) & ~(alignment - 1)
+ current_offset += size
+
+ # The total size of the struct is the final offset, padded to the max alignment
+ if max_alignment > 0:
+ total_size = (current_offset + max_alignment - 1) & ~(max_alignment - 1)
+ else:
+ total_size = current_offset
+
+ return total_size, max_alignment
+
+def parse_cpp_static_asserts(cpp_content):
+ cpp_structs = {}
+ # Regex to find C++ struct definitions with static_asserts for sizeof
+ # This regex is simplified and might need adjustments for more complex C++ code
+ struct_matches = re.finditer(r"struct\s+(\w+)\s*\{\s*(.*?)\s*\}\s*;.*?static_assert\(sizeof\(\1\)\s*==\s*(\d+)\s*,.*?\);", cpp_content, re.DOTALL | re.MULTILINE)
+ for struct_match in struct_matches:
+ struct_name = struct_match.group(1)
+ members_content = struct_match.group(2)
+ expected_size = int(struct_match.group(3))
+ members = []
+ # Regex to find members: type member_name;
+ member_matches = re.finditer(r"(.*?)\s+(\w+)\s*(?:=\s*.*?|\s*\{.*?\})?;", members_content)
+ for member_match in member_matches:
+ member_type = member_match.group(1).strip()
+ member_name = member_match.group(2).strip()
+ members.append((member_name, member_type))
+ cpp_structs[struct_name] = {"members": members, "expected_size": expected_size}
+ return cpp_structs
+
+def validate_uniforms(wgsl_files, cpp_files):
+ all_wgsl_structs = {}
+
+ # Parse separate WGSL files
+ for file_path in wgsl_files:
+ try:
+ with open(file_path, 'r') as f:
+ wgsl_content = f.read()
+ structs = parse_wgsl_struct(wgsl_content)
+ all_wgsl_structs.update(structs)
+ except Exception as e:
+ print(f"Error parsing WGSL file {file_path}: {e}", file=sys.stderr)
+ continue
+
+ # Parse C++ files for embedded WGSL and static_asserts
+ for cpp_file_path in cpp_files:
+ try:
+ with open(cpp_file_path, 'r') as f:
+ cpp_content = f.read()
+
+ # Parse embedded WGSL
+ wgsl_blocks = find_embedded_wgsl_in_cpp(cpp_content)
+ for block in wgsl_blocks:
+ structs = parse_wgsl_struct(block)
+ all_wgsl_structs.update(structs)
+
+ # Parse C++ structs and static_asserts
+ cpp_structs = parse_cpp_static_asserts(cpp_content)
+ for struct_name, data in cpp_structs.items():
+ expected_size = data["expected_size"]
+ # Try to find the matching WGSL struct
+ if struct_name in all_wgsl_structs:
+ wgsl_members = all_wgsl_structs[struct_name]
+ calculated_wgsl_size, wgsl_max_alignment = calculate_wgsl_struct_size(struct_name, wgsl_members)
+
+ if calculated_wgsl_size == 0: # If calculation failed
+ # print(f"Validation Warning for '{struct_name}': Could not calculate WGSL size.")
+ continue
+
+ if calculated_wgsl_size != expected_size:
+ print(f"Validation Mismatch for '{struct_name}':\n WGSL Calculated Size: {calculated_wgsl_size}\n C++ Expected Size: {expected_size}\n Max WGSL Alignment: {wgsl_max_alignment}", file=sys.stderr)
+ sys.exit(1)
+ else:
+ print(f"Validation OK for '{struct_name}': Size {calculated_wgsl_size} matches C++ expected size.")
+ else:
+ print(f"Validation Warning for '{struct_name}': Matching WGSL struct not found.")
+ except Exception as e:
+ print(f"Error processing C++ file {cpp_file_path}: {e}", file=sys.stderr)
+ continue
+
+def main():
+ if len(sys.argv) < 3:
+ print("Usage: validate_uniforms.py <wgsl_dir_or_file> <cpp_file1> [<cpp_file2> ...]", file=sys.stderr)
+ sys.exit(1)
+
+ wgsl_input = sys.argv[1]
+ cpp_files = sys.argv[2:]
+
+ wgsl_files = []
+ if os.path.isfile(wgsl_input):
+ wgsl_files.append(wgsl_input)
+ elif os.path.isdir(wgsl_input):
+ for root, _, files in os.walk(wgsl_input):
+ for file in files:
+ if file.endswith(".wgsl"):
+ wgsl_files.append(os.path.join(root, file))
+
+ # We proceed even if wgsl_files is empty, because C++ files might contain embedded WGSL
+
+ validate_uniforms(wgsl_files, cpp_files)
+
+if __name__ == "__main__":
+ main() \ No newline at end of file