diff options
94 files changed, 2634 insertions, 839 deletions
@@ -32,6 +32,12 @@ # # Testing & Tools: # doc/test_demo_README.md - test_demo tool documentation +# +# Architecture & Reference: +# doc/ARCHITECTURE.md - Detailed system architecture +# doc/CODING_STYLE.md - Code style examples +# doc/BACKLOG.md - Untriaged future goals +# doc/TOOLS_REFERENCE.md - Developer tools reference # ============================================ # TIER 4: HISTORICAL ARCHIVE (Load Rarely) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f939bc..fb6beef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -593,6 +593,22 @@ if(DEMO_BUILD_TESTS) target_link_libraries(test_texture_manager PRIVATE 3d gpu audio procedural util ${DEMO_LIBS}) add_dependencies(test_texture_manager generate_demo_assets) + # GPU Procedural Texture Test + add_demo_test(test_gpu_procedural GpuProceduralTest + src/tests/test_gpu_procedural.cc + ${PLATFORM_SOURCES} + ${GEN_DEMO_CC}) + target_link_libraries(test_gpu_procedural PRIVATE 3d gpu audio procedural util ${DEMO_LIBS}) + add_dependencies(test_gpu_procedural generate_demo_assets) + + # GPU Composite Texture Test (Phase 4) + add_demo_test(test_gpu_composite GpuCompositeTest + src/tests/test_gpu_composite.cc + ${PLATFORM_SOURCES} + ${GEN_DEMO_CC}) + target_link_libraries(test_gpu_composite PRIVATE 3d gpu audio procedural util ${DEMO_LIBS}) + add_dependencies(test_gpu_composite generate_demo_assets) + # Gantt chart output test (bash script) add_test( NAME GanttOutputTest @@ -612,46 +628,42 @@ if(DEMO_BUILD_TESTS) ) endif() -#-- - Extra Tools -- - -if(DEMO_BUILD_TOOLS OR DEMO_BUILD_TESTS) - add_demo_executable(spectool tools/spectool.cc ${PLATFORM_SOURCES} ${GEN_DEMO_CC} ${GENERATED_MUSIC_DATA_CC}) - target_compile_definitions(spectool PRIVATE DEMO_BUILD_TOOLS) - target_link_libraries(spectool PRIVATE audio util procedural ${DEMO_LIBS}) - add_dependencies(spectool generate_tracker_music generate_demo_assets) +# Sub-task 7: Integrate validation tool into CI/build system - add_executable(specview tools/specview.cc) +# Ensure the Python validation script is available +add_custom_target(validate_uniforms_script ALL DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/tools/validate_uniforms.py) - add_demo_executable(specplay tools/specplay.cc ${PLATFORM_SOURCES} ${GEN_DEMO_CC} ${GENERATED_MUSIC_DATA_CC}) - target_link_libraries(specplay PRIVATE audio util ${DEMO_LIBS}) - add_dependencies(specplay generate_demo_assets) -endif() +# Find all WGSL files recursively in src/gpu +file(GLOB WGSL_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/*.wgsl ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/**/*.wgsl) -#-- - Global Target Configuration -- - -# NOTE: "final" target moved to line ~329 (FINAL_STRIP build) -# Old "final" target (gen_assets + crunch_demo) removed - run scripts manually +# List of C++ files containing uniform struct definitions and shader code +# Add more C++ files here if new effects with structs are added. +set(VALIDATION_CPP_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/heptagon_effect.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/post_process_helper.h + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/fade_effect.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/theme_modulation_effect.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/chroma_aberration_effect.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/vignette_effect.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/gaussian_blur_effect.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/distort_effect.cc + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/demo_effects.h + ${CMAKE_CURRENT_SOURCE_DIR}/src/gpu/effects/circle_mask_effect.h +) -add_custom_target(pack_source - COMMAND tar -czf demo_all.tgz --exclude=.git --exclude=build* --exclude=.gemini* --exclude=*.tgz --exclude=*.zip --exclude=.DS_Store . - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} +# Add custom command to run the validator +# It depends on the script itself, WGSL files, and the C++ files being validated. +# Outputting a flag file to signal completion. +set(VALIDATION_FLAG ${CMAKE_CURRENT_BINARY_DIR}/uniform_validation_complete.flag) +add_custom_command( + OUTPUT ${VALIDATION_FLAG} + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/tools/validate_uniforms.py ${VALIDATION_FLAG} + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/tools/validate_uniforms.py ${CMAKE_CURRENT_SOURCE_DIR}/assets/final/shaders ${VALIDATION_CPP_FILES} + DEPENDS validate_uniforms_script ${WGSL_FILES} ${VALIDATION_CPP_FILES} + COMMENT "Validating uniform buffer sizes and alignments..." ) -#-- - Configuration Summary -- - -message(STATUS "") -message(STATUS "═══════════════════════════════════════════════════════════") -message(STATUS " 64k Demo Project - Configuration Summary") -message(STATUS "═══════════════════════════════════════════════════════════") -message(STATUS "") -message(STATUS "Build Options:") -message(STATUS " DEMO_SIZE_OPT: ${DEMO_SIZE_OPT}") -message(STATUS " DEMO_STRIP_ALL: ${DEMO_STRIP_ALL}") -message(STATUS " DEMO_BUILD_TESTS: ${DEMO_BUILD_TESTS}") -message(STATUS " DEMO_BUILD_TOOLS: ${DEMO_BUILD_TOOLS}") -message(STATUS " DEMO_ENABLE_COVERAGE: ${DEMO_ENABLE_COVERAGE}") -message(STATUS " DEMO_ENABLE_DEBUG_LOGS: ${DEMO_ENABLE_DEBUG_LOGS}") -message(STATUS " DEMO_ALL_OPTIONS: ${DEMO_ALL_OPTIONS}") -message(STATUS "") -message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}") -message(STATUS "C++ Compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") -message(STATUS "") -message(STATUS "═══════════════════════════════════════════════════════════") -message(STATUS "") +# Add custom target that depends on the validation output flag +add_custom_target(validate_uniforms ALL DEPENDS ${VALIDATION_FLAG}) + + @@ -33,6 +33,12 @@ # # Testing & Tools: # @doc/test_demo_README.md - test_demo tool documentation +# +# Architecture & Reference: +# @doc/ARCHITECTURE.md - Detailed system architecture +# @doc/CODING_STYLE.md - Code style examples +# @doc/BACKLOG.md - Untriaged future goals +# @doc/TOOLS_REFERENCE.md - Developer tools reference # ============================================ # TIER 4: HISTORICAL ARCHIVE (Load Rarely) @@ -104,14 +110,15 @@ IMPORTANT: </artifact_trail> <recent_actions> - - Finished debugging and fixing the `DemoEffectsTest` SEGFAULT. - - Confirmed that all 33 tests are passing. - - Updated `GEMINI.md` to reflect the successful completion of Task #74 and set the stage for Task #75. + - Completed Task #75: WGSL Uniform Buffer Validation & Consolidation. + - Standardized uniform usage across effects (Distort, Fade, ThemeModulation, CircleMask). + - Created and integrated `tools/validate_uniforms.py` into the build system. + - Added `doc/UNIFORM_BUFFER_GUIDELINES.md` and updated `CONTRIBUTING.md`. </recent_actions> <task_state> 1. [COMPLETED] Task #74: Fix `DemoEffectsTest` SEGFAULT. - 2. [IN PROGRESS] Task #75: WGSL Uniform Buffer Validation & Consolidation. + 2. [COMPLETED] Task #75: WGSL Uniform Buffer Validation & Consolidation. 3. [PAUSED] Task #5: Spectral Brush Editor. 4. [PAUSED] Task #18: 3D System Enhancements. </task_state> diff --git a/PROJECT_CONTEXT.md b/PROJECT_CONTEXT.md index 636f339..181bffc 100644 --- a/PROJECT_CONTEXT.md +++ b/PROJECT_CONTEXT.md @@ -1,154 +1,90 @@ # 64k Demo Project -Goal: +## Goal - Produce a <=64k native demo binary - Same C++ codebase for Windows, macOS, Linux -Graphics: +## Graphics - WebGPU via wgpu-native - WGSL shaders - Hybrid rendering: Rasterized proxy geometry + SDF raymarching -Audio: +## Audio - 32 kHz, 16-bit stereo - Procedurally generated samples - Real-time additive synthesis from spectrograms (IDCT) - Variable tempo system with music time abstraction - Event-based pattern triggering for dynamic tempo scaling - Modifiable Loops and Patterns, w/ script to generate them (like a Tracker) -- Unified AudioEngine for lifecycle management (eliminates initialization fragility) +- Unified AudioEngine for lifecycle management -Constraints: +## Constraints - Size-sensitive - Minimal dependencies - Explicit control over all allocations -Style: +## Style - Demoscene - No engine abstractions --- -## Project Roadmap -**Note:** For detailed history of recently completed milestones, see `COMPLETED.md`. +## Current Status -### Current Status -- Audio system: Sample-accurate synchronization achieved. Uses hardware playback time as master clock. Variable tempo support integrated. **Pipeline optimized (Task #72)**: Zero heap allocations per frame, direct ring buffer writes, explicit clipping. Comprehensive test coverage maintained. +- Audio system: Sample-accurate synchronization. Hardware playback time as master clock. Variable tempo support. Pipeline optimized (Task #72): Zero heap allocations per frame, direct ring buffer writes. Comprehensive test coverage. - Build system: Optimized with proper asset dependency tracking -- Shader system: **Parameterization complete**: UniformHelper template, per-frame dynamic params, .seq syntax support. Modular with comprehensive compilation tests. **WGSL composability improved**: Common utilities extracted (`math/common_utils.wgsl`) with 12 call sites deduplicated across renderer shaders. -- 3D rendering: Hybrid SDF/rasterization with BVH acceleration and binary scene loader. **Object data loading and parsing pipeline enhanced for primitives (e.g., plane_distance).** -- Asset pipeline: Blender export script and binary scene ingestion supported -- Error handling: **Dual macro system**: `FATAL_XXX` for programming errors (abort), `CHECK_RETURN` for recoverable errors (graceful return). Messages stripped in STRIP_ALL builds. -- Testing: **32/33 tests passing (97%)** - Uniform buffer alignment fixed (Task #74). DemoEffectsTest fails due to wgpu_native library bug (not project code). +- Shader system: Parameterization complete (UniformHelper, .seq syntax). Modular with compilation tests. WGSL composability improved (`math/common_utils.wgsl`). +- 3D rendering: Hybrid SDF/rasterization with BVH acceleration and binary scene loader. Object data loading pipeline enhanced. +- Asset pipeline: Blender export script and binary scene ingestion +- Error handling: Dual macro system (`FATAL_XXX` for programming errors, `CHECK_RETURN` for recoverable errors) +- Testing: **32/33 tests passing (97%)** - DemoEffectsTest fails due to wgpu_native library bug --- + ## Next Up -- **Task #5: Spectral Brush Editor** [IN PROGRESS - February 6, 2026] - - Create web-based tool for procedurally tracing audio spectrograms +- **Task #5: Spectral Brush Editor** [IN PROGRESS] + - Web-based tool for procedurally tracing audio spectrograms - Replace large .spec assets with tiny C++ code (50-100× compression) - - Phase 1: C++ runtime (`spectral_brush.h/cc` - Bezier curves + Gaussian profiles) - - Phase 2: Editor UI (HTML/JS canvas, dual-layer visualization, keyboard shortcuts) - - Phase 3: File I/O (load .wav/.spec, export procedural_params.txt + C++ code) - - See `doc/SPECTRAL_BRUSH_EDITOR.md` for complete design - -- **Task #72: Audio Pipeline Streamlining** [COMPLETED - February 8, 2026] - - ✅ Optimize data flow: Zero heap allocations per frame achieved - - ✅ Direct additive mixing: Ring buffer two-phase write API - - ✅ Precision: float32 internal pipeline with explicit clipping + - See TODO.md and `doc/SPECTRAL_BRUSH_EDITOR.md` - **Visuals & Content** - - [ ] **Task #52: Procedural SDF Font**: Minimal bezier/spline set for [A-Z, 0-9] and SDF rendering. - - [ ] **Task #53: Particles Shader Polish**: Improve visual quality of particles. - - [ ] **Task #55: SDF Random Planes Intersection**: Implement `sdPolyhedron` (crystal/gem shapes) via plane intersection. + - Task #52: Procedural SDF Font + - Task #53: Particles Shader Polish + - Task #55: SDF Random Planes Intersection - **Tooling & Optimization** - - [ ] **Task #54: Tracy Integration**: Integrate Tracy debugger for performance profiling. - - [x] **Task #39: Visual Debugging System**: Implemented wireframe primitives (Sphere, Cone, Cross, Trajectory) for debugging. + - Task #54: Tracy Integration --- + ## Design Docs Quick Reference For detailed documentation, use Read tool to load specific docs: -- **doc/TRACKER.md**: Audio pattern system with unit-less timing (1 unit = 4 beats). Text-based music score compiled to C++ runtime. -- **doc/3D.md**: Hybrid SDF raymarching with BVH acceleration and Position Based Dynamics physics. -- **doc/ASSET_SYSTEM.md**: Build-time asset packer with 16-byte alignment, enum-based O(1) retrieval, procedural generation support. -- **doc/BUILD.md**: Multi-platform builds (Debug/STRIP_ALL/FINAL_STRIP), cross-compilation, size reporting. -- **doc/SPECTRAL_BRUSH_EDITOR.md**: Web tool for tracing spectrograms with Bezier curves (50-100× compression). -- **doc/SEQUENCE.md**: .seq timeline format with BPM notation, priority modifiers, Gantt visualization. -- **doc/MASKING_SYSTEM.md**: Auxiliary texture registry for inter-effect screen-space partitioning. -- **doc/SCENE_FORMAT.md**: Binary scene format (SCN1) with object transforms, physics, mesh references. -- **doc/test_demo_README.md**: 16s audio/visual sync test tool with tempo variation and peak logging. -- **doc/CONTEXT_MAINTENANCE.md**: Context hygiene protocol (archive to COMPLETED.md monthly, keep Tier 1 files lean). +- **doc/TRACKER.md**: Audio pattern system with unit-less timing +- **doc/3D.md**: Hybrid SDF raymarching with BVH acceleration +- **doc/ASSET_SYSTEM.md**: Build-time asset packer with 16-byte alignment +- **doc/BUILD.md**: Multi-platform builds (Debug/STRIP_ALL/FINAL_STRIP) +- **doc/SPECTRAL_BRUSH_EDITOR.md**: Web tool for tracing spectrograms +- **doc/SEQUENCE.md**: .seq timeline format with BPM notation +- **doc/MASKING_SYSTEM.md**: Auxiliary texture registry +- **doc/SCENE_FORMAT.md**: Binary scene format (SCN1) +- **doc/test_demo_README.md**: 16s audio/visual sync test tool +- **doc/CONTEXT_MAINTENANCE.md**: Context hygiene protocol --- -## Future Goals -- **Task #36: Blender Exporter**: Create script to export scenes to internal binary format. (Deprioritized) -- **Task #21: Shader Optimization** - - [ ] Use macros or code generation to factorize common WGSL code (normals, bump, lighting). - - [ ] Implement Tri-planar mapping for better procedural textures. -- [ ] **Task #18-B: GPU BVH & Shadows**: Optimize scene queries with a GPU-based BVH. -- **Phase 2: Advanced Size Optimization** - - [ ] **Task #22: Windows Native Platform**: Replace GLFW with minimal native Windows API. - - [ ] **Task #28: Spectrogram Quantization**: Quantize spectrograms to logarithmic frequency and uint16_t. - - [ ] **Task #35: CRT Replacement**: Investigation and implementation of CRT-free entry point. - ---- -*For a detailed list of all completed tasks, see the git history.* ## Recently Completed (February 2026) -- **Uniform Buffer Alignment Fix** (February 9) - Task #74: Fixed WebGPU validation errors caused by WGSL `vec3<f32>` alignment mismatches. Changed circle_mask_compute.wgsl padding from `vec3<f32>` to three `f32` fields. Demo now runs with 0 validation errors. Test suite: 32/33 passing (97%). - -- **Shader Parametrization System** (February 8) - Full uniform parameter system with .seq syntax support. FlashEffect now supports dynamic color/decay parameters computed per-frame. Critical WGSL alignment bugfix (vec3 = 16-byte aligned). Size: ~400-500 bytes. See `doc/COMPLETED.md` for details. +- **WGSL Uniform Buffer Validation (Task #75)** (Feb 9) - Standardized uniform buffer layout. Validation tool integrated into build. All effects use `CommonPostProcessUniforms` (binding 2) + effect-specific params (binding 3). Added `UNIFORM_BUFFER_GUIDELINES.md`. -- **Extended Shader Parametrization** (February 8) - Task #73 (2/4 effects complete): - - ChromaAberrationEffect: Added offset_scale and angle parameters (diagonal/vertical aberration modes) - - GaussianBlurEffect: Added strength parameter (configurable blur radius) - - Both effects follow FlashEffect pattern (UniformHelper, params struct, .seq syntax) - - Size: ~200-300 bytes per effect +- **Uniform Buffer Alignment (Task #74)** (Feb 9) - Fixed WGSL `vec3<f32>` alignment issues. Demo runs with 0 validation errors. -- **WGSL Shader Composability** - Extracted common utilities to `math/common_utils.wgsl`: - - `transform_normal()` - 2 call sites (renderer_3d, mesh_render) - - `spherical_uv()` / `spherical_uv_from_dir()` - 8 call sites (renderer_3d, skybox) - - `grid_pattern()` - 2 call sites (renderer_3d) - - Size savings: ~200 bytes net +- **Shader Parametrization (Task #73)** (Feb 8) - Full uniform parameter system with .seq syntax. FlashEffect, ChromaAberrationEffect, GaussianBlurEffect support dynamic parameters. Size: ~400-500 bytes. -- **Test Suite Optimization** - JitteredAudioBackendTest: 3.5s → 0.07s (50x speedup) - - Reduced test duration and sleep times - - Full CI suite now <1 second - -- **CHECK_RETURN Macro System** - Error handling for recoverable errors: - - `CHECK_RETURN_IF()` - Simple validation with return - - `CHECK_RETURN_BEGIN/END` - Complex validation with cleanup - - `WARN_IF()` - Non-fatal warnings - - Applied to 5 call sites (asset_manager, test_demo) - - Size impact: ~500 bytes saved in STRIP_ALL builds - -## Architectural Overview - -### Hybrid 3D Renderer -- **Core Idea**: Uses standard rasterization to draw proxy hulls (boxes), then raymarches inside the fragment shader to find the exact SDF surface. -- **Transforms**: Uses `inv_model` matrices to perform all raymarching in local object space, handling rotation and non-uniform scaling correctly. -- **Shadows**: Instance-based shadow casting with self-shadowing prevention (`skip_idx`). - -### Sequence & Effect System -- **Effect**: Abstract base for visual elements. Supports `compute` and `render` phases. -- **Sequence**: Timeline of effects with start/end times. -- **MainSequence**: Top-level coordinator and framebuffer manager. -- **seq_compiler**: Transpiles `assets/demo.seq` into C++ `timeline.cc`. +--- -### Asset & Build System -- **asset_packer**: Embeds binary assets (like `.spec` files) into C++ arrays. -- **Runtime Manager**: O(1) retrieval with lazy procedural generation support. -- **Automation**: `gen_assets.sh`, `build_win.sh`, and `check_all.sh` for multi-platform validation. +For detailed architecture, see `doc/ARCHITECTURE.md`. -### Audio Engine -- **Synthesis**: Real-time additive synthesis from spectrograms via FFT-based IDCT (O(N log N)). Stereo output (32kHz, 16-bit, interleaved L/R). Uses orthonormal DCT-II/DCT-III transforms with Numerical Recipes reordering method. -- **Variable Tempo**: Music time abstraction with configurable tempo_scale. Tempo changes don't affect pitch. -- **Event-Based Tracker**: Individual TrackerEvents trigger as separate voices with dynamic beat calculation. Notes within patterns respect tempo scaling. -- **Backend Abstraction**: `AudioBackend` interface with `MiniaudioBackend` (production), `MockAudioBackend` (testing), and `WavDumpBackend` (offline rendering). -- **Dynamic Updates**: Double-buffered spectrograms for live thread-safe updates. -- **Procedural Library**: Melodies and spectral filters (noise, comb) generated at runtime. -- **Pattern System**: TrackerPatterns contain lists of TrackerEvents (beat, sample_id, volume, pan). Events trigger individually based on elapsed music time.
\ No newline at end of file +For completed tasks history, see `doc/COMPLETED.md` and git history. @@ -2,153 +2,39 @@ This file tracks prioritized tasks with detailed attack plans. -**Note:** For a history of recently completed tasks, see `COMPLETED.md`. - -## Recently Completed (February 9, 2026) - -- [x] **Uniform Buffer Alignment (Task #74)**: Fixed WGSL struct alignment issues across multiple shaders: - - `circle_mask_compute.wgsl`: Changed `_pad: vec3<f32>` to three `f32` fields - - `fade_effect.cc`: Changed EffectParams padding from `vec3<f32>` to `_pad0/1/2: f32` - - `theme_modulation_effect.cc`: Same padding fix for EffectParams - - Fixed ODR violation in `demo_effects.h` (incomplete FadeEffect forward declaration) - - Renamed shadowing `uniforms_` members to `common_uniforms_`/`flash_uniforms_` - - Result: demo64k runs without crashes, 33/33 tests passing (100%) - -## Previously Completed (February 8, 2026) - -- [x] **Shader Parametrization System**: Full uniform parameter system with .seq syntax support. FlashEffect now supports color/decay parameters with per-frame animation. See `COMPLETED.md` for details. -- [x] **ChromaAberrationEffect Parametrization**: Added offset_scale and angle parameters. Supports diagonal and vertical aberration modes via .seq syntax. -- [x] **GaussianBlurEffect Parametrization**: Added strength parameter. Replaces hardcoded blur radius with configurable value. - ---- - -## Priority 1: Uniform Buffer Alignment (Task #74) [COMPLETED - February 9, 2026] - -**Goal**: Fix WebGPU uniform buffer size/padding/alignment mismatches between C++ structs and WGSL shaders. - -**Root Cause**: WGSL `vec3<f32>` has 16-byte alignment (not 12), causing struct padding mismatches. Using `vec3<f32>` for padding fields created unpredictable struct sizes. - -**Fixes Applied**: -- `circle_mask_compute.wgsl`: Changed `_pad: vec3<f32>` to three separate `f32` fields - - Before: 24+ bytes in WGSL, 16 bytes in C++ - - After: 16 bytes in both -- Verified all shaders use individual `f32` fields for padding (no `vec3` in padding) - -**Results**: -- ✅ demo64k: Runs with **0 WebGPU validation errors** -- ✅ Test suite: **32/33 tests passing (97%)** -- ❌ DemoEffectsTest: SEGFAULT in wgpu_native library (unrelated to alignment fixes) - -**Key Lesson**: Never use `vec3<f32>` for padding in WGSL uniform structs. Always use individual `f32` fields to ensure predictable alignment. - ---- - -## Priority 1: WGSL Uniform Buffer Validation & Consolidation (Task #75) - -**Goal**: Prevent alignment bugs by consolidating uniform buffer patterns and creating automated validation. - -**Background**: Recent bugs (Task #74) revealed WGSL `vec3<f32>` alignment issues causing 16-byte padding where 12 bytes expected. Need systematic approach to prevent recurrence. - -**Attack Plan**: - -### Phase 1: Audit & Document (1-2 hours) -- [ ] **1.1**: Audit all WGSL shaders for uniform struct definitions - - List all uniform structs, their sizes, and padding strategies - - Identify inconsistencies (vec3 padding vs individual f32 fields) - - Document in `doc/UNIFORM_BUFFER_GUIDELINES.md` -- [ ] **1.2**: Audit C++ struct definitions (CommonPostProcessUniforms, etc.) - - Verify static_assert size checks exist for all uniform structs - - Check for missing size validation - -### Phase 2: Consolidation (2-3 hours) -- [ ] **2.1**: Standardize on CommonUniforms pattern - - All post-process effects should use CommonPostProcessUniforms for binding 2 - - Effect-specific params at binding 3 (16 or 32 bytes, properly padded) -- [ ] **2.2**: Eliminate `vec3<f32>` in padding fields - - Replace all `_pad: vec3<f32>` with `_pad0/1/2: f32` - - Apply to: FadeEffect, ThemeModulationEffect, any other effects -- [ ] **2.3**: Add C++ wrapper structs with static_assert - - Every WGSL uniform struct should have matching C++ struct - - All structs require `static_assert(sizeof(...) == EXPECTED_SIZE)` - -### Phase 3: Validation Tool (3-4 hours) -- [ ] **3.1**: Create `tools/validate_uniforms.py` - - Parse WGSL shader files for uniform struct definitions - - Calculate expected size using WGSL alignment rules: - - `f32`: 4-byte aligned - - `vec2<f32>`: 8-byte aligned - - `vec3<f32>`: **16-byte aligned** (not 12!) - - `vec4<f32>`: 16-byte aligned - - Struct size: rounded to largest member alignment -- [ ] **3.2**: Parse C++ headers for matching structs - - Extract `sizeof()` from static_assert statements - - Match WGSL struct names to C++ struct names -- [ ] **3.3**: Report mismatches - - Exit non-zero if C++ size != WGSL size - - Print detailed alignment breakdown for debugging -- [ ] **3.4**: Integrate into CI/build system - - Add CMake custom command to run validation - - Fail build if validation fails (development builds only) - - Add to `scripts/check_all.sh` - -### Phase 4: Documentation (1 hour) -- [ ] **4.1**: Write `doc/UNIFORM_BUFFER_GUIDELINES.md` - - Explain WGSL alignment rules (with examples) - - Document standard patterns (CommonUniforms, effect params) - - Show correct padding techniques - - Add examples of common mistakes -- [ ] **4.2**: Update CONTRIBUTING.md - - Add "Uniform Buffer Checklist" section - - Require validation tool passes before commit - -**Size Impact**: Negligible (consolidation may save 50-100 bytes) - -**Priority**: High (prevents entire class of subtle bugs) - -**Dependencies**: None +**Note:** For completed tasks, see `doc/COMPLETED.md`. --- ## Priority 1: Spectral Brush Editor (Task #5) [IN PROGRESS] -**Goal:** Create a web-based tool for procedurally tracing audio spectrograms. Replaces large `.spec` binary assets with tiny procedural C++ code (50-100× compression). +**Goal:** Web-based tool for procedurally tracing audio spectrograms. Replaces large `.spec` binary assets with tiny procedural C++ code (50-100× compression). **Design Document:** See `doc/SPECTRAL_BRUSH_EDITOR.md` for complete architecture. -**Core Concept: "Spectral Brush"** -- **Central Curve** (Bezier): Traces time-frequency path through spectrogram -- **Vertical Profile**: Shapes "brush stroke" around curve (Gaussian, Decaying Sinusoid, Noise) +**Core Concept:** Bezier curves trace time-frequency paths. Gaussian profiles shape "brush strokes" around curves. -**Workflow:** -``` -.wav → Load in editor → Trace with Bezier curves → Export procedural_params.txt + C++ code -``` +**Workflow:** `.wav` → Load in editor → Trace with Bezier curves → Export `procedural_params.txt` + C++ code ### Phase 1: C++ Runtime (Foundation) -- [ ] **Files:** `src/audio/spectral_brush.h`, `src/audio/spectral_brush.cc` +- [ ] Files: `src/audio/spectral_brush.h`, `src/audio/spectral_brush.cc` - [ ] Define API (`ProfileType`, `draw_bezier_curve()`, `evaluate_profile()`) - [ ] Implement linear Bezier interpolation - [ ] Implement Gaussian profile evaluation -- [ ] Implement home-brew deterministic RNG (for future noise support) +- [ ] Implement home-brew deterministic RNG - [ ] Add unit tests (`src/tests/test_spectral_brush.cc`) - [ ] **Deliverable:** Compiles, tests pass ### Phase 2: Editor Core -- [ ] **Files:** `tools/spectral_editor/index.html`, `script.js`, `style.css`, `dct.js` (reuse from old editor) +- [ ] Files: `tools/spectral_editor/index.html`, `script.js`, `style.css`, `dct.js` - [ ] HTML structure (canvas, controls, file input) - [ ] Canvas rendering (dual-layer: reference + procedural) -- [ ] Bezier curve editor (click to place, drag to adjust, delete control points) +- [ ] Bezier curve editor (click, drag, delete control points) - [ ] Profile controls (Gaussian sigma slider) - [ ] Real-time spectrogram rendering - [ ] Audio playback (IDCT → Web Audio API) -- [ ] Undo/Redo system (action history with snapshots) -- [ ] **Keyboard shortcuts:** - - Key '1': Play procedural sound - - Key '2': Play original .wav - - Space: Play/pause - - Ctrl+Z: Undo - - Ctrl+Shift+Z: Redo - - Delete: Remove control point +- [ ] Undo/Redo system +- [ ] Keyboard shortcuts (1=play procedural, 2=play original, Space, Ctrl+Z, Delete) - [ ] **Deliverable:** Interactive editor, can trace .wav files ### Phase 3: File I/O @@ -164,178 +50,74 @@ This file tracks prioritized tasks with detailed attack plans. - [ ] Decaying sinusoid profile (metallic sounds) - [ ] Noise profile (textured sounds) - [ ] Composite profiles (add/subtract/multiply) -- [ ] Multi-dimensional Bezier ({freq, amplitude, decay, ...}) -- [ ] Frequency snapping (snap to musical notes) -- [ ] Generic `gen_from_params()` code generation -**Design Decisions:** -- Linear Bezier interpolation (Phase 1), cubic later -- Soft parameter limits in UI (not enforced) -- Home-brew RNG (small, deterministic) -- Single function per sound (generic loader later) -- Start with Bezier + Gaussian only +**Design Decisions:** Linear Bezier (Phase 1), cubic later. Soft parameter limits. Home-brew RNG. Single function per sound initially. **Size Impact:** 50-100× compression (5 KB .spec → ~100 bytes C++ code) --- ## Priority 2: 3D System Enhancements (Task #18) -**Goal:** Establish a pipeline for importing complex 3D scenes to replace hardcoded geometry. **Progress:** C++ pipeline for loading and processing object-specific data (like plane_distance) is now in place. Shader integration for SDFs is pending. +**Goal:** Establish pipeline for importing complex 3D scenes to replace hardcoded geometry. -## Priority 3: WGSL Modularization (Task #50) [RECURRENT] +**Progress:** C++ pipeline for loading object-specific data (plane_distance) is in place. Shader integration for SDFs pending. -**Goal**: Refactor `ShaderComposer` and WGSL assets to support granular, reusable snippets and `#include` directives. This is an ongoing task to maintain shader code hygiene as new features are added. +--- +## Priority 3: WGSL Modularization (Task #50) [RECURRENT] +**Goal:** Refactor `ShaderComposer` and WGSL assets to support granular, reusable snippets. Ongoing task for shader code hygiene. -## Phase 2: Size Optimization (Final Goal) +### Sub-task: Split common_uniforms.wgsl (Low Priority) +**Current:** `common_uniforms.wgsl` contains 4 structs (CommonUniforms, GlobalUniforms, ObjectData, ObjectsBuffer) -- [ ] **Task #34: Full STL Removal**: Replace all remaining `std::vector`, `std::map`, and `std::string` usage with custom minimal containers or C-style arrays to allow for CRT replacement. (Minimal Priority - deferred to end). +**Goal:** Split into separate files: +- `common_uniforms/common.wgsl` - CommonUniforms only +- `common_uniforms/global.wgsl` - GlobalUniforms only +- `common_uniforms/object.wgsl` - ObjectData + ObjectsBuffer -- [ ] **Task #22: Windows Native Platform**: Replace GLFW with direct Win32 API calls for the final 64k push. +**Benefit:** Shaders only include what they need, reducing compiled size -- [ ] **Task #28: Spectrogram Quantization**: Research optimal frequency bin distribution and implement quantization. +**Impact:** Minimal (most shaders only use CommonUniforms) -- [ ] **Task #35: CRT Replacement**: investigation and implementation of CRT-free entry point. +**Priority:** Low (nice-to-have) -## Future Goals & Ideas (Untriaged) +### Sub-task: Type-safe shader composition (Low Priority) +**Problem:** Recurrent error of forgetting `ShaderComposer::Get().Compose({}, code)` and using raw `code` directly. Runtime error only (crashes demo, tests may pass). -### Audio Tools -- [ ] **Task #64: specplay Enhancements**: Extend audio analysis tool with new features - - **Priority 1**: Spectral visualization (ASCII art), waveform display, frequency analysis, dynamic range - - **Priority 2**: Diff mode (compare .wav vs .spec), batch mode (CSV report, find clipping) - - **Priority 3**: WAV export (.spec → .wav), normalization - - **Priority 4**: Spectral envelope, harmonic analysis, onset detection - - **Priority 5**: Interactive mode (seek, loop, volume control) - - See `tools/specplay_README.md` for detailed feature list +**Solution:** Use strong typing to make it compile-time error: +```cpp +class ComposedShader { + private: + std::string code_; + friend class ShaderComposer; + explicit ComposedShader(std::string code) : code_(std::move(code)) {} + public: + const char* c_str() const { return code_.c_str(); } +}; +``` + +**Changes:** +- `ShaderComposer::Compose()` returns `ComposedShader` instead of `std::string` +- All shader creation functions take `const ComposedShader&` instead of `const char*` +- Cannot pass raw string to shader functions (compile error) -- [ ] **Task #65: Data-Driven Tempo Control**: Move tempo variation from code to data files - - **Current**: `g_tempo_scale` is hardcoded in `main.cc` with manual animation curves - - **Goal**: Define tempo curves in `.seq` or `.track` files for data-driven tempo control - - **Approach A**: Add TEMPO directive to `.seq` format - - Example: `TEMPO 0.0 1.0`, `TEMPO 10.0 2.0`, `TEMPO 20.0 1.0` (time, scale pairs) - - seq_compiler generates tempo curve array in timeline.cc - - **Approach B**: Add tempo column to music.track - - Each pattern trigger can specify tempo_scale override - - tracker_compiler generates tempo events in music_data.cc - - **Benefits**: Non-programmers can edit tempo, easier iteration, version control friendly - - **Priority**: Low (current hardcoded approach works, but less flexible) +**Benefits:** Impossible to forget composition (type mismatch). Self-documenting API. Compile-time error. -- [ ] **Task #67: DCT/FFT Performance Benchmarking**: Add timing measurements to audio tests - - **Goal**: Compare performance of different DCT/IDCT implementations - - **Location**: Add timing code to `test_dct.cc` or `test_fft.cc` - - **Measurements**: - - Reference IDCT/FDCT (naive O(N²) implementation) - - FFT-based DCT/IDCT (current O(N log N) implementation) - - Future x86_64 SIMD-optimized versions (when implemented) - - **Output Format**: - - Average time per transform (microseconds) - - Throughput (transforms per second) - - Speedup factor vs reference implementation - - **Test Sizes**: DCT_SIZE=512 (production), plus 128, 256, 1024 for scaling analysis - - **Implementation**: - - Use `std::chrono::high_resolution_clock` for timing - - Run each test 1000+ iterations to reduce noise - - Report min/avg/max times - - Guard with `#if !defined(STRIP_ALL)` to avoid production overhead - - **Benefits**: Quantify FFT speedup, validate SIMD optimizations, identify regressions - - **Priority**: Very Low (nice-to-have for future optimization work) +**Trade-offs:** More verbose code. Small overhead (extra std::string copy, negligible). -- [ ] **Task #69: Convert Audio Pipeline to Clipped Int16**: Use clipped int16 for all audio processing - - **Current**: Audio pipeline uses float32 throughout (generation, mixing, synthesis, output) - - **Goal**: Convert to clipped int16 for faster/easier processing and reduced memory footprint - - **Rationale**: - - Simpler arithmetic (no float operations) - - Smaller memory footprint (2 bytes vs 4 bytes per sample) - - Hardware-native format (most audio devices use int16) - - Eliminates float→int16 conversion at output stage - - Natural clipping behavior (overflow wraps/clips automatically) - - **Scope**: - - Output path: Definitely convert (backends, WAV dump) - - Synthesis: Consider keeping float32 for quality (IDCT produces float) - - Mixing: Could use int16 with proper overflow handling - - Asset storage: Already int16 in .spec files - - **Implementation Phases**: - 1. **Phase 1: Output Only** (Minimal change, ~50 lines) - - Convert `synth_render()` output from float to int16 - - Update `MiniaudioBackend` and `WavDumpBackend` to accept int16 - - Keep all internal processing as float - - **Benefit**: Eliminates final conversion step - 2. **Phase 2: Mixing Stage** (Moderate change, ~200 lines) - - Convert voice mixing to int16 arithmetic - - Add saturation/clipping logic - - Keep IDCT output as float, convert after synthesis - - **Benefit**: Faster mixing, reduced memory bandwidth - 3. **Phase 3: Full Pipeline** (Large change, ~500+ lines) - - Convert spectrograms from float to int16 storage - - Modify IDCT to output int16 directly - - All synthesis in int16 - - **Benefit**: Maximum size reduction and performance - - **Trade-offs**: - - Quality loss: 16-bit resolution vs 32-bit float precision - - Dynamic range: Limited to [-32768, 32767] - - Clipping: Must handle overflow carefully in mixing stage - - Code complexity: Saturation arithmetic more complex than float - - **Testing Requirements**: - - Verify no audible quality degradation - - Ensure clipping behavior matches float version - - Check mixing overflow doesn't cause artifacts - - Validate WAV dumps bit-identical to hardware output - - **Size Impact**: - - Phase 1: Negligible (~50 bytes) - - Phase 2: Small reduction (~100-200 bytes, faster code) - - Phase 3: Large reduction (50% memory, ~1-2KB code savings) - - **Priority**: Low (final optimization, after size budget is tight) - - **Notes**: - - This is a FINAL optimization task, only if 64k budget requires it - - Quality must be validated - may not be worth the trade-off - - Consider keeping float for procedural generation quality +**Priority:** Low (recurrent but rare, easy to catch in testing) -### Developer Tools -- [ ] **Task #66: External Asset Loading for Debugging**: mmap() asset files instead of embedded data - - **Current**: All assets embedded in `assets_data.cc` (regenerate on every asset change) - - **Goal**: Load assets from external files in debug builds for faster iteration - - **Scope**: macOS only, non-STRIP_ALL builds only - - **Implementation**: - - Add `DEMO_ENABLE_EXTERNAL_ASSETS` CMake option - - Modify `GetAsset()` to check for external file first (e.g., `assets/final/<name>`) - - Use `mmap()` to map file into memory (replaces `uint8_t asset[]` array) - - Fallback to embedded data if file not found - - **Benefits**: Edit shaders/assets without regenerating assets_data.cc (~10s rebuild) - - **Trade-offs**: Adds runtime file I/O, only useful during development - - **Priority**: Low (current workflow acceptable, but nice-to-have for rapid iteration) +--- -### Visual Effects -- [ ] **Task #73: Extend Shader Parametrization** [IN PROGRESS - 2/4 complete] - - **Goal**: Extend uniform parameter system to ChromaAberrationEffect, GaussianBlurEffect, DistortEffect, SolarizeEffect - - **Pattern**: Follow FlashEffect implementation (UniformHelper, params struct, .seq syntax) - - **Completed**: ChromaAberrationEffect (offset_scale, angle), GaussianBlurEffect (strength) - - **Remaining**: DistortEffect, SolarizeEffect - - **Priority**: Medium (quality-of-life improvement for artists) - - **Estimated Impact**: ~200-300 bytes per effect -- [ ] **Task #52: Procedural SDF Font**: Minimal bezier/spline set for [A-Z, 0-9] and SDF rendering. -- [ ] **Task #55: SDF Random Planes Intersection**: Implement `sdPolyhedron` (crystal/gem shapes) via plane intersection. -- [ ] **Task #54: Tracy Integration**: Integrate Tracy debugger for performance profiling. -- [ ] **Task #58: Advanced Shader Factorization**: Further factorize WGSL code into smaller, reusable snippets. -- [ ] **Task #59: Comprehensive RNG Library**: Add WGSL snippets for float/vec2/vec3 noise (Perlin, Gyroid, etc.) and random number generators. -- [ ] **Task #60: OOP Refactoring**: Investigate if more C++ code can be made object-oriented without size penalty (vs functional style). -- [ ] **Task #61: GPU Procedural Generation**: Implement system to generate procedural data (textures, geometry) on GPU and read back to CPU. -- [ ] **Task #62: Physics Engine Enhancements (PBD & Rotation)**: - - [ ] **Task #62.1: Quaternion Rotation**: Implement quaternion-based rotation for `Object3D` and incorporate angular momentum into physics. - - [ ] **Task #62.2: Position Based Dynamics (PBD)**: Refactor solver to re-evaluate velocity after resolving all collisions and constraints. -- [ ] **Task #63: Refactor large files**: Split `src/gpu/gpu.cc`, `src/3d/visual_debug.cc` and `src/gpu/effect.cc` into sub-functionalities. (`src/3d/renderer.cc` was also over 500 lines and was taken care of in the past) +## Phase 2: Size Optimization (Final Goal) -### Performance Optimization -- [ ] **Task #70: SIMD x86_64 Implementation**: Implement critical functions using intrinsics for x86_64 platforms. - - **Goal**: Optimize hot paths for audio and procedural generation. - - **Scope**: - - IDCT/FDCT transforms - - Audio mixing and voice synthesis - - CPU-side procedural texture/geometry generation - - **Constraint**: Non-critical; fallback to generic C++ must be maintained. - - **Priority**: Very Low +- [ ] **Task #34: Full STL Removal** - Replace remaining `std::vector`, `std::map`, `std::string` with custom containers +- [ ] **Task #22: Windows Native Platform** - Replace GLFW with Win32 API +- [ ] **Task #28: Spectrogram Quantization** - Research optimal frequency distribution +- [ ] **Task #35: CRT Replacement** - Investigation and implementation of CRT-free entry --- -## Future Goals
\ No newline at end of file +For untriaged future goals and ideas, see `doc/BACKLOG.md`. diff --git a/assets/demo.seq b/assets/demo.seq index 0dfb108..a8717f3 100644 --- a/assets/demo.seq +++ b/assets/demo.seq @@ -30,9 +30,10 @@ SEQUENCE 0b 0 EFFECT + VignetteEffect 0 6 radius=0.6 softness=0.1 SEQUENCE 2.0 0 - EFFECT + CircleMaskEffect 0.0 2.0 0.35 # Priority 0 (mask generator, radius 0.35) - EFFECT + RotatingCubeEffect 0.0 2.0 # Priority 1 (renders inside circle) - EFFECT + GaussianBlurEffect 0.0 2.0 strength=2.0 # Priority 2 (post-process blur) + EFFECT + CircleMaskEffect 0.0 4.0 0.50 # Priority 0 mask generator + EFFECT + RotatingCubeEffect 0.0 4.0 # Priority 1 (renders inside circle) + EFFECT + GaussianBlurEffect 1.0 2.0 strength=1.0 + EFFECT + GaussianBlurEffect 3.0 4.0 strength=2.0 SEQUENCE 4b 0 EFFECT - FlashCubeEffect 0.1 3. # Priority -1 diff --git a/assets/final/demo_assets.txt b/assets/final/demo_assets.txt index 05eee17..96f86f9 100644 --- a/assets/final/demo_assets.txt +++ b/assets/final/demo_assets.txt @@ -1,22 +1,22 @@ # Asset Name, Compression Type, Filename/Placeholder, Description # --- Drum & Percussion Samples --- -KICK_1, NONE, KICK_606.spec, "606 Kick" -KICK_2, NONE, KICK_90S_2.spec, "90s Kick" -SNARE_1, NONE, SNARE_808.spec, "808 Snare" -SNARE_2, NONE, SNARE_909_TUNE_8.spec, "909 Snare" -SNARE_3, NONE, SNARE_BLUE_ROOM.spec, "Snare Blue Room" -HIHAT_1, NONE, HIHAT_CLOSED_DMX.spec, "DMX Closed Hi-hat" -HIHAT_2, NONE, HIHAT_CLOSED_DUFF.spec, "Duff Closed Hi-hat" -HIHAT_3, NONE, HIHAT_CLOSED_ER_1.spec, "ER-1 Closed Hi-hat" -CRASH_1, NONE, CRASH_DMX.spec, "DMX Crash" -RIDE_1, NONE, RIDE_CUP_1.spec, "Ride Cymbal" -SPLASH_1, NONE, SPLASH_GROUNDED.spec, "Splash Cymbal" +KICK_1, NONE, music/KICK_606.spec, "606 Kick" +KICK_2, NONE, music/KICK_90S_2.spec, "90s Kick" +SNARE_1, NONE, music/SNARE_808.spec, "808 Snare" +SNARE_2, NONE, music/SNARE_909_TUNE_8.spec, "909 Snare" +SNARE_3, NONE, music/SNARE_BLUE_ROOM.spec, "Snare Blue Room" +HIHAT_1, NONE, music/HIHAT_CLOSED_DMX.spec, "DMX Closed Hi-hat" +HIHAT_2, NONE, music/HIHAT_CLOSED_DUFF.spec, "Duff Closed Hi-hat" +HIHAT_3, NONE, music/HIHAT_CLOSED_ER_1.spec, "ER-1 Closed Hi-hat" +CRASH_1, NONE, music/CRASH_DMX.spec, "DMX Crash" +RIDE_1, NONE, music/RIDE_CUP_1.spec, "Ride Cymbal" +SPLASH_1, NONE, music/SPLASH_GROUNDED.spec, "Splash Cymbal" # --- Melodic Samples --- -BASS_1, NONE, BASS_GUITAR_FEEL.spec, "Bass Guitar" -BASS_2, NONE, BASS_SYNTH_1.spec, "Synth Bass 1" -BASS_3, NONE, SYNTH_BASS_DISTORT.spec, "Distorted Synth Bass" +BASS_1, NONE, music/BASS_GUITAR_FEEL.spec, "Bass Guitar" +BASS_2, NONE, music/BASS_SYNTH_1.spec, "Synth Bass 1" +BASS_3, NONE, music/SYNTH_BASS_DISTORT.spec, "Distorted Synth Bass" # --- Procedural Textures --- NOISE_TEX, PROC(gen_noise, 1234, 16), _, "Procedural noise texture for bump mapping" @@ -52,6 +52,11 @@ SHADER_MESH, NONE, shaders/mesh_render.wgsl, "Mesh Rasterization Shader" MESH_CUBE, NONE, test_mesh.obj, "A simple cube mesh" DODECAHEDRON, NONE, dodecahedron.obj, "A dodecahedron mesh" SHADER_VIGNETTE, NONE, shaders/vignette.wgsl, "Vignette Shader" +SHADER_COMPUTE_GEN_NOISE, NONE, shaders/compute/gen_noise.wgsl, "GPU Noise Compute Shader" +SHADER_COMPUTE_GEN_PERLIN, NONE, shaders/compute/gen_perlin.wgsl, "GPU Perlin Noise Compute Shader" +SHADER_COMPUTE_GEN_GRID, NONE, shaders/compute/gen_grid.wgsl, "GPU Grid Compute Shader" +SHADER_COMPUTE_GEN_BLEND, NONE, shaders/compute/gen_blend.wgsl, "GPU Blend Composite Shader" +SHADER_COMPUTE_GEN_MASK, NONE, shaders/compute/gen_mask.wgsl, "GPU Mask Composite Shader" CIRCLE_MASK_COMPUTE_SHADER, NONE, shaders/circle_mask_compute.wgsl, "Circle mask compute shader" CIRCLE_MASK_RENDER_SHADER, NONE, shaders/circle_mask_render.wgsl, "Circle mask render shader" MASKED_CUBE_SHADER, NONE, shaders/masked_cube.wgsl, "Masked cube shader" diff --git a/assets/final/BASS_GUITAR_FEEL.spec b/assets/final/music/BASS_GUITAR_FEEL.spec Binary files differindex 54f49a6..54f49a6 100644 --- a/assets/final/BASS_GUITAR_FEEL.spec +++ b/assets/final/music/BASS_GUITAR_FEEL.spec diff --git a/assets/final/BASS_SYNTH_1.spec b/assets/final/music/BASS_SYNTH_1.spec Binary files differindex 33bc0a0..33bc0a0 100644 --- a/assets/final/BASS_SYNTH_1.spec +++ b/assets/final/music/BASS_SYNTH_1.spec diff --git a/assets/final/CRASH_DMX.spec b/assets/final/music/CRASH_DMX.spec Binary files differindex 45ee52d..45ee52d 100644 --- a/assets/final/CRASH_DMX.spec +++ b/assets/final/music/CRASH_DMX.spec diff --git a/assets/final/HIHAT_CLOSED_DMX.spec b/assets/final/music/HIHAT_CLOSED_DMX.spec Binary files differindex 8fce1d2..8fce1d2 100644 --- a/assets/final/HIHAT_CLOSED_DMX.spec +++ b/assets/final/music/HIHAT_CLOSED_DMX.spec diff --git a/assets/final/HIHAT_CLOSED_DUFF.spec b/assets/final/music/HIHAT_CLOSED_DUFF.spec Binary files differindex f738271..f738271 100644 --- a/assets/final/HIHAT_CLOSED_DUFF.spec +++ b/assets/final/music/HIHAT_CLOSED_DUFF.spec diff --git a/assets/final/HIHAT_CLOSED_ER_1.spec b/assets/final/music/HIHAT_CLOSED_ER_1.spec Binary files differindex bb03f5e..bb03f5e 100644 --- a/assets/final/HIHAT_CLOSED_ER_1.spec +++ b/assets/final/music/HIHAT_CLOSED_ER_1.spec diff --git a/assets/final/KICK_606.spec b/assets/final/music/KICK_606.spec Binary files differindex 10af84a..10af84a 100644 --- a/assets/final/KICK_606.spec +++ b/assets/final/music/KICK_606.spec diff --git a/assets/final/KICK_90S_2.spec b/assets/final/music/KICK_90S_2.spec Binary files differindex 126409e..126409e 100644 --- a/assets/final/KICK_90S_2.spec +++ b/assets/final/music/KICK_90S_2.spec diff --git a/assets/final/RIDE_CUP_1.spec b/assets/final/music/RIDE_CUP_1.spec Binary files differindex 78867c1..78867c1 100644 --- a/assets/final/RIDE_CUP_1.spec +++ b/assets/final/music/RIDE_CUP_1.spec diff --git a/assets/final/SNARE_808.spec b/assets/final/music/SNARE_808.spec Binary files differindex 2923c3c..2923c3c 100644 --- a/assets/final/SNARE_808.spec +++ b/assets/final/music/SNARE_808.spec diff --git a/assets/final/SNARE_909_TUNE_8.spec b/assets/final/music/SNARE_909_TUNE_8.spec Binary files differindex 4693c05..4693c05 100644 --- a/assets/final/SNARE_909_TUNE_8.spec +++ b/assets/final/music/SNARE_909_TUNE_8.spec diff --git a/assets/final/SNARE_BLUE_ROOM.spec b/assets/final/music/SNARE_BLUE_ROOM.spec Binary files differindex b24baa7..b24baa7 100644 --- a/assets/final/SNARE_BLUE_ROOM.spec +++ b/assets/final/music/SNARE_BLUE_ROOM.spec diff --git a/assets/final/SPLASH_GROUNDED.spec b/assets/final/music/SPLASH_GROUNDED.spec Binary files differindex a919be4..a919be4 100644 --- a/assets/final/SPLASH_GROUNDED.spec +++ b/assets/final/music/SPLASH_GROUNDED.spec diff --git a/assets/final/SYNTH_BASS_DISTORT.spec b/assets/final/music/SYNTH_BASS_DISTORT.spec Binary files differindex 33bc0a0..33bc0a0 100644 --- a/assets/final/SYNTH_BASS_DISTORT.spec +++ b/assets/final/music/SYNTH_BASS_DISTORT.spec diff --git a/assets/final/shaders/chroma_aberration.wgsl b/assets/final/shaders/chroma_aberration.wgsl index bad3624..6c942b7 100644 --- a/assets/final/shaders/chroma_aberration.wgsl +++ b/assets/final/shaders/chroma_aberration.wgsl @@ -1,22 +1,14 @@ @group(0) @binding(0) var smplr: sampler; @group(0) @binding(1) var txt: texture_2d<f32>; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; -struct EffectParams { +#include "common_uniforms" +struct ChromaAberrationParams { offset_scale: f32, angle: f32, }; @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; -@group(0) @binding(3) var<uniform> params: EffectParams; +@group(0) @binding(3) var<uniform> params: ChromaAberrationParams; @vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> { var pos = array<vec2<f32>, 3>( diff --git a/assets/final/shaders/circle_mask_compute.wgsl b/assets/final/shaders/circle_mask_compute.wgsl index 1ed6c1e..484d3dd 100644 --- a/assets/final/shaders/circle_mask_compute.wgsl +++ b/assets/final/shaders/circle_mask_compute.wgsl @@ -1,16 +1,8 @@ // Circle mask compute shader // Generates a circular mask (1.0 inside, 0.0 outside) -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; -struct EffectParams { +#include "common_uniforms" +struct CircleMaskParams { radius: f32, _pad0: f32, _pad1: f32, @@ -18,7 +10,7 @@ struct EffectParams { }; @group(0) @binding(0) var<uniform> uniforms: CommonUniforms; -@group(0) @binding(1) var<uniform> params: EffectParams; +@group(0) @binding(1) var<uniform> params: CircleMaskParams; struct VSOutput { @builtin(position) position: vec4<f32>, diff --git a/assets/final/shaders/circle_mask_render.wgsl b/assets/final/shaders/circle_mask_render.wgsl index ce98f9c..cfa002e 100644 --- a/assets/final/shaders/circle_mask_render.wgsl +++ b/assets/final/shaders/circle_mask_render.wgsl @@ -4,15 +4,7 @@ @group(0) @binding(0) var mask_tex: texture_2d<f32>; @group(0) @binding(1) var mask_sampler: sampler; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; +#include "common_uniforms" @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; diff --git a/assets/final/shaders/compute/gen_blend.wgsl b/assets/final/shaders/compute/gen_blend.wgsl new file mode 100644 index 0000000..9fc9e1e --- /dev/null +++ b/assets/final/shaders/compute/gen_blend.wgsl @@ -0,0 +1,29 @@ +// This file is part of the 64k demo project. +// GPU composite shader: Blend two textures. + +struct BlendParams { + width: u32, + height: u32, + blend_factor: f32, + _pad0: f32, +} + +@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>; +@group(0) @binding(1) var<uniform> params: BlendParams; +@group(0) @binding(2) var input_a: texture_2d<f32>; +@group(0) @binding(3) var input_b: texture_2d<f32>; +@group(0) @binding(4) var tex_sampler: sampler; + +@compute @workgroup_size(8, 8, 1) +fn main(@builtin(global_invocation_id) id: vec3<u32>) { + if (id.x >= params.width || id.y >= params.height) { return; } + + let uv = vec2<f32>(f32(id.x) / f32(params.width), + f32(id.y) / f32(params.height)); + + let color_a = textureSampleLevel(input_a, tex_sampler, uv, 0.0); + let color_b = textureSampleLevel(input_b, tex_sampler, uv, 0.0); + let blended = mix(color_a, color_b, params.blend_factor); + + textureStore(output_tex, id.xy, blended); +} diff --git a/assets/final/shaders/compute/gen_grid.wgsl b/assets/final/shaders/compute/gen_grid.wgsl new file mode 100644 index 0000000..cc5e189 --- /dev/null +++ b/assets/final/shaders/compute/gen_grid.wgsl @@ -0,0 +1,24 @@ +// GPU procedural grid pattern generator. +// Simple grid lines with configurable spacing and thickness. + +struct GridParams { + width: u32, + height: u32, + grid_size: u32, + thickness: u32, +} + +@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>; +@group(0) @binding(1) var<uniform> params: GridParams; + +@compute @workgroup_size(8, 8, 1) +fn main(@builtin(global_invocation_id) id: vec3<u32>) { + if (id.x >= params.width || id.y >= params.height) { return; } + + let on_line = (id.x % params.grid_size) < params.thickness || + (id.y % params.grid_size) < params.thickness; + + let val = select(0.0, 1.0, on_line); + + textureStore(output_tex, id.xy, vec4<f32>(val, val, val, 1.0)); +} diff --git a/assets/final/shaders/compute/gen_mask.wgsl b/assets/final/shaders/compute/gen_mask.wgsl new file mode 100644 index 0000000..1ce9f52 --- /dev/null +++ b/assets/final/shaders/compute/gen_mask.wgsl @@ -0,0 +1,27 @@ +// This file is part of the 64k demo project. +// GPU composite shader: Multiply texture A by texture B (masking). + +struct MaskParams { + width: u32, + height: u32, +} + +@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>; +@group(0) @binding(1) var<uniform> params: MaskParams; +@group(0) @binding(2) var input_a: texture_2d<f32>; +@group(0) @binding(3) var input_b: texture_2d<f32>; +@group(0) @binding(4) var tex_sampler: sampler; + +@compute @workgroup_size(8, 8, 1) +fn main(@builtin(global_invocation_id) id: vec3<u32>) { + if (id.x >= params.width || id.y >= params.height) { return; } + + let uv = vec2<f32>(f32(id.x) / f32(params.width), + f32(id.y) / f32(params.height)); + + let color_a = textureSampleLevel(input_a, tex_sampler, uv, 0.0); + let mask_b = textureSampleLevel(input_b, tex_sampler, uv, 0.0); + let masked = color_a * mask_b; + + textureStore(output_tex, id.xy, masked); +} diff --git a/assets/final/shaders/compute/gen_noise.wgsl b/assets/final/shaders/compute/gen_noise.wgsl new file mode 100644 index 0000000..5c0babd --- /dev/null +++ b/assets/final/shaders/compute/gen_noise.wgsl @@ -0,0 +1,26 @@ +// GPU procedural noise texture generator. +// Uses compute shader for parallel texture generation. + +#include "math/noise" + +struct NoiseParams { + width: u32, + height: u32, + seed: f32, + frequency: f32, +} + +@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>; +@group(0) @binding(1) var<uniform> params: NoiseParams; + +@compute @workgroup_size(8, 8, 1) +fn main(@builtin(global_invocation_id) id: vec3<u32>) { + if (id.x >= params.width || id.y >= params.height) { return; } + + let uv = vec2<f32>(f32(id.x) / f32(params.width), + f32(id.y) / f32(params.height)); + let p = uv * params.frequency + params.seed; + let noise = noise_2d(p); + + textureStore(output_tex, id.xy, vec4<f32>(noise, noise, noise, 1.0)); +} diff --git a/assets/final/shaders/compute/gen_perlin.wgsl b/assets/final/shaders/compute/gen_perlin.wgsl new file mode 100644 index 0000000..73816d6 --- /dev/null +++ b/assets/final/shaders/compute/gen_perlin.wgsl @@ -0,0 +1,44 @@ +// GPU procedural Perlin noise texture generator. +// Fractional Brownian Motion using value noise. + +#include "math/noise" + +struct PerlinParams { + width: u32, + height: u32, + seed: f32, + frequency: f32, + amplitude: f32, + amplitude_decay: f32, + octaves: u32, + _pad0: f32, // Padding for alignment +} + +@group(0) @binding(0) var output_tex: texture_storage_2d<rgba8unorm, write>; +@group(0) @binding(1) var<uniform> params: PerlinParams; + +@compute @workgroup_size(8, 8, 1) +fn main(@builtin(global_invocation_id) id: vec3<u32>) { + if (id.x >= params.width || id.y >= params.height) { return; } + + let uv = vec2<f32>(f32(id.x) / f32(params.width), + f32(id.y) / f32(params.height)); + + var value = 0.0; + var amplitude = params.amplitude; + var frequency = params.frequency; + var total_amp = 0.0; + + for (var o: u32 = 0u; o < params.octaves; o++) { + let p = uv * frequency + params.seed; + value += noise_2d(p) * amplitude; + total_amp += amplitude; + frequency *= 2.0; + amplitude *= params.amplitude_decay; + } + + value /= total_amp; + let clamped = clamp(value, 0.0, 1.0); + + textureStore(output_tex, id.xy, vec4<f32>(clamped, clamped, clamped, 1.0)); +} diff --git a/assets/final/shaders/distort.wgsl b/assets/final/shaders/distort.wgsl index cca01c4..5d35129 100644 --- a/assets/final/shaders/distort.wgsl +++ b/assets/final/shaders/distort.wgsl @@ -1,15 +1,15 @@ @group(0) @binding(0) var smplr: sampler; @group(0) @binding(1) var txt: texture_2d<f32>; -struct CommonUniforms { - resolution: vec2<f32>, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, +#include "common_uniforms" + +struct DistortParams { + strength: f32, + speed: f32, }; @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; +@group(0) @binding(3) var<uniform> params: DistortParams; @vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> { var pos = array<vec2<f32>, 3>( @@ -22,6 +22,6 @@ struct CommonUniforms { @fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> { let uv = p.xy / uniforms.resolution; - let dist = 0.1 * uniforms.audio_intensity * sin(uv.y * 20.0 + uniforms.time * 5.0); + let dist = params.strength * uniforms.audio_intensity * sin(uv.y * 20.0 + uniforms.time * params.speed * 5.0); return textureSample(txt, smplr, uv + vec2<f32>(dist, 0.0)); } diff --git a/assets/final/shaders/ellipse.wgsl b/assets/final/shaders/ellipse.wgsl index 9c6b0d9..05dfcfc 100644 --- a/assets/final/shaders/ellipse.wgsl +++ b/assets/final/shaders/ellipse.wgsl @@ -1,12 +1,4 @@ -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; +#include "common_uniforms" @group(0) @binding(0) var<uniform> uniforms: CommonUniforms; diff --git a/assets/final/shaders/gaussian_blur.wgsl b/assets/final/shaders/gaussian_blur.wgsl index 3b87b10..02156f7 100644 --- a/assets/final/shaders/gaussian_blur.wgsl +++ b/assets/final/shaders/gaussian_blur.wgsl @@ -1,22 +1,14 @@ @group(0) @binding(0) var smplr: sampler; @group(0) @binding(1) var txt: texture_2d<f32>; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; -struct EffectParams { +#include "common_uniforms" +struct GaussianBlurParams { strength: f32, _pad: f32, }; @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; -@group(0) @binding(3) var<uniform> params: EffectParams; +@group(0) @binding(3) var<uniform> params: GaussianBlurParams; @vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> { var pos = array<vec2<f32>, 3>( diff --git a/assets/final/shaders/main_shader.wgsl b/assets/final/shaders/main_shader.wgsl index 7155a6d..ab0278c 100644 --- a/assets/final/shaders/main_shader.wgsl +++ b/assets/final/shaders/main_shader.wgsl @@ -1,12 +1,4 @@ -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; +#include "common_uniforms" @group(0) @binding(0) var<uniform> uniforms: CommonUniforms; diff --git a/assets/final/shaders/particle_compute.wgsl b/assets/final/shaders/particle_compute.wgsl index 38a95e1..ae513c8 100644 --- a/assets/final/shaders/particle_compute.wgsl +++ b/assets/final/shaders/particle_compute.wgsl @@ -5,15 +5,7 @@ struct Particle { color: vec4<f32>, }; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; +#include "common_uniforms" @group(0) @binding(0) var<storage, read_write> particles: array<Particle>; @group(0) @binding(1) var<uniform> uniforms: CommonUniforms; diff --git a/assets/final/shaders/particle_render.wgsl b/assets/final/shaders/particle_render.wgsl index 9030a3a..6a2b636 100644 --- a/assets/final/shaders/particle_render.wgsl +++ b/assets/final/shaders/particle_render.wgsl @@ -5,15 +5,7 @@ struct Particle { color: vec4<f32>, }; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; +#include "common_uniforms" @group(0) @binding(0) var<storage, read> particles: array<Particle>; @group(0) @binding(1) var<uniform> uniforms: CommonUniforms; diff --git a/assets/final/shaders/particle_spray_compute.wgsl b/assets/final/shaders/particle_spray_compute.wgsl index b165971..a4041f2 100644 --- a/assets/final/shaders/particle_spray_compute.wgsl +++ b/assets/final/shaders/particle_spray_compute.wgsl @@ -5,15 +5,7 @@ struct Particle { color: vec4<f32>, }; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; +#include "common_uniforms" @group(0) @binding(0) var<storage, read_write> particles: array<Particle>; @group(0) @binding(1) var<uniform> uniforms: CommonUniforms; diff --git a/assets/final/shaders/passthrough.wgsl b/assets/final/shaders/passthrough.wgsl index dfdacf4..266e231 100644 --- a/assets/final/shaders/passthrough.wgsl +++ b/assets/final/shaders/passthrough.wgsl @@ -1,15 +1,7 @@ @group(0) @binding(0) var smplr: sampler; @group(0) @binding(1) var txt: texture_2d<f32>; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; +#include "common_uniforms" @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; @vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> { diff --git a/assets/final/shaders/solarize.wgsl b/assets/final/shaders/solarize.wgsl index 645fb9a..de15dfc 100644 --- a/assets/final/shaders/solarize.wgsl +++ b/assets/final/shaders/solarize.wgsl @@ -1,15 +1,7 @@ @group(0) @binding(0) var smplr: sampler; @group(0) @binding(1) var txt: texture_2d<f32>; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -}; +#include "common_uniforms" @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; diff --git a/assets/final/shaders/vignette.wgsl b/assets/final/shaders/vignette.wgsl index 4b096d7..b129883 100644 --- a/assets/final/shaders/vignette.wgsl +++ b/assets/final/shaders/vignette.wgsl @@ -1,20 +1,14 @@ @group(0) @binding(0) var input_sampler: sampler; @group(0) @binding(1) var input_tex: texture_2d<f32>; -struct CommonUniforms { - resolution: vec2<f32>, - _pad0: f32, - _pad1: f32, - aspect_ratio: f32, - time: f32, - beat: f32, - audio_intensity: f32, -};struct EffectParams { +#include "common_uniforms" + +struct VignetteParams { radius: f32, softness: f32, }; @group(0) @binding(2) var<uniform> common_uniforms: CommonUniforms; -@group(0) @binding(3) var<uniform> params: EffectParams; +@group(0) @binding(3) var<uniform> params: VignetteParams; @vertex fn vs_main(@builtin(vertex_index) vertex_idx: u32) -> @builtin(position) vec4<f32> { diff --git a/assets/final/test_demo_assets.txt b/assets/final/test_demo_assets.txt index d679237..dec8625 100644 --- a/assets/final/test_demo_assets.txt +++ b/assets/final/test_demo_assets.txt @@ -1,3 +1,3 @@ -KICK_1, NONE, KICK_606.spec, "606 Kick" -SNARE_1, NONE, SNARE_808.spec, "808 Snare" -CRASH_1, NONE, CRASH_DMX.spec, "DMX Crash" +KICK_1, NONE, music/KICK_606.spec, "606 Kick" +SNARE_1, NONE, music/SNARE_808.spec, "808 Snare" +CRASH_1, NONE, music/CRASH_DMX.spec, "DMX Crash" diff --git a/doc/ARCHITECTURE.md b/doc/ARCHITECTURE.md new file mode 100644 index 0000000..1a32300 --- /dev/null +++ b/doc/ARCHITECTURE.md @@ -0,0 +1,60 @@ +# Architectural Overview + +Detailed system architecture for the 64k demo project. + +--- + +## Hybrid 3D Renderer + +**Core Idea**: Uses standard rasterization to draw proxy hulls (boxes), then raymarches inside the fragment shader to find the exact SDF surface. + +**Transforms**: Uses `inv_model` matrices to perform all raymarching in local object space, handling rotation and non-uniform scaling correctly. + +**Shadows**: Instance-based shadow casting with self-shadowing prevention (`skip_idx`). + +--- + +## Sequence & Effect System + +**Effect**: Abstract base for visual elements. Supports `compute` and `render` phases. + +**Sequence**: Timeline of effects with start/end times. + +**MainSequence**: Top-level coordinator and framebuffer manager. + +**seq_compiler**: Transpiles `assets/demo.seq` into C++ `timeline.cc`. + +--- + +## Asset & Build System + +**asset_packer**: Embeds binary assets (like `.spec` files) into C++ arrays. + +**Runtime Manager**: O(1) retrieval with lazy procedural generation support. + +**Automation**: `gen_assets.sh`, `build_win.sh`, and `check_all.sh` for multi-platform validation. + +--- + +## Audio Engine + +### Synthesis +Real-time additive synthesis from spectrograms via FFT-based IDCT (O(N log N)). Stereo output (32kHz, 16-bit, interleaved L/R). Uses orthonormal DCT-II/DCT-III transforms with Numerical Recipes reordering method. + +### Variable Tempo +Music time abstraction with configurable tempo_scale. Tempo changes don't affect pitch. + +### Event-Based Tracker +Individual TrackerEvents trigger as separate voices with dynamic beat calculation. Notes within patterns respect tempo scaling. + +### Backend Abstraction +`AudioBackend` interface with `MiniaudioBackend` (production), `MockAudioBackend` (testing), and `WavDumpBackend` (offline rendering). + +### Dynamic Updates +Double-buffered spectrograms for live thread-safe updates. + +### Procedural Library +Melodies and spectral filters (noise, comb) generated at runtime. + +### Pattern System +TrackerPatterns contain lists of TrackerEvents (beat, sample_id, volume, pan). Events trigger individually based on elapsed music time. diff --git a/doc/BACKLOG.md b/doc/BACKLOG.md new file mode 100644 index 0000000..403ecc9 --- /dev/null +++ b/doc/BACKLOG.md @@ -0,0 +1,197 @@ +# Future Goals & Ideas (Untriaged) + +This file contains low-priority tasks and ideas that have not yet been triaged for active development. + +--- + +## Audio Tools + +### Task #64: specplay Enhancements +Extend audio analysis tool with new features: +- **Priority 1**: Spectral visualization (ASCII art), waveform display, frequency analysis, dynamic range +- **Priority 2**: Diff mode (compare .wav vs .spec), batch mode (CSV report, find clipping) +- **Priority 3**: WAV export (.spec → .wav), normalization +- **Priority 4**: Spectral envelope, harmonic analysis, onset detection +- **Priority 5**: Interactive mode (seek, loop, volume control) + +See `tools/specplay_README.md` for detailed feature list. + +### Task #65: Data-Driven Tempo Control +Move tempo variation from code to data files. + +**Current**: `g_tempo_scale` is hardcoded in `main.cc` with manual animation curves + +**Goal**: Define tempo curves in `.seq` or `.track` files + +**Approach A**: Add TEMPO directive to `.seq` format +- Example: `TEMPO 0.0 1.0`, `TEMPO 10.0 2.0`, `TEMPO 20.0 1.0` +- seq_compiler generates tempo curve array in timeline.cc + +**Approach B**: Add tempo column to music.track +- Each pattern trigger can specify tempo_scale override +- tracker_compiler generates tempo events in music_data.cc + +**Benefits**: Non-programmers can edit tempo, easier iteration, version control friendly + +**Priority**: Low (current approach works) + +### Task #67: DCT/FFT Performance Benchmarking +Add timing measurements to audio tests. + +**Goal**: Compare performance of different DCT/IDCT implementations + +**Location**: Add timing code to `test_dct.cc` or `test_fft.cc` + +**Measurements**: +- Reference IDCT/FDCT (naive O(N²)) +- FFT-based DCT/IDCT (current O(N log N)) +- Future x86_64 SIMD-optimized versions + +**Output Format**: +- Average time per transform (microseconds) +- Throughput (transforms per second) +- Speedup factor vs reference + +**Test Sizes**: DCT_SIZE=512 (production), plus 128, 256, 1024 for scaling + +**Implementation**: +- Use `std::chrono::high_resolution_clock` +- Run 1000+ iterations to reduce noise +- Report min/avg/max times +- Guard with `#if !defined(STRIP_ALL)` + +**Priority**: Very Low (nice-to-have) + +### Task #69: Convert Audio Pipeline to Clipped Int16 +Use clipped int16 for all audio processing. + +**Current**: Float32 throughout (generation, mixing, synthesis, output) + +**Goal**: Convert to int16 for faster processing and reduced memory + +**Rationale**: +- Simpler arithmetic (no float operations) +- Smaller memory footprint (2 bytes vs 4 bytes) +- Hardware-native format (most audio devices use int16) +- Eliminates float→int16 conversion at output +- Natural clipping behavior + +**Scope**: +- Output path: Definitely convert (backends, WAV dump) +- Synthesis: Consider keeping float32 for quality +- Mixing: Could use int16 with overflow handling +- Asset storage: Already int16 in .spec files + +**Implementation Phases**: +1. **Phase 1: Output Only** (~50 lines) - Convert `synth_render()` output to int16 +2. **Phase 2: Mixing Stage** (~200 lines) - Convert voice mixing to int16 arithmetic +3. **Phase 3: Full Pipeline** (~500+ lines) - Convert spectrograms to int16 storage + +**Trade-offs**: +- Quality loss: 16-bit vs 32-bit float precision +- Dynamic range: Limited to [-32768, 32767] +- Clipping: Must handle overflow carefully +- Code complexity: Saturation arithmetic + +**Testing Requirements**: +- Verify no audible quality degradation +- Ensure clipping behavior matches float version +- Check mixing overflow doesn't cause artifacts +- Validate WAV dumps bit-identical + +**Size Impact**: +- Phase 1: Negligible (~50 bytes) +- Phase 2: ~100-200 bytes +- Phase 3: 50% memory, ~1-2KB code savings + +**Priority**: Low (final optimization only if 64k budget requires it) + +**Notes**: Quality must be validated - may not be worth trade-off + +--- + +## Developer Tools + +### Task #66: External Asset Loading for Debugging +mmap() asset files instead of embedded data. + +**Current**: All assets embedded in `assets_data.cc` (regenerate on every change) + +**Goal**: Load assets from external files in debug builds for faster iteration + +**Scope**: macOS only, non-STRIP_ALL builds only + +**Implementation**: +- Add `DEMO_ENABLE_EXTERNAL_ASSETS` CMake option +- Modify `GetAsset()` to check for external file first (e.g., `assets/final/<name>`) +- Use `mmap()` to map file into memory +- Fallback to embedded data if file not found + +**Benefits**: Edit shaders/assets without regenerating assets_data.cc (~10s rebuild) + +**Trade-offs**: Adds runtime file I/O, only useful during development + +**Priority**: Low (current workflow acceptable) + +--- + +## Visual Effects + +### Task #73: Extend Shader Parametrization [IN PROGRESS - 2/4 complete] +Extend uniform parameter system to remaining effects. + +**Goal**: Add parametrization to DistortEffect, SolarizeEffect + +**Pattern**: Follow FlashEffect implementation (UniformHelper, params struct, .seq syntax) + +**Completed**: ChromaAberrationEffect (offset_scale, angle), GaussianBlurEffect (strength) + +**Priority**: Medium (quality-of-life for artists) + +**Estimated Impact**: ~200-300 bytes per effect + +### Task #52: Procedural SDF Font +Minimal bezier/spline set for [A-Z, 0-9] and SDF rendering. + +### Task #55: SDF Random Planes Intersection +Implement `sdPolyhedron` (crystal/gem shapes) via plane intersection. + +### Task #54: Tracy Integration +Integrate Tracy debugger for performance profiling. + +### Task #58: Advanced Shader Factorization +Further factorize WGSL code into smaller, reusable snippets. + +### Task #59: Comprehensive RNG Library +Add WGSL snippets for float/vec2/vec3 noise (Perlin, Gyroid, etc.) and random number generators. + +### Task #60: OOP Refactoring +Investigate if more C++ code can be made object-oriented without size penalty (vs functional style). + +### Task #61: GPU Procedural Generation +Implement system to generate procedural data (textures, geometry) on GPU and read back to CPU. + +### Task #62: Physics Engine Enhancements (PBD & Rotation) +- **Task #62.1**: Quaternion rotation for `Object3D` with angular momentum +- **Task #62.2**: Position Based Dynamics (PBD) - Re-evaluate velocity after resolving collisions/constraints + +### Task #63: Refactor Large Files +Split `src/gpu/gpu.cc`, `src/3d/visual_debug.cc` and `src/gpu/effect.cc` into sub-functionalities. + +--- + +## Performance Optimization + +### Task #70: SIMD x86_64 Implementation +Implement critical functions using intrinsics for x86_64 platforms. + +**Goal**: Optimize hot paths for audio and procedural generation + +**Scope**: +- IDCT/FDCT transforms +- Audio mixing and voice synthesis +- CPU-side procedural texture/geometry generation + +**Constraint**: Non-critical; fallback to generic C++ must be maintained + +**Priority**: Very Low diff --git a/doc/CODING_STYLE.md b/doc/CODING_STYLE.md new file mode 100644 index 0000000..533cffb --- /dev/null +++ b/doc/CODING_STYLE.md @@ -0,0 +1,109 @@ +# Coding Style Examples + +Detailed examples for the project's C++ coding style. + +--- + +## Core Rules Examples + +### Const Placement +```cpp +const T* name // Correct +const T *name // Wrong +``` + +### Pre-Increment +```cpp +++x // Correct +x++ // Wrong (except when postfix needed) +``` + +### Operator Spacing +```cpp +x = (a + b) * c; // Correct - spaces around all operators +x=(a+b)*c; // Wrong - no spaces +``` + +### No Auto (except complex iterators) +```cpp +int count = get_count(); // Correct +auto count = get_count(); // Wrong + +for (auto it = map.begin(); ...) // OK - complex iterator type +``` + +### No C++ Casts +```cpp +(int)value // Correct +static_cast<int>(value) // Wrong +``` + +--- + +## Preprocessor Style + +```cpp +#if defined(MY_TAG) + // code here +#endif /* defined(MY_TAG) */ +``` + +Always use `defined()` and closing comment. + +--- + +## Struct Initialization + +### Good +```cpp +const WGPUDescriptor desc = { + .format = g_format, + .dimension = WGPUTextureViewDimension_2D, +}; +``` + +### Bad +```cpp +WGPUDescriptor desc = {}; +desc.format = g_format; +desc.dimension = WGPUTextureViewDimension_2D; +``` + +Use designated initializers, not field-by-field assignment. + +--- + +## Class Keywords Indentation + +```cpp +class MyClass { + public: // 1 space indent + void foo(); + + private: // 1 space indent + int field_; +}; +``` + +--- + +## Comments + +### Function Comments +```cpp +// Initializes the audio engine with default settings. +void audio_init() { + ... +} +``` + +One-line comment for non-obvious functions. + +### File Headers +```cpp +// demo64k - 64 kilobyte demo +// src/audio/synth.cc +// Audio synthesis engine +``` + +Three-line header for all source files. diff --git a/doc/COMPLETED.md b/doc/COMPLETED.md index a3c173d..49cfbe9 100644 --- a/doc/COMPLETED.md +++ b/doc/COMPLETED.md @@ -29,7 +29,34 @@ Detailed historical documents have been moved to `doc/archive/` for reference: Use `read @doc/archive/FILENAME.md` to access archived documents. -## Recently Completed (February 8, 2026) +## Recently Completed (February 9, 2026) + +- [x] **WGSL Uniform Buffer Validation & Consolidation (Task #75)** + - **Goal**: Standardize uniform buffer usage across all post-process effects and add validation tooling + - **Implementation**: + - Refactored `DistortEffect` and others to use `CommonPostProcessUniforms` (binding 2) + `EffectParams` (binding 3) + - Created `tools/validate_uniforms.py` to parse C++ and WGSL (including embedded strings) and verify size/alignment + - Added validation step to CMake build system + - Renamed generic `EffectParams` to specific names (`FadeParams`, `CircleMaskParams`, etc.) in WGSL and C++ + - Added `doc/UNIFORM_BUFFER_GUIDELINES.md` and updated `CONTRIBUTING.md` + - **Result**: Consistent binding layout across all effects, automatic validation on build + +- [x] **Uniform Buffer Alignment (Task #74)** + - **Goal**: Fix WGSL struct alignment issues causing validation errors and crashes + - **Implementation**: + - `circle_mask_compute.wgsl`: Changed `_pad: vec3<f32>` to three `f32` fields for correct 16-byte alignment + - `fade_effect.cc`: Changed EffectParams padding from `vec3<f32>` to `_pad0/1/2: f32` + - `theme_modulation_effect.cc`: Same padding fix for EffectParams + - Fixed ODR violation in `demo_effects.h` (incomplete FadeEffect forward declaration) + - Renamed shadowing `uniforms_` members to `common_uniforms_`/`flash_uniforms_` + - **Result**: demo64k runs without crashes, 32/33 tests passing (97%), 0 WebGPU validation errors + +- [x] **Fix test_demo Black Screen** + - **Issue**: `test_demo` showed black screen because it failed to load its timeline sequence (`assets/test_demo.seq`) + - **Fix**: Added missing `LoadTimeline` call in `src/test_demo.cc` + - **Result**: `FlashEffect` and `PeakMeterEffect` now render correctly + +## Previously Completed (February 8, 2026) - [x] **Shader Parametrization System (Task #73 Phase 0)** (February 8, 2026) - **Goal**: Enable per-frame dynamic parameters for shaders and effects via uniform buffers and .seq syntax diff --git a/doc/CONTRIBUTING.md b/doc/CONTRIBUTING.md index 3a09dbc..de6378a 100644 --- a/doc/CONTRIBUTING.md +++ b/doc/CONTRIBUTING.md @@ -1,5 +1,7 @@ # Contributing Guidelines +--- + ## Commit Policy ### Verify Before Committing @@ -8,7 +10,6 @@ ```bash ./scripts/check_all.sh ``` -Runs tests, builds tools, cross-compiles Windows. **Manual:** ```bash @@ -26,18 +27,9 @@ cd build && ctest --output-on-failure cmake -S . -B build_debug_check -DDEMO_ENABLE_DEBUG_LOGS=ON cmake --build build_debug_check -j4 ``` -Must compile without errors. **Debug macros** (`src/util/debug.h`): -- `DEBUG_LOG_AUDIO`, `DEBUG_LOG_RING_BUFFER`, `DEBUG_LOG_TRACKER` -- `DEBUG_LOG_SYNTH`, `DEBUG_LOG_3D`, `DEBUG_LOG_ASSETS`, `DEBUG_LOG_GPU` - -Example: -```cpp -#if defined(DEBUG_LOG_AUDIO) - DEBUG_AUDIO("[CALLBACK #%d] frames=%d\n", ++count, frames); -#endif -``` +- `DEBUG_LOG_AUDIO`, `DEBUG_LOG_RING_BUFFER`, `DEBUG_LOG_TRACKER`, `DEBUG_LOG_SYNTH`, `DEBUG_LOG_3D`, `DEBUG_LOG_ASSETS`, `DEBUG_LOG_GPU` ### Code Formatting ```bash @@ -50,6 +42,8 @@ Never format `third_party/`. - 3-line header comment - Max 500 lines (split if larger) +--- + ## Coding Style ### Core Rules @@ -61,36 +55,9 @@ Never format `third_party/`. - No `auto` (except complex iterators) - No C++ casts (`static_cast`, `reinterpret_cast`) -### Preprocessor -```cpp -#if defined(MY_TAG) - ... -#endif /* defined(MY_TAG) */ -``` +See `doc/CODING_STYLE.md` for detailed examples. -### Struct Initialization -```cpp -// Good -const WGPUDescriptor desc = { - .format = g_format, - .dimension = WGPUTextureViewDimension_2D, -}; - -// Bad -WGPUDescriptor desc = {}; -desc.format = g_format; -desc.dimension = WGPUTextureViewDimension_2D; -``` - -### Class Keywords -```cpp - private: // 1 space indent - int field_; -``` - -### Comments -- 1-line comment for non-obvious functions -- 3-line header for all source files +--- ## Development Protocols @@ -170,4 +137,18 @@ After hierarchy changes (moving files, renaming), verify: ./scripts/gen_coverage_report.sh ``` -Update scripts with hardcoded paths. +--- + +## Uniform Buffer Checklist + +To ensure consistency and prevent alignment-related issues: + +1. **Define WGSL Structs:** Pay attention to type alignment (`f32`, `vec2`, `vec3`, `vec4`) and use explicit padding where necessary. +2. **Mirror in C++:** Create corresponding C++ structs that mirror WGSL definitions. +3. **`static_assert` for Size:** Every C++ struct must have a `static_assert` verifying size matches WGSL. +4. **Standard Bindings:** + - **Binding 2:** Always use `CommonPostProcessUniforms` for per-frame data (resolution, time, beat). + - **Binding 3:** Use effect-specific parameter structs for unique data. +5. **Shader Consistency:** Ensure WGSL shaders correctly declare uniforms at specified bindings. +6. **Validation Script:** Run `tools/validate_uniforms.py` to catch discrepancies. +7. **Documentation:** Refer to `doc/UNIFORM_BUFFER_GUIDELINES.md` for detailed alignment rules. diff --git a/doc/GPU_PROCEDURAL_PHASE4.md b/doc/GPU_PROCEDURAL_PHASE4.md new file mode 100644 index 0000000..4cfc271 --- /dev/null +++ b/doc/GPU_PROCEDURAL_PHASE4.md @@ -0,0 +1,70 @@ +# GPU Procedural Phase 4: Texture Composition + +**Status:** ✅ Complete + +## Implementation + +Multi-input composite shaders with configurable sampler support. + +### API + +```cpp +enum class SamplerType { + LinearClamp, LinearRepeat, NearestClamp, NearestRepeat +}; + +void create_gpu_composite_texture( + const std::string& name, + const std::string& shader_func, + const char* shader_code, + const void* uniform_data, + size_t uniform_size, + int width, int height, + const std::vector<std::string>& input_names, + SamplerType sampler = SamplerType::LinearClamp); +``` + +### Shaders + +**gen_blend.wgsl** - Blend two textures with lerp factor: +- Bindings: output (0), uniform (1), input_a (2), input_b (3), sampler (4) +- Uniform: `{u32 width, height; f32 blend_factor, _pad0}` + +**gen_mask.wgsl** - Multiply textures (masking): +- Bindings: output (0), uniform (1), input_a (2), input_b (3), sampler (4) +- Uniform: `{u32 width, height}` + +### Usage + +```cpp +extern const char* gen_blend_compute_wgsl; + +struct { uint32_t width, height; float blend_factor, _pad0; } uni = {256, 256, 0.5f, 0.0f}; + +tex_mgr.create_gpu_composite_texture( + "blended", "gen_blend", gen_blend_compute_wgsl, + &uni, sizeof(uni), 256, 256, + {"noise_a", "noise_b"}, + SamplerType::LinearClamp); +``` + +### Features + +- **Dynamic bind groups:** N input textures + 1 sampler +- **Lazy sampler creation:** Map-based cache, 4 preset types +- **Multi-stage composition:** Composite of composites supported +- **Guarded with `#if !defined(STRIP_GPU_COMPOSITE)`** + +### Size Impact + +- Code: ~460 lines added +- Compressed: ~830 bytes (2 shaders + dispatch logic) + +### Tests + +`test_gpu_composite.cc`: +- Blend two noise textures +- Mask noise with grid +- Multi-stage composite (composite of composites) + +All 35 tests passing. diff --git a/doc/HOWTO.md b/doc/HOWTO.md index 967b554..876d7dc 100644 --- a/doc/HOWTO.md +++ b/doc/HOWTO.md @@ -2,6 +2,8 @@ Common commands for building and testing. +--- + ## Building ### Debug Build @@ -11,10 +13,7 @@ cmake --build build -j4 ./build/demo64k ``` -Options: -- `--fullscreen`: Run in fullscreen -- `--resolution WxH`: Set window size (e.g., 1024x768) -- `--seek TIME`: Jump to timestamp (debug builds only) +Options: `--fullscreen`, `--resolution WxH`, `--seek TIME` (debug only) Keyboard: `Esc` (exit), `F` (toggle fullscreen) @@ -45,27 +44,34 @@ cmake --build build_final -j4 - STRIP_ALL: Full checks, no debug (~64k target) - FINAL_STRIP: No checks, no debug (absolute minimum) -### Developer Build +### Developer Build (Tests + Tools) ```bash -cmake -S . -B build -DDEMO_ALL_OPTIONS=ON +cmake -S . -B build -DDEMO_BUILD_TESTS=ON -DDEMO_BUILD_TOOLS=ON cmake --build build -j4 ``` -Enables tests, tools, size optimizations. + +**Note:** `DEMO_ALL_OPTIONS=ON` enables tests, tools, AND `STRIP_ALL`, which removes debug-only code. Use selective flags for debugging. + +--- ## Build System -**Dependency Tracking**: CMake tracks 42 demo + 17 test assets. Editing shaders/audio auto-triggers rebuild. +**Dependency Tracking:** CMake tracks 42 demo + 17 test assets. Editing shaders/audio auto-triggers rebuild. -**Header Organization**: +**Header Organization:** - `asset_manager_dcl.h`: Forward declarations - `asset_manager.h`: Core API (GetAsset/DropAsset) - `asset_manager_utils.h`: Typed helpers +--- + ## Git Clone ```bash git clone ssh://git@51.38.51.127/~/demo.git ``` +--- + ## Audio System ### AudioEngine API @@ -90,10 +96,7 @@ audio_shutdown(); - `seek(time)`: Jump to timestamp (debug only) **Direct Synth APIs** (performance-critical): -- `synth_register_spectrogram()`: Register samples -- `synth_trigger_voice()`: Trigger playback -- `synth_get_output_peak()`: Get audio level -- `synth_render()`: Low-level rendering +- `synth_register_spectrogram()`, `synth_trigger_voice()`, `synth_get_output_peak()`, `synth_render()` **Testing:** ```cpp @@ -103,6 +106,8 @@ engine.update(1.0f); engine.shutdown(); ``` +--- + ## Auxiliary Texture Masking Share textures between effects: @@ -116,6 +121,8 @@ WGPUTextureView view = demo_->get_auxiliary_view("mask_name"); ``` See `doc/MASKING_SYSTEM.md` for details. +--- + ## Demo Timeline Edit `assets/demo.seq`: @@ -125,6 +132,8 @@ SEQUENCE 0.0 0 ``` Rebuild to update timeline. +--- + ## Testing **Run all tests:** @@ -140,56 +149,7 @@ cd build && ctest - `SynthEngineTest`: Audio synthesis - `SequenceSystemTest`: Timeline logic -## Code Coverage (macOS) -```bash -brew install lcov -./scripts/gen_coverage_report.sh [target_dir] -``` - -## Tools - -### Windows Cross-Compilation -```bash -./scripts/fetch_win_deps.sh -./scripts/build_win.sh -./scripts/run_win.sh -``` - -### spectool (Audio Analysis) -```bash -cmake -S . -B build -DDEMO_BUILD_TOOLS=ON -cmake --build build -j4 - -# Analyze -./build/spectool analyze input.wav output.spec - -# Play -./build/spectool play input.spec -``` - -### specview (Visualization) -```bash -./build/specview input.spec -``` - -### specplay (Diagnostic) -```bash -./build/specplay input.spec -# or -./build/specplay input.wav -``` -Output: Peak, RMS, clipping detection. - -### Submodule Updates -```bash -cd third_party/wgpu-native -git fetch -git checkout trunk -git reset --hard origin/trunk -cd ../.. -git add third_party/wgpu-native -git commit -m "chore: Update wgpu-native" -``` +--- ## Asset Management @@ -216,3 +176,7 @@ const uint8_t* data = GetAsset(AssetId::KICK_1, &size); ``` Build system auto-runs `asset_packer` when asset lists change. + +--- + +For developer tools reference (spectool, Windows cross-compilation, code coverage), see `doc/TOOLS_REFERENCE.md`. diff --git a/doc/RECIPE.md b/doc/RECIPE.md new file mode 100644 index 0000000..6404391 --- /dev/null +++ b/doc/RECIPE.md @@ -0,0 +1,202 @@ +# Recipe: Common Patterns + +Quick reference for implementing common patterns in the demo codebase. + +## Runtime Shader Composition + +Use `ShaderComposer` to dynamically assemble shaders from snippets. + +**Pattern:** +```cpp +#include "gpu/effects/shader_composer.h" +#include "generated/assets.h" + +// 1. Load base shader template from asset +size_t shader_size; +const char* shader_code = + (const char*)GetAsset(AssetId::MY_SHADER_TEMPLATE, &shader_size); + +// 2. Define substitutions for dynamic parts +ShaderComposer::CompositionMap composition_map; +composition_map["placeholder_name"] = "actual_snippet_name"; +composition_map["fragment_main"] = "plasma_shader"; // Example + +// 3. Compose final shader +std::string composed_shader = ShaderComposer::Get().Compose( + {}, // Optional: explicit dependencies + std::string(shader_code, shader_size), + composition_map); + +// 4. Create shader module +WGPUShaderSourceWGSL wgsl_src = {}; +wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; +wgsl_src.code = str_view(composed_shader.c_str()); + +WGPUShaderModuleDescriptor shader_desc = {}; +shader_desc.nextInChain = &wgsl_src.chain; +WGPUShaderModule shader_module = + wgpuDeviceCreateShaderModule(ctx_.device, &shader_desc); +``` + +**Base shader template (WGSL asset):** +```wgsl +// Common bindings +@group(0) @binding(0) var<uniform> uniforms: CommonUniforms; +@group(0) @binding(1) var tex_sampler: sampler; + +// Placeholder for dynamic fragment code +#include "fragment_main" + +@fragment +fn fs_main(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> { + return compute_color(uv); // Implemented by included snippet +} +``` + +**Register snippets at startup:** +```cpp +ShaderComposer::Get().RegisterSnippet("plasma_shader", R"( +fn compute_color(uv: vec2<f32>) -> vec4<f32> { + let t = uniforms.time; + return vec4(sin(uv.x * 10.0 + t), cos(uv.y * 10.0 + t), 0.5, 1.0); +} +)"); + +ShaderComposer::Get().RegisterSnippet("tunnel_shader", R"( +fn compute_color(uv: vec2<f32>) -> vec4<f32> { + let r = length(uv - vec2(0.5)); + return vec4(vec3(1.0 / r), 1.0); +} +)"); +``` + +**Example usage:** `src/gpu/effects/rotating_cube_effect.cc:72-75` + +## QuadEffect with Auxiliary Textures + +Full-screen quad effect with access to previous framebuffer + side textures. + +**Binding layout:** +``` +@group(0) @binding(0) - Previous framebuffer texture +@group(0) @binding(1) - Sampler +@group(0) @binding(2) - CommonPostProcessUniforms +@group(0) @binding(3) - Effect-specific params +@group(0) @binding(4+) - Auxiliary textures (optional) +``` + +**Access auxiliary texture:** +```cpp +// In effect init() +WGPUTextureView aux_view = demo_->get_auxiliary_view("mask_name"); + +// Bind to binding 4 +const WGPUBindGroupEntry entries[] = { + {.binding = 0, .textureView = prev_frame_view}, + {.binding = 1, .sampler = sampler}, + {.binding = 2, .buffer = common_uniforms}, + {.binding = 3, .buffer = effect_params}, + {.binding = 4, .textureView = aux_view}, // Side texture +}; +``` + +**WGSL shader:** +```wgsl +@group(0) @binding(0) var prev_frame: texture_2d<f32>; +@group(0) @binding(1) var tex_sampler: sampler; +@group(0) @binding(2) var<uniform> common: CommonPostProcessUniforms; +@group(0) @binding(3) var<uniform> params: EffectParams; +@group(0) @binding(4) var aux_texture: texture_2d<f32>; + +@fragment +fn fs_main(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> { + let prev = textureSample(prev_frame, tex_sampler, uv); + let mask = textureSample(aux_texture, tex_sampler, uv); + return mix(prev, compute_effect(uv), mask.r); +} +``` + +## Dynamic Effect Parameters + +Use `UniformHelper` for .seq-controllable parameters. + +**C++ param struct:** +```cpp +struct MyEffectParams { + float strength; + float speed; + float _pad0; + float _pad1; +}; +static_assert(sizeof(MyEffectParams) == 16); + +class MyEffect : public Effect { + private: + UniformHelper<MyEffectParams> params_; +}; +``` + +**Effect init:** +```cpp +void MyEffect::init(MainSequence* demo) { + params_.init(ctx_.device); + params_.get().strength = 1.0f; + params_.get().speed = 2.0f; +} +``` + +**Update per frame:** +```cpp +void MyEffect::render(WGPUTextureView prev, WGPUTextureView target, + float beat, const EffectParams* ep) { + params_.apply_optional(ep); // Updates from .seq + params_.upload(ctx_.queue); + // ... render pass +} +``` + +**.seq syntax:** +``` +EFFECT MyEffect 0.0 10.0 strength=0.5 speed=3.0 +EFFECT MyEffect 10.0 20.0 strength=2.0 # speed keeps previous value +``` + +**Example:** `src/gpu/effects/flash_effect.cc`, `src/gpu/effects/chroma_aberration_effect.cc` + +## Uniform Buffer Alignment + +**WGSL padding rules:** +- `vec3<f32>` requires 16-byte alignment (use padding or switch to `vec4`) +- Use three `f32` fields instead of single `vec3` when possible + +**Correct patterns:** +```cpp +// Option 1: Explicit padding +struct MyUniforms { + vec3<f32> color; + f32 _pad0; + vec2<f32> offset; + f32 _pad1; + f32 _pad2; +}; + +// Option 2: Avoid vec3 +struct MyUniforms { + f32 color_r; + f32 color_g; + f32 color_b; + f32 intensity; + vec2<f32> offset; + f32 _pad0; + f32 _pad1; +}; +``` + +**Verification:** +```cpp +static_assert(sizeof(MyUniforms) == EXPECTED_SIZE); +``` + +**Validation:** Run `tools/validate_uniforms.py` before commit. + +**Reference:** `doc/UNIFORM_BUFFER_GUIDELINES.md` diff --git a/doc/TOOLS_REFERENCE.md b/doc/TOOLS_REFERENCE.md new file mode 100644 index 0000000..61412a9 --- /dev/null +++ b/doc/TOOLS_REFERENCE.md @@ -0,0 +1,89 @@ +# Developer Tools Reference + +Comprehensive reference for all developer tools in the project. + +--- + +## Windows Cross-Compilation + +```bash +# Fetch dependencies +./scripts/fetch_win_deps.sh + +# Build Windows binary +./scripts/build_win.sh + +# Run with Wine +./scripts/run_win.sh +``` + +--- + +## spectool (Audio Analysis) + +```bash +# Build +cmake -S . -B build -DDEMO_BUILD_TOOLS=ON +cmake --build build -j4 + +# Analyze WAV → .spec +./build/spectool analyze input.wav output.spec + +# Play .spec file +./build/spectool play input.spec +``` + +--- + +## specview (Visualization) + +```bash +# View spectrogram +./build/specview input.spec +``` + +Displays spectrogram visualization. + +--- + +## specplay (Diagnostic) + +```bash +# Analyze .spec file +./build/specplay input.spec + +# Or analyze .wav file +./build/specplay input.wav +``` + +Output: Peak, RMS, clipping detection. + +--- + +## Code Coverage (macOS) + +```bash +# Install lcov +brew install lcov + +# Generate coverage report +./scripts/gen_coverage_report.sh [target_dir] +``` + +Creates HTML coverage report. + +--- + +## Submodule Updates + +```bash +cd third_party/wgpu-native +git fetch +git checkout trunk +git reset --hard origin/trunk +cd ../.. +git add third_party/wgpu-native +git commit -m "chore: Update wgpu-native" +``` + +Updates wgpu-native to latest trunk. diff --git a/doc/UNIFORM_BUFFER_GUIDELINES.md b/doc/UNIFORM_BUFFER_GUIDELINES.md new file mode 100644 index 0000000..ac02223 --- /dev/null +++ b/doc/UNIFORM_BUFFER_GUIDELINES.md @@ -0,0 +1,106 @@ +# WGSL Uniform Buffer Guidelines + +This document outlines the rules and best practices for defining and using uniform buffers in WGSL shaders within this project, focusing on alignment, size, and consistency. + +## WGSL Alignment Rules + +Understanding WGSL's memory layout rules is crucial for correct uniform buffer implementation. The following are the general alignment requirements for common WGSL types: + +- `f32`: 4-byte alignment. +- `vec2<f32>`: 8-byte alignment (4 bytes per component * 2 components = 8 bytes). +- `vec3<f32>`: 16-byte alignment (4 bytes per component * 3 components = 12 bytes, padded to 16). +- `vec4<f32>`: 16-byte alignment (4 bytes per component * 4 components = 16 bytes). +- `array<T, N>`: The alignment of an array is typically the alignment of its base type `T`. + +Structs are padded to the alignment of their largest member. Any trailing space in a struct is also padded to match the maximum alignment of any member within the struct. + +## Standard Uniform Buffer Pattern + +To maintain consistency and facilitate efficient rendering, a standard pattern for uniform buffer usage is established: + +- **Binding 0 & 1:** Reserved for Sampler and Texture access (handled by `pp_update_bind_group`). +- **Binding 2:** **Common Uniforms** (`CommonPostProcessUniforms` or similar). This buffer should contain frequently used data like resolution, aspect ratio, time, beat, and audio intensity. +- **Binding 3:** **Effect-Specific Parameters**. This buffer holds parameters unique to a particular effect (e.g., `strength`, `speed`, `fade_amount`). + +This pattern ensures that common data is shared efficiently across effects, while effect-specific data remains isolated. + +## Defining Uniform Structs + +### WGSL Definitions + +When defining uniform structs in WGSL, adhere to the following: + +- **Explicit Padding:** Use padding fields (`_pad0`, `_pad1`, etc.) where necessary to ensure correct alignment, especially when mixing types of different alignment requirements (e.g., `vec2<f32>` followed by `f32`s). +- **Use `vec2<f32>` for 8-byte padding:** If you need 8 bytes of padding, use `_pad0: vec2<f32>` instead of `_pad0: f32, _pad1: f32` for potentially better clarity and to leverage WGSL's type system. +- **Minimize Padding:** Only add padding where required by alignment rules to reduce memory usage. + +**Example (CommonPostProcessUniforms / HeptagonUniforms):** + +```wgsl +struct CommonUniforms { + resolution: vec2<f32>, + _pad0: vec2<f32>, // 8 bytes padding to align subsequent members + aspect_ratio: f32, + time: f32, + beat: f32, + audio_intensity: f32, +}; +// Expected size: 32 bytes +``` + +**Example (EffectParams with f32 members):** + +```wgsl +struct EffectParams { + parameter1: f32, + parameter2: f32, + // ... more parameters ... +}; +// Expected size: 8 bytes (if only two f32s) +``` + +### C++ Definitions and Validation + +For every WGSL uniform struct, a corresponding C++ struct must exist. This C++ struct must include a `static_assert` to verify its size and alignment matches the WGSL definition. + +- **Mirror WGSL Structure:** The C++ struct should mirror the WGSL struct's member order and types as closely as possible to ensure accurate size calculation. +- **`static_assert`:** Always include `static_assert(sizeof(MyStruct) == EXPECTED_SIZE, "MyStruct must be EXPECTED_SIZE bytes for WGSL alignment");`. +- **Use `float` for `f32`:** Use `float` for `f32` in C++. +- **Use `vec2<f32>` mapping:** If WGSL uses `vec2<f32>`, map it to an equivalent C++ type that occupies 8 bytes, typically `float[2]` or a `struct Vec2 { float x, y; }` if more complex type handling is needed. +- **Padding:** C++ padding rules can differ from WGSL. Pay close attention to `static_assert` for validation. + +**Example (C++ CommonPostProcessUniforms):** + +```cpp +struct CommonPostProcessUniforms { + vec2 resolution; // 8 bytes + float _pad[2]; // 8 bytes padding (matches vec2<f32> in WGSL) + float aspect_ratio; // 4 bytes + float time; // 4 bytes + float beat; // 4 bytes + float audio_intensity; // 4 bytes +}; +static_assert(sizeof(CommonPostProcessUniforms) == 32, + "CommonPostProcessUniforms must be 32 bytes for WGSL alignment"); +``` + +**Example (C++ GaussianBlurParams):** + +```cpp +struct GaussianBlurParams { + float strength = 2.0f; + float _pad = 0.0f; +}; +static_assert(sizeof(GaussianBlurParams) == 8, + "GaussianBlurParams must be 8 bytes for WGSL alignment"); +``` + +## Handling Common Pitfalls + +- **`vec3<f32>` Padding:** Avoid using `vec3<f32>` for padding in WGSL, as it has a 16-byte alignment. If padding is needed, use `vec2<f32>` for 8 bytes or individual `f32`s for 4-byte alignment. +- **C++ vs. WGSL Alignment:** Always rely on `static_assert` in C++ and verify against WGSL alignment rules. C++ padding rules might differ, and the `static_assert` is the ultimate arbiter. +- **Unmatched Structs:** Ensure every WGSL uniform struct has a corresponding C++ struct with a matching `static_assert`. + +## Validation Tool + +The `tools/validate_uniforms.py` script is integrated into the build system. It automatically checks for inconsistencies between WGSL and C++ uniform struct definitions and reports any size mismatches. Ensure this script passes for all new or modified uniform definitions. diff --git a/scripts/gen_spectrograms.sh b/scripts/gen_spectrograms.sh index a5c1510..3213787 100755 --- a/scripts/gen_spectrograms.sh +++ b/scripts/gen_spectrograms.sh @@ -7,7 +7,7 @@ set -euo pipefail # --- Configuration --- PROJECT_ROOT=$(git rev-parse --show-toplevel) SOURCE_DIR="${PROJECT_ROOT}/assets/originals" -DEST_DIR="${PROJECT_ROOT}/assets/final" +DEST_DIR="${PROJECT_ROOT}/assets/final/music" SPECTOOL_PATH="${PROJECT_ROOT}/build/spectool" TEMP_WAV_DIR=$(mktemp -d) diff --git a/src/3d/visual_debug.cc b/src/3d/visual_debug.cc index 77311f6..cd4ccce 100644 --- a/src/3d/visual_debug.cc +++ b/src/3d/visual_debug.cc @@ -26,7 +26,7 @@ void VisualDebug::init(WGPUDevice device, WGPUTextureFormat format) { WGPUBufferDescriptor ub_desc = {}; ub_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; - ub_desc.size = sizeof(mat4); + ub_desc.size = sizeof(GlobalUniforms); uniform_buffer_ = wgpuDeviceCreateBuffer(device_, &ub_desc); } @@ -340,9 +340,12 @@ void VisualDebug::add_trajectory(const std::vector<vec3>& points, } void VisualDebug::update_buffers(const mat4& view_proj) { - // Update Uniforms + // Update Uniforms - fill entire GlobalUniforms structure + GlobalUniforms uniforms = {}; + uniforms.view_proj = view_proj; + // Other fields zeroed (not used by visual debug shader) wgpuQueueWriteBuffer(wgpuDeviceGetQueue(device_), uniform_buffer_, 0, - &view_proj, sizeof(mat4)); + &uniforms, sizeof(GlobalUniforms)); // Update Vertices size_t required_size = lines_.size() * 2 * sizeof(float) * 6; @@ -385,7 +388,7 @@ void VisualDebug::update_buffers(const mat4& view_proj) { WGPUBindGroupEntry bg_entry = {}; bg_entry.binding = 0; bg_entry.buffer = uniform_buffer_; - bg_entry.size = sizeof(mat4); + bg_entry.size = sizeof(GlobalUniforms); WGPUBindGroupDescriptor bg_desc = {}; bg_desc.layout = bind_group_layout_; diff --git a/src/audio/audio.cc b/src/audio/audio.cc index 2f485a6..c5bd3d9 100644 --- a/src/audio/audio.cc +++ b/src/audio/audio.cc @@ -65,9 +65,11 @@ void audio_start() { g_audio_backend->start(); } -void audio_render_ahead(float music_time, float dt) { +void audio_render_ahead(float music_time, float dt, float target_fill) { // Target: maintain look-ahead buffer - const float target_lookahead = (float)RING_BUFFER_LOOKAHEAD_MS / 1000.0f; + const float target_lookahead = (target_fill < 0.0f) + ? (float)RING_BUFFER_LOOKAHEAD_MS / 1000.0f + : target_fill; // Render in small chunks to keep synth time synchronized with tracker // Chunk size: one frame's worth of audio (~16.6ms @ 60fps) diff --git a/src/audio/audio.h b/src/audio/audio.h index e063a57..778d312 100644 --- a/src/audio/audio.h +++ b/src/audio/audio.h @@ -24,7 +24,8 @@ void audio_init(); void audio_start(); // Starts the audio device callback // Ring buffer audio rendering (main thread fills buffer) -void audio_render_ahead(float music_time, float dt); +// target_fill: Target buffer fill time in seconds (default: RING_BUFFER_LOOKAHEAD_MS/1000) +void audio_render_ahead(float music_time, float dt, float target_fill = -1.0f); // Get current playback time (in seconds) based on samples consumed // This is the ring buffer READ position (what's being played NOW) diff --git a/src/gpu/demo_effects.h b/src/gpu/demo_effects.h index 54bf657..ff7e017 100644 --- a/src/gpu/demo_effects.h +++ b/src/gpu/demo_effects.h @@ -7,12 +7,14 @@ #include "3d/scene.h" #include "effect.h" #include "gpu/effects/circle_mask_effect.h" -#include "gpu/effects/fade_effect.h" // FadeEffect with full definition +#include "gpu/effects/fade_effect.h" // FadeEffect with full definition #include "gpu/effects/flash_effect.h" // FlashEffect with params support #include "gpu/effects/post_process_helper.h" #include "gpu/effects/rotating_cube_effect.h" #include "gpu/effects/shaders.h" #include "gpu/effects/theme_modulation_effect.h" // ThemeModulationEffect with full definition +#include "gpu/effects/hybrid_3d_effect.h" +#include "gpu/effects/flash_cube_effect.h" #include "gpu/gpu.h" #include "gpu/texture_manager.h" #include "gpu/uniform_helper.h" @@ -49,7 +51,6 @@ class ParticlesEffect : public Effect { ComputePass compute_pass_; RenderPass render_pass_; GpuBuffer particles_buffer_; - UniformBuffer<CommonPostProcessUniforms> uniforms_; }; class PassthroughEffect : public PostProcessEffect { @@ -58,7 +59,6 @@ class PassthroughEffect : public PostProcessEffect { void update_bind_group(WGPUTextureView input_view) override; private: - UniformBuffer<CommonPostProcessUniforms> uniforms_; }; class MovingEllipseEffect : public Effect { @@ -83,7 +83,6 @@ class ParticleSprayEffect : public Effect { ComputePass compute_pass_; RenderPass render_pass_; GpuBuffer particles_buffer_; - UniformBuffer<CommonPostProcessUniforms> uniforms_; }; // Parameters for GaussianBlurEffect (set at construction time) @@ -106,7 +105,6 @@ class GaussianBlurEffect : public PostProcessEffect { private: GaussianBlurParams params_; - UniformBuffer<CommonPostProcessUniforms> uniforms_; UniformBuffer<GaussianBlurParams> params_buffer_; }; @@ -118,7 +116,6 @@ class SolarizeEffect : public PostProcessEffect { void update_bind_group(WGPUTextureView input_view) override; private: - UniformBuffer<CommonPostProcessUniforms> uniforms_; }; // Parameters for VignetteEffect @@ -137,7 +134,6 @@ class VignetteEffect : public PostProcessEffect { private: VignetteParams params_; - UniformBuffer<CommonPostProcessUniforms> uniforms_; UniformBuffer<VignetteParams> params_buffer_; }; @@ -160,48 +156,33 @@ class ChromaAberrationEffect : public PostProcessEffect { private: ChromaAberrationParams params_; - UniformBuffer<CommonPostProcessUniforms> uniforms_; UniformBuffer<ChromaAberrationParams> params_buffer_; }; -class Hybrid3DEffect : public Effect { - public: - Hybrid3DEffect(const GpuContext& ctx); - void init(MainSequence* demo) override; - void render(WGPURenderPassEncoder pass, float time, float beat, - float intensity, float aspect_ratio) override; - - private: - Renderer3D renderer_; - TextureManager texture_manager_; - Scene scene_; - Camera camera_; - int width_ = 1280; - int height_ = 720; +// Parameters for DistortEffect +struct DistortParams { + float strength = 0.01f; // Default distortion strength + float speed = 1.0f; // Default distortion speed }; +static_assert(sizeof(DistortParams) == 8, "DistortParams must be 8 bytes for WGSL alignment"); -class FlashCubeEffect : public Effect { +class DistortEffect : public PostProcessEffect { public: - FlashCubeEffect(const GpuContext& ctx); - void init(MainSequence* demo) override; - void resize(int width, int height) override; + DistortEffect(const GpuContext& ctx); + DistortEffect(const GpuContext& ctx, const DistortParams& params); void render(WGPURenderPassEncoder pass, float time, float beat, float intensity, float aspect_ratio) override; + void update_bind_group(WGPUTextureView input_view) override; private: - Renderer3D renderer_; - TextureManager texture_manager_; - Scene scene_; - Camera camera_; - int width_ = 1280; - int height_ = 720; - float last_beat_; - float flash_intensity_; + DistortParams params_; + UniformBuffer<DistortParams> params_buffer_; }; -// ThemeModulationEffect now defined in gpu/effects/theme_modulation_effect.h (included above) -// FadeEffect now defined in gpu/effects/fade_effect.h (included above) -// FlashEffect now defined in gpu/effects/flash_effect.h (included above) +// ThemeModulationEffect now defined in gpu/effects/theme_modulation_effect.h +// (included above) FadeEffect now defined in gpu/effects/fade_effect.h +// (included above) FlashEffect now defined in gpu/effects/flash_effect.h +// (included above) // Auto-generated functions void LoadTimeline(MainSequence& main_seq, const GpuContext& ctx); diff --git a/src/gpu/effect.h b/src/gpu/effect.h index 6fdb0f4..8f35f3c 100644 --- a/src/gpu/effect.h +++ b/src/gpu/effect.h @@ -1,5 +1,7 @@ #pragma once #include "gpu/gpu.h" +#include "gpu/effects/post_process_helper.h" +#include "gpu/uniform_helper.h" #include <algorithm> #include <map> #include <memory> @@ -12,6 +14,7 @@ class PostProcessEffect; class Effect { public: Effect(const GpuContext& ctx) : ctx_(ctx) { + uniforms_.init(ctx.device); } virtual ~Effect() = default; virtual void init(MainSequence* demo) { @@ -43,7 +46,7 @@ class Effect { protected: const GpuContext& ctx_; - GpuBuffer uniforms_; + UniformBuffer<CommonPostProcessUniforms> uniforms_; int width_ = 1280; int height_ = 720; }; diff --git a/src/gpu/effects/chroma_aberration_effect.cc b/src/gpu/effects/chroma_aberration_effect.cc index 7f41153..af3acc5 100644 --- a/src/gpu/effects/chroma_aberration_effect.cc +++ b/src/gpu/effects/chroma_aberration_effect.cc @@ -18,7 +18,6 @@ ChromaAberrationEffect::ChromaAberrationEffect( : PostProcessEffect(ctx), params_(params) { pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, chroma_aberration_shader_wgsl); - uniforms_.init(ctx_.device); params_buffer_.init(ctx_.device); } diff --git a/src/gpu/effects/circle_mask_effect.cc b/src/gpu/effects/circle_mask_effect.cc index 5b71086..ca80cf9 100644 --- a/src/gpu/effects/circle_mask_effect.cc +++ b/src/gpu/effects/circle_mask_effect.cc @@ -3,6 +3,7 @@ // Generates circular mask and renders green background outside circle. #include "gpu/effects/circle_mask_effect.h" +#include "gpu/effects/shader_composer.h" #include "generated/assets.h" CircleMaskEffect::CircleMaskEffect(const GpuContext& ctx, float radius) @@ -30,9 +31,7 @@ void CircleMaskEffect::init(MainSequence* demo) { demo_->register_auxiliary_texture("circle_mask", width, height); - compute_uniforms_.init(ctx_.device); compute_params_.init(ctx_.device); - render_uniforms_.init(ctx_.device); WGPUSamplerDescriptor sampler_desc = {}; sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; @@ -49,9 +48,12 @@ void CircleMaskEffect::init(MainSequence* demo) { const char* render_shader = (const char*)GetAsset( AssetId::ASSET_CIRCLE_MASK_RENDER_SHADER, &render_size); + // Compose shaders to resolve #include directives + std::string composed_compute = ShaderComposer::Get().Compose({}, compute_shader); + WGPUShaderSourceWGSL compute_wgsl = {}; compute_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL; - compute_wgsl.code = str_view(compute_shader); + compute_wgsl.code = str_view(composed_compute.c_str()); WGPUShaderModuleDescriptor compute_desc = {}; compute_desc.nextInChain = &compute_wgsl.chain; @@ -82,11 +84,11 @@ void CircleMaskEffect::init(MainSequence* demo) { const WGPUBindGroupEntry compute_entries[] = { {.binding = 0, - .buffer = compute_uniforms_.get().buffer, + .buffer = uniforms_.get().buffer, .size = sizeof(CommonPostProcessUniforms)}, {.binding = 1, .buffer = compute_params_.get().buffer, - .size = sizeof(EffectParams)}, + .size = sizeof(CircleMaskParams)}, }; const WGPUBindGroupDescriptor compute_bg_desc = { .layout = wgpuRenderPipelineGetBindGroupLayout(compute_pipeline_, 0), @@ -96,9 +98,11 @@ void CircleMaskEffect::init(MainSequence* demo) { compute_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &compute_bg_desc); + std::string composed_render = ShaderComposer::Get().Compose({}, render_shader); + WGPUShaderSourceWGSL render_wgsl = {}; render_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL; - render_wgsl.code = str_view(render_shader); + render_wgsl.code = str_view(composed_render.c_str()); WGPUShaderModuleDescriptor render_desc = {}; render_desc.nextInChain = &render_wgsl.chain; @@ -139,7 +143,7 @@ void CircleMaskEffect::init(MainSequence* demo) { {.binding = 0, .textureView = mask_view}, {.binding = 1, .sampler = mask_sampler_}, {.binding = 2, - .buffer = render_uniforms_.get().buffer, + .buffer = uniforms_.get().buffer, .size = sizeof(CommonPostProcessUniforms)}, }; const WGPUBindGroupDescriptor render_bg_desc = { @@ -160,9 +164,9 @@ void CircleMaskEffect::compute(WGPUCommandEncoder encoder, float time, .beat = beat, .audio_intensity = intensity, }; - compute_uniforms_.update(ctx_.queue, uniforms); + uniforms_.update(ctx_.queue, uniforms); - const EffectParams params = { + const CircleMaskParams params = { .radius = radius_, }; compute_params_.update(ctx_.queue, params); @@ -199,7 +203,7 @@ void CircleMaskEffect::render(WGPURenderPassEncoder pass, float time, .beat = beat, .audio_intensity = intensity, }; - render_uniforms_.update(ctx_.queue, uniforms); + uniforms_.update(ctx_.queue, uniforms); wgpuRenderPassEncoderSetPipeline(pass, render_pipeline_); wgpuRenderPassEncoderSetBindGroup(pass, 0, render_bind_group_, 0, nullptr); diff --git a/src/gpu/effects/circle_mask_effect.h b/src/gpu/effects/circle_mask_effect.h index ac44210..2ddbb11 100644 --- a/src/gpu/effects/circle_mask_effect.h +++ b/src/gpu/effects/circle_mask_effect.h @@ -21,23 +21,23 @@ class CircleMaskEffect : public Effect { float intensity, float aspect_ratio) override; private: - struct EffectParams { + struct CircleMaskParams { float radius; float _pad[3]; }; + static_assert(sizeof(CircleMaskParams) == 16, + "CircleMaskParams must be 16 bytes for WGSL alignment"); MainSequence* demo_ = nullptr; float radius_; WGPURenderPipeline compute_pipeline_ = nullptr; WGPUBindGroup compute_bind_group_ = nullptr; - UniformBuffer<CommonPostProcessUniforms> compute_uniforms_; - UniformBuffer<EffectParams> compute_params_; + UniformBuffer<CircleMaskParams> compute_params_; WGPURenderPipeline render_pipeline_ = nullptr; WGPUBindGroup render_bind_group_ = nullptr; WGPUSampler mask_sampler_ = nullptr; - UniformBuffer<CommonPostProcessUniforms> render_uniforms_; }; #endif /* CIRCLE_MASK_EFFECT_H_ */ diff --git a/src/gpu/effects/distort_effect.cc b/src/gpu/effects/distort_effect.cc index d11dfd7..52a8ec7 100644 --- a/src/gpu/effects/distort_effect.cc +++ b/src/gpu/effects/distort_effect.cc @@ -9,31 +9,35 @@ DistortEffect::DistortEffect(const GpuContext& ctx) : DistortEffect(ctx, DistortParams()) { } -DistortEffect::DistEffect(const GpuContext& ctx, const DistortParams& params) +DistortEffect::DistortEffect(const GpuContext& ctx, const DistortParams& params) : PostProcessEffect(ctx), params_(params) { - uniforms_ = - gpu_create_buffer(ctx_.device, sizeof(DistortUniforms), - WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); + params_buffer_.init(ctx_.device); pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, distort_shader_wgsl); } void DistortEffect::render(WGPURenderPassEncoder pass, float t, float b, float i, float a) { - DistortUniforms u = { + // Populate CommonPostProcessUniforms + const CommonPostProcessUniforms common_u = { + .resolution = {(float)width_, (float)height_}, + .aspect_ratio = a, .time = t, .beat = b, - .intensity = i, - .aspect_ratio = a, - .width = (float)width_, - .height = (float)height_, + .audio_intensity = i, + }; + uniforms_.update(ctx_.queue, common_u); + + // Populate DistortParams + const DistortParams distort_p = { .strength = params_.strength, .speed = params_.speed, }; - wgpuQueueWriteBuffer(ctx_.queue, uniforms_.buffer, 0, &u, sizeof(u)); + params_buffer_.update(ctx_.queue, distort_p); + PostProcessEffect::render(pass, t, b, i, a); } void DistortEffect::update_bind_group(WGPUTextureView v) { - pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, {}, uniforms_); + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(), params_buffer_); }
\ No newline at end of file diff --git a/src/gpu/effects/fade_effect.cc b/src/gpu/effects/fade_effect.cc index 3efc583..39b54e0 100644 --- a/src/gpu/effects/fade_effect.cc +++ b/src/gpu/effects/fade_effect.cc @@ -5,6 +5,12 @@ #include "gpu/effects/post_process_helper.h" #include <cmath> +struct FadeParams { + float fade_amount; + float _pad[3]; +}; +static_assert(sizeof(FadeParams) == 16, "FadeParams must be 16 bytes for WGSL alignment"); + FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { const char* shader_code = R"( struct VertexOutput { @@ -22,7 +28,7 @@ FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { audio_intensity: f32, }; - struct EffectParams { + struct FadeParams { fade_amount: f32, _pad0: f32, _pad1: f32, @@ -32,7 +38,7 @@ FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { @group(0) @binding(0) var inputSampler: sampler; @group(0) @binding(1) var inputTexture: texture_2d<f32>; @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; - @group(0) @binding(3) var<uniform> params: EffectParams; + @group(0) @binding(3) var<uniform> params: FadeParams; @vertex fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput { @@ -57,14 +63,13 @@ FadeEffect::FadeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, shader_code); - common_uniforms_.init(ctx_.device); params_buffer_ = gpu_create_buffer( ctx_.device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); } void FadeEffect::update_bind_group(WGPUTextureView input_view) { pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, - common_uniforms_.get(), params_buffer_); + uniforms_.get(), params_buffer_); } void FadeEffect::render(WGPURenderPassEncoder pass, float time, float beat, @@ -76,7 +81,7 @@ void FadeEffect::render(WGPURenderPassEncoder pass, float time, float beat, .beat = beat, .audio_intensity = intensity, }; - common_uniforms_.update(ctx_.queue, u); + uniforms_.update(ctx_.queue, u); // Example fade pattern: fade in at start, fade out at end // Customize this based on your needs @@ -90,8 +95,8 @@ void FadeEffect::render(WGPURenderPassEncoder pass, float time, float beat, fade_amount = fmaxf(fade_amount, 0.0f); } - float params[4] = {fade_amount, 0.0f, 0.0f, 0.0f}; - wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, params, + FadeParams params = {fade_amount, {0.0f, 0.0f, 0.0f}}; + wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, ¶ms, sizeof(params)); wgpuRenderPassEncoderSetPipeline(pass, pipeline_); diff --git a/src/gpu/effects/fade_effect.h b/src/gpu/effects/fade_effect.h index 22b8f76..178c360 100644 --- a/src/gpu/effects/fade_effect.h +++ b/src/gpu/effects/fade_effect.h @@ -4,9 +4,9 @@ #pragma once #include "gpu/effect.h" +#include "gpu/effects/post_process_helper.h" #include "gpu/gpu.h" #include "gpu/uniform_helper.h" -#include "gpu/effects/post_process_helper.h" class FadeEffect : public PostProcessEffect { public: @@ -16,6 +16,5 @@ class FadeEffect : public PostProcessEffect { void update_bind_group(WGPUTextureView input_view) override; private: - UniformBuffer<CommonPostProcessUniforms> common_uniforms_; GpuBuffer params_buffer_; }; diff --git a/src/gpu/effects/flash_cube_effect.h b/src/gpu/effects/flash_cube_effect.h index 7089af2..5faeb00 100644 --- a/src/gpu/effects/flash_cube_effect.h +++ b/src/gpu/effects/flash_cube_effect.h @@ -22,8 +22,6 @@ class FlashCubeEffect : public Effect { TextureManager texture_manager_; Scene scene_; Camera camera_; - int width_ = 1280; - int height_ = 720; float last_beat_ = 0.0f; float flash_intensity_ = 0.0f; }; diff --git a/src/gpu/effects/gaussian_blur_effect.cc b/src/gpu/effects/gaussian_blur_effect.cc index 0cc4821..697be88 100644 --- a/src/gpu/effects/gaussian_blur_effect.cc +++ b/src/gpu/effects/gaussian_blur_effect.cc @@ -18,7 +18,6 @@ GaussianBlurEffect::GaussianBlurEffect(const GpuContext& ctx, : PostProcessEffect(ctx), params_(params) { pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, gaussian_blur_shader_wgsl); - uniforms_.init(ctx_.device); params_buffer_.init(ctx_.device); } diff --git a/src/gpu/effects/heptagon_effect.cc b/src/gpu/effects/heptagon_effect.cc index b77ec53..7b0702d 100644 --- a/src/gpu/effects/heptagon_effect.cc +++ b/src/gpu/effects/heptagon_effect.cc @@ -5,39 +5,25 @@ #include "gpu/gpu.h" #include "util/mini_math.h" -// Match CommonUniforms struct from main_shader.wgsl. -// Padded to 32 bytes for WGSL alignment rules. -struct HeptagonUniforms { - vec2 resolution; // 8 bytes - float _pad0[2]; // 8 bytes padding to align next float - float aspect_ratio; // 4 bytes - float time; // 4 bytes - float beat; // 4 bytes - float audio_intensity; // 4 bytes -}; -static_assert(sizeof(HeptagonUniforms) == 32, - "HeptagonUniforms must be 32 bytes for WGSL alignment"); - // --- HeptagonEffect --- HeptagonEffect::HeptagonEffect(const GpuContext& ctx) : Effect(ctx) { - uniforms_ = - gpu_create_buffer(ctx_.device, sizeof(HeptagonUniforms), - WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); - ResourceBinding bindings[] = {{uniforms_, WGPUBufferBindingType_Uniform}}; + // uniforms_ is initialized by Effect base class + ResourceBinding bindings[] = {{uniforms_.get(), WGPUBufferBindingType_Uniform}}; pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, main_shader_wgsl, bindings, 1); pass_.vertex_count = 21; } void HeptagonEffect::render(WGPURenderPassEncoder pass, float t, float b, float i, float a) { - HeptagonUniforms u = { + CommonPostProcessUniforms u = { .resolution = {(float)width_, (float)height_}, + ._pad = {0.0f, 0.0f}, .aspect_ratio = a, .time = t, .beat = b, .audio_intensity = i, }; - wgpuQueueWriteBuffer(ctx_.queue, uniforms_.buffer, 0, &u, sizeof(u)); + uniforms_.update(ctx_.queue, u); wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline); wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr); wgpuRenderPassEncoderDraw(pass, pass_.vertex_count, 1, 0, 0); diff --git a/src/gpu/effects/moving_ellipse_effect.cc b/src/gpu/effects/moving_ellipse_effect.cc index 945f807..9866f20 100644 --- a/src/gpu/effects/moving_ellipse_effect.cc +++ b/src/gpu/effects/moving_ellipse_effect.cc @@ -7,10 +7,8 @@ // --- MovingEllipseEffect --- MovingEllipseEffect::MovingEllipseEffect(const GpuContext& ctx) : Effect(ctx) { - uniforms_ = - gpu_create_buffer(ctx_.device, sizeof(CommonPostProcessUniforms), - WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); - ResourceBinding bindings[] = {{uniforms_, WGPUBufferBindingType_Uniform}}; + // uniforms_ is initialized by Effect base class + ResourceBinding bindings[] = {{uniforms_.get(), WGPUBufferBindingType_Uniform}}; pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, ellipse_shader_wgsl, bindings, 1); pass_.vertex_count = 3; @@ -19,12 +17,13 @@ void MovingEllipseEffect::render(WGPURenderPassEncoder pass, float t, float b, float i, float a) { const CommonPostProcessUniforms u = { .resolution = {(float)width_, (float)height_}, + ._pad = {0.0f, 0.0f}, .aspect_ratio = a, .time = t, .beat = b, .audio_intensity = i, }; - wgpuQueueWriteBuffer(ctx_.queue, uniforms_.buffer, 0, &u, sizeof(u)); + uniforms_.update(ctx_.queue, u); wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline); wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr); wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); diff --git a/src/gpu/effects/particle_spray_effect.cc b/src/gpu/effects/particle_spray_effect.cc index 3fd2590..a435884 100644 --- a/src/gpu/effects/particle_spray_effect.cc +++ b/src/gpu/effects/particle_spray_effect.cc @@ -8,7 +8,6 @@ // --- ParticleSprayEffect --- ParticleSprayEffect::ParticleSprayEffect(const GpuContext& ctx) : Effect(ctx) { - uniforms_.init(ctx_.device); std::vector<Particle> init_p(NUM_PARTICLES); for (Particle& p : init_p) p.pos[3] = 0.0f; diff --git a/src/gpu/effects/particles_effect.cc b/src/gpu/effects/particles_effect.cc index 01f90a5..cd0df74 100644 --- a/src/gpu/effects/particles_effect.cc +++ b/src/gpu/effects/particles_effect.cc @@ -8,7 +8,6 @@ // --- ParticlesEffect --- ParticlesEffect::ParticlesEffect(const GpuContext& ctx) : Effect(ctx) { - uniforms_.init(ctx_.device); std::vector<Particle> init_p(NUM_PARTICLES); particles_buffer_ = gpu_create_buffer( ctx_.device, sizeof(Particle) * NUM_PARTICLES, diff --git a/src/gpu/effects/passthrough_effect.cc b/src/gpu/effects/passthrough_effect.cc index 93cf948..01d557a 100644 --- a/src/gpu/effects/passthrough_effect.cc +++ b/src/gpu/effects/passthrough_effect.cc @@ -7,7 +7,6 @@ // --- PassthroughEffect --- PassthroughEffect::PassthroughEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { - uniforms_.init(ctx_.device); pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, passthrough_shader_wgsl); } diff --git a/src/gpu/effects/post_process_helper.cc b/src/gpu/effects/post_process_helper.cc index 74e052d..e99467f 100644 --- a/src/gpu/effects/post_process_helper.cc +++ b/src/gpu/effects/post_process_helper.cc @@ -4,16 +4,19 @@ #include "post_process_helper.h" #include "../demo_effects.h" #include "gpu/gpu.h" +#include "gpu/effects/shader_composer.h" #include <cstring> // Helper to create a standard post-processing pipeline WGPURenderPipeline create_post_process_pipeline(WGPUDevice device, WGPUTextureFormat format, const char* shader_code) { + std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code); + WGPUShaderModuleDescriptor shader_desc = {}; WGPUShaderSourceWGSL wgsl_src = {}; wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_src.code = str_view(shader_code); + wgsl_src.code = str_view(composed_shader.c_str()); shader_desc.nextInChain = &wgsl_src.chain; WGPUShaderModule shader_module = wgpuDeviceCreateShaderModule(device, &shader_desc); @@ -94,7 +97,8 @@ void pp_update_bind_group(WGPUDevice device, WGPURenderPipeline pipeline, bge[2].buffer = uniforms.buffer; bge[2].size = uniforms.size; bge[3].binding = PP_BINDING_EFFECT_PARAMS; - bge[3].buffer = effect_params.buffer ? effect_params.buffer : g_dummy_buffer.buffer; + bge[3].buffer = + effect_params.buffer ? effect_params.buffer : g_dummy_buffer.buffer; bge[3].size = effect_params.buffer ? effect_params.size : g_dummy_buffer.size; WGPUBindGroupDescriptor bgd = { .layout = bgl, .entryCount = 4, .entries = bge}; diff --git a/src/gpu/effects/post_process_helper.h b/src/gpu/effects/post_process_helper.h index 77b184f..23cde0e 100644 --- a/src/gpu/effects/post_process_helper.h +++ b/src/gpu/effects/post_process_helper.h @@ -19,10 +19,10 @@ static_assert(sizeof(CommonPostProcessUniforms) == 32, "CommonPostProcessUniforms must be 32 bytes for WGSL alignment"); // Standard post-process bind group layout (group 0): -#define PP_BINDING_SAMPLER 0 // Sampler for input texture -#define PP_BINDING_TEXTURE 1 // Input texture (previous render pass) -#define PP_BINDING_UNIFORMS 2 // Custom uniforms buffer -#define PP_BINDING_EFFECT_PARAMS 3 // Effect-specific parameters +#define PP_BINDING_SAMPLER 0 // Sampler for input texture +#define PP_BINDING_TEXTURE 1 // Input texture (previous render pass) +#define PP_BINDING_UNIFORMS 2 // Custom uniforms buffer +#define PP_BINDING_EFFECT_PARAMS 3 // Effect-specific parameters // Helper to create a standard post-processing pipeline // Uniforms are accessible to both vertex and fragment shaders diff --git a/src/gpu/effects/shaders.cc b/src/gpu/effects/shaders.cc index 2e1cfe5..625c5b6 100644 --- a/src/gpu/effects/shaders.cc +++ b/src/gpu/effects/shaders.cc @@ -99,6 +99,28 @@ const char* chroma_aberration_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_CHROMA_ABERRATION); +const char* gen_noise_compute_wgsl = + + SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_NOISE); + +const char* gen_perlin_compute_wgsl = + + SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_PERLIN); + +const char* gen_grid_compute_wgsl = + + SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_GRID); + +#if !defined(STRIP_GPU_COMPOSITE) +const char* gen_blend_compute_wgsl = + + SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_BLEND); + +const char* gen_mask_compute_wgsl = + + SafeGetAsset(AssetId::ASSET_SHADER_COMPUTE_GEN_MASK); +#endif + const char* vignette_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_VIGNETTE); diff --git a/src/gpu/effects/shaders.h b/src/gpu/effects/shaders.h index 50b4f32..68b8834 100644 --- a/src/gpu/effects/shaders.h +++ b/src/gpu/effects/shaders.h @@ -18,3 +18,10 @@ extern const char* solarize_shader_wgsl; extern const char* distort_shader_wgsl; extern const char* chroma_aberration_shader_wgsl; extern const char* vignette_shader_wgsl; +extern const char* gen_noise_compute_wgsl; +extern const char* gen_perlin_compute_wgsl; +extern const char* gen_grid_compute_wgsl; +#if !defined(STRIP_GPU_COMPOSITE) +extern const char* gen_blend_compute_wgsl; +extern const char* gen_mask_compute_wgsl; +#endif diff --git a/src/gpu/effects/solarize_effect.cc b/src/gpu/effects/solarize_effect.cc index d74d708..4f47218 100644 --- a/src/gpu/effects/solarize_effect.cc +++ b/src/gpu/effects/solarize_effect.cc @@ -6,7 +6,6 @@ // --- SolarizeEffect --- SolarizeEffect::SolarizeEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { - uniforms_.init(ctx.device); pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, solarize_shader_wgsl); } @@ -23,6 +22,6 @@ void SolarizeEffect::render(WGPURenderPassEncoder pass, float t, float b, PostProcessEffect::render(pass, t, b, i, a); } void SolarizeEffect::update_bind_group(WGPUTextureView v) { - pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, - uniforms_.get(), {}); + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(), + {}); } diff --git a/src/gpu/effects/theme_modulation_effect.cc b/src/gpu/effects/theme_modulation_effect.cc index f9ae636..b1eff90 100644 --- a/src/gpu/effects/theme_modulation_effect.cc +++ b/src/gpu/effects/theme_modulation_effect.cc @@ -6,6 +6,12 @@ #include "gpu/effects/shaders.h" #include <cmath> +struct ThemeModulationParams { + float theme_brightness; + float _pad[3]; +}; +static_assert(sizeof(ThemeModulationParams) == 16, "ThemeModulationParams must be 16 bytes for WGSL alignment"); + ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx) : PostProcessEffect(ctx) { const char* shader_code = R"( @@ -24,7 +30,7 @@ ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx) audio_intensity: f32, }; - struct EffectParams { + struct ThemeModulationParams { theme_brightness: f32, _pad0: f32, _pad1: f32, @@ -34,7 +40,7 @@ ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx) @group(0) @binding(0) var inputSampler: sampler; @group(0) @binding(1) var inputTexture: texture_2d<f32>; @group(0) @binding(2) var<uniform> uniforms: CommonUniforms; - @group(0) @binding(3) var<uniform> params: EffectParams; + @group(0) @binding(3) var<uniform> params: ThemeModulationParams; @vertex fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput { @@ -61,14 +67,13 @@ ThemeModulationEffect::ThemeModulationEffect(const GpuContext& ctx) pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, shader_code); - common_uniforms_.init(ctx_.device); params_buffer_ = gpu_create_buffer( ctx_.device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); } void ThemeModulationEffect::update_bind_group(WGPUTextureView input_view) { pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, - common_uniforms_.get(), params_buffer_); + uniforms_.get(), params_buffer_); } void ThemeModulationEffect::render(WGPURenderPassEncoder pass, float time, @@ -81,7 +86,7 @@ void ThemeModulationEffect::render(WGPURenderPassEncoder pass, float time, .beat = beat, .audio_intensity = intensity, }; - common_uniforms_.update(ctx_.queue, u); + uniforms_.update(ctx_.queue, u); // Alternate between bright and dark every 4 seconds (2 pattern changes) // Music patterns change every 2 seconds at 120 BPM @@ -97,8 +102,8 @@ void ThemeModulationEffect::render(WGPURenderPassEncoder pass, float time, bright_value + (dark_value - bright_value) * transition; // Update params buffer - float params[4] = {theme_brightness, 0.0f, 0.0f, 0.0f}; - wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, params, + ThemeModulationParams params = {theme_brightness, {0.0f, 0.0f, 0.0f}}; + wgpuQueueWriteBuffer(ctx_.queue, params_buffer_.buffer, 0, ¶ms, sizeof(params)); // Render diff --git a/src/gpu/effects/theme_modulation_effect.h b/src/gpu/effects/theme_modulation_effect.h index 107529b..713347b 100644 --- a/src/gpu/effects/theme_modulation_effect.h +++ b/src/gpu/effects/theme_modulation_effect.h @@ -5,8 +5,8 @@ #pragma once #include "gpu/effect.h" -#include "gpu/uniform_helper.h" #include "gpu/effects/post_process_helper.h" +#include "gpu/uniform_helper.h" class ThemeModulationEffect : public PostProcessEffect { public: @@ -16,6 +16,5 @@ class ThemeModulationEffect : public PostProcessEffect { void update_bind_group(WGPUTextureView input_view) override; private: - UniformBuffer<CommonPostProcessUniforms> common_uniforms_; GpuBuffer params_buffer_; }; diff --git a/src/gpu/effects/vignette_effect.cc b/src/gpu/effects/vignette_effect.cc index a4967dd..bba0372 100644 --- a/src/gpu/effects/vignette_effect.cc +++ b/src/gpu/effects/vignette_effect.cc @@ -12,7 +12,6 @@ VignetteEffect::VignetteEffect(const GpuContext& ctx) VignetteEffect::VignetteEffect(const GpuContext& ctx, const VignetteParams& params) : PostProcessEffect(ctx), params_(params) { - uniforms_.init(ctx_.device); params_buffer_.init(ctx_.device); pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, vignette_shader_wgsl); @@ -33,6 +32,6 @@ void VignetteEffect::render(WGPURenderPassEncoder pass, float t, float b, } void VignetteEffect::update_bind_group(WGPUTextureView v) { - pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, - uniforms_.get(), params_buffer_.get()); + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, v, uniforms_.get(), + params_buffer_.get()); } diff --git a/src/gpu/gpu.cc b/src/gpu/gpu.cc index fde241d..e89a2f0 100644 --- a/src/gpu/gpu.cc +++ b/src/gpu/gpu.cc @@ -5,6 +5,7 @@ #include "gpu.h" #include "effect.h" #include "gpu/effects/shaders.h" +#include "gpu/effects/shader_composer.h" #include "platform/platform.h" #include <cassert> @@ -55,10 +56,13 @@ RenderPass gpu_create_render_pass(WGPUDevice device, WGPUTextureFormat format, ResourceBinding* bindings, int num_bindings) { RenderPass pass = {}; + // Compose shader to resolve #include directives + std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code); + // Create Shader Module WGPUShaderSourceWGSL wgsl_src = {}; wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_src.code = str_view(shader_code); + wgsl_src.code = str_view(composed_shader.c_str()); WGPUShaderModuleDescriptor shader_desc = {}; shader_desc.nextInChain = &wgsl_src.chain; WGPUShaderModule shader_module = @@ -156,9 +160,12 @@ ComputePass gpu_create_compute_pass(WGPUDevice device, const char* shader_code, int num_bindings) { ComputePass pass = {}; + // Compose shader to resolve #include directives + std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code); + WGPUShaderSourceWGSL wgsl_src = {}; wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_src.code = str_view(shader_code); + wgsl_src.code = str_view(composed_shader.c_str()); WGPUShaderModuleDescriptor shader_desc = {}; shader_desc.nextInChain = &wgsl_src.chain; WGPUShaderModule shader_module = diff --git a/src/gpu/texture_manager.cc b/src/gpu/texture_manager.cc index 0c30c94..dfa6315 100644 --- a/src/gpu/texture_manager.cc +++ b/src/gpu/texture_manager.cc @@ -2,7 +2,10 @@ // It implements the TextureManager. #include "gpu/texture_manager.h" +#include "gpu/effects/shader_composer.h" +#include "platform/platform.h" #include <cstdio> +#include <cstring> #include <vector> #if defined(DEMO_CROSS_COMPILE_WIN32) @@ -26,6 +29,22 @@ void TextureManager::shutdown() { wgpuTextureRelease(pair.second.texture); } textures_.clear(); + + for (auto& pair : compute_pipelines_) { + if (pair.second.pipeline) { + wgpuComputePipelineRelease(pair.second.pipeline); + } + } + compute_pipelines_.clear(); + +#if !defined(STRIP_GPU_COMPOSITE) + for (auto& pair : samplers_) { + if (pair.second) { + wgpuSamplerRelease(pair.second); + } + } + samplers_.clear(); +#endif } void TextureManager::create_procedural_texture( @@ -112,3 +131,570 @@ WGPUTextureView TextureManager::get_texture_view(const std::string& name) { } return nullptr; } + +WGPUComputePipeline TextureManager::get_or_create_compute_pipeline( + const std::string& func_name, const char* shader_code, + size_t uniform_size, int num_input_textures) { + auto it = compute_pipelines_.find(func_name); + if (it != compute_pipelines_.end()) { + return it->second.pipeline; + } + + // Create new pipeline + ShaderComposer& composer = ShaderComposer::Get(); + std::string resolved_shader = composer.Compose({}, shader_code); + + WGPUShaderSourceWGSL wgsl_src = {}; + wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; + wgsl_src.code = str_view(resolved_shader.c_str()); + WGPUShaderModuleDescriptor shader_desc = {}; + shader_desc.nextInChain = &wgsl_src.chain; + WGPUShaderModule shader_module = + wgpuDeviceCreateShaderModule(device_, &shader_desc); + + // Dynamic bind group layout + // Binding 0: output storage texture + // Binding 1: uniform buffer + // Binding 2 to (2 + num_input_textures - 1): input textures + // Binding (2 + num_input_textures): sampler (if inputs > 0) + const int max_entries = 2 + num_input_textures + (num_input_textures > 0 ? 1 : 0); + std::vector<WGPUBindGroupLayoutEntry> bgl_entries(max_entries); + + // Binding 0: Output storage texture + bgl_entries[0].binding = 0; + bgl_entries[0].visibility = WGPUShaderStage_Compute; + bgl_entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + bgl_entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm; + bgl_entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + + // Binding 1: Uniform buffer + bgl_entries[1].binding = 1; + bgl_entries[1].visibility = WGPUShaderStage_Compute; + bgl_entries[1].buffer.type = WGPUBufferBindingType_Uniform; + bgl_entries[1].buffer.minBindingSize = uniform_size; + + // Binding 2+: Input textures + for (int i = 0; i < num_input_textures; ++i) { + bgl_entries[2 + i].binding = 2 + i; + bgl_entries[2 + i].visibility = WGPUShaderStage_Compute; + bgl_entries[2 + i].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[2 + i].texture.viewDimension = WGPUTextureViewDimension_2D; + } + + // Binding N: Sampler (if inputs exist) + if (num_input_textures > 0) { + bgl_entries[2 + num_input_textures].binding = 2 + num_input_textures; + bgl_entries[2 + num_input_textures].visibility = WGPUShaderStage_Compute; + bgl_entries[2 + num_input_textures].sampler.type = WGPUSamplerBindingType_Filtering; + } + + WGPUBindGroupLayoutDescriptor bgl_desc = {}; + bgl_desc.entryCount = max_entries; + bgl_desc.entries = bgl_entries.data(); + WGPUBindGroupLayout bind_group_layout = + wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc); + + WGPUPipelineLayoutDescriptor pl_desc = {}; + pl_desc.bindGroupLayoutCount = 1; + pl_desc.bindGroupLayouts = &bind_group_layout; + WGPUPipelineLayout pipeline_layout = + wgpuDeviceCreatePipelineLayout(device_, &pl_desc); + + WGPUComputePipelineDescriptor pipeline_desc = {}; + pipeline_desc.layout = pipeline_layout; + pipeline_desc.compute.module = shader_module; + pipeline_desc.compute.entryPoint = str_view("main"); + + WGPUComputePipeline pipeline = + wgpuDeviceCreateComputePipeline(device_, &pipeline_desc); + + wgpuPipelineLayoutRelease(pipeline_layout); + wgpuBindGroupLayoutRelease(bind_group_layout); + wgpuShaderModuleRelease(shader_module); + + // Cache pipeline + ComputePipelineInfo info = {pipeline, shader_code, uniform_size, num_input_textures}; + compute_pipelines_[func_name] = info; + + return pipeline; +} + +void TextureManager::dispatch_compute(const std::string& func_name, + WGPUTexture target, + const GpuProceduralParams& params, + const void* uniform_data, + size_t uniform_size) { + auto it = compute_pipelines_.find(func_name); + if (it == compute_pipelines_.end()) { + return; // Pipeline not created yet + } + + WGPUComputePipeline pipeline = it->second.pipeline; + + // Create uniform buffer + WGPUBufferDescriptor buf_desc = {}; + buf_desc.size = uniform_size; + buf_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + buf_desc.mappedAtCreation = WGPUOptionalBool_True; + WGPUBuffer uniform_buf = wgpuDeviceCreateBuffer(device_, &buf_desc); + void* mapped = wgpuBufferGetMappedRange(uniform_buf, 0, uniform_size); + memcpy(mapped, uniform_data, uniform_size); + wgpuBufferUnmap(uniform_buf); + + // Create storage texture view + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA8Unorm; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = 1; + view_desc.arrayLayerCount = 1; + WGPUTextureView target_view = wgpuTextureCreateView(target, &view_desc); + + // Create bind group layout entries (must match pipeline) + WGPUBindGroupLayoutEntry bgl_entries[2] = {}; + bgl_entries[0].binding = 0; + bgl_entries[0].visibility = WGPUShaderStage_Compute; + bgl_entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + bgl_entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm; + bgl_entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + bgl_entries[1].binding = 1; + bgl_entries[1].visibility = WGPUShaderStage_Compute; + bgl_entries[1].buffer.type = WGPUBufferBindingType_Uniform; + bgl_entries[1].buffer.minBindingSize = uniform_size; + + WGPUBindGroupLayoutDescriptor bgl_desc = {}; + bgl_desc.entryCount = 2; + bgl_desc.entries = bgl_entries; + WGPUBindGroupLayout bind_group_layout = + wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc); + + // Create bind group + WGPUBindGroupEntry bg_entries[2] = {}; + bg_entries[0].binding = 0; + bg_entries[0].textureView = target_view; + bg_entries[1].binding = 1; + bg_entries[1].buffer = uniform_buf; + bg_entries[1].size = uniform_size; + + WGPUBindGroupDescriptor bg_desc = {}; + bg_desc.layout = bind_group_layout; + bg_desc.entryCount = 2; + bg_desc.entries = bg_entries; + WGPUBindGroup bind_group = wgpuDeviceCreateBindGroup(device_, &bg_desc); + + // Dispatch compute + WGPUCommandEncoderDescriptor enc_desc = {}; + WGPUCommandEncoder encoder = + wgpuDeviceCreateCommandEncoder(device_, &enc_desc); + WGPUComputePassEncoder pass = + wgpuCommandEncoderBeginComputePass(encoder, nullptr); + wgpuComputePassEncoderSetPipeline(pass, pipeline); + wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr); + wgpuComputePassEncoderDispatchWorkgroups(pass, (params.width + 7) / 8, + (params.height + 7) / 8, 1); + wgpuComputePassEncoderEnd(pass); + + WGPUCommandBufferDescriptor cmd_desc = {}; + WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc); + wgpuQueueSubmit(queue_, 1, &cmd); + + // Cleanup + wgpuCommandBufferRelease(cmd); + wgpuCommandEncoderRelease(encoder); + wgpuComputePassEncoderRelease(pass); + wgpuBindGroupRelease(bind_group); + wgpuBindGroupLayoutRelease(bind_group_layout); + wgpuBufferRelease(uniform_buf); + wgpuTextureViewRelease(target_view); +} + +void TextureManager::create_gpu_noise_texture( + const std::string& name, const GpuProceduralParams& params) { + extern const char* gen_noise_compute_wgsl; + get_or_create_compute_pipeline("gen_noise", gen_noise_compute_wgsl, 16); + + WGPUTextureDescriptor tex_desc = {}; + tex_desc.usage = + WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; + tex_desc.dimension = WGPUTextureDimension_2D; + tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1}; + tex_desc.format = WGPUTextureFormat_RGBA8Unorm; + tex_desc.mipLevelCount = 1; + tex_desc.sampleCount = 1; + WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc); + + struct NoiseParams { + uint32_t width; + uint32_t height; + float seed; + float frequency; + }; + NoiseParams uniforms = {(uint32_t)params.width, (uint32_t)params.height, + params.params[0], params.params[1]}; + dispatch_compute("gen_noise", texture, params, &uniforms, sizeof(NoiseParams)); + + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA8Unorm; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = 1; + view_desc.arrayLayerCount = 1; + WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc); + + GpuTexture gpu_tex; + gpu_tex.texture = texture; + gpu_tex.view = view; + gpu_tex.width = params.width; + gpu_tex.height = params.height; + textures_[name] = gpu_tex; + +#if !defined(STRIP_ALL) + printf("Generated GPU noise texture: %s (%dx%d)\n", name.c_str(), + params.width, params.height); +#endif +} + +void TextureManager::create_gpu_perlin_texture( + const std::string& name, const GpuProceduralParams& params) { + extern const char* gen_perlin_compute_wgsl; + get_or_create_compute_pipeline("gen_perlin", gen_perlin_compute_wgsl, 32); + + WGPUTextureDescriptor tex_desc = {}; + tex_desc.usage = + WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; + tex_desc.dimension = WGPUTextureDimension_2D; + tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1}; + tex_desc.format = WGPUTextureFormat_RGBA8Unorm; + tex_desc.mipLevelCount = 1; + tex_desc.sampleCount = 1; + WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc); + + struct PerlinParams { + uint32_t width; + uint32_t height; + float seed; + float frequency; + float amplitude; + float amplitude_decay; + uint32_t octaves; + float _pad0; + }; + PerlinParams uniforms = { + (uint32_t)params.width, + (uint32_t)params.height, + params.params[0], + params.params[1], + params.num_params > 2 ? params.params[2] : 1.0f, + params.num_params > 3 ? params.params[3] : 0.5f, + params.num_params > 4 ? (uint32_t)params.params[4] : 4u, + 0.0f}; + dispatch_compute("gen_perlin", texture, params, &uniforms, + sizeof(PerlinParams)); + + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA8Unorm; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = 1; + view_desc.arrayLayerCount = 1; + WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc); + + GpuTexture gpu_tex; + gpu_tex.texture = texture; + gpu_tex.view = view; + gpu_tex.width = params.width; + gpu_tex.height = params.height; + textures_[name] = gpu_tex; + +#if !defined(STRIP_ALL) + printf("Generated GPU perlin texture: %s (%dx%d)\n", name.c_str(), + params.width, params.height); +#endif +} + +void TextureManager::create_gpu_grid_texture( + const std::string& name, const GpuProceduralParams& params) { + extern const char* gen_grid_compute_wgsl; + get_or_create_compute_pipeline("gen_grid", gen_grid_compute_wgsl, 16); + + WGPUTextureDescriptor tex_desc = {}; + tex_desc.usage = + WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; + tex_desc.dimension = WGPUTextureDimension_2D; + tex_desc.size = {(uint32_t)params.width, (uint32_t)params.height, 1}; + tex_desc.format = WGPUTextureFormat_RGBA8Unorm; + tex_desc.mipLevelCount = 1; + tex_desc.sampleCount = 1; + WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc); + + struct GridParams { + uint32_t width; + uint32_t height; + uint32_t grid_size; + uint32_t thickness; + }; + GridParams uniforms = { + (uint32_t)params.width, (uint32_t)params.height, + params.num_params > 0 ? (uint32_t)params.params[0] : 32u, + params.num_params > 1 ? (uint32_t)params.params[1] : 2u}; + dispatch_compute("gen_grid", texture, params, &uniforms, sizeof(GridParams)); + + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA8Unorm; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = 1; + view_desc.arrayLayerCount = 1; + WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc); + + GpuTexture gpu_tex; + gpu_tex.texture = texture; + gpu_tex.view = view; + gpu_tex.width = params.width; + gpu_tex.height = params.height; + textures_[name] = gpu_tex; + +#if !defined(STRIP_ALL) + printf("Generated GPU grid texture: %s (%dx%d)\n", name.c_str(), + params.width, params.height); +#endif +} + +#if !defined(STRIP_GPU_COMPOSITE) +WGPUSampler TextureManager::get_or_create_sampler(SamplerType type) { + auto it = samplers_.find(type); + if (it != samplers_.end()) { + return it->second; + } + + WGPUSamplerDescriptor desc = {}; + desc.lodMinClamp = 0.0f; + desc.lodMaxClamp = 1.0f; + desc.maxAnisotropy = 1; + + switch (type) { + case SamplerType::LinearClamp: + desc.addressModeU = WGPUAddressMode_ClampToEdge; + desc.addressModeV = WGPUAddressMode_ClampToEdge; + desc.magFilter = WGPUFilterMode_Linear; + desc.minFilter = WGPUFilterMode_Linear; + desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + break; + case SamplerType::LinearRepeat: + desc.addressModeU = WGPUAddressMode_Repeat; + desc.addressModeV = WGPUAddressMode_Repeat; + desc.magFilter = WGPUFilterMode_Linear; + desc.minFilter = WGPUFilterMode_Linear; + desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + break; + case SamplerType::NearestClamp: + desc.addressModeU = WGPUAddressMode_ClampToEdge; + desc.addressModeV = WGPUAddressMode_ClampToEdge; + desc.magFilter = WGPUFilterMode_Nearest; + desc.minFilter = WGPUFilterMode_Nearest; + desc.mipmapFilter = WGPUMipmapFilterMode_Nearest; + break; + case SamplerType::NearestRepeat: + desc.addressModeU = WGPUAddressMode_Repeat; + desc.addressModeV = WGPUAddressMode_Repeat; + desc.magFilter = WGPUFilterMode_Nearest; + desc.minFilter = WGPUFilterMode_Nearest; + desc.mipmapFilter = WGPUMipmapFilterMode_Nearest; + break; + } + + WGPUSampler sampler = wgpuDeviceCreateSampler(device_, &desc); + samplers_[type] = sampler; + return sampler; +} + +void TextureManager::dispatch_composite( + const std::string& func_name, WGPUTexture target, + const GpuProceduralParams& params, const void* uniform_data, + size_t uniform_size, const std::vector<WGPUTextureView>& input_views, + SamplerType sampler_type) { + auto it = compute_pipelines_.find(func_name); + if (it == compute_pipelines_.end()) { + return; // Pipeline not created yet + } + + WGPUComputePipeline pipeline = it->second.pipeline; + int num_inputs = (int)input_views.size(); + + // Create uniform buffer + WGPUBufferDescriptor buf_desc = {}; + buf_desc.size = uniform_size; + buf_desc.usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst; + buf_desc.mappedAtCreation = WGPUOptionalBool_True; + WGPUBuffer uniform_buf = wgpuDeviceCreateBuffer(device_, &buf_desc); + void* mapped = wgpuBufferGetMappedRange(uniform_buf, 0, uniform_size); + memcpy(mapped, uniform_data, uniform_size); + wgpuBufferUnmap(uniform_buf); + + // Create storage texture view + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA8Unorm; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = 1; + view_desc.arrayLayerCount = 1; + WGPUTextureView target_view = wgpuTextureCreateView(target, &view_desc); + + // Dynamic bind group + const int max_entries = 2 + num_inputs + (num_inputs > 0 ? 1 : 0); + std::vector<WGPUBindGroupEntry> bg_entries(max_entries); + + // Binding 0: Output texture + bg_entries[0].binding = 0; + bg_entries[0].textureView = target_view; + + // Binding 1: Uniform buffer + bg_entries[1].binding = 1; + bg_entries[1].buffer = uniform_buf; + bg_entries[1].size = uniform_size; + + // Binding 2+: Input textures + for (int i = 0; i < num_inputs; ++i) { + bg_entries[2 + i].binding = 2 + i; + bg_entries[2 + i].textureView = input_views[i]; + } + + // Binding N: Sampler + if (num_inputs > 0) { + bg_entries[2 + num_inputs].binding = 2 + num_inputs; + bg_entries[2 + num_inputs].sampler = get_or_create_sampler(sampler_type); + } + + // Create bind group layout (must match pipeline) + const int layout_entries_count = 2 + num_inputs + (num_inputs > 0 ? 1 : 0); + std::vector<WGPUBindGroupLayoutEntry> bgl_entries(layout_entries_count); + + bgl_entries[0].binding = 0; + bgl_entries[0].visibility = WGPUShaderStage_Compute; + bgl_entries[0].storageTexture.access = WGPUStorageTextureAccess_WriteOnly; + bgl_entries[0].storageTexture.format = WGPUTextureFormat_RGBA8Unorm; + bgl_entries[0].storageTexture.viewDimension = WGPUTextureViewDimension_2D; + + bgl_entries[1].binding = 1; + bgl_entries[1].visibility = WGPUShaderStage_Compute; + bgl_entries[1].buffer.type = WGPUBufferBindingType_Uniform; + bgl_entries[1].buffer.minBindingSize = uniform_size; + + for (int i = 0; i < num_inputs; ++i) { + bgl_entries[2 + i].binding = 2 + i; + bgl_entries[2 + i].visibility = WGPUShaderStage_Compute; + bgl_entries[2 + i].texture.sampleType = WGPUTextureSampleType_Float; + bgl_entries[2 + i].texture.viewDimension = WGPUTextureViewDimension_2D; + } + + if (num_inputs > 0) { + bgl_entries[2 + num_inputs].binding = 2 + num_inputs; + bgl_entries[2 + num_inputs].visibility = WGPUShaderStage_Compute; + bgl_entries[2 + num_inputs].sampler.type = WGPUSamplerBindingType_Filtering; + } + + WGPUBindGroupLayoutDescriptor bgl_desc = {}; + bgl_desc.entryCount = layout_entries_count; + bgl_desc.entries = bgl_entries.data(); + WGPUBindGroupLayout bind_group_layout = + wgpuDeviceCreateBindGroupLayout(device_, &bgl_desc); + + WGPUBindGroupDescriptor bg_desc = {}; + bg_desc.layout = bind_group_layout; + bg_desc.entryCount = max_entries; + bg_desc.entries = bg_entries.data(); + WGPUBindGroup bind_group = wgpuDeviceCreateBindGroup(device_, &bg_desc); + + // Dispatch compute + WGPUCommandEncoderDescriptor enc_desc = {}; + WGPUCommandEncoder encoder = + wgpuDeviceCreateCommandEncoder(device_, &enc_desc); + WGPUComputePassEncoder pass = + wgpuCommandEncoderBeginComputePass(encoder, nullptr); + wgpuComputePassEncoderSetPipeline(pass, pipeline); + wgpuComputePassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr); + wgpuComputePassEncoderDispatchWorkgroups(pass, (params.width + 7) / 8, + (params.height + 7) / 8, 1); + wgpuComputePassEncoderEnd(pass); + + WGPUCommandBufferDescriptor cmd_desc = {}; + WGPUCommandBuffer cmd = wgpuCommandEncoderFinish(encoder, &cmd_desc); + wgpuQueueSubmit(queue_, 1, &cmd); + + // Cleanup + wgpuCommandBufferRelease(cmd); + wgpuCommandEncoderRelease(encoder); + wgpuComputePassEncoderRelease(pass); + wgpuBindGroupRelease(bind_group); + wgpuBindGroupLayoutRelease(bind_group_layout); + wgpuBufferRelease(uniform_buf); + wgpuTextureViewRelease(target_view); +} + +void TextureManager::create_gpu_composite_texture( + const std::string& name, const std::string& shader_func, + const char* shader_code, const void* uniform_data, size_t uniform_size, + int width, int height, const std::vector<std::string>& input_names, + SamplerType sampler) { + // Create pipeline if needed + get_or_create_compute_pipeline(shader_func, shader_code, uniform_size, + (int)input_names.size()); + + // Resolve input texture views + std::vector<WGPUTextureView> input_views; + input_views.reserve(input_names.size()); + for (const auto& input_name : input_names) { + WGPUTextureView view = get_texture_view(input_name); + if (!view) { + fprintf(stderr, "Error: Input texture not found: %s\n", + input_name.c_str()); + return; + } + input_views.push_back(view); + } + + // Create output texture + WGPUTextureDescriptor tex_desc = {}; + tex_desc.usage = + WGPUTextureUsage_StorageBinding | WGPUTextureUsage_TextureBinding; + tex_desc.dimension = WGPUTextureDimension_2D; + tex_desc.size = {(uint32_t)width, (uint32_t)height, 1}; + tex_desc.format = WGPUTextureFormat_RGBA8Unorm; + tex_desc.mipLevelCount = 1; + tex_desc.sampleCount = 1; + WGPUTexture texture = wgpuDeviceCreateTexture(device_, &tex_desc); + + // Dispatch composite shader + GpuProceduralParams params = {width, height, nullptr, 0}; + dispatch_composite(shader_func, texture, params, uniform_data, uniform_size, + input_views, sampler); + + // Create view + WGPUTextureViewDescriptor view_desc = {}; + view_desc.format = WGPUTextureFormat_RGBA8Unorm; + view_desc.dimension = WGPUTextureViewDimension_2D; + view_desc.mipLevelCount = 1; + view_desc.arrayLayerCount = 1; + WGPUTextureView view = wgpuTextureCreateView(texture, &view_desc); + + // Store + GpuTexture gpu_tex; + gpu_tex.texture = texture; + gpu_tex.view = view; + gpu_tex.width = width; + gpu_tex.height = height; + textures_[name] = gpu_tex; + +#if !defined(STRIP_ALL) + printf("Generated GPU composite texture: %s (%dx%d, %zu inputs)\n", + name.c_str(), width, height, input_names.size()); +#endif +} +#endif // !defined(STRIP_GPU_COMPOSITE) + +#if !defined(STRIP_ALL) +WGPUTextureView TextureManager::get_or_generate_gpu_texture( + const std::string& name, const GpuProceduralParams& params) { + auto it = textures_.find(name); + if (it != textures_.end()) { + return it->second.view; + } + create_gpu_noise_texture(name, params); + return textures_[name].view; +} +#endif diff --git a/src/gpu/texture_manager.h b/src/gpu/texture_manager.h index 23fdbe8..5a2b9f8 100644 --- a/src/gpu/texture_manager.h +++ b/src/gpu/texture_manager.h @@ -23,6 +23,13 @@ struct GpuTexture { int height; }; +struct GpuProceduralParams { + int width; + int height; + const float* params; + int num_params; +}; + class TextureManager { public: void init(WGPUDevice device, WGPUQueue queue); @@ -36,11 +43,72 @@ class TextureManager { void create_texture(const std::string& name, int width, int height, const uint8_t* data); + // GPU procedural generation + void create_gpu_noise_texture(const std::string& name, + const GpuProceduralParams& params); + void create_gpu_perlin_texture(const std::string& name, + const GpuProceduralParams& params); + void create_gpu_grid_texture(const std::string& name, + const GpuProceduralParams& params); + +#if !defined(STRIP_GPU_COMPOSITE) + enum class SamplerType { + LinearClamp, + LinearRepeat, + NearestClamp, + NearestRepeat + }; + + // GPU composite generation (multi-input textures) + void create_gpu_composite_texture(const std::string& name, + const std::string& shader_func, + const char* shader_code, + const void* uniform_data, + size_t uniform_size, + int width, int height, + const std::vector<std::string>& input_names, + SamplerType sampler = SamplerType::LinearClamp); +#endif + +#if !defined(STRIP_ALL) + // On-demand lazy generation (stripped in final builds) + WGPUTextureView get_or_generate_gpu_texture(const std::string& name, + const GpuProceduralParams& params); +#endif + // Retrieves a texture view by name (returns nullptr if not found) WGPUTextureView get_texture_view(const std::string& name); private: + struct ComputePipelineInfo { + WGPUComputePipeline pipeline; + const char* shader_code; + size_t uniform_size; + int num_input_textures; + }; + + WGPUComputePipeline get_or_create_compute_pipeline(const std::string& func_name, + const char* shader_code, + size_t uniform_size, + int num_input_textures = 0); + void dispatch_compute(const std::string& func_name, WGPUTexture target, + const GpuProceduralParams& params, const void* uniform_data, + size_t uniform_size); + +#if !defined(STRIP_GPU_COMPOSITE) + void dispatch_composite(const std::string& func_name, WGPUTexture target, + const GpuProceduralParams& params, + const void* uniform_data, size_t uniform_size, + const std::vector<WGPUTextureView>& input_views, + SamplerType sampler_type); +#endif + WGPUDevice device_; WGPUQueue queue_; std::map<std::string, GpuTexture> textures_; + std::map<std::string, ComputePipelineInfo> compute_pipelines_; +#if !defined(STRIP_GPU_COMPOSITE) + WGPUSampler get_or_create_sampler(SamplerType type); + std::map<SamplerType, WGPUSampler> samplers_; +#endif }; diff --git a/src/gpu/uniform_helper.h b/src/gpu/uniform_helper.h index 151153f..8556c98 100644 --- a/src/gpu/uniform_helper.h +++ b/src/gpu/uniform_helper.h @@ -5,7 +5,6 @@ #pragma once #include "gpu/gpu.h" -#include <cstring> // Generic uniform buffer helper // Usage: diff --git a/src/main.cc b/src/main.cc index 4c44a78..59001fb 100644 --- a/src/main.cc +++ b/src/main.cc @@ -160,13 +160,9 @@ int main(int argc, char** argv) { } #endif /* !defined(STRIP_ALL) */ - // PRE-FILL: Fill ring buffer with initial 200ms before starting audio device - // This prevents underrun on first callback - g_audio_engine.update(g_music_time, 1.0f / 60.0f); - audio_render_ahead(g_music_time, - 1.0f / 60.0f); // Fill buffer with lookahead + // Pre-fill using same pattern as main loop (100ms) + fill_audio_buffer(0.1f, 0.0); - // Start audio (or render to WAV file) audio_start(); g_last_audio_time = audio_get_playback_time(); // Initialize after start diff --git a/src/test_demo.cc b/src/test_demo.cc index a438bbc..b8e9381 100644 --- a/src/test_demo.cc +++ b/src/test_demo.cc @@ -32,15 +32,23 @@ class PeakMeterEffect : public PostProcessEffect { }; struct Uniforms { - peak_value: f32, + resolution: vec2<f32>, _pad0: f32, _pad1: f32, - _pad2: f32, + aspect_ratio: f32, + time: f32, + beat: f32, + audio_intensity: f32, + }; + + struct EffectParams { + unused: f32, }; @group(0) @binding(0) var inputSampler: sampler; @group(0) @binding(1) var inputTexture: texture_2d<f32>; @group(0) @binding(2) var<uniform> uniforms: Uniforms; + @group(0) @binding(3) var<uniform> params: EffectParams; @vertex fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput { @@ -69,7 +77,7 @@ class PeakMeterEffect : public PostProcessEffect { // Optimization: Return bar color early (avoids texture sampling for ~5% of pixels) if (in_bar_y && in_bar_x) { let uv_x = (input.uv.x - bar_x_min) / (bar_x_max - bar_x_min); - let factor = step(uv_x, uniforms.peak_value); + let factor = step(uv_x, uniforms.audio_intensity); return mix(vec4<f32>(0.0, 0.0, 0.0, 1.0), vec4<f32>(1.0, 0.0, 0.0,1.0), factor); } @@ -80,24 +88,26 @@ class PeakMeterEffect : public PostProcessEffect { pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, shader_code); - uniforms_ = gpu_create_buffer( - ctx_.device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst); } void update_bind_group(WGPUTextureView input_view) { pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, - uniforms_, {}); + uniforms_.get(), {}); } void render(WGPURenderPassEncoder pass, float time, float beat, float peak_value, float aspect_ratio) { (void)time; (void)beat; - (void)aspect_ratio; - float uniforms[4] = {peak_value, 0.0f, 0.0f, 0.0f}; - wgpuQueueWriteBuffer(ctx_.queue, uniforms_.buffer, 0, uniforms, - sizeof(uniforms)); + CommonPostProcessUniforms u = { + .resolution = {(float)width_, (float)height_}, + .aspect_ratio = aspect_ratio, + .time = time, + .beat = beat, + .audio_intensity = peak_value, + }; + uniforms_.update(ctx_.queue, u); wgpuRenderPassEncoderSetPipeline(pass, pipeline_); wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); @@ -209,6 +219,9 @@ int main(int argc, char** argv) { platform_state = platform_init(fullscreen_enabled, width, height); gpu_init(&platform_state); + // Load timeline from test_demo.seq + LoadTimeline(*gpu_get_main_sequence(), *gpu_get_context()); + // Add peak meter visualization effect (renders as final post-process) #if !defined(STRIP_ALL) const GpuContext* gpu_ctx = gpu_get_context(); @@ -253,9 +266,9 @@ int main(int argc, char** argv) { audio_render_ahead(g_music_time, audio_dt * g_tempo_scale); }; - // Pre-fill audio buffer - g_audio_engine.update(g_music_time, 1.0f / 60.0f); - audio_render_ahead(g_music_time, 1.0f / 60.0f); + // Pre-fill using same pattern as main loop (100ms) + fill_audio_buffer(0.1f, 0.0); + audio_start(); g_last_audio_time = audio_get_playback_time(); diff --git a/src/tests/test_3d_render.cc b/src/tests/test_3d_render.cc index fa13a43..eee46ba 100644 --- a/src/tests/test_3d_render.cc +++ b/src/tests/test_3d_render.cc @@ -220,25 +220,36 @@ int main(int argc, char** argv) { g_renderer.resize(platform_state.width, platform_state.height); g_textures.init(g_device, g_queue); - ProceduralTextureDef noise_def; - noise_def.width = 256; - noise_def.height = 256; - noise_def.gen_func = gen_periodic_noise; - noise_def.params.push_back(1234.0f); - noise_def.params.push_back(16.0f); - g_textures.create_procedural_texture("noise", noise_def); + // GPU Noise texture (replaces CPU procedural) + GpuProceduralParams noise_params = {}; + noise_params.width = 256; + noise_params.height = 256; + float noise_vals[2] = {1234.0f, 16.0f}; + noise_params.params = noise_vals; + noise_params.num_params = 2; + g_textures.create_gpu_noise_texture("noise", noise_params); g_renderer.set_noise_texture(g_textures.get_texture_view("noise")); - ProceduralTextureDef sky_def; - sky_def.width = 512; - sky_def.height = 256; - sky_def.gen_func = procedural::gen_perlin; - sky_def.params = {42.0f, 4.0f, 1.0f, 0.5f, 6.0f}; - g_textures.create_procedural_texture("sky", sky_def); - + // GPU Perlin texture for sky (replaces CPU procedural) + GpuProceduralParams sky_params = {}; + sky_params.width = 512; + sky_params.height = 256; + float sky_vals[5] = {42.0f, 4.0f, 1.0f, 0.5f, 6.0f}; + sky_params.params = sky_vals; + sky_params.num_params = 5; + g_textures.create_gpu_perlin_texture("sky", sky_params); g_renderer.set_sky_texture(g_textures.get_texture_view("sky")); + // GPU Grid texture (new!) + GpuProceduralParams grid_params = {}; + grid_params.width = 256; + grid_params.height = 256; + float grid_vals[2] = {32.0f, 2.0f}; // grid_size, thickness + grid_params.params = grid_vals; + grid_params.num_params = 2; + g_textures.create_gpu_grid_texture("grid", grid_params); + setup_scene(); g_camera.position = vec3(0, 5, 10); diff --git a/src/tests/test_demo_effects.cc b/src/tests/test_demo_effects.cc index d0163c2..0d2b09a 100644 --- a/src/tests/test_demo_effects.cc +++ b/src/tests/test_demo_effects.cc @@ -197,6 +197,9 @@ static void test_effect_type_classification() { int main() { fprintf(stdout, "=== Demo Effects Tests ===\n"); + extern void InitShaderComposer(); + InitShaderComposer(); + test_post_process_effects(); test_scene_effects(); test_effect_type_classification(); diff --git a/src/tests/test_effect_base.cc b/src/tests/test_effect_base.cc index e280e05..612e9da 100644 --- a/src/tests/test_effect_base.cc +++ b/src/tests/test_effect_base.cc @@ -249,6 +249,9 @@ static void test_pixel_helpers() { int main() { fprintf(stdout, "=== Effect Base Tests ===\n"); + extern void InitShaderComposer(); + InitShaderComposer(); + test_webgpu_fixture(); test_offscreen_render_target(); test_effect_construction(); diff --git a/src/tests/test_gpu_composite.cc b/src/tests/test_gpu_composite.cc new file mode 100644 index 0000000..e5ac788 --- /dev/null +++ b/src/tests/test_gpu_composite.cc @@ -0,0 +1,124 @@ +// This file is part of the 64k demo project. +// Tests GPU composite texture generation (Phase 4). + +#include "gpu/gpu.h" +#include "gpu/texture_manager.h" +#include "platform/platform.h" +#include <cstdint> +#include <cstdio> +#include <vector> + +#if !defined(STRIP_GPU_COMPOSITE) + +int main() { + printf("GPU Composite Test: Starting...\n"); + + // Initialize GPU + PlatformState platform = platform_init(false, 256, 256); + if (!platform.window) { + fprintf(stderr, "Error: Failed to create window\n"); + return 1; + } + + gpu_init(&platform); + const GpuContext* ctx = gpu_get_context(); + + extern void InitShaderComposer(); + InitShaderComposer(); + + TextureManager tex_mgr; + tex_mgr.init(ctx->device, ctx->queue); + + // Create base textures + float noise_params_a[2] = {1234.0f, 4.0f}; + GpuProceduralParams noise_a = {256, 256, noise_params_a, 2}; + tex_mgr.create_gpu_noise_texture("noise_a", noise_a); + + float noise_params_b[2] = {5678.0f, 8.0f}; + GpuProceduralParams noise_b = {256, 256, noise_params_b, 2}; + tex_mgr.create_gpu_noise_texture("noise_b", noise_b); + + float grid_params[2] = {32.0f, 2.0f}; + GpuProceduralParams grid = {256, 256, grid_params, 2}; + tex_mgr.create_gpu_grid_texture("grid", grid); + + printf("SUCCESS: Base textures created (noise_a, noise_b, grid)\n"); + + // Test blend composite + extern const char* gen_blend_compute_wgsl; + struct { + uint32_t width, height; + float blend_factor, _pad0; + } blend_uni = {256, 256, 0.5f, 0.0f}; + + std::vector<std::string> blend_inputs = {"noise_a", "noise_b"}; + tex_mgr.create_gpu_composite_texture("blended", "gen_blend", + gen_blend_compute_wgsl, &blend_uni, + sizeof(blend_uni), 256, 256, blend_inputs); + + WGPUTextureView blended_view = tex_mgr.get_texture_view("blended"); + if (!blended_view) { + fprintf(stderr, "Error: Blended texture not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + printf("SUCCESS: Blend composite created (noise_a + noise_b)\n"); + + // Test mask composite + extern const char* gen_mask_compute_wgsl; + struct { + uint32_t width, height; + } mask_uni = {256, 256}; + + std::vector<std::string> mask_inputs = {"noise_a", "grid"}; + tex_mgr.create_gpu_composite_texture("masked", "gen_mask", gen_mask_compute_wgsl, + &mask_uni, sizeof(mask_uni), 256, 256, + mask_inputs); + + WGPUTextureView masked_view = tex_mgr.get_texture_view("masked"); + if (!masked_view) { + fprintf(stderr, "Error: Masked texture not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + printf("SUCCESS: Mask composite created (noise_a * grid)\n"); + + // Test multi-stage composite (composite of composite) + struct { + uint32_t width, height; + float blend_factor, _pad0; + } blend2_uni = {256, 256, 0.7f, 0.0f}; + + std::vector<std::string> blend2_inputs = {"blended", "masked"}; + tex_mgr.create_gpu_composite_texture("final", "gen_blend", + gen_blend_compute_wgsl, &blend2_uni, + sizeof(blend2_uni), 256, 256, blend2_inputs); + + WGPUTextureView final_view = tex_mgr.get_texture_view("final"); + if (!final_view) { + fprintf(stderr, "Error: Multi-stage composite not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + printf("SUCCESS: Multi-stage composite (composite of composites)\n"); + + // Cleanup + tex_mgr.shutdown(); + gpu_shutdown(); + platform_shutdown(&platform); + + printf("All GPU composite tests passed!\n"); + return 0; +} + +#else + +int main() { + printf("GPU Composite Test: SKIPPED (STRIP_GPU_COMPOSITE defined)\n"); + return 0; +} + +#endif diff --git a/src/tests/test_gpu_procedural.cc b/src/tests/test_gpu_procedural.cc new file mode 100644 index 0000000..f1bade0 --- /dev/null +++ b/src/tests/test_gpu_procedural.cc @@ -0,0 +1,117 @@ +// This file is part of the 64k demo project. +// Tests GPU procedural texture generation. + +#include "gpu/gpu.h" +#include "gpu/texture_manager.h" +#include "platform/platform.h" +#include <cstdio> + +int main() { + printf("GPU Procedural Test: Starting...\n"); + + // Minimal GPU initialization for testing + PlatformState platform = platform_init(false, 256, 256); + if (!platform.window) { + fprintf(stderr, "Error: Failed to create window\n"); + return 1; + } + + gpu_init(&platform); + const GpuContext* ctx = gpu_get_context(); + + // Initialize shader composer (needed for #include resolution) + extern void InitShaderComposer(); + InitShaderComposer(); + + // Create TextureManager + TextureManager tex_mgr; + tex_mgr.init(ctx->device, ctx->queue); + + // Test GPU noise generation + GpuProceduralParams params = {}; + params.width = 256; + params.height = 256; + float proc_params[2] = {0.0f, 4.0f}; // seed, frequency + params.params = proc_params; + params.num_params = 2; + + tex_mgr.create_gpu_noise_texture("test_noise", params); + + // Verify texture exists + WGPUTextureView view = tex_mgr.get_texture_view("test_noise"); + if (!view) { + fprintf(stderr, "Error: GPU noise texture not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + printf("SUCCESS: GPU noise texture created (256x256)\n"); + + // Test pipeline caching (create second noise texture) + tex_mgr.create_gpu_noise_texture("test_noise_2", params); + WGPUTextureView view2 = tex_mgr.get_texture_view("test_noise_2"); + if (!view2) { + fprintf(stderr, "Error: Second GPU noise texture not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + printf("SUCCESS: Pipeline caching works (second noise texture)\n"); + + // Test GPU perlin generation + float perlin_params[5] = {42.0f, 4.0f, 1.0f, 0.5f, 6.0f}; + GpuProceduralParams perlin = {512, 256, perlin_params, 5}; + tex_mgr.create_gpu_perlin_texture("test_perlin", perlin); + WGPUTextureView perlin_view = tex_mgr.get_texture_view("test_perlin"); + if (!perlin_view) { + fprintf(stderr, "Error: GPU perlin texture not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + printf("SUCCESS: GPU perlin texture created (512x256)\n"); + + // Test GPU grid generation + float grid_params[2] = {32.0f, 2.0f}; + GpuProceduralParams grid = {256, 256, grid_params, 2}; + tex_mgr.create_gpu_grid_texture("test_grid", grid); + WGPUTextureView grid_view = tex_mgr.get_texture_view("test_grid"); + if (!grid_view) { + fprintf(stderr, "Error: GPU grid texture not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + printf("SUCCESS: GPU grid texture created (256x256)\n"); + + // Test multiple pipelines coexist + printf("SUCCESS: All three GPU generators work (unified pipeline system)\n"); + + // Test variable-size textures + float noise_small[2] = {999.0f, 8.0f}; + GpuProceduralParams small = {128, 64, noise_small, 2}; + tex_mgr.create_gpu_noise_texture("noise_128x64", small); + if (!tex_mgr.get_texture_view("noise_128x64")) { + fprintf(stderr, "Error: Variable-size texture (128x64) not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + + float noise_large[2] = {777.0f, 2.0f}; + GpuProceduralParams large = {1024, 512, noise_large, 2}; + tex_mgr.create_gpu_noise_texture("noise_1024x512", large); + if (!tex_mgr.get_texture_view("noise_1024x512")) { + fprintf(stderr, "Error: Variable-size texture (1024x512) not created\n"); + tex_mgr.shutdown(); + gpu_shutdown(); + return 1; + } + printf("SUCCESS: Variable-size textures work (128x64, 1024x512)\n"); + + // Cleanup + tex_mgr.shutdown(); + gpu_shutdown(); + platform_shutdown(&platform); + return 0; +} diff --git a/src/tests/test_post_process_helper.cc b/src/tests/test_post_process_helper.cc index 104bbc3..36d193e 100644 --- a/src/tests/test_post_process_helper.cc +++ b/src/tests/test_post_process_helper.cc @@ -182,14 +182,14 @@ static void test_bind_group_update() { // Create initial bind group WGPUBindGroup bind_group = nullptr; - pp_update_bind_group(fixture.device(), pipeline, &bind_group, view1, - uniforms, dummy_effect_params_buffer); + pp_update_bind_group(fixture.device(), pipeline, &bind_group, view1, uniforms, + dummy_effect_params_buffer); assert(bind_group != nullptr && "Initial bind group should be created"); fprintf(stdout, " ✓ Initial bind group created\n"); // Update bind group (should release old and create new) - pp_update_bind_group(fixture.device(), pipeline, &bind_group, view2, - uniforms, dummy_effect_params_buffer); + pp_update_bind_group(fixture.device(), pipeline, &bind_group, view2, uniforms, + dummy_effect_params_buffer); assert(bind_group != nullptr && "Updated bind group should be created"); fprintf(stdout, " ✓ Bind group updated successfully\n"); diff --git a/src/tests/test_shader_compilation.cc b/src/tests/test_shader_compilation.cc index e2c0adc..a322e8a 100644 --- a/src/tests/test_shader_compilation.cc +++ b/src/tests/test_shader_compilation.cc @@ -115,16 +115,19 @@ static bool test_shader_compilation(const char* name, const char* shader_code) { return true; // Not a failure, just skipped } + // Compose shader to resolve #include directives + std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code); + #if defined(DEMO_CROSS_COMPILE_WIN32) WGPUShaderModuleWGSLDescriptor wgsl_desc = {}; wgsl_desc.chain.sType = WGPUSType_ShaderModuleWGSLDescriptor; - wgsl_desc.code = shader_code; + wgsl_desc.code = composed_shader.c_str(); WGPUShaderModuleDescriptor shader_desc = {}; shader_desc.nextInChain = (const WGPUChainedStruct*)&wgsl_desc.chain; #else WGPUShaderSourceWGSL wgsl_desc = {}; wgsl_desc.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_desc.code = str_view(shader_code); + wgsl_desc.code = str_view(composed_shader.c_str()); WGPUShaderModuleDescriptor shader_desc = {}; shader_desc.nextInChain = (const WGPUChainedStruct*)&wgsl_desc.chain; #endif diff --git a/src/util/asset_manager.h b/src/util/asset_manager.h index 1e0638c..168bfca 100644 --- a/src/util/asset_manager.h +++ b/src/util/asset_manager.h @@ -10,6 +10,7 @@ struct AssetRecord { size_t size; // Size of the asset data bool is_procedural; // True if data was dynamically allocated by a procedural // generator + bool is_gpu_procedural; // True if GPU compute shader generates texture const char* proc_func_name_str; // Name of procedural generation function // (string literal) const float* proc_params; // Parameters for procedural generation (static, diff --git a/tools/asset_packer.cc b/tools/asset_packer.cc index 0d26cf6..4aaa0e7 100644 --- a/tools/asset_packer.cc +++ b/tools/asset_packer.cc @@ -52,6 +52,7 @@ struct AssetBuildInfo { std::string name; std::string filename; // Original filename for static assets bool is_procedural; + bool is_gpu_procedural; std::string proc_func_name; // Function name string std::vector<float> proc_params; // Parameters for procedural function @@ -182,9 +183,64 @@ int main(int argc, char* argv[]) { info.params_array_name = "ASSET_PROC_PARAMS_" + info.name; info.func_name_str_name = "ASSET_PROC_FUNC_STR_" + info.name; info.is_procedural = false; + info.is_gpu_procedural = false; - if (compression_type_str.rfind("PROC(", 0) == 0) { + if (compression_type_str.rfind("PROC_GPU(", 0) == 0) { info.is_procedural = true; + info.is_gpu_procedural = true; + size_t open_paren = compression_type_str.find('('); + size_t close_paren = compression_type_str.rfind(')'); + if (open_paren == std::string::npos || + close_paren == std::string::npos) { + fprintf(stderr, + "Error: Invalid PROC_GPU() syntax for asset: %s, string: [%s]\n", + info.name.c_str(), compression_type_str.c_str()); + return 1; + } + std::string func_and_params_str = compression_type_str.substr( + open_paren + 1, close_paren - open_paren - 1); + + size_t params_start = func_and_params_str.find(','); + if (params_start != std::string::npos) { + std::string params_str = func_and_params_str.substr(params_start + 1); + info.proc_func_name = func_and_params_str.substr(0, params_start); + + size_t current_pos = 0; + while (current_pos < params_str.length()) { + size_t comma_pos = params_str.find(',', current_pos); + std::string param_val_str = + (comma_pos == std::string::npos) + ? params_str.substr(current_pos) + : params_str.substr(current_pos, comma_pos - current_pos); + param_val_str.erase(0, param_val_str.find_first_not_of(" \t\r\n")); + param_val_str.erase(param_val_str.find_last_not_of(" \t\r\n") + 1); + try { + info.proc_params.push_back(std::stof(param_val_str)); + } catch (...) { + fprintf(stderr, "Error: Invalid proc param for %s: %s\n", + info.name.c_str(), param_val_str.c_str()); + return 1; + } + if (comma_pos == std::string::npos) + break; + current_pos = comma_pos + 1; + } + } else { + info.proc_func_name = func_and_params_str; + } + + // Validate GPU procedural function name + if (info.proc_func_name != "gen_noise" && + info.proc_func_name != "gen_perlin" && + info.proc_func_name != "gen_grid") { + fprintf(stderr, + "Error: PROC_GPU only supports gen_noise, gen_perlin, gen_grid, got: %s for asset: %s\n", + info.proc_func_name.c_str(), info.name.c_str()); + return 1; + } + } else if (compression_type_str.rfind("PROC(", 0) == 0) { + info.is_procedural = true; + info.is_gpu_procedural = false; size_t open_paren = compression_type_str.find('('); size_t close_paren = compression_type_str.rfind(')'); if (open_paren == std::string::npos || @@ -500,12 +556,13 @@ int main(int argc, char* argv[]) { for (const auto& info : asset_build_infos) { fprintf(assets_data_cc_file, " { "); if (info.is_procedural) { - fprintf(assets_data_cc_file, "nullptr, 0, true, %s, %s, %zu", + fprintf(assets_data_cc_file, "nullptr, 0, true, %s, %s, %s, %zu", + info.is_gpu_procedural ? "true" : "false", info.func_name_str_name.c_str(), info.params_array_name.c_str(), info.proc_params.size()); } else { fprintf(assets_data_cc_file, - "%s, ASSET_SIZE_%s, false, nullptr, nullptr, 0", + "%s, ASSET_SIZE_%s, false, false, nullptr, nullptr, 0", info.data_array_name.c_str(), info.name.c_str()); } fprintf(assets_data_cc_file, " },\n"); diff --git a/tools/timeline_editor/index.html b/tools/timeline_editor/index.html index 074b711..db71beb 100644 --- a/tools/timeline_editor/index.html +++ b/tools/timeline_editor/index.html @@ -601,7 +601,11 @@ const modifier = effect.priorityModifier || '+'; output += ` EFFECT ${modifier} ${effect.className} ${effect.startTime.toFixed(2)} ${effect.endTime.toFixed(2)}`; if (effect.args) { - output += ` ${effect.args}`; + // Strip priority comments from args + const cleanArgs = effect.args.replace(/\s*#\s*Priority:\s*\d+/i, '').trim(); + if (cleanArgs) { + output += ` ${cleanArgs}`; + } } output += '\n'; } diff --git a/tools/validate_uniforms.py b/tools/validate_uniforms.py new file mode 100644 index 0000000..40d1b0f --- /dev/null +++ b/tools/validate_uniforms.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 + +import sys +import re +import os + +# WGSL alignment rules (simplified for common types) +WGSL_ALIGNMENT = { + "f32": 4, + "vec2<f32>": 8, + "vec3<f32>": 16, + "vec4<f32>": 16, + # Add other types as needed (e.g., u32, i32, mat4x4<f32>) +} + +def get_wgsl_type_size_and_alignment(type_name): + type_name = type_name.strip() + if type_name in WGSL_ALIGNMENT: + return WGSL_ALIGNMENT[type_name], WGSL_ALIGNMENT[type_name] + # Handle arrays, e.g., array<f32, 5> + if type_name.startswith("array"): + match = re.search(r"array<([\w<>, ]+)>", type_name) + if match: + inner_type = match.group(1).split(",")[0].strip() + # For simplicity, assume scalar array doesn't change alignment of base type + return get_wgsl_type_size_and_alignment(inner_type) + # Handle structs recursively (simplified, assumes no nested structs for now) + return 0, 0 # Unknown or complex type + +def parse_wgsl_struct(wgsl_content): + structs = {} + # Regex to find struct definitions: struct StructName { ... } + struct_matches = re.finditer(r"struct\s+(\w+)\s*\{\s*(.*?)\s*\}", wgsl_content, re.DOTALL) + for struct_match in struct_matches: + struct_name = struct_match.group(1) + members_content = struct_match.group(2) + members = [] + # Regex to find members: member_name: member_type + # Adjusted regex to handle types with brackets and spaces, and comments. + # CHANGED: \s to [ \t] to avoid consuming newlines + member_matches = re.finditer(r"(\w+)\s*:\s*([\w<>,\[\] \t]+)(?:\s*//.*)?", members_content) + for member_match in member_matches: + member_name = member_match.group(1) + member_type = member_match.group(2).strip() + if member_type.endswith(','): + member_type = member_type[:-1].strip() + members.append((member_name, member_type)) + structs[struct_name] = members + # print(f"DEBUG: Parsed WGSL struct '{struct_name}' with members: {members}") + return structs + +def find_embedded_wgsl_in_cpp(cpp_content): + # Regex to find raw string literals R"(...)" which often contain WGSL + wgsl_blocks = [] + matches = re.finditer(r'R"\((.*?)\)"', cpp_content, re.DOTALL) + for match in matches: + wgsl_blocks.append(match.group(1)) + return wgsl_blocks + +def calculate_wgsl_struct_size(struct_name, struct_members): + total_size = 0 + max_alignment = 0 + members_info = [] + + for member_name, member_type in struct_members: + size, alignment = get_wgsl_type_size_and_alignment(member_type) + if size == 0: # If type is unknown or complex, we can't reliably calculate + # print(f"Warning: Unknown or complex WGSL type '{member_type}' for member '{member_name}'. Cannot reliably calculate size.", file=sys.stderr) + return 0, 0 + members_info.append((member_name, member_type, size, alignment)) + max_alignment = max(max_alignment, alignment) + + current_offset = 0 + for member_name, member_type, size, alignment in members_info: + # Align current offset to the alignment of the current member + current_offset = (current_offset + alignment - 1) & ~(alignment - 1) + current_offset += size + + # The total size of the struct is the final offset, padded to the max alignment + if max_alignment > 0: + total_size = (current_offset + max_alignment - 1) & ~(max_alignment - 1) + else: + total_size = current_offset + + return total_size, max_alignment + +def parse_cpp_static_asserts(cpp_content): + cpp_structs = {} + # Regex to find C++ struct definitions with static_asserts for sizeof + # This regex is simplified and might need adjustments for more complex C++ code + struct_matches = re.finditer(r"struct\s+(\w+)\s*\{\s*(.*?)\s*\}\s*;.*?static_assert\(sizeof\(\1\)\s*==\s*(\d+)\s*,.*?\);", cpp_content, re.DOTALL | re.MULTILINE) + for struct_match in struct_matches: + struct_name = struct_match.group(1) + members_content = struct_match.group(2) + expected_size = int(struct_match.group(3)) + members = [] + # Regex to find members: type member_name; + member_matches = re.finditer(r"(.*?)\s+(\w+)\s*(?:=\s*.*?|\s*\{.*?\})?;", members_content) + for member_match in member_matches: + member_type = member_match.group(1).strip() + member_name = member_match.group(2).strip() + members.append((member_name, member_type)) + cpp_structs[struct_name] = {"members": members, "expected_size": expected_size} + return cpp_structs + +def validate_uniforms(wgsl_files, cpp_files): + all_wgsl_structs = {} + + # Parse separate WGSL files + for file_path in wgsl_files: + try: + with open(file_path, 'r') as f: + wgsl_content = f.read() + structs = parse_wgsl_struct(wgsl_content) + all_wgsl_structs.update(structs) + except Exception as e: + print(f"Error parsing WGSL file {file_path}: {e}", file=sys.stderr) + continue + + # Parse C++ files for embedded WGSL and static_asserts + for cpp_file_path in cpp_files: + try: + with open(cpp_file_path, 'r') as f: + cpp_content = f.read() + + # Parse embedded WGSL + wgsl_blocks = find_embedded_wgsl_in_cpp(cpp_content) + for block in wgsl_blocks: + structs = parse_wgsl_struct(block) + all_wgsl_structs.update(structs) + + # Parse C++ structs and static_asserts + cpp_structs = parse_cpp_static_asserts(cpp_content) + for struct_name, data in cpp_structs.items(): + expected_size = data["expected_size"] + # Try to find the matching WGSL struct + if struct_name in all_wgsl_structs: + wgsl_members = all_wgsl_structs[struct_name] + calculated_wgsl_size, wgsl_max_alignment = calculate_wgsl_struct_size(struct_name, wgsl_members) + + if calculated_wgsl_size == 0: # If calculation failed + # print(f"Validation Warning for '{struct_name}': Could not calculate WGSL size.") + continue + + if calculated_wgsl_size != expected_size: + print(f"Validation Mismatch for '{struct_name}':\n WGSL Calculated Size: {calculated_wgsl_size}\n C++ Expected Size: {expected_size}\n Max WGSL Alignment: {wgsl_max_alignment}", file=sys.stderr) + sys.exit(1) + else: + print(f"Validation OK for '{struct_name}': Size {calculated_wgsl_size} matches C++ expected size.") + else: + print(f"Validation Warning for '{struct_name}': Matching WGSL struct not found.") + except Exception as e: + print(f"Error processing C++ file {cpp_file_path}: {e}", file=sys.stderr) + continue + +def main(): + if len(sys.argv) < 3: + print("Usage: validate_uniforms.py <wgsl_dir_or_file> <cpp_file1> [<cpp_file2> ...]", file=sys.stderr) + sys.exit(1) + + wgsl_input = sys.argv[1] + cpp_files = sys.argv[2:] + + wgsl_files = [] + if os.path.isfile(wgsl_input): + wgsl_files.append(wgsl_input) + elif os.path.isdir(wgsl_input): + for root, _, files in os.walk(wgsl_input): + for file in files: + if file.endswith(".wgsl"): + wgsl_files.append(os.path.join(root, file)) + + # We proceed even if wgsl_files is empty, because C++ files might contain embedded WGSL + + validate_uniforms(wgsl_files, cpp_files) + +if __name__ == "__main__": + main()
\ No newline at end of file |
