From 8c5e41724fdfc3be24e95f48ae4b2be616404074 Mon Sep 17 00:00:00 2001 From: skal Date: Thu, 26 Mar 2026 10:09:34 +0100 Subject: fix(audio): P1-P3 fixes from audio code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1 — correctness bugs: - tracker.cc: move delete[] loop before pool reset so guard condition is valid - audio_engine: replace tracker_reset() with tracker_init() in reset()/seek() so synth IDs are re-registered after synth_init() clears spectrogram slots - spectrogram_resource_manager: set spec.version in load_procedural() (was UB) P2 — minor bugs: - synth.cc: move pan clamp unconditionally before debug-only block - gen.cc: remove dead `freq` variable in generate_note_spectrogram() - tracker.cc: remove duplicate g_sample_synth_cache clear loop P3 — cleanup: - Replace hardcoded 32000.0f with RING_BUFFER_SAMPLE_RATE (5 sites) - audio.cc: extract clip_samples() helper, remove duplicated clip loops - audio_engine: inline update_silent(), remove no-op prewarm_for_time_range() - Remove stale comments: stdio.h include, NEW: labels, CACHING block, NOTE: - Move TODO(timing) drift notes from source to TODO.md handoff(Gemini): audio review implemented, 36/36 tests passing --- src/audio/audio.cc | 33 +++++++++--------------- src/audio/audio_engine.cc | 34 ++++--------------------- src/audio/audio_engine.h | 9 ------- src/audio/gen.cc | 6 ++--- src/audio/spectrogram_resource_manager.cc | 1 + src/audio/synth.cc | 13 +++++----- src/audio/synth.h | 10 -------- src/audio/tracker.cc | 42 +++++++++++-------------------- 8 files changed, 42 insertions(+), 106 deletions(-) (limited to 'src/audio') diff --git a/src/audio/audio.cc b/src/audio/audio.cc index 89380cd..3b98452 100644 --- a/src/audio/audio.cc +++ b/src/audio/audio.cc @@ -16,6 +16,15 @@ #include #include +static void clip_samples(float* buf, int count) { + for (int i = 0; i < count; ++i) { + if (buf[i] > 1.0f) + buf[i] = 1.0f; + if (buf[i] < -1.0f) + buf[i] = -1.0f; + } +} + // Global ring buffer for audio streaming static AudioRingBuffer g_ring_buffer; @@ -99,12 +108,6 @@ void audio_render_ahead(float music_time, float dt, float target_fill) { // Render in small chunks to keep synth time synchronized with tracker // Chunk size: one frame's worth of audio (~16.6ms @ 60fps) - // TODO(timing): CRITICAL BUG - Truncation here may cause 180ms drift over 63 - // beats (int) cast loses fractional samples: 0.333 samples/frame * 2560 - // frames = 853 samples = 27ms But observed drift is 180ms, so this is not the - // only source (27ms < 180ms) NOTE: This is NOT a float vs double precision - // issue - floats handle <500s times fine See also: tracker.cc BPM timing - // calculation const int chunk_frames = (int)(dt * RING_BUFFER_SAMPLE_RATE); const int chunk_samples = chunk_frames * RING_BUFFER_CHANNELS; @@ -176,13 +179,7 @@ void audio_render_ahead(float music_time, float dt, float target_fill) { // Render directly to ring buffer (NO COPY, NO ALLOCATION) synth_render(write_ptr, actual_frames); - // Apply clipping in-place (Phase 2: ensure samples stay in [-1.0, 1.0]) - for (int i = 0; i < actual_samples; ++i) { - if (write_ptr[i] > 1.0f) - write_ptr[i] = 1.0f; - if (write_ptr[i] < -1.0f) - write_ptr[i] = -1.0f; - } + clip_samples(write_ptr, actual_samples); // Commit written data atomically g_ring_buffer.commit_write(actual_samples); @@ -208,13 +205,7 @@ void audio_render_ahead(float music_time, float dt, float target_fill) { synth_render(second_ptr, second_frames); - // Apply clipping to wrap-around region - for (int i = 0; i < second_samples; ++i) { - if (second_ptr[i] > 1.0f) - second_ptr[i] = 1.0f; - if (second_ptr[i] < -1.0f) - second_ptr[i] = -1.0f; - } + clip_samples(second_ptr, second_samples); g_ring_buffer.commit_write(second_samples); @@ -279,7 +270,7 @@ AudioRingBuffer* audio_get_ring_buffer() { #if !defined(STRIP_ALL) void audio_render_silent(float duration_sec) { - const int sample_rate = 32000; + const int sample_rate = RING_BUFFER_SAMPLE_RATE; const int chunk_size = 512; int total_frames = (int)(duration_sec * sample_rate); float buffer[chunk_size * 2]; // Stereo diff --git a/src/audio/audio_engine.cc b/src/audio/audio_engine.cc index d11303c..8e47eac 100644 --- a/src/audio/audio_engine.cc +++ b/src/audio/audio_engine.cc @@ -3,7 +3,6 @@ #include "audio_engine.h" #include "util/debug.h" -#include #include void AudioEngine::init() { @@ -49,7 +48,7 @@ void AudioEngine::reset() { } synth_init(); // Re-init synth (clears all state) - tracker_reset(); + ::tracker_init(); // Re-register all spectrograms (synth slots now clean) resource_mgr_.reset(); // Clear sample-to-synth mapping @@ -171,23 +170,18 @@ void AudioEngine::seek(float target_time) { // 1. Reset synth state (clear all active voices) synth_init(); - // 2. Reset tracker state - tracker_reset(); + // 2. Re-init tracker: re-registers all spectrograms with now-clean synth slots + ::tracker_init(); // 3. Clear sample-to-synth mapping (will be re-registered on demand) for (int i = 0; i < MAX_SPECTROGRAM_RESOURCES; ++i) { sample_to_synth_id_[i] = -1; } - // 4. Pre-warm samples for target time range - const float prewarm_start = std::max(0.0f, target_time - 1.0f); - const float prewarm_end = target_time + 2.0f; - prewarm_for_time_range(prewarm_start, prewarm_end); - - // 5. Simulate tracker up to target time (without audio) + // 4. Simulate tracker up to target time (without audio) const float dt = 0.1f; for (float t = 0.0f; t < target_time; t += dt) { - update_silent(t); + tracker_update(t, 0.0f); } // 6. Final update at exact target time @@ -200,22 +194,4 @@ void AudioEngine::seek(float target_time) { #endif } -void AudioEngine::prewarm_for_time_range(float start_time, float end_time) { - // TODO: Scan tracker score for patterns in this time range - // and pre-load their samples. For now, this is a placeholder. - // The proper implementation requires access to g_tracker_score - // and pattern data to determine which samples will be needed. - -#if defined(DEBUG_LOG_AUDIO) - DEBUG_AUDIO("[AudioEngine] Pre-warming samples for t=%.2f-%.2f\n", start_time, - end_time); -#endif -} - -void AudioEngine::update_silent(float music_time) { - // Update tracker without triggering audio (for fast-forward/seeking) - // This is a placeholder - proper implementation requires tracker support - // for silent updates. For now, we just update normally. - tracker_update(music_time, 0.0f); -} #endif /* !defined(STRIP_ALL) */ diff --git a/src/audio/audio_engine.h b/src/audio/audio_engine.h index 699213d..cc4c80a 100644 --- a/src/audio/audio_engine.h +++ b/src/audio/audio_engine.h @@ -50,17 +50,8 @@ class AudioEngine { // Get or create synth ID for a sample int get_or_register_synth_id(int sample_id); -#if !defined(STRIP_ALL) - // Seeking support - void prewarm_for_time_range(float start_time, float end_time); - void update_silent(float music_time); // Update without triggering audio -#endif - SpectrogramResourceManager resource_mgr_; - // NOTE: For now, synth and tracker are global C functions (not members) - // Future refactoring will convert them to member objects - // Mapping: sample_id → synth_id (lazy registration) int sample_to_synth_id_[MAX_SPECTROGRAM_RESOURCES]; diff --git a/src/audio/gen.cc b/src/audio/gen.cc index cd36d54..9d18517 100644 --- a/src/audio/gen.cc +++ b/src/audio/gen.cc @@ -4,6 +4,7 @@ #include "audio/gen.h" #include "audio/dct.h" +#include "audio/ring_buffer.h" #include "audio/window.h" #include #include @@ -11,7 +12,7 @@ std::vector generate_note_spectrogram(const NoteParams& params, int* out_num_frames) { - int num_frames = (int)(params.duration_sec * 32000.0f / DCT_SIZE); + int num_frames = (int)(params.duration_sec * RING_BUFFER_SAMPLE_RATE / DCT_SIZE); if (num_frames < 1) num_frames = 1; *out_num_frames = num_frames; @@ -21,7 +22,7 @@ std::vector generate_note_spectrogram(const NoteParams& params, hamming_window_512(window); float phase = 0.0f; - float time_step = 1.0f / 32000.0f; + float time_step = 1.0f / RING_BUFFER_SAMPLE_RATE; for (int f = 0; f < num_frames; ++f) { float pcm_chunk[DCT_SIZE] = {0}; @@ -49,7 +50,6 @@ std::vector generate_note_spectrogram(const NoteParams& params, float sample = 0.0f; for (int h = 1; h <= params.num_harmonics; ++h) { float h_amp = powf(params.harmonic_decay, h - 1); - float freq = (params.base_freq + vib + pitch_rnd) * h; sample += sinf(phase * h) * h_amp; } diff --git a/src/audio/spectrogram_resource_manager.cc b/src/audio/spectrogram_resource_manager.cc index c500528..c0fb0a3 100644 --- a/src/audio/spectrogram_resource_manager.cc +++ b/src/audio/spectrogram_resource_manager.cc @@ -228,6 +228,7 @@ void SpectrogramResourceManager::load_procedural(Resource* resource) { resource->spec.spectral_data_a = resource->owned_data; resource->spec.spectral_data_b = resource->owned_data; resource->spec.num_frames = note_frames; + resource->spec.version = SPEC_VERSION_V1; #if defined(DEBUG_LOG_ASSETS) DEBUG_ASSETS("[ResourceMgr] Generated procedural: %d frames, freq=%.2f\n", diff --git a/src/audio/synth.cc b/src/audio/synth.cc index 0866bda..45ced59 100644 --- a/src/audio/synth.cc +++ b/src/audio/synth.cc @@ -6,7 +6,6 @@ #include "audio/dct.h" #include "util/debug.h" #include -#include // For printf #include // For memset #if !defined(STRIP_ALL) @@ -169,6 +168,11 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan, return; } + if (pan < -1.0f) + pan = -1.0f; + else if (pan > 1.0f) + pan = 1.0f; + #if defined(DEBUG_LOG_SYNTH) // VALIDATION: Check volume and pan ranges if (volume < 0.0f || volume > 2.0f) { @@ -177,9 +181,8 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan, } if (pan < -1.0f || pan > 1.0f) { DEBUG_SYNTH( - "[SYNTH WARNING] Invalid pan=%.2f (clamping) for spectrogram_id=%d\n", + "[SYNTH WARNING] Invalid pan=%.2f for spectrogram_id=%d\n", pan, spectrogram_id); - pan = (pan < -1.0f) ? -1.0f : 1.0f; } if (start_offset_samples < 0) { DEBUG_SYNTH("[SYNTH WARNING] Negative start_offset=%d, clamping to 0\n", @@ -210,8 +213,7 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan, memset(v.overlap_buf, 0, sizeof(v.overlap_buf)); v.fractional_pos = 0.0f; // Initialize fractional position for tempo scaling - v.start_sample_offset = - start_offset_samples; // NEW: Sample-accurate timing + v.start_sample_offset = start_offset_samples; v.active_spectral_data = g_synth_data.active_spectrogram_data[spectrogram_id]; @@ -242,7 +244,6 @@ void synth_render(float* output_buffer, int num_frames) { if (!v.active) continue; - // NEW: Skip this sample if we haven't reached the trigger offset yet if (v.start_sample_offset > 0) { v.start_sample_offset--; continue; // Don't produce audio until offset elapsed diff --git a/src/audio/synth.h b/src/audio/synth.h index e5a8197..d3bd70e 100644 --- a/src/audio/synth.h +++ b/src/audio/synth.h @@ -7,16 +7,6 @@ #include "dct.h" #include -// Based on tracker score analysis (see generated/music_data.cc) -// Max simultaneous patterns: 5, recommended: 10 each -// -// CACHING IMPLEMENTATION (COMPLETED): -// - All asset samples are registered ONCE in tracker_init() -// - All generated notes are cached by parameters (freq, duration, etc.) -// - Current track: 14 unique samples (8 assets + 6 generated notes) -// - With caching: MAX_SPECTROGRAMS = 32 provides 2.3x headroom -// -// Memory cost: 32 slots × 48 bytes = 1.5KB (down from 12KB with 256 slots) #define MAX_VOICES 48 // Per tracker_compiler: required=24, recommended=48 #define MAX_SPECTROGRAMS \ 32 // Current track: 14 unique, 32 provides comfortable headroom diff --git a/src/audio/tracker.cc b/src/audio/tracker.cc index d9beb56..651c4d2 100644 --- a/src/audio/tracker.cc +++ b/src/audio/tracker.cc @@ -1,6 +1,7 @@ #include "tracker.h" #include "audio.h" #include "audio/synth.h" +#include "ring_buffer.h" #include "util/asset_manager.h" #include "util/debug.h" #include "util/fatal_error.h" @@ -101,30 +102,24 @@ static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size, void tracker_init() { g_last_trigger_idx = 0; g_next_pool_slot = 0; - for (int i = 0; i < MAX_SPECTROGRAMS; ++i) { - g_spec_pool[i].synth_id = -1; - g_spec_pool[i].data = nullptr; - g_spec_pool[i].active = false; - g_active_patterns[i].active = false; - } - - // Always re-initialize cache to ensure spectrograms are registered - // This handles the case where synth_init() was called multiple times - for (int i = 0; i < 256; ++i) { - g_sample_synth_cache[i] = -1; - } - // Free any previously allocated generated note data to prevent leaks + // Free any previously allocated generated note data to prevent leaks. + // Must run before the pool is zeroed (relies on .active/.data still set). if (g_cache_initialized) { for (int i = 0; i < MAX_SPECTROGRAMS; ++i) { if (g_spec_pool[i].data != nullptr && g_spec_pool[i].active) { delete[] g_spec_pool[i].data; - g_spec_pool[i].data = nullptr; - g_spec_pool[i].active = false; } } } + for (int i = 0; i < MAX_SPECTROGRAMS; ++i) { + g_spec_pool[i].synth_id = -1; + g_spec_pool[i].data = nullptr; + g_spec_pool[i].active = false; + g_active_patterns[i].active = false; + } + // Initialize sample cache { for (int i = 0; i < 256; ++i) { @@ -321,16 +316,6 @@ static void trigger_note_event(const TrackerEvent& event, } void tracker_update(float music_time_sec, float dt_music_sec) { - // TODO(timing): CRITICAL BUG - Events trigger ~180ms early over 63 beats @ - // BPM=90 Observed: Beat 63 snare at 41.82s in WAV, should be at 42.00s (180ms - // drift) NOTE: This is NOT a float vs double precision issue - floats handle - // <500s times fine Root cause unknown - suspects: - // 1. Systematic bias in time calculation (not random accumulation) - // 2. Truncation in audio.cc:103 chunk_frames = (int)(dt * sample_rate) - // 3. BPM calculation precision below (unit_duration_sec) - // 4. Mismatch between tracker time and actual sample rendering - // See also: audio.cc sample rendering truncation - // Unit-less timing: 1 unit = 4 beats (by convention) const float BEATS_PER_UNIT = 4.0f; const float unit_duration_sec = @@ -382,8 +367,8 @@ void tracker_update(float music_time_sec, float dt_music_sec) { // Offset = (music_time_delta / tempo_scale) * sample_rate int sample_offset = 0; if (event_music_time > music_time_sec) { - sample_offset = - (int)((event_music_time - music_time_sec) / tempo_scale * 32000.0f); + sample_offset = (int)((event_music_time - music_time_sec) / tempo_scale * + RING_BUFFER_SAMPLE_RATE); } // Apply humanization if enabled @@ -401,7 +386,8 @@ void tracker_update(float music_time_sec, float dt_music_sec) { float jitter = dist(rng) * (g_tracker_score.timing_variation_pct / 100.0f) * beat_sec; - sample_offset += (int)(jitter / tempo_scale * 32000.0f); + sample_offset += + (int)(jitter / tempo_scale * RING_BUFFER_SAMPLE_RATE); } // Volume variation: vary by % -- cgit v1.2.3