From 8c5e41724fdfc3be24e95f48ae4b2be616404074 Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Thu, 26 Mar 2026 10:09:34 +0100
Subject: fix(audio): P1-P3 fixes from audio code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

P1 — correctness bugs:
- tracker.cc: move delete[] loop before pool reset so guard condition is valid
- audio_engine: replace tracker_reset() with tracker_init() in reset()/seek()
  so synth IDs are re-registered after synth_init() clears spectrogram slots
- spectrogram_resource_manager: set spec.version in load_procedural() (was UB)

P2 — minor bugs:
- synth.cc: move pan clamp unconditionally before debug-only block
- gen.cc: remove dead `freq` variable in generate_note_spectrogram()
- tracker.cc: remove duplicate g_sample_synth_cache clear loop

P3 — cleanup:
- Replace hardcoded 32000.0f with RING_BUFFER_SAMPLE_RATE (5 sites)
- audio.cc: extract clip_samples() helper, remove duplicated clip loops
- audio_engine: inline update_silent(), remove no-op prewarm_for_time_range()
- Remove stale comments: stdio.h include, NEW: labels, CACHING block, NOTE:
- Move TODO(timing) drift notes from source to TODO.md

handoff(Gemini): audio review implemented, 36/36 tests passing
---
 src/audio/audio.cc                        | 33 +++++++++---------------
 src/audio/audio_engine.cc                 | 34 ++++---------------------
 src/audio/audio_engine.h                  |  9 -------
 src/audio/gen.cc                          |  6 ++---
 src/audio/spectrogram_resource_manager.cc |  1 +
 src/audio/synth.cc                        | 13 +++++-----
 src/audio/synth.h                         | 10 --------
 src/audio/tracker.cc                      | 42 +++++++++++--------------------
 8 files changed, 42 insertions(+), 106 deletions(-)

(limited to 'src/audio')
diff --git a/src/audio/audio.cc b/src/audio/audio.cc
index 89380cd..3b98452 100644
--- a/src/audio/audio.cc
+++ b/src/audio/audio.cc
@@ -16,6 +16,15 @@
 #include <ctime>
 #include <stdio.h>
 
+static void clip_samples(float* buf, int count) {
+  for (int i = 0; i < count; ++i) {
+    if (buf[i] > 1.0f)
+      buf[i] = 1.0f;
+    if (buf[i] < -1.0f)
+      buf[i] = -1.0f;
+  }
+}
+
 // Global ring buffer for audio streaming
 static AudioRingBuffer g_ring_buffer;
 
@@ -99,12 +108,6 @@ void audio_render_ahead(float music_time, float dt, float target_fill) {
 
   // Render in small chunks to keep synth time synchronized with tracker
   // Chunk size: one frame's worth of audio (~16.6ms @ 60fps)
-  // TODO(timing): CRITICAL BUG - Truncation here may cause 180ms drift over 63
-  // beats (int) cast loses fractional samples: 0.333 samples/frame * 2560
-  // frames = 853 samples = 27ms But observed drift is 180ms, so this is not the
-  // only source (27ms < 180ms) NOTE: This is NOT a float vs double precision
-  // issue - floats handle <500s times fine See also: tracker.cc BPM timing
-  // calculation
   const int chunk_frames = (int)(dt * RING_BUFFER_SAMPLE_RATE);
   const int chunk_samples = chunk_frames * RING_BUFFER_CHANNELS;
 
@@ -176,13 +179,7 @@ void audio_render_ahead(float music_time, float dt, float target_fill) {
     // Render directly to ring buffer (NO COPY, NO ALLOCATION)
     synth_render(write_ptr, actual_frames);
 
-    // Apply clipping in-place (Phase 2: ensure samples stay in [-1.0, 1.0])
-    for (int i = 0; i < actual_samples; ++i) {
-      if (write_ptr[i] > 1.0f)
-        write_ptr[i] = 1.0f;
-      if (write_ptr[i] < -1.0f)
-        write_ptr[i] = -1.0f;
-    }
+    clip_samples(write_ptr, actual_samples);
 
     // Commit written data atomically
     g_ring_buffer.commit_write(actual_samples);
@@ -208,13 +205,7 @@ void audio_render_ahead(float music_time, float dt, float target_fill) {
 
         synth_render(second_ptr, second_frames);
 
-        // Apply clipping to wrap-around region
-        for (int i = 0; i < second_samples; ++i) {
-          if (second_ptr[i] > 1.0f)
-            second_ptr[i] = 1.0f;
-          if (second_ptr[i] < -1.0f)
-            second_ptr[i] = -1.0f;
-        }
+        clip_samples(second_ptr, second_samples);
 
         g_ring_buffer.commit_write(second_samples);
 
@@ -279,7 +270,7 @@ AudioRingBuffer* audio_get_ring_buffer() {
 
 #if !defined(STRIP_ALL)
 void audio_render_silent(float duration_sec) {
-  const int sample_rate = 32000;
+  const int sample_rate = RING_BUFFER_SAMPLE_RATE;
   const int chunk_size = 512;
   int total_frames = (int)(duration_sec * sample_rate);
   float buffer[chunk_size * 2]; // Stereo
diff --git a/src/audio/audio_engine.cc b/src/audio/audio_engine.cc
index d11303c..8e47eac 100644
--- a/src/audio/audio_engine.cc
+++ b/src/audio/audio_engine.cc
@@ -3,7 +3,6 @@
 
 #include "audio_engine.h"
 #include "util/debug.h"
-#include <algorithm>
 #include <cstring>
 
 void AudioEngine::init() {
@@ -49,7 +48,7 @@ void AudioEngine::reset() {
   }
 
   synth_init(); // Re-init synth (clears all state)
-  tracker_reset();
+  ::tracker_init(); // Re-register all spectrograms (synth slots now clean)
   resource_mgr_.reset();
 
   // Clear sample-to-synth mapping
@@ -171,23 +170,18 @@ void AudioEngine::seek(float target_time) {
   // 1. Reset synth state (clear all active voices)
   synth_init();
 
-  // 2. Reset tracker state
-  tracker_reset();
+  // 2. Re-init tracker: re-registers all spectrograms with now-clean synth slots
+  ::tracker_init();
 
   // 3. Clear sample-to-synth mapping (will be re-registered on demand)
   for (int i = 0; i < MAX_SPECTROGRAM_RESOURCES; ++i) {
     sample_to_synth_id_[i] = -1;
   }
 
-  // 4. Pre-warm samples for target time range
-  const float prewarm_start = std::max(0.0f, target_time - 1.0f);
-  const float prewarm_end = target_time + 2.0f;
-  prewarm_for_time_range(prewarm_start, prewarm_end);
-
-  // 5. Simulate tracker up to target time (without audio)
+  // 4. Simulate tracker up to target time (without audio)
   const float dt = 0.1f;
   for (float t = 0.0f; t < target_time; t += dt) {
-    update_silent(t);
+    tracker_update(t, 0.0f);
   }
 
   // 6. Final update at exact target time
@@ -200,22 +194,4 @@ void AudioEngine::seek(float target_time) {
 #endif
 }
 
-void AudioEngine::prewarm_for_time_range(float start_time, float end_time) {
-  // TODO: Scan tracker score for patterns in this time range
-  // and pre-load their samples. For now, this is a placeholder.
-  // The proper implementation requires access to g_tracker_score
-  // and pattern data to determine which samples will be needed.
-
-#if defined(DEBUG_LOG_AUDIO)
-  DEBUG_AUDIO("[AudioEngine] Pre-warming samples for t=%.2f-%.2f\n", start_time,
-              end_time);
-#endif
-}
-
-void AudioEngine::update_silent(float music_time) {
-  // Update tracker without triggering audio (for fast-forward/seeking)
-  // This is a placeholder - proper implementation requires tracker support
-  // for silent updates. For now, we just update normally.
-  tracker_update(music_time, 0.0f);
-}
 #endif /* !defined(STRIP_ALL) */
diff --git a/src/audio/audio_engine.h b/src/audio/audio_engine.h
index 699213d..cc4c80a 100644
--- a/src/audio/audio_engine.h
+++ b/src/audio/audio_engine.h
@@ -50,17 +50,8 @@ class AudioEngine {
   // Get or create synth ID for a sample
   int get_or_register_synth_id(int sample_id);
 
-#if !defined(STRIP_ALL)
-  // Seeking support
-  void prewarm_for_time_range(float start_time, float end_time);
-  void update_silent(float music_time); // Update without triggering audio
-#endif
-
   SpectrogramResourceManager resource_mgr_;
 
-  // NOTE: For now, synth and tracker are global C functions (not members)
-  // Future refactoring will convert them to member objects
-
   // Mapping: sample_id → synth_id (lazy registration)
   int sample_to_synth_id_[MAX_SPECTROGRAM_RESOURCES];
 
diff --git a/src/audio/gen.cc b/src/audio/gen.cc
index cd36d54..9d18517 100644
--- a/src/audio/gen.cc
+++ b/src/audio/gen.cc
@@ -4,6 +4,7 @@
 
 #include "audio/gen.h"
 #include "audio/dct.h"
+#include "audio/ring_buffer.h"
 #include "audio/window.h"
 #include <math.h>
 #include <stdlib.h>
@@ -11,7 +12,7 @@
 
 std::vector<float> generate_note_spectrogram(const NoteParams& params,
                                              int* out_num_frames) {
-  int num_frames = (int)(params.duration_sec * 32000.0f / DCT_SIZE);
+  int num_frames = (int)(params.duration_sec * RING_BUFFER_SAMPLE_RATE / DCT_SIZE);
   if (num_frames < 1)
     num_frames = 1;
   *out_num_frames = num_frames;
@@ -21,7 +22,7 @@ std::vector<float> generate_note_spectrogram(const NoteParams& params,
   hamming_window_512(window);
 
   float phase = 0.0f;
-  float time_step = 1.0f / 32000.0f;
+  float time_step = 1.0f / RING_BUFFER_SAMPLE_RATE;
 
   for (int f = 0; f < num_frames; ++f) {
     float pcm_chunk[DCT_SIZE] = {0};
@@ -49,7 +50,6 @@ std::vector<float> generate_note_spectrogram(const NoteParams& params,
       float sample = 0.0f;
       for (int h = 1; h <= params.num_harmonics; ++h) {
         float h_amp = powf(params.harmonic_decay, h - 1);
-        float freq = (params.base_freq + vib + pitch_rnd) * h;
         sample += sinf(phase * h) * h_amp;
       }
 
diff --git a/src/audio/spectrogram_resource_manager.cc b/src/audio/spectrogram_resource_manager.cc
index c500528..c0fb0a3 100644
--- a/src/audio/spectrogram_resource_manager.cc
+++ b/src/audio/spectrogram_resource_manager.cc
@@ -228,6 +228,7 @@ void SpectrogramResourceManager::load_procedural(Resource* resource) {
   resource->spec.spectral_data_a = resource->owned_data;
   resource->spec.spectral_data_b = resource->owned_data;
   resource->spec.num_frames = note_frames;
+  resource->spec.version = SPEC_VERSION_V1;
 
 #if defined(DEBUG_LOG_ASSETS)
   DEBUG_ASSETS("[ResourceMgr] Generated procedural: %d frames, freq=%.2f\n",
diff --git a/src/audio/synth.cc b/src/audio/synth.cc
index 0866bda..45ced59 100644
--- a/src/audio/synth.cc
+++ b/src/audio/synth.cc
@@ -6,7 +6,6 @@
 #include "audio/dct.h"
 #include "util/debug.h"
 #include <math.h>
-#include <stdio.h>  // For printf
 #include <string.h> // For memset
 
 #if !defined(STRIP_ALL)
@@ -169,6 +168,11 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan,
     return;
   }
 
+  if (pan < -1.0f)
+    pan = -1.0f;
+  else if (pan > 1.0f)
+    pan = 1.0f;
+
 #if defined(DEBUG_LOG_SYNTH)
   // VALIDATION: Check volume and pan ranges
   if (volume < 0.0f || volume > 2.0f) {
@@ -177,9 +181,8 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan,
   }
   if (pan < -1.0f || pan > 1.0f) {
     DEBUG_SYNTH(
-        "[SYNTH WARNING] Invalid pan=%.2f (clamping) for spectrogram_id=%d\n",
+        "[SYNTH WARNING] Invalid pan=%.2f for spectrogram_id=%d\n",
         pan, spectrogram_id);
-    pan = (pan < -1.0f) ? -1.0f : 1.0f;
   }
   if (start_offset_samples < 0) {
     DEBUG_SYNTH("[SYNTH WARNING] Negative start_offset=%d, clamping to 0\n",
@@ -210,8 +213,7 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan,
         memset(v.overlap_buf, 0, sizeof(v.overlap_buf));
       v.fractional_pos =
           0.0f; // Initialize fractional position for tempo scaling
-      v.start_sample_offset =
-          start_offset_samples; // NEW: Sample-accurate timing
+      v.start_sample_offset = start_offset_samples;
       v.active_spectral_data =
           g_synth_data.active_spectrogram_data[spectrogram_id];
 
@@ -242,7 +244,6 @@ void synth_render(float* output_buffer, int num_frames) {
       if (!v.active)
         continue;
 
-      // NEW: Skip this sample if we haven't reached the trigger offset yet
       if (v.start_sample_offset > 0) {
         v.start_sample_offset--;
         continue; // Don't produce audio until offset elapsed
diff --git a/src/audio/synth.h b/src/audio/synth.h
index e5a8197..d3bd70e 100644
--- a/src/audio/synth.h
+++ b/src/audio/synth.h
@@ -7,16 +7,6 @@
 #include "dct.h"
 #include <cstdint>
 
-// Based on tracker score analysis (see generated/music_data.cc)
-// Max simultaneous patterns: 5, recommended: 10 each
-//
-// CACHING IMPLEMENTATION (COMPLETED):
-// - All asset samples are registered ONCE in tracker_init()
-// - All generated notes are cached by parameters (freq, duration, etc.)
-// - Current track: 14 unique samples (8 assets + 6 generated notes)
-// - With caching: MAX_SPECTROGRAMS = 32 provides 2.3x headroom
-//
-// Memory cost: 32 slots × 48 bytes = 1.5KB (down from 12KB with 256 slots)
 #define MAX_VOICES 48 // Per tracker_compiler: required=24, recommended=48
 #define MAX_SPECTROGRAMS                                                       \
   32 // Current track: 14 unique, 32 provides comfortable headroom
diff --git a/src/audio/tracker.cc b/src/audio/tracker.cc
index d9beb56..651c4d2 100644
--- a/src/audio/tracker.cc
+++ b/src/audio/tracker.cc
@@ -1,6 +1,7 @@
 #include "tracker.h"
 #include "audio.h"
 #include "audio/synth.h"
+#include "ring_buffer.h"
 #include "util/asset_manager.h"
 #include "util/debug.h"
 #include "util/fatal_error.h"
@@ -101,30 +102,24 @@ static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size,
 void tracker_init() {
   g_last_trigger_idx = 0;
   g_next_pool_slot = 0;
-  for (int i = 0; i < MAX_SPECTROGRAMS; ++i) {
-    g_spec_pool[i].synth_id = -1;
-    g_spec_pool[i].data = nullptr;
-    g_spec_pool[i].active = false;
-    g_active_patterns[i].active = false;
-  }
-
-  // Always re-initialize cache to ensure spectrograms are registered
-  // This handles the case where synth_init() was called multiple times
-  for (int i = 0; i < 256; ++i) {
-    g_sample_synth_cache[i] = -1;
-  }
 
-  // Free any previously allocated generated note data to prevent leaks
+  // Free any previously allocated generated note data to prevent leaks.
+  // Must run before the pool is zeroed (relies on .active/.data still set).
   if (g_cache_initialized) {
     for (int i = 0; i < MAX_SPECTROGRAMS; ++i) {
       if (g_spec_pool[i].data != nullptr && g_spec_pool[i].active) {
         delete[] g_spec_pool[i].data;
-        g_spec_pool[i].data = nullptr;
-        g_spec_pool[i].active = false;
       }
     }
   }
 
+  for (int i = 0; i < MAX_SPECTROGRAMS; ++i) {
+    g_spec_pool[i].synth_id = -1;
+    g_spec_pool[i].data = nullptr;
+    g_spec_pool[i].active = false;
+    g_active_patterns[i].active = false;
+  }
+
   // Initialize sample cache
   {
     for (int i = 0; i < 256; ++i) {
@@ -321,16 +316,6 @@ static void trigger_note_event(const TrackerEvent& event,
 }
 
 void tracker_update(float music_time_sec, float dt_music_sec) {
-  // TODO(timing): CRITICAL BUG - Events trigger ~180ms early over 63 beats @
-  // BPM=90 Observed: Beat 63 snare at 41.82s in WAV, should be at 42.00s (180ms
-  // drift) NOTE: This is NOT a float vs double precision issue - floats handle
-  // <500s times fine Root cause unknown - suspects:
-  //   1. Systematic bias in time calculation (not random accumulation)
-  //   2. Truncation in audio.cc:103 chunk_frames = (int)(dt * sample_rate)
-  //   3. BPM calculation precision below (unit_duration_sec)
-  //   4. Mismatch between tracker time and actual sample rendering
-  // See also: audio.cc sample rendering truncation
-
   // Unit-less timing: 1 unit = 4 beats (by convention)
   const float BEATS_PER_UNIT = 4.0f;
   const float unit_duration_sec =
@@ -382,8 +367,8 @@ void tracker_update(float music_time_sec, float dt_music_sec) {
       // Offset = (music_time_delta / tempo_scale) * sample_rate
       int sample_offset = 0;
       if (event_music_time > music_time_sec) {
-        sample_offset =
-            (int)((event_music_time - music_time_sec) / tempo_scale * 32000.0f);
+        sample_offset = (int)((event_music_time - music_time_sec) / tempo_scale *
+                              RING_BUFFER_SAMPLE_RATE);
       }
 
       // Apply humanization if enabled
@@ -401,7 +386,8 @@ void tracker_update(float music_time_sec, float dt_music_sec) {
           float jitter = dist(rng) *
                          (g_tracker_score.timing_variation_pct / 100.0f) *
                          beat_sec;
-          sample_offset += (int)(jitter / tempo_scale * 32000.0f);
+          sample_offset +=
+              (int)(jitter / tempo_scale * RING_BUFFER_SAMPLE_RATE);
         }
 
         // Volume variation: vary by %
-- 
cgit v1.2.3