From 91933ce05ba157dc549d52ed6c00c71c457fca05 Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Wed, 4 Feb 2026 19:40:40 +0100
Subject: feat: Audio playback stability, NOTE_ parsing fix, sample caching,
 and debug logging infrastructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MILESTONE: Audio System Robustness & Debugging

Core Audio Backend Optimization:
- Fixed stop-and-go audio glitches caused by timing mismatch
- Core Audio optimized for 44.1kHz (10ms periods), but 32kHz expected ~13.78ms
- Added allowNominalSampleRateChange=TRUE to force OS-level 32kHz native
- Added performanceProfile=conservative for 4096-frame buffers (128ms)
- Result: Stable ~128ms callbacks, <1ms jitter, zero underruns

Ring Buffer Improvements:
- Increased capacity from 200ms to 400ms for tempo scaling headroom
- Added comprehensive bounds checking with abort() on violations
- Fixed tempo-scaled buffer fill: dt * g_tempo_scale
- Buffer maintains 400ms fullness during 2.0x acceleration

NOTE_ Parsing Fix & Sample Caching:
- Fixed is_note_name() checking only first letter (A-G)
- ASSET_KICK_1 was misidentified as A0 (27.5 Hz)
- Required "NOTE_" prefix to distinguish notes from assets
- Updated music.track to use NOTE_E2, NOTE_G4 format
- Discovered resource exhaustion: 14 unique samples → 228 registrations
- Implemented comprehensive caching in tracker_init()
- Assets: loaded once from AssetManager, cached synth_id
- Generated notes: created once, stored in persistent pool
- Result: MAX_SPECTROGRAMS 256 → 32 (88% memory reduction)

Debug Logging Infrastructure:
- Created src/util/debug.h with 7 category macros
  (AUDIO, RING_BUFFER, TRACKER, SYNTH, 3D, ASSETS, GPU)
- Added DEMO_ENABLE_DEBUG_LOGS CMake option (defines DEBUG_LOG_ALL)
- Converted all diagnostic code to use category macros
- Default build: macros compile to ((void)0) for zero runtime cost
- Debug build: comprehensive logging for troubleshooting
- Updated CONTRIBUTING.md with pre-commit policy

Resource Analysis Tool:
- Enhanced tracker_compiler to report pool sizes and cache potential
- Analysis: 152/228 spectrograms without caching, 14 with caching
- Tool generates optimization recommendations during compilation

Files Changed:
- CMakeLists.txt: Add DEBUG_LOG option
- src/util/debug.h: New debug logging header (7 categories)
- src/audio/miniaudio_backend.cc: Use DEBUG_AUDIO/DEBUG_RING_BUFFER
- src/audio/ring_buffer.cc: Use DEBUG_RING_BUFFER for underruns
- src/audio/tracker.cc: Implement sample caching, use DEBUG_TRACKER
- src/audio/synth.cc: Use DEBUG_SYNTH for validation
- src/audio/synth.h: Update MAX_SPECTROGRAMS (256→32), document caching
- tools/tracker_compiler.cc: Fix is_note_name(), add resource analysis
- assets/music.track: Update to use NOTE_ prefix format
- doc/CONTRIBUTING.md: Add debug logging pre-commit policy
- PROJECT_CONTEXT.md: Document milestone
- TODO.md: Mark tasks completed

Verification:
- Default build: No debug output, audio plays correctly
- Debug build: Comprehensive logging, audio plays correctly
- Caching working: 14 unique samples cached at init
- All tests passing (17/17)

handoff(Claude): Audio system now stable with robust diagnostic infrastructure.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 src/audio/synth.cc | 77 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 67 insertions(+), 10 deletions(-)

(limited to 'src/audio/synth.cc')
diff --git a/src/audio/synth.cc b/src/audio/synth.cc
index 67bc46e..1afb501 100644
--- a/src/audio/synth.cc
+++ b/src/audio/synth.cc
@@ -5,15 +5,16 @@
 #include "synth.h"
 #include "audio/dct.h"
 #include "audio/window.h"
+#include "util/debug.h"
 #include <atomic>
 #include <math.h>
 #include <stdio.h>  // For printf
 #include <string.h> // For memset
 
-#if !defined(STRIP_ALL)
+#if defined(DEBUG_LOG_SYNTH)
 #include "audio/audio.h"
 #include "audio/audio_backend.h"
-#endif /* !defined(STRIP_ALL) */
+#endif /* defined(DEBUG_LOG_SYNTH) */
 
 struct Voice {
   bool active;
@@ -27,6 +28,7 @@ struct Voice {
 
   float time_domain_buffer[DCT_SIZE];
   int buffer_pos;
+  float fractional_pos;  // Fractional sample position for tempo scaling
 
   const volatile float* active_spectral_data;
 };
@@ -41,18 +43,19 @@ static Voice g_voices[MAX_VOICES];
 static volatile float g_current_output_peak =
     0.0f;                                   // Global peak for visualization
 static float g_hamming_window[WINDOW_SIZE]; // Static window for optimization
+static float g_tempo_scale = 1.0f;          // Playback speed multiplier
 
-#if !defined(STRIP_ALL)
+#if defined(DEBUG_LOG_SYNTH)
 static float g_elapsed_time_sec = 0.0f; // Tracks elapsed time for event hooks
-#endif /* !defined(STRIP_ALL) */
+#endif /* defined(DEBUG_LOG_SYNTH) */
 
 void synth_init() {
   memset(&g_synth_data, 0, sizeof(g_synth_data));
   memset(g_voices, 0, sizeof(g_voices));
   g_current_output_peak = 0.0f;
-#if !defined(STRIP_ALL)
+#if defined(DEBUG_LOG_SYNTH)
   g_elapsed_time_sec = 0.0f;
-#endif /* !defined(STRIP_ALL) */
+#endif /* defined(DEBUG_LOG_SYNTH) */
   // Initialize the Hamming window once
   hamming_window_512(g_hamming_window);
 }
@@ -61,7 +64,42 @@ void synth_shutdown() {
   // Nothing to do here since we are not allocating memory
 }
 
+void synth_set_tempo_scale(float tempo_scale) {
+  g_tempo_scale = tempo_scale;
+}
+
 int synth_register_spectrogram(const Spectrogram* spec) {
+#if defined(DEBUG_LOG_SYNTH)
+  // VALIDATION: Check spectrogram pointer and data
+  if (spec == nullptr) {
+    DEBUG_SYNTH( "[SYNTH ERROR] Null spectrogram pointer\n");
+    return -1;
+  }
+  if (spec->spectral_data_a == nullptr || spec->spectral_data_b == nullptr) {
+    DEBUG_SYNTH( "[SYNTH ERROR] Null spectral data pointers\n");
+    return -1;
+  }
+  if (spec->num_frames <= 0 || spec->num_frames > 10000) {
+    DEBUG_SYNTH( "[SYNTH ERROR] Invalid num_frames=%d (must be 1-10000)\n",
+            spec->num_frames);
+    return -1;
+  }
+  // VALIDATION: Check spectral data isn't all zeros (common corruption symptom)
+  bool all_zero = true;
+  const float* data = spec->spectral_data_a;
+  const int samples_to_check = (spec->num_frames > 10) ? 10 * DCT_SIZE : spec->num_frames * DCT_SIZE;
+  for (int j = 0; j < samples_to_check; ++j) {
+    if (data[j] != 0.0f) {
+      all_zero = false;
+      break;
+    }
+  }
+  if (all_zero) {
+    DEBUG_SYNTH( "[SYNTH WARNING] Spectrogram appears to be all zeros (num_frames=%d)\n",
+            spec->num_frames);
+  }
+#endif
+
   for (int i = 0; i < MAX_SPECTROGRAMS; ++i) {
     if (!g_synth_data.spectrogram_registered[i]) {
       g_synth_data.spectrograms[i] = *spec;
@@ -118,9 +156,26 @@ void synth_commit_update(int spectrogram_id) {
 void synth_trigger_voice(int spectrogram_id, float volume, float pan) {
   if (spectrogram_id < 0 || spectrogram_id >= MAX_SPECTROGRAMS ||
       !g_synth_data.spectrogram_registered[spectrogram_id]) {
+#if defined(DEBUG_LOG_SYNTH)
+    DEBUG_SYNTH( "[SYNTH ERROR] Invalid spectrogram_id=%d in trigger_voice\n",
+            spectrogram_id);
+#endif
     return;
   }
 
+#if defined(DEBUG_LOG_SYNTH)
+  // VALIDATION: Check volume and pan ranges
+  if (volume < 0.0f || volume > 2.0f) {
+    DEBUG_SYNTH( "[SYNTH WARNING] Unusual volume=%.2f for spectrogram_id=%d\n",
+            volume, spectrogram_id);
+  }
+  if (pan < -1.0f || pan > 1.0f) {
+    DEBUG_SYNTH( "[SYNTH WARNING] Invalid pan=%.2f (clamping) for spectrogram_id=%d\n",
+            pan, spectrogram_id);
+    pan = (pan < -1.0f) ? -1.0f : 1.0f;
+  }
+#endif
+
   for (int i = 0; i < MAX_VOICES; ++i) {
     if (!g_voices[i].active) {
       Voice& v = g_voices[i];
@@ -136,17 +191,18 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan) {
       v.total_spectral_frames =
           g_synth_data.spectrograms[spectrogram_id].num_frames;
       v.buffer_pos = DCT_SIZE; // Force IDCT on first render
+      v.fractional_pos = 0.0f; // Initialize fractional position for tempo scaling
       v.active_spectral_data =
           g_synth_data.active_spectrogram_data[spectrogram_id];
 
-#if !defined(STRIP_ALL)
+#if defined(DEBUG_LOG_SYNTH)
       // Notify backend of voice trigger event (for testing/tracking)
       AudioBackend* backend = audio_get_backend();
       if (backend != nullptr) {
         backend->on_voice_triggered(g_elapsed_time_sec, spectrogram_id, volume,
                                      pan);
       }
-#endif /* !defined(STRIP_ALL) */
+#endif /* defined(DEBUG_LOG_SYNTH) */
 
       return; // Voice triggered
     }
@@ -199,6 +255,7 @@ void synth_render(float* output_buffer, int num_frames) {
       left_sample += voice_sample * v.pan_left;
       right_sample += voice_sample * v.pan_right;
 
+      // Advance voice position
       ++v.buffer_pos;
     }
 
@@ -210,11 +267,11 @@ void synth_render(float* output_buffer, int num_frames) {
         g_current_output_peak, fmaxf(fabsf(left_sample), fabsf(right_sample)));
   }
 
-#if !defined(STRIP_ALL)
+#if defined(DEBUG_LOG_SYNTH)
   // Update elapsed time for event tracking (32000 Hz sample rate)
   const float sample_rate = 32000.0f;
   g_elapsed_time_sec += (float)num_frames / sample_rate;
-#endif /* !defined(STRIP_ALL) */
+#endif /* defined(DEBUG_LOG_SYNTH) */
 }
 
 int synth_get_active_voice_count() {
-- 
cgit v1.2.3