From 9cae6f16897338cb33b85d93bb6f1be38a60a93c Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Sat, 7 Feb 2026 21:19:19 +0100
Subject: fix(audio): Implement sample-accurate event timing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the "off-beat" timing issue where audio events (drum hits,
notes) were triggering with random jitter of up to ±16ms.

ROOT CAUSE:
Events were quantized to frame boundaries (60fps ≈ 16.7ms intervals)
instead of triggering at exact sample positions. When tracker_update()
detected that an event's time had passed, it triggered the voice
immediately, so the voice started "sometime during this frame" rather
than at the event's exact sample position.

SOLUTION:
Implement sample-accurate trigger offsets:
1. Calculate exact sample offset when triggering events
2. Add start_sample_offset field to Voice struct
3. Skip samples in synth_render() until offset elapses

CHANGES:
- synth.h: Add optional start_offset_samples parameter to synth_trigger_voice()
- synth.cc: Add start_sample_offset field to Voice, implement offset logic in render loop
- tracker.cc: Calculate sample offsets based on event_trigger_time vs current_playback_time

BENEFITS:
- Sample-accurate timing (0ms error vs ±16ms before)
- Negligible CPU overhead (one integer check per active voice in the
  render loop, plus a decrement while the offset is still pending)
- Backward compatible (default offset=0)
- Improves audio/visual sync, variable tempo accuracy

TIMING EXAMPLE:
Before: Event at 0.500s could trigger at 0.483s or 0.517s (frame boundaries)
After: Event triggers at exactly 0.500s (1600 sample offset calculated)

See doc/SAMPLE_ACCURATE_TIMING_FIX.md for detailed explanation.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 src/audio/synth.cc | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'src/audio/synth.cc')

diff --git a/src/audio/synth.cc b/src/audio/synth.cc
index 2072bb4..d66c502 100644
--- a/src/audio/synth.cc
+++ b/src/audio/synth.cc
@@ -30,6 +30,8 @@ struct Voice {
   int buffer_pos;
   float fractional_pos; // Fractional sample position for tempo scaling
 
+  int start_sample_offset; // Samples to wait before producing audio output
+
   const volatile float* active_spectral_data;
 };
 
@@ -152,7 +154,8 @@ void synth_commit_update(int spectrogram_id) {
       new_active_ptr, __ATOMIC_RELEASE);
 }
 
-void synth_trigger_voice(int spectrogram_id, float volume, float pan) {
+void synth_trigger_voice(int spectrogram_id, float volume, float pan,
+                         int start_offset_samples) {
   if (spectrogram_id < 0 || spectrogram_id >= MAX_SPECTROGRAMS ||
       !g_synth_data.spectrogram_registered[spectrogram_id]) {
 #if defined(DEBUG_LOG_SYNTH)
@@ -174,6 +177,11 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan) {
         pan, spectrogram_id);
     pan = (pan < -1.0f) ? -1.0f : 1.0f;
   }
+  if (start_offset_samples < 0) {
+    DEBUG_SYNTH("[SYNTH WARNING] Negative start_offset=%d, clamping to 0\n",
+                start_offset_samples);
+    start_offset_samples = 0;
+  }
 #endif
 
   for (int i = 0; i < MAX_VOICES; ++i) {
@@ -193,6 +201,7 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan) {
       v.buffer_pos = DCT_SIZE; // Force IDCT on first render
       v.fractional_pos =
           0.0f; // Initialize fractional position for tempo scaling
+      v.start_sample_offset = start_offset_samples; // NEW: Sample-accurate timing
       v.active_spectral_data =
           g_synth_data.active_spectrogram_data[spectrogram_id];
 
@@ -223,6 +232,12 @@ void synth_render(float* output_buffer, int num_frames) {
       if (!v.active)
         continue;
 
+      // NEW: Skip this sample if we haven't reached the trigger offset yet
+      if (v.start_sample_offset > 0) {
+        v.start_sample_offset--;
+        continue; // Don't produce audio until offset elapsed
+      }
+
       if (v.buffer_pos >= DCT_SIZE) {
         if (v.current_spectral_frame >= v.total_spectral_frames) {
           v.active = false;
-- 
cgit v1.2.3