From f2963ac821a3af1c54002ba13944552166956d04 Mon Sep 17 00:00:00 2001
From: skal
Date: Sat, 7 Feb 2026 16:41:30 +0100
Subject: fix(audio): Synchronize audio-visual timing with playback time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem:
  test_demo was "flashing a lot": visual effects triggered ~400ms before
  the corresponding audio was heard, causing poor synchronization.

Root causes:
  1. The beat calculation used physical time (platform_state.time), while
     the audio peak is measured at playback time (~400ms behind, due to
     the ring buffer).
  2. The peak decay was too slow (0.7 per callback = ~800ms fade) relative
     to the beat interval (500ms at 120 BPM).

Solution:
  1. Use audio_get_playback_time() for the beat calculation:
     - Automatically accounts for ring buffer latency.
     - No hardcoded constants (hardcoding a 400ms offset was considered
       and rejected).
     - The system queries its own state.
  2. Faster decay rate (0.5 instead of 0.7) to match the beat interval.
  3. Added an inline PeakMeterEffect for visual debugging.

Changes:
  - src/test_demo.cc:
    - Added inline PeakMeterEffect class (red bar visualization).
    - Use audio_get_playback_time() instead of physical time for the beat
      calculation.
    - Updated logging to show audio time.
  - src/audio/backend/miniaudio_backend.cc:
    - Changed decay rate from 0.7 to 0.5 (~500ms fade time).
  - src/gpu/gpu.{h,cc}:
    - Added gpu_add_custom_effect() API for runtime effect injection.
    - Exposed g_device, g_queue, g_format as non-static globals.
  - doc/PEAK_METER_DEBUG.md:
    - Initial analysis of the timing issues.
  - doc/AUDIO_TIMING_ARCHITECTURE.md:
    - Comprehensive architecture documentation.
    - Time source hierarchy (physical → audio playback → music).
    - Future work: TimeProvider class, tracker_get_bpm() API.

Architectural principle:
  Single source of truth: platform_get_time() is the only physical clock,
  and everything else derives from it. No hardcoded latency constants.

Result:
  Visual effects now stay in sync with the audio that is actually heard.
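Illustration:
  A minimal sketch of how such a playback-time query can be implemented,
  assuming the backend counts the frames it actually hands to the device
  in its callback. The names (g_frames_played, kSampleRate,
  on_frames_played) and the 48kHz rate are illustrative, not the actual
  miniaudio_backend internals:

    // Playback time = frames delivered / sample rate. This clock trails
    // platform_get_time() by exactly the ring-buffer latency, whatever
    // that latency happens to be - no hardcoded constant needed.
    #include <atomic>
    #include <cstdint>

    static std::atomic<uint64_t> g_frames_played{0};
    static const uint32_t kSampleRate = 48000;

    // Called from the real-time audio callback after writing frame_count
    // frames to the device.
    static void on_frames_played(uint32_t frame_count) {
      g_frames_played.fetch_add(frame_count, std::memory_order_relaxed);
    }

    // Seconds of audio that have actually reached the device.
    static float playback_time_seconds() {
      return (float)g_frames_played.load(std::memory_order_relaxed) /
             (float)kSampleRate;
    }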
---
 src/audio/backend/miniaudio_backend.cc |   7 +-
 src/gpu/gpu.cc                         |  13 +++-
 src/gpu/gpu.h                          |   7 ++
 src/test_demo.cc                       | 136 ++++++++++++++++++++++++++++-----
 4 files changed, 140 insertions(+), 23 deletions(-)

diff --git a/src/audio/backend/miniaudio_backend.cc b/src/audio/backend/miniaudio_backend.cc
index da8d558..3be9fb0 100644
--- a/src/audio/backend/miniaudio_backend.cc
+++ b/src/audio/backend/miniaudio_backend.cc
@@ -158,12 +158,13 @@ void MiniaudioBackend::audio_callback(ma_device* pDevice, void* pOutput,
   }
 
   // Exponential averaging: instant attack, fast decay
-  // Decay rate of 0.7 gives ~1 second decay time for visual sync
-  // (At 128ms callbacks: 0.7^7.8 ≈ 0.1 after ~1 second)
+  // Decay rate of 0.5 gives ~500ms decay time for 120 BPM music
+  // (At 128ms callbacks: 0.5^3.9 ≈ 0.07 after ~500ms = 1 beat)
+  // TODO: Make decay rate configurable based on BPM from tracker/MainSequence
   if (frame_peak > realtime_peak_) {
     realtime_peak_ = frame_peak;  // Attack: instant
   } else {
-    realtime_peak_ *= 0.7f;  // Decay: fast (30% per callback)
+    realtime_peak_ *= 0.5f;  // Decay: 50% per callback
   }
 }
diff --git a/src/gpu/gpu.cc b/src/gpu/gpu.cc
index 45f0f34..63a30ff 100644
--- a/src/gpu/gpu.cc
+++ b/src/gpu/gpu.cc
@@ -21,10 +21,11 @@
 static WGPUInstance g_instance = nullptr;
 static WGPUAdapter g_adapter = nullptr;
-static WGPUDevice g_device = nullptr;
-static WGPUQueue g_queue = nullptr;
+WGPUDevice g_device = nullptr;  // Non-static for external access (debug builds)
+WGPUQueue g_queue = nullptr;    // Non-static for external access (debug builds)
 static WGPUSurface g_surface = nullptr;
 static WGPUSurfaceConfiguration g_config = {};
+WGPUTextureFormat g_format = WGPUTextureFormat_BGRA8Unorm;  // Exposed for custom effects
 
 static MainSequence g_main_sequence;
@@ -354,6 +355,7 @@ void gpu_init(PlatformState* platform_state) {
   g_config.device = g_device;
   g_config.format = swap_chain_format;
+  g_format = swap_chain_format;  // Update global format for external access
   g_config.usage = WGPUTextureUsage_RenderAttachment;
   g_config.width = platform_state->width;
   g_config.height = platform_state->height;
@@ -386,6 +388,13 @@ void gpu_resize(int width, int height) {
 void gpu_simulate_until(float time) {
   g_main_sequence.simulate_until(time, 1.0f / 60.0f);
 }
+
+void gpu_add_custom_effect(Effect* effect, float start_time, float end_time, int priority) {
+  auto seq = std::make_shared<Sequence>();
+  seq->add_effect(std::shared_ptr<Effect>(effect), start_time, end_time, priority);
+  seq->init(&g_main_sequence);
+  g_main_sequence.add_sequence(seq, 0.0f, priority);
+}
 #endif /* !defined(STRIP_ALL) */
 
 void gpu_shutdown() {
diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h
index d7f5a8d..b8f58b2 100644
--- a/src/gpu/gpu.h
+++ b/src/gpu/gpu.h
@@ -7,6 +7,7 @@
 #include "platform/platform.h"
 
 struct PlatformState;  // Forward declaration
+class Effect;  // Forward declaration
 
 // Basic wrapper for WebGPU buffers
 struct GpuBuffer {
@@ -36,6 +37,12 @@ void gpu_draw(float audio_peak, float aspect_ratio, float time, float beat);
 void gpu_resize(int width, int height);
 #if !defined(STRIP_ALL)
 void gpu_simulate_until(float time);
+void gpu_add_custom_effect(Effect* effect, float start_time, float end_time, int priority);
+
+// Expose WebGPU globals for custom effects (debug builds only)
+extern WGPUDevice g_device;
+extern WGPUQueue g_queue;
+extern WGPUTextureFormat g_format;
 #endif
 void gpu_shutdown();
diff --git a/src/test_demo.cc b/src/test_demo.cc
index c26e65a..9ae0e3a 100644
--- a/src/test_demo.cc
+++ b/src/test_demo.cc
@@ -17,6 +17,95 @@
 extern float GetDemoDuration();
 extern void LoadTimeline(MainSequence& main_seq, WGPUDevice device,
                          WGPUQueue queue, WGPUTextureFormat format);
 
+// Inline peak meter effect for debugging audio-visual sync
+#include "gpu/effects/post_process_helper.h"
+class PeakMeterEffect : public PostProcessEffect {
+ public:
+  PeakMeterEffect(WGPUDevice device, WGPUQueue queue, WGPUTextureFormat format)
+      : PostProcessEffect(device, queue) {
+    const char* shader_code = R"(
+      struct VertexOutput {
+        @builtin(position) position: vec4<f32>,
+        @location(0) uv: vec2<f32>,
+      };
+
+      struct Uniforms {
+        peak_value: f32,
+        _pad0: f32,
+        _pad1: f32,
+        _pad2: f32,
+      };
+
+      @group(0) @binding(0) var inputSampler: sampler;
+      @group(0) @binding(1) var inputTexture: texture_2d<f32>;
+      @group(0) @binding(2) var<uniform> uniforms: Uniforms;
+
+      @vertex
+      fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
+        var output: VertexOutput;
+        var pos = array<vec2<f32>, 3>(
+          vec2<f32>(-1.0, -1.0),
+          vec2<f32>(3.0, -1.0),
+          vec2<f32>(-1.0, 3.0)
+        );
+        output.position = vec4<f32>(pos[vertexIndex], 0.0, 1.0);
+        output.uv = pos[vertexIndex] * 0.5 + 0.5;
+        return output;
+      }
+
+      @fragment
+      fn fs_main(input: VertexOutput) -> @location(0) vec4<f32> {
+        let color = textureSample(inputTexture, inputSampler, input.uv);
+
+        // Draw red horizontal bar in middle of screen
+        // Bar height: 5% of screen height
+        // Bar width: proportional to peak_value (0.0 to 1.0)
+        let bar_height = 0.05;
+        let bar_center_y = 0.5;
+        let bar_y_min = bar_center_y - bar_height * 0.5;
+        let bar_y_max = bar_center_y + bar_height * 0.5;
+
+        // Bar extends from left (0.0) to peak_value position
+        let bar_x_max = uniforms.peak_value;
+
+        // Check if current pixel is inside the bar
+        let in_bar_y = input.uv.y >= bar_y_min && input.uv.y <= bar_y_max;
+        let in_bar_x = input.uv.x <= bar_x_max;
+
+        if (in_bar_y && in_bar_x) {
+          // Red bar
+          return vec4<f32>(1.0, 0.0, 0.0, 1.0);
+        } else {
+          // Original color
+          return color;
+        }
+      }
+    )";
+
+    pipeline_ = create_post_process_pipeline(device, format, shader_code);
+    uniforms_ = gpu_create_buffer(
+        device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
+  }
+
+  void update_bind_group(WGPUTextureView input_view) {
+    pp_update_bind_group(device_, pipeline_, &bind_group_, input_view, uniforms_);
+  }
+
+  void render(WGPURenderPassEncoder pass, float time, float beat,
+              float peak_value, float aspect_ratio) {
+    (void)time;
+    (void)beat;
+    (void)aspect_ratio;
+
+    float uniforms[4] = {peak_value, 0.0f, 0.0f, 0.0f};
+    wgpuQueueWriteBuffer(queue_, uniforms_.buffer, 0, uniforms, sizeof(uniforms));
+
+    wgpuRenderPassEncoderSetPipeline(pass, pipeline_);
+    wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr);
+    wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
+  }
+};
+
 #if !defined(STRIP_ALL)
 static void print_usage(const char* prog_name) {
   printf("Usage: %s [OPTIONS]\n", prog_name);
@@ -104,6 +193,13 @@ int main(int argc, char** argv) {
   // Initialize platform, GPU, audio
   platform_state = platform_init(fullscreen_enabled, width, height);
   gpu_init(&platform_state);
+
+  // Add peak meter visualization effect (renders as final post-process)
+#if !defined(STRIP_ALL)
+  auto* peak_meter = new PeakMeterEffect(g_device, g_queue, g_format);
+  gpu_add_custom_effect(peak_meter, 0.0f, 99999.0f, 999);  // High priority = renders last
+#endif
+
   audio_init();
 
   static AudioEngine g_audio_engine;
@@ -187,26 +283,30 @@ int main(int argc, char** argv) {
       gpu_resize(last_width, last_height);
     }
 
-    const double current_time = platform_state.time;
+    const double physical_time = platform_state.time;
 
-    // Auto-exit at end
-    if (demo_duration > 0.0f && current_time >= demo_duration) {
+    // Auto-exit at end (based on physical time for reliability)
+    if (demo_duration > 0.0f && physical_time >= demo_duration) {
 #if !defined(STRIP_ALL)
-      printf("test_demo finished at %.2f seconds.\n", current_time);
+      printf("test_demo finished at %.2f seconds.\n", physical_time);
 #endif
       break;
     }
 
-    fill_audio_buffer(current_time);
+    fill_audio_buffer(physical_time);
+
+    // Audio-visual synchronization: use audio playback time (not physical time!)
+    // This accounts for ring buffer latency automatically (no hardcoded constants)
+    const float audio_time = audio_get_playback_time();
 
     // Audio/visual sync parameters
     const float aspect_ratio = platform_state.aspect_ratio;
-    // Use real-time peak for proper audio-visual synchronization
+    // Peak is measured at audio playback time, so it matches audio_time
    const float raw_peak = audio_get_realtime_peak();
    const float visual_peak = fminf(raw_peak * 8.0f, 1.0f);
 
-    // Beat calculation (hardcoded BPM=120)
-    const float beat_time = (float)current_time * 120.0f / 60.0f;
+    // Beat calculation uses AUDIO TIME (what's being heard), not physical time
+    const float beat_time = audio_time * 120.0f / 60.0f;
     const int beat_number = (int)beat_time;
     const float beat = fmodf(beat_time, 1.0f);
 
@@ -215,30 +315,30 @@ int main(int argc, char** argv) {
     if (peak_log) {
       if (log_peaks_fine) {
         // Log every frame for fine-grained analysis
-        fprintf(peak_log, "%d %.6f %.6f %d\n", frame_number, current_time, raw_peak, beat_number);
+        fprintf(peak_log, "%d %.6f %.6f %d\n", frame_number, audio_time, raw_peak, beat_number);
       } else if (beat_number != last_beat_logged) {
         // Log only at beat boundaries
-        fprintf(peak_log, "%d %.6f %.6f\n", beat_number, current_time, raw_peak);
+        fprintf(peak_log, "%d %.6f %.6f\n", beat_number, audio_time, raw_peak);
         last_beat_logged = beat_number;
       }
     }
     frame_number++;
 
-    // Debug output every 0.5 seconds
+    // Debug output every 0.5 seconds (based on audio time for consistency)
     static float last_print_time = -1.0f;
-    if (current_time - last_print_time >= 0.5f) {
+    if (audio_time - last_print_time >= 0.5f) {
       if (tempo_test_enabled) {
-        printf("[T=%.2f, MusicT=%.2f, Beat=%d, Frac=%.2f, Peak=%.2f, Tempo=%.2fx]\n",
-               (float)current_time, g_music_time, beat_number, beat, visual_peak, g_tempo_scale);
+        printf("[AudioT=%.2f, PhysT=%.2f, MusicT=%.2f, Beat=%d, Frac=%.2f, Peak=%.2f, Tempo=%.2fx]\n",
+               audio_time, (float)physical_time, g_music_time, beat_number, beat, visual_peak, g_tempo_scale);
       } else {
-        printf("[T=%.2f, Beat=%d, Frac=%.2f, Peak=%.2f]\n",
-               (float)current_time, beat_number, beat, visual_peak);
+        printf("[AudioT=%.2f, Beat=%d, Frac=%.2f, Peak=%.2f]\n",
+               audio_time, beat_number, beat, visual_peak);
       }
-      last_print_time = (float)current_time;
+      last_print_time = audio_time;
    }
 #endif
 
-    gpu_draw(visual_peak, aspect_ratio, (float)current_time, beat);
+    gpu_draw(visual_peak, aspect_ratio, audio_time, beat);
 
     audio_update();
   }
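Sanity check:
  A standalone calculation of the decay numbers quoted in the comments
  above (hypothetical snippet; the 128ms callback interval is taken from
  the diff comments, not measured):

    // At 128ms callbacks and 120 BPM (500ms per beat), a decay factor of
    // 0.5 leaves ~7% of the peak after one beat, while the old 0.7 still
    // left ~25% - too slow for consecutive beats to read as separate flashes.
    #include <cmath>
    #include <cstdio>

    int main() {
      const double callback_s = 0.128;       // callback interval (from the diff comments)
      const double beat_s = 60.0 / 120.0;    // one beat at 120 BPM = 500ms
      const double n = beat_s / callback_s;  // ≈3.9 callbacks per beat
      std::printf("callbacks per beat: %.2f\n", n);
      std::printf("peak left after one beat, decay 0.5: %.3f\n", std::pow(0.5, n));  // ≈0.067
      std::printf("peak left after one beat, decay 0.7: %.3f\n", std::pow(0.7, n));  // ≈0.249
      return 0;
    }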