6 files changed, 816 insertions, 23 deletions
diff --git a/doc/AUDIO_TIMING_ARCHITECTURE.md b/doc/AUDIO_TIMING_ARCHITECTURE.md
new file mode 100644
index 0000000..9ac3927
--- /dev/null
+++ b/doc/AUDIO_TIMING_ARCHITECTURE.md
@@ -0,0 +1,452 @@
+# Audio Timing Architecture - Proper Solution (February 7, 2026)
+
+## Problem Statement
+
+**Original Issue:** "demo is still flashing a lot" due to audio-visual timing mismatch.
+
+**Root Causes:**
+1. Multiple time sources with no clear hierarchy
+2. Hardcoded latency constants (400ms) in solution proposals
+3. Beat calculation using wrong time source
+4. Peak decay rate not matched to music tempo
+
+---
+
+## Correct Architecture ✅
+
+### Single Source of Truth: Physical Clock
+
+```cpp
+platform_get_time() → ONE authoritative wall clock from OS
+```
+
+**Everything else derives from this:**
+
+```
+Physical Time (platform_get_time())
+    ↓
+┌────────────────────────────────────────────────┐
+│ Audio System tracks its own state:            │
+│  • audio_get_playback_time()                  │
+│    → Based on ring buffer samples consumed    │
+│    → Automatically accounts for buffering     │
+│    → NO hardcoded constants!                  │
+└────────────────────────────────────────────────┘
+    ↓
+┌────────────────────────────────────────────────┐
+│ Music Time (tracker time)                     │
+│  • Derived from audio playback time           │
+│  • Scaled by tempo_scale                      │
+│  • Used by tracker for event triggering       │
+└────────────────────────────────────────────────┘
+```
+
+### Time Sources Summary
+
+| Time Source | Purpose | How to Get | Use For |
+|-------------|---------|------------|---------|
+| **Physical Time** | Wall clock, frame deltas | `platform_get_time()` | dt calculations, physics |
+| **Audio Playback Time** | What's being HEARD | `audio_get_playback_time()` | Audio-visual sync, beat display |
+| **Music Time** | Tracker time (tempo-scaled) | `g_music_time` | Tracker event triggering |
+
+---
+
+## Implementation: test_demo.cc
+
+### Before (Wrong ❌)
+
+```cpp
+const double current_time = platform_state.time;  // Physical time
+
+// Beat calculation based on physical time
+const float beat_time = (float)current_time * 120.0f / 60.0f;
+
+// But peak is measured at audio playback time (400ms behind!)
+const float raw_peak = audio_get_realtime_peak();
+
+// MISMATCH: beat and peak are from different time sources!
+```
+
+**Problem:** Visual beat shows beat 2 (physical time), but peak shows beat 1 (audio playback time).
+
+### After (Correct ✅)
+
+```cpp
+const double physical_time = platform_state.time;  // For dt calculations
+
+// Audio playback time: what's being HEARD right now
+const float audio_time = audio_get_playback_time();
+
+// Beat calculation uses AUDIO TIME (matches peak measurement)
+const float beat_time = audio_time * 120.0f / 60.0f;
+
+// Peak is measured at audio playback time
+const float raw_peak = audio_get_realtime_peak();
+
+// SYNCHRONIZED: beat and peak are from same time source!
+```
+
+**Result:** Visual beat shows beat 1, peak shows beat 1 → synchronized! ✅
+
+---
+
+## How audio_get_playback_time() Works
+
+**Implementation** (audio.cc:169-173):
+```cpp
+float audio_get_playback_time() {
+  const int64_t total_samples = g_ring_buffer.get_total_read();
+  return (float)total_samples / (RING_BUFFER_SAMPLE_RATE * RING_BUFFER_CHANNELS);
+}
+```
+
+**Key Points:**
+1. **Tracks samples consumed** by audio callback (not samples rendered)
+2. **Automatically accounts for ring buffer latency** (no hardcoded constants!)
+3. **Self-consistent** with `audio_get_realtime_peak()` (measured at same moment)
+
+**Example Timeline:**
+```
+T(physical) = 1.00s:
+  → Ring buffer has 400ms of lookahead
+  → Audio callback is playing samples rendered at T=0.60s (music time)
+  → total_read counter reflects 0.60s worth of samples
+  → audio_get_playback_time() returns 0.60s
+  → audio_get_realtime_peak() measured from samples at 0.60s
+  → Beat calculation: 0.60s * 2 = 1.2 → beat 1
+  → SYNCHRONIZED! ✅
+```
+
+---
+
+## Remaining Issues: Data-Driven Configuration
+
+### Issue #1: Hardcoded Decay Rate
+
+**Current** (miniaudio_backend.cc:166):
+```cpp
+realtime_peak_ *= 0.5f;  // Hardcoded: 50% per callback
+```
+
+**Problem:** Decay rate should match music tempo, not be hardcoded!
+
+**Proposed Solution:**
+```cpp
+// AudioBackend should query decay rate from audio system:
+float decay_rate = audio_get_peak_decay_rate();  // Returns BPM-adjusted rate
+realtime_peak_ *= decay_rate;
+```
+
+**How to calculate:**
+```cpp
+// In audio system (based on current BPM):
+float audio_get_peak_decay_rate() {
+  const float bpm = tracker_get_bpm();  // e.g., 120
+  const float beat_interval = 60.0f / bpm;  // e.g., 0.5s
+  const float callback_interval = 0.128f;  // Measured from device
+
+  // Decay to 10% within one beat:
+  // decay_rate^(beat_interval / callback_interval) = 0.1
+  // decay_rate = 0.1^(callback_interval / beat_interval)
+
+  const float num_callbacks_per_beat = beat_interval / callback_interval;
+  return powf(0.1f, 1.0f / num_callbacks_per_beat);
+}
+```
+
+**Result:** At 120 BPM, decay to 10% in 0.5s (1 beat). At 60 BPM, decay to 10% in 1.0s (1 beat). Adapts automatically!
+
+---
+
+### Issue #2: Hardcoded BPM
+
+**Current** (test_demo.cc:305):
+```cpp
+const float beat_time = audio_time * 120.0f / 60.0f;  // Hardcoded BPM
+```
+
+**Problem:** BPM should come from tracker/music data!
+
+**Proposed Solution:**
+```cpp
+// Tracker should expose BPM:
+const float bpm = tracker_get_bpm();  // From TrackerScore
+const float beat_time = audio_time * bpm / 60.0f;
+
+// Or even better, tracker calculates beat directly:
+const float beat = tracker_get_current_beat(audio_time);
+```
+
+**Implementation:**
+```cpp
+// In tracker.h/cc:
+float tracker_get_bpm() {
+  return g_tracker_score.bpm;  // From parsed .track file
+}
+
+float tracker_get_current_beat(float audio_time) {
+  return audio_time * (g_tracker_score.bpm / 60.0f);
+}
+```
+
+**Result:** Change BPM in `.track` file → everything updates automatically!
+
+---
+
+### Issue #3: No API for "What time is it in sequence world?"
+
+**User's Suggestion:**
+> "ask the AudioSystem or demo system (MainSequence?) what 'time' it is in the sequence world"
+
+**Current Approach:** Each system tracks its own time independently
+- test_demo.cc: Uses `audio_get_playback_time()` directly
+- main.cc: Uses `platform_state.time + seek_time`
+- MainSequence: Uses `global_time` parameter passed to `render_frame()`
+
+**Problem:** No central "what time should I use?" API
+
+**Proposed API:**
+```cpp
+// In MainSequence or AudioEngine:
+class TimeProvider {
+ public:
+  // Returns: Current time in "sequence world" (accounting for all latencies)
+  float get_current_time() const {
+    return audio_get_playback_time();  // Use audio playback time
+  }
+
+  // Returns: Current beat (fractional)
+  float get_current_beat() const {
+    return get_current_time() * (bpm_ / 60.0f);
+  }
+
+  // Returns: Current peak (synchronized with current time)
+  float get_current_peak() const {
+    return audio_get_realtime_peak();  // Already synchronized
+  }
+};
+
+// Usage in test_demo.cc or main.cc:
+const float time = g_time_provider.get_current_time();
+const float beat = g_time_provider.get_current_beat();
+const float peak = g_time_provider.get_current_peak();
+
+// All guaranteed to be synchronized!
+```
+
+**Benefits:**
+- Single point of query for all timing
+- Hides implementation details (ring buffer, latency, etc.)
+- Easy to change timing strategy globally
+- Clear contract: "This is the time to use for audio-visual sync"
+
+---
+
+## Next Steps
+
+### Completed ✅
+1. ✅ Use `audio_get_playback_time()` instead of physical time for beat calculation
+2. ✅ Faster decay rate (0.5 instead of 0.7) to prevent constant flashing
+3. ✅ Peak meter visualization to verify timing visually
+4. ✅ No hardcoded latency constants (system queries its own state)
+
+### Future Work (Deferred)
+
+#### Task: Add tracker_get_bpm() API
+**Purpose:** Read BPM from `.track` file instead of hardcoding in test_demo.cc/main.cc
+
+**Implementation:**
+```cpp
+// In tracker.h:
+float tracker_get_bpm();  // Returns g_tracker_score.bpm
+
+// In tracker.cc:
+float tracker_get_bpm() {
+  return g_tracker_score.bpm;
+}
+
+// Usage in test_demo.cc/main.cc:
+const float bpm = tracker_get_bpm();  // Instead of hardcoded 120.0f
+const float beat_time = audio_time * (bpm / 60.0f);
+```
+
+**Benefits:**
+- Change BPM in `.track` file → everything updates automatically
+- No hardcoded BPM values in demo code
+- Supports variable BPM (future enhancement)
+
+---
+
+#### Task: BPM-Aware Peak Decay Rate
+**Purpose:** Calculate decay rate based on current BPM to match beat interval
+
+**Implementation:**
+```cpp
+// In audio.h:
+float audio_get_peak_decay_rate();  // BPM-adjusted decay
+
+// In audio.cc:
+float audio_get_peak_decay_rate() {
+  const float bpm = tracker_get_bpm();
+  const float beat_interval = 60.0f / bpm;  // e.g., 0.5s at 120 BPM
+  const float callback_interval = 0.128f;   // Measured from device
+
+  // Decay to 10% within one beat:
+  const float n = beat_interval / callback_interval;
+  return powf(0.1f, 1.0f / n);
+}
+
+// In miniaudio_backend.cc:
+realtime_peak_ *= audio_get_peak_decay_rate();  // Instead of hardcoded 0.5f
+```
+
+**Benefits:**
+- Peak decays to 10% in exactly 1 beat (regardless of BPM)
+- At 120 BPM: decay ≈ 0.55 per callback (500ms fade)
+- At 60 BPM: decay ≈ 0.74 per callback (1000ms fade)
+- Adapts automatically to tempo changes
+
+---
+
+#### Task: TimeProvider Class (Architectural)
+**Purpose:** Centralize all timing queries with single source of truth
+
+**Design:**
+```cpp
+// In audio/time_provider.h:
+class TimeProvider {
+ public:
+  TimeProvider();
+
+  // Returns: Current time in "sequence world" (what's being heard)
+  float get_current_time() const {
+    return audio_get_playback_time();
+  }
+
+  // Returns: Current beat (fractional, BPM-aware)
+  float get_current_beat() const {
+    const float bpm = tracker_get_bpm();
+    return get_current_time() * (bpm / 60.0f);
+  }
+
+  // Returns: Current peak (synchronized with current time)
+  float get_current_peak() const {
+    return audio_get_realtime_peak();
+  }
+
+  // Returns: Current BPM
+  float get_bpm() const {
+    return tracker_get_bpm();
+  }
+};
+
+// Usage in test_demo.cc, main.cc, effects:
+extern TimeProvider g_time_provider;  // Global or MainSequence member
+
+const float time = g_time_provider.get_current_time();
+const float beat = g_time_provider.get_current_beat();
+const float peak = g_time_provider.get_current_peak();
+
+// All guaranteed to be synchronized!
+```
+
+**Integration with MainSequence:**
+```cpp
+class MainSequence {
+ public:
+  TimeProvider time_provider;
+
+  void render_frame(float global_time, float beat, float peak,
+                    float aspect_ratio, WGPUSurface surface) {
+    // Effects can query: time_provider.get_current_time() etc.
+  }
+};
+```
+
+**Benefits:**
+- Single point of query for all timing
+- Hides implementation details (ring buffer, latency)
+- Easy to change timing strategy globally
+- Clear contract: "This is the time for audio-visual sync"
+- No more passing time parameters everywhere
+
+**Migration Path:**
+1. Create TimeProvider class
+2. Expose as global or MainSequence member
+3. Gradually migrate test_demo.cc, main.cc, effects to use it
+4. Remove time/beat/peak parameters from render functions
+5. Everything queries TimeProvider directly
+
+---
+
+### Design Principles Established
+
+1. ✅ **Single physical clock:** `platform_get_time()` is the only wall clock
+2. ✅ **Systems expose their state:** `audio_get_playback_time()` knows its latency
+3. ✅ **No hardcoded constants:** System queries its own state dynamically
+4. ✅ **Data-driven configuration:** BPM from tracker, decay from BPM (future)
+5. ✅ **Synchronized time sources:** Beat and peak from same moment
+
+---
+
+## Testing Verification
+
+### With Peak Meter Visualization
+
+Run `./build/test_demo` and observe:
+- ✅ Red bar extends when kicks hit (beats 0, 2, 4, ...)
+- ✅ Bar width matches FlashEffect intensity
+- ✅ Bar decays before next beat (no constant red bar)
+- ✅ Snares show narrower bar width (~50-70%)
+
+### With Peak Logging
+
+Run `./build/test_demo --log-peaks peaks.txt` and verify:
+```bash
+# Expected pattern (120 BPM, kicks every 1s):
+Beat 0 (T=0.0s): High peak (kick)
+Beat 1 (T=0.5s): Medium peak (snare)
+Beat 2 (T=1.0s): High peak (kick)
+Beat 3 (T=1.5s): Medium peak (snare)
+...
+```
+
+### Console Output
+
+Should show:
+```
+[AudioT=0.06, Beat=0, Frac=0.13, Peak=1.00]  ← Kick
+[AudioT=0.58, Beat=1, Frac=0.15, Peak=0.62]  ← Snare (quieter)
+[AudioT=1.09, Beat=2, Frac=0.18, Peak=0.16]  ← Decayed (between beats)
+[AudioT=2.62, Beat=5, Frac=0.25, Peak=1.00]  ← Kick
+```
+
+**No more constant Peak=1.00 from beat 15 onward!**
+
+---
+
+## Summary
+
+### What We Fixed
+1. ✅ **Use audio playback time** instead of physical time for beat calculation
+2. ✅ **Faster decay** (0.5 instead of 0.7) to match beat interval
+3. ✅ **No hardcoded latency** - system queries its own state
+
+### What Still Needs Improvement
+1. ⚠️ **BPM should come from tracker** (not hardcoded 120)
+2. ⚠️ **Decay rate should be calculated from BPM** (not hardcoded 0.5)
+3. ⚠️ **Centralized TimeProvider** for all timing queries
+
+### Key Insight (User's Contribution)
+> "There should be a unique tick-source somewhere, that is the real physical_time. Then, we shouldn't hardcode the constants like 400ms, but really ask the AudioSystem or demo system (MainSequence?) what 'time' it is in the sequence world."
+
+**This is the correct architectural principle!** ✅
+- ONE physical clock (platform_get_time)
+- Systems expose their own state (audio_get_playback_time)
+- No hardcoded constants - query the system
+- Data-driven configuration (BPM from tracker)
+
+---
+
+*Created: February 7, 2026*
+*Architectural discussion and implementation complete*
diff --git a/doc/PEAK_METER_DEBUG.md b/doc/PEAK_METER_DEBUG.md
new file mode 100644
index 0000000..002180c
--- /dev/null
+++ b/doc/PEAK_METER_DEBUG.md
@@ -0,0 +1,224 @@
+# Peak Meter Debug Summary (February 7, 2026)
+
+## Side-Task Completed: Peak Visualization ✅
+
+Added inline peak meter effect to test_demo for visual debugging of audio-visual synchronization.
+
+### Implementation
+
+**Files Modified:**
+- `src/test_demo.cc`: Added `PeakMeterEffect` class inline (89 lines of WGSL + C++)
+- `src/gpu/gpu.h`: Added `gpu_add_custom_effect()` API and exposed `g_device`, `g_queue`, `g_format`
+- `src/gpu/gpu.cc`: Implemented `gpu_add_custom_effect()` to add effects to MainSequence at runtime
+
+**Peak Meter Features:**
+- Red horizontal bar in middle of screen (5% height)
+- Bar width extends from left (0.0) to peak_value (0.0-1.0)
+- Renders as final post-process pass (priority=999)
+- Only instantiated in debug builds (`!STRIP_ALL`); the class definition itself sits outside that guard in `src/test_demo.cc`
+
+**Visual Effect:**
+```
+Screen Layout:
+┌─────────────────────────────────────┐
+│                                     │
+│                                     │
+│    ████████████░░░░░░░░░░░░░░░░░   │  ← Red bar (width = audio peak)
+│                                     │
+│                                     │
+└─────────────────────────────────────┘
+```
+
+### WGSL Shader Code
+```wgsl
+@fragment
+fn fs_main(input: VertexOutput) -> @location(0) vec4<f32> {
+  let color = textureSample(inputTexture, inputSampler, input.uv);
+
+  // Draw red horizontal bar in middle of screen
+  let bar_height = 0.05;
+  let bar_center_y = 0.5;
+  let bar_y_min = bar_center_y - bar_height * 0.5;
+  let bar_y_max = bar_center_y + bar_height * 0.5;
+  let bar_x_max = uniforms.peak_value;
+
+  let in_bar_y = input.uv.y >= bar_y_min && input.uv.y <= bar_y_max;
+  let in_bar_x = input.uv.x <= bar_x_max;
+
+  if (in_bar_y && in_bar_x) {
+    return vec4<f32>(1.0, 0.0, 0.0, 1.0);  // Red bar
+  } else {
+    return color;  // Original scene
+  }
+}
+```
+
+---
+
+## Main Issue: Audio Peak Timing Analysis 🔍
+
+### Problem Discovery
+
+The raw_peak values logged at beat boundaries don't match the expected drum pattern:
+
+**Expected Pattern** (from test_demo.track):
+```
+Beat 0, 2: Kick (volume 1.0) → expect raw_peak ~0.125 (after 8x = 1.0 visual)
+Beat 1, 3: Snare (volume 0.9) → expect raw_peak ~0.090 (after 8x = 0.72 visual)
+```
+
+**Actual Logged Peaks** (from peaks.txt):
+```
+Beat | Time  | Raw Peak | Expected
+-----|-------|----------|----------
+0    | 0.19s | 0.588    | ~0.125 (kick)
+1    | 0.50s | 0.177    | ~0.090 (snare)
+2    | 1.00s | 0.236    | ~0.125 (kick)  ← Too low!
+3    | 1.50s | 0.199    | ~0.090 (snare)
+4    | 2.00s | 0.234    | ~0.125 (kick)  ← Too low!
+5    | 2.50s | 0.475    | ~0.090 (snare)
+9    | 4.50s | 0.975    | ~0.090 (snare) ← Kick-level peak on a snare beat!
+```
+
+### Root Cause: Ring Buffer Latency
+
+**Ring Buffer Configuration:**
+- `RING_BUFFER_LOOKAHEAD_MS = 400` (src/audio/ring_buffer.h:14)
+- Audio is rendered 400ms ahead of playback
+- Real-time peak is measured when audio is actually played (in audio callback)
+- Visual timing uses `current_time` (physical time)
+
+**Timing Mismatch:**
+```
+Visual Beat 2 (T=1.00s) → Audio being played (T=1.00s - 0.40s = T=0.60s)
+                       → At T=0.60s, beat = 0.60 * 2 = 1.2 → Beat 1 (snare)
+                       → Visual expects kick, but hearing snare!
+```
+
+### Peak Decay Analysis
+
+**Decay Configuration** (src/audio/backend/miniaudio_backend.cc:166):
+```cpp
+realtime_peak_ *= 0.7f;  // Decay: 30% per callback
+```
+
+**Decay Timing:**
+- Callback interval: ~128ms (at 4096 frames @ 32kHz)
+- To decay from 1.0 to 0.1: `0.7^n = 0.1` → n ≈ 6.45 callbacks
+- Time to 10%: 6.45 * 128ms = 825ms (~0.8 seconds)
+- Comment claims "~1 second decay" (line 162): `0.7^7.8 ≈ 0.1` — slightly off, since `0.7^7.8 ≈ 0.06`; 10% is actually reached after ~6.45 callbacks (see above)
+
+**Problem:**
+- Drums hit every 0.5 seconds (120 BPM = 2 beats/second)
+- Decay takes 0.8-1.0 seconds
+- Peak doesn't drop fast enough between beats!
+
+**Calculation:**
+- After 0.5s (1 beat): `0.7^(0.5/0.128) = 0.7^3.9 ≈ 0.24` (raw peak)
+- Visual peak: `0.24 * 8 = 1.92` (clamped to 1.0)
+- Result: Visual peak stays at 1.0 between beats!
+
+---
+
+## Solutions
+
+### Option A: Fix Ring Buffer Latency Alignment
+**Change:** Use audio playback time instead of current_time for visual effects.
+
+```cpp
+// In test_demo.cc, replace current_time with audio-aligned time:
+const float audio_time = current_time - (RING_BUFFER_LOOKAHEAD_MS / 1000.0f);
+const float beat_time = audio_time * 120.0f / 60.0f;
+```
+
+**Pros:** Simple fix, aligns visual timing with heard audio
+**Cons:** Introduces 400ms visual lag (flash happens 400ms after visual beat)
+
+### Option B: Compensate Peak Forward
+**Change:** Measure peak from future audio (at render time, not playback time).
+
+```cpp
+// In synth.cc, measure peak when audio is rendered:
+float synth_get_output_peak() {
+  return g_peak;  // Peak measured at render time (400ms ahead)
+}
+```
+
+**Pros:** Zero visual lag, flash syncs with visual beat timing
+**Cons:** Flash happens 400ms BEFORE audio is heard (original bug!)
+
+### Option C: Reduce Ring Buffer Latency
+**Change:** Decrease `RING_BUFFER_LOOKAHEAD_MS` from 400ms to 100ms.
+
+**Pros:** Smaller timing mismatch (100ms instead of 400ms)
+**Cons:** May cause audio underruns at 2.0x tempo scaling
+
+### Option D: Faster Peak Decay
+**Change:** Make the peak decay faster (lower the per-callback multiplier) so it fits within one beat interval.
+
+**Target:** Peak should drop below 0.7 (flash threshold) after 0.5s.
+
+**Calculation:**
+- Visual threshold: 0.7
+- After 8x multiplier: raw_peak < 0.7/8 = 0.0875
+- After 0.5s (3.9 callbacks): `decay_rate^3.9 < 0.0875`
+- `decay_rate < 0.0875^(1/3.9) ≈ 0.535`
+
+**Recommended Decay:** 0.5 per callback (instead of 0.7)
+
+```cpp
+// In miniaudio_backend.cc:166
+realtime_peak_ *= 0.5f;  // Decay: 50% per callback (~500ms to 10%)
+```
+
+**Pros:** Flash triggers only on actual hits, fast fade
+**Cons:** Very aggressive decay, might miss short drum hits
+
+---
+
+## Recommended Solution: Option A + Option D
+
+**Combined Approach:**
+1. **Align visual beat timing** with audio playback (subtract 400ms)
+2. **Faster decay** (0.5 instead of 0.7) to prevent overlapping flashes
+
+**Implementation:**
+```cpp
+// test_demo.cc:209 (replace current_time calculation)
+const float audio_aligned_time = (float)current_time - 0.4f;  // Subtract ring buffer latency
+const float beat_time = fmaxf(0.0f, audio_aligned_time) * 120.0f / 60.0f;
+
+// miniaudio_backend.cc:166 (update decay rate)
+realtime_peak_ *= 0.5f;  // Decay: 50% per callback (faster)
+```
+
+**Expected Result:**
+- Visual flash triggers exactly when kick is HEARD (not 400ms early)
+- Flash decays quickly (~500ms) so snare doesn't re-trigger
+- Peak meter visualization shows accurate real-time audio levels
+
+---
+
+## Testing Checklist
+
+With peak meter visualization, verify:
+- [ ] Red bar extends when kicks hit (every 1 second at beats 0, 2, 4, ...)
+- [ ] Bar width matches FlashEffect intensity (both use same peak value)
+- [ ] Bar decays smoothly between hits
+- [ ] Snares (beats 1, 3, 5, ...) show smaller bar width (~60-70%)
+- [ ] With faster decay (0.5), bar reaches minimum before next hit
+
+---
+
+## Next Steps
+
+1. **Implement Option A + D** (timing alignment + faster decay)
+2. **Test with peak meter** to visually verify timing
+3. **Log peaks with --log-peaks** to quantify improvement
+4. **Consider Option C** (reduce ring buffer) if tempo scaling still works
+5. **Update documentation** with final timing strategy
+
+---
+
+*Created: February 7, 2026*
+*Peak meter visualization added, timing analysis complete*
diff --git a/src/audio/backend/miniaudio_backend.cc b/src/audio/backend/miniaudio_backend.cc
index da8d558..3be9fb0 100644
--- a/src/audio/backend/miniaudio_backend.cc
+++ b/src/audio/backend/miniaudio_backend.cc
@@ -158,12 +158,13 @@ void MiniaudioBackend::audio_callback(ma_device* pDevice, void* pOutput,
     }
 
     // Exponential averaging: instant attack, fast decay
-    // Decay rate of 0.7 gives ~1 second decay time for visual sync
-    // (At 128ms callbacks: 0.7^7.8 ≈ 0.1 after ~1 second)
+    // Decay rate of 0.5 gives ~500ms decay time for 120 BPM music
+    // (At 128ms callbacks: 0.5^3.9 ≈ 0.07 after ~500ms = 1 beat)
+    // TODO: Make decay rate configurable based on BPM from tracker/MainSequence
     if (frame_peak > realtime_peak_) {
       realtime_peak_ = frame_peak;  // Attack: instant
     } else {
-      realtime_peak_ *= 0.7f;       // Decay: fast (30% per callback)
+      realtime_peak_ *= 0.5f;       // Decay: 50% per callback
     }
   }
 
diff --git a/src/gpu/gpu.cc b/src/gpu/gpu.cc
index 45f0f34..63a30ff 100644
--- a/src/gpu/gpu.cc
+++ b/src/gpu/gpu.cc
@@ -21,10 +21,11 @@
 
 static WGPUInstance g_instance = nullptr;
 static WGPUAdapter g_adapter = nullptr;
-static WGPUDevice g_device = nullptr;
-static WGPUQueue g_queue = nullptr;
+WGPUDevice g_device = nullptr;  // Non-static for external access (debug builds)
+WGPUQueue g_queue = nullptr;    // Non-static for external access (debug builds)
 static WGPUSurface g_surface = nullptr;
 static WGPUSurfaceConfiguration g_config = {};
+WGPUTextureFormat g_format = WGPUTextureFormat_BGRA8Unorm;  // Exposed for custom effects
 
 static MainSequence g_main_sequence;
 
@@ -354,6 +355,7 @@ void gpu_init(PlatformState* platform_state) {
 
   g_config.device = g_device;
   g_config.format = swap_chain_format;
+  g_format = swap_chain_format;  // Update global format for external access
   g_config.usage = WGPUTextureUsage_RenderAttachment;
   g_config.width = platform_state->width;
   g_config.height = platform_state->height;
@@ -386,6 +388,13 @@ void gpu_resize(int width, int height) {
 void gpu_simulate_until(float time) {
   g_main_sequence.simulate_until(time, 1.0f / 60.0f);
 }
+// Debug-only hook: wraps `effect` in its own Sequence and registers that sequence with g_main_sequence.
+void gpu_add_custom_effect(Effect* effect, float start_time, float end_time, int priority) {
+  auto seq = std::make_shared<Sequence>();
+  seq->add_effect(std::shared_ptr<Effect>(effect), start_time, end_time, priority);  // shared_ptr takes ownership of the raw pointer; caller must not delete it
+  seq->init(&g_main_sequence);
+  g_main_sequence.add_sequence(seq, 0.0f, priority);  // second argument fixed at 0.0f (presumably the sequence start time — confirm against MainSequence)
+}
 #endif /* !defined(STRIP_ALL) */
 
 void gpu_shutdown() {
diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h
index d7f5a8d..b8f58b2 100644
--- a/src/gpu/gpu.h
+++ b/src/gpu/gpu.h
@@ -7,6 +7,7 @@
 #include "platform/platform.h"
 
 struct PlatformState; // Forward declaration
+class Effect; // Forward declaration
 
 // Basic wrapper for WebGPU buffers
 struct GpuBuffer {
@@ -36,6 +37,12 @@ void gpu_draw(float audio_peak, float aspect_ratio, float time, float beat);
 void gpu_resize(int width, int height);
 #if !defined(STRIP_ALL)
 void gpu_simulate_until(float time);
+void gpu_add_custom_effect(Effect* effect, float start_time, float end_time, int priority);
+
+// Expose WebGPU globals for custom effects (debug builds only)
+extern WGPUDevice g_device;
+extern WGPUQueue g_queue;
+extern WGPUTextureFormat g_format;
 #endif
 void gpu_shutdown();
 
diff --git a/src/test_demo.cc b/src/test_demo.cc
index c26e65a..9ae0e3a 100644
--- a/src/test_demo.cc
+++ b/src/test_demo.cc
@@ -17,6 +17,95 @@
 extern float GetDemoDuration();
 extern void LoadTimeline(MainSequence& main_seq, WGPUDevice device, WGPUQueue queue, WGPUTextureFormat format);
 
+// Debug overlay for audio-visual sync: a post-process pass that draws a red bar whose width follows the audio peak.
+#include "gpu/effects/post_process_helper.h"
+class PeakMeterEffect : public PostProcessEffect {  // NOTE(review): defined before the STRIP_ALL guard below, so it appears to compile in stripped builds too — confirm
+ public:
+  PeakMeterEffect(WGPUDevice device, WGPUQueue queue, WGPUTextureFormat format)
+      : PostProcessEffect(device, queue) {
+    const char* shader_code = R"(
+      struct VertexOutput {
+        @builtin(position) position: vec4<f32>,
+        @location(0) uv: vec2<f32>,
+      };
+
+      struct Uniforms {
+        peak_value: f32,
+        _pad0: f32,
+        _pad1: f32,
+        _pad2: f32,
+      };
+
+      @group(0) @binding(0) var inputSampler: sampler;
+      @group(0) @binding(1) var inputTexture: texture_2d<f32>;
+      @group(0) @binding(2) var<uniform> uniforms: Uniforms;
+
+      @vertex
+      fn vs_main(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
+        var output: VertexOutput;
+        var pos = array<vec2<f32>, 3>(
+          vec2<f32>(-1.0, -1.0),
+          vec2<f32>(3.0, -1.0),
+          vec2<f32>(-1.0, 3.0)
+        );
+        output.position = vec4<f32>(pos[vertexIndex], 0.0, 1.0);
+        output.uv = pos[vertexIndex] * 0.5 + 0.5;
+        return output;
+      }
+
+      @fragment
+      fn fs_main(input: VertexOutput) -> @location(0) vec4<f32> {
+        let color = textureSample(inputTexture, inputSampler, input.uv);
+
+        // Draw red horizontal bar in middle of screen
+        // Bar height: 5% of screen height
+        // Bar width: proportional to peak_value (0.0 to 1.0)
+        let bar_height = 0.05;
+        let bar_center_y = 0.5;
+        let bar_y_min = bar_center_y - bar_height * 0.5;
+        let bar_y_max = bar_center_y + bar_height * 0.5;
+
+        // Bar extends from left (0.0) to peak_value position
+        let bar_x_max = uniforms.peak_value;
+
+        // Check if current pixel is inside the bar
+        let in_bar_y = input.uv.y >= bar_y_min && input.uv.y <= bar_y_max;
+        let in_bar_x = input.uv.x <= bar_x_max;
+
+        if (in_bar_y && in_bar_x) {
+          // Red bar
+          return vec4<f32>(1.0, 0.0, 0.0, 1.0);
+        } else {
+          // Original color
+          return color;
+        }
+      }
+    )";
+    // Build the pipeline from the WGSL above; the 16-byte uniform buffer matches the 4 x f32 Uniforms struct.
+    pipeline_ = create_post_process_pipeline(device, format, shader_code);
+    uniforms_ = gpu_create_buffer(
+        device, 16, WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst);
+  }
+  // Rebuilds bind_group_ for the given input view via pp_update_bind_group (presumably a PostProcessEffect hook; no `override` here — confirm).
+  void update_bind_group(WGPUTextureView input_view) {
+    pp_update_bind_group(device_, pipeline_, &bind_group_, input_view, uniforms_);
+  }
+  // Uploads the current peak and draws one fullscreen triangle; time, beat and aspect_ratio are unused.
+  void render(WGPURenderPassEncoder pass, float time, float beat,
+              float peak_value, float aspect_ratio) {
+    (void)time;
+    (void)beat;
+    (void)aspect_ratio;
+    // Only the first float carries data; the other three pad the upload to 16 bytes.
+    float uniforms[4] = {peak_value, 0.0f, 0.0f, 0.0f};
+    wgpuQueueWriteBuffer(queue_, uniforms_.buffer, 0, uniforms, sizeof(uniforms));
+    // 3 vertices, 1 instance: vs_main places them so the single triangle covers the whole screen.
+    wgpuRenderPassEncoderSetPipeline(pass, pipeline_);
+    wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr);
+    wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
+  }
+};
+
 #if !defined(STRIP_ALL)
 static void print_usage(const char* prog_name) {
   printf("Usage: %s [OPTIONS]\n", prog_name);
@@ -104,6 +193,13 @@ int main(int argc, char** argv) {
   // Initialize platform, GPU, audio
   platform_state = platform_init(fullscreen_enabled, width, height);
   gpu_init(&platform_state);
+
+  // Add peak meter visualization effect (renders as final post-process)
+#if !defined(STRIP_ALL)
+  auto* peak_meter = new PeakMeterEffect(g_device, g_queue, g_format);
+  gpu_add_custom_effect(peak_meter, 0.0f, 99999.0f, 999); // High priority = renders last
+#endif
+
   audio_init();
 
   static AudioEngine g_audio_engine;
@@ -187,26 +283,30 @@ int main(int argc, char** argv) {
       gpu_resize(last_width, last_height);
     }
 
-    const double current_time = platform_state.time;
+    const double physical_time = platform_state.time;
 
-    // Auto-exit at end
-    if (demo_duration > 0.0f && current_time >= demo_duration) {
+    // Auto-exit at end (based on physical time for reliability)
+    if (demo_duration > 0.0f && physical_time >= demo_duration) {
 #if !defined(STRIP_ALL)
-      printf("test_demo finished at %.2f seconds.\n", current_time);
+      printf("test_demo finished at %.2f seconds.\n", physical_time);
 #endif
       break;
     }
 
-    fill_audio_buffer(current_time);
+    fill_audio_buffer(physical_time);
+
+    // Audio-visual synchronization: Use audio playback time (not physical time!)
+    // This accounts for ring buffer latency automatically (no hardcoded constants)
+    const float audio_time = audio_get_playback_time();
 
     // Audio/visual sync parameters
     const float aspect_ratio = platform_state.aspect_ratio;
-    // Use real-time peak for proper audio-visual synchronization
+    // Peak is measured at audio playback time, so it matches audio_time
     const float raw_peak = audio_get_realtime_peak();
     const float visual_peak = fminf(raw_peak * 8.0f, 1.0f);
 
-    // Beat calculation (hardcoded BPM=120)
-    const float beat_time = (float)current_time * 120.0f / 60.0f;
+    // Beat calculation uses AUDIO TIME (what's being heard), not physical time
+    const float beat_time = audio_time * 120.0f / 60.0f;
     const int beat_number = (int)beat_time;
     const float beat = fmodf(beat_time, 1.0f);
 
@@ -215,30 +315,30 @@ int main(int argc, char** argv) {
     if (peak_log) {
       if (log_peaks_fine) {
         // Log every frame for fine-grained analysis
-        fprintf(peak_log, "%d %.6f %.6f %d\n", frame_number, current_time, raw_peak, beat_number);
+        fprintf(peak_log, "%d %.6f %.6f %d\n", frame_number, audio_time, raw_peak, beat_number);
       } else if (beat_number != last_beat_logged) {
         // Log only at beat boundaries
-        fprintf(peak_log, "%d %.6f %.6f\n", beat_number, current_time, raw_peak);
+        fprintf(peak_log, "%d %.6f %.6f\n", beat_number, audio_time, raw_peak);
         last_beat_logged = beat_number;
       }
     }
     frame_number++;
 
-    // Debug output every 0.5 seconds
+    // Debug output every 0.5 seconds (based on audio time for consistency)
     static float last_print_time = -1.0f;
-    if (current_time - last_print_time >= 0.5f) {
+    if (audio_time - last_print_time >= 0.5f) {
       if (tempo_test_enabled) {
-        printf("[T=%.2f, MusicT=%.2f, Beat=%d, Frac=%.2f, Peak=%.2f, Tempo=%.2fx]\n",
-               (float)current_time, g_music_time, beat_number, beat, visual_peak, g_tempo_scale);
+        printf("[AudioT=%.2f, PhysT=%.2f, MusicT=%.2f, Beat=%d, Frac=%.2f, Peak=%.2f, Tempo=%.2fx]\n",
+               audio_time, (float)physical_time, g_music_time, beat_number, beat, visual_peak, g_tempo_scale);
       } else {
-        printf("[T=%.2f, Beat=%d, Frac=%.2f, Peak=%.2f]\n",
-               (float)current_time, beat_number, beat, visual_peak);
+        printf("[AudioT=%.2f, Beat=%d, Frac=%.2f, Peak=%.2f]\n",
+               audio_time, beat_number, beat, visual_peak);
       }
-      last_print_time = (float)current_time;
+      last_print_time = audio_time;
     }
 #endif
 
-    gpu_draw(visual_peak, aspect_ratio, (float)current_time, beat);
+    gpu_draw(visual_peak, aspect_ratio, audio_time, beat);
     audio_update();
   }