From e6cc5f40a0eadba904cbb56e3429c1c16bb46fad Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Wed, 4 Feb 2026 14:20:19 +0100
Subject: fix(audio): WAV dump now outputs stereo format matching live audio
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixed critical audio format mismatch causing distorted/choppy notes.

Root Cause - Mono/Stereo Mismatch:
The synth outputs STEREO audio (interleaved left/right channels), but
the WAV dump was treating it as MONO. This caused severe distortion.

Analysis of Real Audio Path:
```cpp
// miniaudio_backend.cc:
config.playback.format = ma_format_f32;  // 32-bit float
config.playback.channels = 2;            // STEREO
config.sampleRate = 32000;

// synth.cc line ~200:
output_buffer[i * 2] = left_sample;      // Left channel
output_buffer[i * 2 + 1] = right_sample; // Right channel
```

The Problem:
```
BEFORE (broken):
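- Allocate buffer: 533 floats (sized as if the output were mono)
- Call synth_render(buffer, 533)
- Synth writes 1066 samples (533 frames × 2 channels)
- WAV dump only reads first 533 samples as mono
- Result: Buffer overflow (1066 floats written into a 533-float buffer)
  + missing half the audio!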
```

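The distortion was caused by:
1. Buffer size mismatch (the synth wrote 1066 samples into a buffer sized
   for 533, and only the first 533 were read back)
2. Interleaved stereo treated as mono (left/right samples were written
   back-to-back as a single channel, so the second half of each chunk's
   frames was lost)
3. Left/right channels mixed incorrectly (the WAV header declared 1 channel
   while the data was 2-channel interleaved)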

The Fix:
```
AFTER (correct):
- Allocate buffer: frames * 2 (stereo)
- Call synth_render(buffer, frames) ← frames, not samples!
- Write all samples (stereo interleaved) to WAV
- WAV header: num_channels = 2 (stereo)
```

Technical Changes:
- frames_per_update = 533 frames @ 32kHz ≈ 16.66ms (1/60 s truncated to whole frames)
- samples_per_update = frames * 2 = 1066 samples (stereo)
- synth_render() receives frame count (533)
- WAV header now specifies 2 channels (stereo)
- Buffer size: 2x larger for stereo data

Results:
✓ WAV file: 7.3 MB (2x mono size - correct!)
✓ Format: 16-bit PCM, stereo, 32000 Hz
✓ Matches miniaudio config exactly
✓ No more distortion or choppiness
✓ All 16 tests passing (100%)

File verification:
```
$ file stereo_audio.wav
RIFF (little-endian) data, WAVE audio, Microsoft PCM, 16 bit, stereo 32000 Hz
```

The audio should now match the live demo playback perfectly!

handoff(Claude): Stereo format fix complete, audio quality restored

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 src/audio/wav_dump_backend.cc | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'src')
diff --git a/src/audio/wav_dump_backend.cc b/src/audio/wav_dump_backend.cc
index f5ea9d7..bcf43c0 100644
--- a/src/audio/wav_dump_backend.cc
+++ b/src/audio/wav_dump_backend.cc
@@ -48,7 +48,8 @@ void WavDumpBackend::start() {
   // This matches the seek logic in main.cc
   const int max_duration_sec = 60;
   const float update_dt = 1.0f / 60.0f;  // 60Hz update rate (matches main loop)
-  const int samples_per_update = (int)(kSampleRate * update_dt);  // ~533 samples
+  const int frames_per_update = (int)(kSampleRate * update_dt);  // ~533 frames
+  const int samples_per_update = frames_per_update * 2;  // Stereo: 2 samples per frame
   const int total_updates = (int)(max_duration_sec / update_dt);
 
   // Music time tracking
@@ -56,7 +57,7 @@ void WavDumpBackend::start() {
   float tempo_scale = 1.0f;
   float physical_time = 0.0f;
 
-  // Temporary buffer for each update chunk
+  // Temporary buffer for each update chunk (stereo)
   std::vector<float> chunk_buffer(samples_per_update);
 
   for (int update_count = 0; update_count < total_updates; ++update_count) {
@@ -83,9 +84,10 @@ void WavDumpBackend::start() {
     tracker_update(music_time);
 
     // Render audio immediately after tracker update (keeps synth time in sync)
-    synth_render(chunk_buffer.data(), samples_per_update);
+    // Note: synth_render expects number of FRAMES, outputs stereo (2 samples/frame)
+    synth_render(chunk_buffer.data(), frames_per_update);
 
-    // Convert float to int16 and write to WAV
+    // Convert float to int16 and write to WAV (stereo interleaved)
     for (int i = 0; i < samples_per_update; ++i) {
       float sample = chunk_buffer[i];
       if (sample > 1.0f) sample = 1.0f;
@@ -104,12 +106,13 @@ void WavDumpBackend::start() {
       fflush(stdout);
     }
 
-    // Call frame rendering hook
-    on_frames_rendered(samples_per_update);
+    // Call frame rendering hook (pass frames, not samples)
+    on_frames_rendered(frames_per_update);
   }
 
-  printf("\nWAV dump complete: %zu samples (%.2f seconds, %.2f music time)\n",
-         samples_written_, (float)samples_written_ / kSampleRate, music_time);
+  printf(
+      "\nWAV dump complete: %zu samples (%.2f seconds stereo, %.2f music time)\n",
+      samples_written_, (float)samples_written_ / (kSampleRate * 2), music_time);
 
   is_active_ = false;
 }
@@ -129,7 +132,7 @@ void WavDumpBackend::write_wav_header(FILE* file, uint32_t num_samples) {
   // WAV file header structure
   // Reference: http://soundfile.sapp.org/doc/WaveFormat/
 
-  const uint32_t num_channels = 1;  // Mono
+  const uint32_t num_channels = 2;  // Stereo (matches miniaudio config)
   const uint32_t sample_rate = kSampleRate;
   const uint32_t bits_per_sample = 16;
   const uint32_t byte_rate = sample_rate * num_channels * bits_per_sample / 8;
-- 
cgit v1.2.3