From bb8197075161f9c9ded51beab913150b43954e2c Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Mon, 2 Mar 2026 09:38:46 +0100
Subject: feat(audio): OLA-IDCT synthesis with Hann window to eliminate clicks

Add v2 spectrogram format (SPEC_VERSION_V2_OLA) using overlap-add IDCT
with 50% overlap (hop=256, OLA_OVERLAP=256) and Hann windowing.

- dct.h: OLA_HOP_SIZE=256, OLA_OVERLAP=256
- synth.h: SPEC_VERSION_V1/V2_OLA constants; version field on Spectrogram
- window.h/cc: hann_window_512() alongside existing hamming_window_512()
- synth.cc: g_hann[] precomputed at init; OLA path in synth_render when
  ola_mode=true (IDCT -> Hann -> add overlap tail -> save new tail ->
  output OLA_HOP_SIZE samples); v1 path unchanged for backward compat
- tracker.cc: MP3 encoder now uses sliding 512-sample Hann window with
  OLA_HOP_SIZE advance per frame; sets version=SPEC_VERSION_V2_OLA;
  .spec files propagate header->version; generated notes stay v1

Existing .spec files must be regenerated to benefit from click-free OLA.

handoff(Claude): OLA done. .spec files must be regenerated via the MP3 tool to activate v2.
---
 src/audio/synth.cc | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

(limited to 'src/audio/synth.cc')

diff --git a/src/audio/synth.cc b/src/audio/synth.cc
index 5fadf3c..a723404 100644
--- a/src/audio/synth.cc
+++ b/src/audio/synth.cc
@@ -27,6 +27,8 @@ struct Voice {
   int total_spectral_frames;
 
   float time_domain_buffer[DCT_SIZE];
+  float overlap_buf[OLA_OVERLAP]; // OLA tail from previous frame (v2 only)
+  bool ola_mode;                  // True for SPEC_VERSION_V2_OLA
   int buffer_pos;
   float fractional_pos; // Fractional sample position for tempo scaling
 
@@ -45,6 +47,7 @@ static Voice g_voices[MAX_VOICES];
 static volatile float g_current_output_peak =
     0.0f;                          // Global peak for visualization
 static float g_tempo_scale = 1.0f; // Playback speed multiplier
+static float g_hann[DCT_SIZE];     // Hann window for OLA synthesis (v2)
 
 #if !defined(STRIP_ALL)
 static float g_elapsed_time_sec = 0.0f; // Tracks elapsed time for event hooks
@@ -54,6 +57,7 @@ void synth_init() {
   memset(&g_synth_data, 0, sizeof(g_synth_data));
   memset(g_voices, 0, sizeof(g_voices));
   g_current_output_peak = 0.0f;
+  hann_window_512(g_hann);
 #if !defined(STRIP_ALL)
   g_elapsed_time_sec = 0.0f;
 #endif /* !defined(STRIP_ALL) */
@@ -202,7 +206,11 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan,
       v.current_spectral_frame = 0;
       v.total_spectral_frames =
           g_synth_data.spectrograms[spectrogram_id].num_frames;
-      v.buffer_pos = DCT_SIZE; // Force IDCT on first render
+      v.ola_mode = (g_synth_data.spectrograms[spectrogram_id].version ==
+                    SPEC_VERSION_V2_OLA);
+      v.buffer_pos = v.ola_mode ? OLA_HOP_SIZE : DCT_SIZE; // Force reload on first render
+      if (v.ola_mode)
+        memset(v.overlap_buf, 0, sizeof(v.overlap_buf));
       v.fractional_pos =
           0.0f; // Initialize fractional position for tempo scaling
       v.start_sample_offset =
@@ -243,7 +251,8 @@ void synth_render(float* output_buffer, int num_frames) {
         continue; // Don't produce audio until offset elapsed
       }
 
-      if (v.buffer_pos >= DCT_SIZE) {
+      const int frame_threshold = v.ola_mode ? OLA_HOP_SIZE : DCT_SIZE;
+      if (v.buffer_pos >= frame_threshold) {
         if (v.current_spectral_frame >= v.total_spectral_frames) {
           v.active = false;
           continue;
@@ -256,9 +265,25 @@ void synth_render(float* output_buffer, int num_frames) {
         const float* spectral_frame = (const float*)v.active_spectral_data +
                                       (v.current_spectral_frame * DCT_SIZE);
 
-        // IDCT directly - no windowing needed for synthesis
-        // (Window is only used during analysis, before DCT)
-        idct_512(spectral_frame, v.time_domain_buffer);
+        if (v.ola_mode) {
+          // OLA-IDCT synthesis (v2): Hann window + overlap-add
+          float tmp[DCT_SIZE];
+          idct_512(spectral_frame, tmp);
+          for (int j = 0; j < DCT_SIZE; ++j)
+            tmp[j] *= g_hann[j];
+          // Add saved overlap from previous frame
+          for (int j = 0; j < OLA_OVERLAP; ++j)
+            tmp[j] += v.overlap_buf[j];
+          // Save new tail as overlap for next frame
+          for (int j = 0; j < OLA_OVERLAP; ++j)
+            v.overlap_buf[j] = tmp[OLA_HOP_SIZE + j];
+          // Output buffer holds first OLA_HOP_SIZE samples
+          for (int j = 0; j < OLA_HOP_SIZE; ++j)
+            v.time_domain_buffer[j] = tmp[j];
+        } else {
+          // V1: IDCT directly, no windowing
+          idct_512(spectral_frame, v.time_domain_buffer);
+        }
 
         v.buffer_pos = 0;
         ++v.current_spectral_frame;
-- 
cgit v1.2.3