1 files changed, 23 insertions, 9 deletions
diff --git a/src/audio/tracker.cc b/src/audio/tracker.cc
index 59801a8..333a337 100644
--- a/src/audio/tracker.cc
+++ b/src/audio/tracker.cc
@@ -56,6 +56,7 @@ static bool is_mp3_asset(const uint8_t* data, size_t size) {
 
 #if !defined(STRIP_ALL)
 // Decode an in-memory MP3 blob to a heap-allocated spectrogram (caller owns).
+// Uses OLA analysis: 512-sample Hann window, OLA_HOP_SIZE advance per frame.
 // Returns nullptr on error. Sets *out_num_frames to frame count.
 static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size,
                                          int* out_num_frames) {
@@ -64,23 +65,33 @@ static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size,
   if (!dec) return nullptr;
 
   float window[DCT_SIZE];
-  hamming_window_512(window);
+  hann_window_512(window);
 
   std::vector<float> spec_data;
+  float pcm_buf[DCT_SIZE];
   float pcm_chunk[DCT_SIZE];
+  float dct_chunk[DCT_SIZE];
+
+  // Sliding-window OLA analysis: advance OLA_HOP_SIZE samples per frame.
+  // pcm_buf starts zeroed, so frame 0's left half is pre-signal silence.
+  memset(pcm_buf, 0, sizeof(pcm_buf));
 
   for (;;) {
-    const int decoded = mp3_decode(dec, DCT_SIZE, pcm_chunk);
-    if (decoded == 0) break;
-    if (decoded < DCT_SIZE) {
-      memset(pcm_chunk + decoded, 0, (DCT_SIZE - decoded) * sizeof(float));
-    }
-    for (int i = 0; i < DCT_SIZE; ++i) {
-      pcm_chunk[i] *= window[i];
+    // Slide left one hop, refill tail (needs OLA_HOP_SIZE == DCT_SIZE / 2).
+    memmove(pcm_buf, pcm_buf + OLA_HOP_SIZE, OLA_HOP_SIZE * sizeof(float));
+    const int decoded = mp3_decode(dec, OLA_HOP_SIZE, pcm_buf + OLA_HOP_SIZE);
+    if (decoded < OLA_HOP_SIZE) {
+      memset(pcm_buf + OLA_HOP_SIZE + decoded, 0,
+             (OLA_HOP_SIZE - decoded) * sizeof(float));
     }
-    float dct_chunk[DCT_SIZE];
+
+    // Window + DCT the current 512-sample frame.
+    for (int i = 0; i < DCT_SIZE; ++i)
+      pcm_chunk[i] = pcm_buf[i] * window[i];
     fdct_512(pcm_chunk, dct_chunk);
     spec_data.insert(spec_data.end(), dct_chunk, dct_chunk + DCT_SIZE);
+
+    if (decoded == 0) break;
   }
 
   mp3_close(dec);
@@ -148,6 +159,7 @@ void tracker_init() {
             spec.spectral_data_a = spec_data;
             spec.spectral_data_b = spec_data;
             spec.num_frames = num_frames;
+            spec.version = SPEC_VERSION_V2_OLA;
             g_sample_synth_cache[sid] = synth_register_spectrogram(&spec);
             g_spec_pool[slot].synth_id = g_sample_synth_cache[sid];
           }
@@ -166,6 +178,7 @@ void tracker_init() {
           spec.spectral_data_a = spectral_data;
           spec.spectral_data_b = spectral_data;
           spec.num_frames = note_frames;
+          spec.version = header->version;
 
           g_sample_synth_cache[sid] = synth_register_spectrogram(&spec);
 
@@ -195,6 +208,7 @@ void tracker_init() {
           spec.spectral_data_a = g_spec_pool[slot].data;
           spec.spectral_data_b = g_spec_pool[slot].data;
           spec.num_frames = note_frames;
+          spec.version = SPEC_VERSION_V1;
 
           g_sample_synth_cache[sid] = synth_register_spectrogram(&spec);
           g_spec_pool[slot].synth_id = g_sample_synth_cache[sid];