diff options
| author | skal <pascal.massimino@gmail.com> | 2026-03-02 09:38:46 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-03-02 09:38:46 +0100 |
| commit | bb8197075161f9c9ded51beab913150b43954e2c (patch) | |
| tree | c3835a0cb2133be28e5cc695b487395250fd1ea0 /src/audio/tracker.cc | |
| parent | b2ca699723b1695bd712ec68635a947c6ca3fd42 (diff) | |
feat(audio): OLA-IDCT synthesis with Hann window to eliminate clicks
Add v2 spectrogram format (SPEC_VERSION_V2_OLA) using overlap-add IDCT
with 50% overlap (hop=256, OLA_OVERLAP=256) and Hann windowing.
- dct.h: OLA_HOP_SIZE=256, OLA_OVERLAP=256
- synth.h: SPEC_VERSION_V1/V2_OLA constants; version field on Spectrogram
- window.h/cc: hann_window_512() alongside existing hamming_window_512()
- synth.cc: g_hann[] precomputed at init; OLA path in synth_render when
ola_mode=true (IDCT -> Hann -> add overlap tail -> save new tail ->
output OLA_HOP_SIZE samples); v1 path unchanged for backward compat
- tracker.cc: MP3-to-spectrogram conversion now uses a sliding 512-sample Hann window with
OLA_HOP_SIZE advance per frame; sets version=SPEC_VERSION_V2_OLA;
.spec files propagate header->version; generated notes stay v1
Existing .spec files must be regenerated to benefit from click-free OLA.
handoff(Claude): OLA done. .spec files need regen via MP3 tool to activate v2.
Diffstat (limited to 'src/audio/tracker.cc')
| -rw-r--r-- | src/audio/tracker.cc | 32 |
1 files changed, 23 insertions, 9 deletions
diff --git a/src/audio/tracker.cc b/src/audio/tracker.cc index 59801a8..333a337 100644 --- a/src/audio/tracker.cc +++ b/src/audio/tracker.cc @@ -56,6 +56,7 @@ static bool is_mp3_asset(const uint8_t* data, size_t size) { #if !defined(STRIP_ALL) // Decode an in-memory MP3 blob to a heap-allocated spectrogram (caller owns). +// Uses OLA analysis: 512-sample Hann window, OLA_HOP_SIZE advance per frame. // Returns nullptr on error. Sets *out_num_frames to frame count. static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size, int* out_num_frames) { @@ -64,23 +65,33 @@ static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size, if (!dec) return nullptr; float window[DCT_SIZE]; - hamming_window_512(window); + hann_window_512(window); std::vector<float> spec_data; + float pcm_buf[DCT_SIZE]; float pcm_chunk[DCT_SIZE]; + float dct_chunk[DCT_SIZE]; + + // Sliding-window OLA analysis: advance OLA_HOP_SIZE samples per frame. + // First iteration: pcm_buf is zero-initialized (silence before signal start). + memset(pcm_buf, 0, sizeof(pcm_buf)); for (;;) { - const int decoded = mp3_decode(dec, DCT_SIZE, pcm_chunk); - if (decoded == 0) break; - if (decoded < DCT_SIZE) { - memset(pcm_chunk + decoded, 0, (DCT_SIZE - decoded) * sizeof(float)); - } - for (int i = 0; i < DCT_SIZE; ++i) { - pcm_chunk[i] *= window[i]; + // Slide left by OLA_HOP_SIZE; fill right half with new samples. + memmove(pcm_buf, pcm_buf + OLA_HOP_SIZE, OLA_HOP_SIZE * sizeof(float)); + const int decoded = mp3_decode(dec, OLA_HOP_SIZE, pcm_buf + OLA_HOP_SIZE); + if (decoded < OLA_HOP_SIZE) { + memset(pcm_buf + OLA_HOP_SIZE + decoded, 0, + (OLA_HOP_SIZE - decoded) * sizeof(float)); } - float dct_chunk[DCT_SIZE]; + + // Window + DCT the current 512-sample frame. 
+ for (int i = 0; i < DCT_SIZE; ++i) + pcm_chunk[i] = pcm_buf[i] * window[i]; fdct_512(pcm_chunk, dct_chunk); spec_data.insert(spec_data.end(), dct_chunk, dct_chunk + DCT_SIZE); + + if (decoded == 0) break; } mp3_close(dec); @@ -148,6 +159,7 @@ void tracker_init() { spec.spectral_data_a = spec_data; spec.spectral_data_b = spec_data; spec.num_frames = num_frames; + spec.version = SPEC_VERSION_V2_OLA; g_sample_synth_cache[sid] = synth_register_spectrogram(&spec); g_spec_pool[slot].synth_id = g_sample_synth_cache[sid]; } @@ -166,6 +178,7 @@ void tracker_init() { spec.spectral_data_a = spectral_data; spec.spectral_data_b = spectral_data; spec.num_frames = note_frames; + spec.version = header->version; g_sample_synth_cache[sid] = synth_register_spectrogram(&spec); @@ -195,6 +208,7 @@ void tracker_init() { spec.spectral_data_a = g_spec_pool[slot].data; spec.spectral_data_b = g_spec_pool[slot].data; spec.num_frames = note_frames; + spec.version = SPEC_VERSION_V1; g_sample_synth_cache[sid] = synth_register_spectrogram(&spec); g_spec_pool[slot].synth_id = g_sample_synth_cache[sid]; |
