summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/audio/dct.h 4
-rw-r--r-- src/audio/synth.cc 35
-rw-r--r-- src/audio/synth.h 4
-rw-r--r-- src/audio/tracker.cc 32
-rw-r--r-- src/audio/window.cc 8
-rw-r--r-- src/audio/window.h 1
6 files changed, 69 insertions, 15 deletions
diff --git a/src/audio/dct.h b/src/audio/dct.h
index ee3e9b3..ec9f651 100644
--- a/src/audio/dct.h
+++ b/src/audio/dct.h
@@ -4,7 +4,9 @@
#pragma once
-#define DCT_SIZE 512
+#define DCT_SIZE 512
+#define OLA_HOP_SIZE 256  // Samples advanced per frame in overlap-add (OLA) mode
+#define OLA_OVERLAP 256  // Tail samples carried into the next frame; OLA_HOP_SIZE + OLA_OVERLAP must not exceed DCT_SIZE (synth.cc indexes tmp[OLA_HOP_SIZE + j])
// Forward declarations
void fdct_512(const float* input, float* output);
diff --git a/src/audio/synth.cc b/src/audio/synth.cc
index 5fadf3c..a723404 100644
--- a/src/audio/synth.cc
+++ b/src/audio/synth.cc
@@ -27,6 +27,8 @@ struct Voice {
int total_spectral_frames;
float time_domain_buffer[DCT_SIZE];
+ float overlap_buf[OLA_OVERLAP]; // OLA tail from previous frame (v2 only)
+ bool ola_mode; // True for SPEC_VERSION_V2_OLA
int buffer_pos;
float fractional_pos; // Fractional sample position for tempo scaling
@@ -45,6 +47,7 @@ static Voice g_voices[MAX_VOICES];
static volatile float g_current_output_peak =
0.0f; // Global peak for visualization
static float g_tempo_scale = 1.0f; // Playback speed multiplier
+static float g_hann[DCT_SIZE]; // Hann window for OLA synthesis (v2)
#if !defined(STRIP_ALL)
static float g_elapsed_time_sec = 0.0f; // Tracks elapsed time for event hooks
@@ -54,6 +57,7 @@ void synth_init() {
memset(&g_synth_data, 0, sizeof(g_synth_data));
memset(g_voices, 0, sizeof(g_voices));
g_current_output_peak = 0.0f;
+ hann_window_512(g_hann);
#if !defined(STRIP_ALL)
g_elapsed_time_sec = 0.0f;
#endif /* !defined(STRIP_ALL) */
@@ -202,7 +206,11 @@ void synth_trigger_voice(int spectrogram_id, float volume, float pan,
v.current_spectral_frame = 0;
v.total_spectral_frames =
g_synth_data.spectrograms[spectrogram_id].num_frames;
- v.buffer_pos = DCT_SIZE; // Force IDCT on first render
+ v.ola_mode = (g_synth_data.spectrograms[spectrogram_id].version ==
+ SPEC_VERSION_V2_OLA);
+ v.buffer_pos = v.ola_mode ? OLA_HOP_SIZE : DCT_SIZE; // Force reload on first render
+ if (v.ola_mode)
+ memset(v.overlap_buf, 0, sizeof(v.overlap_buf));
v.fractional_pos =
0.0f; // Initialize fractional position for tempo scaling
v.start_sample_offset =
@@ -243,7 +251,8 @@ void synth_render(float* output_buffer, int num_frames) {
continue; // Don't produce audio until offset elapsed
}
- if (v.buffer_pos >= DCT_SIZE) {
+ const int frame_threshold = v.ola_mode ? OLA_HOP_SIZE : DCT_SIZE;
+ if (v.buffer_pos >= frame_threshold) {
if (v.current_spectral_frame >= v.total_spectral_frames) {
v.active = false;
continue;
@@ -256,9 +265,25 @@ void synth_render(float* output_buffer, int num_frames) {
const float* spectral_frame = (const float*)v.active_spectral_data +
(v.current_spectral_frame * DCT_SIZE);
- // IDCT directly - no windowing needed for synthesis
- // (Window is only used during analysis, before DCT)
- idct_512(spectral_frame, v.time_domain_buffer);
+ if (v.ola_mode) {
+ // OLA synthesis (v2): inverse DCT, multiply by the Hann table, then overlap-add with the previous frame's tail
+ float tmp[DCT_SIZE];  // Scratch for the whole windowed frame
+ idct_512(spectral_frame, tmp);
+ for (int j = 0; j < DCT_SIZE; ++j)
+ tmp[j] *= g_hann[j];  // NOTE(review): convert_mp3_to_spectrogram also applies a Hann window before fdct_512, so the net window looks like Hann squared, which does not sum to a constant at this hop -- confirm intended
+ // Add saved overlap from previous frame (overlap_buf is zeroed when the voice is triggered)
+ for (int j = 0; j < OLA_OVERLAP; ++j)
+ tmp[j] += v.overlap_buf[j];
+ // Save this frame's tail, starting at OLA_HOP_SIZE, as the overlap for the next frame
+ for (int j = 0; j < OLA_OVERLAP; ++j)
+ v.overlap_buf[j] = tmp[OLA_HOP_SIZE + j];
+ // Only the first OLA_HOP_SIZE samples are published; the rest of time_domain_buffer is not written here
+ for (int j = 0; j < OLA_HOP_SIZE; ++j)
+ v.time_domain_buffer[j] = tmp[j];
+ } else {
+ // V1: IDCT straight into the playback buffer, no windowing or overlap
+ idct_512(spectral_frame, v.time_domain_buffer);
+ }
v.buffer_pos = 0;
++v.current_spectral_frame;
diff --git a/src/audio/synth.h b/src/audio/synth.h
index 3a42a61..61ecfd0 100644
--- a/src/audio/synth.h
+++ b/src/audio/synth.h
@@ -21,10 +21,14 @@
#define MAX_SPECTROGRAMS \
32 // Current track: 14 unique, 32 provides comfortable headroom
+#define SPEC_VERSION_V1 1  // Frames played back-to-back: one plain IDCT per DCT_SIZE output samples
+#define SPEC_VERSION_V2_OLA 2  // Overlapping frames: windowed IDCT, new frame every OLA_HOP_SIZE samples
+
struct Spectrogram {
const float* spectral_data_a; // Front buffer
const float* spectral_data_b; // Back buffer (for double-buffering)
int num_frames; // Frame count; copied into the voice's total_spectral_frames on trigger
+ int version; // SPEC_VERSION_V1 or SPEC_VERSION_V2_OLA; V2_OLA switches the voice to overlap-add playback
};
void synth_init();
diff --git a/src/audio/tracker.cc b/src/audio/tracker.cc
index 59801a8..333a337 100644
--- a/src/audio/tracker.cc
+++ b/src/audio/tracker.cc
@@ -56,6 +56,7 @@ static bool is_mp3_asset(const uint8_t* data, size_t size) {
#if !defined(STRIP_ALL)
// Decode an in-memory MP3 blob to a heap-allocated spectrogram (caller owns).
+// Uses OLA analysis: 512-sample Hann window, OLA_HOP_SIZE advance per frame.
// Returns nullptr on error. Sets *out_num_frames to frame count.
static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size,
int* out_num_frames) {
@@ -64,23 +65,33 @@ static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size,
if (!dec) return nullptr;
float window[DCT_SIZE];
- hamming_window_512(window);
+ hann_window_512(window);
std::vector<float> spec_data;
+ float pcm_buf[DCT_SIZE];
float pcm_chunk[DCT_SIZE];
+ float dct_chunk[DCT_SIZE];
+
+ // Sliding-window OLA analysis: advance OLA_HOP_SIZE samples per frame.
+ // First iteration: pcm_buf is zero-initialized (silence before signal start).
+ memset(pcm_buf, 0, sizeof(pcm_buf));
for (;;) {
- const int decoded = mp3_decode(dec, DCT_SIZE, pcm_chunk);
- if (decoded == 0) break;
- if (decoded < DCT_SIZE) {
- memset(pcm_chunk + decoded, 0, (DCT_SIZE - decoded) * sizeof(float));
- }
- for (int i = 0; i < DCT_SIZE; ++i) {
- pcm_chunk[i] *= window[i];
+ // Slide left by OLA_HOP_SIZE (previous right half becomes the left half); fill right half with new samples.
+ memmove(pcm_buf, pcm_buf + OLA_HOP_SIZE, OLA_HOP_SIZE * sizeof(float));
+ const int decoded = mp3_decode(dec, OLA_HOP_SIZE, pcm_buf + OLA_HOP_SIZE);
+ if (decoded < OLA_HOP_SIZE) {  // Short read: zero-pad the rest of the hop. NOTE(review): assumes mp3_decode never returns a negative count -- confirm
+ memset(pcm_buf + OLA_HOP_SIZE + decoded, 0,
+ (OLA_HOP_SIZE - decoded) * sizeof(float));
}
- float dct_chunk[DCT_SIZE];
+
+ // Window + DCT the current 512-sample frame; pcm_buf itself stays unwindowed for the next slide.
+ for (int i = 0; i < DCT_SIZE; ++i)
+ pcm_chunk[i] = pcm_buf[i] * window[i];
fdct_512(pcm_chunk, dct_chunk);
spec_data.insert(spec_data.end(), dct_chunk, dct_chunk + DCT_SIZE);
+
+ if (decoded == 0) break;  // Tested after emitting, so the last hop also lands in the left half of a final frame; an empty stream still yields one frame built from all-zero input
}
mp3_close(dec);
@@ -148,6 +159,7 @@ void tracker_init() {
spec.spectral_data_a = spec_data;
spec.spectral_data_b = spec_data;
spec.num_frames = num_frames;
+ spec.version = SPEC_VERSION_V2_OLA;
g_sample_synth_cache[sid] = synth_register_spectrogram(&spec);
g_spec_pool[slot].synth_id = g_sample_synth_cache[sid];
}
@@ -166,6 +178,7 @@ void tracker_init() {
spec.spectral_data_a = spectral_data;
spec.spectral_data_b = spectral_data;
spec.num_frames = note_frames;
+ spec.version = header->version;
g_sample_synth_cache[sid] = synth_register_spectrogram(&spec);
@@ -195,6 +208,7 @@ void tracker_init() {
spec.spectral_data_a = g_spec_pool[slot].data;
spec.spectral_data_b = g_spec_pool[slot].data;
spec.num_frames = note_frames;
+ spec.version = SPEC_VERSION_V1;
g_sample_synth_cache[sid] = synth_register_spectrogram(&spec);
g_spec_pool[slot].synth_id = g_sample_synth_cache[sid];
diff --git a/src/audio/window.cc b/src/audio/window.cc
index b68c747..bcdd768 100644
--- a/src/audio/window.cc
+++ b/src/audio/window.cc
@@ -12,3 +12,11 @@ void hamming_window_512(float* window) {
0.54f - 0.46f * cosf(2.0f * PI * (float)i / (float)(WINDOW_SIZE - 1));
}
}
+
+void hann_window_512(float* window) {  // Fills window[0..WINDOW_SIZE) with a Hann (raised-cosine) window; caller supplies room for WINDOW_SIZE floats
+ const float PI = 3.14159265358979323846f;
+ for (int i = 0; i < WINDOW_SIZE; ++i) {
+ window[i] =
+ 0.5f - 0.5f * cosf(2.0f * PI * (float)i / (float)(WINDOW_SIZE - 1));  // Symmetric form: 0 at both i = 0 and i = WINDOW_SIZE - 1. NOTE(review): overlap-add at a half-frame hop sums to exactly 1 only with the periodic form (divide by WINDOW_SIZE) -- confirm which is wanted
+ }
+}
diff --git a/src/audio/window.h b/src/audio/window.h
index c3b583a..80253da 100644
--- a/src/audio/window.h
+++ b/src/audio/window.h
@@ -7,3 +7,4 @@
#define WINDOW_SIZE 512
void hamming_window_512(float* window);
+void hann_window_512(float* window);