From ad4f87e0ebfd361c69c7ba9adc29292305f21f7c Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Tue, 27 Jan 2026 22:16:23 +0100
Subject: feat(audio): Implement real-time spectrogram synthesizer

Adds a multi-voice, real-time audio synthesis engine that generates sound from spectrogram data using an Inverse Discrete Cosine Transform (IDCT).

Key features:
- A thread-safe, double-buffered system for dynamically updating spectrograms in real-time without interrupting audio playback.
- Core DSP components: FDCT, IDCT, and Hamming window functions.
- A simple sequencer in the main loop to demonstrate scripted audio events and dynamic updates.
- Unit tests for the new synth engine and Hamming window, integrated with CTest.
- A documentation file covering the build process, features, and how to run tests.
---
 src/audio/audio.cpp       |  23 +++----
 src/audio/dct.h           |   6 ++
 src/audio/fdct.cpp        |  17 +++++
 src/audio/idct.cpp        |  17 +++++
 src/audio/synth.cpp       | 157 ++++++++++++++++++++++++++++++++++++++++++++++
 src/audio/synth.h         |  24 +++++++
 src/audio/window.cpp      |   9 +++
 src/audio/window.h        |   5 ++
 src/main.cpp              |  50 +++++++++++++++
 src/platform.cpp          |   4 ++
 src/platform.h            |   2 +
 src/tests/test_synth.cpp  | 107 +++++++++++++++++++++++++++++++
 src/tests/test_window.cpp |  34 ++++++++++
 13 files changed, 441 insertions(+), 14 deletions(-)
 create mode 100644 src/audio/dct.h
 create mode 100644 src/audio/fdct.cpp
 create mode 100644 src/audio/idct.cpp
 create mode 100644 src/audio/synth.cpp
 create mode 100644 src/audio/synth.h
 create mode 100644 src/audio/window.cpp
 create mode 100644 src/audio/window.h
 create mode 100644 src/tests/test_synth.cpp
 create mode 100644 src/tests/test_window.cpp

(limited to 'src')

diff --git a/src/audio/audio.cpp b/src/audio/audio.cpp
index 9e778f1..318ccb8 100644
--- a/src/audio/audio.cpp
+++ b/src/audio/audio.cpp
@@ -1,27 +1,19 @@
 #define MINIAUDIO_IMPLEMENTATION
 #include "miniaudio.h"
+#include "synth.h"
 #include <math.h>
 
 static ma_device device;
-static float phase = 0.0f;
 
 static void audio_callback(ma_device*, void* output, const void*, ma_uint32 frames) {
-    int16_t* out = (int16_t*)output;
-    const float freq = 440.0f;
-    const float sr = 32000.0f;
-
-    for (ma_uint32 i = 0; i < frames; i++) {
-        float s = sinf(phase) * 0.2f;
-        phase += 2.0f * 3.14159265f * freq / sr;
-        if (phase > 2.0f * 3.14159265f) phase -= 2.0f * 3.14159265f;
-        out[i] = (int16_t)(s * 32767.0f);
-    }
+    synth_render((float*)output, frames);  // synth writes interleaved stereo floats, matching the f32 / 2-channel config in audio_init
 }
 
 void audio_init() {
+    synth_init();
     ma_device_config cfg = ma_device_config_init(ma_device_type_playback);
-    cfg.playback.format = ma_format_s16;
-    cfg.playback.channels = 1;
+    cfg.playback.format = ma_format_f32;
+    cfg.playback.channels = 2;
     cfg.sampleRate = 32000;
     cfg.dataCallback = audio_callback;
 
@@ -30,4 +22,7 @@ void audio_init() {
 }
 
 void audio_update() {}
-void audio_shutdown() { ma_device_uninit(&device); }
+void audio_shutdown() {
+    ma_device_uninit(&device);  // stop the device first so audio_callback can no longer call synth_render
+    synth_shutdown();           // only then tear down the synth state the callback was reading
+}
diff --git a/src/audio/dct.h b/src/audio/dct.h
new file mode 100644
index 0000000..3e51884
--- /dev/null
+++ b/src/audio/dct.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#define DCT_SIZE 512
+
+void fdct_512(const float input[DCT_SIZE], float output[DCT_SIZE]);
+void idct_512(const float input[DCT_SIZE], float output[DCT_SIZE]);
diff --git a/src/audio/fdct.cpp b/src/audio/fdct.cpp
new file mode 100644
index 0000000..50ab458
--- /dev/null
+++ b/src/audio/fdct.cpp
@@ -0,0 +1,17 @@
+#include "dct.h"
+#include "util/math.h"
+#include <math.h>
+
+// Forward DCT of one DCT_SIZE-sample block, evaluated directly as a double loop.
+void fdct_512(const float input[DCT_SIZE], float output[DCT_SIZE]) {
+    const float dc_scale = sqrtf(1.0f / DCT_SIZE);  // applied to coefficient 0 only
+    const float ac_scale = sqrtf(2.0f / DCT_SIZE);  // applied to every other coefficient
+    for (int k = 0; k < DCT_SIZE; ++k) {
+        float acc = 0.0f;
+        for (int n = 0; n < DCT_SIZE; ++n) {
+            const float basis = cosf((PI / DCT_SIZE) * (n + 0.5f) * k);
+            acc += input[n] * basis;
+        }
+        output[k] = acc * ((k == 0) ? dc_scale : ac_scale);
+    }
+}
diff --git a/src/audio/idct.cpp b/src/audio/idct.cpp
new file mode 100644
index 0000000..a32f92e
--- /dev/null
+++ b/src/audio/idct.cpp
@@ -0,0 +1,17 @@
+#include "dct.h"
+#include "util/math.h"
+#include <math.h>
+
+// Inverse DCT counterpart of fdct_512 (same cosine basis and scale factors), direct double loop.
+void idct_512(const float input[DCT_SIZE], float output[DCT_SIZE]) {
+    const float dc_scale = sqrtf(1.0f / DCT_SIZE);  // weight of coefficient 0
+    const float ac_scale = sqrtf(2.0f / DCT_SIZE);  // weight of all other coefficients
+    for (int n = 0; n < DCT_SIZE; ++n) {
+        float acc = 0.0f;
+        for (int k = 0; k < DCT_SIZE; ++k) {
+            const float weight = (k != 0) ? ac_scale : dc_scale;
+            acc += weight * input[k] * cosf((PI / DCT_SIZE) * (n + 0.5f) * k);
+        }
+        output[n] = acc;
+    }
+}
diff --git a/src/audio/synth.cpp b/src/audio/synth.cpp
new file mode 100644
index 0000000..f009876
--- /dev/null
+++ b/src/audio/synth.cpp
@@ -0,0 +1,157 @@
+#include "synth.h"
+#include "audio/window.h"
+#include <string.h> // For memset
+#include <atomic>
+
+struct Voice {  // playback state of one sounding instance of a spectrogram
+    bool active;              // slot in use; cleared by synth_render once all frames are played
+    int spectrogram_id;       // index of the spectrogram slot in g_synth_data being played
+    float volume;             // gain applied to every sample of this voice
+    float pan_left;           // left-channel gain computed from the pan argument
+    float pan_right;          // right-channel gain computed from the pan argument
+
+    int current_spectral_frame;  // index of the next spectral frame to decode
+    int total_spectral_frames;   // copied from the spectrogram's num_frames at trigger time
+
+    float time_domain_buffer[DCT_SIZE];  // IDCT output of the most recently decoded frame
+    int buffer_pos;                      // read cursor into time_domain_buffer; DCT_SIZE means "decode the next frame"
+
+    const volatile float* active_spectral_data;  // buffer pointer, re-read from g_synth_data before each decode
+};
+
+static struct {  // spectrogram slot table, indexed by the id returned from synth_register_spectrogram
+    Spectrogram spectrograms[MAX_SPECTROGRAMS];  // by-value copies of the registered descriptors
+    const volatile float* active_spectrogram_data[MAX_SPECTROGRAMS];  // per slot: the buffer (a or b) currently played; swapped by synth_commit_update
+    bool spectrogram_registered[MAX_SPECTROGRAMS];  // per slot: true while the slot is occupied
+} g_synth_data;
+
+static Voice g_voices[MAX_VOICES];  // voice pool: filled by synth_trigger_voice, mixed by synth_render
+
+void synth_init() {  // puts every voice and every spectrogram slot into the all-zero (unused) state
+    memset(g_voices, 0, sizeof g_voices);
+    memset(&g_synth_data, 0, sizeof g_synth_data);
+}
+
+void synth_shutdown() {
+    // Intentionally empty: this file allocates no memory, so there is nothing to release.
+}
+
+int synth_register_spectrogram(const Spectrogram* spec) {  // returns the slot id, or -1 on failure
+    if (!spec || !spec->spectral_data_a) return -1;  // reject input with no playable buffer A
+    for (int i = 0; i < MAX_SPECTROGRAMS; ++i) {
+        if (g_synth_data.spectrogram_registered[i]) continue;
+        g_synth_data.spectrograms[i] = *spec;  // shallow copy: only the buffer pointers are stored
+        g_synth_data.active_spectrogram_data[i] = spec->spectral_data_a;  // playback starts on buffer A
+        g_synth_data.spectrogram_registered[i] = true;
+        return i;
+    }
+    return -1;  // every slot is already registered
+}
+
+void synth_unregister_spectrogram(int spectrogram_id) {
+    // Only clears the slot's flag; voices already playing this spectrogram are not stopped.
+    if (spectrogram_id < 0 || spectrogram_id >= MAX_SPECTROGRAMS) return;  // ignore bad ids
+    g_synth_data.spectrogram_registered[spectrogram_id] = false;
+}
+
+// Returns the buffer that is currently NOT active for this spectrogram so the caller
+// can rewrite it; returns nullptr for an out-of-range or unregistered id.
+float* synth_begin_update(int spectrogram_id) {
+    const bool id_in_range = spectrogram_id >= 0 && spectrogram_id < MAX_SPECTROGRAMS;
+    if (!id_in_range || !g_synth_data.spectrogram_registered[spectrogram_id]) {
+        return nullptr;
+    }
+
+    const Spectrogram& slot = g_synth_data.spectrograms[spectrogram_id];
+    const volatile float* front = g_synth_data.active_spectrogram_data[spectrogram_id];
+    // A active -> hand out B; anything else -> hand out A.
+    return (front == slot.spectral_data_a) ? slot.spectral_data_b : slot.spectral_data_a;
+}
+
+// Publishes the back buffer (the one synth_begin_update hands out) as the active buffer.
+void synth_commit_update(int spectrogram_id) {
+    if (spectrogram_id < 0 || spectrogram_id >= MAX_SPECTROGRAMS || !g_synth_data.spectrogram_registered[spectrogram_id]) {
+        return;
+    }
+    const Spectrogram& slot = g_synth_data.spectrograms[spectrogram_id];
+    const volatile float* front = g_synth_data.active_spectrogram_data[spectrogram_id];
+    const float* back = (front == slot.spectral_data_a) ? slot.spectral_data_b : slot.spectral_data_a;
+    if (!back) return;  // single-buffered spectrogram: never publish a null pointer to the renderer
+
+    // Release store (GCC/Clang builtin): writes made to the back buffer before this call are ordered before the swap.
+    __atomic_store_n((const float**)&g_synth_data.active_spectrogram_data[spectrogram_id], back, __ATOMIC_RELEASE);
+}
+
+
+// Starts playback of a registered spectrogram on the first idle voice.
+// pan < 0 attenuates the right channel, pan > 0 the left; the call is a no-op
+// when the id is invalid/unregistered or when all MAX_VOICES voices are busy.
+void synth_trigger_voice(int spectrogram_id, float volume, float pan) {
+    if (spectrogram_id < 0 || spectrogram_id >= MAX_SPECTROGRAMS || !g_synth_data.spectrogram_registered[spectrogram_id]) {
+        return;
+    }
+
+    for (int i = 0; i < MAX_VOICES; ++i) {
+        Voice& v = g_voices[i];
+        if (v.active) continue;
+        v.spectrogram_id = spectrogram_id;
+        v.volume = volume;
+        v.pan_left = (pan > 0.0f) ? (1.0f - pan) : 1.0f;  // simple linear panning
+        v.pan_right = (pan < 0.0f) ? (1.0f + pan) : 1.0f;
+        v.current_spectral_frame = 0;
+        v.total_spectral_frames = g_synth_data.spectrograms[spectrogram_id].num_frames;
+        v.buffer_pos = DCT_SIZE;  // forces an IDCT on the first rendered sample
+        v.active_spectral_data = g_synth_data.active_spectrogram_data[spectrogram_id];
+
+        // Publish last: synth_render skips inactive voices, so it must never see a half-filled one.
+        __atomic_store_n(&v.active, true, __ATOMIC_RELEASE);
+        return;
+    }
+}
+
+
+// Mixes all active voices into `output_buffer` as interleaved stereo (L, R) floats.
+// A voice decodes one spectral frame into DCT_SIZE samples whenever its buffer runs out,
+// and is deactivated once all of its frames have been played.
+void synth_render(float* output_buffer, int num_frames) {
+    // The window never changes, so build it once instead of on every call.
+    static float window[WINDOW_SIZE];
+    static bool window_ready = false;
+    if (!window_ready) {
+        hamming_window_512(window);
+        window_ready = true;
+    }
+
+    for (int i = 0; i < num_frames; ++i) {
+        float left_sample = 0.0f;
+        float right_sample = 0.0f;
+
+        for (int v_idx = 0; v_idx < MAX_VOICES; ++v_idx) {
+            Voice& v = g_voices[v_idx];
+            if (!v.active) continue;
+
+            if (v.buffer_pos >= DCT_SIZE) {
+                if (v.current_spectral_frame >= v.total_spectral_frames) {
+                    v.active = false;
+                    continue;
+                }
+                // Acquire load pairs with the release store in synth_commit_update.
+                v.active_spectral_data = __atomic_load_n((const float**)&g_synth_data.active_spectrogram_data[v.spectrogram_id], __ATOMIC_ACQUIRE);
+                const float* spectral_frame = (const float*)v.active_spectral_data + (v.current_spectral_frame * DCT_SIZE);
+
+                float windowed_frame[DCT_SIZE];
+                for (int j = 0; j < DCT_SIZE; ++j) windowed_frame[j] = spectral_frame[j] * window[j];
+                idct_512(windowed_frame, v.time_domain_buffer);
+                v.buffer_pos = 0;
+                v.current_spectral_frame++;
+            }
+
+            float voice_sample = v.time_domain_buffer[v.buffer_pos++] * v.volume;
+            left_sample += voice_sample * v.pan_left;
+            right_sample += voice_sample * v.pan_right;
+        }
+
+        output_buffer[i * 2] = left_sample;
+        output_buffer[i * 2 + 1] = right_sample;
+    }
+}
diff --git a/src/audio/synth.h b/src/audio/synth.h
new file mode 100644
index 0000000..ce9825d
--- /dev/null
+++ b/src/audio/synth.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "dct.h"
+
+#define MAX_SPECTROGRAMS 16
+#define MAX_VOICES 16
+
+struct Spectrogram {
+    float* spectral_data_a;  // buffer made active when the spectrogram is registered
+    float* spectral_data_b;  // alternate buffer; synth_begin_update / synth_commit_update swap between a and b
+    int num_frames;          // number of DCT_SIZE-coefficient frames a voice plays before it stops
+};
+
+void synth_init();
+void synth_shutdown();
+
+int synth_register_spectrogram(const Spectrogram* spec);
+void synth_unregister_spectrogram(int spectrogram_id);
+
+float* synth_begin_update(int spectrogram_id);
+void synth_commit_update(int spectrogram_id);
+
+void synth_trigger_voice(int spectrogram_id, float volume, float pan);
+void synth_render(float* output_buffer, int num_frames);
diff --git a/src/audio/window.cpp b/src/audio/window.cpp
new file mode 100644
index 0000000..3f36480
--- /dev/null
+++ b/src/audio/window.cpp
@@ -0,0 +1,9 @@
+#include "window.h"
+#include "util/math.h"
+#include <math.h>
+
+void hamming_window_512(float window[WINDOW_SIZE]) {  // fills window[] with Hamming coefficients
+    for (int idx = 0; idx != WINDOW_SIZE; ++idx) {
+        window[idx] = 0.54f - 0.46f * cosf(2.0f * PI * idx / (WINDOW_SIZE - 1));
+    }
+}
diff --git a/src/audio/window.h b/src/audio/window.h
new file mode 100644
index 0000000..8cb5dd8
--- /dev/null
+++ b/src/audio/window.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#define WINDOW_SIZE 512
+
+void hamming_window_512(float window[WINDOW_SIZE]);
diff --git a/src/main.cpp b/src/main.cpp
index 3b61e1e..c1e2789 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,14 +1,64 @@
 #include "platform.h"
 #include "gpu/gpu.h"
 #include "audio/audio.h"
+#include "audio/synth.h"
+#include "util/math.h"
+#include <string.h>
+#include <math.h>
+
+#define DEMO_BPM 120.0f
+#define SECONDS_PER_BEAT (60.0f / DEMO_BPM)
+#define SPEC_FRAMES 16
+
+static float g_spec_buffer_a[SPEC_FRAMES * DCT_SIZE];
+static float g_spec_buffer_b[SPEC_FRAMES * DCT_SIZE];
+
+// Fills `buffer` (SPEC_FRAMES frames of DCT_SIZE bins) with a single-bin tone at `freq` Hz
+// whose amplitude falls off quadratically, starting at 1 in the first frame.
+void generate_tone(float* buffer, float freq) {
+    memset(buffer, 0, SPEC_FRAMES * DCT_SIZE * sizeof(float));
+    const int bin = (int)(freq / (32000.0f / 2.0f) * DCT_SIZE);  // same for every frame
+    if (bin <= 0 || bin >= DCT_SIZE) {
+        return;  // bin 0 and out-of-range bins are never written: the buffer stays silent
+    }
+    for (int frame = 0; frame < SPEC_FRAMES; ++frame) {
+        buffer[frame * DCT_SIZE + bin] = powf(1.0f - (float)frame / SPEC_FRAMES, 2.0f);
+    }
+}
 
 int main() {
     platform_init();
     gpu_init(platform_get_window());
     audio_init();
 
+    generate_tone(g_spec_buffer_a, 440.0f); // A4
+    generate_tone(g_spec_buffer_b, 880.0f); // A5
+    
+    Spectrogram spec = { g_spec_buffer_a, g_spec_buffer_b, SPEC_FRAMES };
+    int tone_id = synth_register_spectrogram(&spec);
+
+    double last_beat_time = 0.0;
+    int beat_count = 0;
+
     while (!platform_should_close()) {
         platform_poll();
+
+        double current_time = platform_get_time();
+        if (current_time - last_beat_time > SECONDS_PER_BEAT) {
+            synth_trigger_voice(tone_id, 0.5f, 0.0f);
+            last_beat_time = current_time;
+            beat_count++;
+
+            if (beat_count == 8) {
+                // Time to update the sound!
+                float* back_buffer = synth_begin_update(tone_id);
+                if (back_buffer) {
+                    generate_tone(back_buffer, 220.0f); // A3
+                    synth_commit_update(tone_id);
+                }
+            }
+        }
+        
         gpu_draw();
         audio_update();
     }
diff --git a/src/platform.cpp b/src/platform.cpp
index adb41d2..237aaf8 100644
--- a/src/platform.cpp
+++ b/src/platform.cpp
@@ -24,3 +24,7 @@ bool platform_should_close() {
 GLFWwindow* platform_get_window() {
     return window;
 }
+
+double platform_get_time() {  // thin wrapper: returns glfwGetTime() unchanged
+    return glfwGetTime();
+}
diff --git a/src/platform.h b/src/platform.h
index 0ebeb59..ed08b19 100644
--- a/src/platform.h
+++ b/src/platform.h
@@ -6,3 +6,5 @@ void platform_shutdown();
 void platform_poll();
 bool platform_should_close();
 GLFWwindow* platform_get_window();
+double platform_get_time();
+
diff --git a/src/tests/test_synth.cpp b/src/tests/test_synth.cpp
new file mode 100644
index 0000000..04b0373
--- /dev/null
+++ b/src/tests/test_synth.cpp
@@ -0,0 +1,107 @@
+#include "audio/synth.h"
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+
+// True when |a - b| is strictly below epsilon (absolute tolerance).
+bool is_close(float a, float b, float epsilon = 1e-6f) {
+    return epsilon > fabsf(a - b);
+}
+
+// Checks slot allocation: ids are handed out in order, a full table yields -1,
+// and a freed slot is the next one reused.
+void test_registration() {
+    synth_init();
+    printf("Running test: Registration...\n");
+    float spec_buf_a[DCT_SIZE], spec_buf_b[DCT_SIZE];  // contents are never read in this test
+    Spectrogram spec = { spec_buf_a, spec_buf_b, 1 };
+
+    // Every slot should be granted, in ascending id order.
+    int slot = 0;
+    while (slot < MAX_SPECTROGRAMS) {
+        int id = synth_register_spectrogram(&spec);
+        assert(id == slot);
+        ++slot;
+    }
+
+    // The table is now full, so one more registration must be refused.
+    int overflow_id = synth_register_spectrogram(&spec);
+    assert(overflow_id == -1);
+
+    // Freeing slot 5 must make exactly that slot available again.
+    synth_unregister_spectrogram(5);
+    int reused_id = synth_register_spectrogram(&spec);
+    assert(reused_id == 5);
+    printf("...Registration test PASSED.\n");
+}
+
+// Renders one frame of a single-bin spectrum and checks that the output is not silent.
+void test_render() {
+    synth_init();
+    printf("Running test: Render...\n");
+
+    // One frame whose only non-zero coefficient is bin 10; no back buffer is provided.
+    float spec_buf_a[DCT_SIZE] = {0};
+    spec_buf_a[10] = 1.0f;
+    Spectrogram spec = { spec_buf_a, nullptr, 1 };
+
+    int id = synth_register_spectrogram(&spec);
+    assert(id != -1);
+    synth_trigger_voice(id, 1.0f, 0.0f);  // full volume, centered
+
+    enum { STEREO_SAMPLES = DCT_SIZE * 2 };  // interleaved L/R
+    float output_buffer[STEREO_SAMPLES] = {0};
+    synth_render(output_buffer, DCT_SIZE);
+
+    // Sum of absolute sample values across both channels.
+    float total_energy = 0.0f;
+    for (const float* p = output_buffer; p != output_buffer + STEREO_SAMPLES; ++p) {
+        total_energy += fabsf(*p);
+    }
+
+    // If we rendered a sound, the buffer should not be silent
+    assert(total_energy > 0.01f);
+    printf("...Render test PASSED.\n");
+}
+
+// Exercises the begin/commit double-buffer swap: the back buffer must alternate B, then A.
+void test_update() {
+    synth_init();
+    printf("Running test: Update...\n");
+    float spec_buf_a[DCT_SIZE] = {0};
+    float spec_buf_b[DCT_SIZE] = {0};
+    Spectrogram spec = { spec_buf_a, spec_buf_b, 1 };
+
+    spec_buf_a[10] = 1.0f; // Original sound
+    spec_buf_b[20] = 1.0f; // Updated sound
+
+    int id = synth_register_spectrogram(&spec);
+    assert(id != -1);  // registration must succeed: synth_init just emptied the slot table
+
+    // Registration makes A active, so the first back buffer must be B.
+    float* back_buffer = synth_begin_update(id);
+    assert(back_buffer == spec_buf_b);
+
+    // B already differs from A, so commit it without further edits.
+    synth_commit_update(id);
+
+    // A voice triggered now would read from B, but verifying that needs output
+    // analysis. Here we only check that the roles swapped: the back buffer
+    // handed out after the commit must be A.
+    back_buffer = synth_begin_update(id);
+    assert(back_buffer == spec_buf_a);
+
+    printf("...Update test PASSED.\n");
+}
+
+int main() {  // runs the three synth tests in order, then shuts the synth down
+    test_registration();
+    test_render();
+    test_update();
+
+    synth_shutdown();
+
+    printf("\nAll synth tests passed!\n");
+    return 0;
+}
diff --git a/src/tests/test_window.cpp b/src/tests/test_window.cpp
new file mode 100644
index 0000000..1667dab
--- /dev/null
+++ b/src/tests/test_window.cpp
@@ -0,0 +1,34 @@
+#include "audio/window.h"
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+
+// True when |a - b| is strictly below epsilon (absolute tolerance).
+bool is_close(float a, float b, float epsilon = 1e-6f) {
+    return epsilon > fabsf(a - b);
+}
+
+int main() {
+    float window[WINDOW_SIZE];
+    hamming_window_512(window);
+    const int last = WINDOW_SIZE - 1;
+    const int mid = WINDOW_SIZE / 2;
+
+    // Test 1: both endpoints should equal 0.54 - 0.46 = 0.08
+    assert(is_close(window[0], 0.08f));
+    assert(is_close(window[last], 0.08f));
+    printf("Test 1 passed: Window start and end values are correct.\n");
+
+    // Test 2: sample i should mirror sample (last - i)
+    for (int lo = 0, hi = last; lo < mid; ++lo, --hi) {
+        assert(is_close(window[lo], window[hi]));
+    }
+    printf("Test 2 passed: Window is symmetric.\n");
+
+    // Test 3: the two central samples should match and exceed the sample just before them
+    assert(is_close(window[mid - 1], window[mid]));
+    assert(window[mid] > window[mid - 2]);
+    printf("Test 3 passed: Window peak is correct for even size.\n");
+    printf("All tests passed for Hamming window!\n");
+    return 0;
+}
-- 
cgit v1.2.3