From ad4f87e0ebfd361c69c7ba9adc29292305f21f7c Mon Sep 17 00:00:00 2001
From: skal
Date: Tue, 27 Jan 2026 22:16:23 +0100
Subject: feat(audio): Implement real-time spectrogram synthesizer

Adds a multi-voice, real-time audio synthesis engine that generates sound
from spectrogram data using an Inverse Discrete Cosine Transform (IDCT).

Key features:
- A thread-safe, double-buffered system for dynamically updating
  spectrograms in real-time without interrupting audio playback.
- Core DSP components: FDCT, IDCT, and Hamming window functions.
- A simple sequencer in the main loop to demonstrate scripted audio events
  and dynamic updates.
- Unit tests for the new synth engine and Hamming window, integrated with
  CTest.
- A file documenting the build process, features, and how to run tests.
---
 src/audio/audio.cpp  |  23 +++-----
 src/audio/dct.h      |   6 ++
 src/audio/fdct.cpp   |  17 ++++++
 src/audio/idct.cpp   |  17 ++++++
 src/audio/synth.cpp  | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/audio/synth.h    |  24 ++++++++
 src/audio/window.cpp |   9 +++
 src/audio/window.h   |   5 ++
 8 files changed, 244 insertions(+), 14 deletions(-)
 create mode 100644 src/audio/dct.h
 create mode 100644 src/audio/fdct.cpp
 create mode 100644 src/audio/idct.cpp
 create mode 100644 src/audio/synth.cpp
 create mode 100644 src/audio/synth.h
 create mode 100644 src/audio/window.cpp
 create mode 100644 src/audio/window.h

(limited to 'src/audio')

diff --git a/src/audio/audio.cpp b/src/audio/audio.cpp
index 9e778f1..318ccb8 100644
--- a/src/audio/audio.cpp
+++ b/src/audio/audio.cpp
@@ -1,27 +1,19 @@
 #define MINIAUDIO_IMPLEMENTATION
 #include "miniaudio.h"
+#include "synth.h"
 #include
 
 static ma_device device;
-static float phase = 0.0f;
 
 static void audio_callback(ma_device*, void* output, const void*, ma_uint32 frames) {
-  int16_t* out = (int16_t*)output;
-  const float freq = 440.0f;
-  const float sr = 32000.0f;
-
-  for (ma_uint32 i = 0; i < frames; i++) {
-    float s = sinf(phase) * 0.2f;
-    phase += 2.0f * 3.14159265f * freq / sr;
-    if (phase > 2.0f * 3.14159265f) phase -= 2.0f * 3.14159265f;
-    out[i] = (int16_t)(s * 32767.0f);
-  }
+  synth_render((float*)output, frames);  // device is configured as ma_format_f32 in audio_init, so output holds float samples
 }
 
 void audio_init() {
+  synth_init();  // reset synth state before the device is configured with audio_callback
   ma_device_config cfg = ma_device_config_init(ma_device_type_playback);
-  cfg.playback.format = ma_format_s16;
-  cfg.playback.channels = 1;
+  cfg.playback.format = ma_format_f32;
+  cfg.playback.channels = 2;
   cfg.sampleRate = 32000;
   cfg.dataCallback = audio_callback;
 
@@ -30,4 +22,7 @@ void audio_init() {
 }
 
 void audio_update() {}
-void audio_shutdown() { ma_device_uninit(&device); }
+void audio_shutdown() {
+  ma_device_uninit(&device);  // tear the device down first so audio_callback can no longer call synth_render
+  synth_shutdown();           // only now is it safe to release synth state
+}
diff --git a/src/audio/dct.h b/src/audio/dct.h
new file mode 100644
index 0000000..3e51884
--- /dev/null
+++ b/src/audio/dct.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#define DCT_SIZE 512  // number of samples / coefficients handled by one transform call
+
+void fdct_512(const float input[DCT_SIZE], float output[DCT_SIZE]);  // forward DCT: samples -> coefficients
+void idct_512(const float input[DCT_SIZE], float output[DCT_SIZE]);  // inverse DCT: coefficients -> samples
diff --git a/src/audio/fdct.cpp b/src/audio/fdct.cpp
new file mode 100644
index 0000000..50ab458
--- /dev/null
+++ b/src/audio/fdct.cpp
@@ -0,0 +1,17 @@
+#include "dct.h"
+#include "util/math.h"
+#include  // NOTE(review): header name lost in extraction; presumably <math.h> for sqrtf/cosf -- confirm
+
+void fdct_512(const float input[DCT_SIZE], float output[DCT_SIZE]) {  // direct O(N^2) evaluation of the orthonormal DCT-II
+  float scale_k0 = sqrtf(1.0f / DCT_SIZE);  // normalisation for the DC coefficient (k == 0)
+  float scale_kn = sqrtf(2.0f / DCT_SIZE);  // normalisation for every other coefficient
+
+  for (int k = 0; k < DCT_SIZE; ++k) {
+    float sum = 0.0f;
+    for (int n = 0; n < DCT_SIZE; ++n) {
+      sum += input[n] * cosf((PI / DCT_SIZE) * (n + 0.5f) * k);
+    }
+    float scale = (k == 0) ? scale_k0 : scale_kn;
+    output[k] = sum * scale;
+  }
+}
diff --git a/src/audio/idct.cpp b/src/audio/idct.cpp
new file mode 100644
index 0000000..a32f92e
--- /dev/null
+++ b/src/audio/idct.cpp
@@ -0,0 +1,17 @@
+#include "dct.h"
+#include "util/math.h"
+#include  // NOTE(review): header name lost in extraction; presumably <math.h> for sqrtf/cosf -- confirm
+
+void idct_512(const float input[DCT_SIZE], float output[DCT_SIZE]) {  // direct O(N^2) inverse of fdct_512 (same cosine basis and scaling)
+  float scale_k0 = sqrtf(1.0f / DCT_SIZE);  // weight applied to the DC coefficient (k == 0)
+  float scale_kn = sqrtf(2.0f / DCT_SIZE);  // weight applied to every other coefficient
+
+  for (int n = 0; n < DCT_SIZE; ++n) {
+    float sum = 0.0f;
+    for (int k = 0; k < DCT_SIZE; ++k) {
+      float scale = (k == 0) ? scale_k0 : scale_kn;
+      sum += scale * input[k] * cosf((PI / DCT_SIZE) * (n + 0.5f) * k);
+    }
+    output[n] = sum;
+  }
+}
diff --git a/src/audio/synth.cpp b/src/audio/synth.cpp
new file mode 100644
index 0000000..f009876
--- /dev/null
+++ b/src/audio/synth.cpp
@@ -0,0 +1,157 @@
+#include "synth.h"
+#include "audio/window.h"
+#include // For memset -- NOTE(review): header name lost in extraction (<string.h> or <cstring>); restore before applying
+#include  // NOTE(review): header name lost in extraction; restore before applying
+
+struct Voice {  // playback state of one sounding voice
+  bool active;  // slot in use; written last (release) by synth_trigger_voice, read (acquire) by synth_render
+  int spectrogram_id;  // index into g_synth_data of the spectrogram being played
+  float volume;
+  float pan_left;   // left-channel gain derived from pan
+  float pan_right;  // right-channel gain derived from pan
+
+  int current_spectral_frame;  // next spectral frame to fetch
+  int total_spectral_frames;   // copied from Spectrogram::num_frames at trigger time
+
+  float time_domain_buffer[DCT_SIZE];
+  int buffer_pos;  // presumably the read cursor into time_domain_buffer; >= DCT_SIZE makes synth_render fetch the next frame
+
+  const volatile float* active_spectral_data;  // buffer this voice currently reads from
+};
+
+static struct {
+  Spectrogram spectrograms[MAX_SPECTROGRAMS];  // by-value copies of the registered descriptors
+  const volatile float* active_spectrogram_data[MAX_SPECTROGRAMS];  // per slot: whichever of spectral_data_a/b is live
+  bool spectrogram_registered[MAX_SPECTROGRAMS];  // slot occupancy flags
+} g_synth_data;
+
+static Voice g_voices[MAX_VOICES];
+
+void synth_init() {  // zeroes all spectrogram slots and voices
+  memset(&g_synth_data, 0, sizeof(g_synth_data));
+  memset(g_voices, 0, sizeof(g_voices));
+}
+
+void synth_shutdown() {
+  // Nothing to do here since we are not allocating memory
+}
+
+int synth_register_spectrogram(const Spectrogram* spec) {  // copies *spec into the first free slot; returns its index or -1
+  for (int i = 0; i < MAX_SPECTROGRAMS; ++i) {
+    if (!g_synth_data.spectrogram_registered[i]) {
+      g_synth_data.spectrograms[i] = *spec;
+      g_synth_data.active_spectrogram_data[i] = spec->spectral_data_a;  // buffer A starts out as the live one
+      g_synth_data.spectrogram_registered[i] = true;
+      return i;
+    }
+  }
+  return -1; // No free slots
+}
+
+void synth_unregister_spectrogram(int spectrogram_id) {  // frees the slot; out-of-range ids are ignored; voices using it are not touched here
+  if (spectrogram_id >= 0 && spectrogram_id < MAX_SPECTROGRAMS) {
+    g_synth_data.spectrogram_registered[spectrogram_id] = false;
+  }
+}
+
+float* synth_begin_update(int spectrogram_id) {  // returns the buffer that is NOT live, for the caller to fill; nullptr on a bad id
+  if (spectrogram_id < 0 || spectrogram_id >= MAX_SPECTROGRAMS || !g_synth_data.spectrogram_registered[spectrogram_id]) {
+    return nullptr;
+  }
+
+  const volatile float* active_ptr = g_synth_data.active_spectrogram_data[spectrogram_id];
+
+  if (active_ptr == g_synth_data.spectrograms[spectrogram_id].spectral_data_a) {
+    return g_synth_data.spectrograms[spectrogram_id].spectral_data_b;
+  } else {
+    return g_synth_data.spectrograms[spectrogram_id].spectral_data_a;
+  }
+}
+
+void synth_commit_update(int spectrogram_id) {  // makes the buffer handed out by synth_begin_update the live one; no-op on a bad id
+  if (spectrogram_id < 0 || spectrogram_id >= MAX_SPECTROGRAMS || !g_synth_data.spectrogram_registered[spectrogram_id]) {
+    return;
+  }
+
+  const volatile float* old_active_ptr = g_synth_data.active_spectrogram_data[spectrogram_id];
+  const float* new_active_ptr = (old_active_ptr == g_synth_data.spectrograms[spectrogram_id].spectral_data_a)
+                                    ? g_synth_data.spectrograms[spectrogram_id].spectral_data_b
+                                    : g_synth_data.spectrograms[spectrogram_id].spectral_data_a;
+
+  // Atomic swap using GCC/Clang builtins for thread safety
+  __atomic_store_n((const float**)&g_synth_data.active_spectrogram_data[spectrogram_id], new_active_ptr, __ATOMIC_RELEASE);
+}
+
+
+void synth_trigger_voice(int spectrogram_id, float volume, float pan) {  // starts the spectrogram on the first idle voice; silently does nothing on a bad id or when all voices are busy
+  if (spectrogram_id < 0 || spectrogram_id >= MAX_SPECTROGRAMS || !g_synth_data.spectrogram_registered[spectrogram_id]) {
+    return;
+  }
+
+  for (int i = 0; i < MAX_VOICES; ++i) {
+    if (!g_voices[i].active) {
+      Voice& v = g_voices[i];
+      v.spectrogram_id = spectrogram_id;
+      v.volume = volume;
+
+      // Simple linear panning
+      v.pan_left = (pan > 0.0f) ? (1.0f - pan) : 1.0f;   // assumes pan in [-1, 1]; not clamped, larger values give a negative gain
+      v.pan_right = (pan < 0.0f) ? (1.0f + pan) : 1.0f;
+
+      v.current_spectral_frame = 0;
+      v.total_spectral_frames = g_synth_data.spectrograms[spectrogram_id].num_frames;
+      v.buffer_pos = DCT_SIZE; // Force IDCT on first render
+      v.active_spectral_data = g_synth_data.active_spectrogram_data[spectrogram_id];
+      __atomic_store_n(&v.active, true, __ATOMIC_RELEASE);  // publish last: the audio thread must never see an active voice with stale fields
+
+      return; // Voice triggered
+    }
+  }
+}
+
+
+void synth_render(float* output_buffer, int num_frames) {  // called from audio_callback with the device output buffer
+  float window[WINDOW_SIZE];
+  hamming_window_512(window);  // recomputed on every call
+
+  for (int i = 0; i < num_frames; ++i) {
+    float left_sample = 0.0f;
+    float right_sample = 0.0f;
+
+    for (int v_idx = 0; v_idx < MAX_VOICES; ++v_idx) {
+      Voice& v = g_voices[v_idx];
+      if (!__atomic_load_n(&v.active, __ATOMIC_ACQUIRE)) continue;  // pairs with the release store in synth_trigger_voice
+
+      if (v.buffer_pos >= DCT_SIZE) {
+        if (v.current_spectral_frame >= v.total_spectral_frames) {
+          v.active = false;  // all frames consumed: free the voice slot
+          continue;
+        }
+
+        // Fetch the latest active spectrogram pointer for this voice
+        v.active_spectral_data = __atomic_load_n(&g_synth_data.active_spectrogram_data[v.spectrogram_id], __ATOMIC_ACQUIRE);  // pairs with the release store in synth_commit_update
+
+        const float* spectral_frame = (const float*)v.active_spectral_data + (v.current_spectral_frame * DCT_SIZE);
+
+        float windowed_frame[DCT_SIZE];
+        for(int j=0; j  // NOTE(review): extraction dropped everything from here through the first lines of the src/audio/window.cpp diff (rest of synth_render and all of src/audio/synth.h); restore from the original commit before applying
+
+void hamming_window_512(float window[WINDOW_SIZE]) {  // fills window[] with Hamming coefficients 0.54 - 0.46*cos(2*PI*i/(N-1))
+  for (int i = 0; i < WINDOW_SIZE; ++i) {
+    window[i] = 0.54f - 0.46f * cosf(2.0f * PI * i / (WINDOW_SIZE - 1));
+  }
+}
diff --git a/src/audio/window.h b/src/audio/window.h
new file mode 100644
index 0000000..8cb5dd8
--- /dev/null
+++ b/src/audio/window.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#define WINDOW_SIZE 512  // number of window coefficients
+
+void hamming_window_512(float window[WINDOW_SIZE]);  // writes WINDOW_SIZE Hamming coefficients into window
--
cgit v1.2.3