diff options
| author | skal <pascal.massimino@gmail.com> | 2026-03-02 01:40:38 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-03-02 01:40:38 +0100 |
| commit | a94ec07cdaa53c0b11aa1610d40dd1a96efc0e39 (patch) | |
| tree | 6a5a54e9ad99c13ef2093cf3ed7acf696cd96547 /src | |
| parent | 2859c082179e19f0076a699174f7fa588234e465 (diff) | |
feat(audio): MP3 asset support in tracker via decode-at-init
Detect MP3 blobs by magic bytes in tracker_init(), decode to spectrogram
(hamming window + FDCT) using new mp3_decode(), and register with synth
exactly like .spec assets. STRIP_ALL builds guard with FATAL_CHECK.
handoff(Gemini): MP3 assets now usable in music.track with SAMPLE ASSET_*
syntax; see doc/TRACKER.md for usage. No synth/compiler/packer changes.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'src')
| -rw-r--r-- | src/audio/mp3_sample.cc | 7 | ||||
| -rw-r--r-- | src/audio/mp3_sample.h | 4 | ||||
| -rw-r--r-- | src/audio/tracker.cc | 77 |
3 files changed, 88 insertions, 0 deletions
diff --git a/src/audio/mp3_sample.cc b/src/audio/mp3_sample.cc
index 2036acd..028fbff 100644
--- a/src/audio/mp3_sample.cc
+++ b/src/audio/mp3_sample.cc
@@ -37,6 +37,13 @@ int mp3_decode_range(Mp3Decoder* dec, int start_frame, int num_frames,
   return (int)frames_read;
 }
 
+int mp3_decode(Mp3Decoder* dec, int num_frames, float* out) {
+  ma_uint64 frames_read = 0;
+  ma_decoder_read_pcm_frames(&dec->dec, out, (ma_uint64)num_frames,
+                             &frames_read);
+  return (int)frames_read;
+}
+
 void mp3_close(Mp3Decoder* dec) {
   if (!dec) return;
   ma_decoder_uninit(&dec->dec);
diff --git a/src/audio/mp3_sample.h b/src/audio/mp3_sample.h
index e8229f2..e7e759e 100644
--- a/src/audio/mp3_sample.h
+++ b/src/audio/mp3_sample.h
@@ -24,6 +24,10 @@ Mp3Decoder* mp3_open(const uint8_t* data, size_t size);
 int mp3_decode_range(Mp3Decoder* dec, int start_frame, int num_frames,
                      float* out);
 
+// Read up to |num_frames| f32 mono samples sequentially from current position.
+// Returns frames decoded (< num_frames means end of stream).
+int mp3_decode(Mp3Decoder* dec, int num_frames, float* out);
+
 // Release the decoder.
 void mp3_close(Mp3Decoder* dec);
 
diff --git a/src/audio/tracker.cc b/src/audio/tracker.cc
index 9446c6f..59801a8 100644
--- a/src/audio/tracker.cc
+++ b/src/audio/tracker.cc
@@ -8,6 +8,12 @@
 #include <random>
 #include <vector>
 
+#if !defined(STRIP_ALL)
+#include "audio/dct.h"
+#include "audio/mp3_sample.h"
+#include "audio/window.h"
+#endif  // !defined(STRIP_ALL)
+
 static uint32_t g_last_trigger_idx = 0;
 
 // Active pattern instance tracking
@@ -38,6 +44,56 @@ static bool g_cache_initialized = false;
 // Forward declarations
 static int get_free_pool_slot();
 
+// Returns true if the asset blob looks like an MP3 file.
+static bool is_mp3_asset(const uint8_t* data, size_t size) {
+  if (!data || size < 3) return false;
+  // ID3v2 tag prefix
+  if (data[0] == 'I' && data[1] == 'D' && data[2] == '3') return true;
+  // Raw MP3 sync word: 0xFF followed by 0xE0-0xFF
+  if (size >= 2 && data[0] == 0xFF && (data[1] & 0xE0) == 0xE0) return true;
+  return false;
+}
+
+#if !defined(STRIP_ALL)
+// Decode an in-memory MP3 blob to a heap-allocated spectrogram (caller owns).
+// Returns nullptr on error. Sets *out_num_frames to frame count.
+static float* convert_mp3_to_spectrogram(const uint8_t* data, size_t size,
+                                         int* out_num_frames) {
+  *out_num_frames = 0;
+  Mp3Decoder* dec = mp3_open(data, size);
+  if (!dec) return nullptr;
+
+  float window[DCT_SIZE];
+  hamming_window_512(window);
+
+  std::vector<float> spec_data;
+  float pcm_chunk[DCT_SIZE];
+
+  for (;;) {
+    const int decoded = mp3_decode(dec, DCT_SIZE, pcm_chunk);
+    if (decoded == 0) break;
+    if (decoded < DCT_SIZE) {
+      memset(pcm_chunk + decoded, 0, (DCT_SIZE - decoded) * sizeof(float));
+    }
+    for (int i = 0; i < DCT_SIZE; ++i) {
+      pcm_chunk[i] *= window[i];
+    }
+    float dct_chunk[DCT_SIZE];
+    fdct_512(pcm_chunk, dct_chunk);
+    spec_data.insert(spec_data.end(), dct_chunk, dct_chunk + DCT_SIZE);
+  }
+
+  mp3_close(dec);
+  if (spec_data.empty()) return nullptr;
+
+  const int num_frames = (int)(spec_data.size() / DCT_SIZE);
+  float* result = new float[spec_data.size()];
+  memcpy(result, spec_data.data(), spec_data.size() * sizeof(float));
+  *out_num_frames = num_frames;
+  return result;
+}
+#endif  // !defined(STRIP_ALL)
+
 void tracker_init() {
   g_last_trigger_idx = 0;
   g_next_pool_slot = 0;
@@ -79,6 +135,27 @@ void tracker_init() {
         // ASSET sample: Load once and cache
         size_t size;
         const uint8_t* data = GetAsset(aid, &size);
+#if !defined(STRIP_ALL)
+        if (data && size > 0 && is_mp3_asset(data, size)) {
+          int num_frames = 0;
+          float* spec_data =
+              convert_mp3_to_spectrogram(data, size, &num_frames);
+          if (spec_data && num_frames > 0) {
+            const int slot = get_free_pool_slot();
+            g_spec_pool[slot].data = spec_data;
+            g_spec_pool[slot].active = true;
+            Spectrogram spec;
+            spec.spectral_data_a = spec_data;
+            spec.spectral_data_b = spec_data;
+            spec.num_frames = num_frames;
+            g_sample_synth_cache[sid] = synth_register_spectrogram(&spec);
+            g_spec_pool[slot].synth_id = g_sample_synth_cache[sid];
+          }
+        } else
+#else
+        FATAL_CHECK(data == nullptr || !is_mp3_asset(data, size),
+                    "MP3 assets not supported in STRIP_ALL builds\n");
+#endif
         if (data && size >= sizeof(SpecHeader)) {
           const SpecHeader* header = (const SpecHeader*)data;
           const int note_frames = header->num_frames;
