From 2f8926f433248af28081497e8371e02abe61d6ff Mon Sep 17 00:00:00 2001 From: skal Date: Thu, 5 Mar 2026 21:50:53 +0100 Subject: feat(spectool): add --wav decode, IMDCT, and roundtrip test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - spectool --wav : decodes .spec to mono 16-bit WAV at 32 kHz using IDCT-OLA synthesis (no synthesis window). The analysis Hann window at 50% overlap satisfies w[n]+w[n+H]=1, so the synthesis window must be rectangular for perfect reconstruction. - Add imdct_512 / imdct_fft to audio lib (fft.cc, fft.h, idct.cc, dct.h) for future MDCT-based synthesis. - test_wav_roundtrip: in-process OLA analyze+decode SNR test (≥30 dB). Currently measures 53 dB on a 440 Hz sine. - Fix stale test_spectool.cc: version assertion updated from 1 to SPEC_VERSION_V2_OLA (was always wrong since OLA fix landed). - Docs: TOOLS_REFERENCE.md removes dead specview, documents --wav / --normalize / test_gen. HOWTO.md adds decode section. TRACKER.md notes spec v2 OLA format and decode command. Co-Authored-By: Claude Sonnet 4.6 --- src/audio/dct.h | 3 +++ src/audio/fft.cc | 43 +++++++++++++++++++++++++++++++++++++++++++ src/audio/fft.h | 7 +++++++ src/audio/idct.cc | 4 ++++ 4 files changed, 57 insertions(+) (limited to 'src/audio') diff --git a/src/audio/dct.h b/src/audio/dct.h index ec9f651..ca423c2 100644 --- a/src/audio/dct.h +++ b/src/audio/dct.h @@ -11,3 +11,6 @@ // Forward declarations void fdct_512(const float* input, float* output); void idct_512(const float* input, float* output); +// IMDCT: N=512 coefficients -> 2*DCT_SIZE=1024 samples. +// Window with Hann(1024) and OLA with hop DCT_SIZE for perfect reconstruction. +void imdct_512(const float* input, float* output); diff --git a/src/audio/fft.cc b/src/audio/fft.cc index 64d7b1a..6029454 100644 --- a/src/audio/fft.cc +++ b/src/audio/fft.cc @@ -140,6 +140,49 @@ void dct_fft(const float* input, float* output, size_t N) { delete[] imag; } +// IMDCT via FFT +// Produces 2N time-domain samples from N MDCT coefficients. +// Formula: x[n] = (2/N) * sum_{k=0}^{N-1} X[k] * cos(pi*(2n+1+N)*(2k+1)/(2N)) +// When windowed (Hann, length 2N) and OLA'd with hop N, gives perfect reconstruction. +void imdct_fft(const float* input, float* output, size_t N) { + const float PI = 3.14159265358979323846f; + const size_t M = 2 * N; // output length + + float* real = new float[M]; + float* imag = new float[M]; + + // Pre-multiply X[k] by exp(-j*pi*(2k+1)/(4N)), build 2N complex FFT input + // via standard IMDCT-via-FFT algorithm (N-point complex FFT) + for (size_t k = 0; k < N; k++) { + const float angle = -PI * (2.0f * k + 1.0f) / (4.0f * N); + real[k] = input[k] * cosf(angle); + imag[k] = input[k] * sinf(angle); + } + for (size_t k = N; k < M; k++) { + real[k] = 0.0f; + imag[k] = 0.0f; + } + + // Inverse FFT of length 2N + bit_reverse_permute(real, imag, M); + fft_radix2(real, imag, M, -1); + const float scale = 1.0f / (float)M; + for (size_t i = 0; i < M; i++) { + real[i] *= scale; + imag[i] *= scale; + } + + // Post-multiply by 2N * exp(-j*pi*(2n+1)/(4N)) and take real part, scale by 2/N + const float gain = 2.0f; + for (size_t n = 0; n < M; n++) { + const float angle = -PI * (2.0f * n + 1.0f) / (4.0f * N); + output[n] = gain * (real[n] * cosf(angle) - imag[n] * sinf(angle)); + } + + delete[] real; + delete[] imag; +} + // IDCT (DCT-III) via FFT - inverse of the DCT-II reordering method // Reference: Numerical Recipes Chapter 12.3, Press et al. void idct_fft(const float* input, float* output, size_t N) { diff --git a/src/audio/fft.h b/src/audio/fft.h index 8c10afd..df37ad5 100644 --- a/src/audio/fft.h +++ b/src/audio/fft.h @@ -32,4 +32,11 @@ void dct_fft(const float* input, float* output, size_t N); // N must be a power of 2 void idct_fft(const float* input, float* output, size_t N); +// IMDCT via FFT +// Input: input[] (length N) — MDCT coefficients +// Output: output[] (length 2*N) — time-domain samples +// Window output with Hann(2N) and OLA with hop N for perfect reconstruction. +// N must be a power of 2 +void imdct_fft(const float* input, float* output, size_t N); + #endif /* AUDIO_FFT_H_ */ diff --git a/src/audio/idct.cc b/src/audio/idct.cc index 4566f99..7a3c489 100644 --- a/src/audio/idct.cc +++ b/src/audio/idct.cc @@ -9,3 +9,7 @@ void idct_512(const float* input, float* output) { idct_fft(input, output, DCT_SIZE); } + +void imdct_512(const float* input, float* output) { + imdct_fft(input, output, DCT_SIZE); +} -- cgit v1.2.3