From 2f8926f433248af28081497e8371e02abe61d6ff Mon Sep 17 00:00:00 2001 From: skal Date: Thu, 5 Mar 2026 21:50:53 +0100 Subject: feat(spectool): add --wav decode, IMDCT, and roundtrip test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - spectool --wav : decodes .spec to mono 16-bit WAV at 32 kHz using IDCT-OLA synthesis (no synthesis window). The analysis Hann window at 50% overlap satisfies w[n]+w[n+H]=1, so the synthesis window must be rectangular for perfect reconstruction. - Add imdct_512 / imdct_fft to audio lib (fft.cc, fft.h, idct.cc, dct.h) for future MDCT-based synthesis. - test_wav_roundtrip: in-process OLA analyze+decode SNR test (≥30 dB). Currently measures 53 dB on a 440 Hz sine. - Fix stale test_spectool.cc: version assertion updated from 1 to SPEC_VERSION_V2_OLA (was always wrong since OLA fix landed). - Docs: TOOLS_REFERENCE.md removes dead specview, documents --wav / --normalize / test_gen. HOWTO.md adds decode section. TRACKER.md notes spec v2 OLA format and decode command. Co-Authored-By: Claude Sonnet 4.6 --- src/tests/audio/test_wav_roundtrip.cc | 101 ++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/tests/audio/test_wav_roundtrip.cc (limited to 'src/tests/audio/test_wav_roundtrip.cc') diff --git a/src/tests/audio/test_wav_roundtrip.cc b/src/tests/audio/test_wav_roundtrip.cc new file mode 100644 index 0000000..6294d6d --- /dev/null +++ b/src/tests/audio/test_wav_roundtrip.cc @@ -0,0 +1,101 @@ +// Tests the wav->spec->wav roundtrip SNR. +// Generates a sine wave, runs OLA-DCT analysis then IDCT-OLA synthesis, +// and asserts the reconstruction SNR exceeds the threshold. 
+ +#include "audio/dct.h" +#include "audio/window.h" +#include +#include +#include +#include + +static const int SAMPLE_RATE = 32000; +static const float PI = 3.14159265358979323846f; + +// Replicate analyze_audio OLA pass (Hann + FDCT, hop = OLA_HOP_SIZE) +static std::vector ola_analyze(const std::vector& pcm) { + float win[DCT_SIZE]; + hann_window_512(win); + + const int hop = OLA_HOP_SIZE; + const int n_pcm = (int)pcm.size(); + const int num_frames = (n_pcm > DCT_SIZE) ? (n_pcm - DCT_SIZE) / hop + 1 : 1; + + std::vector spec(num_frames * DCT_SIZE); + float chunk[DCT_SIZE]; + + for (int f = 0; f < num_frames; ++f) { + const int start = f * hop; + const int avail = (start + DCT_SIZE <= n_pcm) ? DCT_SIZE : n_pcm - start; + for (int i = 0; i < avail; ++i) chunk[i] = pcm[start + i] * win[i]; + for (int i = avail; i < DCT_SIZE; ++i) chunk[i] = 0.0f; + + fdct_512(chunk, spec.data() + f * DCT_SIZE); + } + return spec; +} + +// IDCT + OLA synthesis (no synthesis window) matching decode_to_wav. +// Analysis used Hann; since Hann satisfies w[n]+w[n+H]=1 at 50% overlap, +// skipping the synthesis window gives perfect reconstruction. 
+static std::vector ola_decode(const std::vector& spec, + int num_frames) { + std::vector pcm(num_frames * OLA_HOP_SIZE + OLA_OVERLAP, 0.0f); + float overlap[OLA_OVERLAP] = {}; + float tmp[DCT_SIZE]; + + for (int f = 0; f < num_frames; ++f) { + idct_512(spec.data() + f * DCT_SIZE, tmp); + for (int j = 0; j < OLA_HOP_SIZE; ++j) + pcm[f * OLA_HOP_SIZE + j] = tmp[j] + overlap[j]; + for (int j = 0; j < OLA_OVERLAP; ++j) + overlap[j] = tmp[OLA_HOP_SIZE + j]; + } + pcm.resize(num_frames * OLA_HOP_SIZE); + return pcm; +} + +static float compute_snr_db(const std::vector& ref, + const std::vector& out, + int skip_samples) { + const int n = (int)std::min(ref.size(), out.size()); + double sig = 0.0, noise = 0.0; + for (int i = skip_samples; i < n; ++i) { + sig += (double)ref[i] * ref[i]; + double e = ref[i] - out[i]; + noise += e * e; + } + if (noise < 1e-30) return 999.0f; + return 10.0f * (float)log10(sig / noise); +} + +int main() { + printf("Running WAV roundtrip test...\n"); + + // 1-second 440 Hz sine at 32 kHz + const int n_samples = SAMPLE_RATE; + std::vector input(n_samples); + for (int i = 0; i < n_samples; ++i) + input[i] = 0.5f * sinf(2.0f * PI * 440.0f * i / SAMPLE_RATE); + + // Analyze + std::vector spec = ola_analyze(input); + const int num_frames = (int)(spec.size() / DCT_SIZE); + + // Decode with IDCT-OLA (no synthesis window) + std::vector output = ola_decode(spec, num_frames); + + // SNR — skip first DCT_SIZE samples (ramp-up transient) + const float snr = compute_snr_db(input, output, DCT_SIZE); + printf("Roundtrip SNR: %.1f dB (frames=%d, out_samples=%zu)\n", + snr, num_frames, output.size()); + + const float MIN_SNR_DB = 30.0f; + if (snr < MIN_SNR_DB) { + printf("FAIL: SNR %.1f dB < %.0f dB threshold\n", snr, MIN_SNR_DB); + return 1; + } + + printf("PASS\n"); + return 0; +} -- cgit v1.2.3