From 2f8926f433248af28081497e8371e02abe61d6ff Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Thu, 5 Mar 2026 21:50:53 +0100
Subject: feat(spectool): add --wav decode, IMDCT, and roundtrip test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- spectool --wav <input.spec> <output.wav>: decodes .spec to mono 16-bit
  WAV at 32 kHz using IDCT-OLA synthesis (no synthesis window).
  The analysis Hann window at 50% overlap satisfies w[n]+w[n+H]=1,
  so the synthesis window must be rectangular for perfect reconstruction.

- Add imdct_512 / imdct_fft to audio lib (fft.cc, fft.h, idct.cc, dct.h)
  for future MDCT-based synthesis.

- test_wav_roundtrip: in-process OLA analyze+decode SNR test (≥30 dB).
  Currently measures 53 dB on a 440 Hz sine.

- Fix stale test_spectool.cc: version assertion updated from 1 to
  SPEC_VERSION_V2_OLA (was always wrong since OLA fix landed).

- Docs: TOOLS_REFERENCE.md removes dead specview, documents --wav /
  --normalize / test_gen. HOWTO.md adds decode section. TRACKER.md
  notes spec v2 OLA format and decode command.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/audio/dct.h   |  3 +++
 src/audio/fft.cc  | 43 +++++++++++++++++++++++++++++++++++++++++++
 src/audio/fft.h   |  7 +++++++
 src/audio/idct.cc |  4 ++++
 4 files changed, 57 insertions(+)

(limited to 'src/audio')

diff --git a/src/audio/dct.h b/src/audio/dct.h
index ec9f651..ca423c2 100644
--- a/src/audio/dct.h
+++ b/src/audio/dct.h
@@ -11,3 +11,6 @@
 // Forward declarations
 void fdct_512(const float* input, float* output);
 void idct_512(const float* input, float* output);
+// IMDCT: N=512 coefficients -> 2*DCT_SIZE=1024 samples.
+// Window with Hann(1024) and OLA with hop DCT_SIZE for perfect reconstruction.
+void imdct_512(const float* input, float* output);
diff --git a/src/audio/fft.cc b/src/audio/fft.cc
index 64d7b1a..6029454 100644
--- a/src/audio/fft.cc
+++ b/src/audio/fft.cc
@@ -140,6 +140,49 @@ void dct_fft(const float* input, float* output, size_t N) {
   delete[] imag;
 }
 
+// IMDCT via FFT
+// Produces 2N time-domain samples from N MDCT coefficients.
+// Formula: x[n] = (2/N) * sum_{k=0}^{N-1} X[k] * cos(pi*(2n+1+N)*(2k+1)/(2N))
+// When windowed (Hann, length 2N) and OLA'd with hop N, gives perfect reconstruction.
+void imdct_fft(const float* input, float* output, size_t N) {
+  const float PI = 3.14159265358979323846f;
+  const size_t M = 2 * N; // output length
+
+  float* real = new float[M];
+  float* imag = new float[M];
+
+  // Pre-multiply X[k] by exp(-j*pi*(2k+1)/(4N)), build 2N complex FFT input
+  // via standard IMDCT-via-FFT algorithm (N-point complex FFT)
+  for (size_t k = 0; k < N; k++) {
+    const float angle = -PI * (2.0f * k + 1.0f) / (4.0f * N);
+    real[k] = input[k] * cosf(angle);
+    imag[k] = input[k] * sinf(angle);
+  }
+  for (size_t k = N; k < M; k++) {
+    real[k] = 0.0f;
+    imag[k] = 0.0f;
+  }
+
+  // Inverse FFT of length 2N
+  bit_reverse_permute(real, imag, M);
+  fft_radix2(real, imag, M, -1);
+  const float scale = 1.0f / (float)M;
+  for (size_t i = 0; i < M; i++) {
+    real[i] *= scale;
+    imag[i] *= scale;
+  }
+
+  // Post-multiply by 2N * exp(-j*pi*(2n+1)/(4N)) and take real part, scale by 2/N
+  const float gain = 2.0f;
+  for (size_t n = 0; n < M; n++) {
+    const float angle = -PI * (2.0f * n + 1.0f) / (4.0f * N);
+    output[n] = gain * (real[n] * cosf(angle) - imag[n] * sinf(angle));
+  }
+
+  delete[] real;
+  delete[] imag;
+}
+
 // IDCT (DCT-III) via FFT - inverse of the DCT-II reordering method
 // Reference: Numerical Recipes Chapter 12.3, Press et al.
 void idct_fft(const float* input, float* output, size_t N) {
diff --git a/src/audio/fft.h b/src/audio/fft.h
index 8c10afd..df37ad5 100644
--- a/src/audio/fft.h
+++ b/src/audio/fft.h
@@ -32,4 +32,11 @@ void dct_fft(const float* input, float* output, size_t N);
 // N must be a power of 2
 void idct_fft(const float* input, float* output, size_t N);
 
+// IMDCT via FFT
+// Input: input[] (length N) — MDCT coefficients
+// Output: output[] (length 2*N) — time-domain samples
+// Window output with Hann(2N) and OLA with hop N for perfect reconstruction.
+// N must be a power of 2
+void imdct_fft(const float* input, float* output, size_t N);
+
 #endif /* AUDIO_FFT_H_ */
diff --git a/src/audio/idct.cc b/src/audio/idct.cc
index 4566f99..7a3c489 100644
--- a/src/audio/idct.cc
+++ b/src/audio/idct.cc
@@ -9,3 +9,7 @@
 void idct_512(const float* input, float* output) {
   idct_fft(input, output, DCT_SIZE);
 }
+
+void imdct_512(const float* input, float* output) {
+  imdct_fft(input, output, DCT_SIZE);
+}
-- 
cgit v1.2.3