feat(spectool): add --wav decode, IMDCT, and roundtrip test

- spectool --wav <input.spec> <output.wav>: decodes .spec to mono 16-bit WAV at 32 kHz using IDCT-OLA synthesis (no synthesis window). The analysis Hann window at 50% overlap satisfies w[n]+w[n+H]=1, so the synthesis window must be rectangular for perfect reconstruction. - Add imdct_512 / imdct_fft to audio lib (fft.cc, fft.h, idct.cc, dct.h) for future MDCT-based synthesis. - test_wav_roundtrip: in-process OLA analyze+decode SNR test (≥30 dB). Currently measures 53 dB on a 440 Hz sine. - Fix stale test_spectool.cc: version assertion updated from 1 to SPEC_VERSION_V2_OLA (was always wrong since OLA fix landed). - Docs: TOOLS_REFERENCE.md removes dead specview, documents --wav / --normalize / test_gen. HOWTO.md adds decode section. TRACKER.md notes spec v2 OLA format and decode command. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
author: skal <pascal.massimino@gmail.com> 2026-03-05 21:50:53 +0100
committer: skal <pascal.massimino@gmail.com> 2026-03-05 21:50:53 +0100
commit: 2f8926f433248af28081497e8371e02abe61d6ff (patch)
tree: 30e480325e2b7f01947a5ca2f8b3865e600d8bb7 /tools/spectool.cc
parent: e2c3c3e95b6a9e53b4631b271640bb9914f8c95e (diff)
1 files changed, 114 insertions, 0 deletions
diff --git a/tools/spectool.cc b/tools/spectool.cc
index 70bcae2..59a56b5 100644
--- a/tools/spectool.cc
+++ b/tools/spectool.cc
@@ -217,6 +217,110 @@ int analyze_audio(const char* in_path, const char* out_path, bool normalize,
   return 0;
 }
 
+// Writes the low |n| bytes of |v| to |f|, least-significant byte first.
+static void wav_put_le(FILE* f, uint32_t v, int n) {
+  for (int i = 0; i < n; ++i) fputc((int)((v >> (8 * i)) & 0xff), f);
+}
+
+// Writes the 44-byte RIFF/WAVE header for mono 16-bit PCM to |f|.
+// |num_samples| is the number of samples that will follow the header.
+// Every numeric field is emitted little-endian, as RIFF requires, whatever
+// the host byte order. Write errors are not reported here; they stay
+// observable through ferror() / fclose() on |f|.
+static void write_wav_header(FILE* f, uint32_t num_samples, uint32_t sample_rate) {
+  const uint32_t num_channels = 1;
+  const uint32_t bits_per_sample = 16;
+  const uint32_t block_align = num_channels * (bits_per_sample / 8);
+  const uint32_t data_size = num_samples * block_align;
+
+  fwrite("RIFF", 1, 4, f);
+  wav_put_le(f, 36 + data_size, 4);  // bytes following this field
+  fwrite("WAVE", 1, 4, f);
+
+  fwrite("fmt ", 1, 4, f);
+  wav_put_le(f, 16, 4);  // size of the fmt payload
+  wav_put_le(f, 1, 2);   // audio format: PCM
+  wav_put_le(f, num_channels, 2);
+  wav_put_le(f, sample_rate, 4);
+  wav_put_le(f, sample_rate * block_align, 4);  // byte rate
+  wav_put_le(f, block_align, 2);
+  wav_put_le(f, bits_per_sample, 2);
+
+  fwrite("data", 1, 4, f);
+  wav_put_le(f, data_size, 4);
+}
+
+// Decodes the spectrogram file |in_path| into a mono 16-bit PCM WAV file at
+// |out_path|, using a fixed 32 kHz sample rate. Progress and errors are
+// printed to stdout. Returns 0 on success, 1 on any I/O or format error.
+int decode_to_wav(const char* in_path, const char* out_path) {
+  printf("Decoding %s -> %s\n", in_path, out_path);
+
+  FILE* f_in = fopen(in_path, "rb");
+  if (!f_in) {
+    printf("Error: Failed to open input file: %s\n", in_path);
+    return 1;
+  }
+
+  SpecHeader header;
+  if (fread(&header, sizeof(SpecHeader), 1, f_in) != 1 ||
+      strncmp(header.magic, "SPEC", 4) != 0) {
+    printf("Error: Invalid spectrogram file format.\n");
+    fclose(f_in);
+    return 1;
+  }
+
+  // Frames are addressed with the compile-time DCT_SIZE below, so a header
+  // announcing another size would be read out of bounds. The frame count is
+  // capped so that the 32-bit size fields of the WAV header cannot wrap; a
+  // negative count converts to a huge size_t and fails the same test.
+  const size_t max_frames = (0xffffffffu - 36u) / 2u / DCT_SIZE;
+  if (header.dct_size != DCT_SIZE || (size_t)header.num_frames > max_frames) {
+    printf("Error: Unsupported spectrogram dimensions.\n");
+    fclose(f_in);
+    return 1;
+  }
+
+  const size_t num_frames = (size_t)header.num_frames;
+  std::vector<float> spec_data(num_frames * DCT_SIZE);
+  const size_t num_read =
+      fread(spec_data.data(), sizeof(float), spec_data.size(), f_in);
+  fclose(f_in);
+  if (num_read != spec_data.size()) {
+    printf("Error: Truncated spectrogram file: %s\n", in_path);
+    return 1;
+  }
+
+  const bool ola_mode = (header.version == SPEC_VERSION_V2_OLA);
+  const uint32_t sample_rate = 32000;
+  std::vector<float> pcm;
+
+  if (ola_mode) {
+    // IDCT + overlap-add with a rectangular synthesis window. Analysis used
+    // Hann * FDCT at 50% overlap, where w[n] + w[n+HOP] = 1, so adding the
+    // tail of frame k-1 to the head of frame k reconstructs x[n].
+    pcm.assign(num_frames * OLA_HOP_SIZE, 0.0f);
+    float overlap[OLA_OVERLAP] = {};
+    for (size_t f = 0; f < num_frames; ++f) {
+      float tmp[DCT_SIZE];
+      idct_512(spec_data.data() + f * DCT_SIZE, tmp);
+      // First half: output samples for this frame.
+      for (int j = 0; j < OLA_HOP_SIZE; ++j)
+        pcm[f * OLA_HOP_SIZE + j] = tmp[j] + overlap[j];
+      // Second half: kept as overlap for the next frame. The tail of the
+      // last frame is dropped.
+      for (int j = 0; j < OLA_OVERLAP; ++j)
+        overlap[j] = tmp[OLA_HOP_SIZE + j];
+    }
+  } else {
+    // Non-OLA files: frames are decoded back to back, DCT_SIZE samples each.
+    pcm.resize(num_frames * DCT_SIZE);
+    for (size_t f = 0; f < num_frames; ++f)
+      idct_512(spec_data.data() + f * DCT_SIZE, pcm.data() + f * DCT_SIZE);
+  }
+
+  // Clamp to [-1, 1] and quantize to little-endian 16-bit PCM, independent
+  // of the host byte order.
+  std::vector<uint8_t> bytes(2 * pcm.size());
+  for (size_t i = 0; i < pcm.size(); ++i) {
+    float s = pcm[i];
+    if (s != s) s = 0.0f;  // NaN (corrupt input) -> silence; the cast would be UB
+    if (s >  1.0f) s =  1.0f;
+    if (s < -1.0f) s = -1.0f;
+    const uint16_t q = (uint16_t)(int16_t)(s * 32767.0f);
+    bytes[2 * i] = (uint8_t)(q & 0xff);
+    bytes[2 * i + 1] = (uint8_t)(q >> 8);
+  }
+
+  // Write WAV
+  FILE* f_out = fopen(out_path, "wb");
+  if (!f_out) {
+    printf("Error: Failed to open output file: %s\n", out_path);
+    return 1;
+  }
+
+  const uint32_t out_samples = (uint32_t)pcm.size();
+  write_wav_header(f_out, out_samples, sample_rate);
+  fwrite(bytes.data(), 1, bytes.size(), f_out);
+
+  // The stream error flag covers header and sample writes alike; fclose()
+  // flushes buffered data, so its result is part of the write status too.
+  const bool write_failed = ferror(f_out) != 0;
+  if (fclose(f_out) != 0 || write_failed) {
+    printf("Error: Failed to write output file: %s\n", out_path);
+    return 1;
+  }
+
+  printf("Decoded %d frames (%u samples) at %u Hz.\n",
+         header.num_frames, out_samples, sample_rate);
+  return 0;
+}
+
 int play_spec(const char* in_path) {
   printf("Playing %s\n", in_path);
 
@@ -334,6 +438,9 @@ void print_usage() {
   printf(
       "  test_gen <output.spec>                   Generate a test "
       "spectrogram.\n");
+  printf(
+      "  --wav   <input.spec> <output.wav>        Decode spectrogram to mono "
+      "WAV.\n");
   printf("\nOptions for 'analyze':\n");
   printf(
       "  --normalize [rms]   Normalize audio to target RMS level (default: "
@@ -392,6 +499,13 @@ int main(int argc, char** argv) {
       return 1;
     }
     return test_gen(argv[2]);
+  } else if (strcmp(command, "--wav") == 0) {
+    if (argc < 4) {
+      printf("Error: '--wav' requires input .spec and output .wav files.\n");
+      print_usage();
+      return 1;
+    }
+    return decode_to_wav(argv[2], argv[3]);
   } else {
     printf("Error: Unknown command '%s'\n", command);
     print_usage();
author	skal <pascal.massimino@gmail.com>	2026-03-05 21:50:53 +0100
committer	skal <pascal.massimino@gmail.com>	2026-03-05 21:50:53 +0100
commit	2f8926f433248af28081497e8371e02abe61d6ff (patch)
tree	30e480325e2b7f01947a5ca2f8b3865e600d8bb7 /tools/spectool.cc
parent	e2c3c3e95b6a9e53b4631b271640bb9914f8c95e (diff)