diff options
Diffstat (limited to 'tools/spectool.cc')
| -rw-r--r-- | tools/spectool.cc | 114 |
1 files changed, 114 insertions, 0 deletions
diff --git a/tools/spectool.cc b/tools/spectool.cc index 70bcae2..59a56b5 100644 --- a/tools/spectool.cc +++ b/tools/spectool.cc @@ -217,6 +217,110 @@ int analyze_audio(const char* in_path, const char* out_path, bool normalize, return 0; } +static void write_wav_header(FILE* f, uint32_t num_samples, uint32_t sample_rate) { + const uint16_t num_channels = 1; + const uint16_t bits_per_sample = 16; + const uint32_t data_size = num_samples * num_channels * (bits_per_sample / 8); + const uint32_t byte_rate = sample_rate * num_channels * (bits_per_sample / 8); + const uint16_t block_align = num_channels * (bits_per_sample / 8); + const uint32_t riff_size = 36 + data_size; + + fwrite("RIFF", 1, 4, f); + fwrite(&riff_size, 4, 1, f); + fwrite("WAVE", 1, 4, f); + fwrite("fmt ", 1, 4, f); + const uint32_t fmt_size = 16; + fwrite(&fmt_size, 4, 1, f); + const uint16_t audio_format = 1; // PCM + fwrite(&audio_format, 2, 1, f); + fwrite(&num_channels, 2, 1, f); + fwrite(&sample_rate, 4, 1, f); + fwrite(&byte_rate, 4, 1, f); + fwrite(&block_align, 2, 1, f); + fwrite(&bits_per_sample, 2, 1, f); + fwrite("data", 1, 4, f); + fwrite(&data_size, 4, 1, f); +} + +int decode_to_wav(const char* in_path, const char* out_path) { + printf("Decoding %s -> %s\n", in_path, out_path); + + FILE* f_in = fopen(in_path, "rb"); + if (!f_in) { + printf("Error: Failed to open input file: %s\n", in_path); + return 1; + } + + SpecHeader header; + if (fread(&header, sizeof(SpecHeader), 1, f_in) != 1 || + strncmp(header.magic, "SPEC", 4) != 0) { + printf("Error: Invalid spectrogram file format.\n"); + fclose(f_in); + return 1; + } + + std::vector<float> spec_data(header.num_frames * header.dct_size); + fread(spec_data.data(), sizeof(float), spec_data.size(), f_in); + fclose(f_in); + + const bool ola_mode = (header.version == SPEC_VERSION_V2_OLA); + const uint32_t sample_rate = 32000; + + std::vector<float> pcm; + + if (ola_mode) { + // IDCT + OLA (no synthesis window). + // Analysis: Hann * FDCT. Since Hann at 50% overlap satisfies + // w[n] + w[n+HOP] = 1, a rectangular synthesis window gives + // perfect reconstruction: output[n] = IDCT(X_k)[j] + IDCT(X_{k-1})[j+HOP] + // = x[n]*w[j] + x[n]*w[j+HOP] = x[n]. + const uint32_t total_samples = (uint32_t)header.num_frames * OLA_HOP_SIZE; + pcm.assign(total_samples + OLA_OVERLAP, 0.0f); + + float overlap[OLA_OVERLAP] = {}; + for (int f = 0; f < header.num_frames; ++f) { + float tmp[DCT_SIZE]; + idct_512(spec_data.data() + f * DCT_SIZE, tmp); + // First half: output samples for this frame + for (int j = 0; j < OLA_HOP_SIZE; ++j) + pcm[f * OLA_HOP_SIZE + j] = tmp[j] + overlap[j]; + // Second half: save as overlap for next frame + for (int j = 0; j < OLA_OVERLAP; ++j) + overlap[j] = tmp[OLA_HOP_SIZE + j]; + } + pcm.resize(total_samples); + } else { + const uint32_t total_samples = (uint32_t)header.num_frames * DCT_SIZE; + pcm.resize(total_samples); + for (int f = 0; f < header.num_frames; ++f) { + idct_512(spec_data.data() + f * DCT_SIZE, pcm.data() + f * DCT_SIZE); + } + } + + // Write WAV + FILE* f_out = fopen(out_path, "wb"); + if (!f_out) { + printf("Error: Failed to open output file: %s\n", out_path); + return 1; + } + + const uint32_t out_samples = (uint32_t)pcm.size(); + write_wav_header(f_out, out_samples, sample_rate); + + for (uint32_t i = 0; i < out_samples; ++i) { + float s = pcm[i]; + if (s > 1.0f) s = 1.0f; + if (s < -1.0f) s = -1.0f; + int16_t sample = (int16_t)(s * 32767.0f); + fwrite(&sample, sizeof(int16_t), 1, f_out); + } + fclose(f_out); + + printf("Decoded %d frames (%u samples) at %u Hz.\n", + header.num_frames, out_samples, sample_rate); + return 0; +} + int play_spec(const char* in_path) { printf("Playing %s\n", in_path); @@ -334,6 +438,9 @@ void print_usage() { printf( " test_gen <output.spec> Generate a test " "spectrogram.\n"); + printf( + " --wav <input.spec> <output.wav> Decode spectrogram to mono " + "WAV.\n"); printf("\nOptions for 'analyze':\n"); printf( " --normalize [rms] Normalize audio to target RMS level (default: " @@ -392,6 +499,13 @@ int main(int argc, char** argv) { return 1; } return test_gen(argv[2]); + } else if (strcmp(command, "--wav") == 0) { + if (argc < 4) { + printf("Error: '--wav' requires input .spec and output .wav files.\n"); + print_usage(); + return 1; + } + return decode_to_wav(argv[2], argv[3]); } else { printf("Error: Unknown command '%s'\n", command); print_usage(); |
