diff options
| author | skal <pascal.massimino@gmail.com> | 2026-02-06 18:08:06 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-06 18:08:06 +0100 |
| commit | 42390a8a28377cd25021b1647abf9dbd43d4e2c8 (patch) | |
| tree | 174f10bc635754b20764e764f1b9786f50f01f63 /tools/specplay.cc | |
| parent | 8aba6d94871315eac0153134a6c740344964d31f (diff) | |
fix(audio): Fix spectrogram amplification issue and add diagnostic tool
## Root Cause
.spec files were NOT regenerated after orthonormal DCT changes (commit d9e0da9).
They contained spectrograms from old non-orthonormal DCT (16x larger values),
but were played back with new orthonormal IDCT.
Result: 16x amplification → Peaks of 12-17x → Severe clipping/distortion
## Diagnosis Tool
Created specplay tool to analyze and play .spec/.wav files:
- Reports PCM peak and RMS values
- Detects clipping during playback
- Usage: ./build/specplay <file.spec|file.wav>
## Fixes
1. Revert accidental window.h include in synth.cc (keep no-window state)
2. Adjust gen.cc scaling from 16x to 6.4x (16/2.5) for procedural notes
3. Regenerated ALL .spec files with ./scripts/gen_spectrograms.sh
## Verified Results
Before: Peak=16.571 (KICK_3), 12.902 (SNARE_2), 14.383 (SNARE_3)
After: Peak=0.787 (BASS_GUITAR_FEEL), 0.759 (SNARE_909), 0.403 (KICK_606)
All peaks now < 1.0 (safe range)
Diffstat (limited to 'tools/specplay.cc')
| -rw-r--r-- | tools/specplay.cc | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/tools/specplay.cc b/tools/specplay.cc new file mode 100644 index 0000000..9fa9355 --- /dev/null +++ b/tools/specplay.cc @@ -0,0 +1,212 @@ +// Standalone tool to play .spec or .wav files for debugging +// Usage: ./specplay <file.spec|file.wav> + +#include "audio/dct.h" +#include "audio/window.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define MINIAUDIO_IMPLEMENTATION +#include "miniaudio.h" + +struct PlaybackState { + float* pcm_data; + size_t num_samples; + size_t playback_pos; +}; + +void audio_callback(ma_device* device, void* output, const void* input, + ma_uint32 frame_count) { + PlaybackState* state = (PlaybackState*)device->pUserData; + float* out = (float*)output; + + for (ma_uint32 i = 0; i < frame_count; i++) { + if (state->playback_pos < state->num_samples) { + float sample = state->pcm_data[state->playback_pos++]; + // Clamp to [-1, 1] and warn if clipping + if (sample > 1.0f || sample < -1.0f) { + fprintf(stderr, "[CLIP at sample %zu: %.3f]\n", state->playback_pos - 1, + sample); + sample = (sample > 1.0f) ? 1.0f : -1.0f; + } + out[i * 2] = sample; // Left + out[i * 2 + 1] = sample; // Right (mono) + } else { + out[i * 2] = 0.0f; + out[i * 2 + 1] = 0.0f; + } + } +} + +float* load_spec(const char* path, size_t* out_num_samples) { + FILE* f = fopen(path, "rb"); + if (!f) { + fprintf(stderr, "Failed to open %s\n", path); + return nullptr; + } + + // Read SpecHeader + struct SpecHeader { + char magic[4]; + int32_t version; + int32_t dct_size; + int32_t num_frames; + }; + + SpecHeader header; + if (fread(&header, sizeof(SpecHeader), 1, f) != 1) { + fprintf(stderr, "Failed to read SpecHeader\n"); + fclose(f); + return nullptr; + } + + // Validate header + if (memcmp(header.magic, "SPEC", 4) != 0) { + fprintf(stderr, "Invalid magic bytes (expected 'SPEC')\n"); + fclose(f); + return nullptr; + } + + printf("Loading .spec: version=%d, dct_size=%d, frames=%d\n", header.version, + header.dct_size, header.num_frames); + + uint32_t num_frames = header.num_frames; + + // Read spectral data + size_t spec_size = num_frames * DCT_SIZE; + float* spec_data = (float*)malloc(spec_size * sizeof(float)); + if (fread(spec_data, sizeof(float), spec_size, f) != spec_size) { + fprintf(stderr, "Failed to read spectral data\n"); + free(spec_data); + fclose(f); + return nullptr; + } + fclose(f); + + // Convert to PCM via IDCT + *out_num_samples = spec_size; + float* pcm_data = (float*)malloc(*out_num_samples * sizeof(float)); + + for (uint32_t frame = 0; frame < num_frames; frame++) { + const float* spectral_frame = spec_data + (frame * DCT_SIZE); + float* time_frame = pcm_data + (frame * DCT_SIZE); + idct_512(spectral_frame, time_frame); + } + + free(spec_data); + + // Analyze PCM statistics + float peak = 0.0f, rms_sum = 0.0f; + for (size_t i = 0; i < *out_num_samples; i++) { + float abs_val = fabsf(pcm_data[i]); + if (abs_val > peak) + peak = abs_val; + rms_sum += pcm_data[i] * pcm_data[i]; + } + float rms = sqrtf(rms_sum / *out_num_samples); + + printf("PCM stats: Peak=%.3f, RMS=%.3f\n", peak, rms); + if (peak > 1.0f) { + printf("[WARNING] Peak exceeds 1.0! Will clip during playback.\n"); + } + + return pcm_data; +} + +float* load_wav(const char* path, size_t* out_num_samples) { + ma_decoder decoder; + ma_decoder_config config = ma_decoder_config_init(ma_format_f32, 1, 32000); + + if (ma_decoder_init_file(path, &config, &decoder) != MA_SUCCESS) { + fprintf(stderr, "Failed to open WAV file: %s\n", path); + return nullptr; + } + + ma_uint64 frame_count; + ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count); + *out_num_samples = (size_t)frame_count; + + float* pcm_data = (float*)malloc(*out_num_samples * sizeof(float)); + ma_decoder_read_pcm_frames(&decoder, pcm_data, frame_count, nullptr); + ma_decoder_uninit(&decoder); + + printf("Loaded .wav: %zu samples\n", *out_num_samples); + + // Analyze PCM statistics + float peak = 0.0f, rms_sum = 0.0f; + for (size_t i = 0; i < *out_num_samples; i++) { + float abs_val = fabsf(pcm_data[i]); + if (abs_val > peak) + peak = abs_val; + rms_sum += pcm_data[i] * pcm_data[i]; + } + float rms = sqrtf(rms_sum / *out_num_samples); + + printf("PCM stats: Peak=%.3f, RMS=%.3f\n", peak, rms); + + return pcm_data; +} + +int main(int argc, char** argv) { + if (argc != 2) { + fprintf(stderr, "Usage: %s <file.spec|file.wav>\n", argv[0]); + return 1; + } + + const char* path = argv[1]; + const char* ext = strrchr(path, '.'); + + PlaybackState state = {}; + + if (ext && strcmp(ext, ".spec") == 0) { + state.pcm_data = load_spec(path, &state.num_samples); + } else if (ext && (strcmp(ext, ".wav") == 0 || strcmp(ext, ".aif") == 0)) { + state.pcm_data = load_wav(path, &state.num_samples); + } else { + fprintf(stderr, "Unknown file type: %s\n", path); + return 1; + } + + if (!state.pcm_data) { + fprintf(stderr, "Failed to load audio\n"); + return 1; + } + + printf("Playing %.2f seconds... Press Ctrl+C to stop.\n", + (float)state.num_samples / 32000.0f); + + // Initialize miniaudio + ma_device_config device_config = + ma_device_config_init(ma_device_type_playback); + device_config.playback.format = ma_format_f32; + device_config.playback.channels = 2; + device_config.sampleRate = 32000; + device_config.dataCallback = audio_callback; + device_config.pUserData = &state; + + ma_device device; + if (ma_device_init(NULL, &device_config, &device) != MA_SUCCESS) { + fprintf(stderr, "Failed to initialize audio device\n"); + free(state.pcm_data); + return 1; + } + + if (ma_device_start(&device) != MA_SUCCESS) { + fprintf(stderr, "Failed to start audio device\n"); + ma_device_uninit(&device); + free(state.pcm_data); + return 1; + } + + // Wait for playback to finish + while (state.playback_pos < state.num_samples) { + ma_sleep(100); + } + + ma_device_uninit(&device); + free(state.pcm_data); + + printf("Playback complete.\n"); + return 0; +} |
