// This file is part of the 64k demo project. // It implements the spectool for analyzing audio into spectrograms. // Provides both 'analyze' and 'play' modes for spectral data. #include "audio/audio.h" #include "audio/dct.h" #include "audio/gen.h" #include "audio/synth.h" #include "audio/window.h" #include "platform.h" #include #include #include "miniaudio.h" #include #include #include #include // Simple .spec file format: // char[4] magic = "SPEC" // int32_t version = 1 // int32_t dct_size // int32_t num_frames // float[num_frames * dct_size] data // struct SpecHeader { ... } -> now in audio.h int analyze_audio(const char* in_path, const char* out_path, bool normalize, float target_rms) { printf("Analyzing %s -> %s", in_path, out_path); if (normalize) { printf(" (normalizing to RMS=%.3f)", target_rms); } printf("\n"); // Use higher quality resampling for better audio quality // Source files are typically 44.1kHz or 96kHz, 16/24-bit, mono/stereo ma_decoder_config config = ma_decoder_config_init(ma_format_f32, 1, 32000); // CRITICAL: Use highest quality low-pass filter to preserve audio quality // Default lpfOrder is very low, causing audible aliasing when downsampling // Maximum lpfOrder is implementation-dependent, but 8 is reasonable for // quality config.resampling.linear.lpfOrder = 8; // Higher = better anti-aliasing (default is likely 1-2) ma_decoder decoder; if (ma_decoder_init_file(in_path, &config, &decoder) != MA_SUCCESS) { printf("Error: Failed to open or decode audio file: %s\n", in_path); return 1; } // First pass: Load all PCM data (needed for normalization) std::vector pcm_data; float pcm_chunk[DCT_SIZE]; ma_uint64 frames_read; while (ma_decoder_read_pcm_frames(&decoder, pcm_chunk, DCT_SIZE, &frames_read) == MA_SUCCESS && frames_read > 0) { pcm_data.insert(pcm_data.end(), pcm_chunk, pcm_chunk + frames_read); } ma_decoder_uninit(&decoder); if (pcm_data.empty()) { printf("Error: No audio data read from file.\n"); return 1; } // Calculate RMS and peak float rms_sum = 0.0f; float peak = 0.0f; for (size_t i = 0; i < pcm_data.size(); ++i) { const float abs_val = fabsf(pcm_data[i]); if (abs_val > peak) { peak = abs_val; } rms_sum += pcm_data[i] * pcm_data[i]; } const float original_rms = sqrtf(rms_sum / pcm_data.size()); printf("Original: Peak=%.3f, RMS=%.3f\n", peak, original_rms); // Normalize if requested float scale_factor = 1.0f; if (normalize && original_rms > 1e-6f) { // Calculate scale factor to reach target RMS scale_factor = target_rms / original_rms; // Check if this would cause clipping (peak > 1.0 after synthesis) // Peak amplification varies by sample (windowing + IDCT effects) // Use conservative limit: input peak ≤ 1.0 to guarantee output peak ≤ 1.0 const float max_safe_peak = 1.0f; const float predicted_peak = peak * scale_factor; if (predicted_peak > max_safe_peak) { // Reduce scale factor to prevent clipping const float peak_scale = max_safe_peak / peak; printf("Warning: RMS normalization would cause clipping (peak=%.3f)\n", predicted_peak); printf(" Reducing scale to prevent clipping.\n"); scale_factor = peak_scale; } printf("Normalizing: scale factor = %.3f\n", scale_factor); printf(" RMS: %.3f -> %.3f\n", original_rms, original_rms * scale_factor); printf(" Peak: %.3f -> %.3f\n", peak, peak * scale_factor); for (size_t i = 0; i < pcm_data.size(); ++i) { pcm_data[i] *= scale_factor; } } // Second pass: Windowing + DCT std::vector spec_data; float window[WINDOW_SIZE]; hamming_window_512(window); // Process PCM data in DCT_SIZE chunks const size_t num_chunks = (pcm_data.size() + DCT_SIZE - 1) / DCT_SIZE; for (size_t chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) { const size_t chunk_start = chunk_idx * DCT_SIZE; const size_t chunk_end = (chunk_start + DCT_SIZE < pcm_data.size()) ? chunk_start + DCT_SIZE : pcm_data.size(); const size_t chunk_size = chunk_end - chunk_start; // Copy chunk (with zero-padding if needed) memcpy(pcm_chunk, pcm_data.data() + chunk_start, chunk_size * sizeof(float)); if (chunk_size < DCT_SIZE) { memset(pcm_chunk + chunk_size, 0, (DCT_SIZE - chunk_size) * sizeof(float)); } // Apply window for (int i = 0; i < DCT_SIZE; ++i) { pcm_chunk[i] *= window[i]; } // Apply FDCT float dct_chunk[DCT_SIZE]; fdct_512(pcm_chunk, dct_chunk); // Add to spectrogram data spec_data.insert(spec_data.end(), dct_chunk, dct_chunk + DCT_SIZE); } // --- Trim Silent Frames --- const float epsilon = 1e-6f; int num_frames = spec_data.size() / DCT_SIZE; int first_frame = 0; int last_frame = num_frames; // Trim leading silent frames for (int i = 0; i < num_frames; ++i) { bool all_zeros = true; for (int j = 0; j < DCT_SIZE; ++j) { if (fabsf(spec_data[i * DCT_SIZE + j]) > epsilon) { all_zeros = false; break; } } if (all_zeros) { first_frame = i + 1; } else { break; } } // Trim trailing silent frames for (int i = num_frames - 1; i >= first_frame; --i) { bool all_zeros = true; for (int j = 0; j < DCT_SIZE; ++j) { if (fabsf(spec_data[i * DCT_SIZE + j]) > epsilon) { all_zeros = false; break; } } if (all_zeros) { last_frame = i; } else { break; } } // Create a new vector with the trimmed data std::vector trimmed_data; if (first_frame < last_frame) { trimmed_data.assign(spec_data.begin() + first_frame * DCT_SIZE, spec_data.begin() + last_frame * DCT_SIZE); } printf("Trimming: Original frames: %d -> Trimmed frames: %zu\n", num_frames, trimmed_data.size() / DCT_SIZE); // Write to .spec file FILE* f_out = fopen(out_path, "wb"); if (!f_out) { printf("Error: Failed to open output file: %s\n", out_path); return 1; } SpecHeader header; memcpy(header.magic, "SPEC", 4); header.version = 1; header.dct_size = DCT_SIZE; header.num_frames = trimmed_data.size() / DCT_SIZE; fwrite(&header, sizeof(SpecHeader), 1, f_out); fwrite(trimmed_data.data(), sizeof(float), trimmed_data.size(), f_out); fclose(f_out); printf("Analysis complete. Wrote %d spectral frames.\n", header.num_frames); return 0; } int play_spec(const char* in_path) { printf("Playing %s\n", in_path); FILE* f_in = fopen(in_path, "rb"); if (!f_in) { printf("Error: Failed to open input file: %s\n", in_path); return 1; } SpecHeader header; if (fread(&header, sizeof(SpecHeader), 1, f_in) != 1 || strncmp(header.magic, "SPEC", 4) != 0) { printf("Error: Invalid spectrogram file format.\n"); fclose(f_in); return 1; } std::vector spec_data(header.num_frames * header.dct_size); fread(spec_data.data(), sizeof(float), spec_data.size(), f_in); fclose(f_in); PlatformState platform_state = platform_init(false, 100, 100); audio_init(); audio_start(); Spectrogram spec; spec.spectral_data_a = spec_data.data(); spec.spectral_data_b = spec_data.data(); // Point both to the same buffer for playback spec.num_frames = header.num_frames; int spec_id = synth_register_spectrogram(&spec); synth_trigger_voice(spec_id, 0.7f, 0.0f); printf("Playing... Press Ctrl+C to exit.\n"); while (synth_get_active_voice_count() > 0 && !platform_should_close(&platform_state)) { platform_poll(&platform_state); } audio_shutdown(); platform_shutdown(&platform_state); return 0; } int test_gen(const char* out_path) { printf("Generating test spectrogram -> %s\n", out_path); std::vector track_data; int track_frames = 0; // Generate a simple C Major scale float freqs[] = {261.63f, 293.66f, 329.63f, 349.23f, 392.00f, 440.00f, 493.88f, 523.25f}; srand(time(NULL)); for (int i = 0; i < 8; ++i) { NoteParams params; params.base_freq = freqs[i]; params.duration_sec = 0.5f; params.amplitude = 0.5f; params.attack_sec = 0.05f; params.decay_sec = 0.1f; params.vibrato_rate = 5.0f; params.vibrato_depth = 2.0f; params.num_harmonics = 5; params.harmonic_decay = 0.5f; params.pitch_randomness = 1.0f; params.amp_randomness = 0.05f; int note_frames = 0; std::vector note_data = generate_note_spectrogram(params, ¬e_frames); // Paste at 0.4s intervals (overlap) int offset = (int)(i * 0.4f * 32000.0f / DCT_SIZE); paste_spectrogram(track_data, &track_frames, note_data, note_frames, offset); } // Write to file (Duplicate logic, but fine for now) FILE* f_out = fopen(out_path, "wb"); if (!f_out) { printf("Error: Failed to open output file: %s\n", out_path); return 1; } SpecHeader header; memcpy(header.magic, "SPEC", 4); header.version = 1; header.dct_size = DCT_SIZE; header.num_frames = track_frames; fwrite(&header, sizeof(SpecHeader), 1, f_out); fwrite(track_data.data(), sizeof(float), track_data.size(), f_out); fclose(f_out); printf("Generated %d frames.\n", track_frames); return 0; } void print_usage() { printf("Usage: spectool [output] [options]\n"); printf("Commands:\n"); printf( " analyze Analyze an audio file and " "save as a spectrogram.\n"); printf( " play Play a spectrogram file.\n"); printf( " test_gen Generate a test " "spectrogram.\n"); printf("\nOptions for 'analyze':\n"); printf(" --normalize [rms] Normalize audio to target RMS level (default: " "0.15)\n"); printf( " Ensures consistent loudness across all samples.\n"); } int main(int argc, char** argv) { if (argc < 2) { print_usage(); return 1; } const char* command = argv[1]; if (strcmp(command, "analyze") == 0) { if (argc < 4) { printf("Error: 'analyze' command requires input and output files.\n"); print_usage(); return 1; } // Parse optional flags bool normalize = false; float target_rms = 0.15f; // Default target RMS for (int i = 4; i < argc; ++i) { if (strcmp(argv[i], "--normalize") == 0) { normalize = true; // Check if next arg is a number (custom target RMS) if (i + 1 < argc) { char* endptr; float custom_rms = strtof(argv[i + 1], &endptr); if (endptr != argv[i + 1] && custom_rms > 0.0f && custom_rms < 1.0f) { target_rms = custom_rms; ++i; // Consume the RMS value } } } } return analyze_audio(argv[2], argv[3], normalize, target_rms); } else if (strcmp(command, "play") == 0) { if (argc < 3) { printf("Error: 'play' command requires an input file.\n"); print_usage(); return 1; } return play_spec(argv[2]); } else if (strcmp(command, "test_gen") == 0) { if (argc < 3) { printf("Error: 'test_gen' command requires an output file.\n"); print_usage(); return 1; } return test_gen(argv[2]); } else { printf("Error: Unknown command '%s'\n", command); print_usage(); return 1; } return 0; }