1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
// Standalone tool to play .spec or .wav files for debugging
// Usage: ./specplay <file.spec|file.wav>
#include "audio/dct.h"
#include "audio/window.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MINIAUDIO_IMPLEMENTATION
#include "miniaudio.h"
// Playback cursor over a mono PCM buffer. main() fills it in and passes its
// address to the audio device as user data; audio_callback() reads from it.
struct PlaybackState {
// Mono float samples; malloc()ed by load_spec()/load_wav(), freed by main().
float* pcm_data;
// Number of samples stored in pcm_data.
size_t num_samples;
// Index of the next sample audio_callback() will emit; main() polls it to
// detect the end of playback.
size_t playback_pos;
};
// Device data callback (registered as dataCallback in main()).
//
// Fills `frame_count` interleaved stereo frames in `output`, duplicating each
// mono sample of the PlaybackState found in device->pUserData onto both
// channels. Samples outside [-1, 1] are reported on stderr and clamped; once
// the buffer is exhausted the remaining frames are written as silence.
// `input` is not used.
void audio_callback(ma_device* device, void* output, const void* input,
                    ma_uint32 frame_count) {
  PlaybackState* playback = (PlaybackState*)device->pUserData;
  float* frame_out = (float*)output;

  for (ma_uint32 frame = 0; frame < frame_count; ++frame, frame_out += 2) {
    // Silence unless there is still source material left.
    float value = 0.0f;

    if (playback->playback_pos < playback->num_samples) {
      const size_t index = playback->playback_pos++;
      value = playback->pcm_data[index];

      // Clamp to [-1, 1] and warn if clipping
      if (value > 1.0f || value < -1.0f) {
        fprintf(stderr, "[CLIP at sample %zu: %.3f]\n", index, value);
        value = (value > 1.0f) ? 1.0f : -1.0f;
      }
    }

    frame_out[0] = value;  // Left
    frame_out[1] = value;  // Right (mono source)
  }
}
// Loads a .spec file and decodes it to mono float PCM.
//
// Layout as read here: a SpecHeader (4-byte magic "SPEC" followed by three
// int32 fields: version, dct_size, num_frames) and then num_frames * DCT_SIZE
// floats of spectral data. Every frame is passed through idct_512() on its
// own and the resulting time-domain frames are stored back to back.
//
// path:            file to read.
// out_num_samples: receives the number of PCM samples; written only on
//                  success.
//
// Returns a malloc()ed buffer that the caller must free(), or nullptr on any
// failure (a diagnostic is printed to stderr). Prints the header fields and
// peak/RMS statistics to stdout.
float* load_spec(const char* path, size_t* out_num_samples) {
  FILE* f = fopen(path, "rb");
  if (!f) {
    fprintf(stderr, "Failed to open %s\n", path);
    return nullptr;
  }

  // On-disk header, read verbatim using the host's byte order and layout.
  struct SpecHeader {
    char magic[4];
    int32_t version;
    int32_t dct_size;
    int32_t num_frames;
  };
  SpecHeader header;
  if (fread(&header, sizeof(SpecHeader), 1, f) != 1) {
    fprintf(stderr, "Failed to read SpecHeader\n");
    fclose(f);
    return nullptr;
  }

  // Validate header
  if (memcmp(header.magic, "SPEC", 4) != 0) {
    fprintf(stderr, "Invalid magic bytes (expected 'SPEC')\n");
    fclose(f);
    return nullptr;
  }
  printf("Loading .spec: version=%d, dct_size=%d, frames=%d\n",
         (int)header.version, (int)header.dct_size, (int)header.num_frames);

  // Decoding below is hard-wired to DCT_SIZE coefficients per frame, so a
  // file written with any other frame size cannot be interpreted correctly.
  const size_t frame_len = (size_t)DCT_SIZE;
  if (header.dct_size < 0 || (size_t)header.dct_size != frame_len) {
    fprintf(stderr, "Unsupported dct_size %d (expected %zu)\n",
            (int)header.dct_size, frame_len);
    fclose(f);
    return nullptr;
  }
  if (header.num_frames <= 0) {
    fprintf(stderr, "Invalid frame count %d\n", (int)header.num_frames);
    fclose(f);
    return nullptr;
  }

  // Do all size arithmetic in size_t and refuse counts whose byte size would
  // wrap around.
  const size_t num_frames = (size_t)header.num_frames;
  if (num_frames > SIZE_MAX / sizeof(float) / frame_len) {
    fprintf(stderr, "Spectral data too large (%zu frames)\n", num_frames);
    fclose(f);
    return nullptr;
  }
  const size_t spec_size = num_frames * frame_len;

  float* spec_data = (float*)malloc(spec_size * sizeof(float));
  float* pcm_data = (float*)malloc(spec_size * sizeof(float));
  if (!spec_data || !pcm_data) {
    fprintf(stderr, "Out of memory loading %zu samples\n", spec_size);
    free(spec_data);
    free(pcm_data);
    fclose(f);
    return nullptr;
  }

  // Read spectral data
  if (fread(spec_data, sizeof(float), spec_size, f) != spec_size) {
    fprintf(stderr, "Failed to read spectral data\n");
    free(spec_data);
    free(pcm_data);
    fclose(f);
    return nullptr;
  }
  fclose(f);

  // Convert to PCM via IDCT, one frame at a time
  for (size_t frame = 0; frame < num_frames; frame++) {
    const size_t offset = frame * frame_len;
    idct_512(spec_data + offset, pcm_data + offset);
  }
  free(spec_data);

  // Analyze PCM statistics. The sum of squares is accumulated in double so
  // that long files do not lose precision; spec_size is known to be > 0 here.
  float peak = 0.0f;
  double energy = 0.0;
  for (size_t i = 0; i < spec_size; i++) {
    const float abs_val = fabsf(pcm_data[i]);
    if (abs_val > peak)
      peak = abs_val;
    energy += (double)pcm_data[i] * (double)pcm_data[i];
  }
  const float rms = sqrtf((float)(energy / (double)spec_size));
  printf("PCM stats: Peak=%.3f, RMS=%.3f\n", peak, rms);
  if (peak > 1.0f) {
    printf("[WARNING] Peak exceeds 1.0! Will clip during playback.\n");
  }

  *out_num_samples = spec_size;
  return pcm_data;
}
float* load_wav(const char* path, size_t* out_num_samples) {
ma_decoder decoder;
ma_decoder_config config = ma_decoder_config_init(ma_format_f32, 1, 32000);
if (ma_decoder_init_file(path, &config, &decoder) != MA_SUCCESS) {
fprintf(stderr, "Failed to open WAV file: %s\n", path);
return nullptr;
}
ma_uint64 frame_count;
ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count);
*out_num_samples = (size_t)frame_count;
float* pcm_data = (float*)malloc(*out_num_samples * sizeof(float));
ma_decoder_read_pcm_frames(&decoder, pcm_data, frame_count, nullptr);
ma_decoder_uninit(&decoder);
printf("Loaded .wav: %zu samples\n", *out_num_samples);
// Analyze PCM statistics
float peak = 0.0f, rms_sum = 0.0f;
for (size_t i = 0; i < *out_num_samples; i++) {
float abs_val = fabsf(pcm_data[i]);
if (abs_val > peak)
peak = abs_val;
rms_sum += pcm_data[i] * pcm_data[i];
}
float rms = sqrtf(rms_sum / *out_num_samples);
printf("PCM stats: Peak=%.3f, RMS=%.3f\n", peak, rms);
return pcm_data;
}
int main(int argc, char** argv) {
if (argc != 2) {
fprintf(stderr, "Usage: %s <file.spec|file.wav>\n", argv[0]);
return 1;
}
const char* path = argv[1];
const char* ext = strrchr(path, '.');
PlaybackState state = {};
if (ext && strcmp(ext, ".spec") == 0) {
state.pcm_data = load_spec(path, &state.num_samples);
} else if (ext && (strcmp(ext, ".wav") == 0 || strcmp(ext, ".aif") == 0)) {
state.pcm_data = load_wav(path, &state.num_samples);
} else {
fprintf(stderr, "Unknown file type: %s\n", path);
return 1;
}
if (!state.pcm_data) {
fprintf(stderr, "Failed to load audio\n");
return 1;
}
printf("Playing %.2f seconds... Press Ctrl+C to stop.\n",
(float)state.num_samples / 32000.0f);
// Initialize miniaudio
ma_device_config device_config =
ma_device_config_init(ma_device_type_playback);
device_config.playback.format = ma_format_f32;
device_config.playback.channels = 2;
device_config.sampleRate = 32000;
device_config.dataCallback = audio_callback;
device_config.pUserData = &state;
ma_device device;
if (ma_device_init(NULL, &device_config, &device) != MA_SUCCESS) {
fprintf(stderr, "Failed to initialize audio device\n");
free(state.pcm_data);
return 1;
}
if (ma_device_start(&device) != MA_SUCCESS) {
fprintf(stderr, "Failed to start audio device\n");
ma_device_uninit(&device);
free(state.pcm_data);
return 1;
}
// Wait for playback to finish
while (state.playback_pos < state.num_samples) {
ma_sleep(100);
}
ma_device_uninit(&device);
free(state.pcm_data);
printf("Playback complete.\n");
return 0;
}
|