1 files changed, 56 insertions, 2 deletions
diff --git a/src/audio/gen.cc b/src/audio/gen.cc
index 148fc68..0757b4d 100644
--- a/src/audio/gen.cc
+++ b/src/audio/gen.cc
@@ -69,9 +69,63 @@ std::vector<float> generate_note_spectrogram(const NoteParams& params,
     float dct_chunk[DCT_SIZE];
     fdct_512(pcm_chunk, dct_chunk);
 
-    // Copy to buffer
+    // Scale up to compensate for the orthonormal DCT normalization.
+    // The old non-orthonormal DCT had no sqrt scaling, so its output was ~sqrt(N/2) times larger.
+    // Scale factor: sqrt(DCT_SIZE / 2) = sqrt(256) = 16
+    //
+    // HOWEVER: After removing synthesis windowing (commit f998bfc), audio is louder.
+    // The old synthesis incorrectly applied a Hamming window to the spectrum (reducing energy by 0.63x).
+    // The new synthesis is correct (no window), but procedural notes with 16x scaling are too loud.
+    //
+    // Analysis applies a Hamming window (0.63x energy), so with 16x scaling: 0.63 × 16 ≈ 10x overall.
+    // Divide the 16x factor by 2.5 to match the relative loudness increase: 16 / 2.5 = 6.4 (final scale).
+    const float scale_factor = sqrtf(DCT_SIZE / 2.0f) / 2.5f;
+
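+    // Copy to buffer with scaling (NOTE(review): the RMS normalization below rescales spec_data again when rms > 1e-6, so it appears to set the final level)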
     for (int i = 0; i < DCT_SIZE; ++i) {
-      spec_data[f * DCT_SIZE + i] = dct_chunk[i];
+      spec_data[f * DCT_SIZE + i] = dct_chunk[i] * scale_factor;
+    }
+  }
+
+  // Normalize to a consistent RMS level (matching spectool --normalize behavior)
+  // 1. Synthesize PCM frame by frame (idct_512) to measure actual output levels
+  std::vector<float> pcm_data(num_frames * DCT_SIZE);
+  for (int f = 0; f < num_frames; ++f) {
+    const float* spectral_frame = spec_data.data() + (f * DCT_SIZE);
+    float* time_frame = pcm_data.data() + (f * DCT_SIZE);
+    idct_512(spectral_frame, time_frame);
+  }
+
+  // 2. Calculate RMS and peak
+  float rms_sum = 0.0f;
+  float peak = 0.0f;
+  for (size_t i = 0; i < pcm_data.size(); ++i) {
+    const float abs_val = fabsf(pcm_data[i]);
+    if (abs_val > peak) {
+      peak = abs_val;
+    }
+    rms_sum += pcm_data[i] * pcm_data[i];
+  }
+  const float rms = sqrtf(rms_sum / pcm_data.size());
+
+  // 3. Normalize to target RMS (0.15, matching spectool default)
+  const float target_rms = 0.15f;
+  const float max_safe_peak = 1.0f; // Conservative: ensure output peak ≤ 1.0
+
+  if (rms > 1e-6f) {
+    // Calculate scale factor to reach target RMS
+    float norm_scale = target_rms / rms;
+
+    // Check if this would cause clipping
+    const float predicted_peak = peak * norm_scale;
+    if (predicted_peak > max_safe_peak) {
+      // Reduce scale to prevent clipping
+      norm_scale = max_safe_peak / peak;
+    }
+
+    // Apply normalization scale to spectrogram
+    for (size_t i = 0; i < spec_data.size(); ++i) {
+      spec_data[i] *= norm_scale;
     }
   }