src/audio/fft.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194

// Fast Fourier Transform (FFT) implementation
// Radix-2 Cooley-Tukey algorithm
// Reference: Numerical Recipes, Press et al.

#include "audio/fft.h"

#include <cmath>
#include <cstring>

// Bit-reversal permutation (in-place)
// Reorders array elements by reversing their binary indices
static void bit_reverse_permute(float* real, float* imag, size_t N) {
  const size_t bits = 0;
  size_t temp_bits = N;
  size_t num_bits = 0;
  while (temp_bits > 1) {
    temp_bits >>= 1;
    num_bits++;
  }

  for (size_t i = 0; i < N; i++) {
    // Compute bit-reversed index
    size_t j = 0;
    size_t temp = i;
    for (size_t b = 0; b < num_bits; b++) {
      j = (j << 1) | (temp & 1);
      temp >>= 1;
    }

    // Swap if j > i (to avoid swapping twice)
    if (j > i) {
      const float tmp_real = real[i];
      const float tmp_imag = imag[i];
      real[i] = real[j];
      imag[i] = imag[j];
      real[j] = tmp_real;
      imag[j] = tmp_imag;
    }
  }
}

// In-place radix-2 FFT (after bit-reversal)
// direction: +1 for forward FFT, -1 for inverse FFT
static void fft_radix2(float* real, float* imag, size_t N, int direction) {
  const float PI = 3.14159265358979323846f;

  // Butterfly operations
  for (size_t stage_size = 2; stage_size <= N; stage_size *= 2) {
    const size_t half_stage = stage_size / 2;
    const float angle = direction * 2.0f * PI / stage_size;

    // Precompute twiddle factors for this stage
    float wr = 1.0f;
    float wi = 0.0f;
    const float wr_delta = cosf(angle);
    const float wi_delta = sinf(angle);

    for (size_t k = 0; k < half_stage; k++) {
      // Apply butterfly to all groups at this stage
      for (size_t group_start = k; group_start < N; group_start += stage_size) {
        const size_t i = group_start;
        const size_t j = group_start + half_stage;

        // Complex multiplication: (real[j] + i*imag[j]) * (wr + i*wi)
        const float temp_real = real[j] * wr - imag[j] * wi;
        const float temp_imag = real[j] * wi + imag[j] * wr;

        // Butterfly operation
        real[j] = real[i] - temp_real;
        imag[j] = imag[i] - temp_imag;
        real[i] = real[i] + temp_real;
        imag[i] = imag[i] + temp_imag;
      }

      // Update twiddle factor for next k (rotation)
      const float wr_old = wr;
      wr = wr_old * wr_delta - wi * wi_delta;
      wi = wr_old * wi_delta + wi * wr_delta;
    }
  }
}

void fft_forward(float* real, float* imag, size_t N) {
  bit_reverse_permute(real, imag, N);
  fft_radix2(real, imag, N, +1);
}

void fft_inverse(float* real, float* imag, size_t N) {
  bit_reverse_permute(real, imag, N);
  fft_radix2(real, imag, N, -1);

  // Scale by 1/N
  const float scale = 1.0f / N;
  for (size_t i = 0; i < N; i++) {
    real[i] *= scale;
    imag[i] *= scale;
  }
}

// DCT-II via FFT using double-and-mirror method
// This is a more robust algorithm that avoids reordering issues
// Reference: Numerical Recipes, Press et al.
void dct_fft(const float* input, float* output, size_t N) {
  const float PI = 3.14159265358979323846f;

  // Allocate temporary arrays for 2N-point FFT
  const size_t M = 2 * N;
  float* real = new float[M];
  float* imag = new float[M];

  // Pack input: [x[0], x[1], ..., x[N-1], x[N-1], x[N-2], ..., x[1]]
  // This creates even symmetry for real-valued DCT
  for (size_t i = 0; i < N; i++) {
    real[i] = input[i];
  }
  for (size_t i = 0; i < N; i++) {
    real[N + i] = input[N - 1 - i];
  }
  memset(imag, 0, M * sizeof(float));

  // Apply 2N-point FFT
  fft_forward(real, imag, M);

  // Extract DCT coefficients
  // DCT[k] = Re{FFT[k] * exp(-j*pi*k/(2*N))} * normalization
  // Note: Need to divide by 2 because we doubled the signal length
  for (size_t k = 0; k < N; k++) {
    const float angle = -PI * k / (2.0f * N);
    const float wr = cosf(angle);
    const float wi = sinf(angle);

    // Complex multiplication: (real + j*imag) * (wr + j*wi)
    // Real part: real*wr - imag*wi
    const float dct_value = real[k] * wr - imag[k] * wi;

    // Apply DCT-II normalization (divide by 2 for double-length FFT)
    if (k == 0) {
      output[k] = dct_value * sqrtf(1.0f / N) / 2.0f;
    } else {
      output[k] = dct_value * sqrtf(2.0f / N) / 2.0f;
    }
  }

  delete[] real;
  delete[] imag;
}

// IDCT (Inverse DCT-II) via FFT using double-and-mirror method
// This is the inverse of the DCT-II (used for synthesis)
void idct_fft(const float* input, float* output, size_t N) {
  const float PI = 3.14159265358979323846f;

  // Allocate temporary arrays for 2N-point FFT
  const size_t M = 2 * N;
  float* real = new float[M];
  float* imag = new float[M];

  // Prepare FFT input from DCT coefficients
  // IDCT = Re{IFFT[DCT * exp(j*pi*k/(2*N))]} * 2
  for (size_t k = 0; k < N; k++) {
    const float angle = PI * k / (2.0f * N);  // Positive for inverse
    const float wr = cosf(angle);
    const float wi = sinf(angle);

    // Apply inverse normalization
    float scaled_input;
    if (k == 0) {
      scaled_input = input[k] * sqrtf(N) * 2.0f;
    } else {
      scaled_input = input[k] * sqrtf(N / 2.0f) * 2.0f;
    }

    // Complex multiplication: DCT[k] * exp(j*theta)
    real[k] = scaled_input * wr;
    imag[k] = scaled_input * wi;
  }

  // Fill second half with conjugate symmetry (for real output)
  for (size_t k = 1; k < N; k++) {
    real[M - k] = real[k];
    imag[M - k] = -imag[k];
  }

  // Apply inverse FFT
  fft_inverse(real, imag, M);

  // Extract first N samples (real part only, imag should be ~0)
  for (size_t i = 0; i < N; i++) {
    output[i] = real[i];
  }

  delete[] real;
  delete[] imag;
}