diff options
| -rw-r--r-- | doc/SPECTRAL_BRUSH_2.md | 523 | ||||
| -rw-r--r-- | tools/mq_editor/README.md | 55 | ||||
| -rw-r--r-- | tools/mq_editor/fft.js | 103 | ||||
| -rw-r--r-- | tools/mq_editor/index.html | 175 | ||||
| -rw-r--r-- | tools/mq_editor/mq_extract.js | 216 | ||||
| -rw-r--r-- | tools/mq_editor/viewer.js | 376 |
6 files changed, 1448 insertions, 0 deletions
diff --git a/doc/SPECTRAL_BRUSH_2.md b/doc/SPECTRAL_BRUSH_2.md new file mode 100644 index 0000000..76e49db --- /dev/null +++ b/doc/SPECTRAL_BRUSH_2.md @@ -0,0 +1,523 @@ +# Spectral Brush Editor v2: MQ-Based Sinusoidal Synthesis + +**Status:** Design Phase +**Target:** Procedural audio compression for short samples (drums, piano, impacts) +**Replaces:** Spectrogram-based synthesis (poor audio quality) + +--- + +## Overview + +McAulay-Quatieri (MQ) sinusoidal modeling for audio compression. Extract frequency/amplitude trajectories as bezier curves, apply "style" via replicas (harmonics, spread, jitter), synthesize to baked PCM buffers. + +**Key Features:** +- **50-100× compression:** WAV → bezier curves + replica params → C++ structs +- **Web-based editor:** Real-time MQ extraction, curve editing, synthesis preview +- **Procedural synthesis:** Bandwidth-enhanced oscillators with phase jitter and frequency spread +- **Tracker integration:** MQ samples triggered as assets, future pitch/amp modulation + +--- + +## Architecture + +### Data Flow + +``` +┌─────────────────────────────────────────────────────┐ +│ Web Editor (tools/mq_editor/) │ +├─────────────────────────────────────────────────────┤ +│ Input: WAV or saved .txt params │ +│ ↓ │ +│ MQ Extraction: FFT → Peak Tracking → Bezier Fitting │ +│ ↓ │ +│ Editing: Drag control points, adjust replicas │ +│ ↓ │ +│ JS Synthesizer: Preview original vs. 
synthesized │ +│ ↓ │ +│ Export: .txt params + generated .cc code │ +└─────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────┐ +│ C++ Demo (src/audio/) │ +├─────────────────────────────────────────────────────┤ +│ Build: .txt → generated .cc (MQSample structs) │ +│ ↓ │ +│ Synthesis: Bake PCM at init (CPU, future GPU) │ +│ ↓ │ +│ AudioEngine: Register as sample asset │ +│ ↓ │ +│ Tracker: Trigger via patterns (future modulation) │ +└─────────────────────────────────────────────────────┘ +``` + +--- + +## Data Model + +### Per-Partial Representation + +Each sinusoidal partial stores: + +``` +Partial { + freq_curve: CubicBezier // Frequency trajectory (Hz vs. seconds) + amp_curve: CubicBezier // Amplitude envelope (0-1 vs. seconds) + replicas: ReplicaConfig // Harmonic/inharmonic copies +} + +CubicBezier { + (t0, v0), (t1, v1), (t2, v2), (t3, v3) // 4 control points +} + +ReplicaConfig { + offsets: [ratio1, ratio2, ...] // Frequency ratios (1.0, 2.01, 0.5, ...) 
+ decay_alpha: float // Amplitude decay: exp(-α·|f-f₀|) + jitter: float [0-1] // Phase randomization amount + spread_above: float [0-1] // Frequency spread +% of f₀ + spread_below: float [0-1] // Frequency spread -% of f₀ + bandwidth: float [0-1] // Noise bandwidth ±% of f +} +``` + +### Text Format (.txt) + +Stored in `workspaces/main/mq_samples/`: + +``` +# MQ Sample: drum_kick.txt +sample_rate 32000 +duration 1.5 + +# Global defaults (optional, can override per partial) +replica_defaults + decay_alpha 0.1 + jitter 0.05 + spread_above 0.02 + spread_below 0.02 + bandwidth 0.01 +end + +# Partial 0: fundamental +partial + # Frequency bezier (seconds, Hz): t0 f0 t1 f1 t2 f2 t3 f3 + freq_curve 0.0 60.0 0.2 58.0 0.8 55.0 1.5 50.0 + + # Amplitude bezier (seconds, 0-1): t0 a0 t1 a1 t2 a2 t3 a3 + amp_curve 0.0 0.0 0.05 1.0 0.5 0.3 1.5 0.0 + + # Replica frequency ratios + replicas 1.0 2.01 3.03 + + # Override defaults (optional) + decay_alpha 0.15 + jitter 0.08 + spread_above 0.03 + spread_below 0.01 + bandwidth 0.02 +end + +# Partial 1: overtone +partial + freq_curve 0.0 180.0 0.2 178.0 0.8 175.0 1.5 170.0 + amp_curve 0.0 0.0 0.05 0.6 0.5 0.2 1.5 0.0 + replicas 1.0 1.99 +end +``` + +### Generated C++ Code + +Stored in `src/generated/mq_<name>.cc`: + +```cpp +// Auto-generated from mq_samples/drum_kick.txt +// DO NOT EDIT + +struct MQBezier { + float t0, v0, t1, v1, t2, v2, t3, v3; +}; + +struct MQPartial { + MQBezier freq; + MQBezier amp; + const float* replicas; + int num_replicas; + float decay_alpha; + float jitter; + float spread_above; + float spread_below; + float bandwidth; +}; + +static const float drum_kick_replicas_0[] = {1.0f, 2.01f, 3.03f}; +static const float drum_kick_replicas_1[] = {1.0f, 1.99f}; + +static const MQPartial drum_kick_partials[] = { + { + {0.0f, 60.0f, 0.2f, 58.0f, 0.8f, 55.0f, 1.5f, 50.0f}, + {0.0f, 0.0f, 0.05f, 1.0f, 0.5f, 0.3f, 1.5f, 0.0f}, + drum_kick_replicas_0, 3, + 0.15f, 0.08f, 0.03f, 0.01f, 0.02f + }, + { + {0.0f, 180.0f, 0.2f, 
178.0f, 0.8f, 175.0f, 1.5f, 170.0f}, + {0.0f, 0.0f, 0.05f, 0.6f, 0.5f, 0.2f, 1.5f, 0.0f}, + drum_kick_replicas_1, 2, + 0.1f, 0.05f, 0.02f, 0.02f, 0.01f + } +}; + +struct MQSample { + int sample_rate; + float duration; + const MQPartial* partials; + int num_partials; +}; + +const MQSample ASSET_MQ_DRUM_KICK = { + 32000, 1.5f, drum_kick_partials, 2 +}; +``` + +--- + +## McAulay-Quatieri Algorithm + +### Phase 1: Peak Detection + +STFT with overlapping windows: + +``` +For each frame (hop = 512 samples): + 1. FFT (size = 2048) + 2. Magnitude spectrum |X[k]| + 3. Detect peaks: local maxima above threshold + 4. Extract (frequency, amplitude, phase) via parabolic interpolation +``` + +**Parameters:** +- `fft_size`: 2048 (adjustable 1024-4096) +- `hop_size`: 512 (75% overlap) +- `peak_threshold`: -60 dB (adjustable) + +### Phase 2: Trajectory Tracking + +Link peaks across frames into continuous partials: + +``` +Birth/Death/Continuation model: + - Match peak to existing partial if |f_new - f_old| < threshold + - Birth new partial if unmatched peak persists 2+ frames + - Death partial if no match for 2+ frames +``` + +**Tracking threshold:** 50 Hz (adjustable) + +### Phase 3: Bezier Curve Fitting + +Fit cubic bezier to each partial's trajectory: + +``` +Input: [(t1, f1), (t2, f2), ..., (tN, fN)] +Output: 4 control points minimizing least-squares error + +Algorithm: + 1. Fix endpoints: (t0, f0) = first, (t3, f3) = last + 2. Solve for (t1, f1), (t2, f2) via linear regression + 3. 
Repeat for amplitude trajectory +``` + +**Error threshold:** Auto-fit to minimize control points (future: user-adjustable simplification) + +--- + +## Synthesis Model + +### Replica Oscillator Bank + +For each partial at time `t`: + +```python +# Evaluate bezier curves +f0 = eval_bezier(partial.freq_curve, t) +A0 = eval_bezier(partial.amp_curve, t) + +# For each replica offset ratio +for ratio in partial.replicas: + # Frequency spread (asymmetric randomization) + spread = random.uniform(-partial.spread_below, +partial.spread_above) + f = f0 * ratio * (1.0 + spread) + + # Amplitude decay + A = A0 * exp(-partial.decay_alpha * abs(f - f0)) + + # Phase (pseudo-random jitter, seeded by frame counter) + phase = 2*pi*f*t + partial.jitter * random.uniform(0, 2*pi) + + # Base sinusoid + sample += A * sin(phase) + + # Bandwidth-enhanced noise (optional) + if partial.bandwidth > 0: + noise_bw = f * partial.bandwidth + sample += A * bandlimited_noise(f - noise_bw, f + noise_bw) +``` + +### Bezier Evaluation (Cubic) + +Direct evaluation of the cubic Bernstein polynomial (same result as De Casteljau's algorithm): + +```cpp +float eval_bezier(const MQBezier& b, float t) { + // Normalize t to [0, 1] + float u = (t - b.t0) / (b.t3 - b.t0); + u = clamp(u, 0.0f, 1.0f); + + // Cubic interpolation + float u1 = 1.0f - u; + return u1*u1*u1 * b.v0 + + 3*u1*u1*u * b.v1 + + 3*u1*u*u * b.v2 + + u*u*u * b.v3; +} +``` + +### Baking Process (C++) + +```cpp +// At audio_init() time +void synth_bake_mq(const MQSample& sample, std::vector<float>& pcm_out) { + int num_samples = sample.sample_rate * sample.duration; + pcm_out.resize(num_samples); + + for (int i = 0; i < num_samples; ++i) { + float t = (float)i / sample.sample_rate; + float sample_val = 0.0f; + + for (int p = 0; p < sample.num_partials; ++p) { + const MQPartial& partial = sample.partials[p]; + float f0 = eval_bezier(partial.freq, t); + float A0 = eval_bezier(partial.amp, t); + + for (int r = 0; r < partial.num_replicas; ++r) { + float ratio = partial.replicas[r]; + + // Frequency spread + uint32_t seed = 
i * 12345 + p * 67890 + r; + float spread = rand_float(seed, -partial.spread_below, partial.spread_above); + float f = f0 * ratio * (1.0f + spread); + + // Amplitude decay + float A = A0 * expf(-partial.decay_alpha * fabsf(f - f0)); + + // Phase jitter + float jitter = rand_float(seed + 1, 0.0f, 1.0f) * partial.jitter; + float phase = 2.0f * M_PI * f * t + jitter * 2.0f * M_PI; + + sample_val += A * sinf(phase); + + // TODO: bandwidth-enhanced noise + } + } + + pcm_out[i] = sample_val; + } +} +``` + +--- + +## Web Editor + +### UI Layout + +``` +┌─────────────────────────────────────────────────────┐ +│ [Load WAV] [Load .txt] [Save .txt] [Export C++] │ +├─────────────────────────────────────────────────────┤ +│ MQ Extraction Params: │ +│ FFT Size: [2048▼] Hop: [512] Threshold: [-60dB]│ +│ [Extract Partials] [Re-extract] │ +├─────────────────────────────────────────────────────┤ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ │ │ +│ │ Time-Frequency Canvas │ │ +│ │ - Spectrogram background │ │ +│ │ - Bezier curves (colored per partial) │ │ +│ │ - Draggable control points (circles) │ │ +│ │ │ │ +│ └─────────────────────────────────────────────────┘ │ +├─────────────────────────────────────────────────────┤ +│ Selected Partial: [0▼] [Add Point] [Remove Point] │ +│ Replicas: [1.0, 2.01, 3.03] [Edit] │ +│ Decay α: [0.15] Jitter: [0.08] │ +│ Spread+: [3%] Spread-: [1%] Bandwidth: [2%] │ +├─────────────────────────────────────────────────────┤ +│ Playback: [▶ Original] [▶ Synthesized] [▶ Both] │ +│ Time: [━━━━━━━━━━━━━━━━━━━━━━━] 0.0s / 1.5s │ +└─────────────────────────────────────────────────────┘ +``` + +### Features + +**Phase 1 (Extraction):** +- Load WAV, run MQ algorithm, visualize partials +- Real-time parameter adjustment (FFT size, threshold, tracking) + +**Phase 2 (Synthesis Preview):** +- JS implementation of full synthesis pipeline +- Playback original vs. 
synthesized audio (Web Audio API) + +**Phase 3 (Editing):** +- Drag control points to adjust curves +- Add/remove control points (future: auto-simplification) +- Per-partial replica configuration + +**Phase 4 (Export):** +- Save `.txt` format (human-readable) +- Generate C++ code (copy-paste or auto-commit) + +--- + +## C++ Integration + +### File Organization + +``` +workspaces/main/ + mq_samples/ + drum_kick.txt + piano_c4.txt + synth_pad.txt + +src/generated/ + mq_drum_kick.cc # Auto-generated + mq_piano_c4.cc + mq_synth_pad.cc + +src/audio/ + mq_synth.h # Bezier eval, baking API + mq_synth.cc +``` + +### Asset Registration + +Add to `workspaces/main/assets.txt`: + +``` +MQ_DRUM_KICK, NONE, mq_samples/drum_kick.txt, "MQ kick drum" +``` + +Build system: +1. Detect `.txt` changes → trigger code generator +2. Compile generated `.cc` → link into demo +3. `ASSET_MQ_DRUM_KICK` available in code + +### Tracker Integration + +```cpp +// Register MQ samples at init +void audio_init() { + synth_register_mq_sample(SAMPLE_ID_KICK, &ASSET_MQ_DRUM_KICK); + synth_register_mq_sample(SAMPLE_ID_PIANO, &ASSET_MQ_PIANO_C4); +} + +// Trigger from pattern +void pattern_callback(int sample_id, float volume) { + synth_trigger_mq(sample_id, volume); + // Future: pitch modulation, time stretch +} +``` + +--- + +## Implementation Roadmap + +### Phase 1: MQ Extraction (Web) +**Goal:** Load WAV → Extract partials → Visualize trajectories +**Deliverables:** +- `tools/mq_editor/index.html` (basic UI) +- `tools/mq_editor/mq_extract.js` (FFT + peak tracking + bezier fitting) +- `tools/mq_editor/render.js` (canvas visualization) + +**Timeline:** 1-2 weeks + +### Phase 2: JS Synthesizer +**Goal:** Preview synthesized audio in browser +**Deliverables:** +- `tools/mq_editor/mq_synth.js` (replica oscillator bank) +- Web Audio API integration (playback comparison) + +**Timeline:** 1 week + +### Phase 3: Web Editor UI +**Goal:** Full editing workflow +**Deliverables:** +- Draggable control points 
(canvas interaction) +- Per-partial replica sliders +- Save/load `.txt` format + +**Timeline:** 1-2 weeks + +### Phase 4: C++ Code Generator +**Goal:** `.txt` → generated `.cc` code +**Deliverables:** +- `tools/mq_codegen.py` (parser + C++ emitter) +- Build system integration (CMake hook) + +**Timeline:** 3-5 days + +### Phase 5: C++ Synthesis +**Goal:** Bake PCM at demo init +**Deliverables:** +- `src/audio/mq_synth.{h,cc}` (bezier eval, oscillator bank) +- Integration with AudioEngine/tracker + +**Timeline:** 1 week + +### Phase 6: Optimization +**Goal:** GPU baking, quantization, size reduction +**Deliverables:** +- Compute shader for parallel synthesis +- Quantized bezier control points (f16 or i16) +- Curve simplification algorithm + +**Timeline:** 2-3 weeks (future work) + +--- + +## Future Enhancements + +### Short-Term (Post-MVP) +- **Pitch modulation:** `synth_trigger_mq(sample_id, volume, pitch_ratio)` +- **Time stretch:** Adjust bezier time domain dynamically +- **Amplitude modulation:** LFO/envelope override + +### Medium-Term +- **GPU synthesis:** Compute shader for baked PCM (parallel oscillators) +- **Curve simplification:** Iterative control point reduction (error tolerance) +- **Quantization:** f32 → f16/i16 control points (~50% size reduction) + +### Long-Term +- **Hybrid synthesis:** MQ partials + noise residual (stochastic component) +- **Real-time synthesis:** Per-chunk fillBuffer() instead of baked PCM +- **Segmented beziers:** Multi-segment curves for complex trajectories + +--- + +## References + +- McAulay, R. J., & Quatieri, T. F. (1986). "Speech analysis/synthesis based on a sinusoidal representation." IEEE TASSP. +- Serra, X., & Smith, J. O. (1990). "Spectral modeling synthesis: A sound analysis/synthesis system based on a deterministic plus stochastic decomposition." Computer Music Journal. 
+- De Casteljau's algorithm: https://en.wikipedia.org/wiki/De_Casteljau%27s_algorithm + +--- + +## Status + +- [x] Design document +- [ ] Phase 1: MQ extraction (Web) +- [ ] Phase 2: JS synthesizer +- [ ] Phase 3: Web editor UI +- [ ] Phase 4: C++ code generator +- [ ] Phase 5: C++ synthesis + integration +- [ ] Phase 6: GPU optimization diff --git a/tools/mq_editor/README.md b/tools/mq_editor/README.md new file mode 100644 index 0000000..89449c3 --- /dev/null +++ b/tools/mq_editor/README.md @@ -0,0 +1,55 @@ +# MQ Spectral Editor + +McAulay-Quatieri sinusoidal analysis and synthesis tool. + +## Usage + +```bash +open tools/mq_editor/index.html +``` + +1. Load WAV file +2. Adjust MQ parameters (FFT size, hop, threshold) +3. Click "Extract Partials" +4. View extracted sinusoidal trajectories with bezier curve fits + +## Features (Phase 1) + +- **MQ Extraction:** FFT → peak detection → trajectory tracking → bezier fitting +- **Visualization:** Time-frequency plot with raw trajectories and bezier overlays +- **Real-time:** Adjustable extraction parameters + +## Parameters + +- **FFT Size:** 1024-4096 (default 2048) +- **Hop Size:** 64-2048 samples (default 512, 75% overlap) +- **Threshold:** -80 to -20 dB (default -60 dB) + +## Architecture + +- `index.html` - UI and integration +- `fft.js` - Fast Fourier Transform (Cooley-Tukey radix-2) +- `mq_extract.js` - MQ algorithm (peak detection, tracking, bezier fitting) +- `viewer.js` - Visualization (spectrogram, partials, zoom, mouse interaction) + +## Implementation Status + +- [x] Phase 1: MQ extraction + visualization + - [x] Spectrogram rendering with hot colormap + - [x] Horizontal zoom (mousewheel) + - [x] Axis ticks and labels + - [x] Mouse tooltip (time/frequency) +- [ ] Phase 2: JS synthesizer (preview playback) +- [ ] Phase 3: Editing UI (drag control points, replicas) +- [ ] Phase 4: Export (.txt + C++ code generation) + +## Algorithm + +1. 
**STFT:** Overlapping Hann windows, radix-2 Cooley-Tukey FFT (from spectral_editor/dct.js) +2. **Peak Detection:** Local maxima above threshold, parabolic interpolation +3. **Tracking:** Birth/death/continuation (50 Hz tolerance, 2-frame persistence) +4. **Bezier Fitting:** Cubic curves (4 control points), fixed endpoints + +## See Also + +- Design doc: `doc/SPECTRAL_BRUSH_2.md` diff --git a/tools/mq_editor/fft.js b/tools/mq_editor/fft.js new file mode 100644 index 0000000..8610222 --- /dev/null +++ b/tools/mq_editor/fft.js @@ -0,0 +1,103 @@ +// Fast Fourier Transform (adapted from spectral_editor/dct.js) +// Radix-2 Cooley-Tukey algorithm + +// Bit-reversal permutation (in-place) +// Swaps entry i with the entry at the bit-reversed index of i, in both arrays. +// num_bits is derived as log2(N) by repeated shifting, so N must be a power of two. +function bitReversePermute(real, imag, N) { + let temp_bits = N; + let num_bits = 0; + while (temp_bits > 1) { + temp_bits >>= 1; + num_bits++; + } + + for (let i = 0; i < N; ++i) { + let j = 0; + let temp = i; + for (let b = 0; b < num_bits; ++b) { + j = (j << 1) | (temp & 1); + temp >>= 1; + } + + // j > i: swap each (i, j) pair exactly once + if (j > i) { + const tmp_real = real[i]; + const tmp_imag = imag[i]; + real[i] = real[j]; + imag[i] = imag[j]; + real[j] = tmp_real; + imag[j] = tmp_imag; + } + } +} + +// In-place radix-2 FFT (butterfly stages only) +// Expects real/imag already in bit-reversed order (fftForward calls bitReversePermute first). +// direction: +1 for forward, -1 for inverse +// NOTE: the twiddle factor is exp(+i * direction * 2*PI * k / stage_size), so direction = +1 +// uses the positive-exponent convention: phases are conjugated relative to the usual +// exp(-i...) forward DFT, while magnitudes of a real input are unaffected. +// No 1/N scaling is applied for either direction. +function fftRadix2(real, imag, N, direction) { + const PI = Math.PI; + + for (let stage_size = 2; stage_size <= N; stage_size *= 2) { + const half_stage = stage_size / 2; + const angle = direction * 2.0 * PI / stage_size; + + // Twiddle starts at 1 + 0i and is rotated by angle once per k + let wr = 1.0; + let wi = 0.0; + const wr_delta = Math.cos(angle); + const wi_delta = Math.sin(angle); + + for (let k = 0; k < half_stage; ++k) { + for (let group_start = k; group_start < N; group_start += stage_size) { + const i = group_start; + const j = group_start + half_stage; + + // Butterfly: temp = x[j] * w, then x[j] = x[i] - temp and x[i] = x[i] + temp + const temp_real = real[j] * wr - imag[j] * wi; + const temp_imag = real[j] * wi + imag[j] * wr; + + real[j] = real[i] - temp_real; + imag[j] = imag[i] - temp_imag; + real[i] = real[i] + temp_real; + imag[i] = imag[i] + temp_imag; + } + + // Rotate the twiddle (wr, wi) by angle for the next k + const wr_old = 
wr; + wr = wr_old * wr_delta - wi * wi_delta; + wi = wr_old * wi_delta + wi * wr_delta; + } + } +} + +// Forward FFT: Time domain → Frequency domain +function fftForward(real, imag, N) { + bitReversePermute(real, imag, N); + fftRadix2(real, imag, N, +1); +} + +// Real FFT wrapper for MQ extraction +// Input: Float32Array (time-domain signal) +// Output: Float32Array (interleaved [re0, im0, re1, im1, ...]) +function realFFT(signal) { + const N = signal.length; + + // Must be power of 2 + if ((N & (N - 1)) !== 0) { + throw new Error('FFT size must be power of 2'); + } + + const real = new Float32Array(N); + const imag = new Float32Array(N); + + // Copy input to real part + for (let i = 0; i < N; ++i) { + real[i] = signal[i]; + } + + // Compute FFT + fftForward(real, imag, N); + + // Interleave output + const spectrum = new Float32Array(N * 2); + for (let i = 0; i < N; ++i) { + spectrum[i * 2] = real[i]; + spectrum[i * 2 + 1] = imag[i]; + } + + return spectrum; +} diff --git a/tools/mq_editor/index.html b/tools/mq_editor/index.html new file mode 100644 index 0000000..d44f19b --- /dev/null +++ b/tools/mq_editor/index.html @@ -0,0 +1,175 @@ +<!DOCTYPE html> +<html> +<head> + <meta charset="utf-8"> + <title>MQ Spectral Editor</title> + <style> + body { + font-family: monospace; + margin: 20px; + background: #1a1a1a; + color: #ddd; + } + .toolbar { + margin-bottom: 10px; + padding: 10px; + background: #2a2a2a; + border-radius: 4px; + } + button { + background: #3a3a3a; + color: #ddd; + border: 1px solid #555; + padding: 8px 16px; + margin-right: 8px; + cursor: pointer; + border-radius: 4px; + } + button:hover { background: #4a4a4a; } + button:disabled { opacity: 0.5; cursor: not-allowed; } + input[type="file"] { margin-right: 16px; } + .params { + display: inline-block; + margin-left: 20px; + } + label { + margin-right: 8px; + } + input[type="number"], select { + width: 80px; + background: #3a3a3a; + color: #ddd; + border: 1px solid #555; + padding: 4px; + border-radius: 
3px; + } + #canvas { + border: 1px solid #555; + background: #000; + cursor: crosshair; + display: block; + margin-top: 10px; + } + #status { + margin-top: 10px; + padding: 8px; + background: #2a2a2a; + border-radius: 4px; + min-height: 20px; + } + .info { + color: #4af; + } + .warn { + color: #fa4; + } + .error { + color: #f44; + } + </style> +</head> +<body> + <h2>MQ Spectral Editor</h2> + + <div class="toolbar"> + <input type="file" id="wavFile" accept=".wav"> + <button id="extractBtn" disabled>Extract Partials</button> + + <div class="params"> + <label>FFT Size:</label> + <select id="fftSize"> + <option value="1024">1024</option> + <option value="2048" selected>2048</option> + <option value="4096">4096</option> + </select> + + <label>Hop:</label> + <input type="number" id="hopSize" value="512" min="64" max="2048" step="64"> + + <label>Threshold (dB):</label> + <input type="number" id="threshold" value="-60" min="-80" max="-20" step="5"> + </div> + </div> + + <canvas id="canvas" width="1400" height="600"></canvas> + + <div id="tooltip" style="position: fixed; display: none; background: #2a2a2a; padding: 4px 8px; border: 1px solid #555; border-radius: 3px; pointer-events: none; font-size: 11px; z-index: 1000;"></div> + + <div id="status">Load a WAV file to begin...</div> + + <script src="fft.js"></script> + <script src="mq_extract.js"></script> + <script src="viewer.js"></script> + <script> + let audioBuffer = null; + let viewer = null; + + const wavFile = document.getElementById('wavFile'); + const extractBtn = document.getElementById('extractBtn'); + const canvas = document.getElementById('canvas'); + const status = document.getElementById('status'); + + const fftSize = document.getElementById('fftSize'); + const hopSize = document.getElementById('hopSize'); + const threshold = document.getElementById('threshold'); + + // Load WAV file + wavFile.addEventListener('change', async (e) => { + const file = e.target.files[0]; + if (!file) return; + + 
setStatus('Loading WAV...', 'info'); + try { + const arrayBuffer = await file.arrayBuffer(); + const audioContext = new AudioContext(); + audioBuffer = await audioContext.decodeAudioData(arrayBuffer); + + extractBtn.disabled = false; + setStatus(`Loaded: ${audioBuffer.duration.toFixed(2)}s, ${audioBuffer.sampleRate}Hz, ${audioBuffer.numberOfChannels}ch`, 'info'); + + // Create viewer + viewer = new SpectrogramViewer(canvas, audioBuffer); + } catch (err) { + setStatus('Error loading WAV: ' + err.message, 'error'); + console.error(err); + } + }); + + // Extract partials + extractBtn.addEventListener('click', () => { + if (!audioBuffer) return; + + setStatus('Extracting partials...', 'info'); + extractBtn.disabled = true; + + setTimeout(() => { + try { + const params = { + fftSize: parseInt(fftSize.value), + hopSize: parseInt(hopSize.value), + threshold: parseFloat(threshold.value), + sampleRate: audioBuffer.sampleRate + }; + + const partials = extractPartials(audioBuffer, params); + + setStatus(`Extracted ${partials.length} partials`, 'info'); + + // Update viewer + viewer.setPartials(partials); + + } catch (err) { + setStatus('Extraction error: ' + err.message, 'error'); + console.error(err); + } + extractBtn.disabled = false; + }, 50); + }); + + function setStatus(msg, type = '') { + status.innerHTML = msg; + status.className = type; + } + </script> +</body> +</html> diff --git a/tools/mq_editor/mq_extract.js b/tools/mq_editor/mq_extract.js new file mode 100644 index 0000000..62b275c --- /dev/null +++ b/tools/mq_editor/mq_extract.js @@ -0,0 +1,216 @@ +// MQ Extraction Algorithm +// McAulay-Quatieri sinusoidal analysis + +// Extract partials from audio buffer +function extractPartials(audioBuffer, params) { + const {fftSize, hopSize, threshold, sampleRate} = params; + + // Get mono channel (mix to mono if stereo) + const signal = getMono(audioBuffer); + const numFrames = Math.floor((signal.length - fftSize) / hopSize); + + // Analyze frames + const frames = []; + 
for (let i = 0; i < numFrames; ++i) { + const offset = i * hopSize; + const frame = signal.slice(offset, offset + fftSize); + const peaks = detectPeaks(frame, fftSize, sampleRate, threshold); + const time = offset / sampleRate; + frames.push({time, peaks}); + } + + // Track trajectories (each frame's time is its start offset in seconds, not the window centre) + const partials = trackPartials(frames, sampleRate); + + // Fit bezier curves + for (const partial of partials) { + partial.freqCurve = fitBezier(partial.times, partial.freqs); + partial.ampCurve = fitBezier(partial.times, partial.amps); + } + + return partials; +} + +// Get mono signal +// Returns channel 0 as-is (no copy) for single-channel buffers; otherwise returns a new +// Float32Array holding the average of channels 0 and 1. Any further channels are ignored. +function getMono(audioBuffer) { + const data = audioBuffer.getChannelData(0); + if (audioBuffer.numberOfChannels === 1) { + return data; + } + + // Mix to mono + const left = audioBuffer.getChannelData(0); + const right = audioBuffer.getChannelData(1); + const mono = new Float32Array(left.length); + for (let i = 0; i < left.length; ++i) { + mono[i] = (left[i] + right[i]) * 0.5; + } + return mono; +} + +// Detect peaks in FFT frame +// frame: fftSize time-domain samples. thresholdDB: minimum peak level in dB. +// Returns [{freq, amp}] with freq in Hz and amp as a linear magnitude on the raw FFT scale. +function detectPeaks(frame, fftSize, sampleRate, thresholdDB) { + // Apply Hann window + const windowed = new Float32Array(fftSize); + for (let i = 0; i < fftSize; ++i) { + const w = 0.5 - 0.5 * Math.cos(2 * Math.PI * i / fftSize); + windowed[i] = frame[i] * w; + } + + // FFT via realFFT() from fft.js; output is interleaved [re0, im0, re1, im1, ...] + const spectrum = realFFT(windowed); + + // Convert to magnitude dB for the lower fftSize/2 bins. + // The magnitude is the raw FFT value (no 1/N or window-gain normalization), so thresholdDB is relative to that scale. + const mag = new Float32Array(fftSize / 2); + for (let i = 0; i < fftSize / 2; ++i) { + const re = spectrum[i * 2]; + const im = spectrum[i * 2 + 1]; + const magLin = Math.sqrt(re * re + im * im); + mag[i] = 20 * Math.log10(Math.max(magLin, 1e-10)); + } + + // Find local maxima above threshold (strictly greater than two neighbours on each side) + const peaks = []; + for (let i = 2; i < mag.length - 2; ++i) { + if (mag[i] > thresholdDB && + mag[i] > mag[i-1] && mag[i] > mag[i-2] && + mag[i] > mag[i+1] && mag[i] > mag[i+2]) { + + // Parabolic interpolation for sub-bin accuracy (p is the peak's offset from bin i, in bins) + const alpha = mag[i-1]; + const beta = mag[i]; + const gamma = mag[i+1]; + const p = 0.5 
* (alpha - gamma) / (alpha - 2*beta + gamma); + + const binFreq = (i + p) * sampleRate / fftSize; + const ampDB = beta - 0.25 * (alpha - gamma) * p; + const ampLin = Math.pow(10, ampDB / 20); + + peaks.push({freq: binFreq, amp: ampLin}); + } + } + + return peaks; +} + +// Track partials across frames (birth/death/continuation) +// frames: [{time, peaks: [{freq, amp}]}], processed in array order. +// Returns partials as {times, freqs, amps, age, matchIdx}; only partials with at least 4 points are kept. +// sampleRate is currently unused. +function trackPartials(frames, sampleRate) { + const partials = []; + const activePartials = []; + const trackingThreshold = 50; // Hz + + for (const frame of frames) { + const matched = new Set(); + + // Match peaks to existing partials + // Greedy: partials are served in list order, each claiming its nearest still-unclaimed peak within trackingThreshold + for (const partial of activePartials) { + const lastFreq = partial.freqs[partial.freqs.length - 1]; + + let bestPeak = null; + let bestDist = Infinity; + + for (let i = 0; i < frame.peaks.length; ++i) { + if (matched.has(i)) continue; + + const peak = frame.peaks[i]; + const dist = Math.abs(peak.freq - lastFreq); + + if (dist < trackingThreshold && dist < bestDist) { + bestPeak = peak; + bestDist = dist; + partial.matchIdx = i; + } + } + + if (bestPeak) { + // Continuation + partial.times.push(frame.time); + partial.freqs.push(bestPeak.freq); + partial.amps.push(bestPeak.amp); + partial.age = 0; + matched.add(partial.matchIdx); + } else { + // No match: age counts consecutive missed frames (reset to 0 on any match) + partial.age++; + } + } + + // Birth new partials from unmatched peaks + for (let i = 0; i < frame.peaks.length; ++i) { + if (matched.has(i)) continue; + + const peak = frame.peaks[i]; + activePartials.push({ + times: [frame.time], + freqs: [peak.freq], + amps: [peak.amp], + age: 0, + matchIdx: -1 + }); + } + + // Death old partials: retire those unmatched for more than 2 consecutive frames + // (iterating backwards keeps indices valid while splicing) + for (let i = activePartials.length - 1; i >= 0; --i) { + if (activePartials[i].age > 2) { + // Move to finished if long enough (>= 4 points); shorter tracks are discarded + if (activePartials[i].times.length >= 4) { + partials.push(activePartials[i]); + } + activePartials.splice(i, 1); + } + } + } + + // Finish remaining active partials (same >= 4 point minimum) + for (const partial of activePartials) { + if (partial.times.length >= 4) { + partials.push(partial); + } + } + + return partials; +} 
+ +// Fit cubic bezier curve to trajectory +function fitBezier(times, values) { + if (times.length < 4) { + // Not enough points, just use linear segments + return { + t0: times[0], v0: values[0], + t1: times[0], v1: values[0], + t2: times[times.length-1], v2: values[values.length-1], + t3: times[times.length-1], v3: values[values.length-1] + }; + } + + // Fix endpoints + const t0 = times[0]; + const t3 = times[times.length - 1]; + const v0 = values[0]; + const v3 = values[values.length - 1]; + + // Solve for interior control points via least squares + // Simplification: place at 1/3 and 2/3 positions + const t1 = t0 + (t3 - t0) / 3; + const t2 = t0 + 2 * (t3 - t0) / 3; + + // Find v1, v2 by evaluating at nearest data points + let v1 = v0, v2 = v3; + let minDist1 = Infinity, minDist2 = Infinity; + + for (let i = 0; i < times.length; ++i) { + const dist1 = Math.abs(times[i] - t1); + const dist2 = Math.abs(times[i] - t2); + + if (dist1 < minDist1) { + minDist1 = dist1; + v1 = values[i]; + } + if (dist2 < minDist2) { + minDist2 = dist2; + v2 = values[i]; + } + } + + return {t0, v0, t1, v1, t2, v2, t3, v3}; +} diff --git a/tools/mq_editor/viewer.js b/tools/mq_editor/viewer.js new file mode 100644 index 0000000..1b2f5bf --- /dev/null +++ b/tools/mq_editor/viewer.js @@ -0,0 +1,376 @@ +// Spectrogram Viewer +// Handles all visualization: spectrogram, partials, zoom, mouse interaction + +class SpectrogramViewer { + constructor(canvas, audioBuffer) { + this.canvas = canvas; + this.ctx = canvas.getContext('2d'); + this.audioBuffer = audioBuffer; + this.partials = []; + + // View state (time only, frequency fixed) + this.timeStart = 0; + this.timeEnd = audioBuffer.duration; + this.freqStart = 0; + this.freqEnd = 16000; // Fixed + + // Tooltip + this.tooltip = document.getElementById('tooltip'); + + // Setup event handlers + this.setupMouseHandlers(); + + // Initial render + this.render(); + } + + setPartials(partials) { + this.partials = partials; + this.render(); + } + + 
reset() { + this.timeStart = 0; + this.timeEnd = this.audioBuffer.duration; + this.render(); + } + + render() { + this.renderSpectrogram(); + this.renderPartials(); + this.drawAxes(); + } + + // Render spectrogram background + renderSpectrogram() { + const {canvas, ctx, audioBuffer} = this; + const width = canvas.width; + const height = canvas.height; + + ctx.fillStyle = '#000'; + ctx.fillRect(0, 0, width, height); + + const signal = getMono(audioBuffer); + const fftSize = 2048; + const hopSize = 512; + const sampleRate = audioBuffer.sampleRate; + + const numFrames = Math.floor((signal.length - fftSize) / hopSize); + + // Compute one FFT per ~4 pixels for wider bars + const pixelsPerFrame = 4; + const numDisplayFrames = Math.floor(width / pixelsPerFrame); + + // Map view bounds to frame indices + const startFrameIdx = Math.floor(this.timeStart * sampleRate / hopSize); + const endFrameIdx = Math.floor(this.timeEnd * sampleRate / hopSize); + const visibleFrames = endFrameIdx - startFrameIdx; + const frameStep = Math.max(1, Math.floor(visibleFrames / numDisplayFrames)); + + for (let displayIdx = 0; displayIdx < numDisplayFrames; ++displayIdx) { + const frameIdx = startFrameIdx + displayIdx * frameStep; + if (frameIdx >= numFrames) break; + + const offset = frameIdx * hopSize; + if (offset + fftSize > signal.length) break; + + const frame = signal.slice(offset, offset + fftSize); + + // Windowing + const windowed = new Float32Array(fftSize); + for (let i = 0; i < fftSize; ++i) { + const w = 0.5 - 0.5 * Math.cos(2 * Math.PI * i / fftSize); + windowed[i] = frame[i] * w; + } + + // FFT + const spectrum = realFFT(windowed); + + // Draw as vertical bar + const xStart = displayIdx * pixelsPerFrame; + const xEnd = Math.min(xStart + pixelsPerFrame, width); + + // Draw frequency bins + const numBins = fftSize / 2; + for (let bin = 0; bin < numBins; ++bin) { + const freq = bin * sampleRate / fftSize; + if (freq < this.freqStart || freq > this.freqEnd) continue; + + const re = 
spectrum[bin * 2]; + const im = spectrum[bin * 2 + 1]; + const mag = Math.sqrt(re * re + im * im); + const magDB = 20 * Math.log10(Math.max(mag, 1e-10)); + + const normalized = (magDB + 80) / 60; + const intensity = Math.max(0, Math.min(1, normalized)); + + const freqNorm = (freq - this.freqStart) / (this.freqEnd - this.freqStart); + const y = Math.floor(height - freqNorm * height); + if (y < 0 || y >= height) continue; + + const color = this.getSpectrogramColor(intensity); + ctx.fillStyle = `rgb(${color.r},${color.g},${color.b})`; + ctx.fillRect(xStart, y, xEnd - xStart, 1); + } + } + } + + // Render extracted partials + renderPartials() { + const {ctx, canvas, partials} = this; + const width = canvas.width; + const height = canvas.height; + + const colors = [ + '#f44', '#4f4', '#44f', '#ff4', '#f4f', '#4ff', + '#fa4', '#4fa', '#a4f', '#af4', '#f4a', '#4af' + ]; + + const timeDuration = this.timeEnd - this.timeStart; + const freqRange = this.freqEnd - this.freqStart; + + for (let p = 0; p < partials.length; ++p) { + const partial = partials[p]; + const color = colors[p % colors.length]; + + // Draw raw trajectory + ctx.strokeStyle = color + '44'; + ctx.lineWidth = 1; + ctx.beginPath(); + + let started = false; + for (let i = 0; i < partial.times.length; ++i) { + const t = partial.times[i]; + const f = partial.freqs[i]; + + if (t < this.timeStart || t > this.timeEnd) continue; + if (f < this.freqStart || f > this.freqEnd) continue; + + const x = (t - this.timeStart) / timeDuration * width; + const y = height - (f - this.freqStart) / freqRange * height; + + if (!started) { + ctx.moveTo(x, y); + started = true; + } else { + ctx.lineTo(x, y); + } + } + + if (started) ctx.stroke(); + + // Draw bezier curve + if (partial.freqCurve) { + ctx.strokeStyle = color; + ctx.lineWidth = 2; + ctx.beginPath(); + + const curve = partial.freqCurve; + const numSteps = 50; + + started = false; + for (let i = 0; i <= numSteps; ++i) { + const t = curve.t0 + (curve.t3 - curve.t0) * i / 
numSteps; + const freq = evalBezier(curve, t); + + if (t < this.timeStart || t > this.timeEnd) continue; + if (freq < this.freqStart || freq > this.freqEnd) continue; + + const x = (t - this.timeStart) / timeDuration * width; + const y = height - (freq - this.freqStart) / freqRange * height; + + if (!started) { + ctx.moveTo(x, y); + started = true; + } else { + ctx.lineTo(x, y); + } + } + + if (started) ctx.stroke(); + + // Draw control points + ctx.fillStyle = color; + this.drawControlPoint(curve.t0, curve.v0); + this.drawControlPoint(curve.t1, curve.v1); + this.drawControlPoint(curve.t2, curve.v2); + this.drawControlPoint(curve.t3, curve.v3); + } + } + } + + // Draw control point + drawControlPoint(t, v) { + if (t < this.timeStart || t > this.timeEnd) return; + if (v < this.freqStart || v > this.freqEnd) return; + + const timeDuration = this.timeEnd - this.timeStart; + const freqRange = this.freqEnd - this.freqStart; + + const x = (t - this.timeStart) / timeDuration * this.canvas.width; + const y = this.canvas.height - (v - this.freqStart) / freqRange * this.canvas.height; + + this.ctx.beginPath(); + this.ctx.arc(x, y, 4, 0, 2 * Math.PI); + this.ctx.fill(); + + this.ctx.strokeStyle = '#fff'; + this.ctx.lineWidth = 1; + this.ctx.stroke(); + } + + // Draw axes with ticks and labels + drawAxes() { + const {ctx, canvas} = this; + const width = canvas.width; + const height = canvas.height; + + ctx.strokeStyle = '#666'; + ctx.fillStyle = '#aaa'; + ctx.font = '11px monospace'; + ctx.lineWidth = 1; + + const timeDuration = this.timeEnd - this.timeStart; + const freqRange = this.freqEnd - this.freqStart; + + // Time axis + const timeStep = this.getAxisStep(timeDuration); + let t = Math.ceil(this.timeStart / timeStep) * timeStep; + while (t <= this.timeEnd) { + const x = (t - this.timeStart) / timeDuration * width; + + ctx.beginPath(); + ctx.moveTo(x, 0); + ctx.lineTo(x, height); + ctx.stroke(); + + ctx.fillText(t.toFixed(2) + 's', x + 2, height - 4); + t += timeStep; + } 
+ + // Frequency axis + const freqStep = this.getAxisStep(freqRange); + let f = Math.ceil(this.freqStart / freqStep) * freqStep; + while (f <= this.freqEnd) { + const y = height - (f - this.freqStart) / freqRange * height; + + ctx.beginPath(); + ctx.moveTo(0, y); + ctx.lineTo(width, y); + ctx.stroke(); + + const label = f >= 1000 ? (f/1000).toFixed(1) + 'k' : f.toFixed(0); + ctx.fillText(label + 'Hz', 2, y - 2); + f += freqStep; + } + } + + // Setup mouse event handlers + setupMouseHandlers() { + const {canvas, tooltip} = this; + + // Mouse move (tooltip) + canvas.addEventListener('mousemove', (e) => { + const rect = canvas.getBoundingClientRect(); + const x = e.clientX - rect.left; + const y = e.clientY - rect.top; + + const time = this.canvasToTime(x); + const freq = this.canvasToFreq(y); + + tooltip.style.left = (e.clientX + 10) + 'px'; + tooltip.style.top = (e.clientY + 10) + 'px'; + tooltip.style.display = 'block'; + tooltip.textContent = `${time.toFixed(3)}s, ${freq.toFixed(1)}Hz`; + }); + + canvas.addEventListener('mouseleave', () => { + tooltip.style.display = 'none'; + }); + + // Mouse wheel (horizontal zoom only) + canvas.addEventListener('wheel', (e) => { + e.preventDefault(); + + const rect = canvas.getBoundingClientRect(); + const x = e.clientX - rect.left; + + // Get mouse position in time space + const mouseTime = this.canvasToTime(x); + + // Zoom factor + const zoomFactor = e.deltaY > 0 ? 
1.2 : 0.8; + + // Zoom time around mouse position + const timeDuration = this.timeEnd - this.timeStart; + const newTimeDuration = timeDuration * zoomFactor; + const timeRatio = (mouseTime - this.timeStart) / timeDuration; + + this.timeStart = mouseTime - newTimeDuration * timeRatio; + this.timeEnd = mouseTime + newTimeDuration * (1 - timeRatio); + + // Clamp time bounds + if (this.timeStart < 0) { + this.timeEnd -= this.timeStart; + this.timeStart = 0; + } + if (this.timeEnd > this.audioBuffer.duration) { + this.timeStart -= (this.timeEnd - this.audioBuffer.duration); + this.timeEnd = this.audioBuffer.duration; + } + + // Re-render + this.render(); + }); + } + + // Coordinate conversion + canvasToTime(x) { + return this.timeStart + (x / this.canvas.width) * (this.timeEnd - this.timeStart); + } + + canvasToFreq(y) { + return this.freqEnd - (y / this.canvas.height) * (this.freqEnd - this.freqStart); + } + + // Utilities + getAxisStep(range) { + const steps = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000]; + const targetSteps = 8; + const targetStep = range / targetSteps; + + for (const step of steps) { + if (step >= targetStep) return step; + } + + return steps[steps.length - 1]; + } + + getSpectrogramColor(intensity) { + if (intensity < 0.25) { + const t = intensity / 0.25; + return {r: 0, g: 0, b: Math.floor(t * 128)}; + } else if (intensity < 0.5) { + const t = (intensity - 0.25) / 0.25; + return {r: 0, g: Math.floor(t * 128), b: 128}; + } else if (intensity < 0.75) { + const t = (intensity - 0.5) / 0.25; + return {r: Math.floor(t * 255), g: 128 + Math.floor(t * 127), b: 128 - Math.floor(t * 128)}; + } else { + const t = (intensity - 0.75) / 0.25; + return {r: 255, g: 255 - Math.floor(t * 128), b: 0}; + } + } +} + +// Bezier evaluation (shared utility) +function evalBezier(curve, t) { + let u = (t - curve.t0) / (curve.t3 - curve.t0); + u = Math.max(0, Math.min(1, u)); + + const u1 = 1 - u; + return u1*u1*u1 * curve.v0 + + 
3*u1*u1*u * curve.v1 + + 3*u1*u*u * curve.v2 + + u*u*u * curve.v3; +} |
