Diffstat (limited to 'tools/spectral_editor')
-rw-r--r--  tools/spectral_editor/FEATURES.md  | 151
-rw-r--r--  tools/spectral_editor/dct.js       | 101
-rw-r--r--  tools/spectral_editor/script.js    |  89
3 files changed, 264 insertions, 77 deletions
diff --git a/tools/spectral_editor/FEATURES.md b/tools/spectral_editor/FEATURES.md
new file mode 100644
index 0000000..6c36cc2
--- /dev/null
+++ b/tools/spectral_editor/FEATURES.md
@@ -0,0 +1,151 @@
+# Spectral Editor - Feature Roadmap
+
+This document tracks planned enhancements for the spectral editor.
+
+## Priority: High
+
+### A. Curve Translation (Shift+Click+Drag)
+**Description**: Shift+clicking a control point and dragging should displace the whole curve at once (translate all control points).
+
+**Implementation Notes**:
+- Detect shift key state during control point click
+- Store initial positions of all control points in the curve
+- Apply uniform translation delta to all points during drag
+- Maintain curve shape while moving
+
+**Complexity**: Medium
+**Estimated Effort**: 2-3 hours
+
+---
+
+### B. Viewport Zoom (Mouse Wheel)
+**Description**: The mouse wheel should allow zooming in/out of the view for fine placement of curves.
+
+**Implementation Notes**:
+- Implement zoom scale factor (e.g., 0.5x to 4.0x)
+- Center zoom around mouse cursor position
+- Update rendering to use scaled coordinates
+- Add visual zoom indicator (e.g., "Zoom: 2.0x")
+- Consider pan functionality (drag with middle mouse or space+drag)
+
+**Complexity**: High (coordinate transformation, pan interaction)
+**Estimated Effort**: 6-8 hours
+
+---
+
+## Priority: Medium
+
+### C. Enhanced Sinusoid Pattern
+**Description**: The 'sinusoid' pattern is quite interesting and deserves more variations.
+
+**Proposed Variations**:
+- **Asymmetric Decay**: Different decay rates above and below the curve center
+  - `decay_top` parameter (controls upper harmonics falloff)
+  - `decay_bottom` parameter (controls lower harmonics falloff)
+- **Temporal Modulation**: Per-frame amplitude/frequency modulation along the timeline
+  - `amplitude_envelope` (fade in/out over time)
+  - `frequency_drift` (vibrato/wobble effect)
+  - `phase_offset` (shift pattern over time)
+- **Harmonic Series**: Option to generate harmonic overtones
+  - `num_harmonics` parameter
+  - `harmonic_decay` parameter
+
+**Implementation Notes**:
+- Extend `SinusoidProfile` class with additional parameters
+- Add UI controls for new parameters (sliders, dropdowns)
+- Render preview showing modulation over time
+
+**Complexity**: Medium-High
+**Estimated Effort**: 8-12 hours
+
+---
+
+### D. Per-Control-Point Modulation
+**Description**: Each control point should carry individually controllable parameters (volume, decay, etc.) for fine modulation over time.
+
+**Proposed Parameters (per control point)**:
+- `volume`: Local amplitude multiplier (0.0 - 2.0)
+- `decay`: Local decay rate override
+- `width`: Gaussian width override (for profile spreading)
+- `phase`: Phase offset for sinusoid patterns
+- `color`: Visual indicator for parameter variations
+
+**Implementation Notes**:
+- Extend control point data structure with parameter fields
+- Add per-point property panel (show on control point selection)
+- Render visual hints (color-coded points, size variations)
+- Interpolate parameters between control points for smooth transitions
+
+**Complexity**: High (UI/UX design, parameter interpolation)
+**Estimated Effort**: 10-15 hours
+
+---
+
+### E. Composable Profiles
+**Description**: Profiles should be composable along a curve (e.g., apply a Gaussian envelope to a sinusoid pattern).
+
+**Proposed Syntax**:
+```cpp
+// Example: Gaussian-modulated sinusoid
+CompositeProfile {
+    base: SinusoidProfile { frequency: 100.0, decay: 0.5 },
+    envelope: GaussianProfile { center: 256, width: 50 }
+}
+```
+
+**Implementation Notes**:
+- Define profile composition operators:
+  - `multiply`: Envelope modulation (amplitude × profile)
+  - `add`: Additive blending (profile1 + profile2)
+  - `max`: Take maximum value at each bin
+- Add UI for profile layering (drag-and-drop profile stack)
+- Render composite preview with layer visualization
+
+**Complexity**: High (requires profile abstraction refactor)
+**Estimated Effort**: 12-16 hours
+
+---
+
+## Priority: Low (Polish)
+
+### F. Improved Parameter Sliders
+**Description**: Adjust slider ranges for better usability (Decay, Width, Frequency, etc.).
+
+**Issues to Address**:
+- Decay slider: Non-linear scaling (logarithmic?) for finer control at low values
+- Frequency slider: Snap to musical notes (optional A440-based grid)
+- Width slider: Preview visualization (show affected frequency range)
+- General: Add numeric input fields next to sliders for precise values
+
+**Implementation Notes**:
+- Implement logarithmic slider interpolation for decay/width
+- Add slider tick marks at useful intervals
+- Display current value and units (Hz, bins, dB, etc.)
+- Add reset-to-default buttons
+
+**Complexity**: Low-Medium
+**Estimated Effort**: 3-4 hours
+
+---
+
+## Future Ideas (Backlog)
+
+- **Undo/Redo System**: Track edit history for curve modifications
+- **Preset Library**: Save/load common curve patterns (kick drum, snare, bass, etc.)
+- **Curve Smoothing**: Apply smoothing filters to jittery control points
+- **Copy/Paste**: Duplicate curves or control point selections
+- **Multi-Selection**: Select and edit multiple control points simultaneously
+- **Grid Snapping**: Snap control points to frequency/time grid
+- **Export Options**: Export to different formats (JSON, binary, C++ code)
+
+---
+
+## Total Estimated Effort
+- **High Priority**: 8-11 hours
+- **Medium Priority**: 30-43 hours
+- **Low Priority**: 3-4 hours
+- **Grand Total**: 41-58 hours (roughly 1-1.5 weeks of focused work)
+
+---
+
+*Last Updated: February 6, 2026*
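Roadmap item A (curve translation) can be prototyped almost entirely on top of the coordinate helpers that already exist in script.js. The sketch below is illustrative only: the `curve.points` array and the `{frame, freqHz}` shape returned by `screenToSpectrogram` are assumptions about the editor's data model, not something confirmed by this change.

```js
// Minimal sketch, assuming script.js is loaded and control points are stored
// as { frame, freqHz } objects (an assumption for illustration).
function translateCurve(curve, dxPixels, dyPixels) {
    for (const p of curve.points) {
        // Convert to screen space, shift by the drag delta, convert back.
        const screen = spectrogramToScreen(p.frame, p.freqHz);
        const moved = screenToSpectrogram(screen.x + dxPixels, screen.y + dyPixels);
        p.frame = moved.frame;     // field names assumed
        p.freqHz = moved.freqHz;
    }
}
```

Translating in screen space keeps the curve's visual shape intact on the logarithmic frequency axis; adding a constant Hz offset to every point would distort it.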
diff --git a/tools/spectral_editor/dct.js b/tools/spectral_editor/dct.js
index deff8a9..435a7e8 100644
--- a/tools/spectral_editor/dct.js
+++ b/tools/spectral_editor/dct.js
@@ -1,20 +1,10 @@
 const dctSize = 512; // Default DCT size, read from header
 
 // --- Utility Functions for Audio Processing ---
 
+// Fast O(N log N) IDCT using FFT
 // JavaScript equivalent of C++ idct_512
 function javascript_idct_512(input) {
-    const output = new Float32Array(dctSize);
-    const PI = Math.PI;
-    const N = dctSize;
-
-    for (let n = 0; n < N; ++n) {
-        let sum = input[0] / 2.0;
-        for (let k = 1; k < N; ++k) {
-            sum += input[k] * Math.cos((PI / N) * k * (n + 0.5));
-        }
-        output[n] = sum * (2.0 / N);
-    }
-    return output;
+    return javascript_idct_512_fft(input);
 }
 
 // Hanning window for smooth audio transitions (JavaScript equivalent)
@@ -127,95 +117,90 @@ function fftInverse(real, imag, N) {
     }
 }
 
-// DCT-II via FFT using double-and-mirror method (matches C++ dct_fft)
-// This is a more robust algorithm that avoids reordering issues
+// DCT-II via FFT using reordering method (matches C++ dct_fft)
+// Reference: Numerical Recipes Chapter 12.3
 function javascript_dct_fft(input, N) {
     const PI = Math.PI;
 
-    // Allocate arrays for 2N-point FFT
-    const M = 2 * N;
-    const real = new Float32Array(M);
-    const imag = new Float32Array(M);
+    // Allocate arrays for N-point FFT
+    const real = new Float32Array(N);
+    const imag = new Float32Array(N);
 
-    // Pack input: [x[0], x[1], ..., x[N-1], x[N-1], x[N-2], ..., x[1]]
-    // This creates even symmetry for real-valued DCT
-    for (let i = 0; i < N; i++) {
-        real[i] = input[i];
-    }
-    for (let i = 0; i < N; i++) {
-        real[N + i] = input[N - 1 - i];
+    // Reorder input: even indices first, then odd indices reversed
+    // [x[0], x[2], x[4], ...] followed by [x[N-1], x[N-3], x[N-5], ...]
+    for (let i = 0; i < N / 2; i++) {
+        real[i] = input[2 * i];              // Even indices: 0, 2, 4, ...
+        real[N - 1 - i] = input[2 * i + 1];  // Odd indices reversed: N-1, N-3, ...
     }
     // imag is already zeros (Float32Array default)
 
-    // Apply 2N-point FFT
-    fftForward(real, imag, M);
+    // Apply N-point FFT
+    fftForward(real, imag, N);
 
-    // Extract DCT coefficients
+    // Extract DCT coefficients with phase correction
     // DCT[k] = Re{FFT[k] * exp(-j*pi*k/(2*N))} * normalization
-    // Note: Need to divide by 2 because we doubled the signal length
     const output = new Float32Array(N);
     for (let k = 0; k < N; k++) {
        const angle = -PI * k / (2.0 * N);
        const wr = Math.cos(angle);
        const wi = Math.sin(angle);
 
-        // Complex multiplication: (real + j*imag) * (wr + j*wi)
+        // Complex multiplication: (real[k] + j*imag[k]) * (wr + j*wi)
        // Real part: real*wr - imag*wi
        const dct_value = real[k] * wr - imag[k] * wi;
 
-        // Apply DCT-II normalization (divide by 2 for double-length FFT)
        if (k === 0) {
-            output[k] = dct_value * Math.sqrt(1.0 / N) / 2.0;
+        // Apply DCT-II normalization
+        if (k === 0) {
+            output[k] = dct_value * Math.sqrt(1.0 / N);
        } else {
-            output[k] = dct_value * Math.sqrt(2.0 / N) / 2.0;
+            output[k] = dct_value * Math.sqrt(2.0 / N);
        }
     }
 
     return output;
 }
 
-// IDCT (Inverse DCT-II) via FFT using double-and-mirror method (matches C++ idct_fft)
+// IDCT (DCT-III) via FFT using reordering method (matches C++ idct_fft)
+// Reference: Numerical Recipes Chapter 12.3
 function javascript_idct_fft(input, N) {
     const PI = Math.PI;
 
-    // Allocate arrays for 2N-point FFT
-    const M = 2 * N;
-    const real = new Float32Array(M);
-    const imag = new Float32Array(M);
+    // Allocate arrays for N-point FFT
+    const real = new Float32Array(N);
+    const imag = new Float32Array(N);
 
-    // Prepare FFT input from DCT coefficients
-    // IDCT = Re{IFFT[DCT * exp(j*pi*k/(2*N))]} * 2
+    // Prepare FFT input with inverse phase correction
+    // FFT[k] = DCT[k] * exp(+j*pi*k/(2*N)) / normalization
+    // Note: DCT-III needs factor of 2 for AC terms
     for (let k = 0; k < N; k++) {
-        const angle = PI * k / (2.0 * N); // Positive for inverse
+        const angle = PI * k / (2.0 * N); // Positive angle for inverse
        const wr = Math.cos(angle);
        const wi = Math.sin(angle);
 
-        // Apply inverse normalization
-        let scaled_input;
+        // Inverse of DCT-II normalization with correct DCT-III scaling
+        let scaled;
        if (k === 0) {
-            scaled_input = input[k] * Math.sqrt(N) * 2.0;
+            scaled = input[k] / Math.sqrt(1.0 / N);
        } else {
-            scaled_input = input[k] * Math.sqrt(N / 2.0) * 2.0;
+            // DCT-III needs factor of 2 for AC terms
+            scaled = input[k] / Math.sqrt(2.0 / N) * 2.0;
        }
 
-        // Complex multiplication: DCT[k] * exp(j*theta)
-        real[k] = scaled_input * wr;
-        imag[k] = scaled_input * wi;
-    }
-
-    // Fill second half with conjugate symmetry (for real output)
-    for (let k = 1; k < N; k++) {
-        real[M - k] = real[k];
-        imag[M - k] = -imag[k];
+        // Complex multiplication: scaled * (wr + j*wi)
+        real[k] = scaled * wr;
+        imag[k] = scaled * wi;
     }
 
     // Apply inverse FFT
-    fftInverse(real, imag, M);
+    fftInverse(real, imag, N);
 
-    // Extract first N samples (real part only, imag should be ~0)
+    // Unpack: reverse the reordering from DCT
+    // Even output indices come from first half of FFT output
+    // Odd output indices come from second half (reversed)
     const output = new Float32Array(N);
-    for (let i = 0; i < N; i++) {
-        output[i] = real[i];
+    for (let i = 0; i < N / 2; i++) {
+        output[2 * i] = real[i];              // Even positions
+        output[2 * i + 1] = real[N - 1 - i];  // Odd positions (reversed)
     }
 
     return output;
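The easiest way to gain confidence in the reordering-based pair above is a forward/inverse round trip on random data. The sketch below could be run in the browser console once dct.js is loaded; the expectation of a small residual error is an assumption based on Float32Array precision, not a measured figure.

```js
// Round-trip sanity check for javascript_dct_fft / javascript_idct_fft.
function checkDctRoundTrip(N = 512) {
    const x = new Float32Array(N);
    for (let i = 0; i < N; i++) x[i] = Math.random() * 2 - 1;

    const coeffs = javascript_dct_fft(x, N);   // forward DCT-II (orthonormal)
    const y = javascript_idct_fft(coeffs, N);  // inverse via DCT-III

    let maxErr = 0;
    for (let i = 0; i < N; i++) maxErr = Math.max(maxErr, Math.abs(x[i] - y[i]));
    console.log('max round-trip error:', maxErr);  // should be tiny relative to the signal
    return maxErr;
}
```

Comparing `javascript_dct_fft` against the textbook O(N^2) definition (the loop that `javascript_dct_512` used to contain, removed later in this change) would additionally confirm that the orthonormal scaling (sqrt(1/N) for k = 0, sqrt(2/N) otherwise) is preserved.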
diff --git a/tools/spectral_editor/script.js b/tools/spectral_editor/script.js
index 48b0661..7c424f9 100644
--- a/tools/spectral_editor/script.js
+++ b/tools/spectral_editor/script.js
@@ -30,6 +30,8 @@ const state = {
     canvasHeight: 0,
     pixelsPerFrame: 2.0,   // Zoom level (pixels per frame)
     pixelsPerBin: 1.0,     // Vertical scale (pixels per frequency bin)
+    viewportOffsetX: 0,    // Horizontal pan offset (pixels)
+    viewportOffsetY: 0,    // Vertical pan offset (pixels)
 
     // Audio playback
     audioContext: null,
@@ -94,6 +96,9 @@ function initCanvas() {
     // Mouse hover handlers (for crosshair)
     canvas.addEventListener('mousemove', onCanvasHover);
     canvas.addEventListener('mouseleave', onCanvasLeave);
+
+    // Mouse wheel: zoom (with Ctrl/Cmd) or pan
+    canvas.addEventListener('wheel', onCanvasWheel, { passive: false });
 }
 
 function initUI() {
@@ -378,19 +383,9 @@ function audioToSpectrogram(audioData) {
 }
 
 // Forward DCT (not in dct.js, add here)
+// Fast O(N log N) DCT using FFT (delegates to dct.js implementation)
 function javascript_dct_512(input) {
-    const output = new Float32Array(DCT_SIZE);
-    const PI = Math.PI;
-    const N = DCT_SIZE;
-
-    for (let k = 0; k < N; k++) {
-        let sum = 0;
-        for (let n = 0; n < N; n++) {
-            sum += input[n] * Math.cos((PI / N) * k * (n + 0.5));
-        }
-        output[k] = sum * (k === 0 ? Math.sqrt(1 / N) : Math.sqrt(2 / N));
-    }
-    return output;
+    return javascript_dct_512_fft(input);
 }
 
 function onReferenceLoaded(fileName) {
@@ -414,6 +409,9 @@ function onReferenceLoaded(fileName) {
 
     // Adjust zoom to fit
     state.pixelsPerFrame = Math.max(1.0, state.canvasWidth / state.referenceNumFrames);
+    state.pixelsPerBin = 1.0;   // Reset vertical scale
+    state.viewportOffsetX = 0;  // Reset pan
+    state.viewportOffsetY = 0;
 
     updateCurveUI();
     updateUndoRedoButtons();
@@ -859,12 +857,62 @@ function onCanvasLeave(e) {
     render();
 }
 
+function onCanvasWheel(e) {
+    e.preventDefault();
+
+    const canvas = e.target;
+    const rect = canvas.getBoundingClientRect();
+    const mouseX = e.clientX - rect.left;
+    const mouseY = e.clientY - rect.top;
+
+    // Zoom mode: Ctrl/Cmd + wheel
+    if (e.ctrlKey || e.metaKey) {
+        // Calculate frame under cursor BEFORE zoom
+        const frameUnderCursor = (mouseX + state.viewportOffsetX) / state.pixelsPerFrame;
+
+        // Calculate new zoom level (horizontal only - logarithmic frequency axis doesn't zoom)
+        const zoomFactor = e.deltaY > 0 ? 0.9 : 1.1; // Wheel down = zoom out, wheel up = zoom in
+        state.pixelsPerFrame = Math.max(0.5, Math.min(20.0, state.pixelsPerFrame * zoomFactor));
+
+        // Adjust viewport offset so frame under cursor stays in same screen position
+        // After zoom: new_offset = frame * newPixelsPerFrame - mouseX
+        state.viewportOffsetX = frameUnderCursor * state.pixelsPerFrame - mouseX;
+
+        // Clamp viewport offset to valid range
+        const maxOffsetX = Math.max(0, state.referenceNumFrames * state.pixelsPerFrame - state.canvasWidth);
+        state.viewportOffsetX = Math.max(0, Math.min(maxOffsetX, state.viewportOffsetX));
+
+        render();
+        return;
+    }
+
+    // Pan mode: Shift + wheel (horizontal pan)
+    if (e.shiftKey) {
+        state.viewportOffsetX += e.deltaY;
+        const maxOffsetX = Math.max(0, state.referenceNumFrames * state.pixelsPerFrame - state.canvasWidth);
+        state.viewportOffsetX = Math.max(0, Math.min(maxOffsetX, state.viewportOffsetX));
+        render();
+        return;
+    }
+
+    // Normal mode: pan vertically (disabled for logarithmic frequency axis)
+    // Note: With logarithmic frequency scale, vertical pan doesn't make sense
+    // because the frequency range (FREQ_MIN to FREQ_MAX) is always scaled to fit canvas height.
+    // Vertical pan only works in linear frequency mode.
+    if (!USE_LOG_SCALE) {
+        state.viewportOffsetY += e.deltaY;
+        const maxOffsetY = Math.max(0, DCT_SIZE * state.pixelsPerBin - state.canvasHeight);
+        state.viewportOffsetY = Math.max(0, Math.min(maxOffsetY, state.viewportOffsetY));
+        render();
+    }
+}
+
 // ============================================================================
 // Coordinate Conversion
 // ============================================================================
 
 function screenToSpectrogram(screenX, screenY) {
-    const frame = Math.round(screenX / state.pixelsPerFrame);
+    const frame = Math.round((screenX + state.viewportOffsetX) / state.pixelsPerFrame);
 
     let freqHz;
     if (USE_LOG_SCALE) {
@@ -891,7 +939,7 @@ function screenToSpectrogram(screenX, screenY) {
 }
 
 function spectrogramToScreen(frame, freqHz) {
-    const x = frame * state.pixelsPerFrame;
+    const x = frame * state.pixelsPerFrame - state.viewportOffsetX;
 
     let y;
     if (USE_LOG_SCALE) {
@@ -901,11 +949,11 @@ function spectrogramToScreen(frame, freqHz) {
         const clampedFreq = Math.max(FREQ_MIN, Math.min(FREQ_MAX, freqHz));
         const logFreq = Math.log10(clampedFreq);
         const normalizedY = (logFreq - logMin) / (logMax - logMin);
-        y = state.canvasHeight * (1.0 - normalizedY); // Flip Y back to screen coords
+        y = state.canvasHeight * (1.0 - normalizedY) - state.viewportOffsetY; // Flip Y back to screen coords
     } else {
         // Linear frequency mapping (old behavior)
         const bin = (freqHz / (SAMPLE_RATE / 2)) * state.referenceDctSize;
-        y = state.canvasHeight - (bin * state.pixelsPerBin);
+        y = state.canvasHeight - (bin * state.pixelsPerBin) - state.viewportOffsetY;
     }
 
     return {x, y};
@@ -953,7 +1001,10 @@ function render() {
 
 function drawPlayhead(ctx) {
     if (!state.isPlaying || state.playbackCurrentFrame < 0) return;
-    const x = state.playbackCurrentFrame * state.pixelsPerFrame;
+    const x = state.playbackCurrentFrame * state.pixelsPerFrame - state.viewportOffsetX;
+
+    // Only draw if playhead is visible in viewport
+    if (x < 0 || x > state.canvasWidth) return;
 
     // Draw vertical line
     ctx.strokeStyle = '#ff3333'; // Bright red
@@ -1553,7 +1604,7 @@ function spectrogramToAudio(spectrogram, dctSize, numFrames) {
     const window = hanningWindowArray;
 
     for (let frameIdx = 0; frameIdx < numFrames; frameIdx++) {
-        // Extract frame
+        // Extract frame (no windowing at this step; the synthesis window is applied in the overlap-add below)
         const frame = new Float32Array(dctSize);
         for (let b = 0; b < dctSize; b++) {
             frame[b] = spectrogram[frameIdx * dctSize + b];
@@ -1562,7 +1613,7 @@ function spectrogramToAudio(spectrogram, dctSize, numFrames) {
         // IDCT
         const timeFrame = javascript_idct_512(frame);
 
-        // Apply window and overlap-add
+        // Apply synthesis window for overlap-add
         const frameStart = frameIdx * hopSize;
         for (let i = 0; i < dctSize; i++) {
             if (frameStart + i < audioLength) {
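The zoom branch of `onCanvasWheel` relies on the mapping `screenX = frame * pixelsPerFrame - viewportOffsetX`, so choosing the new offset as `frameUnderCursor * newPixelsPerFrame - mouseX` keeps the frame under the cursor stationary. Below is a standalone sketch of that invariant, using a plain object instead of the editor's `state` and omitting the clamping that the real handler applies.

```js
// Cursor-anchored zoom, mirroring the math in onCanvasWheel (clamping omitted).
function zoomAroundCursor(view, mouseX, zoomFactor) {
    const frameUnderCursor = (mouseX + view.offsetX) / view.pixelsPerFrame;
    const pixelsPerFrame = Math.max(0.5, Math.min(20.0, view.pixelsPerFrame * zoomFactor));
    const offsetX = frameUnderCursor * pixelsPerFrame - mouseX;
    return { pixelsPerFrame, offsetX };
}

// The frame that sat at x = 300 stays at x = 300 after zooming in by 10%.
const before = { pixelsPerFrame: 2.0, offsetX: 100 };
const after = zoomAroundCursor(before, 300, 1.1);              // pixelsPerFrame 2.2, offsetX 140
const frame = (300 + before.offsetX) / before.pixelsPerFrame;  // frame 200
console.log(frame * after.pixelsPerFrame - after.offsetX);     // 300
```

In the real handler, the subsequent clamp to `[0, maxOffsetX]` can override this anchor near the edges of the spectrogram, which is the expected trade-off when zooming out close to frame 0.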
