summary | refs | log | tree | commit | diff
path: root/tools/spectral_editor
diff options
context:
space:
mode:
Diffstat (limited to 'tools/spectral_editor')
-rw-r--r-- tools/spectral_editor/FEATURES.md | 151
-rw-r--r-- tools/spectral_editor/dct.js | 101
-rw-r--r-- tools/spectral_editor/script.js | 89
3 files changed, 264 insertions, 77 deletions
diff --git a/tools/spectral_editor/FEATURES.md b/tools/spectral_editor/FEATURES.md
new file mode 100644
index 0000000..6c36cc2
--- /dev/null
+++ b/tools/spectral_editor/FEATURES.md
@@ -0,0 +1,151 @@
+# Spectral Editor - Feature Roadmap
+
+This document tracks planned enhancements for the spectral editor.
+
+## Priority: High
+
+### A. Curve Translation (Shift+Click+Drag)
+**Description**: Shift+clicking a control point and dragging should move the whole curve at once (translating all of its control points).
+
+**Implementation Notes**:
+- Detect shift key state during control point click
+- Store initial positions of all control points in the curve
+- Apply uniform translation delta to all points during drag
+- Maintain curve shape while moving
+
+**Complexity**: Medium
+**Estimated Effort**: 2-3 hours
+
+---
+
+### B. Viewport Zoom (Mouse Wheel)
+**Description**: Mouse-wheel should allow zooming in/out on the view for fine placement of curves.
+
+**Implementation Notes**:
+- Implement zoom scale factor (e.g., 0.5x to 4.0x)
+- Center zoom around mouse cursor position
+- Update rendering to use scaled coordinates
+- Add visual zoom indicator (e.g., "Zoom: 2.0x")
+- Consider pan functionality (drag with middle mouse or space+drag)
+
+**Complexity**: High (coordinate transformation, pan interaction)
+**Estimated Effort**: 6-8 hours
+
+---
+
+## Priority: Medium
+
+### C. Enhanced Sinusoid Pattern
+**Description**: The 'sinusoid' pattern is quite interesting and should have more variations.
+
+**Proposed Variations**:
+- **Asymmetric Decay**: Different decay rates above and below the curve center
+ - `decay_top` parameter (controls upper harmonics falloff)
+ - `decay_bottom` parameter (controls lower harmonics falloff)
+- **Temporal Modulation**: Per-frame amplitude/frequency modulation along timeline
+ - `amplitude_envelope` (fade in/out over time)
+ - `frequency_drift` (vibrato/wobble effect)
+ - `phase_offset` (shift pattern over time)
+- **Harmonic Series**: Option to generate harmonic overtones
+ - `num_harmonics` parameter
+ - `harmonic_decay` parameter
+
+**Implementation Notes**:
+- Extend `SinusoidProfile` class with additional parameters
+- Add UI controls for new parameters (sliders, dropdowns)
+- Render preview showing modulation over time
+
+**Complexity**: Medium-High
+**Estimated Effort**: 8-12 hours
+
+---
+
+### D. Per-Control-Point Modulation
+**Description**: Each control point should have its own individually controllable parameters (volume, decay, etc.) for fine modulation over time.
+
+**Proposed Parameters (per control point)**:
+- `volume`: Local amplitude multiplier (0.0 - 2.0)
+- `decay`: Local decay rate override
+- `width`: Gaussian width override (for profile spreading)
+- `phase`: Phase offset for sinusoid patterns
+- `color`: Visual indicator for parameter variations
+
+**Implementation Notes**:
+- Extend control point data structure with parameter fields
+- Add per-point property panel (show on control point selection)
+- Render visual hints (color-coded points, size variations)
+- Interpolate parameters between control points for smooth transitions
+
+**Complexity**: High (UI/UX design, parameter interpolation)
+**Estimated Effort**: 10-15 hours
+
+---
+
+### E. Composable Profiles
+**Description**: Profiles should be composable along a curve (e.g., apply Gaussian curve to sinusoid pattern).
+
+**Proposed Syntax**:
+```cpp
+// Example: Gaussian-modulated sinusoid
+CompositeProfile {
+ base: SinusoidProfile { frequency: 100.0, decay: 0.5 },
+ envelope: GaussianProfile { center: 256, width: 50 }
+}
+```
+
+**Implementation Notes**:
+- Define profile composition operators:
+ - `multiply`: Envelope modulation (amplitude × profile)
+ - `add`: Additive blending (profile1 + profile2)
+ - `max`: Take maximum value at each bin
+- Add UI for profile layering (drag-and-drop profile stack)
+- Render composite preview with layer visualization
+
+**Complexity**: High (requires profile abstraction refactor)
+**Estimated Effort**: 12-16 hours
+
+---
+
+## Priority: Low (Polish)
+
+### F. Improved Parameter Sliders
+**Description**: Adjust slider ranges for better usability (Decay, Width, Frequency, etc.).
+
+**Issues to Address**:
+- Decay slider: Non-linear scaling (logarithmic?) for finer control at low values
+- Frequency slider: Snap to musical notes (optional A440-based grid)
+- Width slider: Preview visualization (show affected frequency range)
+- General: Add numeric input fields next to sliders for precise values
+
+**Implementation Notes**:
+- Implement logarithmic slider interpolation for decay/width
+- Add slider tick marks at useful intervals
+- Display current value and units (Hz, bins, dB, etc.)
+- Add reset-to-default buttons
+
+**Complexity**: Low-Medium
+**Estimated Effort**: 3-4 hours
+
+---
+
+## Future Ideas (Backlog)
+
+- **Undo/Redo System**: Track edit history for curve modifications
+- **Preset Library**: Save/load common curve patterns (kick drum, snare, bass, etc.)
+- **Curve Smoothing**: Apply smoothing filters to jittery control points
+- **Copy/Paste**: Duplicate curves or control point selections
+- **Multi-Selection**: Select and edit multiple control points simultaneously
+- **Grid Snapping**: Snap control points to frequency/time grid
+- **Export Options**: Export to different formats (JSON, binary, C++ code)
+
+---
+
+## Total Estimated Effort
+- **High Priority**: 8-11 hours
+- **Medium Priority**: 30-43 hours
+- **Low Priority**: 3-4 hours
+- **Grand Total**: 41-58 hours (roughly 1-1.5 weeks of focused work)
+
+---
+
+*Last Updated: February 6, 2026*
diff --git a/tools/spectral_editor/dct.js b/tools/spectral_editor/dct.js
index deff8a9..435a7e8 100644
--- a/tools/spectral_editor/dct.js
+++ b/tools/spectral_editor/dct.js
@@ -1,20 +1,10 @@
const dctSize = 512; // Default DCT size, read from header
// --- Utility Functions for Audio Processing ---
+// Fast O(N log N) IDCT using FFT
// JavaScript equivalent of C++ idct_512
function javascript_idct_512(input) {
- const output = new Float32Array(dctSize);
- const PI = Math.PI;
- const N = dctSize;
-
- for (let n = 0; n < N; ++n) {
- let sum = input[0] / 2.0;
- for (let k = 1; k < N; ++k) {
- sum += input[k] * Math.cos((PI / N) * k * (n + 0.5));
- }
- output[n] = sum * (2.0 / N);
- }
- return output;
+ return javascript_idct_512_fft(input);
}
// Hanning window for smooth audio transitions (JavaScript equivalent)
@@ -127,95 +117,90 @@ function fftInverse(real, imag, N) {
}
}
-// DCT-II via FFT using double-and-mirror method (matches C++ dct_fft)
-// This is a more robust algorithm that avoids reordering issues
+// DCT-II via FFT using reordering method (matches C++ dct_fft)
+// Reference: Numerical Recipes Chapter 12.3
function javascript_dct_fft(input, N) {
const PI = Math.PI;
- // Allocate arrays for 2N-point FFT
- const M = 2 * N;
- const real = new Float32Array(M);
- const imag = new Float32Array(M);
+ // Allocate arrays for N-point FFT
+ const real = new Float32Array(N);
+ const imag = new Float32Array(N);
- // Pack input: [x[0], x[1], ..., x[N-1], x[N-1], x[N-2], ..., x[1]]
- // This creates even symmetry for real-valued DCT
- for (let i = 0; i < N; i++) {
- real[i] = input[i];
- }
- for (let i = 0; i < N; i++) {
- real[N + i] = input[N - 1 - i];
+ // Reorder input: even indices first, then odd indices reversed
+ // [x[0], x[2], x[4], ...] followed by [x[N-1], x[N-3], x[N-5], ...]
+ for (let i = 0; i < N / 2; i++) {
+ real[i] = input[2 * i]; // Even indices: 0, 2, 4, ...
+ real[N - 1 - i] = input[2 * i + 1]; // Odd indices reversed: N-1, N-3, ...
}
// imag is already zeros (Float32Array default)
- // Apply 2N-point FFT
- fftForward(real, imag, M);
+ // Apply N-point FFT
+ fftForward(real, imag, N);
- // Extract DCT coefficients
+ // Extract DCT coefficients with phase correction
// DCT[k] = Re{FFT[k] * exp(-j*pi*k/(2*N))} * normalization
- // Note: Need to divide by 2 because we doubled the signal length
const output = new Float32Array(N);
for (let k = 0; k < N; k++) {
const angle = -PI * k / (2.0 * N);
const wr = Math.cos(angle);
const wi = Math.sin(angle);
- // Complex multiplication: (real + j*imag) * (wr + j*wi)
+ // Complex multiplication: (real[k] + j*imag[k]) * (wr + j*wi)
// Real part: real*wr - imag*wi
const dct_value = real[k] * wr - imag[k] * wi;
- // Apply DCT-II normalization (divide by 2 for double-length FFT)
+ // Apply DCT-II normalization
if (k === 0) {
- output[k] = dct_value * Math.sqrt(1.0 / N) / 2.0;
+ output[k] = dct_value * Math.sqrt(1.0 / N);
} else {
- output[k] = dct_value * Math.sqrt(2.0 / N) / 2.0;
+ output[k] = dct_value * Math.sqrt(2.0 / N);
}
}
return output;
}
-// IDCT (Inverse DCT-II) via FFT using double-and-mirror method (matches C++ idct_fft)
+// IDCT (DCT-III) via FFT using reordering method (matches C++ idct_fft)
+// Reference: Numerical Recipes Chapter 12.3
function javascript_idct_fft(input, N) {
const PI = Math.PI;
- // Allocate arrays for 2N-point FFT
- const M = 2 * N;
- const real = new Float32Array(M);
- const imag = new Float32Array(M);
+ // Allocate arrays for N-point FFT
+ const real = new Float32Array(N);
+ const imag = new Float32Array(N);
- // Prepare FFT input from DCT coefficients
- // IDCT = Re{IFFT[DCT * exp(j*pi*k/(2*N))]} * 2
+ // Prepare FFT input with inverse phase correction
+ // FFT[k] = DCT[k] * exp(+j*pi*k/(2*N)) / normalization
+ // Note: DCT-III needs factor of 2 for AC terms
for (let k = 0; k < N; k++) {
- const angle = PI * k / (2.0 * N); // Positive for inverse
+ const angle = PI * k / (2.0 * N); // Positive angle for inverse
const wr = Math.cos(angle);
const wi = Math.sin(angle);
- // Apply inverse normalization
- let scaled_input;
+ // Inverse of DCT-II normalization with correct DCT-III scaling
+ let scaled;
if (k === 0) {
- scaled_input = input[k] * Math.sqrt(N) * 2.0;
+ scaled = input[k] / Math.sqrt(1.0 / N);
} else {
- scaled_input = input[k] * Math.sqrt(N / 2.0) * 2.0;
+ // DCT-III needs factor of 2 for AC terms
+ scaled = input[k] / Math.sqrt(2.0 / N) * 2.0;
}
- // Complex multiplication: DCT[k] * exp(j*theta)
- real[k] = scaled_input * wr;
- imag[k] = scaled_input * wi;
- }
-
- // Fill second half with conjugate symmetry (for real output)
- for (let k = 1; k < N; k++) {
- real[M - k] = real[k];
- imag[M - k] = -imag[k];
+ // Complex multiplication: scaled * (wr + j*wi)
+ real[k] = scaled * wr;
+ imag[k] = scaled * wi;
}
// Apply inverse FFT
- fftInverse(real, imag, M);
+ fftInverse(real, imag, N);
- // Extract first N samples (real part only, imag should be ~0)
+ // Unpack: reverse the reordering from DCT
+ // Even output indices come from first half of FFT output
+ // Odd output indices come from second half (reversed)
const output = new Float32Array(N);
- for (let i = 0; i < N; i++) {
- output[i] = real[i];
+ for (let i = 0; i < N / 2; i++) {
+ output[2 * i] = real[i]; // Even positions
+ output[2 * i + 1] = real[N - 1 - i]; // Odd positions (reversed)
}
return output;
diff --git a/tools/spectral_editor/script.js b/tools/spectral_editor/script.js
index 48b0661..7c424f9 100644
--- a/tools/spectral_editor/script.js
+++ b/tools/spectral_editor/script.js
@@ -30,6 +30,8 @@ const state = {
canvasHeight: 0,
pixelsPerFrame: 2.0, // Zoom level (pixels per frame)
pixelsPerBin: 1.0, // Vertical scale (pixels per frequency bin)
+ viewportOffsetX: 0, // Horizontal pan offset (pixels)
+ viewportOffsetY: 0, // Vertical pan offset (pixels)
// Audio playback
audioContext: null,
@@ -94,6 +96,9 @@ function initCanvas() {
// Mouse hover handlers (for crosshair)
canvas.addEventListener('mousemove', onCanvasHover);
canvas.addEventListener('mouseleave', onCanvasLeave);
+
+ // Mouse wheel: zoom (with Ctrl/Cmd) or pan
+ canvas.addEventListener('wheel', onCanvasWheel, { passive: false });
}
function initUI() {
@@ -378,19 +383,9 @@ function audioToSpectrogram(audioData) {
}
// Forward DCT wrapper (the implementation itself is in dct.js)
+// Fast O(N log N) DCT using FFT (delegates to dct.js implementation)
function javascript_dct_512(input) {
- const output = new Float32Array(DCT_SIZE);
- const PI = Math.PI;
- const N = DCT_SIZE;
-
- for (let k = 0; k < N; k++) {
- let sum = 0;
- for (let n = 0; n < N; n++) {
- sum += input[n] * Math.cos((PI / N) * k * (n + 0.5));
- }
- output[k] = sum * (k === 0 ? Math.sqrt(1 / N) : Math.sqrt(2 / N));
- }
- return output;
+ return javascript_dct_512_fft(input);
}
function onReferenceLoaded(fileName) {
@@ -414,6 +409,9 @@ function onReferenceLoaded(fileName) {
// Adjust zoom to fit
state.pixelsPerFrame = Math.max(1.0, state.canvasWidth / state.referenceNumFrames);
+ state.pixelsPerBin = 1.0; // Reset vertical scale
+ state.viewportOffsetX = 0; // Reset pan
+ state.viewportOffsetY = 0;
updateCurveUI();
updateUndoRedoButtons();
@@ -859,12 +857,62 @@ function onCanvasLeave(e) {
render();
}
+function onCanvasWheel(e) {
+ e.preventDefault();
+
+ const canvas = e.target;
+ const rect = canvas.getBoundingClientRect();
+ const mouseX = e.clientX - rect.left;
+ const mouseY = e.clientY - rect.top;
+
+ // Zoom mode: Ctrl/Cmd + wheel
+ if (e.ctrlKey || e.metaKey) {
+ // Calculate frame under cursor BEFORE zoom
+ const frameUnderCursor = (mouseX + state.viewportOffsetX) / state.pixelsPerFrame;
+
+ // Calculate new zoom level (horizontal only - logarithmic frequency axis doesn't zoom)
+ const zoomFactor = e.deltaY > 0 ? 0.9 : 1.1; // Wheel down = zoom out, wheel up = zoom in
+ state.pixelsPerFrame = Math.max(0.5, Math.min(20.0, state.pixelsPerFrame * zoomFactor));
+
+ // Adjust viewport offset so frame under cursor stays in same screen position
+ // After zoom: new_offset = frame * newPixelsPerFrame - mouseX
+ state.viewportOffsetX = frameUnderCursor * state.pixelsPerFrame - mouseX;
+
+ // Clamp viewport offset to valid range
+ const maxOffsetX = Math.max(0, state.referenceNumFrames * state.pixelsPerFrame - state.canvasWidth);
+ state.viewportOffsetX = Math.max(0, Math.min(maxOffsetX, state.viewportOffsetX));
+
+ render();
+ return;
+ }
+
+ // Pan mode: Shift + wheel (horizontal pan only)
+ if (e.shiftKey) {
+ state.viewportOffsetX += e.deltaY;
+ const maxOffsetX = Math.max(0, state.referenceNumFrames * state.pixelsPerFrame - state.canvasWidth);
+ state.viewportOffsetX = Math.max(0, Math.min(maxOffsetX, state.viewportOffsetX));
+ render();
+ return;
+ }
+
+ // Normal mode: pan vertically (disabled for logarithmic frequency axis)
+ // Note: With logarithmic frequency scale, vertical pan doesn't make sense
+ // because the frequency range (FREQ_MIN to FREQ_MAX) is always scaled to fit canvas height.
+ // Vertical pan only works in linear frequency mode.
+ if (!USE_LOG_SCALE) {
+ state.viewportOffsetY += e.deltaY;
+ const maxOffsetY = Math.max(0, DCT_SIZE * state.pixelsPerBin - state.canvasHeight);
+ state.viewportOffsetY = Math.max(0, Math.min(maxOffsetY, state.viewportOffsetY));
+ render();
+ }
+}
+
// ============================================================================
// Coordinate Conversion
// ============================================================================
function screenToSpectrogram(screenX, screenY) {
- const frame = Math.round(screenX / state.pixelsPerFrame);
+ const frame = Math.round((screenX + state.viewportOffsetX) / state.pixelsPerFrame);
let freqHz;
if (USE_LOG_SCALE) {
@@ -891,7 +939,7 @@ function screenToSpectrogram(screenX, screenY) {
}
function spectrogramToScreen(frame, freqHz) {
- const x = frame * state.pixelsPerFrame;
+ const x = frame * state.pixelsPerFrame - state.viewportOffsetX;
let y;
if (USE_LOG_SCALE) {
@@ -901,11 +949,11 @@ function spectrogramToScreen(frame, freqHz) {
const clampedFreq = Math.max(FREQ_MIN, Math.min(FREQ_MAX, freqHz));
const logFreq = Math.log10(clampedFreq);
const normalizedY = (logFreq - logMin) / (logMax - logMin);
- y = state.canvasHeight * (1.0 - normalizedY); // Flip Y back to screen coords
+ y = state.canvasHeight * (1.0 - normalizedY) - state.viewportOffsetY; // Flip Y back to screen coords
} else {
// Linear frequency mapping (old behavior)
const bin = (freqHz / (SAMPLE_RATE / 2)) * state.referenceDctSize;
- y = state.canvasHeight - (bin * state.pixelsPerBin);
+ y = state.canvasHeight - (bin * state.pixelsPerBin) - state.viewportOffsetY;
}
return {x, y};
@@ -953,7 +1001,10 @@ function render() {
function drawPlayhead(ctx) {
if (!state.isPlaying || state.playbackCurrentFrame < 0) return;
- const x = state.playbackCurrentFrame * state.pixelsPerFrame;
+ const x = state.playbackCurrentFrame * state.pixelsPerFrame - state.viewportOffsetX;
+
+ // Only draw if playhead is visible in viewport
+ if (x < 0 || x > state.canvasWidth) return;
// Draw vertical line
ctx.strokeStyle = '#ff3333'; // Bright red
@@ -1553,7 +1604,7 @@ function spectrogramToAudio(spectrogram, dctSize, numFrames) {
const window = hanningWindowArray;
for (let frameIdx = 0; frameIdx < numFrames; frameIdx++) {
- // Extract frame
+ // Extract frame (no window is applied before the IDCT; windowing happens at overlap-add below)
const frame = new Float32Array(dctSize);
for (let b = 0; b < dctSize; b++) {
frame[b] = spectrogram[frameIdx * dctSize + b];
@@ -1562,7 +1613,7 @@ function spectrogramToAudio(spectrogram, dctSize, numFrames) {
// IDCT
const timeFrame = javascript_idct_512(frame);
- // Apply window and overlap-add
+ // Apply synthesis window for overlap-add
const frameStart = frameIdx * hopSize;
for (let i = 0; i < dctSize; i++) {
if (frameStart + i < audioLength) {