summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/editor/dct.js31
-rw-r--r--tools/editor/index.html36
-rw-r--r--tools/editor/script.js648
-rw-r--r--tools/editor/sdf.js39
-rw-r--r--tools/editor/style.css79
-rw-r--r--tools/specplay.cc212
-rw-r--r--tools/specplay_README.md164
-rw-r--r--tools/spectool.cc121
-rw-r--r--tools/spectral_editor/FEATURES.md151
-rw-r--r--tools/spectral_editor/dct.js101
-rw-r--r--tools/spectral_editor/script.js89
-rw-r--r--tools/timeline_editor/index.html41
12 files changed, 786 insertions, 926 deletions
diff --git a/tools/editor/dct.js b/tools/editor/dct.js
deleted file mode 100644
index e48ce2b..0000000
--- a/tools/editor/dct.js
+++ /dev/null
@@ -1,31 +0,0 @@
-const dctSize = 512; // Default DCT size, read from header
-
-// --- Utility Functions for Audio Processing ---
-// JavaScript equivalent of C++ idct_512
-function javascript_idct_512(input) {
- const output = new Float32Array(dctSize);
- const PI = Math.PI;
- const N = dctSize;
-
- for (let n = 0; n < N; ++n) {
- let sum = input[0] / 2.0;
- for (let k = 1; k < N; ++k) {
- sum += input[k] * Math.cos((PI / N) * k * (n + 0.5));
- }
- output[n] = sum * (2.0 / N);
- }
- return output;
-}
-
-// Hanning window for smooth audio transitions (JavaScript equivalent)
-function hanningWindow(size) {
- const window = new Float32Array(size);
- const PI = Math.PI;
- for (let i = 0; i < size; i++) {
- window[i] = 0.5 * (1 - Math.cos((2 * PI * i) / (size - 1)));
- }
- return window;
-}
-
-const hanningWindowArray = hanningWindow(dctSize); // Pre-calculate window
-
diff --git a/tools/editor/index.html b/tools/editor/index.html
deleted file mode 100644
index 82a11ce..0000000
--- a/tools/editor/index.html
+++ /dev/null
@@ -1,36 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
- <meta charset="UTF-8">
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
- <title>Spectrogram Editor</title>
- <link rel="stylesheet" href="style.css">
-</head>
-<body>
- <h1>Spectrogram Editor</h1>
-
- <input type="file" id="specFileInput" accept=".spec">
- <label for="specFileInput">Load SPEC File</label>
-
- <div id="editorContainer">
- <canvas id="spectrogramCanvas"></canvas>
- <div id="controls">
- <h2>Tools</h2>
- <button id="lineTool">Line</button>
- <button id="ellipseTool">Ellipse</button>
- <button id="noiseTool">Noise</button>
- <button id="undoButton">Undo</button>
- <button id="redoButton">Redo</button>
- <hr>
- <h2>Playback</h2>
- <button id="listenOriginalButton">Listen Original</button>
- <button id="listenGeneratedButton">Listen Generated</button>
- <!-- Add more tool controls later -->
- </div>
- </div>
-
- <script src="sdf.js"></script>
- <script src="dct.js"></script>
- <script src="script.js"></script>
-</body>
-</html>
diff --git a/tools/editor/script.js b/tools/editor/script.js
deleted file mode 100644
index abfd4f4..0000000
--- a/tools/editor/script.js
+++ /dev/null
@@ -1,648 +0,0 @@
-// This is the core JavaScript for the Spectrogram Editor.
-// It handles file loading (.spec), visualization, tool interaction, and saving.
-
-// --- Global Variables ---
-let currentSpecData = null; // Stores the currently displayed/edited spectrogram data
-let originalSpecData = null; // Stores the pristine, initially loaded spectrogram data
-
-let undoStack = [];
-let redoStack = [];
-const MAX_HISTORY_SIZE = 50;
-
-let activeTool = null; // 'line', 'ellipse', 'noise', etc.
-let isDrawing = false;
-let startX, startY; // For tracking mouse down position
-
-let shapes = []; // Array to store all drawn shapes (lines, ellipses, etc.)
-
-// Web Audio Context
-const audioContext = new (window.AudioContext || window.webkitAudioContext)();
-
-// Audio Constants (should match C++ side)
-const SAMPLE_RATE = 32000;
-const MAX_FREQ = SAMPLE_RATE / 2; // Nyquist frequency
-const MIN_FREQ = 20; // Lower bound for log scale visualization
-
-const SDF_FALLOFF_FACTOR = 10.0; // Adjust this value to control the softness of SDF edges.
-
-// --- Button Element Declarations ---
-const specFileInput = document.getElementById('specFileInput');
-const lineToolButton = document.getElementById('lineTool');
-const ellipseToolButton = document.getElementById('ellipseTool');
-const noiseToolButton = document.getElementById('noiseTool');
-const undoButton = document.getElementById('undoButton');
-const redoButton = document.getElementById('redoButton');
-const listenOriginalButton = document.getElementById('listenOriginalButton');
-const listenGeneratedButton = document.getElementById('listenGeneratedButton');
-
-// --- Event Listeners ---
-specFileInput.addEventListener('change', handleFileSelect);
-lineToolButton.addEventListener('click', () => { activeTool = 'line'; console.log('Line tool selected'); });
-ellipseToolButton.addEventListener('click', () => { activeTool = 'ellipse'; console.log('Ellipse tool selected'); });
-noiseToolButton.addEventListener('click', () => { activeTool = 'noise'; console.log('Noise tool selected'); });
-undoButton.addEventListener('click', handleUndo);
-redoButton.addEventListener('click', handleRedo);
-listenOriginalButton.addEventListener('click', () => {
- if (originalSpecData) {
- playSpectrogramData(originalSpecData);
- } else {
- alert("No original SPEC data loaded.");
- }
-});
-listenGeneratedButton.addEventListener('click', () => {
- if (currentSpecData) {
- redrawCanvas(); // Ensure currentSpecData reflects all shapes before playing
- playSpectrogramData(currentSpecData);
- } else {
- alert("No generated SPEC data to play.");
- }
-});
-
-
-// --- Utility to map canvas coords to spectrogram bins/frames (LOG SCALE) ---
-// Maps a linear frequency bin index to its corresponding frequency in Hz
-function binIndexToFreq(binIndex) {
- return (binIndex / (dctSize / 2)) * MAX_FREQ;
-}
-
-// Maps a frequency in Hz to its corresponding linear bin index
-function freqToBinIndex(freq) {
- return Math.floor((freq / MAX_FREQ) * (dctSize / 2));
-}
-
-// Maps a frequency (Hz) to its corresponding log-scaled bin index
-function freqToBinIndexLog(freq) {
- if (freq < MIN_FREQ) freq = MIN_FREQ; // Clamp minimum frequency
- const logMin = Math.log(MIN_FREQ);
- const logMax = Math.log(MAX_FREQ);
- const logFreq = Math.log(freq);
- const normalizedLog = (logFreq - logMin) / (logMax - logMin);
- return Math.floor(normalizedLog * dctSize);
-}
-
-// Maps a log-scaled bin index to its corresponding frequency in Hz
-function binIndexToFreqLog(binIndex) {
- const normalizedLog = binIndex / dctSize;
- const logMin = Math.log(MIN_FREQ);
- const logMax = Math.log(MAX_FREQ);
- const logFreq = normalizedLog * (logMax - logMin) + logMin;
- return Math.exp(logFreq);
-}
-
-// Converts a frequency (Hz) to a Y-coordinate on the canvas (log scale)
-function freqToCanvasYLog(freq, canvasHeight) {
- if (freq < MIN_FREQ) freq = MIN_FREQ; // Clamp minimum frequency
- const logMin = Math.log(MIN_FREQ);
- const logMax = Math.log(MAX_FREQ);
- const logFreq = Math.log(freq);
- const normalizedLog = (logFreq - logMin) / (logMax - logMin);
- return canvasHeight * (1 - normalizedLog); // Y-axis is inverted
-}
-
-// Converts a Y-coordinate on the canvas to a frequency (Hz) (log scale)
-function canvasYToFreqLog(canvasY, canvasHeight) {
- const normalizedLog = 1 - (canvasY / canvasHeight);
- const logMin = Math.log(MIN_FREQ);
- const logMax = Math.log(MAX_FREQ);
- const logFreq = normalizedLog * (logMax - logMin) + logMin;
- return Math.exp(logFreq);
-}
-
-// Converts canvas Y-coordinate to log-scaled bin index
-function canvasYToBinIndexLog(canvasY, specData) {
- const freq = canvasYToFreqLog(canvasY, canvas.height);
- return freqToBinIndex(freq); // Use linear bin index from calculated log freq
-}
-
-// Converts log-scaled bin index to canvas Y-coordinate
-function binIndexToCanvasYLog(binIndex, specData) {
- const freq = binIndexToFreq(binIndex);
- return freqToCanvasYLog(freq, canvas.height);
-}
-
-// Helper to get frequency delta from canvas delta (for ellipse radius in freq)
-function canvasDeltaYToFreqDeltaLog(canvasDeltaY, canvasHeight) {
- // This is an approximation as delta in log scale is not linear
- // For small deltas around a center, it can be approximated
- const centerCanvasY = canvasHeight / 2;
- const freqAtCenter = canvasYToFreqLog(centerCanvasY, canvasHeight);
- const freqAtCenterPlusDelta = canvasYToFreqLog(centerCanvasY - canvasDeltaY, canvasHeight);
- return Math.abs(freqAtCenterPlusDelta - freqAtCenter);
-}
-
-// Initial setup for canvas size (can be updated on window resize)
-window.addEventListener('resize', () => {
- if (originalSpecData) {
- canvas.width = window.innerWidth * 0.7;
- canvas.height = 400; // Fixed height
- redrawCanvas();
- }
-});
-
-// Initial call to set button states
-updateUndoRedoButtons();
-
-// --- File Handling Functions ---
-async function handleFileSelect(event) {
- const file = event.target.files[0];
- if (!file) {
- return;
- }
-
- try {
- const buffer = await file.arrayBuffer();
- const dataView = new DataView(buffer);
-
- // Parse SPEC header
- const header = {
- magic: String.fromCharCode(...new Uint8Array(buffer.slice(0, 4))),
- version: dataView.getInt32(4, true),
- dct_size: dataView.getInt32(8, true),
- num_frames: dataView.getInt32(12, true)
- };
-
- if (header.magic !== "SPEC" || header.version !== 1) {
- console.error("Invalid SPEC file format.");
- alert("Invalid SPEC file format. Please load a valid .spec file.");
- return;
- }
-
- if (dctSize != header.dct_size) {
- alert("Invalid dctSize in SPEC file");
- return;
- }
- const dataStart = 16;
- const numBytes = header.num_frames * header.dct_size * Float32Array.BYTES_PER_ELEMENT;
- const spectralDataFloat = new Float32Array(buffer, dataStart, header.num_frames * header.dct_size);
-
- originalSpecData = { header: header, data: new Float32Array(spectralDataFloat) }; // Store pristine copy
- currentSpecData = { header: header, data: new Float32Array(spectralDataFloat) }; // Editable copy
-
- shapes = []; // Clear shapes on new file load
- undoStack = []; // Clear undo history
- redoStack = []; // Clear redo history
-
- console.log("Loaded SPEC file:", header);
- redrawCanvas(); // Redraw with new data
-
- } catch (error) {
- console.error("Error loading SPEC file:", error);
- alert("Failed to load SPEC file. Check console for details.");
- }
-}
-
-// --- Spectrogram Visualization ---
-const canvas = document.getElementById('spectrogramCanvas');
-const ctx = canvas.getContext('2d');
-
-// Add canvas event listeners
-canvas.addEventListener('mousedown', handleMouseDown);
-canvas.addEventListener('mousemove', handleMouseMove);
-canvas.addEventListener('mouseup', handleMouseUp);
-canvas.addEventListener('mouseout', handleMouseUp); // Treat mouse out as mouse up
-
-// Function to get a color based on intensity (0 to 1)
-function getColorForIntensity(intensity) {
- // Example: Blue to white/yellow gradient
- const log_intensity = Math.log(1. + intensity) / Math.log(2.);
- const h = (1 - log_intensity) * 240; // Hue from blue (240) to red (0), inverse for intensity
- const s = 60.; // Saturation
- const l = log_intensity * 60 + 30; // Lightness from 30 to 90
- return `hsl(${h}, ${s}%, ${l}%)`;
-}
-
-function drawSpectrogram(specData) {
- const width = canvas.width;
- const height = canvas.height;
-
- ctx.clearRect(0, 0, width, height);
- ctx.fillStyle = '#ffffff';
- ctx.fillRect(0, 0, width, height);
-
- if (!specData || !specData.data || specData.header.num_frames === 0 || specData.data.length === 0) {
- console.warn("No spectrogram data or invalid header/data to draw.");
- return;
- }
-
- const numFrames = specData.header.num_frames;
- const frameWidth = width / numFrames; // Width of each time frame
-
- // Draw each frame's spectral data with log frequency scale
- for (let frameIndex = 0; frameIndex < numFrames; frameIndex++) {
- const frameDataStart = frameIndex * dctSize;
- const xPos = frameIndex * frameWidth;
-
- // To draw with log scale, we iterate over canvas y-coordinates
- // and map them back to frequency bins
- for (let y = 0; y < height; y++) {
- const binIndex = canvasYToBinIndexLog(y, specData);
- if (binIndex < 0 || binIndex >= dctSize) continue; // Out of bounds
-
- const value = specData.data[frameDataStart + binIndex];
- const intensity = Math.min(1, Math.abs(value) / 1.0); // Assuming values are normalized to [-1, 1]
-
- ctx.fillStyle = getColorForIntensity(intensity);
- ctx.fillRect(xPos, height - y - 1, frameWidth, 1); // Draw a 1-pixel height line for each y
- }
- }
-
- // Draw active shapes on top (previews for current drawing tool)
- shapes.forEach(shape => {
- drawShape(shape);
- });
-}
-
-function drawShape(shape) {
- // This draws the final, persistent shape. Preview is drawn in handleMouseMove.
- ctx.strokeStyle = shape.color || 'red';
- ctx.lineWidth = shape.width || 2;
-
- switch (shape.type) {
- case 'line':
- ctx.beginPath();
- ctx.moveTo(shape.x1, shape.y1);
- ctx.lineTo(shape.x2, shape.y2);
- ctx.stroke();
- break;
- case 'ellipse':
- ctx.beginPath();
- ctx.ellipse(shape.cx, shape.cy, shape.rx, shape.ry, 0, 0, 2 * Math.PI);
- ctx.stroke();
- break;
- case 'noise_rect': // Noise is visualized as a rectangle
- ctx.fillStyle = 'rgba(0, 0, 255, 0.2)';
- ctx.fillRect(shape.x, shape.y, shape.width, shape.height);
- ctx.strokeStyle = 'blue';
- ctx.strokeRect(shape.x, shape.y, shape.width, shape.height);
- break;
- }
-}
-
-// --- Mouse Event Handlers ---
-function getMousePos(event) {
- const rect = canvas.getBoundingClientRect();
- return {
- x: event.clientX - rect.left,
- y: event.clientY - rect.top
- };
-}
-
-function handleMouseDown(event) {
- if (!activeTool || !currentSpecData) return;
- isDrawing = true;
- const pos = getMousePos(event);
- startX = pos.x;
- startY = pos.y;
-}
-
-function handleMouseMove(event) {
- if (!isDrawing || !activeTool) return;
- const pos = getMousePos(event);
-
- redrawCanvas(); // Clear and redraw persistent state
-
- ctx.strokeStyle = 'rgba(0, 0, 0, 0.5)'; // Preview color
- ctx.lineWidth = 1;
- ctx.setLineDash([5, 5]); // Dashed line for preview
-
- switch (activeTool) {
- case 'line':
- ctx.beginPath();
- ctx.moveTo(startX, startY);
- ctx.lineTo(pos.x, pos.y);
- ctx.stroke();
- break;
- case 'ellipse':
- // Draw preview ellipse based on start and current pos (bounding box)
- const rx = Math.abs(pos.x - startX) / 2;
- const ry = Math.abs(pos.y - startY) / 2;
- const cx = startX + (pos.x - startX) / 2;
- const cy = startY + (pos.y - startY) / 2;
- if (rx > 0 && ry > 0) {
- ctx.beginPath();
- ctx.ellipse(cx, cy, rx, ry, 0, 0, 2 * Math.PI);
- ctx.stroke();
- }
- break;
- case 'noise':
- // Draw preview rectangle for noise area
- const rectX = Math.min(startX, pos.x);
- const rectY = Math.min(startY, pos.y);
- const rectW = Math.abs(pos.x - startX);
- const rectH = Math.abs(pos.y - startY);
- ctx.strokeRect(rectX, rectY, rectW, rectH);
- break;
- }
- ctx.setLineDash([]); // Reset line dash
-
- // debug the mouse position by draw a white square
- ctx.fillStyle = '#fff';
- ctx.fillRect(pos.x - 10, pos.y - 10, 20, 20);
-}
-
-function handleMouseUp(event) {
- if (!isDrawing || !activeTool || !currentSpecData) return;
- isDrawing = false;
- const endPos = getMousePos(event);
-
- let newShape = null;
-
- switch (activeTool) {
- case 'line': {
- const startCoords = canvasToSpectrogramCoords(startX, startY, currentSpecData);
- const endCoords = canvasToSpectrogramCoords(endPos.x, endPos.y, currentSpecData);
-
- newShape = {
- type: 'line',
- // Canvas coordinates for drawing visual representation (unchanged)
- x1: startX, y1: startY,
- x2: endPos.x, y2: endPos.y,
- // World coordinates for SDF calculations (frame and log-scaled frequency)
- frame1_world: startCoords.frame,
- freq1_world: binIndexToFreqLog(startCoords.bin),
- frame2_world: endCoords.frame,
- freq2_world: binIndexToFreqLog(endCoords.bin),
- amplitude: 0.5, // Default amplitude
- width: 2, // Visual width in canvas pixels, not directly used by SDF, but kept for drawing
- color: 'red',
- falloff: SDF_FALLOFF_FACTOR, // SDF falloff factor
- };
- break;
- }
- case 'ellipse': {
- const rx = Math.abs(endPos.x - startX) / 2;
- const ry = Math.abs(endPos.y - startY) / 2;
- const cx = startX + (pos.x - startX) / 2;
- const cy = startY + (pos.y - startY) / 2;
-
- const centerCoords = canvasToSpectrogramCoords(cx, cy, currentSpecData);
- const halfWidthFrames = Math.floor((rx / canvas.width) * currentSpecData.header.num_frames);
-
- const startFreq = canvasYToFreqLog(startY, canvas.height);
- const endFreq = canvasYToFreqLog(endPos.y, canvas.height);
- const centerFreq = (startFreq + endFreq) / 2;
- const halfHeightFreq = Math.abs(startFreq - endFreq) / 2;
-
-
- newShape = {
- type: 'ellipse',
- // Canvas coordinates for drawing visual representation (unchanged)
- cx: cx, cy: cy,
- rx: rx, ry: ry,
- // World coordinates for SDF calculations
- center_frame_world: centerCoords.frame,
- center_freq_world: centerFreq,
- radius_frames_world: halfWidthFrames,
- radius_freq_world: halfHeightFreq,
- amplitude: 0.5,
- color: 'green',
- falloff: SDF_FALLOFF_FACTOR,
- };
- break;
- }
- case 'noise': {
- const rectX = Math.min(startX, endPos.x);
- const rectY = Math.min(startY, endPos.y);
- const rectW = Math.abs(endPos.x - startX);
- const rectH = Math.abs(endPos.y - startY);
-
- const startCoords = canvasToSpectrogramCoords(rectX, rectY, currentSpecData);
- const endCoords = canvasToSpectrogramCoords(rectX + rectW, rectY + rectH, currentSpecData);
-
- const centerFrame = Math.floor((startCoords.frame + endCoords.frame) / 2);
- const centerFreq = (binIndexToFreqLog(startCoords.bin) + binIndexToFreqLog(endCoords.bin)) / 2;
- const halfExtentFrames = Math.floor(Math.abs(endCoords.frame - startCoords.frame) / 2);
- const halfExtentFreq = Math.abs(binIndexToFreqLog(endCoords.bin) - binIndexToFreqLog(startCoords.bin)) / 2;
-
- newShape = {
- type: 'noise_rect',
- // Canvas coordinates for drawing visual representation (unchanged)
- x: rectX, y: rectY,
- width: rectW, height: rectH,
- // World coordinates for SDF calculations
- center_frame_world: centerFrame,
- center_freq_world: centerFreq,
- half_extent_frames_world: halfExtentFrames,
- half_extent_freq_world: halfExtentFreq,
- amplitude: 0.3, // Default noise amplitude
- density: 0.5, // Default noise density
- color: 'blue',
- falloff: 0.0, // No falloff for pure noise inside rect
- };
- break;
- }
- }
-
- if (newShape) {
- // Capture the state *before* applying the new shape for undo
- const previousDataSnapshot = new Float32Array(currentSpecData.data); // Copy of actual data
- const previousShapesSnapshot = shapes.map(s => ({ ...s })); // Deep copy shapes array
-
- applyShapeToSpectrogram(newShape, currentSpecData); // Modify currentSpecData directly
- shapes.push(newShape);
- addAction({
- type: 'add_shape',
- shape: newShape,
- undo: () => {
- // To undo, restore previous shapes and previous data
- shapes = previousShapesSnapshot;
- currentSpecData.data = previousDataSnapshot;
- },
- redo: () => {
- // To redo, add the shape back and apply it to current data
- shapes.push(newShape);
- applyShapeToSpectrogram(newShape, currentSpecData);
- }
- });
- }
- redrawCanvas(); // Final redraw after action
- updateUndoRedoButtons();
-}
-
-// --- Spectrogram Data Manipulation ---
-function applyShapeToSpectrogram(shape, targetSpecData) {
- if (!targetSpecData || !targetSpecData.data || targetSpecData.header.num_frames === 0) return;
-
- const numFrames = targetSpecData.header.num_frames;
-
- // Determine a bounding box for optimization (iterate only relevant cells)
- let minFrame = 0, maxFrame = numFrames - 1;
- let minBin = 0, maxBin = dctSize - 1;
-
- // Calculate tighter bounding boxes for each shape type
- switch (shape.type) {
- case 'line':
- minFrame = Math.min(shape.frame1_world, shape.frame2_world) - Math.ceil(shape.width / 2);
- maxFrame = Math.max(shape.frame1_world, shape.frame2_world) + Math.ceil(shape.width / 2);
- // For frequency, approximate by visual width or a fixed range if needed
- minBin = freqToBinIndex(Math.min(shape.freq1_world, shape.freq2_world)) - Math.ceil(shape.width / 2);
- maxBin = freqToBinIndex(Math.max(shape.freq1_world, shape.freq2_world)) + Math.ceil(shape.width / 2);
- break;
- case 'ellipse':
- minFrame = shape.center_frame_world - shape.radius_frames_world - 1;
- maxFrame = shape.center_frame_world + shape.radius_frames_world + 1;
- minBin = freqToBinIndex(shape.center_freq_world - shape.radius_freq_world) - 1; // Approx bin range from world freq
- maxBin = freqToBinIndex(shape.center_freq_world + shape.radius_freq_world) + 1; // Approx bin range from world freq
- break;
- case 'noise_rect':
- minFrame = shape.center_frame_world - shape.half_extent_frames_world - 1;
- maxFrame = shape.center_frame_world + shape.half_extent_frames_world + 1;
- minBin = freqToBinIndex(shape.center_freq_world - shape.half_extent_freq_world) - 1; // Approx bin range from world freq
- maxBin = freqToBinIndex(shape.center_freq_world + shape.half_extent_freq_world) + 1; // Approx bin range from world freq
- break;
- }
-
- minFrame = Math.max(0, minFrame);
- maxFrame = Math.min(targetSpecData.header.num_frames - 1, maxFrame); // Use targetSpecData.header.num_frames
- minBin = Math.max(0, minBin);
- maxBin = Math.min(dctSize - 1, maxBin);
-
- for (let f = minFrame; f <= maxFrame; f++) {
- for (let b = minBin; b <= maxBin; b++) {
- const p_world = vec2(f, binIndexToFreqLog(b));
- let distance = Infinity;
-
- switch (shape.type) {
- case 'line':
- const a_line = vec2(shape.frame1_world, shape.freq1_world);
- const b_line = vec2(shape.frame2_world, shape.freq2_world);
- distance = sdSegment(p_world, a_line, b_line);
- break;
- case 'ellipse':
- const center_ellipse = vec2(shape.center_frame_world, shape.center_freq_world);
- const r_ellipse = vec2(shape.radius_frames_world, shape.radius_freq_world);
- distance = sdEllipse(sub(p_world, center_ellipse), r_ellipse);
- break;
- case 'noise_rect':
- const center_box = vec2(shape.center_frame_world, shape.center_freq_world);
- const r_box = vec2(shape.half_extent_frames_world, shape.half_extent_freq_world);
- distance = sdBox(sub(p_world, center_box), r_box);
- if (distance <= 0 && Math.random() < shape.density) { // Only add noise inside the box with density
- targetSpecData.data[f * dctSize + b] += (Math.random() * 2 - 1) * shape.amplitude;
- }
- break;
- }
-
- if (shape.type !== 'noise_rect') { // Noise is handled differently for amplitude
- const attenuation = Math.exp(-distance * distance * shape.falloff);
- targetSpecData.data[f * dctSize + b] += shape.amplitude * attenuation;
- }
-
- // Clamp final value
- targetSpecData.data[f * dctSize + b] = Math.max(-1, Math.min(1, targetSpecData.data[f * dctSize + b]));
- }
- }
-}
-
-// --- Undo/Redo Logic ---
-function addAction(action) {
- undoStack.push(action);
- if (undoStack.length > MAX_HISTORY_SIZE) {
- undoStack.shift();
- }
- redoStack = [];
- updateUndoRedoButtons();
-}
-
-function handleUndo() {
- if (undoStack.length === 0) {
- console.log('Undo stack is empty.');
- return;
- }
- const actionToUndo = undoStack.pop();
- actionToUndo.undo();
- redoStack.push(actionToUndo);
- redrawCanvas();
- updateUndoRedoButtons();
-}
-
-function handleRedo() {
- if (redoStack.length === 0) {
- console.log('Redo stack is empty.');
- return;
- }
-
- const actionToRedo = redoStack.pop();
- actionToRedo.redo();
- undoStack.push(actionToRedo);
- redrawCanvas();
- updateUndoRedoButtons();
-}
-
-function redrawCanvas() {
- console.log('Redrawing canvas...');
- if (!originalSpecData) {
- ctx.clearRect(0, 0, canvas.width, canvas.height);
- ctx.fillStyle = '#ffffff';
- ctx.fillRect(0, 0, canvas.width, canvas.height);
- return;
- }
-
- // Start with a fresh copy of the original data
- currentSpecData.data = new Float32Array(originalSpecData.data);
-
- // Replay all shapes from the `shapes` array to `currentSpecData`
- shapes.forEach(shape => {
- applyShapeToSpectrogram(shape, currentSpecData);
- });
-
- drawSpectrogram(currentSpecData);
-}
-
-function updateUndoRedoButtons() {
- undoButton.disabled = undoStack.length === 0;
- redoButton.disabled = redoStack.length === 0;
-}
-
-// Initial setup for canvas size (can be updated on window resize)
-window.addEventListener('resize', () => {
- if (originalSpecData) {
- canvas.width = window.innerWidth * 0.7;
- canvas.height = 400; // Fixed height
- redrawCanvas();
- }
-});
-
-// Initial call to set button states
-updateUndoRedoButtons();
-
-// --- Audio Playback Functions ---
-let currentAudioSource = null; // To stop currently playing audio
-
-async function playSpectrogramData(specData) {
- if (!specData || !specData.data || specData.header.num_frames === 0) {
- alert("No spectrogram data to play.");
- return;
- }
-
- if (currentAudioSource) {
- currentAudioSource.stop();
- currentAudioSource.disconnect();
- currentAudioSource = null;
- }
-
- const sampleRate = SAMPLE_RATE; // Fixed sample rate
- const numFrames = specData.header.num_frames;
- const totalAudioSamples = numFrames * dctSize; // Total samples in time domain
-
- const audioBuffer = audioContext.createBuffer(1, totalAudioSamples, sampleRate);
- const audioData = audioBuffer.getChannelData(0); // Mono channel
-
- // Convert spectrogram frames (frequency domain) to audio samples (time domain)
- for (let frameIndex = 0; frameIndex < numFrames; frameIndex++) {
- const spectralFrame = specData.data.slice(frameIndex * dctSize, (frameIndex + 1) * dctSize);
- const timeDomainFrame = javascript_idct_512(spectralFrame);
-
- // Apply Hanning window for smooth transitions
- for (let i = 0; i < dctSize; i++) {
- audioData[frameIndex * dctSize + i] = timeDomainFrame[i] * hanningWindowArray[i];
- }
- }
-
- currentAudioSource = audioContext.createBufferSource();
- currentAudioSource.buffer = audioBuffer;
- currentAudioSource.connect(audioContext.destination);
- currentAudioSource.start();
-
- console.log(`Playing audio (Sample Rate: ${sampleRate}, Duration: ${audioBuffer.duration.toFixed(2)}s)`);
-}
diff --git a/tools/editor/sdf.js b/tools/editor/sdf.js
deleted file mode 100644
index c68d79a..0000000
--- a/tools/editor/sdf.js
+++ /dev/null
@@ -1,39 +0,0 @@
-// --- Signed Distance Functions (SDFs) ---
-// Generic 2D vector operations
-function vec2(x, y) { return { x: x, y: y }; }
-function length(v) { return Math.sqrt(v.x * v.x + v.y * v.y); }
-function dot(v1, v2) { return v1.x * v2.x + v1.y * v2.y; }
-function sub(v1, v2) { return vec2(v1.x - v2.x, v1.y - v2.y); }
-function mul(v, s) { return vec2(v.x * s, v.y * s); }
-function div(v, s) { return vec2(v.x / s, v.y / s); }
-function normalize(v) { return div(v, length(v)); }
-function clamp(x, minVal, maxVal) { return Math.max(minVal, Math.min(x, maxVal)); }
-function abs(v) { return vec2(Math.abs(v.x), Math.abs(v.y)); }
-function max(v1, v2) { return vec2(Math.max(v1.x, v2.x), Math.max(v1.y, v2.y)); }
-function sign(x) { return (x > 0) ? 1 : ((x < 0) ? -1 : 0); }
-
-// sdSegment(p, a, b) - signed distance to a line segment
-// p: point, a: segment start, b: segment end
-function sdSegment(p, a, b) {
- const pa = sub(p, a);
- const ba = sub(b, a);
- const h = clamp(dot(pa, ba) / dot(ba, ba), 0.0, 1.0);
- return length(sub(pa, mul(ba, h)));
-}
-
-// sdEllipse(p, r) - signed distance to an ellipse (p relative to center, r is half-extents)
-// p: point relative to ellipse center, r: half-extents (rx, ry)
-function sdEllipse(p, r) {
- const k0 = vec2(1, length(div(p, r)));
- const k1 = vec2(length(div(p, r)), 1);
- const f = ((dot(div(mul(p, p), k0), vec2(1, 1)) < dot(div(mul(p, p), k1), vec2(1, 1))) ? k0 : k1);
- return length(sub(p, mul(r, normalize(mul(f, p))))) * sign(length(p) - r.x); // Simplified, original has length(p)-r.x which is only for circular
-}
-
-// sdBox(p, r) - signed distance to a rectangle (p relative to center, r is half-extents)
-// p: point relative to box center, r: half-extents (hx, hy)
-function sdBox(p, r) {
- const q = sub(abs(p), r);
- return length(max(q, vec2(0, 0))) + Math.min(0.0, Math.max(q.x, q.y));
-}
-
diff --git a/tools/editor/style.css b/tools/editor/style.css
deleted file mode 100644
index e0014cf..0000000
--- a/tools/editor/style.css
+++ /dev/null
@@ -1,79 +0,0 @@
-body {
- font-family: sans-serif;
- margin: 20px;
- background-color: #f4f4f4;
-}
-
-h1, h2 {
- color: #333;
-}
-
-#editorContainer {
- display: flex;
- margin-top: 20px;
-}
-
-#spectrogramCanvas {
- border: 1px solid #ccc;
- background-color: #fff;
- margin-right: 20px;
-}
-
-#controls {
- border: 1px solid #ccc;
- padding: 15px;
- background-color: #eee;
- min-width: 200px;
-}
-
-#controls button {
- display: block;
- width: 100%;
- margin-bottom: 10px;
- padding: 10px;
- cursor: pointer;
-}
-
-#undoButton {
- background-color: #d9534f;
- color: white;
- border: none;
- border-radius: 4px;
-}
-
-#undoButton:hover {
- background-color: #c9302c;
-}
-
-#redoButton {
- background-color: #5cb85c;
- color: white;
- border: none;
- border-radius: 4px;
-}
-
-#redoButton:hover {
- background-color: #4cae4c;
-}
-
-/* New styles for playback buttons */
-#listenOriginalButton,
-#listenGeneratedButton {
- background-color: #5bc0de;
- color: white;
- border: none;
- border-radius: 4px;
- margin-top: 5px;
-}
-
-#listenOriginalButton:hover,
-#listenGeneratedButton:hover {
- background-color: #31b0d5;
-}
-
-hr {
- border: 0;
- height: 1px;
- background-color: #ccc;
- margin: 20px 0;
-}
diff --git a/tools/specplay.cc b/tools/specplay.cc
new file mode 100644
index 0000000..9fa9355
--- /dev/null
+++ b/tools/specplay.cc
@@ -0,0 +1,212 @@
+// Standalone tool to play .spec or .wav files for debugging
+// Usage: ./specplay <file.spec|file.wav>
+
+#include "audio/dct.h"
+#include "audio/window.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MINIAUDIO_IMPLEMENTATION
+#include "miniaudio.h"
+
+// State shared between main() and audio_callback() via the device's pUserData.
+// NOTE(review): playback_pos is advanced inside audio_callback() and polled by
+// main() with no synchronization; presumably the callback runs on miniaudio's
+// own thread - confirm whether an atomic is needed.
+struct PlaybackState {
+ float* pcm_data; // mono float samples to play; freed by main()
+ size_t num_samples; // number of samples in pcm_data
+ size_t playback_pos; // index of the next sample audio_callback() will emit
+};
+
+// miniaudio data callback. Copies the next `frame_count` mono samples from the
+// PlaybackState stored in device->pUserData into the interleaved two-channel
+// output buffer. Samples outside [-1, 1] are reported on stderr and clamped;
+// once the PCM data is exhausted the remaining frames are filled with silence.
+// `input` is unused.
+void audio_callback(ma_device* device, void* output, const void* input,
+                    ma_uint32 frame_count) {
+  PlaybackState* st = (PlaybackState*)device->pUserData;
+  float* dst = (float*)output;
+
+  for (ma_uint32 frame = 0; frame < frame_count; frame++) {
+    float value = 0.0f;  // silence past the end of the data
+
+    if (st->playback_pos < st->num_samples) {
+      const size_t idx = st->playback_pos;
+      st->playback_pos = idx + 1;
+      value = st->pcm_data[idx];
+
+      // Clamp to [-1, 1] and warn if clipping
+      if (value > 1.0f || value < -1.0f) {
+        fprintf(stderr, "[CLIP at sample %zu: %.3f]\n", idx, value);
+        value = (value > 1.0f) ? 1.0f : -1.0f;
+      }
+    }
+
+    dst[frame * 2] = value;      // Left
+    dst[frame * 2 + 1] = value;  // Right (same mono sample)
+  }
+}
+
+// Loads a .spec file and synthesizes it to mono PCM, one idct_512() call per
+// frame, then prints peak/RMS statistics.
+//
+// path:            file to read.
+// out_num_samples: receives the number of PCM samples (num_frames * DCT_SIZE).
+// Returns a malloc()ed buffer the caller must free(), or nullptr on failure
+// (a message is printed to stderr first).
+float* load_spec(const char* path, size_t* out_num_samples) {
+  FILE* f = fopen(path, "rb");
+  if (!f) {
+    fprintf(stderr, "Failed to open %s\n", path);
+    return nullptr;
+  }
+
+  // Read SpecHeader
+  struct SpecHeader {
+    char magic[4];
+    int32_t version;
+    int32_t dct_size;
+    int32_t num_frames;
+  };
+
+  SpecHeader header;
+  if (fread(&header, sizeof(SpecHeader), 1, f) != 1) {
+    fprintf(stderr, "Failed to read SpecHeader\n");
+    fclose(f);
+    return nullptr;
+  }
+
+  // Validate header
+  if (memcmp(header.magic, "SPEC", 4) != 0) {
+    fprintf(stderr, "Invalid magic bytes (expected 'SPEC')\n");
+    fclose(f);
+    return nullptr;
+  }
+
+  printf("Loading .spec: version=%d, dct_size=%d, frames=%d\n", header.version,
+         header.dct_size, header.num_frames);
+
+  // The frame stride and idct_512() below both assume DCT_SIZE coefficients
+  // per frame, so a file written with any other size would be misread.
+  if (header.dct_size != (int32_t)DCT_SIZE) {
+    fprintf(stderr, "Unsupported dct_size %d (expected %d)\n", header.dct_size,
+            (int)DCT_SIZE);
+    fclose(f);
+    return nullptr;
+  }
+
+  // Reject non-positive counts: a negative value would wrap to a huge unsigned
+  // size, and zero frames would make the RMS below a 0/0 division. Also make
+  // sure the byte size of the buffers fits in size_t.
+  const size_t max_frames = ((size_t)-1) / sizeof(float) / (size_t)DCT_SIZE;
+  if (header.num_frames <= 0 || (size_t)header.num_frames > max_frames) {
+    fprintf(stderr, "Invalid frame count: %d\n", header.num_frames);
+    fclose(f);
+    return nullptr;
+  }
+
+  const size_t num_frames = (size_t)header.num_frames;
+
+  // Read spectral data (multiply in size_t so the count cannot overflow 32 bits)
+  const size_t spec_size = num_frames * (size_t)DCT_SIZE;
+  float* spec_data = (float*)malloc(spec_size * sizeof(float));
+  if (!spec_data) {
+    fprintf(stderr, "Out of memory reading spectral data\n");
+    fclose(f);
+    return nullptr;
+  }
+  if (fread(spec_data, sizeof(float), spec_size, f) != spec_size) {
+    fprintf(stderr, "Failed to read spectral data\n");
+    free(spec_data);
+    fclose(f);
+    return nullptr;
+  }
+  fclose(f);
+
+  // Convert to PCM via IDCT
+  float* pcm_data = (float*)malloc(spec_size * sizeof(float));
+  if (!pcm_data) {
+    fprintf(stderr, "Out of memory allocating PCM buffer\n");
+    free(spec_data);
+    return nullptr;
+  }
+  *out_num_samples = spec_size;
+
+  for (size_t frame = 0; frame < num_frames; frame++) {
+    const float* spectral_frame = spec_data + (frame * DCT_SIZE);
+    float* time_frame = pcm_data + (frame * DCT_SIZE);
+    idct_512(spectral_frame, time_frame);
+  }
+
+  free(spec_data);
+
+  // Analyze PCM statistics
+  float peak = 0.0f, rms_sum = 0.0f;
+  for (size_t i = 0; i < *out_num_samples; i++) {
+    float abs_val = fabsf(pcm_data[i]);
+    if (abs_val > peak)
+      peak = abs_val;
+    rms_sum += pcm_data[i] * pcm_data[i];
+  }
+  float rms = sqrtf(rms_sum / *out_num_samples);
+
+  printf("PCM stats: Peak=%.3f, RMS=%.3f\n", peak, rms);
+  if (peak > 1.0f) {
+    printf("[WARNING] Peak exceeds 1.0! Will clip during playback.\n");
+  }
+
+  return pcm_data;
+}
+
+// Decodes an audio file with miniaudio to mono float PCM at 32 kHz and prints
+// peak/RMS statistics.
+//
+// path:            file to decode.
+// out_num_samples: receives the number of samples actually decoded.
+// Returns a malloc()ed buffer the caller must free(), or nullptr on failure
+// (a message is printed to stderr first).
+float* load_wav(const char* path, size_t* out_num_samples) {
+  ma_decoder decoder;
+  ma_decoder_config config = ma_decoder_config_init(ma_format_f32, 1, 32000);
+
+  if (ma_decoder_init_file(path, &config, &decoder) != MA_SUCCESS) {
+    fprintf(stderr, "Failed to open WAV file: %s\n", path);
+    return nullptr;
+  }
+
+  // The length query can fail; without this check frame_count would be used
+  // uninitialized to size the buffer.
+  ma_uint64 frame_count = 0;
+  if (ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count) !=
+          MA_SUCCESS ||
+      frame_count == 0) {
+    fprintf(stderr, "Failed to determine length of %s\n", path);
+    ma_decoder_uninit(&decoder);
+    return nullptr;
+  }
+
+  float* pcm_data = (float*)malloc((size_t)frame_count * sizeof(float));
+  if (!pcm_data) {
+    fprintf(stderr, "Out of memory decoding %s\n", path);
+    ma_decoder_uninit(&decoder);
+    return nullptr;
+  }
+
+  // Trust the number of frames actually produced rather than the reported
+  // length, so an undecoded tail of the buffer is never played back.
+  ma_uint64 frames_read = 0;
+  ma_decoder_read_pcm_frames(&decoder, pcm_data, frame_count, &frames_read);
+  ma_decoder_uninit(&decoder);
+
+  if (frames_read == 0) {
+    fprintf(stderr, "No audio data decoded from %s\n", path);
+    free(pcm_data);
+    return nullptr;
+  }
+  *out_num_samples = (size_t)frames_read;
+
+  printf("Loaded .wav: %zu samples\n", *out_num_samples);
+
+  // Analyze PCM statistics
+  float peak = 0.0f, rms_sum = 0.0f;
+  for (size_t i = 0; i < *out_num_samples; i++) {
+    float abs_val = fabsf(pcm_data[i]);
+    if (abs_val > peak)
+      peak = abs_val;
+    rms_sum += pcm_data[i] * pcm_data[i];
+  }
+  float rms = sqrtf(rms_sum / *out_num_samples);
+
+  printf("PCM stats: Peak=%.3f, RMS=%.3f\n", peak, rms);
+
+  return pcm_data;
+}
+
+// Entry point: loads the file named on the command line (chosen by its
+// case-sensitive extension), prints its duration, and plays it through a
+// 32 kHz two-channel miniaudio device until every sample has been consumed.
+// Returns 0 on success and 1 on any error.
+int main(int argc, char** argv) {
+  if (argc != 2) {
+    fprintf(stderr, "Usage: %s <file.spec|file.wav>\n", argv[0]);
+    return 1;
+  }
+
+  const char* path = argv[1];
+  const char* ext = strrchr(path, '.');
+  const bool is_spec = ext && strcmp(ext, ".spec") == 0;
+  const bool is_decodable =
+      ext && (strcmp(ext, ".wav") == 0 || strcmp(ext, ".aif") == 0);
+
+  PlaybackState state = {};
+
+  if (is_spec) {
+    state.pcm_data = load_spec(path, &state.num_samples);
+  } else if (is_decodable) {
+    state.pcm_data = load_wav(path, &state.num_samples);
+  } else {
+    fprintf(stderr, "Unknown file type: %s\n", path);
+    return 1;
+  }
+
+  if (state.pcm_data == nullptr) {
+    fprintf(stderr, "Failed to load audio\n");
+    return 1;
+  }
+
+  const float duration_sec = (float)state.num_samples / 32000.0f;
+  printf("Playing %.2f seconds... Press Ctrl+C to stop.\n", duration_sec);
+
+  // Initialize miniaudio
+  ma_device_config cfg = ma_device_config_init(ma_device_type_playback);
+  cfg.playback.format = ma_format_f32;
+  cfg.playback.channels = 2;
+  cfg.sampleRate = 32000;
+  cfg.dataCallback = audio_callback;
+  cfg.pUserData = &state;
+
+  ma_device dev;
+  if (ma_device_init(NULL, &cfg, &dev) != MA_SUCCESS) {
+    fprintf(stderr, "Failed to initialize audio device\n");
+    free(state.pcm_data);
+    return 1;
+  }
+
+  if (ma_device_start(&dev) != MA_SUCCESS) {
+    fprintf(stderr, "Failed to start audio device\n");
+    ma_device_uninit(&dev);
+    free(state.pcm_data);
+    return 1;
+  }
+
+  // Wait for playback to finish: poll until the callback has consumed all
+  // samples.
+  while (state.playback_pos < state.num_samples) {
+    ma_sleep(100);
+  }
+
+  ma_device_uninit(&dev);
+  free(state.pcm_data);
+
+  printf("Playback complete.\n");
+  return 0;
+}
diff --git a/tools/specplay_README.md b/tools/specplay_README.md
new file mode 100644
index 0000000..72d9ec1
--- /dev/null
+++ b/tools/specplay_README.md
@@ -0,0 +1,164 @@
+# specplay - Audio Analysis & Playback Tool
+
+Standalone diagnostic tool for analyzing and playing .spec spectrogram files and .wav audio files.
+
+## Usage
+
+```bash
+./build/specplay <file.spec|file.wav>
+```
+
+## Features
+
+### Current (v1.0)
+- ✅ Plays .spec files via IDCT synthesis (matches demo playback exactly)
+- ✅ Plays .wav files for comparison
+- ✅ Reports PCM statistics:
+ - **Peak level**: Maximum absolute sample value (detects clipping if > 1.0)
+ - **RMS level**: Root-mean-square energy (loudness measure)
+- ✅ Real-time clipping detection during playback with sample position
+- ✅ Checks the .spec magic bytes and prints the header fields (version, DCT size, frame count)
+
+### Example Output
+```
+Loading .spec: version=1, dct_size=512, frames=68
+PCM stats: Peak=0.403, RMS=0.058
+Playing 1.09 seconds... Press Ctrl+C to stop.
+Playback complete.
+```
+
+## Use Cases
+
+1. **Debugging Audio Issues**
+ - Quickly identify which samples have clipping (Peak > 1.0)
+ - Compare .wav source vs .spec output to detect analysis artifacts
+ - Verify spectrogram regeneration after DCT changes
+
+2. **Quality Assurance**
+ - Batch test all .spec files for clipping before committing
+ - Measure loudness consistency across samples (RMS levels)
+ - Validate spectrograms match expected characteristics
+
+3. **Development Workflow**
+ - Test individual samples without running full demo
+ - A/B compare different spectrogram generation parameters
+ - Verify procedural note generation output
+
+## Technical Details
+
+- **Sample Rate**: 32kHz (matches demo audio engine)
+- **Format**: Mono (duplicated to stereo for playback)
+- **IDCT**: Uses same `idct_512()` function as demo (bit-exact)
+- **Clamping**: Clipping is reported on stderr, and out-of-range samples are clamped to [-1, 1] for playback
+
+## Future Enhancement Ideas
+
+### Priority 1: Analysis Features
+- [ ] **Spectral visualization**: ASCII art frequency plot
+- [ ] **Waveform display**: Time-domain amplitude graph
+- [ ] **Frequency analysis**: Dominant frequency, spectral centroid
+- [ ] **Dynamic range**: Measure headroom, suggest normalization
+- [ ] **Duration info**: Show seconds, samples, frames
+
+### Priority 2: Comparison Tools
+- [ ] **Diff mode**: `specplay --compare file.wav file.spec`
+ - Side-by-side PCM stats
+ - RMS error, peak difference
+ - Correlation coefficient
+- [ ] **Batch mode**: `specplay --batch assets/final/*.spec`
+ - Generate CSV report of all stats
+ - Sort by peak level (find clipping candidates)
+ - Find outliers (unusually loud/quiet samples)
+
+### Priority 3: Export Features
+- [ ] **WAV export**: `specplay file.spec --export output.wav`
+ - Convert .spec → .wav for external analysis
+ - Useful for importing into audio editors
+- [ ] **Normalization**: `specplay file.spec --normalize --export normalized.wav`
+ - Auto-scale to target peak/RMS
+ - Preserve transients
+
+### Priority 4: Advanced Analysis
+- [ ] **Spectral envelope**: Extract formants, resonances
+- [ ] **Harmonic analysis**: Detect fundamental frequency, harmonics
+- [ ] **Onset detection**: Find transient attacks (kick/snare hits)
+- [ ] **Spectral flux**: Measure frequency content change over time
+
+### Priority 5: Interactive Mode
+- [ ] **Seek controls**: Play from specific time offset
+- [ ] **Loop mode**: Repeat playback indefinitely
+- [ ] **Volume control**: Adjust playback gain
+- [ ] **Real-time waveform**: Live visualization during playback
+
+## Integration with Build System
+
+Built automatically when `DEMO_BUILD_TOOLS=ON`:
+```bash
+cmake -S . -B build -DDEMO_BUILD_TOOLS=ON
+cmake --build build --target specplay
+```
+
+Or with all options:
+```bash
+cmake -S . -B build -DDEMO_ALL_OPTIONS=ON
+cmake --build build
+```
+
+## Code Architecture
+
+- **Minimal dependencies**: Only uses audio subsystem + miniaudio
+- **Self-contained**: No GPU, no platform layer (pure audio)
+- **Size**: ~250 lines (easy to extend)
+- **Format support**:
+ - `.spec` via SpecHeader parser
+ - `.wav` via miniaudio decoder (`.aif` is also accepted; other extensions such as `.mp3` are rejected as unknown file types)
+
+## Related Tools
+
+- **spectool**: Analyzes .wav → generates .spec (batch processing)
+- **specview**: ASCII visualization of .spec frequency content
+- **specplay**: This tool (playback + analysis)
+
+## Troubleshooting
+
+**"Failed to read SpecHeader"**
+- File is not a valid .spec file
+- Try with spectool to regenerate: `./build/spectool analyze input.wav output.spec`
+
+**"Peak exceeds 1.0! Will clip during playback."**
+- Spectrogram has too much energy
+- Likely needs regeneration after DCT changes
+- Or source .wav is already clipping
+
+**No audio output**
+- Check system audio device
+- Ensure 32kHz sample rate is supported
+- Try with a different .spec file (may be empty/silent)
+
+## Examples
+
+### Find all clipping samples
+```bash
+for f in assets/final/*.spec; do
+ ./build/specplay "$f" | grep "WARNING" && echo "$f"
+done
+```
+
+### Compare wav source to spec output
+```bash
+./build/specplay assets/originals/kick.wav
+./build/specplay assets/final/KICK.spec
+# Compare Peak/RMS values
+```
+
+### Quick loudness check
+```bash
+./build/specplay assets/final/KICK_606.spec | grep "RMS"
+# RMS > 0.3: loud, RMS < 0.1: quiet
+```
+
+---
+
+**Last Updated**: February 6, 2026
+**Author**: Claude (AI assistant)
+**Status**: Production-ready, actively used for debugging
diff --git a/tools/spectool.cc b/tools/spectool.cc
index 67e9ff3..4cd98c7 100644
--- a/tools/spectool.cc
+++ b/tools/spectool.cc
@@ -26,8 +26,13 @@
// float[num_frames * dct_size] data
// struct SpecHeader { ... } -> now in audio.h
-int analyze_audio(const char* in_path, const char* out_path) {
- printf("Analyzing %s -> %s\n", in_path, out_path);
+int analyze_audio(const char* in_path, const char* out_path, bool normalize,
+ float target_rms) {
+ printf("Analyzing %s -> %s", in_path, out_path);
+ if (normalize) {
+ printf(" (normalizing to RMS=%.3f)", target_rms);
+ }
+ printf("\n");
// Use higher quality resampling for better audio quality
// Source files are typically 44.1kHz or 96kHz, 16/24-bit, mono/stereo
@@ -46,19 +51,84 @@ int analyze_audio(const char* in_path, const char* out_path) {
return 1;
}
- std::vector<float> spec_data;
+ // First pass: Load all PCM data (needed for normalization)
+ std::vector<float> pcm_data;
float pcm_chunk[DCT_SIZE];
- float window[WINDOW_SIZE];
- hamming_window_512(window);
-
ma_uint64 frames_read;
while (ma_decoder_read_pcm_frames(&decoder, pcm_chunk, DCT_SIZE,
&frames_read) == MA_SUCCESS &&
frames_read > 0) {
- if (frames_read < DCT_SIZE) {
- // Zero-pad the last chunk if it's smaller
- memset(pcm_chunk + frames_read, 0,
- (DCT_SIZE - frames_read) * sizeof(float));
+ pcm_data.insert(pcm_data.end(), pcm_chunk, pcm_chunk + frames_read);
+ }
+ ma_decoder_uninit(&decoder);
+
+ if (pcm_data.empty()) {
+ printf("Error: No audio data read from file.\n");
+ return 1;
+ }
+
+ // Calculate RMS and peak
+ float rms_sum = 0.0f;
+ float peak = 0.0f;
+ for (size_t i = 0; i < pcm_data.size(); ++i) {
+ const float abs_val = fabsf(pcm_data[i]);
+ if (abs_val > peak) {
+ peak = abs_val;
+ }
+ rms_sum += pcm_data[i] * pcm_data[i];
+ }
+ const float original_rms = sqrtf(rms_sum / pcm_data.size());
+ printf("Original: Peak=%.3f, RMS=%.3f\n", peak, original_rms);
+
+ // Normalize if requested
+ float scale_factor = 1.0f;
+ if (normalize && original_rms > 1e-6f) {
+ // Calculate scale factor to reach target RMS
+ scale_factor = target_rms / original_rms;
+
+ // Check if this would cause clipping (peak > 1.0 after synthesis)
+ // Peak amplification varies by sample (windowing + IDCT effects)
+ // Use conservative limit: input peak ≤ 1.0 to guarantee output peak ≤ 1.0
+ const float max_safe_peak = 1.0f;
+ const float predicted_peak = peak * scale_factor;
+
+ if (predicted_peak > max_safe_peak) {
+ // Reduce scale factor to prevent clipping
+ const float peak_scale = max_safe_peak / peak;
+ printf("Warning: RMS normalization would cause clipping (peak=%.3f)\n",
+ predicted_peak);
+ printf(" Reducing scale to prevent clipping.\n");
+ scale_factor = peak_scale;
+ }
+
+ printf("Normalizing: scale factor = %.3f\n", scale_factor);
+ printf(" RMS: %.3f -> %.3f\n", original_rms, original_rms * scale_factor);
+ printf(" Peak: %.3f -> %.3f\n", peak, peak * scale_factor);
+
+ for (size_t i = 0; i < pcm_data.size(); ++i) {
+ pcm_data[i] *= scale_factor;
+ }
+ }
+
+ // Second pass: Windowing + DCT
+ std::vector<float> spec_data;
+ float window[WINDOW_SIZE];
+ hamming_window_512(window);
+
+ // Process PCM data in DCT_SIZE chunks
+ const size_t num_chunks = (pcm_data.size() + DCT_SIZE - 1) / DCT_SIZE;
+ for (size_t chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
+ const size_t chunk_start = chunk_idx * DCT_SIZE;
+ const size_t chunk_end =
+ (chunk_start + DCT_SIZE < pcm_data.size()) ? chunk_start + DCT_SIZE
+ : pcm_data.size();
+ const size_t chunk_size = chunk_end - chunk_start;
+
+ // Copy chunk (with zero-padding if needed)
+ memcpy(pcm_chunk, pcm_data.data() + chunk_start,
+ chunk_size * sizeof(float));
+ if (chunk_size < DCT_SIZE) {
+ memset(pcm_chunk + chunk_size, 0, (DCT_SIZE - chunk_size) * sizeof(float));
}
// Apply window
@@ -74,8 +144,6 @@ int analyze_audio(const char* in_path, const char* out_path) {
spec_data.insert(spec_data.end(), dct_chunk, dct_chunk + DCT_SIZE);
}
- ma_decoder_uninit(&decoder);
-
// --- Trim Silent Frames ---
const float epsilon = 1e-6f;
int num_frames = spec_data.size() / DCT_SIZE;
@@ -248,7 +316,7 @@ int test_gen(const char* out_path) {
}
void print_usage() {
- printf("Usage: spectool <command> <input> [output]\n");
+ printf("Usage: spectool <command> <input> [output] [options]\n");
printf("Commands:\n");
printf(
" analyze <input.wav|.mp3> <output.spec> Analyze an audio file and "
@@ -258,6 +326,11 @@ void print_usage() {
printf(
" test_gen <output.spec> Generate a test "
"spectrogram.\n");
+ printf("\nOptions for 'analyze':\n");
+ printf(" --normalize [rms] Normalize audio to target RMS level (default: "
+ "0.15)\n");
+ printf(
+ " Ensures consistent loudness across all samples.\n");
}
int main(int argc, char** argv) {
@@ -274,7 +347,27 @@ int main(int argc, char** argv) {
print_usage();
return 1;
}
- return analyze_audio(argv[2], argv[3]);
+
+ // Parse optional flags
+ bool normalize = false;
+ float target_rms = 0.15f; // Default target RMS
+
+ for (int i = 4; i < argc; ++i) {
+ if (strcmp(argv[i], "--normalize") == 0) {
+ normalize = true;
+ // Check if next arg is a number (custom target RMS)
+ if (i + 1 < argc) {
+ char* endptr;
+ float custom_rms = strtof(argv[i + 1], &endptr);
+ if (endptr != argv[i + 1] && custom_rms > 0.0f && custom_rms < 1.0f) {
+ target_rms = custom_rms;
+ ++i; // Consume the RMS value
+ }
+ }
+ }
+ }
+
+ return analyze_audio(argv[2], argv[3], normalize, target_rms);
} else if (strcmp(command, "play") == 0) {
if (argc < 3) {
printf("Error: 'play' command requires an input file.\n");
diff --git a/tools/spectral_editor/FEATURES.md b/tools/spectral_editor/FEATURES.md
new file mode 100644
index 0000000..6c36cc2
--- /dev/null
+++ b/tools/spectral_editor/FEATURES.md
@@ -0,0 +1,151 @@
+# Spectral Editor - Feature Roadmap
+
+This document tracks planned enhancements for the spectral editor.
+
+## Priority: High
+
+### A. Curve Translation (Shift+Click+Drag)
+**Description**: Shift+clicking a control point and dragging should move the whole curve at once (translating all of its control points).
+
+**Implementation Notes**:
+- Detect shift key state during control point click
+- Store initial positions of all control points in the curve
+- Apply uniform translation delta to all points during drag
+- Maintain curve shape while moving
+
+**Complexity**: Medium
+**Estimated Effort**: 2-3 hours
+
+---
+
+### B. Viewport Zoom (Mouse Wheel)
+**Description**: Mouse-wheel should allow zooming in/out on the view for fine placement of curves.
+
+**Implementation Notes**:
+- Implement zoom scale factor (e.g., 0.5x to 4.0x)
+- Center zoom around mouse cursor position
+- Update rendering to use scaled coordinates
+- Add visual zoom indicator (e.g., "Zoom: 2.0x")
+- Consider pan functionality (drag with middle mouse or space+drag)
+
+**Complexity**: High (coordinate transformation, pan interaction)
+**Estimated Effort**: 6-8 hours
+
+---
+
+## Priority: Medium
+
+### C. Enhanced Sinusoid Pattern
+**Description**: The 'sinusoid' pattern is quite interesting and should have more variations.
+
+**Proposed Variations**:
+- **Asymmetric Decay**: Different decay rates above and below the curve center
+ - `decay_top` parameter (controls upper harmonics falloff)
+ - `decay_bottom` parameter (controls lower harmonics falloff)
+- **Temporal Modulation**: Per-frame amplitude/frequency modulation along timeline
+ - `amplitude_envelope` (fade in/out over time)
+ - `frequency_drift` (vibrato/wobble effect)
+ - `phase_offset` (shift pattern over time)
+- **Harmonic Series**: Option to generate harmonic overtones
+ - `num_harmonics` parameter
+ - `harmonic_decay` parameter
+
+**Implementation Notes**:
+- Extend `SinusoidProfile` class with additional parameters
+- Add UI controls for new parameters (sliders, dropdowns)
+- Render preview showing modulation over time
+
+**Complexity**: Medium-High
+**Estimated Effort**: 8-12 hours
+
+---
+
+### D. Per-Control-Point Modulation
+**Description**: Each control point should be assigned individually controllable volume, decay params, etc. for fine modulation along time.
+
+**Proposed Parameters (per control point)**:
+- `volume`: Local amplitude multiplier (0.0 - 2.0)
+- `decay`: Local decay rate override
+- `width`: Gaussian width override (for profile spreading)
+- `phase`: Phase offset for sinusoid patterns
+- `color`: Visual indicator for parameter variations
+
+**Implementation Notes**:
+- Extend control point data structure with parameter fields
+- Add per-point property panel (show on control point selection)
+- Render visual hints (color-coded points, size variations)
+- Interpolate parameters between control points for smooth transitions
+
+**Complexity**: High (UI/UX design, parameter interpolation)
+**Estimated Effort**: 10-15 hours
+
+---
+
+### E. Composable Profiles
+**Description**: Profiles should be composable along a curve (e.g., apply Gaussian curve to sinusoid pattern).
+
+**Proposed Syntax**:
+```cpp
+// Example: Gaussian-modulated sinusoid
+CompositeProfile {
+ base: SinusoidProfile { frequency: 100.0, decay: 0.5 },
+ envelope: GaussianProfile { center: 256, width: 50 }
+}
+```
+
+**Implementation Notes**:
+- Define profile composition operators:
+ - `multiply`: Envelope modulation (amplitude × profile)
+ - `add`: Additive blending (profile1 + profile2)
+ - `max`: Take maximum value at each bin
+- Add UI for profile layering (drag-and-drop profile stack)
+- Render composite preview with layer visualization
+
+**Complexity**: High (requires profile abstraction refactor)
+**Estimated Effort**: 12-16 hours
+
+---
+
+## Priority: Low (Polish)
+
+### F. Improved Parameter Sliders
+**Description**: Adjust slider ranges for better usability (Decay, Width, Frequency, etc.).
+
+**Issues to Address**:
+- Decay slider: Non-linear scaling (logarithmic?) for finer control at low values
+- Frequency slider: Snap to musical notes (optional A440-based grid)
+- Width slider: Preview visualization (show affected frequency range)
+- General: Add numeric input fields next to sliders for precise values
+
+**Implementation Notes**:
+- Implement logarithmic slider interpolation for decay/width
+- Add slider tick marks at useful intervals
+- Display current value and units (Hz, bins, dB, etc.)
+- Add reset-to-default buttons
+
+**Complexity**: Low-Medium
+**Estimated Effort**: 3-4 hours
+
+---
+
+## Future Ideas (Backlog)
+
+- **Undo/Redo System**: Track edit history for curve modifications
+- **Preset Library**: Save/load common curve patterns (kick drum, snare, bass, etc.)
+- **Curve Smoothing**: Apply smoothing filters to jittery control points
+- **Copy/Paste**: Duplicate curves or control point selections
+- **Multi-Selection**: Select and edit multiple control points simultaneously
+- **Grid Snapping**: Snap control points to frequency/time grid
+- **Export Options**: Export to different formats (JSON, binary, C++ code)
+
+---
+
+## Total Estimated Effort
+- **High Priority**: 8-11 hours
+- **Medium Priority**: 30-43 hours
+- **Low Priority**: 3-4 hours
+- **Grand Total**: 41-58 hours (roughly 1-1.5 weeks of focused work)
+
+---
+
+*Last Updated: February 6, 2026*
diff --git a/tools/spectral_editor/dct.js b/tools/spectral_editor/dct.js
index deff8a9..435a7e8 100644
--- a/tools/spectral_editor/dct.js
+++ b/tools/spectral_editor/dct.js
@@ -1,20 +1,10 @@
const dctSize = 512; // Default DCT size, read from header
// --- Utility Functions for Audio Processing ---
+// Fast O(N log N) IDCT using FFT
// JavaScript equivalent of C++ idct_512
function javascript_idct_512(input) {
- const output = new Float32Array(dctSize);
- const PI = Math.PI;
- const N = dctSize;
-
- for (let n = 0; n < N; ++n) {
- let sum = input[0] / 2.0;
- for (let k = 1; k < N; ++k) {
- sum += input[k] * Math.cos((PI / N) * k * (n + 0.5));
- }
- output[n] = sum * (2.0 / N);
- }
- return output;
+ return javascript_idct_512_fft(input);
}
// Hanning window for smooth audio transitions (JavaScript equivalent)
@@ -127,95 +117,90 @@ function fftInverse(real, imag, N) {
}
}
-// DCT-II via FFT using double-and-mirror method (matches C++ dct_fft)
-// This is a more robust algorithm that avoids reordering issues
+// DCT-II via FFT using reordering method (matches C++ dct_fft)
+// Reference: Numerical Recipes Chapter 12.3
function javascript_dct_fft(input, N) {
const PI = Math.PI;
- // Allocate arrays for 2N-point FFT
- const M = 2 * N;
- const real = new Float32Array(M);
- const imag = new Float32Array(M);
+ // Allocate arrays for N-point FFT
+ const real = new Float32Array(N);
+ const imag = new Float32Array(N);
- // Pack input: [x[0], x[1], ..., x[N-1], x[N-1], x[N-2], ..., x[1]]
- // This creates even symmetry for real-valued DCT
- for (let i = 0; i < N; i++) {
- real[i] = input[i];
- }
- for (let i = 0; i < N; i++) {
- real[N + i] = input[N - 1 - i];
+ // Reorder input: even indices first, then odd indices reversed
+ // [x[0], x[2], x[4], ...] followed by [x[N-1], x[N-3], x[N-5], ...]
+ for (let i = 0; i < N / 2; i++) {
+ real[i] = input[2 * i]; // Even indices: 0, 2, 4, ...
+ real[N - 1 - i] = input[2 * i + 1]; // Odd indices reversed: N-1, N-3, ...
}
// imag is already zeros (Float32Array default)
- // Apply 2N-point FFT
- fftForward(real, imag, M);
+ // Apply N-point FFT
+ fftForward(real, imag, N);
- // Extract DCT coefficients
+ // Extract DCT coefficients with phase correction
// DCT[k] = Re{FFT[k] * exp(-j*pi*k/(2*N))} * normalization
- // Note: Need to divide by 2 because we doubled the signal length
const output = new Float32Array(N);
for (let k = 0; k < N; k++) {
const angle = -PI * k / (2.0 * N);
const wr = Math.cos(angle);
const wi = Math.sin(angle);
- // Complex multiplication: (real + j*imag) * (wr + j*wi)
+ // Complex multiplication: (real[k] + j*imag[k]) * (wr + j*wi)
// Real part: real*wr - imag*wi
const dct_value = real[k] * wr - imag[k] * wi;
- // Apply DCT-II normalization (divide by 2 for double-length FFT)
+ // Apply DCT-II normalization
if (k === 0) {
- output[k] = dct_value * Math.sqrt(1.0 / N) / 2.0;
+ output[k] = dct_value * Math.sqrt(1.0 / N);
} else {
- output[k] = dct_value * Math.sqrt(2.0 / N) / 2.0;
+ output[k] = dct_value * Math.sqrt(2.0 / N);
}
}
return output;
}
-// IDCT (Inverse DCT-II) via FFT using double-and-mirror method (matches C++ idct_fft)
+// IDCT (DCT-III) via FFT using reordering method (matches C++ idct_fft)
+// Reference: Numerical Recipes Chapter 12.3
function javascript_idct_fft(input, N) {
const PI = Math.PI;
- // Allocate arrays for 2N-point FFT
- const M = 2 * N;
- const real = new Float32Array(M);
- const imag = new Float32Array(M);
+ // Allocate arrays for N-point FFT
+ const real = new Float32Array(N);
+ const imag = new Float32Array(N);
- // Prepare FFT input from DCT coefficients
- // IDCT = Re{IFFT[DCT * exp(j*pi*k/(2*N))]} * 2
+ // Prepare FFT input with inverse phase correction
+ // FFT[k] = DCT[k] * exp(+j*pi*k/(2*N)) / normalization
+ // Note: DCT-III needs factor of 2 for AC terms
for (let k = 0; k < N; k++) {
- const angle = PI * k / (2.0 * N); // Positive for inverse
+ const angle = PI * k / (2.0 * N); // Positive angle for inverse
const wr = Math.cos(angle);
const wi = Math.sin(angle);
- // Apply inverse normalization
- let scaled_input;
+ // Inverse of DCT-II normalization with correct DCT-III scaling
+ let scaled;
if (k === 0) {
- scaled_input = input[k] * Math.sqrt(N) * 2.0;
+ scaled = input[k] / Math.sqrt(1.0 / N);
} else {
- scaled_input = input[k] * Math.sqrt(N / 2.0) * 2.0;
+ // DCT-III needs factor of 2 for AC terms
+ scaled = input[k] / Math.sqrt(2.0 / N) * 2.0;
}
- // Complex multiplication: DCT[k] * exp(j*theta)
- real[k] = scaled_input * wr;
- imag[k] = scaled_input * wi;
- }
-
- // Fill second half with conjugate symmetry (for real output)
- for (let k = 1; k < N; k++) {
- real[M - k] = real[k];
- imag[M - k] = -imag[k];
+ // Complex multiplication: scaled * (wr + j*wi)
+ real[k] = scaled * wr;
+ imag[k] = scaled * wi;
}
// Apply inverse FFT
- fftInverse(real, imag, M);
+ fftInverse(real, imag, N);
- // Extract first N samples (real part only, imag should be ~0)
+ // Unpack: reverse the reordering from DCT
+ // Even output indices come from first half of FFT output
+ // Odd output indices come from second half (reversed)
const output = new Float32Array(N);
- for (let i = 0; i < N; i++) {
- output[i] = real[i];
+ for (let i = 0; i < N / 2; i++) {
+ output[2 * i] = real[i]; // Even positions
+ output[2 * i + 1] = real[N - 1 - i]; // Odd positions (reversed)
}
return output;
diff --git a/tools/spectral_editor/script.js b/tools/spectral_editor/script.js
index 48b0661..7c424f9 100644
--- a/tools/spectral_editor/script.js
+++ b/tools/spectral_editor/script.js
@@ -30,6 +30,8 @@ const state = {
canvasHeight: 0,
pixelsPerFrame: 2.0, // Zoom level (pixels per frame)
pixelsPerBin: 1.0, // Vertical scale (pixels per frequency bin)
+ viewportOffsetX: 0, // Horizontal pan offset (pixels)
+ viewportOffsetY: 0, // Vertical pan offset (pixels)
// Audio playback
audioContext: null,
@@ -94,6 +96,9 @@ function initCanvas() {
// Mouse hover handlers (for crosshair)
canvas.addEventListener('mousemove', onCanvasHover);
canvas.addEventListener('mouseleave', onCanvasLeave);
+
+ // Mouse wheel: zoom (with Ctrl/Cmd) or pan
+ canvas.addEventListener('wheel', onCanvasWheel, { passive: false });
}
function initUI() {
@@ -378,19 +383,9 @@ function audioToSpectrogram(audioData) {
}
// Forward DCT (512-point) wrapper
+// Fast O(N log N) DCT using FFT (delegates to dct.js implementation)
function javascript_dct_512(input) {
- const output = new Float32Array(DCT_SIZE);
- const PI = Math.PI;
- const N = DCT_SIZE;
-
- for (let k = 0; k < N; k++) {
- let sum = 0;
- for (let n = 0; n < N; n++) {
- sum += input[n] * Math.cos((PI / N) * k * (n + 0.5));
- }
- output[k] = sum * (k === 0 ? Math.sqrt(1 / N) : Math.sqrt(2 / N));
- }
- return output;
+ return javascript_dct_512_fft(input);
}
function onReferenceLoaded(fileName) {
@@ -414,6 +409,9 @@ function onReferenceLoaded(fileName) {
// Adjust zoom to fit
state.pixelsPerFrame = Math.max(1.0, state.canvasWidth / state.referenceNumFrames);
+ state.pixelsPerBin = 1.0; // Reset vertical scale
+ state.viewportOffsetX = 0; // Reset pan
+ state.viewportOffsetY = 0;
updateCurveUI();
updateUndoRedoButtons();
@@ -859,12 +857,62 @@ function onCanvasLeave(e) {
render();
}
+// Mouse-wheel handler for the spectrogram canvas.
+//   Ctrl/Cmd + wheel : horizontal zoom, anchored at the frame under the cursor
+//   Shift + wheel    : horizontal pan
+//   plain wheel      : vertical pan (linear frequency mode only)
+// Always calls preventDefault() so the page itself never scrolls.
+function onCanvasWheel(e) {
+  e.preventDefault();
+
+  const rect = e.target.getBoundingClientRect();
+  const mouseX = e.clientX - rect.left;
+
+  // Clamp the horizontal offset to [0, contentWidth - canvasWidth].
+  // `|| 0` keeps the clamp finite when no reference is loaded yet.
+  const clampOffsetX = (offset) => {
+    const contentWidth = (state.referenceNumFrames || 0) * state.pixelsPerFrame;
+    const maxOffsetX = Math.max(0, contentWidth - state.canvasWidth);
+    return Math.max(0, Math.min(maxOffsetX, offset));
+  };
+
+  // Zoom mode: Ctrl/Cmd + wheel
+  if (e.ctrlKey || e.metaKey) {
+    // A purely horizontal gesture carries no zoom direction; ignore it rather
+    // than treating deltaY === 0 as "zoom in".
+    if (e.deltaY === 0) return;
+
+    // Calculate frame under cursor BEFORE zoom
+    const frameUnderCursor = (mouseX + state.viewportOffsetX) / state.pixelsPerFrame;
+
+    // Horizontal zoom only - the logarithmic frequency axis doesn't zoom.
+    // Wheel down = zoom out, wheel up = zoom in.
+    const zoomFactor = e.deltaY > 0 ? 0.9 : 1.1;
+    state.pixelsPerFrame = Math.max(0.5, Math.min(20.0, state.pixelsPerFrame * zoomFactor));
+
+    // Keep the frame under the cursor at the same screen position:
+    // new_offset = frame * newPixelsPerFrame - mouseX
+    state.viewportOffsetX = clampOffsetX(frameUnderCursor * state.pixelsPerFrame - mouseX);
+
+    render();
+    return;
+  }
+
+  // Pan mode: Shift + wheel (horizontal pan)
+  if (e.shiftKey) {
+    // Some browser/OS combinations report a Shift-modified wheel on deltaX
+    // instead of deltaY; fall back to deltaX so the pan works on both.
+    const delta = e.deltaY !== 0 ? e.deltaY : e.deltaX;
+    state.viewportOffsetX = clampOffsetX(state.viewportOffsetX + delta);
+    render();
+    return;
+  }
+
+  // Normal mode: pan vertically (disabled for logarithmic frequency axis).
+  // With the logarithmic scale the range FREQ_MIN..FREQ_MAX is always fitted
+  // to the canvas height, so vertical pan only applies in linear mode.
+  if (!USE_LOG_SCALE) {
+    state.viewportOffsetY += e.deltaY;
+    const maxOffsetY = Math.max(0, DCT_SIZE * state.pixelsPerBin - state.canvasHeight);
+    state.viewportOffsetY = Math.max(0, Math.min(maxOffsetY, state.viewportOffsetY));
+    render();
+  }
+}
+
// ============================================================================
// Coordinate Conversion
// ============================================================================
function screenToSpectrogram(screenX, screenY) {
- const frame = Math.round(screenX / state.pixelsPerFrame);
+ const frame = Math.round((screenX + state.viewportOffsetX) / state.pixelsPerFrame);
let freqHz;
if (USE_LOG_SCALE) {
@@ -891,7 +939,7 @@ function screenToSpectrogram(screenX, screenY) {
}
function spectrogramToScreen(frame, freqHz) {
- const x = frame * state.pixelsPerFrame;
+ const x = frame * state.pixelsPerFrame - state.viewportOffsetX;
let y;
if (USE_LOG_SCALE) {
@@ -901,11 +949,11 @@ function spectrogramToScreen(frame, freqHz) {
const clampedFreq = Math.max(FREQ_MIN, Math.min(FREQ_MAX, freqHz));
const logFreq = Math.log10(clampedFreq);
const normalizedY = (logFreq - logMin) / (logMax - logMin);
- y = state.canvasHeight * (1.0 - normalizedY); // Flip Y back to screen coords
+ y = state.canvasHeight * (1.0 - normalizedY) - state.viewportOffsetY; // Flip Y back to screen coords
} else {
// Linear frequency mapping (old behavior)
const bin = (freqHz / (SAMPLE_RATE / 2)) * state.referenceDctSize;
- y = state.canvasHeight - (bin * state.pixelsPerBin);
+ y = state.canvasHeight - (bin * state.pixelsPerBin) - state.viewportOffsetY;
}
return {x, y};
@@ -953,7 +1001,10 @@ function render() {
function drawPlayhead(ctx) {
if (!state.isPlaying || state.playbackCurrentFrame < 0) return;
- const x = state.playbackCurrentFrame * state.pixelsPerFrame;
+ const x = state.playbackCurrentFrame * state.pixelsPerFrame - state.viewportOffsetX;
+
+ // Only draw if playhead is visible in viewport
+ if (x < 0 || x > state.canvasWidth) return;
// Draw vertical line
ctx.strokeStyle = '#ff3333'; // Bright red
@@ -1553,7 +1604,7 @@ function spectrogramToAudio(spectrogram, dctSize, numFrames) {
const window = hanningWindowArray;
for (let frameIdx = 0; frameIdx < numFrames; frameIdx++) {
- // Extract frame
+ // Extract frame (no windowing - window is only for analysis, not synthesis)
const frame = new Float32Array(dctSize);
for (let b = 0; b < dctSize; b++) {
frame[b] = spectrogram[frameIdx * dctSize + b];
@@ -1562,7 +1613,7 @@ function spectrogramToAudio(spectrogram, dctSize, numFrames) {
// IDCT
const timeFrame = javascript_idct_512(frame);
- // Apply window and overlap-add
+ // Apply synthesis window for overlap-add
const frameStart = frameIdx * hopSize;
for (let i = 0; i < dctSize; i++) {
if (frameStart + i < audioLength) {
diff --git a/tools/timeline_editor/index.html b/tools/timeline_editor/index.html
index f85f914..074b711 100644
--- a/tools/timeline_editor/index.html
+++ b/tools/timeline_editor/index.html
@@ -1279,11 +1279,48 @@
updateProperties();
});
- // Mouse wheel diagonal scroll (follows time-ordered sequence cascade)
+ // Mouse wheel: zoom (with Ctrl/Cmd) or diagonal scroll
timelineContainer.addEventListener('wheel', (e) => {
e.preventDefault();
- // Horizontal scroll
+ // Zoom mode: Ctrl/Cmd + wheel
+ if (e.ctrlKey || e.metaKey) {
+ // Get mouse position relative to timeline container
+ const rect = timelineContainer.getBoundingClientRect();
+ const mouseX = e.clientX - rect.left; // Mouse X relative to the container's left edge
+
+ // Calculate time position under cursor BEFORE zoom
+ const scrollLeft = timelineContainer.scrollLeft;
+ const timeUnderCursor = (scrollLeft + mouseX) / pixelsPerSecond;
+
+ // Calculate new zoom level
+ const zoomDelta = e.deltaY > 0 ? -10 : 10; // Wheel down = zoom out, wheel up = zoom in
+ const oldPixelsPerSecond = pixelsPerSecond;
+ const newPixelsPerSecond = Math.max(10, Math.min(500, pixelsPerSecond + zoomDelta));
+
+ if (newPixelsPerSecond !== oldPixelsPerSecond) {
+ pixelsPerSecond = newPixelsPerSecond;
+
+ // Update zoom slider and labels
+ zoomSlider.value = pixelsPerSecond;
+ zoomLevel.textContent = `${pixelsPerSecond}%`;
+ pixelsPerSecLabel.textContent = pixelsPerSecond;
+
+ // Re-render waveform and timeline at new zoom
+ if (audioBuffer) {
+ renderWaveform();
+ }
+ renderTimeline();
+
+ // Adjust scroll position so time under cursor stays in same place
+ // After zoom: new_scrollLeft = time_under_cursor * newPixelsPerSecond - mouseX
+ const newScrollLeft = timeUnderCursor * newPixelsPerSecond - mouseX;
+ timelineContainer.scrollLeft = newScrollLeft;
+ }
+ return;
+ }
+
+ // Normal mode: diagonal scroll
timelineContainer.scrollLeft += e.deltaY;
// Calculate current time position with 10% headroom for visual comfort