diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/editor/dct.js | 31 | ||||
| -rw-r--r-- | tools/editor/index.html | 36 | ||||
| -rw-r--r-- | tools/editor/script.js | 648 | ||||
| -rw-r--r-- | tools/editor/sdf.js | 39 | ||||
| -rw-r--r-- | tools/editor/style.css | 79 | ||||
| -rw-r--r-- | tools/specplay.cc | 212 | ||||
| -rw-r--r-- | tools/specplay_README.md | 164 | ||||
| -rw-r--r-- | tools/spectool.cc | 121 | ||||
| -rw-r--r-- | tools/spectral_editor/FEATURES.md | 151 | ||||
| -rw-r--r-- | tools/spectral_editor/dct.js | 101 | ||||
| -rw-r--r-- | tools/spectral_editor/script.js | 89 | ||||
| -rw-r--r-- | tools/timeline_editor/index.html | 41 |
12 files changed, 786 insertions, 926 deletions
diff --git a/tools/editor/dct.js b/tools/editor/dct.js deleted file mode 100644 index e48ce2b..0000000 --- a/tools/editor/dct.js +++ /dev/null @@ -1,31 +0,0 @@ -const dctSize = 512; // Default DCT size, read from header - -// --- Utility Functions for Audio Processing --- -// JavaScript equivalent of C++ idct_512 -function javascript_idct_512(input) { - const output = new Float32Array(dctSize); - const PI = Math.PI; - const N = dctSize; - - for (let n = 0; n < N; ++n) { - let sum = input[0] / 2.0; - for (let k = 1; k < N; ++k) { - sum += input[k] * Math.cos((PI / N) * k * (n + 0.5)); - } - output[n] = sum * (2.0 / N); - } - return output; -} - -// Hanning window for smooth audio transitions (JavaScript equivalent) -function hanningWindow(size) { - const window = new Float32Array(size); - const PI = Math.PI; - for (let i = 0; i < size; i++) { - window[i] = 0.5 * (1 - Math.cos((2 * PI * i) / (size - 1))); - } - return window; -} - -const hanningWindowArray = hanningWindow(dctSize); // Pre-calculate window - diff --git a/tools/editor/index.html b/tools/editor/index.html deleted file mode 100644 index 82a11ce..0000000 --- a/tools/editor/index.html +++ /dev/null @@ -1,36 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Spectrogram Editor</title> - <link rel="stylesheet" href="style.css"> -</head> -<body> - <h1>Spectrogram Editor</h1> - - <input type="file" id="specFileInput" accept=".spec"> - <label for="specFileInput">Load SPEC File</label> - - <div id="editorContainer"> - <canvas id="spectrogramCanvas"></canvas> - <div id="controls"> - <h2>Tools</h2> - <button id="lineTool">Line</button> - <button id="ellipseTool">Ellipse</button> - <button id="noiseTool">Noise</button> - <button id="undoButton">Undo</button> - <button id="redoButton">Redo</button> - <hr> - <h2>Playback</h2> - <button id="listenOriginalButton">Listen Original</button> - <button id="listenGeneratedButton">Listen Generated</button> - <!-- Add more tool controls later --> - </div> - </div> - - <script src="sdf.js"></script> - <script src="dct.js"></script> - <script src="script.js"></script> -</body> -</html> diff --git a/tools/editor/script.js b/tools/editor/script.js deleted file mode 100644 index abfd4f4..0000000 --- a/tools/editor/script.js +++ /dev/null @@ -1,648 +0,0 @@ -// This is the core JavaScript for the Spectrogram Editor. -// It handles file loading (.spec), visualization, tool interaction, and saving. - -// --- Global Variables --- -let currentSpecData = null; // Stores the currently displayed/edited spectrogram data -let originalSpecData = null; // Stores the pristine, initially loaded spectrogram data - -let undoStack = []; -let redoStack = []; -const MAX_HISTORY_SIZE = 50; - -let activeTool = null; // 'line', 'ellipse', 'noise', etc. -let isDrawing = false; -let startX, startY; // For tracking mouse down position - -let shapes = []; // Array to store all drawn shapes (lines, ellipses, etc.) - -// Web Audio Context -const audioContext = new (window.AudioContext || window.webkitAudioContext)(); - -// Audio Constants (should match C++ side) -const SAMPLE_RATE = 32000; -const MAX_FREQ = SAMPLE_RATE / 2; // Nyquist frequency -const MIN_FREQ = 20; // Lower bound for log scale visualization - -const SDF_FALLOFF_FACTOR = 10.0; // Adjust this value to control the softness of SDF edges. - -// --- Button Element Declarations --- -const specFileInput = document.getElementById('specFileInput'); -const lineToolButton = document.getElementById('lineTool'); -const ellipseToolButton = document.getElementById('ellipseTool'); -const noiseToolButton = document.getElementById('noiseTool'); -const undoButton = document.getElementById('undoButton'); -const redoButton = document.getElementById('redoButton'); -const listenOriginalButton = document.getElementById('listenOriginalButton'); -const listenGeneratedButton = document.getElementById('listenGeneratedButton'); - -// --- Event Listeners --- -specFileInput.addEventListener('change', handleFileSelect); -lineToolButton.addEventListener('click', () => { activeTool = 'line'; console.log('Line tool selected'); }); -ellipseToolButton.addEventListener('click', () => { activeTool = 'ellipse'; console.log('Ellipse tool selected'); }); -noiseToolButton.addEventListener('click', () => { activeTool = 'noise'; console.log('Noise tool selected'); }); -undoButton.addEventListener('click', handleUndo); -redoButton.addEventListener('click', handleRedo); -listenOriginalButton.addEventListener('click', () => { - if (originalSpecData) { - playSpectrogramData(originalSpecData); - } else { - alert("No original SPEC data loaded."); - } -}); -listenGeneratedButton.addEventListener('click', () => { - if (currentSpecData) { - redrawCanvas(); // Ensure currentSpecData reflects all shapes before playing - playSpectrogramData(currentSpecData); - } else { - alert("No generated SPEC data to play."); - } -}); - - -// --- Utility to map canvas coords to spectrogram bins/frames (LOG SCALE) --- -// Maps a linear frequency bin index to its corresponding frequency in Hz -function binIndexToFreq(binIndex) { - return (binIndex / (dctSize / 2)) * MAX_FREQ; -} - -// Maps a frequency in Hz to its corresponding linear bin index -function freqToBinIndex(freq) { - return Math.floor((freq / MAX_FREQ) * (dctSize / 2)); -} - -// Maps a frequency (Hz) to its corresponding log-scaled bin index -function freqToBinIndexLog(freq) { - if (freq < MIN_FREQ) freq = MIN_FREQ; // Clamp minimum frequency - const logMin = Math.log(MIN_FREQ); - const logMax = Math.log(MAX_FREQ); - const logFreq = Math.log(freq); - const normalizedLog = (logFreq - logMin) / (logMax - logMin); - return Math.floor(normalizedLog * dctSize); -} - -// Maps a log-scaled bin index to its corresponding frequency in Hz -function binIndexToFreqLog(binIndex) { - const normalizedLog = binIndex / dctSize; - const logMin = Math.log(MIN_FREQ); - const logMax = Math.log(MAX_FREQ); - const logFreq = normalizedLog * (logMax - logMin) + logMin; - return Math.exp(logFreq); -} - -// Converts a frequency (Hz) to a Y-coordinate on the canvas (log scale) -function freqToCanvasYLog(freq, canvasHeight) { - if (freq < MIN_FREQ) freq = MIN_FREQ; // Clamp minimum frequency - const logMin = Math.log(MIN_FREQ); - const logMax = Math.log(MAX_FREQ); - const logFreq = Math.log(freq); - const normalizedLog = (logFreq - logMin) / (logMax - logMin); - return canvasHeight * (1 - normalizedLog); // Y-axis is inverted -} - -// Converts a Y-coordinate on the canvas to a frequency (Hz) (log scale) -function canvasYToFreqLog(canvasY, canvasHeight) { - const normalizedLog = 1 - (canvasY / canvasHeight); - const logMin = Math.log(MIN_FREQ); - const logMax = Math.log(MAX_FREQ); - const logFreq = normalizedLog * (logMax - logMin) + logMin; - return Math.exp(logFreq); -} - -// Converts canvas Y-coordinate to log-scaled bin index -function canvasYToBinIndexLog(canvasY, specData) { - const freq = canvasYToFreqLog(canvasY, canvas.height); - return freqToBinIndex(freq); // Use linear bin index from calculated log freq -} - -// Converts log-scaled bin index to canvas Y-coordinate -function binIndexToCanvasYLog(binIndex, specData) { - const freq = binIndexToFreq(binIndex); - return freqToCanvasYLog(freq, canvas.height); -} - -// Helper to get frequency delta from canvas delta (for ellipse radius in freq) -function canvasDeltaYToFreqDeltaLog(canvasDeltaY, canvasHeight) { - // This is an approximation as delta in log scale is not linear - // For small deltas around a center, it can be approximated - const centerCanvasY = canvasHeight / 2; - const freqAtCenter = canvasYToFreqLog(centerCanvasY, canvasHeight); - const freqAtCenterPlusDelta = canvasYToFreqLog(centerCanvasY - canvasDeltaY, canvasHeight); - return Math.abs(freqAtCenterPlusDelta - freqAtCenter); -} - -// Initial setup for canvas size (can be updated on window resize) -window.addEventListener('resize', () => { - if (originalSpecData) { - canvas.width = window.innerWidth * 0.7; - canvas.height = 400; // Fixed height - redrawCanvas(); - } -}); - -// Initial call to set button states -updateUndoRedoButtons(); - -// --- File Handling Functions --- -async function handleFileSelect(event) { - const file = event.target.files[0]; - if (!file) { - return; - } - - try { - const buffer = await file.arrayBuffer(); - const dataView = new DataView(buffer); - - // Parse SPEC header - const header = { - magic: String.fromCharCode(...new Uint8Array(buffer.slice(0, 4))), - version: dataView.getInt32(4, true), - dct_size: dataView.getInt32(8, true), - num_frames: dataView.getInt32(12, true) - }; - - if (header.magic !== "SPEC" || header.version !== 1) { - console.error("Invalid SPEC file format."); - alert("Invalid SPEC file format. Please load a valid .spec file."); - return; - } - - if (dctSize != header.dct_size) { - alert("Invalid dctSize in SPEC file"); - return; - } - const dataStart = 16; - const numBytes = header.num_frames * header.dct_size * Float32Array.BYTES_PER_ELEMENT; - const spectralDataFloat = new Float32Array(buffer, dataStart, header.num_frames * header.dct_size); - - originalSpecData = { header: header, data: new Float32Array(spectralDataFloat) }; // Store pristine copy - currentSpecData = { header: header, data: new Float32Array(spectralDataFloat) }; // Editable copy - - shapes = []; // Clear shapes on new file load - undoStack = []; // Clear undo history - redoStack = []; // Clear redo history - - console.log("Loaded SPEC file:", header); - redrawCanvas(); // Redraw with new data - - } catch (error) { - console.error("Error loading SPEC file:", error); - alert("Failed to load SPEC file. Check console for details."); - } -} - -// --- Spectrogram Visualization --- -const canvas = document.getElementById('spectrogramCanvas'); -const ctx = canvas.getContext('2d'); - -// Add canvas event listeners -canvas.addEventListener('mousedown', handleMouseDown); -canvas.addEventListener('mousemove', handleMouseMove); -canvas.addEventListener('mouseup', handleMouseUp); -canvas.addEventListener('mouseout', handleMouseUp); // Treat mouse out as mouse up - -// Function to get a color based on intensity (0 to 1) -function getColorForIntensity(intensity) { - // Example: Blue to white/yellow gradient - const log_intensity = Math.log(1. + intensity) / Math.log(2.); - const h = (1 - log_intensity) * 240; // Hue from blue (240) to red (0), inverse for intensity - const s = 60.; // Saturation - const l = log_intensity * 60 + 30; // Lightness from 30 to 90 - return `hsl(${h}, ${s}%, ${l}%)`; -} - -function drawSpectrogram(specData) { - const width = canvas.width; - const height = canvas.height; - - ctx.clearRect(0, 0, width, height); - ctx.fillStyle = '#ffffff'; - ctx.fillRect(0, 0, width, height); - - if (!specData || !specData.data || specData.header.num_frames === 0 || specData.data.length === 0) { - console.warn("No spectrogram data or invalid header/data to draw."); - return; - } - - const numFrames = specData.header.num_frames; - const frameWidth = width / numFrames; // Width of each time frame - - // Draw each frame's spectral data with log frequency scale - for (let frameIndex = 0; frameIndex < numFrames; frameIndex++) { - const frameDataStart = frameIndex * dctSize; - const xPos = frameIndex * frameWidth; - - // To draw with log scale, we iterate over canvas y-coordinates - // and map them back to frequency bins - for (let y = 0; y < height; y++) { - const binIndex = canvasYToBinIndexLog(y, specData); - if (binIndex < 0 || binIndex >= dctSize) continue; // Out of bounds - - const value = specData.data[frameDataStart + binIndex]; - const intensity = Math.min(1, Math.abs(value) / 1.0); // Assuming values are normalized to [-1, 1] - - ctx.fillStyle = getColorForIntensity(intensity); - ctx.fillRect(xPos, height - y - 1, frameWidth, 1); // Draw a 1-pixel height line for each y - } - } - - // Draw active shapes on top (previews for current drawing tool) - shapes.forEach(shape => { - drawShape(shape); - }); -} - -function drawShape(shape) { - // This draws the final, persistent shape. Preview is drawn in handleMouseMove. - ctx.strokeStyle = shape.color || 'red'; - ctx.lineWidth = shape.width || 2; - - switch (shape.type) { - case 'line': - ctx.beginPath(); - ctx.moveTo(shape.x1, shape.y1); - ctx.lineTo(shape.x2, shape.y2); - ctx.stroke(); - break; - case 'ellipse': - ctx.beginPath(); - ctx.ellipse(shape.cx, shape.cy, shape.rx, shape.ry, 0, 0, 2 * Math.PI); - ctx.stroke(); - break; - case 'noise_rect': // Noise is visualized as a rectangle - ctx.fillStyle = 'rgba(0, 0, 255, 0.2)'; - ctx.fillRect(shape.x, shape.y, shape.width, shape.height); - ctx.strokeStyle = 'blue'; - ctx.strokeRect(shape.x, shape.y, shape.width, shape.height); - break; - } -} - -// --- Mouse Event Handlers --- -function getMousePos(event) { - const rect = canvas.getBoundingClientRect(); - return { - x: event.clientX - rect.left, - y: event.clientY - rect.top - }; -} - -function handleMouseDown(event) { - if (!activeTool || !currentSpecData) return; - isDrawing = true; - const pos = getMousePos(event); - startX = pos.x; - startY = pos.y; -} - -function handleMouseMove(event) { - if (!isDrawing || !activeTool) return; - const pos = getMousePos(event); - - redrawCanvas(); // Clear and redraw persistent state - - ctx.strokeStyle = 'rgba(0, 0, 0, 0.5)'; // Preview color - ctx.lineWidth = 1; - ctx.setLineDash([5, 5]); // Dashed line for preview - - switch (activeTool) { - case 'line': - ctx.beginPath(); - ctx.moveTo(startX, startY); - ctx.lineTo(pos.x, pos.y); - ctx.stroke(); - break; - case 'ellipse': - // Draw preview ellipse based on start and current pos (bounding box) - const rx = Math.abs(pos.x - startX) / 2; - const ry = Math.abs(pos.y - startY) / 2; - const cx = startX + (pos.x - startX) / 2; - const cy = startY + (pos.y - startY) / 2; - if (rx > 0 && ry > 0) { - ctx.beginPath(); - ctx.ellipse(cx, cy, rx, ry, 0, 0, 2 * Math.PI); - ctx.stroke(); - } - break; - case 'noise': - // Draw preview rectangle for noise area - const rectX = Math.min(startX, pos.x); - const rectY = Math.min(startY, pos.y); - const rectW = Math.abs(pos.x - startX); - const rectH = Math.abs(pos.y - startY); - ctx.strokeRect(rectX, rectY, rectW, rectH); - break; - } - ctx.setLineDash([]); // Reset line dash - - // debug the mouse position by draw a white square - ctx.fillStyle = '#fff'; - ctx.fillRect(pos.x - 10, pos.y - 10, 20, 20); -} - -function handleMouseUp(event) { - if (!isDrawing || !activeTool || !currentSpecData) return; - isDrawing = false; - const endPos = getMousePos(event); - - let newShape = null; - - switch (activeTool) { - case 'line': { - const startCoords = canvasToSpectrogramCoords(startX, startY, currentSpecData); - const endCoords = canvasToSpectrogramCoords(endPos.x, endPos.y, currentSpecData); - - newShape = { - type: 'line', - // Canvas coordinates for drawing visual representation (unchanged) - x1: startX, y1: startY, - x2: endPos.x, y2: endPos.y, - // World coordinates for SDF calculations (frame and log-scaled frequency) - frame1_world: startCoords.frame, - freq1_world: binIndexToFreqLog(startCoords.bin), - frame2_world: endCoords.frame, - freq2_world: binIndexToFreqLog(endCoords.bin), - amplitude: 0.5, // Default amplitude - width: 2, // Visual width in canvas pixels, not directly used by SDF, but kept for drawing - color: 'red', - falloff: SDF_FALLOFF_FACTOR, // SDF falloff factor - }; - break; - } - case 'ellipse': { - const rx = Math.abs(endPos.x - startX) / 2; - const ry = Math.abs(endPos.y - startY) / 2; - const cx = startX + (pos.x - startX) / 2; - const cy = startY + (pos.y - startY) / 2; - - const centerCoords = canvasToSpectrogramCoords(cx, cy, currentSpecData); - const halfWidthFrames = Math.floor((rx / canvas.width) * currentSpecData.header.num_frames); - - const startFreq = canvasYToFreqLog(startY, canvas.height); - const endFreq = canvasYToFreqLog(endPos.y, canvas.height); - const centerFreq = (startFreq + endFreq) / 2; - const halfHeightFreq = Math.abs(startFreq - endFreq) / 2; - - - newShape = { - type: 'ellipse', - // Canvas coordinates for drawing visual representation (unchanged) - cx: cx, cy: cy, - rx: rx, ry: ry, - // World coordinates for SDF calculations - center_frame_world: centerCoords.frame, - center_freq_world: centerFreq, - radius_frames_world: halfWidthFrames, - radius_freq_world: halfHeightFreq, - amplitude: 0.5, - color: 'green', - falloff: SDF_FALLOFF_FACTOR, - }; - break; - } - case 'noise': { - const rectX = Math.min(startX, endPos.x); - const rectY = Math.min(startY, endPos.y); - const rectW = Math.abs(endPos.x - startX); - const rectH = Math.abs(endPos.y - startY); - - const startCoords = canvasToSpectrogramCoords(rectX, rectY, currentSpecData); - const endCoords = canvasToSpectrogramCoords(rectX + rectW, rectY + rectH, currentSpecData); - - const centerFrame = Math.floor((startCoords.frame + endCoords.frame) / 2); - const centerFreq = (binIndexToFreqLog(startCoords.bin) + binIndexToFreqLog(endCoords.bin)) / 2; - const halfExtentFrames = Math.floor(Math.abs(endCoords.frame - startCoords.frame) / 2); - const halfExtentFreq = Math.abs(binIndexToFreqLog(endCoords.bin) - binIndexToFreqLog(startCoords.bin)) / 2; - - newShape = { - type: 'noise_rect', - // Canvas coordinates for drawing visual representation (unchanged) - x: rectX, y: rectY, - width: rectW, height: rectH, - // World coordinates for SDF calculations - center_frame_world: centerFrame, - center_freq_world: centerFreq, - half_extent_frames_world: halfExtentFrames, - half_extent_freq_world: halfExtentFreq, - amplitude: 0.3, // Default noise amplitude - density: 0.5, // Default noise density - color: 'blue', - falloff: 0.0, // No falloff for pure noise inside rect - }; - break; - } - } - - if (newShape) { - // Capture the state *before* applying the new shape for undo - const previousDataSnapshot = new Float32Array(currentSpecData.data); // Copy of actual data - const previousShapesSnapshot = shapes.map(s => ({ ...s })); // Deep copy shapes array - - applyShapeToSpectrogram(newShape, currentSpecData); // Modify currentSpecData directly - shapes.push(newShape); - addAction({ - type: 'add_shape', - shape: newShape, - undo: () => { - // To undo, restore previous shapes and previous data - shapes = previousShapesSnapshot; - currentSpecData.data = previousDataSnapshot; - }, - redo: () => { - // To redo, add the shape back and apply it to current data - shapes.push(newShape); - applyShapeToSpectrogram(newShape, currentSpecData); - } - }); - } - redrawCanvas(); // Final redraw after action - updateUndoRedoButtons(); -} - -// --- Spectrogram Data Manipulation --- -function applyShapeToSpectrogram(shape, targetSpecData) { - if (!targetSpecData || !targetSpecData.data || targetSpecData.header.num_frames === 0) return; - - const numFrames = targetSpecData.header.num_frames; - - // Determine a bounding box for optimization (iterate only relevant cells) - let minFrame = 0, maxFrame = numFrames - 1; - let minBin = 0, maxBin = dctSize - 1; - - // Calculate tighter bounding boxes for each shape type - switch (shape.type) { - case 'line': - minFrame = Math.min(shape.frame1_world, shape.frame2_world) - Math.ceil(shape.width / 2); - maxFrame = Math.max(shape.frame1_world, shape.frame2_world) + Math.ceil(shape.width / 2); - // For frequency, approximate by visual width or a fixed range if needed - minBin = freqToBinIndex(Math.min(shape.freq1_world, shape.freq2_world)) - Math.ceil(shape.width / 2); - maxBin = freqToBinIndex(Math.max(shape.freq1_world, shape.freq2_world)) + Math.ceil(shape.width / 2); - break; - case 'ellipse': - minFrame = shape.center_frame_world - shape.radius_frames_world - 1; - maxFrame = shape.center_frame_world + shape.radius_frames_world + 1; - minBin = freqToBinIndex(shape.center_freq_world - shape.radius_freq_world) - 1; // Approx bin range from world freq - maxBin = freqToBinIndex(shape.center_freq_world + shape.radius_freq_world) + 1; // Approx bin range from world freq - break; - case 'noise_rect': - minFrame = shape.center_frame_world - shape.half_extent_frames_world - 1; - maxFrame = shape.center_frame_world + shape.half_extent_frames_world + 1; - minBin = freqToBinIndex(shape.center_freq_world - shape.half_extent_freq_world) - 1; // Approx bin range from world freq - maxBin = freqToBinIndex(shape.center_freq_world + shape.half_extent_freq_world) + 1; // Approx bin range from world freq - break; - } - - minFrame = Math.max(0, minFrame); - maxFrame = Math.min(targetSpecData.header.num_frames - 1, maxFrame); // Use targetSpecData.header.num_frames - minBin = Math.max(0, minBin); - maxBin = Math.min(dctSize - 1, maxBin); - - for (let f = minFrame; f <= maxFrame; f++) { - for (let b = minBin; b <= maxBin; b++) { - const p_world = vec2(f, binIndexToFreqLog(b)); - let distance = Infinity; - - switch (shape.type) { - case 'line': - const a_line = vec2(shape.frame1_world, shape.freq1_world); - const b_line = vec2(shape.frame2_world, shape.freq2_world); - distance = sdSegment(p_world, a_line, b_line); - break; - case 'ellipse': - const center_ellipse = vec2(shape.center_frame_world, shape.center_freq_world); - const r_ellipse = vec2(shape.radius_frames_world, shape.radius_freq_world); - distance = sdEllipse(sub(p_world, center_ellipse), r_ellipse); - break; - case 'noise_rect': - const center_box = vec2(shape.center_frame_world, shape.center_freq_world); - const r_box = vec2(shape.half_extent_frames_world, shape.half_extent_freq_world); - distance = sdBox(sub(p_world, center_box), r_box); - if (distance <= 0 && Math.random() < shape.density) { // Only add noise inside the box with density - targetSpecData.data[f * dctSize + b] += (Math.random() * 2 - 1) * shape.amplitude; - } - break; - } - - if (shape.type !== 'noise_rect') { // Noise is handled differently for amplitude - const attenuation = Math.exp(-distance * distance * shape.falloff); - targetSpecData.data[f * dctSize + b] += shape.amplitude * attenuation; - } - - // Clamp final value - targetSpecData.data[f * dctSize + b] = Math.max(-1, Math.min(1, targetSpecData.data[f * dctSize + b])); - } - } -} - -// --- Undo/Redo Logic --- -function addAction(action) { - undoStack.push(action); - if (undoStack.length > MAX_HISTORY_SIZE) { - undoStack.shift(); - } - redoStack = []; - updateUndoRedoButtons(); -} - -function handleUndo() { - if (undoStack.length === 0) { - console.log('Undo stack is empty.'); - return; - } - const actionToUndo = undoStack.pop(); - actionToUndo.undo(); - redoStack.push(actionToUndo); - redrawCanvas(); - updateUndoRedoButtons(); -} - -function handleRedo() { - if (redoStack.length === 0) { - console.log('Redo stack is empty.'); - return; - } - - const actionToRedo = redoStack.pop(); - actionToRedo.redo(); - undoStack.push(actionToRedo); - redrawCanvas(); - updateUndoRedoButtons(); -} - -function redrawCanvas() { - console.log('Redrawing canvas...'); - if (!originalSpecData) { - ctx.clearRect(0, 0, canvas.width, canvas.height); - ctx.fillStyle = '#ffffff'; - ctx.fillRect(0, 0, canvas.width, canvas.height); - return; - } - - // Start with a fresh copy of the original data - currentSpecData.data = new Float32Array(originalSpecData.data); - - // Replay all shapes from the `shapes` array to `currentSpecData` - shapes.forEach(shape => { - applyShapeToSpectrogram(shape, currentSpecData); - }); - - drawSpectrogram(currentSpecData); -} - -function updateUndoRedoButtons() { - undoButton.disabled = undoStack.length === 0; - redoButton.disabled = redoStack.length === 0; -} - -// Initial setup for canvas size (can be updated on window resize) -window.addEventListener('resize', () => { - if (originalSpecData) { - canvas.width = window.innerWidth * 0.7; - canvas.height = 400; // Fixed height - redrawCanvas(); - } -}); - -// Initial call to set button states -updateUndoRedoButtons(); - -// --- Audio Playback Functions --- -let currentAudioSource = null; // To stop currently playing audio - -async function playSpectrogramData(specData) { - if (!specData || !specData.data || specData.header.num_frames === 0) { - alert("No spectrogram data to play."); - return; - } - - if (currentAudioSource) { - currentAudioSource.stop(); - currentAudioSource.disconnect(); - currentAudioSource = null; - } - - const sampleRate = SAMPLE_RATE; // Fixed sample rate - const numFrames = specData.header.num_frames; - const totalAudioSamples = numFrames * dctSize; // Total samples in time domain - - const audioBuffer = audioContext.createBuffer(1, totalAudioSamples, sampleRate); - const audioData = audioBuffer.getChannelData(0); // Mono channel - - // Convert spectrogram frames (frequency domain) to audio samples (time domain) - for (let frameIndex = 0; frameIndex < numFrames; frameIndex++) { - const spectralFrame = specData.data.slice(frameIndex * dctSize, (frameIndex + 1) * dctSize); - const timeDomainFrame = javascript_idct_512(spectralFrame); - - // Apply Hanning window for smooth transitions - for (let i = 0; i < dctSize; i++) { - audioData[frameIndex * dctSize + i] = timeDomainFrame[i] * hanningWindowArray[i]; - } - } - - currentAudioSource = audioContext.createBufferSource(); - currentAudioSource.buffer = audioBuffer; - currentAudioSource.connect(audioContext.destination); - currentAudioSource.start(); - - console.log(`Playing audio (Sample Rate: ${sampleRate}, Duration: ${audioBuffer.duration.toFixed(2)}s)`); -} diff --git a/tools/editor/sdf.js b/tools/editor/sdf.js deleted file mode 100644 index c68d79a..0000000 --- a/tools/editor/sdf.js +++ /dev/null @@ -1,39 +0,0 @@ -// --- Signed Distance Functions (SDFs) --- -// Generic 2D vector operations -function vec2(x, y) { return { x: x, y: y }; } -function length(v) { return Math.sqrt(v.x * v.x + v.y * v.y); } -function dot(v1, v2) { return v1.x * v2.x + v1.y * v2.y; } -function sub(v1, v2) { return vec2(v1.x - v2.x, v1.y - v2.y); } -function mul(v, s) { return vec2(v.x * s, v.y * s); } -function div(v, s) { return vec2(v.x / s, v.y / s); } -function normalize(v) { return div(v, length(v)); } -function clamp(x, minVal, maxVal) { return Math.max(minVal, Math.min(x, maxVal)); } -function abs(v) { return vec2(Math.abs(v.x), Math.abs(v.y)); } -function max(v1, v2) { return vec2(Math.max(v1.x, v2.x), Math.max(v1.y, v2.y)); } -function sign(x) { return (x > 0) ? 1 : ((x < 0) ? -1 : 0); } - -// sdSegment(p, a, b) - signed distance to a line segment -// p: point, a: segment start, b: segment end -function sdSegment(p, a, b) { - const pa = sub(p, a); - const ba = sub(b, a); - const h = clamp(dot(pa, ba) / dot(ba, ba), 0.0, 1.0); - return length(sub(pa, mul(ba, h))); -} - -// sdEllipse(p, r) - signed distance to an ellipse (p relative to center, r is half-extents) -// p: point relative to ellipse center, r: half-extents (rx, ry) -function sdEllipse(p, r) { - const k0 = vec2(1, length(div(p, r))); - const k1 = vec2(length(div(p, r)), 1); - const f = ((dot(div(mul(p, p), k0), vec2(1, 1)) < dot(div(mul(p, p), k1), vec2(1, 1))) ? k0 : k1); - return length(sub(p, mul(r, normalize(mul(f, p))))) * sign(length(p) - r.x); // Simplified, original has length(p)-r.x which is only for circular -} - -// sdBox(p, r) - signed distance to a rectangle (p relative to center, r is half-extents) -// p: point relative to box center, r: half-extents (hx, hy) -function sdBox(p, r) { - const q = sub(abs(p), r); - return length(max(q, vec2(0, 0))) + Math.min(0.0, Math.max(q.x, q.y)); -} - diff --git a/tools/editor/style.css b/tools/editor/style.css deleted file mode 100644 index e0014cf..0000000 --- a/tools/editor/style.css +++ /dev/null @@ -1,79 +0,0 @@ -body { - font-family: sans-serif; - margin: 20px; - background-color: #f4f4f4; -} - -h1, h2 { - color: #333; -} - -#editorContainer { - display: flex; - margin-top: 20px; -} - -#spectrogramCanvas { - border: 1px solid #ccc; - background-color: #fff; - margin-right: 20px; -} - -#controls { - border: 1px solid #ccc; - padding: 15px; - background-color: #eee; - min-width: 200px; -} - -#controls button { - display: block; - width: 100%; - margin-bottom: 10px; - padding: 10px; - cursor: pointer; -} - -#undoButton { - background-color: #d9534f; - color: white; - border: none; - border-radius: 4px; -} - -#undoButton:hover { - background-color: #c9302c; -} - -#redoButton { - background-color: #5cb85c; - color: white; - border: none; - border-radius: 4px; -} - -#redoButton:hover { - background-color: #4cae4c; -} - -/* New styles for playback buttons */ -#listenOriginalButton, -#listenGeneratedButton { - background-color: #5bc0de; - color: white; - border: none; - border-radius: 4px; - margin-top: 5px; -} - -#listenOriginalButton:hover, -#listenGeneratedButton:hover { - background-color: #31b0d5; -} - -hr { - border: 0; - height: 1px; - background-color: #ccc; - margin: 20px 0; -} diff --git a/tools/specplay.cc b/tools/specplay.cc new file mode 100644 index 0000000..9fa9355 --- /dev/null +++ b/tools/specplay.cc @@ -0,0 +1,212 @@ +// Standalone tool to play .spec or .wav files for debugging +// Usage: ./specplay <file.spec|file.wav> + +#include "audio/dct.h" +#include "audio/window.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define MINIAUDIO_IMPLEMENTATION +#include "miniaudio.h" + +struct PlaybackState { + float* pcm_data; + size_t num_samples; + size_t playback_pos; +}; + +void audio_callback(ma_device* device, void* output, const void* input, + ma_uint32 frame_count) { + PlaybackState* state = (PlaybackState*)device->pUserData; + float* out = (float*)output; + + for (ma_uint32 i = 0; i < frame_count; i++) { + if (state->playback_pos < state->num_samples) { + float sample = state->pcm_data[state->playback_pos++]; + // Clamp to [-1, 1] and warn if clipping + if (sample > 1.0f || sample < -1.0f) { + fprintf(stderr, "[CLIP at sample %zu: %.3f]\n", state->playback_pos - 1, + sample); + sample = (sample > 1.0f) ? 1.0f : -1.0f; + } + out[i * 2] = sample; // Left + out[i * 2 + 1] = sample; // Right (mono) + } else { + out[i * 2] = 0.0f; + out[i * 2 + 1] = 0.0f; + } + } +} + +float* load_spec(const char* path, size_t* out_num_samples) { + FILE* f = fopen(path, "rb"); + if (!f) { + fprintf(stderr, "Failed to open %s\n", path); + return nullptr; + } + + // Read SpecHeader + struct SpecHeader { + char magic[4]; + int32_t version; + int32_t dct_size; + int32_t num_frames; + }; + + SpecHeader header; + if (fread(&header, sizeof(SpecHeader), 1, f) != 1) { + fprintf(stderr, "Failed to read SpecHeader\n"); + fclose(f); + return nullptr; + } + + // Validate header + if (memcmp(header.magic, "SPEC", 4) != 0) { + fprintf(stderr, "Invalid magic bytes (expected 'SPEC')\n"); + fclose(f); + return nullptr; + } + + printf("Loading .spec: version=%d, dct_size=%d, frames=%d\n", header.version, + header.dct_size, header.num_frames); + + uint32_t num_frames = header.num_frames; + + // Read spectral data + size_t spec_size = num_frames * DCT_SIZE; + float* spec_data = (float*)malloc(spec_size * sizeof(float)); + if (fread(spec_data, sizeof(float), spec_size, f) != spec_size) { + fprintf(stderr, "Failed to read spectral data\n"); + free(spec_data); + fclose(f); + return nullptr; + } + fclose(f); + + // Convert to PCM via IDCT + *out_num_samples = spec_size; + float* pcm_data = (float*)malloc(*out_num_samples * sizeof(float)); + + for (uint32_t frame = 0; frame < num_frames; frame++) { + const float* spectral_frame = spec_data + (frame * DCT_SIZE); + float* time_frame = pcm_data + (frame * DCT_SIZE); + idct_512(spectral_frame, time_frame); + } + + free(spec_data); + + // Analyze PCM statistics + float peak = 0.0f, rms_sum = 0.0f; + for (size_t i = 0; i < *out_num_samples; i++) { + float abs_val = fabsf(pcm_data[i]); + if (abs_val > peak) + peak = abs_val; + rms_sum += pcm_data[i] * pcm_data[i]; + } + float rms = sqrtf(rms_sum / *out_num_samples); + + printf("PCM stats: Peak=%.3f, RMS=%.3f\n", peak, rms); + if (peak > 1.0f) { + printf("[WARNING] Peak exceeds 1.0! Will clip during playback.\n"); + } + + return pcm_data; +} + +float* load_wav(const char* path, size_t* out_num_samples) { + ma_decoder decoder; + ma_decoder_config config = ma_decoder_config_init(ma_format_f32, 1, 32000); + + if (ma_decoder_init_file(path, &config, &decoder) != MA_SUCCESS) { + fprintf(stderr, "Failed to open WAV file: %s\n", path); + return nullptr; + } + + ma_uint64 frame_count; + ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count); + *out_num_samples = (size_t)frame_count; + + float* pcm_data = (float*)malloc(*out_num_samples * sizeof(float)); + ma_decoder_read_pcm_frames(&decoder, pcm_data, frame_count, nullptr); + ma_decoder_uninit(&decoder); + + printf("Loaded .wav: %zu samples\n", *out_num_samples); + + // Analyze PCM statistics + float peak = 0.0f, rms_sum = 0.0f; + for (size_t i = 0; i < *out_num_samples; i++) { + float abs_val = fabsf(pcm_data[i]); + if (abs_val > peak) + peak = abs_val; + rms_sum += pcm_data[i] * pcm_data[i]; + } + float rms = sqrtf(rms_sum / *out_num_samples); + + printf("PCM stats: Peak=%.3f, RMS=%.3f\n", peak, rms); + + return pcm_data; +} + +int main(int argc, char** argv) { + if (argc != 2) { + fprintf(stderr, "Usage: %s <file.spec|file.wav>\n", argv[0]); + return 1; + } + + const char* path = argv[1]; + const char* ext = strrchr(path, '.'); + + PlaybackState state = {}; + + if (ext && strcmp(ext, ".spec") == 0) { + state.pcm_data = load_spec(path, &state.num_samples); + } else if (ext && (strcmp(ext, ".wav") == 0 || strcmp(ext, ".aif") == 0)) { + state.pcm_data = load_wav(path, &state.num_samples); + } else { + fprintf(stderr, "Unknown file type: %s\n", path); + return 1; + } + + if (!state.pcm_data) { + fprintf(stderr, "Failed to load audio\n"); + return 1; + } + + printf("Playing %.2f seconds... Press Ctrl+C to stop.\n", + (float)state.num_samples / 32000.0f); + + // Initialize miniaudio + ma_device_config device_config = + ma_device_config_init(ma_device_type_playback); + device_config.playback.format = ma_format_f32; + device_config.playback.channels = 2; + device_config.sampleRate = 32000; + device_config.dataCallback = audio_callback; + device_config.pUserData = &state; + + ma_device device; + if (ma_device_init(NULL, &device_config, &device) != MA_SUCCESS) { + fprintf(stderr, "Failed to initialize audio device\n"); + free(state.pcm_data); + return 1; + } + + if (ma_device_start(&device) != MA_SUCCESS) { + fprintf(stderr, "Failed to start audio device\n"); + ma_device_uninit(&device); + free(state.pcm_data); + return 1; + } + + // Wait for playback to finish + while (state.playback_pos < state.num_samples) { + ma_sleep(100); + } + + ma_device_uninit(&device); + free(state.pcm_data); + + printf("Playback complete.\n"); + return 0; +} diff --git a/tools/specplay_README.md b/tools/specplay_README.md new file mode 100644 index 0000000..72d9ec1 --- /dev/null +++ b/tools/specplay_README.md @@ -0,0 +1,164 @@ +# specplay - Audio Analysis & Playback Tool + +Standalone diagnostic tool for analyzing and playing .spec spectrogram files and .wav audio files. + +## Usage + +```bash +./build/specplay <file.spec|file.wav> +``` + +## Features + +### Current (v1.0) +- ✅ Plays .spec files via IDCT synthesis (matches demo playback exactly) +- ✅ Plays .wav files for comparison +- ✅ Reports PCM statistics: + - **Peak level**: Maximum absolute sample value (detects clipping if > 1.0) + - **RMS level**: Root-mean-square energy (loudness measure) +- ✅ Real-time clipping detection during playback with sample position +- ✅ Validates .spec file format (magic bytes, version, DCT size) + +### Example Output +``` +Loading .spec: version=1, dct_size=512, frames=68 +PCM stats: Peak=0.403, RMS=0.058 +Playing 1.09 seconds... Press Ctrl+C to stop. +Playback complete. +``` + +## Use Cases + +1. **Debugging Audio Issues** + - Quickly identify which samples have clipping (Peak > 1.0) + - Compare .wav source vs .spec output to detect analysis artifacts + - Verify spectrogram regeneration after DCT changes + +2. **Quality Assurance** + - Batch test all .spec files for clipping before committing + - Measure loudness consistency across samples (RMS levels) + - Validate spectrograms match expected characteristics + +3. **Development Workflow** + - Test individual samples without running full demo + - A/B compare different spectrogram generation parameters + - Verify procedural note generation output + +## Technical Details + +- **Sample Rate**: 32kHz (matches demo audio engine) +- **Format**: Mono (duplicated to stereo for playback) +- **IDCT**: Uses same `idct_512()` function as demo (bit-exact) +- **Clamping**: Clipping detected but samples clamped to [-1, 1] for playback + +## Future Enhancement Ideas + +### Priority 1: Analysis Features +- [ ] **Spectral visualization**: ASCII art frequency plot +- [ ] **Waveform display**: Time-domain amplitude graph +- [ ] **Frequency analysis**: Dominant frequency, spectral centroid +- [ ] **Dynamic range**: Measure headroom, suggest normalization +- [ ] **Duration info**: Show seconds, samples, frames + +### Priority 2: Comparison Tools +- [ ] **Diff mode**: `specplay --compare file.wav file.spec` + - Side-by-side PCM stats + - RMS error, peak difference + - Correlation coefficient +- [ ] **Batch mode**: `specplay --batch assets/final/*.spec` + - Generate CSV report of all stats + - Sort by peak level (find clipping candidates) + - Find outliers (unusually loud/quiet samples) + +### Priority 3: Export Features +- [ ] **WAV export**: `specplay file.spec --export output.wav` + - Convert .spec → .wav for external analysis + - Useful for importing into audio editors +- [ ] **Normalization**: `specplay file.spec --normalize --export normalized.wav` + - Auto-scale to target peak/RMS + - Preserve transients + +### Priority 4: Advanced Analysis +- [ ] **Spectral envelope**: Extract formants, resonances +- [ ] **Harmonic analysis**: Detect fundamental frequency, harmonics +- [ ] **Onset detection**: Find transient attacks (kick/snare hits) +- [ ] **Spectral flux**: Measure frequency content change over time + +### Priority 5: Interactive Mode +- [ ] **Seek controls**: Play from specific time offset +- [ ] **Loop mode**: Repeat playback indefinitely +- [ ] **Volume control**: Adjust playback gain +- [ ] **Real-time waveform**: Live visualization during playback + +## Integration with Build System + +Built automatically when `DEMO_BUILD_TOOLS=ON`: +```bash +cmake -S . -B build -DDEMO_BUILD_TOOLS=ON +cmake --build build --target specplay +``` + +Or with all options: +```bash +cmake -S . -B build -DDEMO_ALL_OPTIONS=ON +cmake --build build +``` + +## Code Architecture + +- **Minimal dependencies**: Only uses audio subsystem + miniaudio +- **Self-contained**: No GPU, no platform layer (pure audio) +- **Size**: ~250 lines (easy to extend) +- **Format support**: + - `.spec` via SpecHeader parser + - `.wav` via miniaudio decoder (also handles .aif, .mp3) + +## Related Tools + +- **spectool**: Analyzes .wav → generates .spec (batch processing) +- **specview**: ASCII visualization of .spec frequency content +- **specplay**: This tool (playback + analysis) + +## Troubleshooting + +**"Failed to read SpecHeader"** +- File is not a valid .spec file +- Try with spectool to regenerate: `./build/spectool analyze input.wav output.spec` + +**"Peak exceeds 1.0! Will clip during playback."** +- Spectrogram has too much energy +- Likely needs regeneration after DCT changes +- Or source .wav is already clipping + +**No audio output** +- Check system audio device +- Ensure 32kHz sample rate is supported +- Try with a different .spec file (may be empty/silent) + +## Examples + +### Find all clipping samples +```bash +for f in assets/final/*.spec; do + ./build/specplay "$f" | grep "WARNING" && echo "$f" +done +``` + +### Compare wav source to spec output +```bash +./build/specplay assets/originals/kick.wav +./build/specplay assets/final/KICK.spec +# Compare Peak/RMS values +``` + +### Quick loudness check +```bash +./build/specplay assets/final/KICK_606.spec | grep "RMS" +# RMS > 0.3: loud, RMS < 0.1: quiet +``` + +--- + +**Last Updated**: February 6, 2026 +**Author**: Claude (AI assistant) +**Status**: Production-ready, actively used for debugging diff --git a/tools/spectool.cc b/tools/spectool.cc index 67e9ff3..4cd98c7 100644 --- a/tools/spectool.cc +++ b/tools/spectool.cc @@ -26,8 +26,13 @@ // float[num_frames * dct_size] data // struct SpecHeader { ... } -> now in audio.h -int analyze_audio(const char* in_path, const char* out_path) { - printf("Analyzing %s -> %s\n", in_path, out_path); +int analyze_audio(const char* in_path, const char* out_path, bool normalize, + float target_rms) { + printf("Analyzing %s -> %s", in_path, out_path); + if (normalize) { + printf(" (normalizing to RMS=%.3f)", target_rms); + } + printf("\n"); // Use higher quality resampling for better audio quality // Source files are typically 44.1kHz or 96kHz, 16/24-bit, mono/stereo @@ -46,19 +51,84 @@ int analyze_audio(const char* in_path, const char* out_path) { return 1; } - std::vector<float> spec_data; + // First pass: Load all PCM data (needed for normalization) + std::vector<float> pcm_data; float pcm_chunk[DCT_SIZE]; - float window[WINDOW_SIZE]; - hamming_window_512(window); - ma_uint64 frames_read; while (ma_decoder_read_pcm_frames(&decoder, pcm_chunk, DCT_SIZE, &frames_read) == MA_SUCCESS && frames_read > 0) { - if (frames_read < DCT_SIZE) { - // Zero-pad the last chunk if it's smaller - memset(pcm_chunk + frames_read, 0, - (DCT_SIZE - frames_read) * sizeof(float)); + pcm_data.insert(pcm_data.end(), pcm_chunk, pcm_chunk + frames_read); + } + ma_decoder_uninit(&decoder); + + if (pcm_data.empty()) { + printf("Error: No audio data read from file.\n"); + return 1; + } + + // Calculate RMS and peak + float rms_sum = 0.0f; + float peak = 0.0f; + for (size_t i = 0; i < pcm_data.size(); ++i) { + const float abs_val = fabsf(pcm_data[i]); + if (abs_val > peak) { + peak = abs_val; + } + rms_sum += pcm_data[i] * pcm_data[i]; + } + const float original_rms = sqrtf(rms_sum / pcm_data.size()); + printf("Original: Peak=%.3f, RMS=%.3f\n", peak, original_rms); + + // Normalize if requested + float scale_factor = 1.0f; + if (normalize && original_rms > 1e-6f) { + // Calculate scale factor to reach target RMS + scale_factor = target_rms / original_rms; + + // Check if this would cause clipping (peak > 1.0 after synthesis) + // Peak amplification varies by sample (windowing + IDCT effects) + // Use conservative limit: input peak ≤ 1.0 to guarantee output peak ≤ 1.0 + const float max_safe_peak = 1.0f; + const float predicted_peak = peak * scale_factor; + + if (predicted_peak > max_safe_peak) { + // Reduce scale factor to prevent clipping + const float peak_scale = max_safe_peak / peak; + printf("Warning: RMS normalization would cause clipping (peak=%.3f)\n", + predicted_peak); + printf(" Reducing scale to prevent clipping.\n"); + scale_factor = peak_scale; + } + + printf("Normalizing: scale factor = %.3f\n", scale_factor); + printf(" RMS: %.3f -> %.3f\n", original_rms, original_rms * scale_factor); + printf(" Peak: %.3f -> %.3f\n", peak, peak * scale_factor); + + for (size_t i = 0; i < pcm_data.size(); ++i) { + pcm_data[i] *= scale_factor; + } + } + + // Second pass: Windowing + DCT + std::vector<float> spec_data; + float window[WINDOW_SIZE]; + hamming_window_512(window); + + // Process PCM data in DCT_SIZE chunks + const size_t num_chunks = (pcm_data.size() + DCT_SIZE - 1) / DCT_SIZE; + for (size_t chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) { + const size_t chunk_start = chunk_idx * DCT_SIZE; + const size_t chunk_end = + (chunk_start + DCT_SIZE < pcm_data.size()) ? chunk_start + DCT_SIZE + : pcm_data.size(); + const size_t chunk_size = chunk_end - chunk_start; + + // Copy chunk (with zero-padding if needed) + memcpy(pcm_chunk, pcm_data.data() + chunk_start, + chunk_size * sizeof(float)); + if (chunk_size < DCT_SIZE) { + memset(pcm_chunk + chunk_size, 0, (DCT_SIZE - chunk_size) * sizeof(float)); } // Apply window @@ -74,8 +144,6 @@ int analyze_audio(const char* in_path, const char* out_path) { spec_data.insert(spec_data.end(), dct_chunk, dct_chunk + DCT_SIZE); } - ma_decoder_uninit(&decoder); - // --- Trim Silent Frames --- const float epsilon = 1e-6f; int num_frames = spec_data.size() / DCT_SIZE; @@ -248,7 +316,7 @@ int test_gen(const char* out_path) { } void print_usage() { - printf("Usage: spectool <command> <input> [output]\n"); + printf("Usage: spectool <command> <input> [output] [options]\n"); printf("Commands:\n"); printf( " analyze <input.wav|.mp3> <output.spec> Analyze an audio file and " @@ -258,6 +326,11 @@ void print_usage() { printf( " test_gen <output.spec> Generate a test " "spectrogram.\n"); + printf("\nOptions for 'analyze':\n"); + printf(" --normalize [rms] Normalize audio to target RMS level (default: " + "0.15)\n"); + printf( + " Ensures consistent loudness across all samples.\n"); } int main(int argc, char** argv) { @@ -274,7 +347,27 @@ int main(int argc, char** argv) { print_usage(); return 1; } - return analyze_audio(argv[2], argv[3]); + + // Parse optional flags + bool normalize = false; + float target_rms = 0.15f; // Default target RMS + + for (int i = 4; i < argc; ++i) { + if (strcmp(argv[i], "--normalize") == 0) { + normalize = true; + // Check if next arg is a number (custom target RMS) + if (i + 1 < argc) { + char* endptr; + float custom_rms = strtof(argv[i + 1], &endptr); + if (endptr != argv[i + 1] && custom_rms > 0.0f && custom_rms < 1.0f) { + target_rms = custom_rms; + ++i; // Consume the RMS value + } + } + } + } + + return analyze_audio(argv[2], argv[3], normalize, target_rms); } else if (strcmp(command, "play") == 0) { if (argc < 3) { printf("Error: 'play' command requires an input file.\n"); diff --git a/tools/spectral_editor/FEATURES.md b/tools/spectral_editor/FEATURES.md new file mode 100644 index 0000000..6c36cc2 --- /dev/null +++ b/tools/spectral_editor/FEATURES.md @@ -0,0 +1,151 @@ +# Spectral Editor - Feature Roadmap + +This document tracks planned enhancements for the spectral editor. + +## Priority: High + +### A. Curve Translation (Shift+Click+Drag) +**Description**: Shift+click on a control point + mouse-move should displace the whole curve at a time (translate all control points). + +**Implementation Notes**: +- Detect shift key state during control point click +- Store initial positions of all control points in the curve +- Apply uniform translation delta to all points during drag +- Maintain curve shape while moving + +**Complexity**: Medium +**Estimated Effort**: 2-3 hours + +--- + +### B. Viewport Zoom (Mouse Wheel) +**Description**: Mouse-wheel should allow zooming in/out on the view for fine placement of curves. + +**Implementation Notes**: +- Implement zoom scale factor (e.g., 0.5x to 4.0x) +- Center zoom around mouse cursor position +- Update rendering to use scaled coordinates +- Add visual zoom indicator (e.g., "Zoom: 2.0x") +- Consider pan functionality (drag with middle mouse or space+drag) + +**Complexity**: High (coordinate transformation, pan interaction) +**Estimated Effort**: 6-8 hours + +--- + +## Priority: Medium + +### C. Enhanced Sinusoid Pattern +**Description**: The 'sinusoid' pattern is quite interesting and should have more variations. + +**Proposed Variations**: +- **Asymmetric Decay**: Different decay rates above and below the curve center + - `decay_top` parameter (controls upper harmonics falloff) + - `decay_bottom` parameter (controls lower harmonics falloff) +- **Temporal Modulation**: Per-frame amplitude/frequency modulation along timeline + - `amplitude_envelope` (fade in/out over time) + - `frequency_drift` (vibrato/wobble effect) + - `phase_offset` (shift pattern over time) +- **Harmonic Series**: Option to generate harmonic overtones + - `num_harmonics` parameter + - `harmonic_decay` parameter + +**Implementation Notes**: +- Extend `SinusoidProfile` class with additional parameters +- Add UI controls for new parameters (sliders, dropdowns) +- Render preview showing modulation over time + +**Complexity**: Medium-High +**Estimated Effort**: 8-12 hours + +--- + +### D. Per-Control-Point Modulation +**Description**: Each control point should be assigned individually controllable volume, decay params, etc. for fine modulation along time. + +**Proposed Parameters (per control point)**: +- `volume`: Local amplitude multiplier (0.0 - 2.0) +- `decay`: Local decay rate override +- `width`: Gaussian width override (for profile spreading) +- `phase`: Phase offset for sinusoid patterns +- `color`: Visual indicator for parameter variations + +**Implementation Notes**: +- Extend control point data structure with parameter fields +- Add per-point property panel (show on control point selection) +- Render visual hints (color-coded points, size variations) +- Interpolate parameters between control points for smooth transitions + +**Complexity**: High (UI/UX design, parameter interpolation) +**Estimated Effort**: 10-15 hours + +--- + +### E. Composable Profiles +**Description**: Profiles should be composable along a curve (e.g., apply Gaussian curve to sinusoid pattern). + +**Proposed Syntax**: +```cpp +// Example: Gaussian-modulated sinusoid +CompositeProfile { + base: SinusoidProfile { frequency: 100.0, decay: 0.5 }, + envelope: GaussianProfile { center: 256, width: 50 } +} +``` + +**Implementation Notes**: +- Define profile composition operators: + - `multiply`: Envelope modulation (amplitude × profile) + - `add`: Additive blending (profile1 + profile2) + - `max`: Take maximum value at each bin +- Add UI for profile layering (drag-and-drop profile stack) +- Render composite preview with layer visualization + +**Complexity**: High (requires profile abstraction refactor) +**Estimated Effort**: 12-16 hours + +--- + +## Priority: Low (Polish) + +### F. Improved Parameter Sliders +**Description**: Adjust slider ranges for better usability (Decay, Width, Frequency, etc.). + +**Issues to Address**: +- Decay slider: Non-linear scaling (logarithmic?) for finer control at low values +- Frequency slider: Snap to musical notes (optional A440-based grid) +- Width slider: Preview visualization (show affected frequency range) +- General: Add numeric input fields next to sliders for precise values + +**Implementation Notes**: +- Implement logarithmic slider interpolation for decay/width +- Add slider tick marks at useful intervals +- Display current value and units (Hz, bins, dB, etc.) +- Add reset-to-default buttons + +**Complexity**: Low-Medium +**Estimated Effort**: 3-4 hours + +--- + +## Future Ideas (Backlog) + +- **Undo/Redo System**: Track edit history for curve modifications +- **Preset Library**: Save/load common curve patterns (kick drum, snare, bass, etc.) +- **Curve Smoothing**: Apply smoothing filters to jittery control points +- **Copy/Paste**: Duplicate curves or control point selections +- **Multi-Selection**: Select and edit multiple control points simultaneously +- **Grid Snapping**: Snap control points to frequency/time grid +- **Export Options**: Export to different formats (JSON, binary, C++ code) + +--- + +## Total Estimated Effort +- **High Priority**: 8-11 hours +- **Medium Priority**: 30-43 hours +- **Low Priority**: 3-4 hours +- **Grand Total**: 41-58 hours (roughly 1-1.5 weeks of focused work) + +--- + +*Last Updated: February 6, 2026* diff --git a/tools/spectral_editor/dct.js b/tools/spectral_editor/dct.js index deff8a9..435a7e8 100644 --- a/tools/spectral_editor/dct.js +++ b/tools/spectral_editor/dct.js @@ -1,20 +1,10 @@ const dctSize = 512; // Default DCT size, read from header // --- Utility Functions for Audio Processing --- +// Fast O(N log N) IDCT using FFT // JavaScript equivalent of C++ idct_512 function javascript_idct_512(input) { - const output = new Float32Array(dctSize); - const PI = Math.PI; - const N = dctSize; - - for (let n = 0; n < N; ++n) { - let sum = input[0] / 2.0; - for (let k = 1; k < N; ++k) { - sum += input[k] * Math.cos((PI / N) * k * (n + 0.5)); - } - output[n] = sum * (2.0 / N); - } - return output; + return javascript_idct_512_fft(input); } // Hanning window for smooth audio transitions (JavaScript equivalent) @@ -127,95 +117,90 @@ function fftInverse(real, imag, N) { } } -// DCT-II via FFT using double-and-mirror method (matches C++ dct_fft) -// This is a more robust algorithm that avoids reordering issues +// DCT-II via FFT using reordering method (matches C++ dct_fft) +// Reference: Numerical Recipes Chapter 12.3 function javascript_dct_fft(input, N) { const PI = Math.PI; - // Allocate arrays for 2N-point FFT - const M = 2 * N; - const real = new Float32Array(M); - const imag = new Float32Array(M); + // Allocate arrays for N-point FFT + const real = new Float32Array(N); + const imag = new Float32Array(N); - // Pack input: [x[0], x[1], ..., x[N-1], x[N-1], x[N-2], ..., x[1]] - // This creates even symmetry for real-valued DCT - for (let i = 0; i < N; i++) { - real[i] = input[i]; - } - for (let i = 0; i < N; i++) { - real[N + i] = input[N - 1 - i]; + // Reorder input: even indices first, then odd indices reversed + // [x[0], x[2], x[4], ...] followed by [x[N-1], x[N-3], x[N-5], ...] + for (let i = 0; i < N / 2; i++) { + real[i] = input[2 * i]; // Even indices: 0, 2, 4, ... + real[N - 1 - i] = input[2 * i + 1]; // Odd indices reversed: N-1, N-3, ... } // imag is already zeros (Float32Array default) - // Apply 2N-point FFT - fftForward(real, imag, M); + // Apply N-point FFT + fftForward(real, imag, N); - // Extract DCT coefficients + // Extract DCT coefficients with phase correction // DCT[k] = Re{FFT[k] * exp(-j*pi*k/(2*N))} * normalization - // Note: Need to divide by 2 because we doubled the signal length const output = new Float32Array(N); for (let k = 0; k < N; k++) { const angle = -PI * k / (2.0 * N); const wr = Math.cos(angle); const wi = Math.sin(angle); - // Complex multiplication: (real + j*imag) * (wr + j*wi) + // Complex multiplication: (real[k] + j*imag[k]) * (wr + j*wi) // Real part: real*wr - imag*wi const dct_value = real[k] * wr - imag[k] * wi; - // Apply DCT-II normalization (divide by 2 for double-length FFT) + // Apply DCT-II normalization if (k === 0) { - output[k] = dct_value * Math.sqrt(1.0 / N) / 2.0; + output[k] = dct_value * Math.sqrt(1.0 / N); } else { - output[k] = dct_value * Math.sqrt(2.0 / N) / 2.0; + output[k] = dct_value * Math.sqrt(2.0 / N); } } return output; } -// IDCT (Inverse DCT-II) via FFT using double-and-mirror method (matches C++ idct_fft) +// IDCT (DCT-III) via FFT using reordering method (matches C++ idct_fft) +// Reference: Numerical Recipes Chapter 12.3 function javascript_idct_fft(input, N) { const PI = Math.PI; - // Allocate arrays for 2N-point FFT - const M = 2 * N; - const real = new Float32Array(M); - const imag = new Float32Array(M); + // Allocate arrays for N-point FFT + const real = new Float32Array(N); + const imag = new Float32Array(N); - // Prepare FFT input from DCT coefficients - // IDCT = Re{IFFT[DCT * exp(j*pi*k/(2*N))]} * 2 + // Prepare FFT input with inverse phase correction + // FFT[k] = DCT[k] * exp(+j*pi*k/(2*N)) / normalization + // Note: DCT-III needs factor of 2 for AC terms for (let k = 0; k < N; k++) { - const angle = PI * k / (2.0 * N); // Positive for inverse + const angle = PI * k / (2.0 * N); // Positive angle for inverse const wr = Math.cos(angle); const wi = Math.sin(angle); - // Apply inverse normalization - let scaled_input; + // Inverse of DCT-II normalization with correct DCT-III scaling + let scaled; if (k === 0) { - scaled_input = input[k] * Math.sqrt(N) * 2.0; + scaled = input[k] / Math.sqrt(1.0 / N); } else { - scaled_input = input[k] * Math.sqrt(N / 2.0) * 2.0; + // DCT-III needs factor of 2 for AC terms + scaled = input[k] / Math.sqrt(2.0 / N) * 2.0; } - // Complex multiplication: DCT[k] * exp(j*theta) - real[k] = scaled_input * wr; - imag[k] = scaled_input * wi; - } - - // Fill second half with conjugate symmetry (for real output) - for (let k = 1; k < N; k++) { - real[M - k] = real[k]; - imag[M - k] = -imag[k]; + // Complex multiplication: scaled * (wr + j*wi) + real[k] = scaled * wr; + imag[k] = scaled * wi; } // Apply inverse FFT - fftInverse(real, imag, M); + fftInverse(real, imag, N); - // Extract first N samples (real part only, imag should be ~0) + // Unpack: reverse the reordering from DCT + // Even output indices come from first half of FFT output + // Odd output indices come from second half (reversed) const output = new Float32Array(N); - for (let i = 0; i < N; i++) { - output[i] = real[i]; + for (let i = 0; i < N / 2; i++) { + output[2 * i] = real[i]; // Even positions + output[2 * i + 1] = real[N - 1 - i]; // Odd positions (reversed) } return output; diff --git a/tools/spectral_editor/script.js b/tools/spectral_editor/script.js index 48b0661..7c424f9 100644 --- a/tools/spectral_editor/script.js +++ b/tools/spectral_editor/script.js @@ -30,6 +30,8 @@ const state = { canvasHeight: 0, pixelsPerFrame: 2.0, // Zoom level (pixels per frame) pixelsPerBin: 1.0, // Vertical scale (pixels per frequency bin) + viewportOffsetX: 0, // Horizontal pan offset (pixels) + viewportOffsetY: 0, // Vertical pan offset (pixels) // Audio playback audioContext: null, @@ -94,6 +96,9 @@ function initCanvas() { // Mouse hover handlers (for crosshair) canvas.addEventListener('mousemove', onCanvasHover); canvas.addEventListener('mouseleave', onCanvasLeave); + + // Mouse wheel: zoom (with Ctrl/Cmd) or pan + canvas.addEventListener('wheel', onCanvasWheel, { passive: false }); } function initUI() { @@ -378,19 +383,9 @@ function audioToSpectrogram(audioData) { } // Forward DCT (not in dct.js, add here) +// Fast O(N log N) DCT using FFT (delegates to dct.js implementation) function javascript_dct_512(input) { - const output = new Float32Array(DCT_SIZE); - const PI = Math.PI; - const N = DCT_SIZE; - - for (let k = 0; k < N; k++) { - let sum = 0; - for (let n = 0; n < N; n++) { - sum += input[n] * Math.cos((PI / N) * k * (n + 0.5)); - } - output[k] = sum * (k === 0 ? Math.sqrt(1 / N) : Math.sqrt(2 / N)); - } - return output; + return javascript_dct_512_fft(input); } function onReferenceLoaded(fileName) { @@ -414,6 +409,9 @@ function onReferenceLoaded(fileName) { // Adjust zoom to fit state.pixelsPerFrame = Math.max(1.0, state.canvasWidth / state.referenceNumFrames); + state.pixelsPerBin = 1.0; // Reset vertical scale + state.viewportOffsetX = 0; // Reset pan + state.viewportOffsetY = 0; updateCurveUI(); updateUndoRedoButtons(); @@ -859,12 +857,62 @@ function onCanvasLeave(e) { render(); } +function onCanvasWheel(e) { + e.preventDefault(); + + const canvas = e.target; + const rect = canvas.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseY = e.clientY - rect.top; + + // Zoom mode: Ctrl/Cmd + wheel + if (e.ctrlKey || e.metaKey) { + // Calculate frame under cursor BEFORE zoom + const frameUnderCursor = (mouseX + state.viewportOffsetX) / state.pixelsPerFrame; + + // Calculate new zoom level (horizontal only - logarithmic frequency axis doesn't zoom) + const zoomFactor = e.deltaY > 0 ? 0.9 : 1.1; // Wheel down = zoom out, wheel up = zoom in + state.pixelsPerFrame = Math.max(0.5, Math.min(20.0, state.pixelsPerFrame * zoomFactor)); + + // Adjust viewport offset so frame under cursor stays in same screen position + // After zoom: new_offset = frame * newPixelsPerFrame - mouseX + state.viewportOffsetX = frameUnderCursor * state.pixelsPerFrame - mouseX; + + // Clamp viewport offset to valid range + const maxOffsetX = Math.max(0, state.referenceNumFrames * state.pixelsPerFrame - state.canvasWidth); + state.viewportOffsetX = Math.max(0, Math.min(maxOffsetX, state.viewportOffsetX)); + + render(); + return; + } + + // Pan mode: Shift + wheel (horizontal/vertical pan) + if (e.shiftKey) { + state.viewportOffsetX += e.deltaY; + const maxOffsetX = Math.max(0, state.referenceNumFrames * state.pixelsPerFrame - state.canvasWidth); + state.viewportOffsetX = Math.max(0, Math.min(maxOffsetX, state.viewportOffsetX)); + render(); + return; + } + + // Normal mode: pan vertically (disabled for logarithmic frequency axis) + // Note: With logarithmic frequency scale, vertical pan doesn't make sense + // because the frequency range (FREQ_MIN to FREQ_MAX) is always scaled to fit canvas height. + // Vertical pan only works in linear frequency mode. + if (!USE_LOG_SCALE) { + state.viewportOffsetY += e.deltaY; + const maxOffsetY = Math.max(0, DCT_SIZE * state.pixelsPerBin - state.canvasHeight); + state.viewportOffsetY = Math.max(0, Math.min(maxOffsetY, state.viewportOffsetY)); + render(); + } +} + // ============================================================================ // Coordinate Conversion // ============================================================================ function screenToSpectrogram(screenX, screenY) { - const frame = Math.round(screenX / state.pixelsPerFrame); + const frame = Math.round((screenX + state.viewportOffsetX) / state.pixelsPerFrame); let freqHz; if (USE_LOG_SCALE) { @@ -891,7 +939,7 @@ function screenToSpectrogram(screenX, screenY) { } function spectrogramToScreen(frame, freqHz) { - const x = frame * state.pixelsPerFrame; + const x = frame * state.pixelsPerFrame - state.viewportOffsetX; let y; if (USE_LOG_SCALE) { @@ -901,11 +949,11 @@ function spectrogramToScreen(frame, freqHz) { const clampedFreq = Math.max(FREQ_MIN, Math.min(FREQ_MAX, freqHz)); const logFreq = Math.log10(clampedFreq); const normalizedY = (logFreq - logMin) / (logMax - logMin); - y = state.canvasHeight * (1.0 - normalizedY); // Flip Y back to screen coords + y = state.canvasHeight * (1.0 - normalizedY) - state.viewportOffsetY; // Flip Y back to screen coords } else { // Linear frequency mapping (old behavior) const bin = (freqHz / (SAMPLE_RATE / 2)) * state.referenceDctSize; - y = state.canvasHeight - (bin * state.pixelsPerBin); + y = state.canvasHeight - (bin * state.pixelsPerBin) - state.viewportOffsetY; } return {x, y}; @@ -953,7 +1001,10 @@ function render() { function drawPlayhead(ctx) { if (!state.isPlaying || state.playbackCurrentFrame < 0) return; - const x = state.playbackCurrentFrame * state.pixelsPerFrame; + const x = state.playbackCurrentFrame * state.pixelsPerFrame - state.viewportOffsetX; + + // Only draw if playhead is visible in viewport + if (x < 0 || x > state.canvasWidth) return; // Draw vertical line ctx.strokeStyle = '#ff3333'; // Bright red @@ -1553,7 +1604,7 @@ function spectrogramToAudio(spectrogram, dctSize, numFrames) { const window = hanningWindowArray; for (let frameIdx = 0; frameIdx < numFrames; frameIdx++) { - // Extract frame + // Extract frame (no windowing - window is only for analysis, not synthesis) const frame = new Float32Array(dctSize); for (let b = 0; b < dctSize; b++) { frame[b] = spectrogram[frameIdx * dctSize + b]; @@ -1562,7 +1613,7 @@ function spectrogramToAudio(spectrogram, dctSize, numFrames) { // IDCT const timeFrame = javascript_idct_512(frame); - // Apply window and overlap-add + // Apply synthesis window for overlap-add const frameStart = frameIdx * hopSize; for (let i = 0; i < dctSize; i++) { if (frameStart + i < audioLength) { diff --git a/tools/timeline_editor/index.html b/tools/timeline_editor/index.html index f85f914..074b711 100644 --- a/tools/timeline_editor/index.html +++ b/tools/timeline_editor/index.html @@ -1279,11 +1279,48 @@ updateProperties(); }); - // Mouse wheel diagonal scroll (follows time-ordered sequence cascade) + // Mouse wheel: zoom (with Ctrl/Cmd) or diagonal scroll timelineContainer.addEventListener('wheel', (e) => { e.preventDefault(); - // Horizontal scroll + // Zoom mode: Ctrl/Cmd + wheel + if (e.ctrlKey || e.metaKey) { + // Get mouse position relative to timeline container + const rect = timelineContainer.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; // Mouse X in viewport coordinates + + // Calculate time position under cursor BEFORE zoom + const scrollLeft = timelineContainer.scrollLeft; + const timeUnderCursor = (scrollLeft + mouseX) / pixelsPerSecond; + + // Calculate new zoom level + const zoomDelta = e.deltaY > 0 ? -10 : 10; // Wheel down = zoom out, wheel up = zoom in + const oldPixelsPerSecond = pixelsPerSecond; + const newPixelsPerSecond = Math.max(10, Math.min(500, pixelsPerSecond + zoomDelta)); + + if (newPixelsPerSecond !== oldPixelsPerSecond) { + pixelsPerSecond = newPixelsPerSecond; + + // Update zoom slider and labels + zoomSlider.value = pixelsPerSecond; + zoomLevel.textContent = `${pixelsPerSecond}%`; + pixelsPerSecLabel.textContent = pixelsPerSecond; + + // Re-render waveform and timeline at new zoom + if (audioBuffer) { + renderWaveform(); + } + renderTimeline(); + + // Adjust scroll position so time under cursor stays in same place + // After zoom: new_scrollLeft = time_under_cursor * newPixelsPerSecond - mouseX + const newScrollLeft = timeUnderCursor * newPixelsPerSecond - mouseX; + timelineContainer.scrollLeft = newScrollLeft; + } + return; + } + + // Normal mode: diagonal scroll timelineContainer.scrollLeft += e.deltaY; // Calculate current time position with 10% headroom for visual comfort |
