diff options
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/editor/dct.js | 168 | ||||
| -rw-r--r-- | tools/editor/index.html | 36 | ||||
| -rw-r--r-- | tools/editor/script.js | 650 | ||||
| -rw-r--r-- | tools/editor/sdf.js | 39 | ||||
| -rw-r--r-- | tools/editor/style.css | 79 | ||||
| -rw-r--r-- | tools/spectool.cc | 121 | ||||
| -rw-r--r-- | tools/spectral_editor/script.js | 71 | ||||
| -rw-r--r-- | tools/timeline_editor/index.html | 41 |
8 files changed, 212 insertions, 993 deletions
diff --git a/tools/editor/dct.js b/tools/editor/dct.js deleted file mode 100644 index c081473..0000000 --- a/tools/editor/dct.js +++ /dev/null @@ -1,168 +0,0 @@ -const dctSize = 512; // Default DCT size, read from header - -// --- Utility Functions for Audio Processing --- - -// Hanning window for smooth audio transitions (JavaScript equivalent) -function hanningWindow(size) { - const window = new Float32Array(size); - const PI = Math.PI; - for (let i = 0; i < size; i++) { - window[i] = 0.5 * (1 - Math.cos((2 * PI * i) / (size - 1))); - } - return window; -} - -const hanningWindowArray = hanningWindow(dctSize); // Pre-calculate window - -// ============================================================================ -// FFT-based DCT/IDCT Implementation -// ============================================================================ - -// Bit-reversal permutation (in-place) -function bitReversePermute(real, imag, N) { - let temp_bits = N; - let num_bits = 0; - while (temp_bits > 1) { - temp_bits >>= 1; - num_bits++; - } - - for (let i = 0; i < N; i++) { - let j = 0; - let temp = i; - for (let b = 0; b < num_bits; b++) { - j = (j << 1) | (temp & 1); - temp >>= 1; - } - - if (j > i) { - const tmp_real = real[i]; - const tmp_imag = imag[i]; - real[i] = real[j]; - imag[i] = imag[j]; - real[j] = tmp_real; - imag[j] = tmp_imag; - } - } -} - -// In-place radix-2 FFT -function fftRadix2(real, imag, N, direction) { - const PI = Math.PI; - - for (let stage_size = 2; stage_size <= N; stage_size *= 2) { - const half_stage = stage_size / 2; - const angle = direction * 2.0 * PI / stage_size; - - let wr = 1.0; - let wi = 0.0; - const wr_delta = Math.cos(angle); - const wi_delta = Math.sin(angle); - - for (let k = 0; k < half_stage; k++) { - for (let group_start = k; group_start < N; group_start += stage_size) { - const i = group_start; - const j = group_start + half_stage; - - const temp_real = real[j] * wr - imag[j] * wi; - const temp_imag = real[j] * wi + imag[j] * wr; - - real[j] = real[i] - temp_real; - imag[j] = imag[i] - temp_imag; - real[i] = real[i] + temp_real; - imag[i] = imag[i] + temp_imag; - } - - const wr_old = wr; - wr = wr_old * wr_delta - wi * wi_delta; - wi = wr_old * wi_delta + wi * wr_delta; - } - } -} - -function fftForward(real, imag, N) { - bitReversePermute(real, imag, N); - fftRadix2(real, imag, N, +1); -} - -function fftInverse(real, imag, N) { - bitReversePermute(real, imag, N); - fftRadix2(real, imag, N, -1); - - const scale = 1.0 / N; - for (let i = 0; i < N; i++) { - real[i] *= scale; - imag[i] *= scale; - } -} - -// DCT-II via FFT using reordering method -function javascript_dct_fft(input, N) { - const PI = Math.PI; - - const real = new Float32Array(N); - const imag = new Float32Array(N); - - for (let i = 0; i < N / 2; i++) { - real[i] = input[2 * i]; - real[N - 1 - i] = input[2 * i + 1]; - } - - fftForward(real, imag, N); - - const output = new Float32Array(N); - for (let k = 0; k < N; k++) { - const angle = -PI * k / (2.0 * N); - const wr = Math.cos(angle); - const wi = Math.sin(angle); - - const dct_value = real[k] * wr - imag[k] * wi; - - if (k === 0) { - output[k] = dct_value * Math.sqrt(1.0 / N); - } else { - output[k] = dct_value * Math.sqrt(2.0 / N); - } - } - - return output; -} - -// IDCT (DCT-III) via FFT using reordering method -function javascript_idct_fft(input, N) { - const PI = Math.PI; - - const real = new Float32Array(N); - const imag = new Float32Array(N); - - for (let k = 0; k < N; k++) { - const angle = PI * k / (2.0 * N); - const wr = Math.cos(angle); - const wi = Math.sin(angle); - - let scaled; - if (k === 0) { - scaled = input[k] / Math.sqrt(1.0 / N); - } else { - scaled = input[k] / Math.sqrt(2.0 / N) * 2.0; - } - - real[k] = scaled * wr; - imag[k] = scaled * wi; - } - - fftInverse(real, imag, N); - - const output = new Float32Array(N); - for (let i = 0; i < N / 2; i++) { - output[2 * i] = real[i]; - output[2 * i + 1] = real[N - 1 - i]; - } - - return output; -} - -// Fast O(N log N) IDCT using FFT -function javascript_idct_512(input) { - return javascript_idct_fft(input, dctSize); -} diff --git a/tools/editor/index.html b/tools/editor/index.html deleted file mode 100644 index 82a11ce..0000000 --- a/tools/editor/index.html +++ /dev/null @@ -1,36 +0,0 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Spectrogram Editor</title> - <link rel="stylesheet" href="style.css"> -</head> -<body> - <h1>Spectrogram Editor</h1> - - <input type="file" id="specFileInput" accept=".spec"> - <label for="specFileInput">Load SPEC File</label> - - <div id="editorContainer"> - <canvas id="spectrogramCanvas"></canvas> - <div id="controls"> - <h2>Tools</h2> - <button id="lineTool">Line</button> - <button id="ellipseTool">Ellipse</button> - <button id="noiseTool">Noise</button> - <button id="undoButton">Undo</button> - <button id="redoButton">Redo</button> - <hr> - <h2>Playback</h2> - <button id="listenOriginalButton">Listen Original</button> - <button id="listenGeneratedButton">Listen Generated</button> - <!-- Add more tool controls later --> - </div> - </div> - - <script src="sdf.js"></script> - <script src="dct.js"></script> - <script src="script.js"></script> -</body> -</html> diff --git a/tools/editor/script.js b/tools/editor/script.js deleted file mode 100644 index 06c9bef..0000000 --- a/tools/editor/script.js +++ /dev/null @@ -1,650 +0,0 @@ -// This is the core JavaScript for the Spectrogram Editor. -// It handles file loading (.spec), visualization, tool interaction, and saving. - -// --- Global Variables --- -let currentSpecData = null; // Stores the currently displayed/edited spectrogram data -let originalSpecData = null; // Stores the pristine, initially loaded spectrogram data - -let undoStack = []; -let redoStack = []; -const MAX_HISTORY_SIZE = 50; - -let activeTool = null; // 'line', 'ellipse', 'noise', etc. -let isDrawing = false; -let startX, startY; // For tracking mouse down position - -let shapes = []; // Array to store all drawn shapes (lines, ellipses, etc.) - -// Web Audio Context -const audioContext = new (window.AudioContext || window.webkitAudioContext)(); - -// Audio Constants (should match C++ side) -const SAMPLE_RATE = 32000; -const MAX_FREQ = SAMPLE_RATE / 2; // Nyquist frequency -const MIN_FREQ = 20; // Lower bound for log scale visualization - -const SDF_FALLOFF_FACTOR = 10.0; // Adjust this value to control the softness of SDF edges. - -// --- Button Element Declarations --- -const specFileInput = document.getElementById('specFileInput'); -const lineToolButton = document.getElementById('lineTool'); -const ellipseToolButton = document.getElementById('ellipseTool'); -const noiseToolButton = document.getElementById('noiseTool'); -const undoButton = document.getElementById('undoButton'); -const redoButton = document.getElementById('redoButton'); -const listenOriginalButton = document.getElementById('listenOriginalButton'); -const listenGeneratedButton = document.getElementById('listenGeneratedButton'); - -// --- Event Listeners --- -specFileInput.addEventListener('change', handleFileSelect); -lineToolButton.addEventListener('click', () => { activeTool = 'line'; console.log('Line tool selected'); }); -ellipseToolButton.addEventListener('click', () => { activeTool = 'ellipse'; console.log('Ellipse tool selected'); }); -noiseToolButton.addEventListener('click', () => { activeTool = 'noise'; console.log('Noise tool selected'); }); -undoButton.addEventListener('click', handleUndo); -redoButton.addEventListener('click', handleRedo); -listenOriginalButton.addEventListener('click', () => { - if (originalSpecData) { - playSpectrogramData(originalSpecData); - } else { - alert("No original SPEC data loaded."); - } -}); -listenGeneratedButton.addEventListener('click', () => { - if (currentSpecData) { - redrawCanvas(); // Ensure currentSpecData reflects all shapes before playing - playSpectrogramData(currentSpecData); - } else { - alert("No generated SPEC data to play."); - } -}); - - -// --- Utility to map canvas coords to spectrogram bins/frames (LOG SCALE) --- -// Maps a linear frequency bin index to its corresponding frequency in Hz -function binIndexToFreq(binIndex) { - return (binIndex / (dctSize / 2)) * MAX_FREQ; -} - -// Maps a frequency in Hz to its corresponding linear bin index -function freqToBinIndex(freq) { - return Math.floor((freq / MAX_FREQ) * (dctSize / 2)); -} - -// Maps a frequency (Hz) to its corresponding log-scaled bin index -function freqToBinIndexLog(freq) { - if (freq < MIN_FREQ) freq = MIN_FREQ; // Clamp minimum frequency - const logMin = Math.log(MIN_FREQ); - const logMax = Math.log(MAX_FREQ); - const logFreq = Math.log(freq); - const normalizedLog = (logFreq - logMin) / (logMax - logMin); - return Math.floor(normalizedLog * dctSize); -} - -// Maps a log-scaled bin index to its corresponding frequency in Hz -function binIndexToFreqLog(binIndex) { - const normalizedLog = binIndex / dctSize; - const logMin = Math.log(MIN_FREQ); - const logMax = Math.log(MAX_FREQ); - const logFreq = normalizedLog * (logMax - logMin) + logMin; - return Math.exp(logFreq); -} - -// Converts a frequency (Hz) to a Y-coordinate on the canvas (log scale) -function freqToCanvasYLog(freq, canvasHeight) { - if (freq < MIN_FREQ) freq = MIN_FREQ; // Clamp minimum frequency - const logMin = Math.log(MIN_FREQ); - const logMax = Math.log(MAX_FREQ); - const logFreq = Math.log(freq); - const normalizedLog = (logFreq - logMin) / (logMax - logMin); - return canvasHeight * (1 - normalizedLog); // Y-axis is inverted -} - -// Converts a Y-coordinate on the canvas to a frequency (Hz) (log scale) -function canvasYToFreqLog(canvasY, canvasHeight) { - const normalizedLog = 1 - (canvasY / canvasHeight); - const logMin = Math.log(MIN_FREQ); - const logMax = Math.log(MAX_FREQ); - const logFreq = normalizedLog * (logMax - logMin) + logMin; - return Math.exp(logFreq); -} - -// Converts canvas Y-coordinate to log-scaled bin index -function canvasYToBinIndexLog(canvasY, specData) { - const freq = canvasYToFreqLog(canvasY, canvas.height); - return freqToBinIndex(freq); // Use linear bin index from calculated log freq -} - -// Converts log-scaled bin index to canvas Y-coordinate -function binIndexToCanvasYLog(binIndex, specData) { - const freq = binIndexToFreq(binIndex); - return freqToCanvasYLog(freq, canvas.height); -} - -// Helper to get frequency delta from canvas delta (for ellipse radius in freq) -function canvasDeltaYToFreqDeltaLog(canvasDeltaY, canvasHeight) { - // This is an approximation as delta in log scale is not linear - // For small deltas around a center, it can be approximated - const centerCanvasY = canvasHeight / 2; - const freqAtCenter = canvasYToFreqLog(centerCanvasY, canvasHeight); - const freqAtCenterPlusDelta = canvasYToFreqLog(centerCanvasY - canvasDeltaY, canvasHeight); - return Math.abs(freqAtCenterPlusDelta - freqAtCenter); -} - -// Initial setup for canvas size (can be updated on window resize) -window.addEventListener('resize', () => { - if (originalSpecData) { - canvas.width = window.innerWidth * 0.7; - canvas.height = 400; // Fixed height - redrawCanvas(); - } -}); - -// Initial call to set button states -updateUndoRedoButtons(); - -// --- File Handling Functions --- -async function handleFileSelect(event) { - const file = event.target.files[0]; - if (!file) { - return; - } - - try { - const buffer = await file.arrayBuffer(); - const dataView = new DataView(buffer); - - // Parse SPEC header - const header = { - magic: String.fromCharCode(...new Uint8Array(buffer.slice(0, 4))), - version: dataView.getInt32(4, true), - dct_size: dataView.getInt32(8, true), - num_frames: dataView.getInt32(12, true) - }; - - if (header.magic !== "SPEC" || header.version !== 1) { - console.error("Invalid SPEC file format."); - alert("Invalid SPEC file format. Please load a valid .spec file."); - return; - } - - if (dctSize != header.dct_size) { - alert("Invalid dctSize in SPEC file"); - return; - } - const dataStart = 16; - const numBytes = header.num_frames * header.dct_size * Float32Array.BYTES_PER_ELEMENT; - const spectralDataFloat = new Float32Array(buffer, dataStart, header.num_frames * header.dct_size); - - originalSpecData = { header: header, data: new Float32Array(spectralDataFloat) }; // Store pristine copy - currentSpecData = { header: header, data: new Float32Array(spectralDataFloat) }; // Editable copy - - shapes = []; // Clear shapes on new file load - undoStack = []; // Clear undo history - redoStack = []; // Clear redo history - - console.log("Loaded SPEC file:", header); - redrawCanvas(); // Redraw with new data - - } catch (error) { - console.error("Error loading SPEC file:", error); - alert("Failed to load SPEC file. Check console for details."); - } -} - -// --- Spectrogram Visualization --- -const canvas = document.getElementById('spectrogramCanvas'); -const ctx = canvas.getContext('2d'); - -// Add canvas event listeners -canvas.addEventListener('mousedown', handleMouseDown); -canvas.addEventListener('mousemove', handleMouseMove); -canvas.addEventListener('mouseup', handleMouseUp); -canvas.addEventListener('mouseout', handleMouseUp); // Treat mouse out as mouse up - -// Function to get a color based on intensity (0 to 1) -function getColorForIntensity(intensity) { - // Example: Blue to white/yellow gradient - const log_intensity = Math.log(1. + intensity) / Math.log(2.); - const h = (1 - log_intensity) * 240; // Hue from blue (240) to red (0), inverse for intensity - const s = 60.; // Saturation - const l = log_intensity * 60 + 30; // Lightness from 30 to 90 - return `hsl(${h}, ${s}%, ${l}%)`; -} - -function drawSpectrogram(specData) { - const width = canvas.width; - const height = canvas.height; - - ctx.clearRect(0, 0, width, height); - ctx.fillStyle = '#ffffff'; - ctx.fillRect(0, 0, width, height); - - if (!specData || !specData.data || specData.header.num_frames === 0 || specData.data.length === 0) { - console.warn("No spectrogram data or invalid header/data to draw."); - return; - } - - const numFrames = specData.header.num_frames; - const frameWidth = width / numFrames; // Width of each time frame - - // Draw each frame's spectral data with log frequency scale - for (let frameIndex = 0; frameIndex < numFrames; frameIndex++) { - const frameDataStart = frameIndex * dctSize; - const xPos = frameIndex * frameWidth; - - // To draw with log scale, we iterate over canvas y-coordinates - // and map them back to frequency bins - for (let y = 0; y < height; y++) { - const binIndex = canvasYToBinIndexLog(y, specData); - if (binIndex < 0 || binIndex >= dctSize) continue; // Out of bounds - - const value = specData.data[frameDataStart + binIndex]; - const intensity = Math.min(1, Math.abs(value) / 1.0); // Assuming values are normalized to [-1, 1] - - ctx.fillStyle = getColorForIntensity(intensity); - ctx.fillRect(xPos, height - y - 1, frameWidth, 1); // Draw a 1-pixel height line for each y - } - } - - // Draw active shapes on top (previews for current drawing tool) - shapes.forEach(shape => { - drawShape(shape); - }); -} - -function drawShape(shape) { - // This draws the final, persistent shape. Preview is drawn in handleMouseMove. - ctx.strokeStyle = shape.color || 'red'; - ctx.lineWidth = shape.width || 2; - - switch (shape.type) { - case 'line': - ctx.beginPath(); - ctx.moveTo(shape.x1, shape.y1); - ctx.lineTo(shape.x2, shape.y2); - ctx.stroke(); - break; - case 'ellipse': - ctx.beginPath(); - ctx.ellipse(shape.cx, shape.cy, shape.rx, shape.ry, 0, 0, 2 * Math.PI); - ctx.stroke(); - break; - case 'noise_rect': // Noise is visualized as a rectangle - ctx.fillStyle = 'rgba(0, 0, 255, 0.2)'; - ctx.fillRect(shape.x, shape.y, shape.width, shape.height); - ctx.strokeStyle = 'blue'; - ctx.strokeRect(shape.x, shape.y, shape.width, shape.height); - break; - } -} - -// --- Mouse Event Handlers --- -function getMousePos(event) { - const rect = canvas.getBoundingClientRect(); - return { - x: event.clientX - rect.left, - y: event.clientY - rect.top - }; -} - -function handleMouseDown(event) { - if (!activeTool || !currentSpecData) return; - isDrawing = true; - const pos = getMousePos(event); - startX = pos.x; - startY = pos.y; -} - -function handleMouseMove(event) { - if (!isDrawing || !activeTool) return; - const pos = getMousePos(event); - - redrawCanvas(); // Clear and redraw persistent state - - ctx.strokeStyle = 'rgba(0, 0, 0, 0.5)'; // Preview color - ctx.lineWidth = 1; - ctx.setLineDash([5, 5]); // Dashed line for preview - - switch (activeTool) { - case 'line': - ctx.beginPath(); - ctx.moveTo(startX, startY); - ctx.lineTo(pos.x, pos.y); - ctx.stroke(); - break; - case 'ellipse': - // Draw preview ellipse based on start and current pos (bounding box) - const rx = Math.abs(pos.x - startX) / 2; - const ry = Math.abs(pos.y - startY) / 2; - const cx = startX + (pos.x - startX) / 2; - const cy = startY + (pos.y - startY) / 2; - if (rx > 0 && ry > 0) { - ctx.beginPath(); - ctx.ellipse(cx, cy, rx, ry, 0, 0, 2 * Math.PI); - ctx.stroke(); - } - break; - case 'noise': - // Draw preview rectangle for noise area - const rectX = Math.min(startX, pos.x); - const rectY = Math.min(startY, pos.y); - const rectW = Math.abs(pos.x - startX); - const rectH = Math.abs(pos.y - startY); - ctx.strokeRect(rectX, rectY, rectW, rectH); - break; - } - ctx.setLineDash([]); // Reset line dash - - // debug the mouse position by draw a white square - ctx.fillStyle = '#fff'; - ctx.fillRect(pos.x - 10, pos.y - 10, 20, 20); -} - -function handleMouseUp(event) { - if (!isDrawing || !activeTool || !currentSpecData) return; - isDrawing = false; - const endPos = getMousePos(event); - - let newShape = null; - - switch (activeTool) { - case 'line': { - const startCoords = canvasToSpectrogramCoords(startX, startY, currentSpecData); - const endCoords = canvasToSpectrogramCoords(endPos.x, endPos.y, currentSpecData); - - newShape = { - type: 'line', - // Canvas coordinates for drawing visual representation (unchanged) - x1: startX, y1: startY, - x2: endPos.x, y2: endPos.y, - // World coordinates for SDF calculations (frame and log-scaled frequency) - frame1_world: startCoords.frame, - freq1_world: binIndexToFreqLog(startCoords.bin), - frame2_world: endCoords.frame, - freq2_world: binIndexToFreqLog(endCoords.bin), - amplitude: 0.5, // Default amplitude - width: 2, // Visual width in canvas pixels, not directly used by SDF, but kept for drawing - color: 'red', - falloff: SDF_FALLOFF_FACTOR, // SDF falloff factor - }; - break; - } - case 'ellipse': { - const rx = Math.abs(endPos.x - startX) / 2; - const ry = Math.abs(endPos.y - startY) / 2; - const cx = startX + (pos.x - startX) / 2; - const cy = startY + (pos.y - startY) / 2; - - const centerCoords = canvasToSpectrogramCoords(cx, cy, currentSpecData); - const halfWidthFrames = Math.floor((rx / canvas.width) * currentSpecData.header.num_frames); - - const startFreq = canvasYToFreqLog(startY, canvas.height); - const endFreq = canvasYToFreqLog(endPos.y, canvas.height); - const centerFreq = (startFreq + endFreq) / 2; - const halfHeightFreq = Math.abs(startFreq - endFreq) / 2; - - - newShape = { - type: 'ellipse', - // Canvas coordinates for drawing visual representation (unchanged) - cx: cx, cy: cy, - rx: rx, ry: ry, - // World coordinates for SDF calculations - center_frame_world: centerCoords.frame, - center_freq_world: centerFreq, - radius_frames_world: halfWidthFrames, - radius_freq_world: halfHeightFreq, - amplitude: 0.5, - color: 'green', - falloff: SDF_FALLOFF_FACTOR, - }; - break; - } - case 'noise': { - const rectX = Math.min(startX, endPos.x); - const rectY = Math.min(startY, endPos.y); - const rectW = Math.abs(endPos.x - startX); - const rectH = Math.abs(endPos.y - startY); - - const startCoords = canvasToSpectrogramCoords(rectX, rectY, currentSpecData); - const endCoords = canvasToSpectrogramCoords(rectX + rectW, rectY + rectH, currentSpecData); - - const centerFrame = Math.floor((startCoords.frame + endCoords.frame) / 2); - const centerFreq = (binIndexToFreqLog(startCoords.bin) + binIndexToFreqLog(endCoords.bin)) / 2; - const halfExtentFrames = Math.floor(Math.abs(endCoords.frame - startCoords.frame) / 2); - const halfExtentFreq = Math.abs(binIndexToFreqLog(endCoords.bin) - binIndexToFreqLog(startCoords.bin)) / 2; - - newShape = { - type: 'noise_rect', - // Canvas coordinates for drawing visual representation (unchanged) - x: rectX, y: rectY, - width: rectW, height: rectH, - // World coordinates for SDF calculations - center_frame_world: centerFrame, - center_freq_world: centerFreq, - half_extent_frames_world: halfExtentFrames, - half_extent_freq_world: halfExtentFreq, - amplitude: 0.3, // Default noise amplitude - density: 0.5, // Default noise density - color: 'blue', - falloff: 0.0, // No falloff for pure noise inside rect - }; - break; - } - } - - if (newShape) { - // Capture the state *before* applying the new shape for undo - const previousDataSnapshot = new Float32Array(currentSpecData.data); // Copy of actual data - const previousShapesSnapshot = shapes.map(s => ({ ...s })); // Deep copy shapes array - - applyShapeToSpectrogram(newShape, currentSpecData); // Modify currentSpecData directly - shapes.push(newShape); - addAction({ - type: 'add_shape', - shape: newShape, - undo: () => { - // To undo, restore previous shapes and previous data - shapes = previousShapesSnapshot; - currentSpecData.data = previousDataSnapshot; - }, - redo: () => { - // To redo, add the shape back and apply it to current data - shapes.push(newShape); - applyShapeToSpectrogram(newShape, currentSpecData); - } - }); - } - redrawCanvas(); // Final redraw after action - updateUndoRedoButtons(); -} - -// --- Spectrogram Data Manipulation --- -function applyShapeToSpectrogram(shape, targetSpecData) { - if (!targetSpecData || !targetSpecData.data || targetSpecData.header.num_frames === 0) return; - - const numFrames = targetSpecData.header.num_frames; - - // Determine a bounding box for optimization (iterate only relevant cells) - let minFrame = 0, maxFrame = numFrames - 1; - let minBin = 0, maxBin = dctSize - 1; - - // Calculate tighter bounding boxes for each shape type - switch (shape.type) { - case 'line': - minFrame = Math.min(shape.frame1_world, shape.frame2_world) - Math.ceil(shape.width / 2); - maxFrame = Math.max(shape.frame1_world, shape.frame2_world) + Math.ceil(shape.width / 2); - // For frequency, approximate by visual width or a fixed range if needed - minBin = freqToBinIndex(Math.min(shape.freq1_world, shape.freq2_world)) - Math.ceil(shape.width / 2); - maxBin = freqToBinIndex(Math.max(shape.freq1_world, shape.freq2_world)) + Math.ceil(shape.width / 2); - break; - case 'ellipse': - minFrame = shape.center_frame_world - shape.radius_frames_world - 1; - maxFrame = shape.center_frame_world + shape.radius_frames_world + 1; - minBin = freqToBinIndex(shape.center_freq_world - shape.radius_freq_world) - 1; // Approx bin range from world freq - maxBin = freqToBinIndex(shape.center_freq_world + shape.radius_freq_world) + 1; // Approx bin range from world freq - break; - case 'noise_rect': - minFrame = shape.center_frame_world - shape.half_extent_frames_world - 1; - maxFrame = shape.center_frame_world + shape.half_extent_frames_world + 1; - minBin = freqToBinIndex(shape.center_freq_world - shape.half_extent_freq_world) - 1; // Approx bin range from world freq - maxBin = freqToBinIndex(shape.center_freq_world + shape.half_extent_freq_world) + 1; // Approx bin range from world freq - break; - } - - minFrame = Math.max(0, minFrame); - maxFrame = Math.min(targetSpecData.header.num_frames - 1, maxFrame); // Use targetSpecData.header.num_frames - minBin = Math.max(0, minBin); - maxBin = Math.min(dctSize - 1, maxBin); - - for (let f = minFrame; f <= maxFrame; f++) { - for (let b = minBin; b <= maxBin; b++) { - const p_world = vec2(f, binIndexToFreqLog(b)); - let distance = Infinity; - - switch (shape.type) { - case 'line': - const a_line = vec2(shape.frame1_world, shape.freq1_world); - const b_line = vec2(shape.frame2_world, shape.freq2_world); - distance = sdSegment(p_world, a_line, b_line); - break; - case 'ellipse': - const center_ellipse = vec2(shape.center_frame_world, shape.center_freq_world); - const r_ellipse = vec2(shape.radius_frames_world, shape.radius_freq_world); - distance = sdEllipse(sub(p_world, center_ellipse), r_ellipse); - break; - case 'noise_rect': - const center_box = vec2(shape.center_frame_world, shape.center_freq_world); - const r_box = vec2(shape.half_extent_frames_world, shape.half_extent_freq_world); - distance = sdBox(sub(p_world, center_box), r_box); - if (distance <= 0 && Math.random() < shape.density) { // Only add noise inside the box with density - targetSpecData.data[f * dctSize + b] += (Math.random() * 2 - 1) * shape.amplitude; - } - break; - } - - if (shape.type !== 'noise_rect') { // Noise is handled differently for amplitude - const attenuation = Math.exp(-distance * distance * shape.falloff); - targetSpecData.data[f * dctSize + b] += shape.amplitude * attenuation; - } - - // Clamp final value - targetSpecData.data[f * dctSize + b] = Math.max(-1, Math.min(1, targetSpecData.data[f * dctSize + b])); - } - } -} - -// --- Undo/Redo Logic --- -function addAction(action) { - undoStack.push(action); - if (undoStack.length > MAX_HISTORY_SIZE) { - undoStack.shift(); - } - redoStack = []; - updateUndoRedoButtons(); -} - -function handleUndo() { - if (undoStack.length === 0) { - console.log('Undo stack is empty.'); - return; - } - const actionToUndo = undoStack.pop(); - actionToUndo.undo(); - redoStack.push(actionToUndo); - redrawCanvas(); - updateUndoRedoButtons(); -} - -function handleRedo() { - if (redoStack.length === 0) { - console.log('Redo stack is empty.'); - return; - } - - const actionToRedo = redoStack.pop(); - actionToRedo.redo(); - undoStack.push(actionToRedo); - redrawCanvas(); - updateUndoRedoButtons(); -} - -function redrawCanvas() { - console.log('Redrawing canvas...'); - if (!originalSpecData) { - ctx.clearRect(0, 0, canvas.width, canvas.height); - ctx.fillStyle = '#ffffff'; - ctx.fillRect(0, 0, canvas.width, canvas.height); - return; - } - - // Start with a fresh copy of the original data - currentSpecData.data = new Float32Array(originalSpecData.data); - - // Replay all shapes from the `shapes` array to `currentSpecData` - shapes.forEach(shape => { - applyShapeToSpectrogram(shape, currentSpecData); - }); - - drawSpectrogram(currentSpecData); -} - -function updateUndoRedoButtons() { - undoButton.disabled = undoStack.length === 0; - redoButton.disabled = redoStack.length === 0; -} - -// Initial setup for canvas size (can be updated on window resize) -window.addEventListener('resize', () => { - if (originalSpecData) { - canvas.width = window.innerWidth * 0.7; - canvas.height = 400; // Fixed height - redrawCanvas(); - } -}); - -// Initial call to set button states -updateUndoRedoButtons(); - -// --- Audio Playback Functions --- -let currentAudioSource = null; // To stop currently playing audio - -async function playSpectrogramData(specData) { - if (!specData || !specData.data || specData.header.num_frames === 0) { - alert("No spectrogram data to play."); - return; - } - - if (currentAudioSource) { - currentAudioSource.stop(); - currentAudioSource.disconnect(); - currentAudioSource = null; - } - - const sampleRate = SAMPLE_RATE; // Fixed sample rate - const numFrames = specData.header.num_frames; - const totalAudioSamples = numFrames * dctSize; // Total samples in time domain - - const audioBuffer = audioContext.createBuffer(1, totalAudioSamples, sampleRate); - const audioData = audioBuffer.getChannelData(0); // Mono channel - - // Convert spectrogram frames (frequency domain) to audio samples (time domain) - for (let frameIndex = 0; frameIndex < numFrames; frameIndex++) { - const spectralFrame = specData.data.slice(frameIndex * dctSize, (frameIndex + 1) * dctSize); - - // IDCT (no windowing - window is only for analysis, not synthesis) - const timeDomainFrame = javascript_idct_512(spectralFrame); - - // Apply Hanning window for smooth transitions between frames - for (let i = 0; i < dctSize; i++) { - audioData[frameIndex * dctSize + i] = timeDomainFrame[i] * hanningWindowArray[i]; - } - } - - currentAudioSource = audioContext.createBufferSource(); - currentAudioSource.buffer = audioBuffer; - currentAudioSource.connect(audioContext.destination); - currentAudioSource.start(); - - console.log(`Playing audio (Sample Rate: ${sampleRate}, Duration: ${audioBuffer.duration.toFixed(2)}s)`); -} diff --git a/tools/editor/sdf.js b/tools/editor/sdf.js deleted file mode 100644 index c68d79a..0000000 --- a/tools/editor/sdf.js +++ /dev/null @@ -1,39 +0,0 @@ -// --- Signed Distance Functions (SDFs) --- -// Generic 2D vector operations -function vec2(x, y) { return { x: x, y: y }; } -function length(v) { return Math.sqrt(v.x * v.x + v.y * v.y); } -function dot(v1, v2) { return v1.x * v2.x + v1.y * v2.y; } -function sub(v1, v2) { return vec2(v1.x - v2.x, v1.y - v2.y); } -function mul(v, s) { return vec2(v.x * s, v.y * s); } -function div(v, s) { return vec2(v.x / s, v.y / s); } -function normalize(v) { return div(v, length(v)); } -function clamp(x, minVal, maxVal) { return Math.max(minVal, Math.min(x, maxVal)); } -function abs(v) { return vec2(Math.abs(v.x), Math.abs(v.y)); } -function max(v1, v2) { return vec2(Math.max(v1.x, v2.x), Math.max(v1.y, v2.y)); } -function sign(x) { return (x > 0) ? 1 : ((x < 0) ? -1 : 0); } - -// sdSegment(p, a, b) - signed distance to a line segment -// p: point, a: segment start, b: segment end -function sdSegment(p, a, b) { - const pa = sub(p, a); - const ba = sub(b, a); - const h = clamp(dot(pa, ba) / dot(ba, ba), 0.0, 1.0); - return length(sub(pa, mul(ba, h))); -} - -// sdEllipse(p, r) - signed distance to an ellipse (p relative to center, r is half-extents) -// p: point relative to ellipse center, r: half-extents (rx, ry) -function sdEllipse(p, r) { - const k0 = vec2(1, length(div(p, r))); - const k1 = vec2(length(div(p, r)), 1); - const f = ((dot(div(mul(p, p), k0), vec2(1, 1)) < dot(div(mul(p, p), k1), vec2(1, 1))) ? k0 : k1); - return length(sub(p, mul(r, normalize(mul(f, p))))) * sign(length(p) - r.x); // Simplified, original has length(p)-r.x which is only for circular -} - -// sdBox(p, r) - signed distance to a rectangle (p relative to center, r is half-extents) -// p: point relative to box center, r: half-extents (hx, hy) -function sdBox(p, r) { - const q = sub(abs(p), r); - return length(max(q, vec2(0, 0))) + Math.min(0.0, Math.max(q.x, q.y)); -} - diff --git a/tools/editor/style.css b/tools/editor/style.css deleted file mode 100644 index e0014cf..0000000 --- a/tools/editor/style.css +++ /dev/null @@ -1,79 +0,0 @@ -body { - font-family: sans-serif; - margin: 20px; - background-color: #f4f4f4; -} - -h1, h2 { - color: #333; -} - -#editorContainer { - display: flex; - margin-top: 20px; -} - -#spectrogramCanvas { - border: 1px solid #ccc; - background-color: #fff; - margin-right: 20px; -} - -#controls { - border: 1px solid #ccc; - padding: 15px; - background-color: #eee; - min-width: 200px; -} - -#controls button { - display: block; - width: 100%; - margin-bottom: 10px; - padding: 10px; - cursor: pointer; -} - -#undoButton { - background-color: #d9534f; - color: white; - border: none; - border-radius: 4px; -} - -#undoButton:hover { - background-color: #c9302c; -} - -#redoButton { - background-color: #5cb85c; - color: white; - border: none; - border-radius: 4px; -} - -#redoButton:hover { - background-color: #4cae4c; -} - -/* New styles for playback buttons */ -#listenOriginalButton, -#listenGeneratedButton { - background-color: #5bc0de; - color: white; - border: none; - border-radius: 4px; - margin-top: 5px; -} - -#listenOriginalButton:hover, -#listenGeneratedButton:hover { - background-color: #31b0d5; -} - -hr { - border: 0; - height: 1px; - background-color: #ccc; - margin: 20px 0; -} diff --git a/tools/spectool.cc b/tools/spectool.cc index 67e9ff3..4cd98c7 100644 --- a/tools/spectool.cc +++ b/tools/spectool.cc @@ -26,8 +26,13 @@ // float[num_frames * dct_size] data // struct SpecHeader { ... } -> now in audio.h -int analyze_audio(const char* in_path, const char* out_path) { - printf("Analyzing %s -> %s\n", in_path, out_path); +int analyze_audio(const char* in_path, const char* out_path, bool normalize, + float target_rms) { + printf("Analyzing %s -> %s", in_path, out_path); + if (normalize) { + printf(" (normalizing to RMS=%.3f)", target_rms); + } + printf("\n"); // Use higher quality resampling for better audio quality // Source files are typically 44.1kHz or 96kHz, 16/24-bit, mono/stereo @@ -46,19 +51,84 @@ int analyze_audio(const char* in_path, const char* out_path) { return 1; } - std::vector<float> spec_data; + // First pass: Load all PCM data (needed for normalization) + std::vector<float> pcm_data; float pcm_chunk[DCT_SIZE]; - float window[WINDOW_SIZE]; - hamming_window_512(window); - ma_uint64 frames_read; while (ma_decoder_read_pcm_frames(&decoder, pcm_chunk, DCT_SIZE, &frames_read) == MA_SUCCESS && frames_read > 0) { - if (frames_read < DCT_SIZE) { - // Zero-pad the last chunk if it's smaller - memset(pcm_chunk + frames_read, 0, - (DCT_SIZE - frames_read) * sizeof(float)); + pcm_data.insert(pcm_data.end(), pcm_chunk, pcm_chunk + frames_read); + } + ma_decoder_uninit(&decoder); + + if (pcm_data.empty()) { + printf("Error: No audio data read from file.\n"); + return 1; + } + + // Calculate RMS and peak + float rms_sum = 0.0f; + float peak = 0.0f; + for (size_t i = 0; i < pcm_data.size(); ++i) { + const float abs_val = fabsf(pcm_data[i]); + if (abs_val > peak) { + peak = abs_val; + } + rms_sum += pcm_data[i] * pcm_data[i]; + } + const float original_rms = sqrtf(rms_sum / pcm_data.size()); + printf("Original: Peak=%.3f, RMS=%.3f\n", peak, original_rms); + + // Normalize if requested + float scale_factor = 1.0f; + if (normalize && original_rms > 1e-6f) { + // Calculate scale factor to reach target RMS + scale_factor = target_rms / original_rms; + + // Check if this would cause clipping (peak > 1.0 after synthesis) + // Peak amplification varies by sample (windowing + IDCT effects) + // Use conservative limit: input peak ≤ 1.0 to guarantee output peak ≤ 1.0 + const float max_safe_peak = 1.0f; + const float predicted_peak = peak * scale_factor; + + if (predicted_peak > max_safe_peak) { + // Reduce scale factor to prevent clipping + const float peak_scale = max_safe_peak / peak; + printf("Warning: RMS normalization would cause clipping (peak=%.3f)\n", + predicted_peak); + printf(" Reducing scale to prevent clipping.\n"); + scale_factor = peak_scale; + } + + printf("Normalizing: scale factor = %.3f\n", scale_factor); + printf(" RMS: %.3f -> %.3f\n", original_rms, original_rms * scale_factor); + printf(" Peak: %.3f -> %.3f\n", peak, peak * scale_factor); + + for (size_t i = 0; i < pcm_data.size(); ++i) { + pcm_data[i] *= scale_factor; + } + } + + // Second pass: Windowing + DCT + std::vector<float> spec_data; + float window[WINDOW_SIZE]; + hamming_window_512(window); + + // Process PCM data in DCT_SIZE chunks + const size_t num_chunks = (pcm_data.size() + DCT_SIZE - 1) / DCT_SIZE; + for (size_t chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) { + const size_t chunk_start = chunk_idx * DCT_SIZE; + const size_t chunk_end = + (chunk_start + DCT_SIZE < pcm_data.size()) ? chunk_start + DCT_SIZE + : pcm_data.size(); + const size_t chunk_size = chunk_end - chunk_start; + + // Copy chunk (with zero-padding if needed) + memcpy(pcm_chunk, pcm_data.data() + chunk_start, + chunk_size * sizeof(float)); + if (chunk_size < DCT_SIZE) { + memset(pcm_chunk + chunk_size, 0, (DCT_SIZE - chunk_size) * sizeof(float)); } // Apply window @@ -74,8 +144,6 @@ int analyze_audio(const char* in_path, const char* out_path) { spec_data.insert(spec_data.end(), dct_chunk, dct_chunk + DCT_SIZE); } - ma_decoder_uninit(&decoder); - // --- Trim Silent Frames --- const float epsilon = 1e-6f; int num_frames = spec_data.size() / DCT_SIZE; @@ -248,7 +316,7 @@ int test_gen(const char* out_path) { } void print_usage() { - printf("Usage: spectool <command> <input> [output]\n"); + printf("Usage: spectool <command> <input> [output] [options]\n"); printf("Commands:\n"); printf( " analyze <input.wav|.mp3> <output.spec> Analyze an audio file and " @@ -258,6 +326,11 @@ void print_usage() { printf( " test_gen <output.spec> Generate a test " "spectrogram.\n"); + printf("\nOptions for 'analyze':\n"); + printf(" --normalize [rms] Normalize audio to target RMS level (default: " + "0.15)\n"); + printf( + " Ensures consistent loudness across all samples.\n"); } int main(int argc, char** argv) { @@ -274,7 +347,27 @@ int main(int argc, char** argv) { print_usage(); return 1; } - return analyze_audio(argv[2], argv[3]); + + // Parse optional flags + bool normalize = false; + float target_rms = 0.15f; // Default target RMS + + for (int i = 4; i < argc; ++i) { + if (strcmp(argv[i], "--normalize") == 0) { + normalize = true; + // Check if next arg is a number (custom target RMS) + if (i + 1 < argc) { + char* endptr; + float custom_rms = strtof(argv[i + 1], &endptr); + if (endptr != argv[i + 1] && custom_rms > 0.0f && custom_rms < 1.0f) { + target_rms = custom_rms; + ++i; // Consume the RMS value + } + } + } + } + + return analyze_audio(argv[2], argv[3], normalize, target_rms); } else if (strcmp(command, "play") == 0) { if (argc < 3) { printf("Error: 'play' command requires an input file.\n"); diff --git a/tools/spectral_editor/script.js b/tools/spectral_editor/script.js index 6c6dd49..7c424f9 100644 --- a/tools/spectral_editor/script.js +++ b/tools/spectral_editor/script.js @@ -30,6 +30,8 @@ const state = { canvasHeight: 0, pixelsPerFrame: 2.0, // Zoom level (pixels per frame) pixelsPerBin: 1.0, // Vertical scale (pixels per frequency bin) + viewportOffsetX: 0, // Horizontal pan offset (pixels) + viewportOffsetY: 0, // Vertical pan offset (pixels) // Audio playback audioContext: null, @@ -94,6 +96,9 @@ function initCanvas() { // Mouse hover handlers (for crosshair) canvas.addEventListener('mousemove', onCanvasHover); canvas.addEventListener('mouseleave', onCanvasLeave); + + // Mouse wheel: zoom (with Ctrl/Cmd) or pan + canvas.addEventListener('wheel', onCanvasWheel, { passive: false }); } function initUI() { @@ -404,6 +409,9 @@ function onReferenceLoaded(fileName) { // Adjust zoom to fit state.pixelsPerFrame = Math.max(1.0, state.canvasWidth / state.referenceNumFrames); + state.pixelsPerBin = 1.0; // Reset vertical scale + state.viewportOffsetX = 0; // Reset pan + state.viewportOffsetY = 0; updateCurveUI(); updateUndoRedoButtons(); @@ -849,12 +857,62 @@ function onCanvasLeave(e) { render(); } +function onCanvasWheel(e) { + e.preventDefault(); + + const canvas = e.target; + const rect = canvas.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseY = e.clientY - rect.top; + + // Zoom mode: Ctrl/Cmd + wheel + if (e.ctrlKey || e.metaKey) { + // Calculate frame under cursor BEFORE zoom + const frameUnderCursor = (mouseX + state.viewportOffsetX) / state.pixelsPerFrame; + + // Calculate new zoom level (horizontal only - logarithmic frequency axis doesn't zoom) + const zoomFactor = e.deltaY > 0 ? 0.9 : 1.1; // Wheel down = zoom out, wheel up = zoom in + state.pixelsPerFrame = Math.max(0.5, Math.min(20.0, state.pixelsPerFrame * zoomFactor)); + + // Adjust viewport offset so frame under cursor stays in same screen position + // After zoom: new_offset = frame * newPixelsPerFrame - mouseX + state.viewportOffsetX = frameUnderCursor * state.pixelsPerFrame - mouseX; + + // Clamp viewport offset to valid range + const maxOffsetX = Math.max(0, state.referenceNumFrames * state.pixelsPerFrame - state.canvasWidth); + state.viewportOffsetX = Math.max(0, Math.min(maxOffsetX, state.viewportOffsetX)); + + render(); + return; + } + + // Pan mode: Shift + wheel (horizontal/vertical pan) + if (e.shiftKey) { + state.viewportOffsetX += e.deltaY; + const maxOffsetX = Math.max(0, state.referenceNumFrames * state.pixelsPerFrame - state.canvasWidth); + state.viewportOffsetX = Math.max(0, Math.min(maxOffsetX, state.viewportOffsetX)); + render(); + return; + } + + // Normal mode: pan vertically (disabled for logarithmic frequency axis) + // Note: With logarithmic frequency scale, vertical pan doesn't make sense + // because the frequency range (FREQ_MIN to FREQ_MAX) is always scaled to fit canvas height. + // Vertical pan only works in linear frequency mode. + if (!USE_LOG_SCALE) { + state.viewportOffsetY += e.deltaY; + const maxOffsetY = Math.max(0, DCT_SIZE * state.pixelsPerBin - state.canvasHeight); + state.viewportOffsetY = Math.max(0, Math.min(maxOffsetY, state.viewportOffsetY)); + render(); + } +} + // ============================================================================ // Coordinate Conversion // ============================================================================ function screenToSpectrogram(screenX, screenY) { - const frame = Math.round(screenX / state.pixelsPerFrame); + const frame = Math.round((screenX + state.viewportOffsetX) / state.pixelsPerFrame); let freqHz; if (USE_LOG_SCALE) { @@ -881,7 +939,7 @@ function screenToSpectrogram(screenX, screenY) { } function spectrogramToScreen(frame, freqHz) { - const x = frame * state.pixelsPerFrame; + const x = frame * state.pixelsPerFrame - state.viewportOffsetX; let y; if (USE_LOG_SCALE) { @@ -891,11 +949,11 @@ function spectrogramToScreen(frame, freqHz) { const clampedFreq = Math.max(FREQ_MIN, Math.min(FREQ_MAX, freqHz)); const logFreq = Math.log10(clampedFreq); const normalizedY = (logFreq - logMin) / (logMax - logMin); - y = state.canvasHeight * (1.0 - normalizedY); // Flip Y back to screen coords + y = state.canvasHeight * (1.0 - normalizedY) - state.viewportOffsetY; // Flip Y back to screen coords } else { // Linear frequency mapping (old behavior) const bin = (freqHz / (SAMPLE_RATE / 2)) * state.referenceDctSize; - y = state.canvasHeight - (bin * state.pixelsPerBin); + y = state.canvasHeight - (bin * state.pixelsPerBin) - state.viewportOffsetY; } return {x, y}; @@ -943,7 +1001,10 @@ function render() { function drawPlayhead(ctx) { if (!state.isPlaying || state.playbackCurrentFrame < 0) return; - const x = state.playbackCurrentFrame * state.pixelsPerFrame; + const x = state.playbackCurrentFrame * state.pixelsPerFrame - state.viewportOffsetX; + + // Only draw if playhead is visible in viewport + if (x < 0 || x > state.canvasWidth) return; // Draw vertical line ctx.strokeStyle = '#ff3333'; // Bright red diff --git a/tools/timeline_editor/index.html b/tools/timeline_editor/index.html index f85f914..074b711 100644 --- a/tools/timeline_editor/index.html +++ b/tools/timeline_editor/index.html @@ -1279,11 +1279,48 @@ updateProperties(); }); - // Mouse wheel diagonal scroll (follows time-ordered sequence cascade) + // Mouse wheel: zoom (with Ctrl/Cmd) or diagonal scroll timelineContainer.addEventListener('wheel', (e) => { e.preventDefault(); - // Horizontal scroll + // Zoom mode: Ctrl/Cmd + wheel + if (e.ctrlKey || e.metaKey) { + // Get mouse position relative to timeline container + const rect = timelineContainer.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; // Mouse X in viewport coordinates + + // Calculate time position under cursor BEFORE zoom + const scrollLeft = timelineContainer.scrollLeft; + const timeUnderCursor = (scrollLeft + mouseX) / pixelsPerSecond; + + // Calculate new zoom level + const zoomDelta = e.deltaY > 0 ? -10 : 10; // Wheel down = zoom out, wheel up = zoom in + const oldPixelsPerSecond = pixelsPerSecond; + const newPixelsPerSecond = Math.max(10, Math.min(500, pixelsPerSecond + zoomDelta)); + + if (newPixelsPerSecond !== oldPixelsPerSecond) { + pixelsPerSecond = newPixelsPerSecond; + + // Update zoom slider and labels + zoomSlider.value = pixelsPerSecond; + zoomLevel.textContent = `${pixelsPerSecond}%`; + pixelsPerSecLabel.textContent = pixelsPerSecond; + + // Re-render waveform and timeline at new zoom + if (audioBuffer) { + renderWaveform(); + } + renderTimeline(); + + // Adjust scroll position so time under cursor stays in same place + // After zoom: new_scrollLeft = time_under_cursor * newPixelsPerSecond - mouseX + const newScrollLeft = timeUnderCursor * newPixelsPerSecond - mouseX; + timelineContainer.scrollLeft = newScrollLeft; + } + return; + } + + // Normal mode: diagonal scroll timelineContainer.scrollLeft += e.deltaY; // Calculate current time position with 10% headroom for visual comfort |
