diff options
| -rw-r--r-- | doc/CNN_V2.md | 9 | ||||
| -rw-r--r-- | doc/CNN_V2_BINARY_FORMAT.md | 155 | ||||
| -rw-r--r-- | doc/CNN_V2_WEB_TOOL.md | 45 | ||||
| -rw-r--r-- | tools/cnn_v2_test/index.html | 29 |
4 files changed, 209 insertions, 29 deletions
diff --git a/doc/CNN_V2.md b/doc/CNN_V2.md index 09d0841..588c3db 100644 --- a/doc/CNN_V2.md +++ b/doc/CNN_V2.md @@ -669,6 +669,15 @@ workspaces/main/shaders/cnn_*.wgsl # Original v1 shaders --- +## Related Documentation + +- `doc/CNN_V2_BINARY_FORMAT.md` - Binary weight file specification (.bin format) +- `doc/CNN_V2_WEB_TOOL.md` - WebGPU testing tool with layer visualization +- `doc/CNN_TEST_TOOL.md` - C++ offline validation tool (deprecated) +- `doc/HOWTO.md` - Training and validation workflows + +--- + **Document Version:** 1.0 **Last Updated:** 2026-02-12 **Status:** Design approved, ready for implementation diff --git a/doc/CNN_V2_BINARY_FORMAT.md b/doc/CNN_V2_BINARY_FORMAT.md new file mode 100644 index 0000000..650177f --- /dev/null +++ b/doc/CNN_V2_BINARY_FORMAT.md @@ -0,0 +1,155 @@ +# CNN v2 Binary Weight Format Specification + +Binary format for storing trained CNN v2 weights with static feature architecture. + +**File Extension:** `.bin` +**Byte Order:** Little-endian +**Version:** 1.0 + +--- + +## File Structure + +``` +┌─────────────────────┐ +│ Header (16 bytes) │ +├─────────────────────┤ +│ Layer Info │ +│ (20 bytes × N) │ +├─────────────────────┤ +│ Weight Data │ +│ (variable size) │ +└─────────────────────┘ +``` + +--- + +## Header (16 bytes) + +| Offset | Type | Field | Description | +|--------|------|----------------|--------------------------------------| +| 0x00 | u32 | magic | Magic number: `0x32_4E_4E_43` ("CNN2") | +| 0x04 | u32 | version | Format version (currently 1) | +| 0x08 | u32 | num_layers | Number of CNN layers (excludes static features) | +| 0x0C | u32 | total_weights | Total f16 weight count across all layers | + +--- + +## Layer Info (20 bytes per layer) + +Repeated `num_layers` times, starting at offset 0x10. + +| Offset | Type | Field | Description | +|-------------|------|----------------|--------------------------------------| +| 0x00 | u32 | kernel_size | Convolution kernel dimension (3, 5, 7, etc.) 
| +| 0x04 | u32 | in_channels | Input channel count (includes 8 static features for Layer 1) | +| 0x08 | u32 | out_channels | Output channel count (max 8) | +| 0x0C | u32 | weight_offset | Weight array start index (f16 units, relative to weight data section) | +| 0x10 | u32 | weight_count | Number of f16 weights for this layer | + +**Layer Order:** Sequential (Layer 1, Layer 2, Layer 3, ...) + +--- + +## Weight Data (variable size) + +Starts at offset: `16 + (num_layers × 20)` + +**Format:** Packed f16 pairs stored as u32 +**Packing:** `u32 = (f16_hi << 16) | f16_lo` +**Storage:** Sequential by layer, then by output channel, input channel, spatial position + +**Weight Indexing:** +``` +weight_idx = output_ch × (in_channels × kernel_size²) + + input_ch × kernel_size² + + (ky × kernel_size + kx) +``` + +Where: +- `output_ch` ∈ [0, out_channels) +- `input_ch` ∈ [0, in_channels) +- `ky`, `kx` ∈ [0, kernel_size) + +**Unpacking f16 from u32:** +```c +uint32_t packed = weights_buffer[weight_idx / 2]; +uint16_t f16_bits = (weight_idx % 2 == 0) ? (packed & 0xFFFF) : (packed >> 16); +``` + +--- + +## Example: 3-Layer Network + +**Configuration:** +- Layer 1: 15→8, kernel 3×3 (1,080 weights) +- Layer 2: 8→4, kernel 3×3 (288 weights) +- Layer 3: 4→3, kernel 3×3 (108 weights) + +**File Layout:** +``` +Offset Size Content +------ ---- ------- +0x00 16 Header (magic, version=1, layers=3, weights=1476) +0x10 20 Layer 1 info (kernel=3, in=15, out=8, offset=0, count=1080) +0x24 20 Layer 2 info (kernel=3, in=8, out=4, offset=1080, count=288) +0x38 20 Layer 3 info (kernel=3, in=4, out=3, offset=1368, count=108) +0x4C 2952 Weight data (1476 f16 = 738 u32 packed f16 pairs) + ---- +Total: 3028 bytes (~3.0 KB) +``` + +--- + +## Static Features + +Not stored in .bin file (computed at runtime): + +**7D Input Features (packed as 8 channels):** +1. R (red channel) +2. G (green channel) +3. B (blue channel) +4. D (depth value) +5. UV_X (normalized x coordinate) +6. UV_Y (normalized y coordinate) +7. 
sin(10 × UV_X) (spatial frequency encoding) +8. 1.0 (bias term) + +**First CNN layer** receives all 8 static features + 0-7 previous layer outputs (total 8-15 input channels). + +--- + +## Validation + +**Magic Check:** +```c +uint32_t magic; +fread(&magic, 4, 1, fp); +if (magic != 0x324E4E43) { error("Invalid CNN v2 file"); } +``` + +**Size Check:** +```c +expected_size = 16 + (num_layers * 20) + (total_weights * 2); +if (file_size != expected_size) { error("Size mismatch"); } +``` + +**Weight Offset Sanity:** +```c +// Each layer's offset should match cumulative count +uint32_t cumulative = 0; +for (int i = 0; i < num_layers; i++) { + if (layers[i].weight_offset != cumulative) { error("Invalid offset"); } + cumulative += layers[i].weight_count; +} +if (cumulative != total_weights) { error("Total mismatch"); } +``` + +--- + +## Related Files + +- `training/export_cnn_v2_weights.py` - Binary export tool +- `src/gpu/effects/cnn_v2_effect.cc` - C++ loader +- `tools/cnn_v2_test/index.html` - WebGPU validator +- `doc/CNN_V2.md` - Architecture design diff --git a/doc/CNN_V2_WEB_TOOL.md b/doc/CNN_V2_WEB_TOOL.md index 2fbc70e..81549ab 100644 --- a/doc/CNN_V2_WEB_TOOL.md +++ b/doc/CNN_V2_WEB_TOOL.md @@ -49,9 +49,11 @@ Browser-based WebGPU tool for validating CNN v2 inference with layer visualizati **3. 
Visualization Modes** **Activations Mode:** -- 4 grayscale views per layer (channels 0-3) +- 4 grayscale views per layer (channels 0-3 of up to 8 total) - WebGPU compute → unpack f16 → scale → grayscale -- Auto-scale: Layer 0 (static) = 1.0, CNN layers = 0.2 +- Auto-scale: Static features = 1.0, CNN layers = 0.2 +- Static features: Shows R,G,B,D (first 4 of 8: RGBD+UV+sin+bias) +- CNN layers: Shows first 4 output channels **Weights Mode:** - 2D canvas rendering per output channel @@ -78,6 +80,21 @@ For each CNN layer i: Compute (ping-pong) → copy to layerTextures[i+1] ``` +### Layer Indexing + +**UI Layer Buttons:** +- "Static" → layerOutputs[0] (7D input features) +- "Layer 1" → layerOutputs[1] (CNN layer 1 output, uses weights.layers[0]) +- "Layer 2" → layerOutputs[2] (CNN layer 2 output, uses weights.layers[1]) +- "Layer N" → layerOutputs[N] (CNN layer N output, uses weights.layers[N-1]) + +**Weights Table:** +- "Layer 1" → weights.layers[0] (first CNN layer weights) +- "Layer 2" → weights.layers[1] (second CNN layer weights) +- "Layer N" → weights.layers[N-1] + +**Consistency:** Both UI and weights table use same numbering (1, 2, 3...) for CNN layers. + --- ## Known Issues @@ -192,26 +209,12 @@ For each CNN layer i: ## Binary Weight Format -**Header (16 bytes):** -``` -u32 magic; // 0x32_4E_4E_43 ("CNN2") -u32 version; // Format version -u32 num_layers; // Layer count -u32 total_weights;// Total f16 weight count -``` - -**Layer Info (20 bytes × N):** -``` -u32 kernel_size; // 3, 5, 7, etc. -u32 in_channels; // Input channel count -u32 out_channels; // Output channel count -u32 weight_offset; // Offset in f16 units -u32 weight_count; // Number of f16 weights -``` +See `doc/CNN_V2_BINARY_FORMAT.md` for complete specification. -**Weights (variable):** -- Packed f16 pairs as u32 (lo 16 bits, hi 16 bits) -- Sequential storage: [layer0_weights][layer1_weights]... 
+**Quick Summary:** +- Header: 16 bytes (magic, version, layer count, total weights) +- Layer info: 20 bytes × N (kernel size, channels, offsets) +- Weights: Packed f16 pairs as u32 --- diff --git a/tools/cnn_v2_test/index.html b/tools/cnn_v2_test/index.html index bfc91c5..9ce3d8c 100644 --- a/tools/cnn_v2_test/index.html +++ b/tools/cnn_v2_test/index.html @@ -669,7 +669,8 @@ class CNNTester { let html = ` <div style="margin-bottom: 12px;"> <div><strong>File Size:</strong> ${(fileSize / 1024).toFixed(2)} KB</div> - <div><strong>Layers:</strong> ${layers.length}</div> + <div><strong>CNN Layers:</strong> ${layers.length}</div> + <div style="font-size: 9px; color: #808080; margin-top: 4px;">Static features (input) + ${layers.length} conv layers</div> </div> <table> <thead> @@ -684,11 +685,12 @@ class CNNTester { <tbody> `; + // Display layers as "Layer 1", "Layer 2", etc. (matching visualization button labels) for (let i = 0; i < layers.length; i++) { const l = layers[i]; html += ` <tr> - <td>${i + 1}</td> + <td>Layer ${i + 1}</td> <td>${l.inChannels}→${l.outChannels} (${l.kernelSize}×${l.kernelSize})</td> <td>${l.weightCount}</td> <td>${l.min.toFixed(3)}</td> @@ -1011,9 +1013,12 @@ class CNNTester { </div> `; + html += '<div style="font-size: 9px; color: #808080; margin-bottom: 8px; padding-bottom: 8px; border-bottom: 1px solid #404040;">Static features (7D input) + ${this.weights.layers.length} CNN layers. Showing first 4 of 8 channels.</div>'; + html += '<div class="layer-buttons">'; for (let i = 0; i < this.layerOutputs.length; i++) { - const label = i === 0 ? 'Static (L0)' : `Layer ${i}`; + // Visualization layers: Static features (i=0), CNN Layer 1 (i=1), CNN Layer 2 (i=2), ... + const label = i === 0 ? 
'Static' : `Layer ${i}`; html += `<button onclick="tester.visualizeLayer(${i})" id="layerBtn${i}">${label}</button>`; } html += '</div>'; @@ -1084,7 +1089,10 @@ class CNNTester { // Check mode if (this.vizMode === 'weights' && layerIdx > 0) { - this.visualizeWeights(layerIdx - 1); // Layer 1 → weights.layers[0] + // Map visualization layer to weight array index + // Visualization Layer 1 (layerIdx=1) → weights.layers[0] (CNN Layer 1 weights) + // Visualization Layer 2 (layerIdx=2) → weights.layers[1] (CNN Layer 2 weights) + this.visualizeWeights(layerIdx - 1); return; } @@ -1108,9 +1116,11 @@ class CNNTester { this.log(`Visualizing ${layerName} activations (${width}×${height})`); // Update channel labels based on layer type + // Static features: 8 channels total (R,G,B,D,UV_X,UV_Y,sin,bias), showing first 4 + // CNN layers: Up to 8 channels per layer, showing first 4 const channelLabels = layerIdx === 0 - ? ['R', 'G', 'B', 'D'] // Static features: RGBA (R,G,B,Depth,UV_X,UV_Y,sin,bias) - : ['Ch0', 'Ch1', 'Ch2', 'Ch3']; // CNN layers + ? 
['Ch0 (R)', 'Ch1 (G)', 'Ch2 (B)', 'Ch3 (D)'] + : ['Ch0', 'Ch1', 'Ch2', 'Ch3']; for (let c = 0; c < 4; c++) { const label = document.getElementById(`channelLabel${c}`); @@ -1201,14 +1211,17 @@ class CNNTester { } visualizeWeights(cnnLayerIdx) { + // cnnLayerIdx is index into weights.layers[] array + // Display as "Layer N" where N = cnnLayerIdx + 1 (e.g., weights.layers[0] = Layer 1) const layer = this.weights.layers[cnnLayerIdx]; if (!layer) { - this.log(`Layer ${cnnLayerIdx} not found`, 'error'); + this.log(`CNN Layer ${cnnLayerIdx + 1} not found`, 'error'); return; } const { kernelSize, inChannels, outChannels, weightOffset, min, max } = layer; - this.log(`Visualizing Layer ${cnnLayerIdx + 1} weights: ${inChannels}→${outChannels}, ${kernelSize}×${kernelSize}, offset=${weightOffset}`); + const displayLayerNum = cnnLayerIdx + 1; + this.log(`Visualizing Layer ${displayLayerNum} weights: ${inChannels}→${outChannels}, ${kernelSize}×${kernelSize}, offset=${weightOffset}`); this.log(`Weight range: [${min.toFixed(3)}, ${max.toFixed(3)}]`); // Update channel labels to show output channels |
