| author | skal <pascal.massimino@gmail.com> | 2026-02-12 12:11:53 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-12 12:11:53 +0100 |
| commit | eaf0bd855306e70ca03f2d6579b4d6551aff6482 | |
| tree | 62316af1143db1e59e1ad62e70b9844e324cda55 | /workspaces |
| parent | e8344bc84ec0f571e5c5aafffe7c914abe226bd6 | |
TODO: 8-bit weight quantization for 2× size reduction
- Add QAT (quantization-aware training) notes
- Requires training with fake quantization
- Target: ~1.6 KB weights (vs 3.2 KB f16)
- Shader unpacking needs adaptation (4× u8 per u32)
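
A minimal WGSL sketch of what the adapted unpacking could look like, assuming a per-layer scale/zero-point dequantization. The buffer name `weights_q8`, the binding index, and the `q_scale`/`q_zero` parameters are hypothetical placeholders, not part of the current shader:

```wgsl
// Illustrative sketch only: unpack 8-bit quantized weights (4 per u32) and
// dequantize with a per-layer scale and zero point. Buffer name, binding,
// and parameters are placeholders.
@group(0) @binding(1) var<storage, read> weights_q8 : array<u32>;

fn get_weight_q8(idx: u32, q_scale: f32, q_zero: f32) -> f32 {
    let word = weights_q8[idx / 4u];                 // 4 quantized weights per u32
    let byte = (word >> ((idx % 4u) * 8u)) & 0xFFu;  // select the idx-th byte
    return (f32(byte) - q_zero) * q_scale;           // dequantize to f32
}
```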
Diffstat (limited to 'workspaces')
| -rw-r--r-- | workspaces/main/shaders/cnn_v2_compute.wgsl | 1 |
1 file changed, 1 insertion, 0 deletions
diff --git a/workspaces/main/shaders/cnn_v2_compute.wgsl b/workspaces/main/shaders/cnn_v2_compute.wgsl
index f9eb556..b19a692 100644
--- a/workspaces/main/shaders/cnn_v2_compute.wgsl
+++ b/workspaces/main/shaders/cnn_v2_compute.wgsl
@@ -46,6 +46,7 @@ fn pack_channels(values: array<f32, 8>) -> vec4<u32> {
 // Get weight from storage buffer (f16 packed as u32 pairs)
 // Buffer layout: [header: 4 u32][layer_info: N×5 u32][weights: packed f16]
+// TODO: Support 8-bit quantized weights (4× per u32) for 2× size reduction
 fn get_weight(idx: u32) -> f32 {
     // Skip header (16 bytes = 4 u32) and layer info
     // Weights start after header + layer_info, but weight_offset already accounts for this
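
For contrast, a hedged sketch of the existing f16 scheme the comment above refers to (two half-precision weights per u32, unpacked with WGSL's `unpack2x16float` built-in). The buffer name, binding index, and offset handling are assumptions; the real `get_weight` body is truncated in the diff:

```wgsl
// Sketch of the current f16 path, assuming packed weights live in a storage
// buffer of u32 words; names and binding are illustrative, not the actual layout.
@group(0) @binding(1) var<storage, read> weights : array<u32>;

fn get_weight_f16(idx: u32) -> f32 {
    let word = weights[idx / 2u];       // two f16 weights per u32
    let pair = unpack2x16float(word);   // low 16 bits -> .x, high 16 bits -> .y
    return select(pair.x, pair.y, (idx % 2u) == 1u);
}
```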
