From 6ef8f578817ee0134fd5867ca3b80590e3eb2368 Mon Sep 17 00:00:00 2001 From: skal Date: Thu, 14 May 2026 19:09:39 +0200 Subject: ans: order-0 rANS coder + WGSL asset compression Adds src/util/ans.{h,cc}, a per-chunk-adaptive order-0 rANS entropy coder. Decoder is always built; encoder is gated on ANS_ENABLE_ENCODER (tools only). Both sides take an optional 256-entry initial_counts table to seed the adaptive model. The per-chunk initial state is (1 << kBits). Higher initial states (e.g. with a signature packed into the upper bits) force a renorm-emit at iter 0 that the decoder never consumes, corrupting multi-chunk streams once stats become skewed. Asset pipeline: - AssetRecord gains 'compression' and 'uncompressed_size' fields. - asset_packer scans every WGSL file to build a corpus-wide byte histogram, then ANS-encodes each shader using that histogram as the seed. Histogram and accessor are emitted alongside the asset table. Round-trip verification runs at pack time for every compressed asset; failures fall back to uncompressed storage. - asset_manager decompresses on first GetAsset(), caches the heap-allocated buffer, and DropAsset / ReloadAssetsFromFile free it along with the procedural cache. - Disk-load (dev) builds are unchanged: WGSL paths stay as filenames. Tests: - src/tests/util/test_ans.cc: roundtrip variants (empty, single byte, single-symbol run, all-zeros, random uniform/skewed, repeated ASCII), seeded-vs-uniform compression, rejection of mismatched counts / corruption / truncation, PeekUncompressedSize. - 37/37 dev, 36/36 STRIP_ALL. Compression observed: WGSL shaders shrink to ~0.62-0.71x in the main workspace (81 of 105 assets qualify). Docs: - doc/ANS.md (new): algorithm, bitstream, API, asset pipeline integration, compression numbers, limitations, tests. - doc/ASSET_SYSTEM.md: new Compression section + updated technical guarantees for compressed assets. - doc/COMPLETED.md: May 2026 entry. - PROJECT_CONTEXT.md: Build status line mentions WGSL ANS compression. - CLAUDE.md, GEMINI.md: tier-3 build doc list includes ANS.md. --- src/util/ans.cc | 215 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 src/util/ans.cc (limited to 'src/util/ans.cc') diff --git a/src/util/ans.cc b/src/util/ans.cc new file mode 100644 index 0000000..aa79e4b --- /dev/null +++ b/src/util/ans.cc @@ -0,0 +1,215 @@ +// This file is part of the 64k demo project. +// Order-0 rANS coder. See ans.h for the bitstream format. + +#include "util/ans.h" + +#include +#include +#include + +namespace ans { + +namespace { + +// Per-chunk adaptive byte model. Probabilities sum to 1<* dst, uint16_t v) { + dst->push_back((uint8_t)(v >> 8)); + dst->push_back((uint8_t)(v & 0xff)); +} +inline void AppendU32BE(std::vector* dst, uint32_t v) { + dst->push_back((uint8_t)(v >> 24)); + dst->push_back((uint8_t)(v >> 16)); + dst->push_back((uint8_t)(v >> 8)); + dst->push_back((uint8_t)(v & 0xff)); +} +#endif + +} // namespace + +uint32_t PeekUncompressedSize(const uint8_t* src, size_t src_size) { + if (!src || src_size < 4) return 0; + return ReadU32BE(src); +} + +bool Decode(const uint8_t* src, size_t src_size, + uint8_t* dst, size_t dst_capacity, + size_t* out_size, + const uint32_t* initial_counts) { + if (out_size) *out_size = 0; + if (!src || src_size < 4) return false; + + const uint8_t* p = src; + const uint8_t* end = src + src_size; + + const uint32_t output_size = ReadU32BE(p); + p += 4; + if (output_size > dst_capacity) return false; + if (output_size > 0 && !dst) return false; + + Stats stats; + stats.init(initial_counts); + + uint32_t t = 0; + while (t < output_size) { + const uint32_t chunk = std::min(kChunkSize, output_size - t); + + if (end - p < 4) return false; + uint32_t s = ReadU32BE(p); + p += 4; + + for (uint32_t i = 0; i < chunk; ++i) { + if (s <= kMask) { + // Pull in one renorm word. + if (end - p < 2) return false; + s = (s << kBits) | (uint32_t)ReadU16BE(p); + p += 2; + } + uint8_t sym; + uint32_t proba, residual; + stats.decode_lookup(s & kMask, &sym, &proba, &residual); + dst[t + i] = sym; + s = proba * (s >> kBits) + residual; + } + // Final-state sanity check: catches stream corruption and model mismatch. + if (s != kInitState) return false; + stats.normalize(); + t += chunk; + } + + if (out_size) *out_size = output_size; + return true; +} + +#if defined(ANS_ENABLE_ENCODER) + +void Histogram(const uint8_t* src, size_t size, uint32_t* out_counts) { + if (!src || !out_counts) return; + for (size_t i = 0; i < size; ++i) out_counts[src[i]] += 1; +} + +bool Encode(const uint8_t* src, size_t size, + std::vector* dst, + const uint32_t* initial_counts) { + if (!dst) return false; + dst->clear(); + if (size > 0xffffffffu) return false; // header is u32 + + AppendU32BE(dst, (uint32_t)size); + if (size == 0) return true; + + Stats stats; + stats.init(initial_counts); + + uint16_t tmp[kChunkSize]; + + size_t t = 0; + while (t < size) { + // Initial state is the bare signature (not shifted): the decoder ends + // each chunk with s == kInitState, so symmetry requires the encoder to + // start in the same low-state. Starting with (kInitState << kBits) would + // force a spurious renorm at iter 0 that the decoder never consumes, + // corrupting any stream with more than one chunk. + uint32_t s = kInitState; + const size_t chunk = std::min(kChunkSize, size - t); + size_t pos = chunk; + // Encode the chunk in reverse so the decoder consumes symbols in order. + for (size_t k = chunk; k-- > 0;) { + const uint8_t sym = src[t + k]; + uint32_t proba, base; + stats.encode_lookup(sym, &proba, &base); + if (s >= (proba << kBits)) { + // Flush low word and shift down to keep s/proba bounded in u32. + --pos; + tmp[pos] = (uint16_t)(s & kMask); + s >>= kBits; + } + s = ((s / proba) << kBits) + (s % proba) + base; + } + // Invariant: final state must be > kMask so the decoder's first read is + // a valid renormalized state. + if (s <= kMask) return false; + AppendU32BE(dst, s); + for (size_t k = pos; k < chunk; ++k) AppendU16BE(dst, tmp[k]); + + stats.normalize(); + t += chunk; + } + return true; +} + +#endif // ANS_ENABLE_ENCODER + +} // namespace ans -- cgit v1.2.3