// This file is part of the 64k demo project. // Order-0 rANS coder. See ans.h for the bitstream format. #include "util/ans.h" #include #include #include namespace ans { namespace { // Per-chunk adaptive byte model. Probabilities sum to 1<* dst, uint16_t v) { dst->push_back((uint8_t)(v >> 8)); dst->push_back((uint8_t)(v & 0xff)); } inline void AppendU32BE(std::vector* dst, uint32_t v) { dst->push_back((uint8_t)(v >> 24)); dst->push_back((uint8_t)(v >> 16)); dst->push_back((uint8_t)(v >> 8)); dst->push_back((uint8_t)(v & 0xff)); } #endif } // namespace uint32_t PeekUncompressedSize(const uint8_t* src, size_t src_size) { if (!src || src_size < 4) return 0; return ReadU32BE(src); } bool Decode(const uint8_t* src, size_t src_size, uint8_t* dst, size_t dst_capacity, size_t* out_size, const uint32_t* initial_counts) { if (out_size) *out_size = 0; if (!src || src_size < 4) return false; const uint8_t* p = src; const uint8_t* end = src + src_size; const uint32_t output_size = ReadU32BE(p); p += 4; if (output_size > dst_capacity) return false; if (output_size > 0 && !dst) return false; Stats stats; stats.init(initial_counts); uint32_t t = 0; while (t < output_size) { const uint32_t chunk = std::min(kChunkSize, output_size - t); if (end - p < 4) return false; uint32_t s = ReadU32BE(p); p += 4; for (uint32_t i = 0; i < chunk; ++i) { if (s <= kMask) { // Pull in one renorm word. if (end - p < 2) return false; s = (s << kBits) | (uint32_t)ReadU16BE(p); p += 2; } uint8_t sym; uint32_t proba, residual; stats.decode_lookup(s & kMask, &sym, &proba, &residual); dst[t + i] = sym; s = proba * (s >> kBits) + residual; } // Final-state sanity check: catches stream corruption and model mismatch. if (s != kInitState) return false; stats.normalize(); t += chunk; } if (out_size) *out_size = output_size; return true; } #if defined(ANS_ENABLE_ENCODER) void Histogram(const uint8_t* src, size_t size, uint32_t* out_counts) { if (!src || !out_counts) return; for (size_t i = 0; i < size; ++i) out_counts[src[i]] += 1; } bool Encode(const uint8_t* src, size_t size, std::vector* dst, const uint32_t* initial_counts) { if (!dst) return false; dst->clear(); if (size > 0xffffffffu) return false; // header is u32 AppendU32BE(dst, (uint32_t)size); if (size == 0) return true; Stats stats; stats.init(initial_counts); uint16_t tmp[kChunkSize]; size_t t = 0; while (t < size) { // Initial state is the bare signature (not shifted): the decoder ends // each chunk with s == kInitState, so symmetry requires the encoder to // start in the same low-state. Starting with (kInitState << kBits) would // force a spurious renorm at iter 0 that the decoder never consumes, // corrupting any stream with more than one chunk. uint32_t s = kInitState; const size_t chunk = std::min(kChunkSize, size - t); size_t pos = chunk; // Encode the chunk in reverse so the decoder consumes symbols in order. for (size_t k = chunk; k-- > 0;) { const uint8_t sym = src[t + k]; uint32_t proba, base; stats.encode_lookup(sym, &proba, &base); if (s >= (proba << kBits)) { // Flush low word and shift down to keep s/proba bounded in u32. --pos; tmp[pos] = (uint16_t)(s & kMask); s >>= kBits; } s = ((s / proba) << kBits) + (s % proba) + base; } // Invariant: final state must be > kMask so the decoder's first read is // a valid renormalized state. if (s <= kMask) return false; AppendU32BE(dst, s); for (size_t k = pos; k < chunk; ++k) AppendU16BE(dst, tmp[k]); stats.normalize(); t += chunk; } return true; } #endif // ANS_ENABLE_ENCODER } // namespace ans