diff options
Diffstat (limited to 'src/util/ans.h')
| -rw-r--r-- | src/util/ans.h | 58 |
1 files changed, 58 insertions, 0 deletions
// This file is part of the 64k demo project.
// Asymmetric Numeral System (rANS) order-0 entropy coder.
// Decoder is always built; encoder is gated by ANS_ENABLE_ENCODER.
// Bitstream is big-endian and chunk-adaptive; see the per-chunk format in
// the implementation file.

#ifndef ANS_H_
#define ANS_H_

#include <cstddef>
#include <cstdint>

#if defined(ANS_ENABLE_ENCODER)
#include <vector>  // Only the Encode() signature needs std::vector.
#endif

namespace ans {

// Fixed coder parameters. The encoder and the decoder must be built with
// identical values, otherwise the two ends cannot interoperate.

// Base bit width from which kMask and kInitState are derived.
// NOTE(review): presumably the probability/state precision of the coder;
// confirm against the implementation file.
constexpr int kBits = 16;
// Mask selecting the low kBits bits of a 32-bit value.
constexpr uint32_t kMask = (1u << kBits) - 1u;
// Alphabet size: one symbol per possible byte value. Matches the 256-entry
// count tables taken by Decode(), Encode() and Histogram().
constexpr int kNumSymbols = 256;
// Granularity of the chunk-adaptive model.
// NOTE(review): presumably the number of symbols per chunk; confirm against
// the per-chunk format described in the implementation file.
constexpr int kChunkSize = 1024;
// Initial state per chunk. Doubles as a chunk-end integrity check: any
// decoder/encoder model divergence drives the state away from this constant,
// which is verified after every chunk.
constexpr uint32_t kInitState = 1u << kBits;

// Reads the original (uncompressed) size from the header of the bitstream
// stored in the 'src_size' bytes at 'src'.
// Returns 0 on malformed input.
// NOTE(review): a return of 0 may also be what a valid stream with an empty
// payload reports; confirm in the implementation before relying on 0 as an
// unambiguous error signal.
uint32_t PeekUncompressedSize(const uint8_t* src, size_t src_size);

// Decodes 'src_size' bytes from 'src' into 'dst' (capacity 'dst_capacity').
// 'initial_counts' (256 entries) seeds the per-chunk adaptive model; pass
// nullptr for the uniform default (all-ones).
// Returns true on success and writes the decoded size to '*out_size'.
// This header does not specify the contents of 'dst' or '*out_size' when
// false is returned.
bool Decode(const uint8_t* src, size_t src_size,
            uint8_t* dst, size_t dst_capacity,
            size_t* out_size,
            const uint32_t* initial_counts = nullptr);

#if defined(ANS_ENABLE_ENCODER)
// Encodes the 'size' bytes at 'src' (the half-open range 'src[0..size)') into
// '*dst', which is cleared and re-filled.
// 'initial_counts' has the same semantics as in Decode().
// Returns true on success.
bool Encode(const uint8_t* src, size_t size,
            std::vector<uint8_t>* dst,
            const uint32_t* initial_counts = nullptr);

// Computes a byte histogram over the 'size' bytes at 'src' (the half-open
// range 'src[0..size)'), accumulating into 'out_counts[256]'. Because counts
// are accumulated rather than overwritten, the caller must zero-initialize
// 'out_counts' before the first call.
// Useful for deriving a corpus-wide initial distribution from many files.
void Histogram(const uint8_t* src, size_t size, uint32_t* out_counts);
#endif  // ANS_ENABLE_ENCODER

}  // namespace ans

#endif  // ANS_H_
