summary refs log tree commit diff
path: root/src/util/ans.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/ans.h')
-rw-r--r-- src/util/ans.h 58
1 files changed, 58 insertions, 0 deletions
diff --git a/src/util/ans.h b/src/util/ans.h
new file mode 100644
index 0000000..53c34b1
--- /dev/null
+++ b/src/util/ans.h
@@ -0,0 +1,58 @@
+// This file is part of the 64k demo project.
+// Asymmetric Numeral System (rANS) order-0 entropy coder.
+// Decoder is always built; encoder is gated by ANS_ENABLE_ENCODER.
+// Bitstream is big-endian and chunk-adaptive; see the per-chunk format in
+// the implementation file.
+
+#ifndef ANS_H_
+#define ANS_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#if defined(ANS_ENABLE_ENCODER)
+#include <vector>
+#endif
+
+namespace ans {
+
+// Fixed parameters; the encoder and decoder must agree on all of them.
+constexpr int kBits = 16;
+constexpr uint32_t kMask = (1u << kBits) - 1u;
+constexpr int kNumSymbols = 256;
+constexpr int kChunkSize = 1024;
+// Initial state per chunk. Doubles as a chunk-end integrity check: any
+// decoder/encoder model divergence drives the state away from this constant,
+// which is verified after every chunk.
+constexpr uint32_t kInitState = 1u << kBits;
+
+// Reads the original (uncompressed) size from a bitstream header.
+// Returns 0 on malformed input.
+uint32_t PeekUncompressedSize(const uint8_t* src, size_t src_size);
+
+// Decodes 'src_size' bytes from 'src' into 'dst' (capacity 'dst_capacity').
+// 'initial_counts' (256 entries) seeds the per-chunk adaptive model; pass
+// nullptr for the uniform default (all-ones).
+// Returns true on success and writes the decoded size to '*out_size'.
+bool Decode(const uint8_t* src, size_t src_size,
+ uint8_t* dst, size_t dst_capacity,
+ size_t* out_size,
+ const uint32_t* initial_counts = nullptr);
+
+#if defined(ANS_ENABLE_ENCODER)
+// Encodes 'src[0..size)' into '*dst' (cleared and re-filled).
+// 'initial_counts' has the same semantics as in Decode().
+// Returns true on success.
+bool Encode(const uint8_t* src, size_t size,
+ std::vector<uint8_t>* dst,
+ const uint32_t* initial_counts = nullptr);
+
+// Computes a byte histogram over 'src[0..size)', accumulating into
+// 'out_counts[256]' (caller must zero-initialize before the first call).
+// Useful for deriving a corpus-wide initial distribution from many files.
+void Histogram(const uint8_t* src, size_t size, uint32_t* out_counts);
+#endif // ANS_ENABLE_ENCODER
+
+} // namespace ans
+
+#endif // ANS_H_