summaryrefslogtreecommitdiff
path: root/src/util/ans.h
blob: 53c34b109c313bde6b4145e7530c5e98419d85d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// This file is part of the 64k demo project.
// Range Asymmetric Numeral Systems (rANS) order-0 entropy coder.
// Decoder is always built; encoder is gated by ANS_ENABLE_ENCODER.
// Bitstream is big-endian and chunk-adaptive; see the per-chunk format in
// the implementation file.

#ifndef ANS_H_
#define ANS_H_

#include <cstddef>
#include <cstdint>

#if defined(ANS_ENABLE_ENCODER)
#include <vector>
#endif

namespace ans {

// Compile-time coder parameters. The encoding side and the decoding side
// must be built with identical values.
constexpr int kBits = 16;
// Low 'kBits' bits set.
constexpr uint32_t kMask = (uint32_t{1} << kBits) - 1u;
// State value every chunk starts from (2^kBits). It also serves as an
// integrity check at the end of each chunk: if the encoder and decoder models
// diverge, the state moves away from this constant, and the constant is
// verified after every chunk.
constexpr uint32_t kInitState = uint32_t{1} << kBits;

constexpr int kNumSymbols = 256;
constexpr int kChunkSize = 1024;

// Reads the original (uncompressed) size from the header of the bitstream
// that starts at 'src'.
//   src      - start of the bitstream.
//   src_size - size of the input at 'src' (presumably in bytes, as for
//              Decode() -- confirm against the implementation).
// Returns the uncompressed size recorded in the header, or 0 on malformed
// input.
// NOTE(review): if a valid stream may record an uncompressed size of 0, that
// case cannot be told apart from malformed input through this return value --
// confirm against the implementation.
uint32_t PeekUncompressedSize(const uint8_t* src, size_t src_size);

// Decodes 'src_size' bytes from 'src' into 'dst'.
//   src, src_size     - compressed input bitstream and its size.
//   dst, dst_capacity - output buffer and its capacity.
//   out_size          - on success, receives the decoded size.
//   initial_counts    - 256 entries that seed the per-chunk adaptive model.
//                       Pass nullptr (the default argument) for the uniform
//                       default, in which every count is one.
// Returns true on success and writes the decoded size to '*out_size'.
// NOTE(review): this header does not say what 'dst' and '*out_size' hold
// after a false return; callers should not rely on either until that is
// confirmed against the implementation.
bool Decode(const uint8_t* src, size_t src_size,
            uint8_t* dst, size_t dst_capacity,
            size_t* out_size,
            const uint32_t* initial_counts = nullptr);

#if defined(ANS_ENABLE_ENCODER)
// Encodes 'src[0..size]' into '*dst'. Only declared when ANS_ENABLE_ENCODER
// is defined.
//   src, size      - input bytes to compress.
//                    NOTE(review): the range is presumably half-open, i.e.
//                    exactly 'size' bytes -- confirm against the
//                    implementation.
//   dst            - output vector; it is cleared and re-filled, so any
//                    previous contents are discarded.
//   initial_counts - same semantics as in Decode(); defaults to nullptr.
// Returns true on success.
bool Encode(const uint8_t* src, size_t size,
            std::vector<uint8_t>* dst,
            const uint32_t* initial_counts = nullptr);

// Computes a byte histogram over 'src[0..size]' and accumulates it into
// 'out_counts[256]'. Only declared when ANS_ENABLE_ENCODER is defined.
//   src, size  - input bytes to count.
//   out_counts - 256 counters that are accumulated into. The caller must
//                zero-initialize them before the first call; later calls
//                keep adding to the same array.
// Useful for deriving a corpus-wide initial distribution from many files
// (presumably to pass as 'initial_counts' to Encode()/Decode() -- verify
// against callers).
void Histogram(const uint8_t* src, size_t size, uint32_t* out_counts);
#endif  // ANS_ENABLE_ENCODER

}  // namespace ans

#endif  // ANS_H_