1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
|
// This file is part of the 64k demo project.
// Order-0 rANS coder. See ans.h for the bitstream format.
#include "util/ans.h"
#include <algorithm>
#include <cmath>
#include <cstring>
namespace ans {
namespace {
// Per-chunk adaptive byte model. Probabilities sum to 1<<kBits after
// normalize(); raw counts in 'stats' keep adapting across chunks.
struct Stats {
uint32_t stats[kNumSymbols];
uint32_t cumul[kNumSymbols + 1];
// Seeds 'stats' from caller counts (or all-ones if null) and builds the
// normalized cumulative table. stats_to_cumul() floors zero counts, so
// 'init' doesn't have to.
void init(const uint32_t* initial_counts) {
if (initial_counts) {
std::memcpy(stats, initial_counts, sizeof(stats));
} else {
std::fill_n(stats, kNumSymbols, 1u);
}
normalize();
}
// Rebuilds 'cumul' from 'stats'. Every slot is floored at 1 so each
// symbol remains reachable even if it has been seen zero times.
void stats_to_cumul() {
uint32_t acc = 0;
for (int i = 0; i < kNumSymbols; ++i) {
cumul[i] = acc;
acc += stats[i] ? stats[i] : 1u;
}
cumul[kNumSymbols] = acc;
}
// Rescales 'cumul' so cumul[NUM_SYMBOLS] == (1 << kBits). Ceil-multiply
// preserves monotonicity and prevents any active slot from collapsing.
void normalize() {
stats_to_cumul();
const uint32_t target = 1u << kBits;
while (cumul[kNumSymbols] != target) {
const double ratio = (double)target / (double)cumul[kNumSymbols];
for (int i = 0; i <= kNumSymbols; ++i) {
cumul[i] = (uint32_t)std::ceil((double)cumul[i] * ratio);
}
}
}
// Decoder lookup: locate the slot containing 's' in [0, 1<<kBits).
// Returns the symbol, its probability, and the residual offset within
// the slot. Increments the symbol's count on the fly.
void decode_lookup(uint32_t s, uint8_t* sym, uint32_t* p, uint32_t* r) {
// upper_bound(s) - 1; cumul is strictly non-decreasing.
const uint32_t* it = std::upper_bound(cumul, cumul + kNumSymbols + 1, s);
const int c = (int)(it - cumul) - 1;
stats[c] += 1;
*sym = (uint8_t)c;
*p = cumul[c + 1] - cumul[c];
*r = s - cumul[c];
}
// Encoder lookup: probability and base offset for symbol 'c'.
void encode_lookup(int c, uint32_t* p, uint32_t* base) {
stats[c] += 1;
*p = cumul[c + 1] - cumul[c];
*base = cumul[c];
}
};
// Big-endian primitive readers/writers.
inline uint16_t ReadU16BE(const uint8_t* p) {
return (uint16_t)((p[0] << 8) | p[1]);
}
inline uint32_t ReadU32BE(const uint8_t* p) {
return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
((uint32_t)p[2] << 8) | (uint32_t)p[3];
}
#if defined(ANS_ENABLE_ENCODER)
inline void AppendU16BE(std::vector<uint8_t>* dst, uint16_t v) {
dst->push_back((uint8_t)(v >> 8));
dst->push_back((uint8_t)(v & 0xff));
}
inline void AppendU32BE(std::vector<uint8_t>* dst, uint32_t v) {
dst->push_back((uint8_t)(v >> 24));
dst->push_back((uint8_t)(v >> 16));
dst->push_back((uint8_t)(v >> 8));
dst->push_back((uint8_t)(v & 0xff));
}
#endif
} // namespace
uint32_t PeekUncompressedSize(const uint8_t* src, size_t src_size) {
if (!src || src_size < 4)
return 0;
return ReadU32BE(src);
}
bool Decode(const uint8_t* src, size_t src_size, uint8_t* dst,
size_t dst_capacity, size_t* out_size,
const uint32_t* initial_counts) {
if (out_size)
*out_size = 0;
if (!src || src_size < 4)
return false;
const uint8_t* p = src;
const uint8_t* end = src + src_size;
const uint32_t output_size = ReadU32BE(p);
p += 4;
if (output_size > dst_capacity)
return false;
if (output_size > 0 && !dst)
return false;
Stats stats;
stats.init(initial_counts);
uint32_t t = 0;
while (t < output_size) {
const uint32_t chunk = std::min<uint32_t>(kChunkSize, output_size - t);
if (end - p < 4)
return false;
uint32_t s = ReadU32BE(p);
p += 4;
for (uint32_t i = 0; i < chunk; ++i) {
if (s <= kMask) {
// Pull in one renorm word.
if (end - p < 2)
return false;
s = (s << kBits) | (uint32_t)ReadU16BE(p);
p += 2;
}
uint8_t sym;
uint32_t proba, residual;
stats.decode_lookup(s & kMask, &sym, &proba, &residual);
dst[t + i] = sym;
s = proba * (s >> kBits) + residual;
}
// Final-state sanity check: catches stream corruption and model mismatch.
if (s != kInitState)
return false;
stats.normalize();
t += chunk;
}
if (out_size)
*out_size = output_size;
return true;
}
#if defined(ANS_ENABLE_ENCODER)
void Histogram(const uint8_t* src, size_t size, uint32_t* out_counts) {
if (!src || !out_counts)
return;
for (size_t i = 0; i < size; ++i)
out_counts[src[i]] += 1;
}
bool Encode(const uint8_t* src, size_t size, std::vector<uint8_t>* dst,
const uint32_t* initial_counts) {
if (!dst)
return false;
dst->clear();
if (size > 0xffffffffu)
return false; // header is u32
AppendU32BE(dst, (uint32_t)size);
if (size == 0)
return true;
Stats stats;
stats.init(initial_counts);
uint16_t tmp[kChunkSize];
size_t t = 0;
while (t < size) {
// Initial state is the bare signature (not shifted): the decoder ends
// each chunk with s == kInitState, so symmetry requires the encoder to
// start in the same low-state. Starting with (kInitState << kBits) would
// force a spurious renorm at iter 0 that the decoder never consumes,
// corrupting any stream with more than one chunk.
uint32_t s = kInitState;
const size_t chunk = std::min<size_t>(kChunkSize, size - t);
size_t pos = chunk;
// Encode the chunk in reverse so the decoder consumes symbols in order.
for (size_t k = chunk; k-- > 0;) {
const uint8_t sym = src[t + k];
uint32_t proba, base;
stats.encode_lookup(sym, &proba, &base);
if (s >= (proba << kBits)) {
// Flush low word and shift down to keep s/proba bounded in u32.
--pos;
tmp[pos] = (uint16_t)(s & kMask);
s >>= kBits;
}
s = ((s / proba) << kBits) + (s % proba) + base;
}
// Invariant: final state must be > kMask so the decoder's first read is
// a valid renormalized state.
if (s <= kMask)
return false;
AppendU32BE(dst, s);
for (size_t k = pos; k < chunk; ++k)
AppendU16BE(dst, tmp[k]);
stats.normalize();
t += chunk;
}
return true;
}
#endif // ANS_ENABLE_ENCODER
} // namespace ans
|