summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/tests/util/test_ans.cc189
-rw-r--r--src/util/ans.cc215
-rw-r--r--src/util/ans.h58
-rw-r--r--src/util/asset_manager.cc44
-rw-r--r--src/util/asset_manager.h18
5 files changed, 519 insertions, 5 deletions
diff --git a/src/tests/util/test_ans.cc b/src/tests/util/test_ans.cc
new file mode 100644
index 0000000..108c46d
--- /dev/null
+++ b/src/tests/util/test_ans.cc
@@ -0,0 +1,189 @@
+// This file is part of the 64k demo project.
+// Tests for the ANS (rANS) entropy coder in util/ans.h.
+// Encoder is enabled in tests via ANS_ENABLE_ENCODER.
+
+#include "util/ans.h"
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <random>
+#include <string>
+#include <vector>
+
+namespace {
+
+bool RoundtripCheck(const std::vector<uint8_t>& input,
+ const uint32_t* initial_counts,
+ const char* label) {
+ std::vector<uint8_t> compressed;
+ if (!ans::Encode(input.data(), input.size(), &compressed, initial_counts)) {
+ fprintf(stderr, "[%s] Encode failed\n", label);
+ return false;
+ }
+ std::vector<uint8_t> decoded(input.size());
+ size_t decoded_size = 0;
+ if (!ans::Decode(compressed.data(), compressed.size(), decoded.data(),
+ decoded.size(), &decoded_size, initial_counts)) {
+ fprintf(stderr, "[%s] Decode failed\n", label);
+ return false;
+ }
+ if (decoded_size != input.size() ||
+ std::memcmp(decoded.data(), input.data(), input.size()) != 0) {
+ fprintf(stderr, "[%s] payload mismatch\n", label);
+ return false;
+ }
+ const double ratio =
+ input.empty() ? 0.0 : (double)compressed.size() / (double)input.size();
+ fprintf(stderr, "[%s] OK: %zu -> %zu bytes (%.3f x)\n", label, input.size(),
+ compressed.size(), ratio);
+ return true;
+}
+
+// Covers: empty / single byte / single-symbol run / all-zeros / random uniform
+// / random skewed / repeated ASCII. Each is a roundtrip with default (uniform)
+// initial counts.
+void TestRoundtripVariants() {
+ std::mt19937 rng_uniform(12345);
+ std::vector<uint8_t> random_uniform(64 * 1024);
+ for (auto& b : random_uniform) b = (uint8_t)(rng_uniform() & 0xff);
+
+ std::mt19937 rng_skewed(67890);
+ std::vector<uint8_t> random_skewed(32 * 1024);
+ for (auto& b : random_skewed) {
+ // 90% byte 'A', 10% other ASCII letters.
+ const uint32_t r = rng_skewed();
+ b = (r % 10 == 0) ? (uint8_t)('B' + (r >> 8) % 25) : (uint8_t)'A';
+ }
+
+ const char* ascii =
+ "@group(0) @binding(0) var smplr: sampler;\n"
+ "@group(0) @binding(1) var tex: texture_2d<f32>;\n"
+ "@fragment fn fs_main(@builtin(position) p: vec4f) -> @location(0) "
+ "vec4f {\n"
+ " let uv = p.xy / vec2f(1280.0, 720.0);\n"
+ " return textureSample(tex, smplr, uv);\n"
+ "}\n";
+ std::vector<uint8_t> ascii_block;
+ for (int i = 0; i < 50; ++i) { // cross chunk boundary
+ ascii_block.insert(ascii_block.end(), ascii, ascii + std::strlen(ascii));
+ }
+
+ struct Case {
+ const char* label;
+ std::vector<uint8_t> data;
+ };
+ const Case cases[] = {
+ {"empty", {}},
+ {"single-byte", {0x42}},
+ {"single-symbol-run", std::vector<uint8_t>(4096, 'A')},
+ {"all-zeros", std::vector<uint8_t>(8192, 0)},
+ {"random-uniform", random_uniform},
+ {"random-skewed", random_skewed},
+ {"ascii-shader-text", ascii_block},
+ };
+ for (const Case& c : cases) {
+ assert(RoundtripCheck(c.data, nullptr, c.label));
+ }
+}
+
+// Seeded initial counts should compress a matching corpus at least as well
+// as uniform init, and roundtrip identically.
+void TestSeededInitialCounts() {
+ std::vector<uint8_t> corpus;
+ const char* sample = "fn main() { let x = vec3f(1.0, 2.0, 3.0); }\n";
+ for (int i = 0; i < 200; ++i) {
+ corpus.insert(corpus.end(), sample, sample + std::strlen(sample));
+ }
+ uint32_t hist[256] = {};
+ ans::Histogram(corpus.data(), corpus.size(), hist);
+
+ std::vector<uint8_t> seeded_out, uniform_out;
+ assert(ans::Encode(corpus.data(), corpus.size(), &seeded_out, hist));
+ assert(ans::Encode(corpus.data(), corpus.size(), &uniform_out, nullptr));
+
+ std::vector<uint8_t> decoded(corpus.size());
+ size_t decoded_size = 0;
+ assert(ans::Decode(seeded_out.data(), seeded_out.size(), decoded.data(),
+ decoded.size(), &decoded_size, hist));
+ assert(decoded_size == corpus.size());
+ assert(std::memcmp(decoded.data(), corpus.data(), corpus.size()) == 0);
+
+ fprintf(stderr, "[seeded-vs-uniform] seeded=%zu uniform=%zu raw=%zu\n",
+ seeded_out.size(), uniform_out.size(), corpus.size());
+ assert(seeded_out.size() <= uniform_out.size());
+}
+
+// The chunk-end state check must reject: a mismatched model (decoder uses a
+// different initial distribution), a single bit-flip in the payload, and a
+// truncated stream.
+void TestRejection() {
+ std::mt19937 rng(1);
+
+ // 1) Mismatched models.
+ {
+ std::vector<uint8_t> v(4096);
+ for (auto& b : v) b = (uint8_t)('a' + (rng() % 26));
+ uint32_t hist[256] = {};
+ ans::Histogram(v.data(), v.size(), hist);
+
+ std::vector<uint8_t> encoded;
+ assert(ans::Encode(v.data(), v.size(), &encoded, hist));
+
+ std::vector<uint8_t> decoded(v.size());
+ size_t decoded_size = 0;
+ assert(!ans::Decode(encoded.data(), encoded.size(), decoded.data(),
+ decoded.size(), &decoded_size, nullptr));
+ fprintf(stderr, "[rejection] mismatched-counts OK\n");
+ }
+
+ // 2) Corruption.
+ {
+ std::vector<uint8_t> v(2048);
+ for (auto& b : v) b = (uint8_t)(rng() & 0xff);
+ std::vector<uint8_t> encoded;
+ assert(ans::Encode(v.data(), v.size(), &encoded, nullptr));
+ encoded[encoded.size() / 2] ^= 0x55; // flip a payload byte
+
+ std::vector<uint8_t> decoded(v.size());
+ size_t decoded_size = 0;
+ assert(!ans::Decode(encoded.data(), encoded.size(), decoded.data(),
+ decoded.size(), &decoded_size, nullptr));
+ fprintf(stderr, "[rejection] corruption OK\n");
+ }
+
+ // 3) Truncation.
+ {
+ std::vector<uint8_t> v(4096);
+ for (size_t i = 0; i < v.size(); ++i) v[i] = (uint8_t)i;
+ std::vector<uint8_t> encoded;
+ assert(ans::Encode(v.data(), v.size(), &encoded, nullptr));
+ encoded.resize(encoded.size() - 8);
+
+ std::vector<uint8_t> decoded(v.size());
+ size_t decoded_size = 0;
+ assert(!ans::Decode(encoded.data(), encoded.size(), decoded.data(),
+ decoded.size(), &decoded_size, nullptr));
+ fprintf(stderr, "[rejection] truncation OK\n");
+ }
+}
+
+void TestPeekSize() {
+ std::vector<uint8_t> v(1234, 'Q');
+ std::vector<uint8_t> encoded;
+ assert(ans::Encode(v.data(), v.size(), &encoded, nullptr));
+ assert(ans::PeekUncompressedSize(encoded.data(), encoded.size()) ==
+ v.size());
+}
+
+} // namespace
+
+int main() {
+ TestRoundtripVariants();
+ TestSeededInitialCounts();
+ TestRejection();
+ TestPeekSize();
+ fprintf(stderr, "All ANS tests passed.\n");
+ return 0;
+}
diff --git a/src/util/ans.cc b/src/util/ans.cc
new file mode 100644
index 0000000..aa79e4b
--- /dev/null
+++ b/src/util/ans.cc
@@ -0,0 +1,215 @@
+// This file is part of the 64k demo project.
+// Order-0 rANS coder. See ans.h for the bitstream format.
+
+#include "util/ans.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+namespace ans {
+
+namespace {
+
+// Per-chunk adaptive byte model. Probabilities sum to 1<<kBits after
+// normalize(); raw counts in 'stats' keep adapting across chunks.
+struct Stats {
+ uint32_t stats[kNumSymbols];
+ uint32_t cumul[kNumSymbols + 1];
+
+ // Seeds 'stats' from caller counts (or all-ones if null) and builds the
+ // normalized cumulative table. stats_to_cumul() floors zero counts, so
+ // 'init' doesn't have to.
+ void init(const uint32_t* initial_counts) {
+ if (initial_counts) {
+ std::memcpy(stats, initial_counts, sizeof(stats));
+ } else {
+ std::fill_n(stats, kNumSymbols, 1u);
+ }
+ normalize();
+ }
+
+ // Rebuilds 'cumul' from 'stats'. Every slot is floored at 1 so each
+ // symbol remains reachable even if it has been seen zero times.
+ void stats_to_cumul() {
+ uint32_t acc = 0;
+ for (int i = 0; i < kNumSymbols; ++i) {
+ cumul[i] = acc;
+ acc += stats[i] ? stats[i] : 1u;
+ }
+ cumul[kNumSymbols] = acc;
+ }
+
+ // Rescales 'cumul' so cumul[NUM_SYMBOLS] == (1 << kBits). Ceil-multiply
+ // preserves monotonicity and prevents any active slot from collapsing.
+ void normalize() {
+ stats_to_cumul();
+ const uint32_t target = 1u << kBits;
+ while (cumul[kNumSymbols] != target) {
+ const double ratio = (double)target / (double)cumul[kNumSymbols];
+ for (int i = 0; i <= kNumSymbols; ++i) {
+ cumul[i] = (uint32_t)std::ceil((double)cumul[i] * ratio);
+ }
+ }
+ }
+
+ // Decoder lookup: locate the slot containing 's' in [0, 1<<kBits).
+ // Returns the symbol, its probability, and the residual offset within
+ // the slot. Increments the symbol's count on the fly.
+ void decode_lookup(uint32_t s, uint8_t* sym, uint32_t* p, uint32_t* r) {
+ // upper_bound(s) - 1; cumul is strictly non-decreasing.
+ const uint32_t* it =
+ std::upper_bound(cumul, cumul + kNumSymbols + 1, s);
+ const int c = (int)(it - cumul) - 1;
+ stats[c] += 1;
+ *sym = (uint8_t)c;
+ *p = cumul[c + 1] - cumul[c];
+ *r = s - cumul[c];
+ }
+
+ // Encoder lookup: probability and base offset for symbol 'c'.
+ void encode_lookup(int c, uint32_t* p, uint32_t* base) {
+ stats[c] += 1;
+ *p = cumul[c + 1] - cumul[c];
+ *base = cumul[c];
+ }
+};
+
+// Big-endian primitive readers/writers.
+inline uint16_t ReadU16BE(const uint8_t* p) {
+ return (uint16_t)((p[0] << 8) | p[1]);
+}
+inline uint32_t ReadU32BE(const uint8_t* p) {
+ return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
+ ((uint32_t)p[2] << 8) | (uint32_t)p[3];
+}
+
+#if defined(ANS_ENABLE_ENCODER)
+inline void AppendU16BE(std::vector<uint8_t>* dst, uint16_t v) {
+ dst->push_back((uint8_t)(v >> 8));
+ dst->push_back((uint8_t)(v & 0xff));
+}
+inline void AppendU32BE(std::vector<uint8_t>* dst, uint32_t v) {
+ dst->push_back((uint8_t)(v >> 24));
+ dst->push_back((uint8_t)(v >> 16));
+ dst->push_back((uint8_t)(v >> 8));
+ dst->push_back((uint8_t)(v & 0xff));
+}
+#endif
+
+} // namespace
+
+uint32_t PeekUncompressedSize(const uint8_t* src, size_t src_size) {
+ if (!src || src_size < 4) return 0;
+ return ReadU32BE(src);
+}
+
+bool Decode(const uint8_t* src, size_t src_size,
+ uint8_t* dst, size_t dst_capacity,
+ size_t* out_size,
+ const uint32_t* initial_counts) {
+ if (out_size) *out_size = 0;
+ if (!src || src_size < 4) return false;
+
+ const uint8_t* p = src;
+ const uint8_t* end = src + src_size;
+
+ const uint32_t output_size = ReadU32BE(p);
+ p += 4;
+ if (output_size > dst_capacity) return false;
+ if (output_size > 0 && !dst) return false;
+
+ Stats stats;
+ stats.init(initial_counts);
+
+ uint32_t t = 0;
+ while (t < output_size) {
+ const uint32_t chunk = std::min<uint32_t>(kChunkSize, output_size - t);
+
+ if (end - p < 4) return false;
+ uint32_t s = ReadU32BE(p);
+ p += 4;
+
+ for (uint32_t i = 0; i < chunk; ++i) {
+ if (s <= kMask) {
+ // Pull in one renorm word.
+ if (end - p < 2) return false;
+ s = (s << kBits) | (uint32_t)ReadU16BE(p);
+ p += 2;
+ }
+ uint8_t sym;
+ uint32_t proba, residual;
+ stats.decode_lookup(s & kMask, &sym, &proba, &residual);
+ dst[t + i] = sym;
+ s = proba * (s >> kBits) + residual;
+ }
+ // Final-state sanity check: catches stream corruption and model mismatch.
+ if (s != kInitState) return false;
+ stats.normalize();
+ t += chunk;
+ }
+
+ if (out_size) *out_size = output_size;
+ return true;
+}
+
+#if defined(ANS_ENABLE_ENCODER)
+
+void Histogram(const uint8_t* src, size_t size, uint32_t* out_counts) {
+ if (!src || !out_counts) return;
+ for (size_t i = 0; i < size; ++i) out_counts[src[i]] += 1;
+}
+
+bool Encode(const uint8_t* src, size_t size,
+ std::vector<uint8_t>* dst,
+ const uint32_t* initial_counts) {
+ if (!dst) return false;
+ dst->clear();
+ if (size > 0xffffffffu) return false; // header is u32
+
+ AppendU32BE(dst, (uint32_t)size);
+ if (size == 0) return true;
+
+ Stats stats;
+ stats.init(initial_counts);
+
+ uint16_t tmp[kChunkSize];
+
+ size_t t = 0;
+ while (t < size) {
+ // Initial state is the bare signature (not shifted): the decoder ends
+ // each chunk with s == kInitState, so symmetry requires the encoder to
+ // start in the same low-state. Starting with (kInitState << kBits) would
+ // force a spurious renorm at iter 0 that the decoder never consumes,
+ // corrupting any stream with more than one chunk.
+ uint32_t s = kInitState;
+ const size_t chunk = std::min<size_t>(kChunkSize, size - t);
+ size_t pos = chunk;
+ // Encode the chunk in reverse so the decoder consumes symbols in order.
+ for (size_t k = chunk; k-- > 0;) {
+ const uint8_t sym = src[t + k];
+ uint32_t proba, base;
+ stats.encode_lookup(sym, &proba, &base);
+ if (s >= (proba << kBits)) {
+ // Flush low word and shift down to keep s/proba bounded in u32.
+ --pos;
+ tmp[pos] = (uint16_t)(s & kMask);
+ s >>= kBits;
+ }
+ s = ((s / proba) << kBits) + (s % proba) + base;
+ }
+ // Invariant: final state must be > kMask so the decoder's first read is
+ // a valid renormalized state.
+ if (s <= kMask) return false;
+ AppendU32BE(dst, s);
+ for (size_t k = pos; k < chunk; ++k) AppendU16BE(dst, tmp[k]);
+
+ stats.normalize();
+ t += chunk;
+ }
+ return true;
+}
+
+#endif // ANS_ENABLE_ENCODER
+
+} // namespace ans
diff --git a/src/util/ans.h b/src/util/ans.h
new file mode 100644
index 0000000..53c34b1
--- /dev/null
+++ b/src/util/ans.h
@@ -0,0 +1,58 @@
+// This file is part of the 64k demo project.
+// Asymmetric Numeral System (rANS) order-0 entropy coder.
+// Decoder is always built; encoder is gated by ANS_ENABLE_ENCODER.
+// Bitstream is big-endian and chunk-adaptive; see the per-chunk format in
+// the implementation file.
+
+#ifndef ANS_H_
+#define ANS_H_
+
+#include <cstddef>
+#include <cstdint>
+
+#if defined(ANS_ENABLE_ENCODER)
+#include <vector>
+#endif
+
+namespace ans {
+
+// Fixed parameters; must match the encoder/decoder on both ends.
+constexpr int kBits = 16;
+constexpr uint32_t kMask = (1u << kBits) - 1u;
+constexpr int kNumSymbols = 256;
+constexpr int kChunkSize = 1024;
+// Initial state per chunk. Doubles as a chunk-end integrity check: any
+// decoder/encoder model divergence drives the state away from this constant,
+// which is verified after every chunk.
+constexpr uint32_t kInitState = 1u << kBits;
+
+// Reads the original (uncompressed) size from a bitstream header.
+// Returns 0 on malformed input.
+uint32_t PeekUncompressedSize(const uint8_t* src, size_t src_size);
+
+// Decodes 'src_size' bytes from 'src' into 'dst' (capacity 'dst_capacity').
+// 'initial_counts' (256 entries) seeds the per-chunk adaptive model; pass
+// nullptr for the uniform default (all-ones).
+// Returns true on success and writes the decoded size to '*out_size'.
+bool Decode(const uint8_t* src, size_t src_size,
+ uint8_t* dst, size_t dst_capacity,
+ size_t* out_size,
+ const uint32_t* initial_counts = nullptr);
+
+#if defined(ANS_ENABLE_ENCODER)
+// Encodes 'src[0..size]' into '*dst' (cleared and re-filled).
+// 'initial_counts' has the same semantics as in Decode().
+// Returns true on success.
+bool Encode(const uint8_t* src, size_t size,
+ std::vector<uint8_t>* dst,
+ const uint32_t* initial_counts = nullptr);
+
+// Computes a byte histogram over 'src[0..size]', accumulating into
+// 'out_counts[256]' (caller must zero-initialize before the first call).
+// Useful for deriving a corpus-wide initial distribution from many files.
+void Histogram(const uint8_t* src, size_t size, uint32_t* out_counts);
+#endif // ANS_ENABLE_ENCODER
+
+} // namespace ans
+
+#endif // ANS_H_
diff --git a/src/util/asset_manager.cc b/src/util/asset_manager.cc
index 26a82bf..264ddda 100644
--- a/src/util/asset_manager.cc
+++ b/src/util/asset_manager.cc
@@ -2,6 +2,7 @@
// It implements the generic asset retrieval logic with runtime caching.
#include "util/asset_manager.h"
+#include "util/ans.h"
#include "util/asset_manager_utils.h"
#include "util/check_return.h"
#if defined(USE_TEST_ASSETS)
@@ -114,6 +115,34 @@ const uint8_t* GetAsset(AssetId asset_id, size_t* out_size) {
AssetRecord cached_record = source_record;
+ if (source_record.compression == AssetCompression::ANS_ASCII) {
+ // Decompress on first access. Buffer is null-terminated for C-string use.
+ const size_t uncomp = source_record.uncompressed_size;
+ uint8_t* buffer = new (std::nothrow) uint8_t[uncomp + 1];
+ CHECK_RETURN_BEGIN(!buffer, nullptr)
+ if (out_size) *out_size = 0;
+ ERROR_MSG("Failed to allocate buffer for ANS-compressed asset id=%u",
+ index);
+ return nullptr;
+ CHECK_RETURN_END
+ size_t got = 0;
+ if (!ans::Decode(source_record.data, source_record.size, buffer, uncomp,
+ &got, GetAnsAsciiHistogram()) ||
+ got != uncomp) {
+ delete[] buffer;
+ if (out_size) *out_size = 0;
+ ERROR_MSG("ANS decode failed for asset id=%u", index);
+ return nullptr;
+ }
+ buffer[uncomp] = 0;
+ cached_record.data = buffer;
+ cached_record.size = uncomp;
+ cached_record.uncompressed_size = uncomp;
+ g_asset_cache[index] = cached_record;
+ if (out_size) *out_size = uncomp;
+ return buffer;
+ }
+
if (source_record.type == AssetType::PROC ||
source_record.type == AssetType::PROC_GPU) {
// Dynamically generate the asset
@@ -224,6 +253,12 @@ void DropAsset(AssetId asset_id, const uint8_t* asset) {
delete[] g_asset_cache[index].data;
g_asset_cache[index] = {}; // Zero out the struct to force re-generation
}
+ // Heap-allocated decompressed buffer (compression != NONE): cache owns it.
+ if (g_asset_cache[index].data == asset &&
+ g_asset_cache[index].compression != AssetCompression::NONE) {
+ delete[] g_asset_cache[index].data;
+ g_asset_cache[index] = {};
+ }
#if !defined(STRIP_ALL)
if (g_asset_cache[index].data == asset &&
(g_asset_cache[index].type == AssetType::SPEC ||
@@ -245,10 +280,11 @@ bool ReloadAssetsFromFile(const char* config_path) {
// Clear cache to force reload
for (size_t i = 0; i < (size_t)AssetId::ASSET_LAST_ID; ++i) {
- if ((g_asset_cache[i].type == AssetType::PROC ||
- g_asset_cache[i].type == AssetType::PROC_GPU) &&
- g_asset_cache[i].data) {
- delete[] g_asset_cache[i].data;
+ const AssetRecord& e = g_asset_cache[i];
+ if (e.data &&
+ (e.type == AssetType::PROC || e.type == AssetType::PROC_GPU ||
+ e.compression != AssetCompression::NONE)) {
+ delete[] e.data;
}
g_asset_cache[i] = {};
}
diff --git a/src/util/asset_manager.h b/src/util/asset_manager.h
index 786a8db..f6ad244 100644
--- a/src/util/asset_manager.h
+++ b/src/util/asset_manager.h
@@ -16,10 +16,21 @@ enum class AssetType : uint8_t {
PROC_GPU,
};
+// Compression scheme applied to the packed bytes (only relevant for static
+// embedded assets; disk-load and procedural assets are always NONE).
+enum class AssetCompression : uint8_t {
+ NONE = 0,
+ // Order-0 rANS, model seeded from a corpus-wide histogram embedded in the
+ // generated assets blob (see GetAnsAsciiHistogram). Used for WGSL.
+ ANS_ASCII = 1,
+};
+
struct AssetRecord {
const uint8_t* data; // Pointer to asset data (static or dynamic)
- size_t size; // Size of the asset data
+ size_t size; // Size of 'data' in bytes (compressed size if any)
AssetType type;
+ AssetCompression compression; // How 'data' is encoded; NONE = raw bytes
+ size_t uncompressed_size; // Size after decompression (== size if NONE)
const char* proc_func_name_str; // Name of procedural generation function
// (string literal)
const float* proc_params; // Parameters for procedural generation (static,
@@ -27,6 +38,11 @@ struct AssetRecord {
int num_proc_params; // Number of procedural parameters
};
+// Initial-state histogram (256 entries) used to seed ANS_ASCII compression.
+// Defined in the generated assets blob; computed at pack time over the WGSL
+// corpus.
+const uint32_t* GetAnsAsciiHistogram();
+
// Generic interface
// Retrieves a pointer to the asset data.
// - Static assets are guaranteed to be 16-byte aligned.