From 6ef8f578817ee0134fd5867ca3b80590e3eb2368 Mon Sep 17 00:00:00 2001
From: skal
Date: Thu, 14 May 2026 19:09:39 +0200
Subject: ans: order-0 rANS coder + WGSL asset compression

Adds src/util/ans.{h,cc}, a per-chunk-adaptive order-0 rANS entropy coder.
Decoder is always built; encoder is gated on ANS_ENABLE_ENCODER (tools only).
Both sides take an optional 256-entry initial_counts table to seed the
adaptive model.

The per-chunk initial state is (1 << kBits). Higher initial states (e.g. with
a signature packed into the upper bits) force a renorm-emit at iter 0 that the
decoder never consumes, corrupting multi-chunk streams once stats become
skewed.

Asset pipeline:
- AssetRecord gains 'compression' and 'uncompressed_size' fields.
- asset_packer scans every WGSL file to build a corpus-wide byte histogram,
  then ANS-encodes each shader using that histogram as the seed. Histogram
  and accessor are emitted alongside the asset table. Round-trip verification
  runs at pack time for every compressed asset; failures fall back to
  uncompressed storage.
- asset_manager decompresses on first GetAsset(), caches the heap-allocated
  buffer, and DropAsset / ReloadAssetsFromFile free it along with the
  procedural cache.
- Disk-load (dev) builds are unchanged: WGSL paths stay as filenames.

Tests:
- src/tests/util/test_ans.cc: roundtrip variants (empty, single byte,
  single-symbol run, all-zeros, random uniform/skewed, repeated ASCII),
  seeded-vs-uniform compression, rejection of mismatched counts / corruption /
  truncation, PeekUncompressedSize.
- 37/37 dev, 36/36 STRIP_ALL.

Compression observed: WGSL shaders shrink to ~0.62-0.71x in the main
workspace (81 of 105 assets qualify).

Docs:
- doc/ANS.md (new): algorithm, bitstream, API, asset pipeline integration,
  compression numbers, limitations, tests.
- doc/ASSET_SYSTEM.md: new Compression section + updated technical guarantees
  for compressed assets.
- doc/COMPLETED.md: May 2026 entry.
- PROJECT_CONTEXT.md: Build status line mentions WGSL ANS compression. - CLAUDE.md, GEMINI.md: tier-3 build doc list includes ANS.md. --- tools/asset_packer.cc | 121 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 105 insertions(+), 16 deletions(-) (limited to 'tools/asset_packer.cc') diff --git a/tools/asset_packer.cc b/tools/asset_packer.cc index df876be..6162f19 100644 --- a/tools/asset_packer.cc +++ b/tools/asset_packer.cc @@ -23,6 +23,7 @@ #include "stb_image.h" #include "procedural/generator.h" // For ProcGenFunc and procedural functions +#include "util/ans.h" // ANS compression for WGSL assets #include "util/asset_manager.h" // For AssetRecord and AssetId // Map of procedural function names to their pointers (used only internally by @@ -81,6 +82,11 @@ struct AssetBuildInfo { std::string data_array_name; // ASSET_DATA_xxx for static std::string params_array_name; // ASSET_PROC_PARAMS_xxx for procedural std::string func_name_str_name; // ASSET_PROC_FUNC_STR_xxx for procedural + + // Set during the per-asset emit step (only for embedded data, not + // disk-load and not procedural). + std::string compression = "NONE"; // "NONE" | "ANS_ASCII" + size_t uncompressed_size = 0; // 0 when 'compression' == "NONE" }; static bool ParseProceduralFunction(const std::string& compression_type_str, @@ -339,6 +345,41 @@ static bool ProcessImageFile(const std::string& full_path, return true; } +// ANS-compress 'raw' with the seeded histogram and round-trip verify the +// payload. Returns true on success and writes the compressed bytes to '*out'. +// Returns false (without populating *out) if encoding fails, the compressed +// payload is not smaller, or the round-trip mismatches. 
+static bool TryAnsCompress(const std::vector& raw, + const uint32_t* hist, + std::vector* out) { + if (raw.empty()) return false; + std::vector enc; + if (!ans::Encode(raw.data(), raw.size(), &enc, hist)) return false; + if (enc.size() >= raw.size()) return false; + std::vector verify(raw.size()); + size_t got = 0; + if (!ans::Decode(enc.data(), enc.size(), verify.data(), verify.size(), &got, + hist) || + got != raw.size() || + std::memcmp(verify.data(), raw.data(), raw.size()) != 0) { + return false; + } + *out = std::move(enc); + return true; +} + +// Emits a comma-separated list of values as a C array initializer, wrapping +// at 12 entries per line. +template +static void EmitArrayInit(FILE* f, const T* data, size_t n, FormatFn fmt) { + for (size_t i = 0; i < n; ++i) { + if (i % 12 == 0) fprintf(f, "\n "); + fmt(f, data[i]); + if (i + 1 != n) fprintf(f, ", "); + } + fprintf(f, "\n"); +} + int main(int argc, char* argv[]) { if (argc < 4) { fprintf(stderr, @@ -482,7 +523,37 @@ int main(int argc, char* argv[]) { std::fclose(assets_h_file); - for (const auto& info : asset_build_infos) { + // --------------------------------------------------------------------- + // Pre-pass: build a corpus-wide byte histogram from all WGSL assets to + // seed the ANS coder. Skipped in disk-load mode (WGSL data is not + // embedded then, so we never run the encoder). 
+ // --------------------------------------------------------------------- + uint32_t ans_ascii_hist[256] = {}; + if (!disk_load_mode) { + for (const auto& info : asset_build_infos) { + if (info.asset_type != "WGSL") continue; + std::string base_dir = + assets_txt_path.substr(0, assets_txt_path.find_last_of("/\\") + 1); + std::filesystem::path p = std::filesystem::absolute(base_dir) / info.filename; + std::ifstream f(p.lexically_normal().string(), std::ios::binary); + if (!f.is_open()) continue; + std::vector buf((std::istreambuf_iterator(f)), + std::istreambuf_iterator()); + ans::Histogram(buf.data(), buf.size(), ans_ascii_hist); + } + } + + fprintf(assets_data_cc_file, + "// Per-corpus byte histogram, seed for ANS_ASCII decompression.\n"); + fprintf(assets_data_cc_file, + "static const uint32_t kAnsAsciiHistogram[256] = {"); + EmitArrayInit(assets_data_cc_file, ans_ascii_hist, 256, + [](FILE* f, uint32_t v) { fprintf(f, "%u", v); }); + fprintf(assets_data_cc_file, "};\n"); + fprintf(assets_data_cc_file, + "const uint32_t* GetAnsAsciiHistogram() { return kAnsAsciiHistogram; }\n\n"); + + for (auto& info : asset_build_infos) { if (info.asset_type != "PROC" && info.asset_type != "PROC_GPU") { std::string base_dir = assets_txt_path.substr(0, assets_txt_path.find_last_of("/\\") + 1); @@ -526,21 +597,35 @@ int main(int argc, char* argv[]) { std::istreambuf_iterator()); } - size_t original_size = buffer.size(); - buffer.push_back(0); // Null terminator for safety + const size_t original_size = buffer.size(); + + // ANS-compress WGSL (ASCII text) using the corpus histogram. + // Compressed payload replaces the raw buffer; we don't null-terminate + // compressed blobs since the runtime decoder writes NUL itself. 
+ std::vector compressed; + const bool use_ans = + (info.asset_type == "WGSL") && + TryAnsCompress(buffer, ans_ascii_hist, &compressed); + if (use_ans) { + info.compression = "ANS_ASCII"; + info.uncompressed_size = original_size; + printf(" ANS %-32s %7zu -> %7zu (%.2f x)\n", info.name.c_str(), + original_size, compressed.size(), + (double)compressed.size() / (double)original_size); + } else { + buffer.push_back(0); // null-terminate raw assets + } + const std::vector& payload = use_ans ? compressed : buffer; fprintf(assets_data_cc_file, "const size_t ASSET_SIZE_%s = %zu;\n", - info.name.c_str(), original_size); + info.name.c_str(), + use_ans ? payload.size() : original_size); fprintf(assets_data_cc_file, - "alignas(16) static const uint8_t %s[] = {\n ", + "alignas(16) static const uint8_t %s[] = {", info.data_array_name.c_str()); - for (size_t i = 0; i < buffer.size(); ++i) { - if (i > 0 && i % 12 == 0) - fprintf(assets_data_cc_file, "\n "); - fprintf(assets_data_cc_file, "0x%02x%s", buffer[i], - (i == buffer.size() - 1 ? 
"" : ", ")); - } - fprintf(assets_data_cc_file, "\n};\n"); + EmitArrayInit(assets_data_cc_file, payload.data(), payload.size(), + [](FILE* f, uint8_t v) { fprintf(f, "0x%02x", v); }); + fprintf(assets_data_cc_file, "};\n"); } } else { fprintf(assets_data_cc_file, "static const float %s[] = {", @@ -561,15 +646,19 @@ int main(int argc, char* argv[]) { for (const auto& info : asset_build_infos) { fprintf(assets_data_cc_file, " { "); if (info.asset_type == "PROC" || info.asset_type == "PROC_GPU") { - fprintf(assets_data_cc_file, "nullptr, 0, AssetType::%s, %s, %s, %zu", + // data, size, type, compression, uncompressed_size, proc_func, params, n + fprintf(assets_data_cc_file, + "nullptr, 0, AssetType::%s, AssetCompression::NONE, 0, " + "%s, %s, %zu", info.asset_type.c_str(), info.func_name_str_name.c_str(), info.params_array_name.c_str(), info.proc_params.size()); } else { fprintf(assets_data_cc_file, - "(const uint8_t*)%s, ASSET_SIZE_%s, AssetType::%s, nullptr, " - "nullptr, 0", + "(const uint8_t*)%s, ASSET_SIZE_%s, AssetType::%s, " + "AssetCompression::%s, %zu, nullptr, nullptr, 0", info.data_array_name.c_str(), info.name.c_str(), - info.asset_type.c_str()); + info.asset_type.c_str(), info.compression.c_str(), + info.uncompressed_size); } fprintf(assets_data_cc_file, " },\n"); } -- cgit v1.2.3