summary | refs | log | tree | commit | diff
path: root/src/tests/gpu/test_cnn_v3_parity.cc
diff options
context:
space:
mode:
author: skal <pascal.massimino@gmail.com>, 2026-05-21 08:10:47 +0200
committer: skal <pascal.massimino@gmail.com>, 2026-05-21 08:10:47 +0200
commit: d806027dcaeadcdd8d2febd88bc46b2fd2c465de (patch)
tree: 30bc1ef9f40ccab7c00e31ee20e62bb86755fa26 /src/tests/gpu/test_cnn_v3_parity.cc
parent: 680042a18c11ad5e58757e45b260745c2f52417f (diff)
style: apply clang-format (HEAD, main)
Diffstat (limited to 'src/tests/gpu/test_cnn_v3_parity.cc')
-rw-r--r-- src/tests/gpu/test_cnn_v3_parity.cc 194
1 files changed, 105 insertions, 89 deletions
diff --git a/src/tests/gpu/test_cnn_v3_parity.cc b/src/tests/gpu/test_cnn_v3_parity.cc
index 4fada5d..9663b09 100644
--- a/src/tests/gpu/test_cnn_v3_parity.cc
+++ b/src/tests/gpu/test_cnn_v3_parity.cc
@@ -4,10 +4,10 @@
// 2. Random-weight test: output must match Python-generated test vectors
// (within 1/255 per pixel)
+#include "../../cnn_v3/test_vectors.h"
#include "../common/webgpu_test_fixture.h"
#include "cnn_v3/src/cnn_v3_effect.h"
#include "gpu/sequence.h"
-#include "../../cnn_v3/test_vectors.h"
#include <cassert>
#include <cmath>
@@ -20,37 +20,46 @@
static float fp16_bits_to_f32(uint16_t h) {
uint32_t sign = (h & 0x8000u) << 16;
- uint32_t exp = (h & 0x7C00u) >> 10;
+ uint32_t exp = (h & 0x7C00u) >> 10;
uint32_t mant = (h & 0x03FFu);
if (exp == 0 && mant == 0) {
- float r; uint32_t b = sign; __builtin_memcpy(&r, &b, 4); return r;
+ float r;
+ uint32_t b = sign;
+ __builtin_memcpy(&r, &b, 4);
+ return r;
}
if (exp == 31) {
uint32_t b = sign | 0x7F800000u | (mant << 13);
- float r; __builtin_memcpy(&r, &b, 4); return r;
+ float r;
+ __builtin_memcpy(&r, &b, 4);
+ return r;
}
uint32_t b = sign | ((exp + 112) << 23) | (mant << 13);
- float r; __builtin_memcpy(&r, &b, 4); return r;
+ float r;
+ __builtin_memcpy(&r, &b, 4);
+ return r;
}
// ---------------------------------------------------------------------------
// Raw RGBA16Float readback → flat array of f32 (one per channel per pixel)
// ---------------------------------------------------------------------------
-struct MapState { bool done = false; WGPUMapAsyncStatus status; };
+struct MapState {
+ bool done = false;
+ WGPUMapAsyncStatus status;
+};
static std::vector<float> readback_rgba16float(WGPUDevice device,
- WGPUQueue queue,
- WGPUTexture tex,
- int W, int H) {
- const uint32_t bytes_per_px = 8; // 4 × f16
- const uint32_t unaligned_bpr = (uint32_t)(W * bytes_per_px);
- const uint32_t aligned_bpr = ((unaligned_bpr + 255u) / 256u) * 256u;
- const size_t buf_size = aligned_bpr * (size_t)H;
+ WGPUQueue queue, WGPUTexture tex,
+ int W, int H) {
+ const uint32_t bytes_per_px = 8; // 4 × f16
+ const uint32_t unaligned_bpr = (uint32_t)(W * bytes_per_px);
+ const uint32_t aligned_bpr = ((unaligned_bpr + 255u) / 256u) * 256u;
+ const size_t buf_size = aligned_bpr * (size_t)H;
WGPUBufferDescriptor bd = {};
bd.usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead;
- bd.size = buf_size;
+ bd.size = buf_size;
WGPUBuffer staging = wgpuDeviceCreateBuffer(device, &bd);
WGPUCommandEncoder enc = wgpuDeviceCreateCommandEncoder(device, nullptr);
@@ -58,9 +67,9 @@ static std::vector<float> readback_rgba16float(WGPUDevice device,
src.texture = tex;
WGPUTexelCopyBufferInfo dst = {};
dst.buffer = staging;
- dst.layout.bytesPerRow = aligned_bpr;
+ dst.layout.bytesPerRow = aligned_bpr;
dst.layout.rowsPerImage = (uint32_t)H;
- WGPUExtent3D extent = { (uint32_t)W, (uint32_t)H, 1 };
+ WGPUExtent3D extent = {(uint32_t)W, (uint32_t)H, 1};
wgpuCommandEncoderCopyTextureToBuffer(enc, &src, &dst, &extent);
WGPUCommandBuffer cmds = wgpuCommandEncoderFinish(enc, nullptr);
wgpuQueueSubmit(queue, 1, &cmds);
@@ -73,7 +82,8 @@ static std::vector<float> readback_rgba16float(WGPUDevice device,
mi.mode = WGPUCallbackMode_AllowProcessEvents;
mi.callback = [](WGPUMapAsyncStatus s, WGPUStringView, void* u, void*) {
auto* st = (MapState*)u;
- st->status = s; st->done = true;
+ st->status = s;
+ st->done = true;
};
mi.userdata1 = &ms;
wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buf_size, mi);
@@ -82,16 +92,15 @@ static std::vector<float> readback_rgba16float(WGPUDevice device,
std::vector<float> result(W * H * 4, 0.0f);
if (ms.done && ms.status == WGPUMapAsyncStatus_Success) {
- const uint8_t* mapped = (const uint8_t*)wgpuBufferGetConstMappedRange(
- staging, 0, buf_size);
+ const uint8_t* mapped =
+ (const uint8_t*)wgpuBufferGetConstMappedRange(staging, 0, buf_size);
if (mapped) {
for (int y = 0; y < H; ++y) {
const uint16_t* row =
(const uint16_t*)(mapped + (size_t)y * aligned_bpr);
for (int x = 0; x < W; ++x) {
for (int c = 0; c < 4; ++c) {
- result[(y * W + x) * 4 + c] =
- fp16_bits_to_f32(row[x * 4 + c]);
+ result[(y * W + x) * 4 + c] = fp16_bits_to_f32(row[x * 4 + c]);
}
}
}
@@ -107,17 +116,17 @@ static std::vector<float> readback_rgba16float(WGPUDevice device,
// ---------------------------------------------------------------------------
static std::vector<float> readback_rgba32uint_8ch(WGPUDevice device,
- WGPUQueue queue,
- WGPUTexture tex,
- int W, int H) {
- const uint32_t bytes_per_px = 16; // 4 × u32
- const uint32_t unaligned_bpr = (uint32_t)(W * bytes_per_px);
- const uint32_t aligned_bpr = ((unaligned_bpr + 255u) / 256u) * 256u;
- const size_t buf_size = aligned_bpr * (size_t)H;
+ WGPUQueue queue,
+ WGPUTexture tex, int W,
+ int H) {
+ const uint32_t bytes_per_px = 16; // 4 × u32
+ const uint32_t unaligned_bpr = (uint32_t)(W * bytes_per_px);
+ const uint32_t aligned_bpr = ((unaligned_bpr + 255u) / 256u) * 256u;
+ const size_t buf_size = aligned_bpr * (size_t)H;
WGPUBufferDescriptor bd = {};
bd.usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead;
- bd.size = buf_size;
+ bd.size = buf_size;
WGPUBuffer staging = wgpuDeviceCreateBuffer(device, &bd);
WGPUCommandEncoder enc = wgpuDeviceCreateCommandEncoder(device, nullptr);
@@ -125,9 +134,9 @@ static std::vector<float> readback_rgba32uint_8ch(WGPUDevice device,
src.texture = tex;
WGPUTexelCopyBufferInfo dst = {};
dst.buffer = staging;
- dst.layout.bytesPerRow = aligned_bpr;
+ dst.layout.bytesPerRow = aligned_bpr;
dst.layout.rowsPerImage = (uint32_t)H;
- WGPUExtent3D extent = { (uint32_t)W, (uint32_t)H, 1 };
+ WGPUExtent3D extent = {(uint32_t)W, (uint32_t)H, 1};
wgpuCommandEncoderCopyTextureToBuffer(enc, &src, &dst, &extent);
WGPUCommandBuffer cmds = wgpuCommandEncoderFinish(enc, nullptr);
wgpuQueueSubmit(queue, 1, &cmds);
@@ -140,7 +149,8 @@ static std::vector<float> readback_rgba32uint_8ch(WGPUDevice device,
mi.mode = WGPUCallbackMode_AllowProcessEvents;
mi.callback = [](WGPUMapAsyncStatus s, WGPUStringView, void* u, void*) {
auto* st = (MapState*)u;
- st->status = s; st->done = true;
+ st->status = s;
+ st->done = true;
};
mi.userdata1 = &ms;
wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buf_size, mi);
@@ -149,8 +159,8 @@ static std::vector<float> readback_rgba32uint_8ch(WGPUDevice device,
std::vector<float> result(W * H * 8, 0.0f);
if (ms.done && ms.status == WGPUMapAsyncStatus_Success) {
- const uint8_t* mapped = (const uint8_t*)wgpuBufferGetConstMappedRange(
- staging, 0, buf_size);
+ const uint8_t* mapped =
+ (const uint8_t*)wgpuBufferGetConstMappedRange(staging, 0, buf_size);
if (mapped) {
for (int y = 0; y < H; ++y) {
const uint32_t* row =
@@ -178,31 +188,31 @@ static std::vector<float> readback_rgba32uint_8ch(WGPUDevice device,
static WGPUTexture make_feat_tex(WGPUDevice dev, int W, int H) {
WGPUTextureDescriptor d = {};
- d.format = WGPUTextureFormat_RGBA32Uint;
- d.usage = WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst;
- d.dimension = WGPUTextureDimension_2D;
- d.size = { (uint32_t)W, (uint32_t)H, 1 };
+ d.format = WGPUTextureFormat_RGBA32Uint;
+ d.usage = WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst;
+ d.dimension = WGPUTextureDimension_2D;
+ d.size = {(uint32_t)W, (uint32_t)H, 1};
d.mipLevelCount = 1;
- d.sampleCount = 1;
+ d.sampleCount = 1;
return wgpuDeviceCreateTexture(dev, &d);
}
static WGPUTexture make_output_tex(WGPUDevice dev, int W, int H) {
WGPUTextureDescriptor d = {};
- d.format = WGPUTextureFormat_RGBA16Float;
- d.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_CopySrc;
- d.dimension = WGPUTextureDimension_2D;
- d.size = { (uint32_t)W, (uint32_t)H, 1 };
+ d.format = WGPUTextureFormat_RGBA16Float;
+ d.usage = WGPUTextureUsage_StorageBinding | WGPUTextureUsage_CopySrc;
+ d.dimension = WGPUTextureDimension_2D;
+ d.size = {(uint32_t)W, (uint32_t)H, 1};
d.mipLevelCount = 1;
- d.sampleCount = 1;
+ d.sampleCount = 1;
return wgpuDeviceCreateTexture(dev, &d);
}
static WGPUTextureView make_view(WGPUTexture tex, WGPUTextureFormat fmt) {
WGPUTextureViewDescriptor d = {};
- d.format = fmt;
- d.dimension = WGPUTextureViewDimension_2D;
- d.mipLevelCount = 1;
+ d.format = fmt;
+ d.dimension = WGPUTextureViewDimension_2D;
+ d.mipLevelCount = 1;
d.arrayLayerCount = 1;
return wgpuTextureCreateView(tex, &d);
}
@@ -211,38 +221,36 @@ static WGPUTextureView make_view(WGPUTexture tex, WGPUTextureFormat fmt) {
// Run one CNN v3 forward pass and return output pixels
// ---------------------------------------------------------------------------
-static std::vector<float> run_cnn_v3(WebGPUTestFixture& fixture,
- int W, int H,
- const uint32_t* feat0_u32, // W*H*4
- const uint32_t* feat1_u32, // W*H*4
- const uint32_t* weights_u32, // (TOTAL_F16+1)/2
- uint32_t weights_bytes,
- std::vector<float>* enc0_out = nullptr,
- std::vector<float>* dec1_out = nullptr) {
+static std::vector<float>
+run_cnn_v3(WebGPUTestFixture& fixture, int W, int H,
+ const uint32_t* feat0_u32, // W*H*4
+ const uint32_t* feat1_u32, // W*H*4
+ const uint32_t* weights_u32, // (TOTAL_F16+1)/2
+ uint32_t weights_bytes, std::vector<float>* enc0_out = nullptr,
+ std::vector<float>* dec1_out = nullptr) {
GpuContext ctx = fixture.ctx();
// Create input textures manually (with CopyDst for upload)
WGPUTexture feat0_tex = make_feat_tex(ctx.device, W, H);
WGPUTexture feat1_tex = make_feat_tex(ctx.device, W, H);
- WGPUTexture out_tex = make_output_tex(ctx.device, W, H);
+ WGPUTexture out_tex = make_output_tex(ctx.device, W, H);
WGPUTextureView feat0_view =
make_view(feat0_tex, WGPUTextureFormat_RGBA32Uint);
WGPUTextureView feat1_view =
make_view(feat1_tex, WGPUTextureFormat_RGBA32Uint);
- WGPUTextureView out_view =
- make_view(out_tex, WGPUTextureFormat_RGBA16Float);
+ WGPUTextureView out_view = make_view(out_tex, WGPUTextureFormat_RGBA16Float);
// Upload feat texture data
auto upload_tex = [&](WGPUTexture tex, const uint32_t* data) {
WGPUTexelCopyTextureInfo dst_tex = {};
dst_tex.texture = tex;
WGPUTexelCopyBufferLayout layout = {};
- layout.bytesPerRow = (uint32_t)(W * 16); // 4 u32 per pixel
+ layout.bytesPerRow = (uint32_t)(W * 16); // 4 u32 per pixel
layout.rowsPerImage = (uint32_t)H;
- WGPUExtent3D ext = { (uint32_t)W, (uint32_t)H, 1 };
- wgpuQueueWriteTexture(ctx.queue, &dst_tex, data,
- (size_t)(W * H * 16), &layout, &ext);
+ WGPUExtent3D ext = {(uint32_t)W, (uint32_t)H, 1};
+ wgpuQueueWriteTexture(ctx.queue, &dst_tex, data, (size_t)(W * H * 16),
+ &layout, &ext);
};
upload_tex(feat0_tex, feat0_u32);
upload_tex(feat1_tex, feat1_u32);
@@ -267,10 +275,9 @@ static std::vector<float> run_cnn_v3(WebGPUTestFixture& fixture,
}
// Run 5 compute passes
- WGPUCommandEncoder enc =
- wgpuDeviceCreateCommandEncoder(ctx.device, nullptr);
+ WGPUCommandEncoder enc = wgpuDeviceCreateCommandEncoder(ctx.device, nullptr);
UniformsSequenceParams params = {};
- params.resolution = { (float)W, (float)H };
+ params.resolution = {(float)W, (float)H};
params.aspect_ratio = 1.0f;
effect.render(enc, params, registry);
@@ -292,7 +299,8 @@ static std::vector<float> run_cnn_v3(WebGPUTestFixture& fixture,
if (dec1_out) {
// dec1 is rgba32uint, 8ch (pack2x16float), half-res
WGPUTexture dec1_tex = registry.get_texture("cnn3_out_dec1");
- *dec1_out = readback_rgba32uint_8ch(ctx.device, ctx.queue, dec1_tex, W / 2, H / 2);
+ *dec1_out =
+ readback_rgba32uint_8ch(ctx.device, ctx.queue, dec1_tex, W / 2, H / 2);
}
// Cleanup
@@ -326,9 +334,8 @@ static int test_zero_weights() {
std::vector<uint32_t> feat0(W * H * 4, 0u);
std::vector<uint32_t> feat1(W * H * 4, 0u);
- auto pixels = run_cnn_v3(fixture, W, H,
- feat0.data(), feat1.data(),
- nullptr, 0); // null = zero weights (default)
+ auto pixels = run_cnn_v3(fixture, W, H, feat0.data(), feat1.data(), nullptr,
+ 0); // null = zero weights (default)
// Expected: sigmoid(0) = 0.5 exactly
const float expected = 0.5f;
@@ -360,14 +367,12 @@ static int test_random_weights() {
InitShaderComposer();
const int W = kCnnV3TestW, H = kCnnV3TestH;
- const uint32_t weights_bytes =
- (uint32_t)sizeof(kCnnV3TestWeightsU32);
+ const uint32_t weights_bytes = (uint32_t)sizeof(kCnnV3TestWeightsU32);
std::vector<float> enc0_pixels, dec1_pixels;
- auto pixels = run_cnn_v3(fixture, W, H,
- kCnnV3TestFeat0U32, kCnnV3TestFeat1U32,
- kCnnV3TestWeightsU32, weights_bytes,
- &enc0_pixels, &dec1_pixels);
+ auto pixels = run_cnn_v3(fixture, W, H, kCnnV3TestFeat0U32,
+ kCnnV3TestFeat1U32, kCnnV3TestWeightsU32,
+ weights_bytes, &enc0_pixels, &dec1_pixels);
// Check enc0 layer first (8ch, rgba32uint)
const float tol = 1.0f / 255.0f;
@@ -376,15 +381,18 @@ static int test_random_weights() {
for (int i = 0; i < W * H * 8; ++i) {
float ref = fp16_bits_to_f32(kCnnV3ExpectedEnc0U16[i]);
float err = fabsf(enc0_pixels[i] - ref);
- if (err > enc0_max_err) { enc0_max_err = err; enc0_worst = i; }
+ if (err > enc0_max_err) {
+ enc0_max_err = err;
+ enc0_worst = i;
+ }
}
bool enc0_ok = (enc0_max_err <= tol);
if (!enc0_ok) {
int px = enc0_worst / 8, ch = enc0_worst % 8;
- fprintf(stderr, " ✗ enc0 mismatch: max_err=%.5f > %.5f at px=%d ch=%d"
+ fprintf(stderr,
+ " ✗ enc0 mismatch: max_err=%.5f > %.5f at px=%d ch=%d"
" gpu=%.5f ref=%.5f\n",
- enc0_max_err, tol, px, ch,
- enc0_pixels[enc0_worst],
+ enc0_max_err, tol, px, ch, enc0_pixels[enc0_worst],
fp16_bits_to_f32(kCnnV3ExpectedEnc0U16[enc0_worst]));
} else {
fprintf(stdout, " ✓ enc0: max_err=%.2e OK\n", enc0_max_err);
@@ -397,15 +405,18 @@ static int test_random_weights() {
for (int i = 0; i < dec1_n; ++i) {
float ref = fp16_bits_to_f32(kCnnV3ExpectedDec1U16[i]);
float err = fabsf(dec1_pixels[i] - ref);
- if (err > dec1_max_err) { dec1_max_err = err; dec1_worst = i; }
+ if (err > dec1_max_err) {
+ dec1_max_err = err;
+ dec1_worst = i;
+ }
}
bool dec1_ok = (dec1_max_err <= tol);
if (!dec1_ok) {
int px = dec1_worst / 8, ch = dec1_worst % 8;
- fprintf(stderr, " ✗ dec1 mismatch: max_err=%.5f > %.5f at px=%d ch=%d"
+ fprintf(stderr,
+ " ✗ dec1 mismatch: max_err=%.5f > %.5f at px=%d ch=%d"
" gpu=%.5f ref=%.5f\n",
- dec1_max_err, tol, px, ch,
- dec1_pixels[dec1_worst],
+ dec1_max_err, tol, px, ch, dec1_pixels[dec1_worst],
fp16_bits_to_f32(kCnnV3ExpectedDec1U16[dec1_worst]));
} else {
fprintf(stdout, " ✓ dec1: max_err=%.2e OK\n", dec1_max_err);
@@ -418,16 +429,19 @@ static int test_random_weights() {
for (int i = 0; i < n; ++i) {
float ref = fp16_bits_to_f32(kCnnV3ExpectedOutputU16[i]);
float err = fabsf(pixels[i] - ref);
- if (err > max_err) { max_err = err; worst = i; }
+ if (err > max_err) {
+ max_err = err;
+ worst = i;
+ }
}
bool out_ok = (max_err <= tol);
if (!out_ok) {
int px = worst / 4, ch = worst % 4;
- fprintf(stderr, " ✗ random_weights: max_err=%.5f > %.5f at px=%d ch=%d"
+ fprintf(stderr,
+ " ✗ random_weights: max_err=%.5f > %.5f at px=%d ch=%d"
" gpu=%.5f ref=%.5f\n",
- max_err, tol, px, ch,
- pixels[worst],
+ max_err, tol, px, ch, pixels[worst],
fp16_bits_to_f32(kCnnV3ExpectedOutputU16[worst]));
} else {
fprintf(stdout, " ✓ random_weights: max_err=%.2e OK\n", max_err);
@@ -442,8 +456,10 @@ static int test_random_weights() {
int main() {
int pass = 0, total = 0;
- ++total; pass += test_zero_weights();
- ++total; pass += test_random_weights();
+ ++total;
+ pass += test_zero_weights();
+ ++total;
+ pass += test_random_weights();
fprintf(stdout, "\nCNN v3 parity: %d/%d passed\n", pass, total);
return (pass == total) ? 0 : 1;