summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorskal <pascal.massimino@gmail.com>2026-02-11 07:48:56 +0100
committerskal <pascal.massimino@gmail.com>2026-02-11 07:48:56 +0100
commit6d64674f7e3d00a9d18ec61eaf968ed37c8e849b (patch)
tree4fcffcc28b866daa61543f48a8c3e76fed982951
parentbadf6907dd071afa7a5438fdf575d73c4c417e3f (diff)
fix: CNN test tool GPU readback with wgpuDevicePoll
Fixed buffer mapping callback mode mismatch causing Unknown status. Changed from WaitAnyOnly+ProcessEvents to AllowProcessEvents+DevicePoll. Readback now functional but CNN output incorrect (all white). Issue isolated to tool-specific binding/uniform setup - CNNEffect in demo works correctly. Technical details: - WGPUCallbackMode_WaitAnyOnly requires wgpuInstanceWaitAny - Using wgpuInstanceProcessEvents with WaitAnyOnly never fires callback - Fixed by using AllowProcessEvents mode + wgpuDevicePoll - Removed debug output and platform warnings Status: 36/36 tests pass, readback works, CNN shader issue remains. handoff(Claude): CNN test tool readback fixed, output debugging needed
-rw-r--r--PROJECT_CONTEXT.md4
-rw-r--r--doc/CNN_TEST_TOOL.md37
-rw-r--r--doc/HOWTO.md3
-rw-r--r--src/gpu/texture_readback.cc12
-rw-r--r--tools/cnn_test.cc175
5 files changed, 126 insertions, 105 deletions
diff --git a/PROJECT_CONTEXT.md b/PROJECT_CONTEXT.md
index 8b84cde..7e8107e 100644
--- a/PROJECT_CONTEXT.md
+++ b/PROJECT_CONTEXT.md
@@ -35,8 +35,8 @@
- **Audio:** Sample-accurate sync. Zero heap allocations per frame. Variable tempo. Comprehensive tests.
- **Shaders:** Parameterized effects (UniformHelper, .seq syntax). Modular WGSL composition.
- **3D:** Hybrid SDF/rasterization with BVH. Binary scene loader. Blender pipeline.
-- **Effects:** CNN post-processing foundation (3-layer architecture, modular snippets, validation tool).
-- **Tools:** CNN test tool for offline shader validation. Texture readback utility for GPU-to-CPU operations.
+- **Effects:** CNN post-processing foundation (3-layer architecture, modular snippets). CNNEffect validated in demo.
+- **Tools:** CNN test tool (readback works, output incorrect - under investigation). Texture readback utility functional.
- **Build:** Asset dependency tracking. Size measurement. Hot-reload (debug-only).
- **Testing:** **36/36 passing (100%)**
diff --git a/doc/CNN_TEST_TOOL.md b/doc/CNN_TEST_TOOL.md
index 09c55d4..e7d679e 100644
--- a/doc/CNN_TEST_TOOL.md
+++ b/doc/CNN_TEST_TOOL.md
@@ -178,12 +178,37 @@ assert mse < 10.0, f'MSE too high: {mse}'
## Known Issues
-**BUG: Black output (unknown cause)**
-- Tool produces all-black output despite correct architecture
-- Fixed ping-pong logic, RGBA16Float intermediates, proper pipelines
-- Shader compiles, GPU commands execute without errors
-- Possible causes: shader execution issue, synchronization, binding problem
-- Status: Under investigation
+**BUG: CNN produces incorrect output (all white)**
+- Readback works correctly (see Technical Notes below)
+- Shader compiles and executes without errors
+- Output is all white (255) regardless of input or blend setting
+- **Likely causes:**
+ - Uniform buffer layout mismatch between C++ and WGSL
+ - Texture binding issue (input not sampled correctly)
+ - Weight matrix initialization problem
+- CNNEffect works correctly in demo (visual validation confirms)
+- **Status:** Under investigation - rendering pipeline differs from demo's CNNEffect
+- **Workaround:** Use CNNEffect visual validation in demo until tool fixed
+
+---
+
+## Technical Notes (Readback Fix)
+
+**Original Bug:** Buffer mapping returned `WGPUMapAsyncStatus_Unknown` (status=5)
+
+**Root Cause:** Callback mode mismatch
+- Used `WGPUCallbackMode_WaitAnyOnly` (fires only during `wgpuInstanceWaitAny`)
+- Called `wgpuInstanceProcessEvents` in wait loop (wrong API for this mode)
+- Callback never fired → timeout → empty buffer
+
+**Fix Applied:**
+1. Changed callback mode to `WGPUCallbackMode_AllowProcessEvents`
+2. Replaced `wgpuInstanceProcessEvents` with `wgpuDevicePoll(device, true, nullptr)`
+3. Added pre-mapping device poll to ensure copy completes
+
+**Relevant Code:** `src/gpu/texture_readback.cc` lines 97-110
+
+**Reference:** WebGPU spec - Asynchronous Operations, Callback Modes
---
diff --git a/doc/HOWTO.md b/doc/HOWTO.md
index c0e9363..140c09f 100644
--- a/doc/HOWTO.md
+++ b/doc/HOWTO.md
@@ -165,6 +165,9 @@ See `doc/ASSET_SYSTEM.md` and `doc/WORKSPACE_SYSTEM.md`.
## CNN Testing
### Offline Shader Validation
+
+**Note:** Tool builds and runs but produces incorrect output. Use CNNEffect visual validation in demo. See `doc/CNN_TEST_TOOL.md`.
+
```bash
# Test trained CNN on PNG input
./build/cnn_test input.png output.png
diff --git a/src/gpu/texture_readback.cc b/src/gpu/texture_readback.cc
index 3a690d3..0eb63d7 100644
--- a/src/gpu/texture_readback.cc
+++ b/src/gpu/texture_readback.cc
@@ -72,6 +72,9 @@ std::vector<uint8_t> read_texture_pixels(
wgpuCommandBufferRelease(commands);
wgpuCommandEncoderRelease(encoder);
+ // Wait for copy to complete before mapping
+ wgpuDevicePoll(device, true, nullptr);
+
// Map buffer for reading (API differs between Win32 and native)
#if defined(DEMO_CROSS_COMPILE_WIN32)
// Win32: Old callback API
@@ -95,7 +98,7 @@ std::vector<uint8_t> read_texture_pixels(
state->done = true;
};
WGPUBufferMapCallbackInfo map_info = {};
- map_info.mode = WGPUCallbackMode_WaitAnyOnly;
+ map_info.mode = WGPUCallbackMode_AllowProcessEvents; // Fire during ProcessEvents
map_info.callback = map_cb;
map_info.userdata1 = &map_state;
wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buffer_size, map_info);
@@ -106,14 +109,13 @@ std::vector<uint8_t> read_texture_pixels(
#if defined(__EMSCRIPTEN__)
emscripten_sleep(10);
#else
- wgpuInstanceProcessEvents(instance);
+ wgpuDevicePoll(device, true, nullptr);
#endif
}
- if (map_state.status != WGPUMapAsyncStatus_Success) {
- fprintf(stderr, "Buffer mapping failed: %d\n", map_state.status);
+ if (!map_state.done || map_state.status != WGPUMapAsyncStatus_Success) {
wgpuBufferRelease(staging);
- return pixels; // Return empty
+ return pixels; // Return empty on timeout or failure
}
// Copy data from mapped buffer (handle row padding)
diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc
index c44606c..62a60f4 100644
--- a/tools/cnn_test.cc
+++ b/tools/cnn_test.cc
@@ -352,26 +352,6 @@ int main(int argc, char** argv) {
wgpuTextureCreateView(intermediate_textures[1], &intermediate_view_desc),
};
- // Create final output texture (BGRA8Unorm for readback)
- const WGPUTextureDescriptor final_desc = {
- .usage = WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc,
- .dimension = WGPUTextureDimension_2D,
- .size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1},
- .format = WGPUTextureFormat_BGRA8Unorm,
- .mipLevelCount = 1,
- .sampleCount = 1,
- };
- WGPUTexture final_output_texture = wgpuDeviceCreateTexture(device, &final_desc);
- const WGPUTextureViewDescriptor final_view_desc = {
- .format = WGPUTextureFormat_BGRA8Unorm,
- .dimension = WGPUTextureViewDimension_2D,
- .baseMipLevel = 0,
- .mipLevelCount = 1,
- .baseArrayLayer = 0,
- .arrayLayerCount = 1,
- };
- WGPUTextureView final_output_view = wgpuTextureCreateView(final_output_texture, &final_view_desc);
-
// Get sampler
WGPUSampler sampler =
SamplerCache::Get().get_or_create(device, SamplerCache::clamp());
@@ -420,25 +400,92 @@ int main(int argc, char** argv) {
// Render to appropriate output texture with correct pipeline
bool is_final = (layer == NUM_LAYERS - 1);
- WGPUTextureView output_view = is_final ? final_output_view : intermediate_views[dst_idx];
- WGPURenderPipeline current_pipeline = is_final ? pipeline_final : pipeline_intermediate;
- WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr);
- WGPURenderPassEncoder pass = begin_render_pass(encoder, output_view);
- wgpuRenderPassEncoderSetPipeline(pass, current_pipeline);
- wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
- wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); // Fullscreen triangle
- wgpuRenderPassEncoderEnd(pass);
- WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr);
- wgpuQueueSubmit(queue, 1, &commands);
+ if (is_final) {
+ // Final layer: use OffscreenRenderTarget (known working readback)
+ OffscreenRenderTarget rt(instance, device, width, height);
+ WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr);
+ WGPURenderPassEncoder pass = begin_render_pass(encoder, rt.view());
+ wgpuRenderPassEncoderSetPipeline(pass, pipeline_final);
+ wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
+ wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
+ wgpuRenderPassEncoderEnd(pass);
+ WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr);
+ wgpuQueueSubmit(queue, 1, &commands);
+ wgpuDevicePoll(device, true, nullptr);
+
+ wgpuCommandBufferRelease(commands);
+ wgpuRenderPassEncoderRelease(pass);
+ wgpuCommandEncoderRelease(encoder);
+ wgpuBindGroupRelease(bind_group);
+
+ // Read pixels immediately
+ printf("Reading pixels from GPU...\n");
+ std::vector<uint8_t> pixels = rt.read_pixels();
+
+ if (pixels.empty()) {
+ fprintf(stderr, "Error: GPU readback failed\n");
+ wgpuTextureViewRelease(intermediate_views[0]);
+ wgpuTextureViewRelease(intermediate_views[1]);
+ wgpuTextureRelease(intermediate_textures[0]);
+ wgpuTextureRelease(intermediate_textures[1]);
+ wgpuTextureViewRelease(input_view);
+ wgpuTextureRelease(input_texture);
+ wgpuBufferRelease(layer_params_buffer);
+ wgpuBufferRelease(common_uniform_buffer);
+ wgpuBindGroupLayoutRelease(bgl);
+ wgpuRenderPipelineRelease(pipeline_final);
+ wgpuRenderPipelineRelease(pipeline_intermediate);
+ fixture.shutdown();
+ return 1;
+ }
+
+ // Save output
+ bool success;
+ if (args.output_png) {
+ printf("Saving PNG to '%s'...\n", args.output_path);
+ success = save_png(args.output_path, pixels, width, height);
+ } else {
+ printf("Saving PPM to '%s'...\n", args.output_path);
+ success = save_ppm(args.output_path, pixels, width, height);
+ }
+
+ if (!success) {
+ wgpuTextureViewRelease(intermediate_views[0]);
+ wgpuTextureViewRelease(intermediate_views[1]);
+ wgpuTextureRelease(intermediate_textures[0]);
+ wgpuTextureRelease(intermediate_textures[1]);
+ wgpuTextureViewRelease(input_view);
+ wgpuTextureRelease(input_texture);
+ wgpuBufferRelease(layer_params_buffer);
+ wgpuBufferRelease(common_uniform_buffer);
+ wgpuBindGroupLayoutRelease(bgl);
+ wgpuRenderPipelineRelease(pipeline_final);
+ wgpuRenderPipelineRelease(pipeline_intermediate);
+ fixture.shutdown();
+ return 1;
+ }
- // Wait for GPU to complete this layer before proceeding
- wgpuDevicePoll(device, true, nullptr);
+ printf("Done! Output saved to '%s'\n", args.output_path);
+ break; // Exit loop after final layer
+ } else {
+ // Intermediate layers: render to ping-pong textures
+ WGPUTextureView output_view = intermediate_views[dst_idx];
+ WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr);
+ WGPURenderPassEncoder pass = begin_render_pass(encoder, output_view);
+ wgpuRenderPassEncoderSetPipeline(pass, pipeline_intermediate);
+ wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
+ wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0);
+ wgpuRenderPassEncoderEnd(pass);
+ WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr);
+ wgpuQueueSubmit(queue, 1, &commands);
+ wgpuDevicePoll(device, true, nullptr);
- wgpuCommandBufferRelease(commands);
- wgpuRenderPassEncoderRelease(pass);
- wgpuCommandEncoderRelease(encoder);
- wgpuBindGroupRelease(bind_group);
+ wgpuCommandBufferRelease(commands);
+ wgpuRenderPassEncoderRelease(pass);
+ wgpuCommandEncoderRelease(encoder);
+ wgpuBindGroupRelease(bind_group);
+ }
// Update for next layer: output becomes input
if (layer < NUM_LAYERS - 1) {
@@ -448,62 +495,6 @@ int main(int argc, char** argv) {
}
}
- printf("Reading pixels from GPU...\n");
-
- // Read final output from GPU (always BGRA8Unorm)
- std::vector<uint8_t> pixels =
- read_texture_pixels(instance, device, final_output_texture, width, height);
-
- if (pixels.empty()) {
- fprintf(stderr, "Error: failed to read pixels from GPU\n");
- // Cleanup...
- wgpuTextureViewRelease(final_output_view);
- wgpuTextureRelease(final_output_texture);
- wgpuTextureViewRelease(intermediate_views[0]);
- wgpuTextureViewRelease(intermediate_views[1]);
- wgpuTextureRelease(intermediate_textures[0]);
- wgpuTextureRelease(intermediate_textures[1]);
- wgpuBufferRelease(layer_params_buffer);
- wgpuBufferRelease(common_uniform_buffer);
- wgpuBindGroupLayoutRelease(bgl);
- wgpuRenderPipelineRelease(pipeline_intermediate);
- wgpuRenderPipelineRelease(pipeline_final);
- wgpuTextureViewRelease(input_view);
- wgpuTextureRelease(input_texture);
- fixture.shutdown();
- return 1;
- }
-
- // Save output
- bool success = false;
- if (args.output_png) {
- printf("Saving PNG to '%s'...\n", args.output_path);
- success = save_png(args.output_path, pixels, width, height);
- } else {
- printf("Saving PPM to '%s'...\n", args.output_path);
- success = save_ppm(args.output_path, pixels, width, height);
- }
-
- if (!success) {
- wgpuTextureViewRelease(final_output_view);
- wgpuTextureRelease(final_output_texture);
- wgpuTextureViewRelease(intermediate_views[0]);
- wgpuTextureViewRelease(intermediate_views[1]);
- wgpuTextureRelease(intermediate_textures[0]);
- wgpuTextureRelease(intermediate_textures[1]);
- wgpuBufferRelease(layer_params_buffer);
- wgpuBufferRelease(common_uniform_buffer);
- wgpuBindGroupLayoutRelease(bgl);
- wgpuRenderPipelineRelease(pipeline_intermediate);
- wgpuRenderPipelineRelease(pipeline_final);
- wgpuTextureViewRelease(input_view);
- wgpuTextureRelease(input_texture);
- fixture.shutdown();
- return 1;
- }
-
- printf("Done! Output saved to '%s'\n", args.output_path);
-
// Cleanup
wgpuTextureViewRelease(intermediate_views[0]);
wgpuTextureViewRelease(intermediate_views[1]);