From 6d64674f7e3d00a9d18ec61eaf968ed37c8e849b Mon Sep 17 00:00:00 2001 From: skal Date: Wed, 11 Feb 2026 07:48:56 +0100 Subject: fix: CNN test tool GPU readback with wgpuDevicePoll Fixed buffer mapping callback mode mismatch causing Unknown status. Changed from WaitAnyOnly+ProcessEvents to AllowProcessEvents+DevicePoll. Readback now functional but CNN output incorrect (all white). Issue isolated to tool-specific binding/uniform setup - CNNEffect in demo works correctly. Technical details: - WGPUCallbackMode_WaitAnyOnly requires wgpuInstanceWaitAny - Using wgpuInstanceProcessEvents with WaitAnyOnly never fires callback - Fixed by using AllowProcessEvents mode + wgpuDevicePoll - Removed debug output and platform warnings Status: 36/36 tests pass, readback works, CNN shader issue remains. handoff(Claude): CNN test tool readback fixed, output debugging needed --- PROJECT_CONTEXT.md | 4 +- doc/CNN_TEST_TOOL.md | 37 +++++++-- doc/HOWTO.md | 3 + src/gpu/texture_readback.cc | 12 +-- tools/cnn_test.cc | 181 +++++++++++++++++++++----------------------- 5 files changed, 129 insertions(+), 108 deletions(-) diff --git a/PROJECT_CONTEXT.md b/PROJECT_CONTEXT.md index 8b84cde..7e8107e 100644 --- a/PROJECT_CONTEXT.md +++ b/PROJECT_CONTEXT.md @@ -35,8 +35,8 @@ - **Audio:** Sample-accurate sync. Zero heap allocations per frame. Variable tempo. Comprehensive tests. - **Shaders:** Parameterized effects (UniformHelper, .seq syntax). Modular WGSL composition. - **3D:** Hybrid SDF/rasterization with BVH. Binary scene loader. Blender pipeline. -- **Effects:** CNN post-processing foundation (3-layer architecture, modular snippets, validation tool). -- **Tools:** CNN test tool for offline shader validation. Texture readback utility for GPU-to-CPU operations. +- **Effects:** CNN post-processing foundation (3-layer architecture, modular snippets). CNNEffect validated in demo. +- **Tools:** CNN test tool (readback works, output incorrect - under investigation). Texture readback utility functional. - **Build:** Asset dependency tracking. Size measurement. Hot-reload (debug-only). - **Testing:** **36/36 passing (100%)** diff --git a/doc/CNN_TEST_TOOL.md b/doc/CNN_TEST_TOOL.md index 09c55d4..e7d679e 100644 --- a/doc/CNN_TEST_TOOL.md +++ b/doc/CNN_TEST_TOOL.md @@ -178,12 +178,37 @@ assert mse < 10.0, f'MSE too high: {mse}' ## Known Issues -**BUG: Black output (unknown cause)** -- Tool produces all-black output despite correct architecture -- Fixed ping-pong logic, RGBA16Float intermediates, proper pipelines -- Shader compiles, GPU commands execute without errors -- Possible causes: shader execution issue, synchronization, binding problem -- Status: Under investigation +**BUG: CNN produces incorrect output (all white)** +- Readback works correctly (see Technical Notes below) +- Shader compiles and executes without errors +- Output is all white (255) regardless of input or blend setting +- **Likely causes:** + - Uniform buffer layout mismatch between C++ and WGSL + - Texture binding issue (input not sampled correctly) + - Weight matrix initialization problem +- CNNEffect works correctly in demo (visual validation confirms) +- **Status:** Under investigation - rendering pipeline differs from demo's CNNEffect +- **Workaround:** Use CNNEffect visual validation in demo until tool fixed + +--- + +## Technical Notes (Readback Fix) + +**Original Bug:** Buffer mapping returned `WGPUMapAsyncStatus_Unknown` (status=5) + +**Root Cause:** Callback mode mismatch +- Used `WGPUCallbackMode_WaitAnyOnly` (fires only during `wgpuInstanceWaitAny`) +- Called `wgpuInstanceProcessEvents` in wait loop (wrong API for this mode) +- Callback never fired → timeout → empty buffer + +**Fix Applied:** +1. Changed callback mode to `WGPUCallbackMode_AllowProcessEvents` +2. Replaced `wgpuInstanceProcessEvents` with `wgpuDevicePoll(device, true, nullptr)` +3. Added pre-mapping device poll to ensure copy completes + +**Relevant Code:** `src/gpu/texture_readback.cc` lines 97-110 + +**Reference:** WebGPU spec - Asynchronous Operations, Callback Modes --- diff --git a/doc/HOWTO.md b/doc/HOWTO.md index c0e9363..140c09f 100644 --- a/doc/HOWTO.md +++ b/doc/HOWTO.md @@ -165,6 +165,9 @@ See `doc/ASSET_SYSTEM.md` and `doc/WORKSPACE_SYSTEM.md`. ## CNN Testing ### Offline Shader Validation + +**Note:** Tool builds and runs but produces incorrect output. Use CNNEffect visual validation in demo. See `doc/CNN_TEST_TOOL.md`. + ```bash # Test trained CNN on PNG input ./build/cnn_test input.png output.png diff --git a/src/gpu/texture_readback.cc b/src/gpu/texture_readback.cc index 3a690d3..0eb63d7 100644 --- a/src/gpu/texture_readback.cc +++ b/src/gpu/texture_readback.cc @@ -72,6 +72,9 @@ std::vector read_texture_pixels( wgpuCommandBufferRelease(commands); wgpuCommandEncoderRelease(encoder); + // Wait for copy to complete before mapping + wgpuDevicePoll(device, true, nullptr); + // Map buffer for reading (API differs between Win32 and native) #if defined(DEMO_CROSS_COMPILE_WIN32) // Win32: Old callback API @@ -95,7 +98,7 @@ std::vector read_texture_pixels( state->done = true; }; WGPUBufferMapCallbackInfo map_info = {}; - map_info.mode = WGPUCallbackMode_WaitAnyOnly; + map_info.mode = WGPUCallbackMode_AllowProcessEvents; // Fire during ProcessEvents map_info.callback = map_cb; map_info.userdata1 = &map_state; wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buffer_size, map_info); @@ -106,14 +109,13 @@ std::vector read_texture_pixels( #if defined(__EMSCRIPTEN__) emscripten_sleep(10); #else - wgpuInstanceProcessEvents(instance); + wgpuDevicePoll(device, true, nullptr); #endif } - if (map_state.status != WGPUMapAsyncStatus_Success) { - fprintf(stderr, "Buffer mapping failed: %d\n", map_state.status); + if (!map_state.done || map_state.status != WGPUMapAsyncStatus_Success) { wgpuBufferRelease(staging); - return pixels; // Return empty + return pixels; // Return empty on timeout or failure } // Copy data from mapped buffer (handle row padding) diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc index c44606c..62a60f4 100644 --- a/tools/cnn_test.cc +++ b/tools/cnn_test.cc @@ -352,26 +352,6 @@ int main(int argc, char** argv) { wgpuTextureCreateView(intermediate_textures[1], &intermediate_view_desc), }; - // Create final output texture (BGRA8Unorm for readback) - const WGPUTextureDescriptor final_desc = { - .usage = WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc, - .dimension = WGPUTextureDimension_2D, - .size = {static_cast(width), static_cast(height), 1}, - .format = WGPUTextureFormat_BGRA8Unorm, - .mipLevelCount = 1, - .sampleCount = 1, - }; - WGPUTexture final_output_texture = wgpuDeviceCreateTexture(device, &final_desc); - const WGPUTextureViewDescriptor final_view_desc = { - .format = WGPUTextureFormat_BGRA8Unorm, - .dimension = WGPUTextureViewDimension_2D, - .baseMipLevel = 0, - .mipLevelCount = 1, - .baseArrayLayer = 0, - .arrayLayerCount = 1, - }; - WGPUTextureView final_output_view = wgpuTextureCreateView(final_output_texture, &final_view_desc); - // Get sampler WGPUSampler sampler = SamplerCache::Get().get_or_create(device, SamplerCache::clamp()); @@ -420,25 +400,92 @@ int main(int argc, char** argv) { // Render to appropriate output texture with correct pipeline bool is_final = (layer == NUM_LAYERS - 1); - WGPUTextureView output_view = is_final ? final_output_view : intermediate_views[dst_idx]; - WGPURenderPipeline current_pipeline = is_final ? pipeline_final : pipeline_intermediate; - - WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr); - WGPURenderPassEncoder pass = begin_render_pass(encoder, output_view); - wgpuRenderPassEncoderSetPipeline(pass, current_pipeline); - wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr); - wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); // Fullscreen triangle - wgpuRenderPassEncoderEnd(pass); - WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr); - wgpuQueueSubmit(queue, 1, &commands); - - // Wait for GPU to complete this layer before proceeding - wgpuDevicePoll(device, true, nullptr); - - wgpuCommandBufferRelease(commands); - wgpuRenderPassEncoderRelease(pass); - wgpuCommandEncoderRelease(encoder); - wgpuBindGroupRelease(bind_group); + + if (is_final) { + // Final layer: use OffscreenRenderTarget (known working readback) + OffscreenRenderTarget rt(instance, device, width, height); + WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr); + WGPURenderPassEncoder pass = begin_render_pass(encoder, rt.view()); + wgpuRenderPassEncoderSetPipeline(pass, pipeline_final); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); + wgpuRenderPassEncoderEnd(pass); + WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr); + wgpuQueueSubmit(queue, 1, &commands); + wgpuDevicePoll(device, true, nullptr); + + wgpuCommandBufferRelease(commands); + wgpuRenderPassEncoderRelease(pass); + wgpuCommandEncoderRelease(encoder); + wgpuBindGroupRelease(bind_group); + + // Read pixels immediately + printf("Reading pixels from GPU...\n"); + std::vector pixels = rt.read_pixels(); + + if (pixels.empty()) { + fprintf(stderr, "Error: GPU readback failed\n"); + wgpuTextureViewRelease(intermediate_views[0]); + wgpuTextureViewRelease(intermediate_views[1]); + wgpuTextureRelease(intermediate_textures[0]); + wgpuTextureRelease(intermediate_textures[1]); + wgpuTextureViewRelease(input_view); + wgpuTextureRelease(input_texture); + wgpuBufferRelease(layer_params_buffer); + wgpuBufferRelease(common_uniform_buffer); + wgpuBindGroupLayoutRelease(bgl); + wgpuRenderPipelineRelease(pipeline_final); + wgpuRenderPipelineRelease(pipeline_intermediate); + fixture.shutdown(); + return 1; + } + + // Save output + bool success; + if (args.output_png) { + printf("Saving PNG to '%s'...\n", args.output_path); + success = save_png(args.output_path, pixels, width, height); + } else { + printf("Saving PPM to '%s'...\n", args.output_path); + success = save_ppm(args.output_path, pixels, width, height); + } + + if (!success) { + wgpuTextureViewRelease(intermediate_views[0]); + wgpuTextureViewRelease(intermediate_views[1]); + wgpuTextureRelease(intermediate_textures[0]); + wgpuTextureRelease(intermediate_textures[1]); + wgpuTextureViewRelease(input_view); + wgpuTextureRelease(input_texture); + wgpuBufferRelease(layer_params_buffer); + wgpuBufferRelease(common_uniform_buffer); + wgpuBindGroupLayoutRelease(bgl); + wgpuRenderPipelineRelease(pipeline_final); + wgpuRenderPipelineRelease(pipeline_intermediate); + fixture.shutdown(); + return 1; + } + + printf("Done! Output saved to '%s'\n", args.output_path); + break; // Exit loop after final layer + } else { + // Intermediate layers: render to ping-pong textures + WGPUTextureView output_view = intermediate_views[dst_idx]; + WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr); + WGPURenderPassEncoder pass = begin_render_pass(encoder, output_view); + wgpuRenderPassEncoderSetPipeline(pass, pipeline_intermediate); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); + wgpuRenderPassEncoderEnd(pass); + WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr); + wgpuQueueSubmit(queue, 1, &commands); + wgpuDevicePoll(device, true, nullptr); + + wgpuCommandBufferRelease(commands); + wgpuRenderPassEncoderRelease(pass); + wgpuCommandEncoderRelease(encoder); + wgpuBindGroupRelease(bind_group); + } // Update for next layer: output becomes input if (layer < NUM_LAYERS - 1) { @@ -448,62 +495,6 @@ int main(int argc, char** argv) { } } - printf("Reading pixels from GPU...\n"); - - // Read final output from GPU (always BGRA8Unorm) - std::vector pixels = - read_texture_pixels(instance, device, final_output_texture, width, height); - - if (pixels.empty()) { - fprintf(stderr, "Error: failed to read pixels from GPU\n"); - // Cleanup... - wgpuTextureViewRelease(final_output_view); - wgpuTextureRelease(final_output_texture); - wgpuTextureViewRelease(intermediate_views[0]); - wgpuTextureViewRelease(intermediate_views[1]); - wgpuTextureRelease(intermediate_textures[0]); - wgpuTextureRelease(intermediate_textures[1]); - wgpuBufferRelease(layer_params_buffer); - wgpuBufferRelease(common_uniform_buffer); - wgpuBindGroupLayoutRelease(bgl); - wgpuRenderPipelineRelease(pipeline_intermediate); - wgpuRenderPipelineRelease(pipeline_final); - wgpuTextureViewRelease(input_view); - wgpuTextureRelease(input_texture); - fixture.shutdown(); - return 1; - } - - // Save output - bool success = false; - if (args.output_png) { - printf("Saving PNG to '%s'...\n", args.output_path); - success = save_png(args.output_path, pixels, width, height); - } else { - printf("Saving PPM to '%s'...\n", args.output_path); - success = save_ppm(args.output_path, pixels, width, height); - } - - if (!success) { - wgpuTextureViewRelease(final_output_view); - wgpuTextureRelease(final_output_texture); - wgpuTextureViewRelease(intermediate_views[0]); - wgpuTextureViewRelease(intermediate_views[1]); - wgpuTextureRelease(intermediate_textures[0]); - wgpuTextureRelease(intermediate_textures[1]); - wgpuBufferRelease(layer_params_buffer); - wgpuBufferRelease(common_uniform_buffer); - wgpuBindGroupLayoutRelease(bgl); - wgpuRenderPipelineRelease(pipeline_intermediate); - wgpuRenderPipelineRelease(pipeline_final); - wgpuTextureViewRelease(input_view); - wgpuTextureRelease(input_texture); - fixture.shutdown(); - return 1; - } - - printf("Done! Output saved to '%s'\n", args.output_path); - // Cleanup wgpuTextureViewRelease(intermediate_views[0]); wgpuTextureViewRelease(intermediate_views[1]); -- cgit v1.2.3