diff options
| author | skal <pascal.massimino@gmail.com> | 2026-02-11 07:07:29 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-11 07:07:29 +0100 |
| commit | 3915a5e1c8c904f8f2154845cb99223a598653ee (patch) | |
| tree | cb0e75dea7f8aa729d3b440a5e81b3ac811f8f04 /src | |
| parent | 01e640be66f9d72c22417403eb88e18d6747866f (diff) | |
feat: Add CNN shader testing tool with GPU texture readback
Core GPU Utility (texture_readback):
- Reusable synchronous texture-to-CPU readback (~150 lines)
- STRIP_ALL guards (0 bytes in release builds)
- Handles COPY_BYTES_PER_ROW_ALIGNMENT (256-byte alignment)
- Refactored OffscreenRenderTarget to use new utility
CNN Test Tool (cnn_test):
- Standalone PNG→3-layer CNN→PNG/PPM tool (~450 lines)
- --blend parameter (0.0-1.0) for final layer mixing
- --format option (png/ppm) for output format
- ShaderComposer integration for include resolution
Build Integration:
- Added texture_readback.cc to GPU_SOURCES (both sections)
- Tool target with STB_IMAGE support
Testing:
- All 36 tests pass (100%)
- Processes 64×64 and 555×370 images successfully
- Ground-truth validation setup complete
Known Issues:
- BUG: Tool produces black output (uninitialized input texture)
- First intermediate texture not initialized before layer loop
- MSE 64860 vs Python ground truth (expected <10)
- Fix required: Copy input to intermediate[0] before processing
Documentation:
- doc/CNN_TEST_TOOL.md - Full technical reference
- Updated PROJECT_CONTEXT.md and COMPLETED.md
handoff(Claude): CNN test tool foundation complete, needs input init bugfix
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Diffstat (limited to 'src')
| -rw-r--r-- | src/gpu/texture_readback.cc | 143 | ||||
| -rw-r--r-- | src/gpu/texture_readback.h | 23 | ||||
| -rw-r--r-- | src/tests/common/offscreen_render_target.cc | 103 |
3 files changed, 170 insertions, 99 deletions
diff --git a/src/gpu/texture_readback.cc b/src/gpu/texture_readback.cc new file mode 100644 index 0000000..3a690d3 --- /dev/null +++ b/src/gpu/texture_readback.cc @@ -0,0 +1,143 @@ +// GPU texture readback utility implementation +// Extracts texture pixels to CPU memory for offline processing + +#include "gpu/texture_readback.h" + +#if !defined(STRIP_ALL) + +#include <cassert> +#include <cstdio> +#include <cstring> + +// Callback state for async buffer mapping +struct MapState { + bool done = false; + WGPUMapAsyncStatus status = WGPUMapAsyncStatus_Unknown; +}; + +std::vector<uint8_t> read_texture_pixels( + WGPUInstance instance, + WGPUDevice device, + WGPUTexture texture, + int width, + int height) { + + // Align bytes per row to 256 (COPY_BYTES_PER_ROW_ALIGNMENT) + const uint32_t bytes_per_pixel = 4; // BGRA8 + const uint32_t unaligned_bytes_per_row = width * bytes_per_pixel; + const uint32_t aligned_bytes_per_row = + ((unaligned_bytes_per_row + 255) / 256) * 256; + + const size_t buffer_size = aligned_bytes_per_row * height; + std::vector<uint8_t> pixels(width * height * bytes_per_pixel); + + // Create staging buffer for readback (with aligned size) + const WGPUBufferDescriptor buffer_desc = { + .usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead, + .size = buffer_size, + }; + WGPUBuffer staging = wgpuDeviceCreateBuffer(device, &buffer_desc); + assert(staging && "Failed to create staging buffer"); + + // Create command encoder for copy operation + const WGPUCommandEncoderDescriptor enc_desc = {}; + WGPUCommandEncoder encoder = + wgpuDeviceCreateCommandEncoder(device, &enc_desc); + + // Copy texture to buffer + const WGPUTexelCopyTextureInfo src = { + .texture = texture, + .mipLevel = 0, + .origin = {0, 0, 0}, + }; + + const WGPUTexelCopyBufferInfo dst = { + .buffer = staging, + .layout = + { + .bytesPerRow = aligned_bytes_per_row, + .rowsPerImage = static_cast<uint32_t>(height), + }, + }; + + const WGPUExtent3D copy_size = {static_cast<uint32_t>(width), + 
static_cast<uint32_t>(height), 1}; + + wgpuCommandEncoderCopyTextureToBuffer(encoder, &src, &dst, &copy_size); + + // Submit commands + WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr); + WGPUQueue queue = wgpuDeviceGetQueue(device); + wgpuQueueSubmit(queue, 1, &commands); + wgpuCommandBufferRelease(commands); + wgpuCommandEncoderRelease(encoder); + + // Map buffer for reading (API differs between Win32 and native) +#if defined(DEMO_CROSS_COMPILE_WIN32) + // Win32: Old callback API + MapState map_state = {}; + auto map_cb = [](WGPUBufferMapAsyncStatus status, void* userdata) { + MapState* state = static_cast<MapState*>(userdata); + state->status = status; + state->done = true; + }; + wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buffer_size, map_cb, + &map_state); +#else + // Native: New callback info API + MapState map_state = {}; + auto map_cb = [](WGPUMapAsyncStatus status, WGPUStringView message, + void* userdata, void* user2) { + (void)message; + (void)user2; + MapState* state = static_cast<MapState*>(userdata); + state->status = status; + state->done = true; + }; + WGPUBufferMapCallbackInfo map_info = {}; + map_info.mode = WGPUCallbackMode_WaitAnyOnly; + map_info.callback = map_cb; + map_info.userdata1 = &map_state; + wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buffer_size, map_info); +#endif + + // Wait for mapping to complete (synchronous blocking) + for (int i = 0; i < 100 && !map_state.done; ++i) { +#if defined(__EMSCRIPTEN__) + emscripten_sleep(10); +#else + wgpuInstanceProcessEvents(instance); +#endif + } + + if (map_state.status != WGPUMapAsyncStatus_Success) { + fprintf(stderr, "Buffer mapping failed: %d\n", map_state.status); + wgpuBufferRelease(staging); + return pixels; // Return empty + } + + // Copy data from mapped buffer (handle row padding) + const uint8_t* mapped_data = static_cast<const uint8_t*>( + wgpuBufferGetConstMappedRange(staging, 0, buffer_size)); + if (mapped_data) { + // If rows are aligned, copy row by 
row to remove padding + if (aligned_bytes_per_row != unaligned_bytes_per_row) { + for (int y = 0; y < height; ++y) { + memcpy(pixels.data() + y * unaligned_bytes_per_row, + mapped_data + y * aligned_bytes_per_row, + unaligned_bytes_per_row); + } + } else { + // No padding, direct copy + memcpy(pixels.data(), mapped_data, pixels.size()); + } + } + + // Cleanup + wgpuBufferUnmap(staging); + wgpuBufferRelease(staging); + + return pixels; +} + +#endif // !defined(STRIP_ALL) diff --git a/src/gpu/texture_readback.h b/src/gpu/texture_readback.h new file mode 100644 index 0000000..1bf770f --- /dev/null +++ b/src/gpu/texture_readback.h @@ -0,0 +1,23 @@ +// GPU texture readback utility for offline processing +// Synchronous blocking operation (waits for GPU completion) + +#pragma once + +// Protected with STRIP_ALL: only needed for dev tools, not final release +#if !defined(STRIP_ALL) + +#include "platform/platform.h" +#include <vector> +#include <cstdint> + +// Read texture pixels to CPU memory (synchronous, blocking) +// Format: BGRA8Unorm (4 bytes per pixel) +// Returns: width * height * 4 bytes +std::vector<uint8_t> read_texture_pixels( + WGPUInstance instance, + WGPUDevice device, + WGPUTexture texture, + int width, + int height); + +#endif // !defined(STRIP_ALL) diff --git a/src/tests/common/offscreen_render_target.cc b/src/tests/common/offscreen_render_target.cc index 9f65e9a..10775a1 100644 --- a/src/tests/common/offscreen_render_target.cc +++ b/src/tests/common/offscreen_render_target.cc @@ -3,6 +3,7 @@ // Provides pixel readback for validation. 
#include "offscreen_render_target.h" +#include "gpu/texture_readback.h" #include <cassert> #include <cstdio> #include <cstring> @@ -64,105 +65,9 @@ WGPUBuffer OffscreenRenderTarget::create_staging_buffer() { } std::vector<uint8_t> OffscreenRenderTarget::read_pixels() { - const size_t buffer_size = width_ * height_ * 4; // BGRA8 - std::vector<uint8_t> pixels(buffer_size); - - // Create staging buffer for readback - WGPUBuffer staging = create_staging_buffer(); - assert(staging && "Failed to create staging buffer"); - - // Create command encoder for copy operation - const WGPUCommandEncoderDescriptor enc_desc = {}; - WGPUCommandEncoder encoder = - wgpuDeviceCreateCommandEncoder(device_, &enc_desc); - - // Copy texture to buffer - const WGPUTexelCopyTextureInfo src = { - .texture = texture_, - .mipLevel = 0, - .origin = {0, 0, 0}, - }; - - const WGPUTexelCopyBufferInfo dst = { - .buffer = staging, - .layout = - { - .bytesPerRow = static_cast<uint32_t>(width_ * 4), - .rowsPerImage = static_cast<uint32_t>(height_), - }, - }; - - const WGPUExtent3D copy_size = {static_cast<uint32_t>(width_), - static_cast<uint32_t>(height_), 1}; - - wgpuCommandEncoderCopyTextureToBuffer(encoder, &src, &dst, &copy_size); - - // Submit commands - WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr); - WGPUQueue queue = wgpuDeviceGetQueue(device_); - wgpuQueueSubmit(queue, 1, &commands); - wgpuCommandBufferRelease(commands); - wgpuCommandEncoderRelease(encoder); - - // CRITICAL: Wait for GPU work to complete before mapping - // Without this, buffer may be destroyed before copy finishes - // Note: Skipping wait for now - appears to be causing issues - // The buffer mapping will handle synchronization internally - - // Map buffer for reading (API differs between Win32 and native) -#if defined(DEMO_CROSS_COMPILE_WIN32) - // Win32: Old callback API - MapState map_state = {}; - auto map_cb = [](WGPUBufferMapAsyncStatus status, void* userdata) { - MapState* state = 
static_cast<MapState*>(userdata); - state->status = status; - state->done = true; - }; - wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buffer_size, map_cb, - &map_state); +#if !defined(STRIP_ALL) + return read_texture_pixels(instance_, device_, texture_, width_, height_); #else - // Native: New callback info API - MapState map_state = {}; - auto map_cb = [](WGPUMapAsyncStatus status, WGPUStringView message, - void* userdata, void* user2) { - (void)message; - (void)user2; - MapState* state = static_cast<MapState*>(userdata); - state->status = status; - state->done = true; - }; - WGPUBufferMapCallbackInfo map_info = {}; - map_info.mode = WGPUCallbackMode_WaitAnyOnly; - map_info.callback = map_cb; - map_info.userdata1 = &map_state; - wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buffer_size, map_info); -#endif - - // Wait for mapping to complete - for (int i = 0; i < 100 && !map_state.done; ++i) { -#if defined(__EMSCRIPTEN__) - emscripten_sleep(10); -#else - wgpuInstanceProcessEvents(instance_); + return std::vector<uint8_t>(); // Should never be called in STRIP_ALL builds #endif - } - - if (map_state.status != WGPUMapAsyncStatus_Success) { - fprintf(stderr, "Buffer mapping failed: %d\n", map_state.status); - wgpuBufferRelease(staging); - return pixels; // Return empty - } - - // Copy data from mapped buffer - const uint8_t* mapped_data = static_cast<const uint8_t*>( - wgpuBufferGetConstMappedRange(staging, 0, buffer_size)); - if (mapped_data) { - memcpy(pixels.data(), mapped_data, buffer_size); - } - - // Cleanup - wgpuBufferUnmap(staging); - wgpuBufferRelease(staging); - - return pixels; } |
