summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: skal <pascal.massimino@gmail.com> 2026-02-11 07:11:59 +0100
committer: skal <pascal.massimino@gmail.com> 2026-02-11 07:11:59 +0100
commit: d594609420b3d0ca43d760ff043b3750e2be55ca (patch)
tree: 8e7668a4730bfe90f10b6e92e4c4992c14fa8cc8 /tools
parent: 3915a5e1c8c904f8f2154845cb99223a598653ee (diff)
fix: CNN test tool ping-pong bug and RGBA16Float intermediates
Bugfixes:
- Fixed ping-pong logic: update current_input BEFORE flipping dst_idx
- Use RGBA16Float for intermediate layers (preserve [-1,1] range from tanh)
- Separate BGRA8Unorm final output texture for readback
- Create two pipelines: intermediate (RGBA16Float) and final (BGRA8Unorm)
- Fix all cleanup code to reference correct pipeline variables

Implementation:
- Intermediate textures use RGBA16Float to avoid clamping [-1,1] → [0,1]
- Final layer renders to separate BGRA8Unorm texture
- Correct texture view descriptors for each format
- Layer 0-1: render to RGBA16Float ping-pong textures
- Layer 2: render to BGRA8Unorm output texture

Documentation:
- Added CNN testing section to doc/HOWTO.md
- Updated CNN_TEST_TOOL.md with ground-truth comparison workflow
- Noted remaining black output bug (under investigation)

Status:
- Tool compiles and runs without GPU errors
- Architecture correct: ping-pong, format conversion, separate pipelines
- Output still all-black (unknown cause, needs debugging)
- All 36 tests still pass

handoff(Claude): CNN test tool bugfixes complete, black output remains

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Diffstat (limited to 'tools')
-rw-r--r-- tools/cnn_test.cc 108
1 file changed, 77 insertions, 31 deletions
diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc
index 59f5d36..bb4a824 100644
--- a/tools/cnn_test.cc
+++ b/tools/cnn_test.cc
@@ -145,8 +145,10 @@ static WGPUTexture load_texture(WGPUDevice device, WGPUQueue queue,
}
// Create CNN render pipeline (5 bindings)
+// Takes both intermediate format (RGBA16Float) and final format (BGRA8Unorm)
static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device,
- WGPUTextureFormat format) {
+ WGPUTextureFormat format,
+ bool is_final_layer) {
const char* shader_code = SafeGetAsset(AssetId::ASSET_SHADER_CNN_LAYER);
WGPUBindGroupLayout bgl =
@@ -158,10 +160,14 @@ static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device,
.texture(4, WGPUShaderStage_Fragment) // Original input
.build(device);
+ // Use appropriate format: RGBA16Float for intermediate, BGRA8Unorm for final
+ WGPUTextureFormat output_format =
+ is_final_layer ? WGPUTextureFormat_BGRA8Unorm : WGPUTextureFormat_RGBA16Float;
+
WGPURenderPipeline pipeline = RenderPipelineBuilder(device)
.shader(shader_code) // compose=true by default
.bind_group_layout(bgl)
- .format(format)
+ .format(output_format)
.build();
wgpuBindGroupLayoutRelease(bgl);
@@ -274,19 +280,24 @@ int main(int argc, char** argv) {
WGPUTextureView input_view = wgpuTextureCreateView(input_texture, &view_desc);
WGPUTextureView original_view = input_view; // Keep reference to original
- // Create CNN pipeline
- WGPURenderPipeline pipeline =
- create_cnn_pipeline(device, WGPUTextureFormat_BGRA8Unorm);
- if (!pipeline) {
- fprintf(stderr, "Error: failed to create CNN pipeline\n");
+ // Create CNN pipelines (different formats for intermediate vs final)
+ WGPURenderPipeline pipeline_intermediate =
+ create_cnn_pipeline(device, WGPUTextureFormat_RGBA16Float, false);
+ WGPURenderPipeline pipeline_final =
+ create_cnn_pipeline(device, WGPUTextureFormat_BGRA8Unorm, true);
+
+ if (!pipeline_intermediate || !pipeline_final) {
+ fprintf(stderr, "Error: failed to create CNN pipelines\n");
+ if (pipeline_intermediate) wgpuRenderPipelineRelease(pipeline_intermediate);
+ if (pipeline_final) wgpuRenderPipelineRelease(pipeline_final);
wgpuTextureViewRelease(input_view);
wgpuTextureRelease(input_texture);
fixture.shutdown();
return 1;
}
- // Get bind group layout from pipeline
- WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline, 0);
+ // Get bind group layout from intermediate pipeline (same for both)
+ WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline_intermediate, 0);
// Create uniform buffers
const WGPUBufferDescriptor common_uniform_desc = {
@@ -304,12 +315,13 @@ int main(int argc, char** argv) {
wgpuDeviceCreateBuffer(device, &layer_params_desc);
// Create intermediate textures for ping-pong (2 textures)
+ // Use RGBA16Float to preserve [-1,1] range from tanh activation
const WGPUTextureDescriptor intermediate_desc = {
.usage = WGPUTextureUsage_TextureBinding |
WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc,
.dimension = WGPUTextureDimension_2D,
.size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1},
- .format = WGPUTextureFormat_BGRA8Unorm,
+ .format = WGPUTextureFormat_RGBA16Float,
.mipLevelCount = 1,
.sampleCount = 1,
};
@@ -319,10 +331,39 @@ int main(int argc, char** argv) {
wgpuDeviceCreateTexture(device, &intermediate_desc),
};
+ // Create views for intermediate textures (RGBA16Float)
+ const WGPUTextureViewDescriptor intermediate_view_desc = {
+ .format = WGPUTextureFormat_RGBA16Float,
+ .dimension = WGPUTextureViewDimension_2D,
+ .baseMipLevel = 0,
+ .mipLevelCount = 1,
+ .baseArrayLayer = 0,
+ .arrayLayerCount = 1,
+ };
WGPUTextureView intermediate_views[2] = {
- wgpuTextureCreateView(intermediate_textures[0], &view_desc),
- wgpuTextureCreateView(intermediate_textures[1], &view_desc),
+ wgpuTextureCreateView(intermediate_textures[0], &intermediate_view_desc),
+ wgpuTextureCreateView(intermediate_textures[1], &intermediate_view_desc),
+ };
+
+ // Create final output texture (BGRA8Unorm for readback)
+ const WGPUTextureDescriptor final_desc = {
+ .usage = WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc,
+ .dimension = WGPUTextureDimension_2D,
+ .size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1},
+ .format = WGPUTextureFormat_BGRA8Unorm,
+ .mipLevelCount = 1,
+ .sampleCount = 1,
};
+ WGPUTexture final_output_texture = wgpuDeviceCreateTexture(device, &final_desc);
+ const WGPUTextureViewDescriptor final_view_desc = {
+ .format = WGPUTextureFormat_BGRA8Unorm,
+ .dimension = WGPUTextureViewDimension_2D,
+ .baseMipLevel = 0,
+ .mipLevelCount = 1,
+ .baseArrayLayer = 0,
+ .arrayLayerCount = 1,
+ };
+ WGPUTextureView final_output_view = wgpuTextureCreateView(final_output_texture, &final_view_desc);
// Get sampler
WGPUSampler sampler =
@@ -330,8 +371,7 @@ int main(int argc, char** argv) {
// Multi-layer processing (fixed 3 layers)
const int NUM_LAYERS = 3;
- int src_idx = 0; // Ping-pong index
- WGPUTexture final_texture = nullptr;
+ int dst_idx = 0; // Index of texture to render to
// First layer reads from input, subsequent layers read from previous output
WGPUTextureView current_input = input_view;
@@ -371,11 +411,14 @@ int main(int argc, char** argv) {
.texture(4, original_view)
.build(device, bgl);
- // Render to intermediate texture
- WGPUTextureView output_view = intermediate_views[src_idx];
+ // Render to appropriate output texture with correct pipeline
+ bool is_final = (layer == NUM_LAYERS - 1);
+ WGPUTextureView output_view = is_final ? final_output_view : intermediate_views[dst_idx];
+ WGPURenderPipeline current_pipeline = is_final ? pipeline_final : pipeline_intermediate;
+
WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr);
WGPURenderPassEncoder pass = begin_render_pass(encoder, output_view);
- wgpuRenderPassEncoderSetPipeline(pass, pipeline);
+ wgpuRenderPassEncoderSetPipeline(pass, current_pipeline);
wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); // Fullscreen triangle
wgpuRenderPassEncoderEnd(pass);
@@ -387,27 +430,25 @@ int main(int argc, char** argv) {
wgpuCommandEncoderRelease(encoder);
wgpuBindGroupRelease(bind_group);
- // Update for next layer
- if (layer == NUM_LAYERS - 1) {
- // Last layer: save final texture
- final_texture = intermediate_textures[src_idx];
- } else {
- // Switch to next intermediate for input
- current_input = intermediate_views[src_idx];
+ // Update for next layer: output becomes input
+ if (layer < NUM_LAYERS - 1) {
+ // Use this layer's output as next layer's input
+ current_input = intermediate_views[dst_idx];
+ dst_idx = 1 - dst_idx; // Flip ping-pong for next render
}
-
- src_idx = 1 - src_idx; // Flip ping-pong
}
printf("Reading pixels from GPU...\n");
- // Read final output from GPU
+ // Read final output from GPU (always BGRA8Unorm)
std::vector<uint8_t> pixels =
- read_texture_pixels(instance, device, final_texture, width, height);
+ read_texture_pixels(instance, device, final_output_texture, width, height);
if (pixels.empty()) {
fprintf(stderr, "Error: failed to read pixels from GPU\n");
// Cleanup...
+ wgpuTextureViewRelease(final_output_view);
+ wgpuTextureRelease(final_output_texture);
wgpuTextureViewRelease(intermediate_views[0]);
wgpuTextureViewRelease(intermediate_views[1]);
wgpuTextureRelease(intermediate_textures[0]);
@@ -415,7 +456,8 @@ int main(int argc, char** argv) {
wgpuBufferRelease(layer_params_buffer);
wgpuBufferRelease(common_uniform_buffer);
wgpuBindGroupLayoutRelease(bgl);
- wgpuRenderPipelineRelease(pipeline);
+ wgpuRenderPipelineRelease(pipeline_intermediate);
+ wgpuRenderPipelineRelease(pipeline_final);
wgpuTextureViewRelease(input_view);
wgpuTextureRelease(input_texture);
fixture.shutdown();
@@ -433,6 +475,8 @@ int main(int argc, char** argv) {
}
if (!success) {
+ wgpuTextureViewRelease(final_output_view);
+ wgpuTextureRelease(final_output_texture);
wgpuTextureViewRelease(intermediate_views[0]);
wgpuTextureViewRelease(intermediate_views[1]);
wgpuTextureRelease(intermediate_textures[0]);
@@ -440,7 +484,8 @@ int main(int argc, char** argv) {
wgpuBufferRelease(layer_params_buffer);
wgpuBufferRelease(common_uniform_buffer);
wgpuBindGroupLayoutRelease(bgl);
- wgpuRenderPipelineRelease(pipeline);
+ wgpuRenderPipelineRelease(pipeline_intermediate);
+ wgpuRenderPipelineRelease(pipeline_final);
wgpuTextureViewRelease(input_view);
wgpuTextureRelease(input_texture);
fixture.shutdown();
@@ -457,7 +502,8 @@ int main(int argc, char** argv) {
wgpuBufferRelease(layer_params_buffer);
wgpuBufferRelease(common_uniform_buffer);
wgpuBindGroupLayoutRelease(bgl);
- wgpuRenderPipelineRelease(pipeline);
+ wgpuRenderPipelineRelease(pipeline_intermediate);
+ wgpuRenderPipelineRelease(pipeline_final);
wgpuTextureViewRelease(input_view);
wgpuTextureRelease(input_texture);
fixture.shutdown();