diff options
| author | skal <pascal.massimino@gmail.com> | 2026-02-11 07:11:59 +0100 |
|---|---|---|
| committer | skal <pascal.massimino@gmail.com> | 2026-02-11 07:11:59 +0100 |
| commit | d594609420b3d0ca43d760ff043b3750e2be55ca (patch) | |
| tree | 8e7668a4730bfe90f10b6e92e4c4992c14fa8cc8 /tools | |
| parent | 3915a5e1c8c904f8f2154845cb99223a598653ee (diff) | |
fix: CNN test tool ping-pong bug and RGBA16Float intermediates
Bugfixes:
- Fixed ping-pong logic: update current_input BEFORE flipping dst_idx
- Use RGBA16Float for intermediate layers (preserve [-1,1] range from tanh)
- Separate BGRA8Unorm final output texture for readback
- Create two pipelines: intermediate (RGBA16Float) and final (BGRA8Unorm)
- Fix all cleanup code to reference correct pipeline variables
Implementation:
- Intermediate textures use RGBA16Float to avoid clamping [-1,1] → [0,1]
- Final layer renders to separate BGRA8Unorm texture
- Correct texture view descriptors for each format
- Layer 0-1: render to RGBA16Float ping-pong textures
- Layer 2: render to BGRA8Unorm output texture
Documentation:
- Added CNN testing section to doc/HOWTO.md
- Updated CNN_TEST_TOOL.md with ground-truth comparison workflow
- Noted remaining black output bug (under investigation)
Status:
- Tool compiles and runs without GPU errors
- Architecture correct: ping-pong, format conversion, separate pipelines
- Output still all-black (unknown cause, needs debugging)
- All 36 tests still pass
handoff(Claude): CNN test tool bugfixes complete, black output remains
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/cnn_test.cc | 108 |
1 files changed, 77 insertions, 31 deletions
diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc index 59f5d36..bb4a824 100644 --- a/tools/cnn_test.cc +++ b/tools/cnn_test.cc @@ -145,8 +145,10 @@ static WGPUTexture load_texture(WGPUDevice device, WGPUQueue queue, } // Create CNN render pipeline (5 bindings) +// Takes both intermediate format (RGBA16Float) and final format (BGRA8Unorm) static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device, - WGPUTextureFormat format) { + WGPUTextureFormat format, + bool is_final_layer) { const char* shader_code = SafeGetAsset(AssetId::ASSET_SHADER_CNN_LAYER); WGPUBindGroupLayout bgl = @@ -158,10 +160,14 @@ static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device, .texture(4, WGPUShaderStage_Fragment) // Original input .build(device); + // Use appropriate format: RGBA16Float for intermediate, BGRA8Unorm for final + WGPUTextureFormat output_format = + is_final_layer ? WGPUTextureFormat_BGRA8Unorm : WGPUTextureFormat_RGBA16Float; + WGPURenderPipeline pipeline = RenderPipelineBuilder(device) .shader(shader_code) // compose=true by default .bind_group_layout(bgl) - .format(format) + .format(output_format) .build(); wgpuBindGroupLayoutRelease(bgl); @@ -274,19 +280,24 @@ int main(int argc, char** argv) { WGPUTextureView input_view = wgpuTextureCreateView(input_texture, &view_desc); WGPUTextureView original_view = input_view; // Keep reference to original - // Create CNN pipeline - WGPURenderPipeline pipeline = - create_cnn_pipeline(device, WGPUTextureFormat_BGRA8Unorm); - if (!pipeline) { - fprintf(stderr, "Error: failed to create CNN pipeline\n"); + // Create CNN pipelines (different formats for intermediate vs final) + WGPURenderPipeline pipeline_intermediate = + create_cnn_pipeline(device, WGPUTextureFormat_RGBA16Float, false); + WGPURenderPipeline pipeline_final = + create_cnn_pipeline(device, WGPUTextureFormat_BGRA8Unorm, true); + + if (!pipeline_intermediate || !pipeline_final) { + fprintf(stderr, "Error: failed to create CNN pipelines\n"); + if (pipeline_intermediate) wgpuRenderPipelineRelease(pipeline_intermediate); + if (pipeline_final) wgpuRenderPipelineRelease(pipeline_final); wgpuTextureViewRelease(input_view); wgpuTextureRelease(input_texture); fixture.shutdown(); return 1; } - // Get bind group layout from pipeline - WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline, 0); + // Get bind group layout from intermediate pipeline (same for both) + WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline_intermediate, 0); // Create uniform buffers const WGPUBufferDescriptor common_uniform_desc = { @@ -304,12 +315,13 @@ int main(int argc, char** argv) { wgpuDeviceCreateBuffer(device, &layer_params_desc); // Create intermediate textures for ping-pong (2 textures) + // Use RGBA16Float to preserve [-1,1] range from tanh activation const WGPUTextureDescriptor intermediate_desc = { .usage = WGPUTextureUsage_TextureBinding | WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc, .dimension = WGPUTextureDimension_2D, .size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1}, - .format = WGPUTextureFormat_BGRA8Unorm, + .format = WGPUTextureFormat_RGBA16Float, .mipLevelCount = 1, .sampleCount = 1, }; @@ -319,10 +331,39 @@ int main(int argc, char** argv) { wgpuDeviceCreateTexture(device, &intermediate_desc), }; + // Create views for intermediate textures (RGBA16Float) + const WGPUTextureViewDescriptor intermediate_view_desc = { + .format = WGPUTextureFormat_RGBA16Float, + .dimension = WGPUTextureViewDimension_2D, + .baseMipLevel = 0, + .mipLevelCount = 1, + .baseArrayLayer = 0, + .arrayLayerCount = 1, + }; WGPUTextureView intermediate_views[2] = { - wgpuTextureCreateView(intermediate_textures[0], &view_desc), - wgpuTextureCreateView(intermediate_textures[1], &view_desc), + wgpuTextureCreateView(intermediate_textures[0], &intermediate_view_desc), + wgpuTextureCreateView(intermediate_textures[1], &intermediate_view_desc), + }; + + // Create final output texture (BGRA8Unorm for readback) + const WGPUTextureDescriptor final_desc = { + .usage = WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc, + .dimension = WGPUTextureDimension_2D, + .size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1}, + .format = WGPUTextureFormat_BGRA8Unorm, + .mipLevelCount = 1, + .sampleCount = 1, }; + WGPUTexture final_output_texture = wgpuDeviceCreateTexture(device, &final_desc); + const WGPUTextureViewDescriptor final_view_desc = { + .format = WGPUTextureFormat_BGRA8Unorm, + .dimension = WGPUTextureViewDimension_2D, + .baseMipLevel = 0, + .mipLevelCount = 1, + .baseArrayLayer = 0, + .arrayLayerCount = 1, + }; + WGPUTextureView final_output_view = wgpuTextureCreateView(final_output_texture, &final_view_desc); // Get sampler WGPUSampler sampler = @@ -330,8 +371,7 @@ int main(int argc, char** argv) { // Multi-layer processing (fixed 3 layers) const int NUM_LAYERS = 3; - int src_idx = 0; // Ping-pong index - WGPUTexture final_texture = nullptr; + int dst_idx = 0; // Index of texture to render to // First layer reads from input, subsequent layers read from previous output WGPUTextureView current_input = input_view; @@ -371,11 +411,14 @@ int main(int argc, char** argv) { .texture(4, original_view) .build(device, bgl); - // Render to intermediate texture - WGPUTextureView output_view = intermediate_views[src_idx]; + // Render to appropriate output texture with correct pipeline + bool is_final = (layer == NUM_LAYERS - 1); + WGPUTextureView output_view = is_final ? final_output_view : intermediate_views[dst_idx]; + WGPURenderPipeline current_pipeline = is_final ? pipeline_final : pipeline_intermediate; + WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr); WGPURenderPassEncoder pass = begin_render_pass(encoder, output_view); - wgpuRenderPassEncoderSetPipeline(pass, pipeline); + wgpuRenderPassEncoderSetPipeline(pass, current_pipeline); wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr); wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); // Fullscreen triangle wgpuRenderPassEncoderEnd(pass); @@ -387,27 +430,25 @@ int main(int argc, char** argv) { wgpuCommandEncoderRelease(encoder); wgpuBindGroupRelease(bind_group); - // Update for next layer - if (layer == NUM_LAYERS - 1) { - // Last layer: save final texture - final_texture = intermediate_textures[src_idx]; - } else { - // Switch to next intermediate for input - current_input = intermediate_views[src_idx]; + // Update for next layer: output becomes input + if (layer < NUM_LAYERS - 1) { + // Use this layer's output as next layer's input + current_input = intermediate_views[dst_idx]; + dst_idx = 1 - dst_idx; // Flip ping-pong for next render } - - src_idx = 1 - src_idx; // Flip ping-pong } printf("Reading pixels from GPU...\n"); - // Read final output from GPU + // Read final output from GPU (always BGRA8Unorm) std::vector<uint8_t> pixels = - read_texture_pixels(instance, device, final_texture, width, height); + read_texture_pixels(instance, device, final_output_texture, width, height); if (pixels.empty()) { fprintf(stderr, "Error: failed to read pixels from GPU\n"); // Cleanup... + wgpuTextureViewRelease(final_output_view); + wgpuTextureRelease(final_output_texture); wgpuTextureViewRelease(intermediate_views[0]); wgpuTextureViewRelease(intermediate_views[1]); wgpuTextureRelease(intermediate_textures[0]); @@ -415,7 +456,8 @@ int main(int argc, char** argv) { wgpuBufferRelease(layer_params_buffer); wgpuBufferRelease(common_uniform_buffer); wgpuBindGroupLayoutRelease(bgl); - wgpuRenderPipelineRelease(pipeline); + wgpuRenderPipelineRelease(pipeline_intermediate); + wgpuRenderPipelineRelease(pipeline_final); wgpuTextureViewRelease(input_view); wgpuTextureRelease(input_texture); fixture.shutdown(); @@ -433,6 +475,8 @@ int main(int argc, char** argv) { } if (!success) { + wgpuTextureViewRelease(final_output_view); + wgpuTextureRelease(final_output_texture); wgpuTextureViewRelease(intermediate_views[0]); wgpuTextureViewRelease(intermediate_views[1]); wgpuTextureRelease(intermediate_textures[0]); @@ -440,7 +484,8 @@ int main(int argc, char** argv) { wgpuBufferRelease(layer_params_buffer); wgpuBufferRelease(common_uniform_buffer); wgpuBindGroupLayoutRelease(bgl); - wgpuRenderPipelineRelease(pipeline); + wgpuRenderPipelineRelease(pipeline_intermediate); + wgpuRenderPipelineRelease(pipeline_final); wgpuTextureViewRelease(input_view); wgpuTextureRelease(input_texture); fixture.shutdown(); @@ -457,7 +502,8 @@ int main(int argc, char** argv) { wgpuBufferRelease(layer_params_buffer); wgpuBufferRelease(common_uniform_buffer); wgpuBindGroupLayoutRelease(bgl); - wgpuRenderPipelineRelease(pipeline); + wgpuRenderPipelineRelease(pipeline_intermediate); + wgpuRenderPipelineRelease(pipeline_final); wgpuTextureViewRelease(input_view); wgpuTextureRelease(input_texture); fixture.shutdown(); |
