-rw-r--r--  doc/CNN_TEST_TOOL.md |  11
-rw-r--r--  doc/HOWTO.md         |  39
-rw-r--r--  tools/cnn_test.cc    | 108
3 files changed, 122 insertions, 36 deletions
diff --git a/doc/CNN_TEST_TOOL.md b/doc/CNN_TEST_TOOL.md
index 7a970fe..09c55d4 100644
--- a/doc/CNN_TEST_TOOL.md
+++ b/doc/CNN_TEST_TOOL.md
@@ -178,11 +178,12 @@ assert mse < 10.0, f'MSE too high: {mse}'
## Known Issues
-**BUG: Black output (uninitialized input texture)**
-- Tool produces all-black output (MSE 64860 vs ground truth)
-- Root cause: First intermediate texture not initialized with input image
-- Multi-layer processing starts with uninitialized data
-- Fix required: Copy input_texture → intermediate_textures[0] before layer loop
+**BUG: Black output (unknown cause)**
+- Tool produces all-black output despite the correct architecture
+- Ping-pong logic, RGBA16Float intermediates, and separate intermediate/final pipelines are in place
+- The shader compiles and GPU commands execute without errors
+- Possible causes: shader execution issue, synchronization, or a binding mismatch
+- Status: under investigation (an all-black readback probe is sketched below)
---
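One way to localize the all-black output is to check, right after a layer's queue submit, whether anything non-zero was actually written. The helper below is a debugging sketch, not part of cnn_test.cc; it assumes BGRA8Unorm data (4 bytes per pixel), as the tool's read_texture_pixels() helper returns for the final output, so probing the RGBA16Float intermediates would additionally need a copy path that accounts for 8 bytes per pixel.

```cpp
// Debugging sketch, not part of cnn_test.cc: report whether a BGRA8Unorm
// readback contains any non-zero color data.
#include <cstddef>
#include <cstdint>
#include <vector>

static bool pixels_all_black(const std::vector<uint8_t>& pixels) {
  for (std::size_t i = 0; i + 3 < pixels.size(); i += 4) {
    // BGRA8Unorm layout: bytes 0..2 are the color channels, byte 3 is alpha.
    if (pixels[i] != 0 || pixels[i + 1] != 0 || pixels[i + 2] != 0) return false;
  }
  return true;
}

// Possible use after the layer loop (the final layer writes final_output_texture):
//   std::vector<uint8_t> probe =
//       read_texture_pixels(instance, device, final_output_texture, width, height);
//   printf("final output %s\n",
//          pixels_all_black(probe) ? "is all black" : "has data");
// Running the same check per layer would need a readback path that understands
// the RGBA16Float intermediates (8 bytes per pixel) rather than BGRA8Unorm.
```

If even the first layer writes nothing, the input binding or the shader itself is the first suspect; if only the final pass comes back black, the final pipeline or its bind group is the more likely culprit.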
diff --git a/doc/HOWTO.md b/doc/HOWTO.md
index ba550bb..c0e9363 100644
--- a/doc/HOWTO.md
+++ b/doc/HOWTO.md
@@ -162,6 +162,45 @@ See `doc/ASSET_SYSTEM.md` and `doc/WORKSPACE_SYSTEM.md`.
---
+## CNN Testing
+
+### Offline Shader Validation
+```bash
+# Test trained CNN on PNG input
+./build/cnn_test input.png output.png
+
+# Adjust blend amount (0.0 = original, 1.0 = full CNN)
+./build/cnn_test input.png output.png --blend 0.5
+
+# PPM output format
+./build/cnn_test input.png output.ppm --format ppm
+```
+
+### Ground Truth Comparison
+```bash
+# Generate Python ground truth
+./training/train_cnn.py --infer input.png \
+ --export-only checkpoints/checkpoint_epoch_1000.pth \
+ --output ground_truth.png
+
+# Run tool
+./build/cnn_test input.png tool_output.png
+
+# Compare (Python required)
+python3 -c "
+import numpy as np
+from PIL import Image
+gt = np.array(Image.open('ground_truth.png').convert('RGB'))
+out = np.array(Image.open('tool_output.png').convert('RGB'))
+mse = np.mean((gt.astype(float) - out.astype(float)) ** 2)
+print(f'MSE: {mse:.4f} (target: < 10.0)')
+"
+```
+
+See `doc/CNN_TEST_TOOL.md` for full documentation.
+
+---
+
## Additional Documentation
- **Build System:** `doc/BUILD.md` - Multi-platform, size optimization
diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc
index 59f5d36..bb4a824 100644
--- a/tools/cnn_test.cc
+++ b/tools/cnn_test.cc
@@ -145,8 +145,10 @@ static WGPUTexture load_texture(WGPUDevice device, WGPUQueue queue,
}
// Create CNN render pipeline (5 bindings)
+// is_final_layer selects the output format: RGBA16Float or BGRA8Unorm
static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device,
- WGPUTextureFormat format) {
+ WGPUTextureFormat format,
+ bool is_final_layer) {
const char* shader_code = SafeGetAsset(AssetId::ASSET_SHADER_CNN_LAYER);
WGPUBindGroupLayout bgl =
@@ -158,10 +160,14 @@ static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device,
.texture(4, WGPUShaderStage_Fragment) // Original input
.build(device);
+ // Use appropriate format: RGBA16Float for intermediate, BGRA8Unorm for final
+ WGPUTextureFormat output_format =
+ is_final_layer ? WGPUTextureFormat_BGRA8Unorm : WGPUTextureFormat_RGBA16Float;
+
WGPURenderPipeline pipeline = RenderPipelineBuilder(device)
.shader(shader_code) // compose=true by default
.bind_group_layout(bgl)
- .format(format)
+ .format(output_format)
.build();
wgpuBindGroupLayoutRelease(bgl);
@@ -274,19 +280,24 @@ int main(int argc, char** argv) {
WGPUTextureView input_view = wgpuTextureCreateView(input_texture, &view_desc);
WGPUTextureView original_view = input_view; // Keep reference to original
- // Create CNN pipeline
- WGPURenderPipeline pipeline =
- create_cnn_pipeline(device, WGPUTextureFormat_BGRA8Unorm);
- if (!pipeline) {
- fprintf(stderr, "Error: failed to create CNN pipeline\n");
+ // Create CNN pipelines (different formats for intermediate vs final)
+ WGPURenderPipeline pipeline_intermediate =
+ create_cnn_pipeline(device, WGPUTextureFormat_RGBA16Float, false);
+ WGPURenderPipeline pipeline_final =
+ create_cnn_pipeline(device, WGPUTextureFormat_BGRA8Unorm, true);
+
+ if (!pipeline_intermediate || !pipeline_final) {
+ fprintf(stderr, "Error: failed to create CNN pipelines\n");
+ if (pipeline_intermediate) wgpuRenderPipelineRelease(pipeline_intermediate);
+ if (pipeline_final) wgpuRenderPipelineRelease(pipeline_final);
wgpuTextureViewRelease(input_view);
wgpuTextureRelease(input_texture);
fixture.shutdown();
return 1;
}
- // Get bind group layout from pipeline
- WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline, 0);
+ // Get bind group layout from intermediate pipeline (same for both)
+ WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline_intermediate, 0);
// Create uniform buffers
const WGPUBufferDescriptor common_uniform_desc = {
@@ -304,12 +315,13 @@ int main(int argc, char** argv) {
wgpuDeviceCreateBuffer(device, &layer_params_desc);
// Create intermediate textures for ping-pong (2 textures)
+ // Use RGBA16Float to preserve [-1,1] range from tanh activation
const WGPUTextureDescriptor intermediate_desc = {
.usage = WGPUTextureUsage_TextureBinding |
WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc,
.dimension = WGPUTextureDimension_2D,
.size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1},
- .format = WGPUTextureFormat_BGRA8Unorm,
+ .format = WGPUTextureFormat_RGBA16Float,
.mipLevelCount = 1,
.sampleCount = 1,
};
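For context on the format change above: a unorm attachment clamps written values to [0, 1] before quantizing, so the negative half of a tanh activation collapses to zero, which is exactly what the RGBA16Float intermediates avoid. A minimal standalone illustration of that clamp-and-quantize arithmetic (generic unorm behavior, not code from this tool):

```cpp
// Why BGRA8Unorm intermediates lose data: unorm storage clamps to [0, 1],
// so activations in [-1, 0) all become 0, while a float16 channel keeps them.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

static uint8_t write_to_unorm8(float v) {
  // What a write to an 8-bit unorm render target effectively does per channel.
  const float clamped = std::clamp(v, 0.0f, 1.0f);
  return static_cast<uint8_t>(std::lround(clamped * 255.0f));
}

int main() {
  const float activations[] = {-0.9f, -0.25f, 0.0f, 0.4f, 0.9f};  // tanh range
  for (float a : activations) {
    const uint8_t stored = write_to_unorm8(a);
    printf("activation % .2f -> unorm8 %3u (decodes to %.3f)\n",
           a, static_cast<unsigned>(stored), stored / 255.0f);
  }
  return 0;
}
```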
@@ -319,10 +331,39 @@ int main(int argc, char** argv) {
wgpuDeviceCreateTexture(device, &intermediate_desc),
};
+ // Create views for intermediate textures (RGBA16Float)
+ const WGPUTextureViewDescriptor intermediate_view_desc = {
+ .format = WGPUTextureFormat_RGBA16Float,
+ .dimension = WGPUTextureViewDimension_2D,
+ .baseMipLevel = 0,
+ .mipLevelCount = 1,
+ .baseArrayLayer = 0,
+ .arrayLayerCount = 1,
+ };
WGPUTextureView intermediate_views[2] = {
- wgpuTextureCreateView(intermediate_textures[0], &view_desc),
- wgpuTextureCreateView(intermediate_textures[1], &view_desc),
+ wgpuTextureCreateView(intermediate_textures[0], &intermediate_view_desc),
+ wgpuTextureCreateView(intermediate_textures[1], &intermediate_view_desc),
+ };
+
+ // Create final output texture (BGRA8Unorm for readback)
+ const WGPUTextureDescriptor final_desc = {
+ .usage = WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc,
+ .dimension = WGPUTextureDimension_2D,
+ .size = {static_cast<uint32_t>(width), static_cast<uint32_t>(height), 1},
+ .format = WGPUTextureFormat_BGRA8Unorm,
+ .mipLevelCount = 1,
+ .sampleCount = 1,
};
+ WGPUTexture final_output_texture = wgpuDeviceCreateTexture(device, &final_desc);
+ const WGPUTextureViewDescriptor final_view_desc = {
+ .format = WGPUTextureFormat_BGRA8Unorm,
+ .dimension = WGPUTextureViewDimension_2D,
+ .baseMipLevel = 0,
+ .mipLevelCount = 1,
+ .baseArrayLayer = 0,
+ .arrayLayerCount = 1,
+ };
+ WGPUTextureView final_output_view = wgpuTextureCreateView(final_output_texture, &final_view_desc);
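The final BGRA8Unorm texture exists only so the result can be copied back to the CPU. read_texture_pixels() is not shown in this diff, but any WebGPU texture-to-buffer copy must pad bytesPerRow to a multiple of 256; the sketch below covers just that arithmetic and assumes nothing else about the helper's implementation.

```cpp
// Row-stride arithmetic a texture -> buffer readback needs: WebGPU requires
// bytesPerRow of a buffer copy to be a multiple of 256.
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t width = 640, height = 480;  // example dimensions
  const uint32_t bytes_per_pixel = 4;        // BGRA8Unorm
  const uint32_t alignment = 256;            // required bytesPerRow alignment
  const uint32_t unpadded = width * bytes_per_pixel;
  const uint32_t padded = (unpadded + alignment - 1) / alignment * alignment;
  printf("unpadded row %u B, padded row %u B, staging buffer %u B\n",
         unpadded, padded, padded * height);
  return 0;
}
```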
// Get sampler
WGPUSampler sampler =
@@ -330,8 +371,7 @@ int main(int argc, char** argv) {
// Multi-layer processing (fixed 3 layers)
const int NUM_LAYERS = 3;
- int src_idx = 0; // Ping-pong index
- WGPUTexture final_texture = nullptr;
+ int dst_idx = 0; // Index of texture to render to
// First layer reads from input, subsequent layers read from previous output
WGPUTextureView current_input = input_view;
@@ -371,11 +411,14 @@ int main(int argc, char** argv) {
.texture(4, original_view)
.build(device, bgl);
- // Render to intermediate texture
- WGPUTextureView output_view = intermediate_views[src_idx];
+ // Render to appropriate output texture with correct pipeline
+ bool is_final = (layer == NUM_LAYERS - 1);
+ WGPUTextureView output_view = is_final ? final_output_view : intermediate_views[dst_idx];
+ WGPURenderPipeline current_pipeline = is_final ? pipeline_final : pipeline_intermediate;
+
WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr);
WGPURenderPassEncoder pass = begin_render_pass(encoder, output_view);
- wgpuRenderPassEncoderSetPipeline(pass, pipeline);
+ wgpuRenderPassEncoderSetPipeline(pass, current_pipeline);
wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr);
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); // Fullscreen triangle
wgpuRenderPassEncoderEnd(pass);
@@ -387,27 +430,25 @@ int main(int argc, char** argv) {
wgpuCommandEncoderRelease(encoder);
wgpuBindGroupRelease(bind_group);
- // Update for next layer
- if (layer == NUM_LAYERS - 1) {
- // Last layer: save final texture
- final_texture = intermediate_textures[src_idx];
- } else {
- // Switch to next intermediate for input
- current_input = intermediate_views[src_idx];
+ // Update for next layer: output becomes input
+ if (layer < NUM_LAYERS - 1) {
+ // Use this layer's output as next layer's input
+ current_input = intermediate_views[dst_idx];
+ dst_idx = 1 - dst_idx; // Flip ping-pong for next render
}
-
- src_idx = 1 - src_idx; // Flip ping-pong
}
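As a sanity check on the rewritten loop, here is a standalone sketch (mirroring the logic above, not code from cnn_test.cc) that prints which view each of the three layers reads from and renders to:

```cpp
// Trace of the ping-pong schedule for NUM_LAYERS = 3: layer 0 reads the input
// image, the last layer writes the BGRA8Unorm output, and the layers in
// between alternate across the two RGBA16Float intermediates.
#include <cstdio>
#include <string>

int main() {
  const int NUM_LAYERS = 3;
  int dst_idx = 0;
  std::string current_input = "input_view";
  for (int layer = 0; layer < NUM_LAYERS; ++layer) {
    const bool is_final = (layer == NUM_LAYERS - 1);
    const std::string output =
        is_final ? "final_output_view"
                 : "intermediate_views[" + std::to_string(dst_idx) + "]";
    printf("layer %d: reads %s, renders to %s\n",
           layer, current_input.c_str(), output.c_str());
    if (!is_final) {
      current_input = output;  // this layer's output feeds the next layer
      dst_idx = 1 - dst_idx;   // flip the ping-pong target
    }
  }
  return 0;
}
```

For three layers this prints input_view -> intermediate_views[0], intermediate_views[0] -> intermediate_views[1], and intermediate_views[1] -> final_output_view, matching the intent of the dst_idx flip.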
printf("Reading pixels from GPU...\n");
- // Read final output from GPU
+ // Read final output from GPU (always BGRA8Unorm)
std::vector<uint8_t> pixels =
- read_texture_pixels(instance, device, final_texture, width, height);
+ read_texture_pixels(instance, device, final_output_texture, width, height);
if (pixels.empty()) {
fprintf(stderr, "Error: failed to read pixels from GPU\n");
// Cleanup...
+ wgpuTextureViewRelease(final_output_view);
+ wgpuTextureRelease(final_output_texture);
wgpuTextureViewRelease(intermediate_views[0]);
wgpuTextureViewRelease(intermediate_views[1]);
wgpuTextureRelease(intermediate_textures[0]);
@@ -415,7 +456,8 @@ int main(int argc, char** argv) {
wgpuBufferRelease(layer_params_buffer);
wgpuBufferRelease(common_uniform_buffer);
wgpuBindGroupLayoutRelease(bgl);
- wgpuRenderPipelineRelease(pipeline);
+ wgpuRenderPipelineRelease(pipeline_intermediate);
+ wgpuRenderPipelineRelease(pipeline_final);
wgpuTextureViewRelease(input_view);
wgpuTextureRelease(input_texture);
fixture.shutdown();
@@ -433,6 +475,8 @@ int main(int argc, char** argv) {
}
if (!success) {
+ wgpuTextureViewRelease(final_output_view);
+ wgpuTextureRelease(final_output_texture);
wgpuTextureViewRelease(intermediate_views[0]);
wgpuTextureViewRelease(intermediate_views[1]);
wgpuTextureRelease(intermediate_textures[0]);
@@ -440,7 +484,8 @@ int main(int argc, char** argv) {
wgpuBufferRelease(layer_params_buffer);
wgpuBufferRelease(common_uniform_buffer);
wgpuBindGroupLayoutRelease(bgl);
- wgpuRenderPipelineRelease(pipeline);
+ wgpuRenderPipelineRelease(pipeline_intermediate);
+ wgpuRenderPipelineRelease(pipeline_final);
wgpuTextureViewRelease(input_view);
wgpuTextureRelease(input_texture);
fixture.shutdown();
@@ -457,7 +502,8 @@ int main(int argc, char** argv) {
wgpuBufferRelease(layer_params_buffer);
wgpuBufferRelease(common_uniform_buffer);
wgpuBindGroupLayoutRelease(bgl);
- wgpuRenderPipelineRelease(pipeline);
+ wgpuRenderPipelineRelease(pipeline_intermediate);
+ wgpuRenderPipelineRelease(pipeline_final);
wgpuTextureViewRelease(input_view);
wgpuTextureRelease(input_texture);
fixture.shutdown();