// CNN shader testing tool for offline validation // Tests trained CNN shaders on input PNG with GPU readback #if defined(STRIP_ALL) #error "cnn_test requires STRIP_ALL=OFF (tool builds only)" #endif #include "platform/platform.h" #include "gpu/gpu.h" #include "gpu/bind_group_builder.h" #include "gpu/pipeline_builder.h" #include "gpu/sampler_cache.h" #include "gpu/texture_readback.h" #include "gpu/effects/post_process_helper.h" #include "gpu/effects/cnn_effect.h" #include "gpu/effects/shader_composer.h" #include "gpu/effects/shaders.h" #include "tests/common/webgpu_test_fixture.h" #include "tests/common/offscreen_render_target.h" #include "generated/assets.h" #include "util/asset_manager.h" #include "util/mini_math.h" #include "stb_image.h" #include "wgpu-native/examples/capture/stb_image_write.h" #include #include #include #include // Helper to get asset string or empty string static const char* SafeGetAsset(AssetId id) { const uint8_t* data = GetAsset(id); return data ? (const char*)data : ""; } // Command-line arguments struct Args { const char* input_path = nullptr; const char* output_path = nullptr; float blend = 1.0f; bool output_png = true; // Default to PNG }; // Parse command-line arguments static bool parse_args(int argc, char** argv, Args* args) { if (argc < 3) { return false; } args->input_path = argv[1]; args->output_path = argv[2]; for (int i = 3; i < argc; ++i) { if (strcmp(argv[i], "--blend") == 0 && i + 1 < argc) { args->blend = atof(argv[++i]); if (args->blend < 0.0f || args->blend > 1.0f) { fprintf(stderr, "Error: blend must be in range [0.0, 1.0]\n"); return false; } } else if (strcmp(argv[i], "--format") == 0 && i + 1 < argc) { ++i; if (strcmp(argv[i], "ppm") == 0) { args->output_png = false; } else if (strcmp(argv[i], "png") == 0) { args->output_png = true; } else { fprintf(stderr, "Error: unknown format '%s' (use 'png' or 'ppm')\n", argv[i]); return false; } } else if (strcmp(argv[i], "--help") == 0) { return false; } else { fprintf(stderr, "Error: unknown option '%s'\n", argv[i]); return false; } } return true; } // Print usage static void print_usage(const char* prog) { fprintf(stderr, "Usage: %s input.png output.png [OPTIONS]\n", prog); fprintf(stderr, "\nOPTIONS:\n"); fprintf(stderr, " --blend F Final blend amount (0.0-1.0, default: 1.0)\n"); fprintf(stderr, " --format ppm|png Output format (default: png)\n"); fprintf(stderr, " --help Show this help\n"); } // Load PNG and upload to GPU texture static WGPUTexture load_texture(WGPUDevice device, WGPUQueue queue, const char* path, int* out_width, int* out_height) { int width, height, channels; uint8_t* data = stbi_load(path, &width, &height, &channels, 4); if (!data) { fprintf(stderr, "Error: failed to load image '%s'\n", path); return nullptr; } *out_width = width; *out_height = height; // Create texture const WGPUTextureDescriptor texture_desc = { .usage = WGPUTextureUsage_TextureBinding | WGPUTextureUsage_CopyDst | WGPUTextureUsage_RenderAttachment, .dimension = WGPUTextureDimension_2D, .size = {static_cast(width), static_cast(height), 1}, .format = WGPUTextureFormat_BGRA8Unorm, .mipLevelCount = 1, .sampleCount = 1, }; WGPUTexture texture = wgpuDeviceCreateTexture(device, &texture_desc); if (!texture) { fprintf(stderr, "Error: failed to create texture\n"); stbi_image_free(data); return nullptr; } // Convert RGBA → BGRA std::vector bgra_data(width * height * 4); for (int i = 0; i < width * height; ++i) { bgra_data[i * 4 + 0] = data[i * 4 + 2]; // B bgra_data[i * 4 + 1] = data[i * 4 + 1]; // G bgra_data[i * 4 + 2] = data[i * 4 + 0]; // R bgra_data[i * 4 + 3] = data[i * 4 + 3]; // A } // Upload to GPU const WGPUTexelCopyTextureInfo dst = {.texture = texture, .mipLevel = 0}; const WGPUTexelCopyBufferLayout layout = { .bytesPerRow = static_cast(width * 4), .rowsPerImage = static_cast(height)}; const WGPUExtent3D size = {static_cast(width), static_cast(height), 1}; wgpuQueueWriteTexture(queue, &dst, bgra_data.data(), bgra_data.size(), &layout, &size); stbi_image_free(data); return texture; } // Create CNN render pipeline (5 bindings) // Takes both intermediate format (RGBA16Float) and final format (BGRA8Unorm) static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device, WGPUTextureFormat format, bool is_final_layer) { const char* shader_code = SafeGetAsset(AssetId::ASSET_SHADER_CNN_LAYER); WGPUBindGroupLayout bgl = BindGroupLayoutBuilder() .sampler(0, WGPUShaderStage_Fragment) .texture(1, WGPUShaderStage_Fragment) .uniform(2, WGPUShaderStage_Vertex | WGPUShaderStage_Fragment) .uniform(3, WGPUShaderStage_Fragment) .texture(4, WGPUShaderStage_Fragment) // Original input .build(device); // Use appropriate format: RGBA16Float for intermediate, BGRA8Unorm for final WGPUTextureFormat output_format = is_final_layer ? WGPUTextureFormat_BGRA8Unorm : WGPUTextureFormat_RGBA16Float; WGPURenderPipeline pipeline = RenderPipelineBuilder(device) .shader(shader_code) // compose=true by default .bind_group_layout(bgl) .format(output_format) .build(); wgpuBindGroupLayoutRelease(bgl); return pipeline; } // Begin render pass with clear static WGPURenderPassEncoder begin_render_pass(WGPUCommandEncoder encoder, WGPUTextureView view) { const WGPURenderPassColorAttachment color_attachment = { .view = view, .depthSlice = WGPU_DEPTH_SLICE_UNDEFINED, .loadOp = WGPULoadOp_Clear, .storeOp = WGPUStoreOp_Store, .clearValue = {0.0f, 0.0f, 0.0f, 1.0f}, }; const WGPURenderPassDescriptor pass_desc = { .colorAttachmentCount = 1, .colorAttachments = &color_attachment, }; return wgpuCommandEncoderBeginRenderPass(encoder, &pass_desc); } // Save PNG output static bool save_png(const char* path, const std::vector& pixels, int width, int height) { // Convert BGRA → RGBA std::vector rgba(width * height * 4); for (int i = 0; i < width * height; ++i) { rgba[i * 4 + 0] = pixels[i * 4 + 2]; // R rgba[i * 4 + 1] = pixels[i * 4 + 1]; // G rgba[i * 4 + 2] = pixels[i * 4 + 0]; // B rgba[i * 4 + 3] = pixels[i * 4 + 3]; // A } if (!stbi_write_png(path, width, height, 4, rgba.data(), width * 4)) { fprintf(stderr, "Error: failed to write PNG '%s'\n", path); return false; } return true; } // Save PPM output (fallback) static bool save_ppm(const char* path, const std::vector& pixels, int width, int height) { FILE* f = fopen(path, "wb"); if (!f) { fprintf(stderr, "Error: failed to open '%s' for writing\n", path); return false; } fprintf(f, "P6\n%d %d\n255\n", width, height); for (int i = 0; i < width * height; ++i) { const uint8_t rgb[3] = {pixels[i * 4 + 2], // R pixels[i * 4 + 1], // G pixels[i * 4 + 0]}; // B fwrite(rgb, 1, 3, f); } fclose(f); return true; } int main(int argc, char** argv) { // Parse arguments Args args; if (!parse_args(argc, argv, &args)) { print_usage(argv[0]); return 1; } // Initialize shader composer (required for #include resolution) InitShaderComposer(); // Initialize WebGPU WebGPUTestFixture fixture; if (!fixture.init()) { fprintf(stderr, "Error: GPU unavailable\n"); return 1; } GpuContext ctx = fixture.ctx(); WGPUDevice device = ctx.device; WGPUQueue queue = ctx.queue; WGPUInstance instance = fixture.instance(); // Load input texture int width, height; WGPUTexture input_texture = load_texture(device, queue, args.input_path, &width, &height); if (!input_texture) { fixture.shutdown(); return 1; } printf("Loaded %dx%d image from '%s'\n", width, height, args.input_path); // Create input texture view const WGPUTextureViewDescriptor view_desc = { .format = WGPUTextureFormat_BGRA8Unorm, .dimension = WGPUTextureViewDimension_2D, .baseMipLevel = 0, .mipLevelCount = 1, .baseArrayLayer = 0, .arrayLayerCount = 1, }; WGPUTextureView input_view = wgpuTextureCreateView(input_texture, &view_desc); WGPUTextureView original_view = input_view; // Keep reference to original // Create CNN pipelines (different formats for intermediate vs final) WGPURenderPipeline pipeline_intermediate = create_cnn_pipeline(device, WGPUTextureFormat_RGBA16Float, false); WGPURenderPipeline pipeline_final = create_cnn_pipeline(device, WGPUTextureFormat_BGRA8Unorm, true); if (!pipeline_intermediate || !pipeline_final) { fprintf(stderr, "Error: failed to create CNN pipelines\n"); if (pipeline_intermediate) wgpuRenderPipelineRelease(pipeline_intermediate); if (pipeline_final) wgpuRenderPipelineRelease(pipeline_final); wgpuTextureViewRelease(input_view); wgpuTextureRelease(input_texture); fixture.shutdown(); return 1; } // Get bind group layout from intermediate pipeline (same for both) WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline_intermediate, 0); // Create uniform buffers const WGPUBufferDescriptor common_uniform_desc = { .usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst, .size = sizeof(CommonPostProcessUniforms), }; WGPUBuffer common_uniform_buffer = wgpuDeviceCreateBuffer(device, &common_uniform_desc); const WGPUBufferDescriptor layer_params_desc = { .usage = WGPUBufferUsage_Uniform | WGPUBufferUsage_CopyDst, .size = sizeof(CNNLayerParams), }; WGPUBuffer layer_params_buffer = wgpuDeviceCreateBuffer(device, &layer_params_desc); // Create intermediate textures for ping-pong (2 textures) // Use RGBA16Float to preserve [-1,1] range from tanh activation const WGPUTextureDescriptor intermediate_desc = { .usage = WGPUTextureUsage_TextureBinding | WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc, .dimension = WGPUTextureDimension_2D, .size = {static_cast(width), static_cast(height), 1}, .format = WGPUTextureFormat_RGBA16Float, .mipLevelCount = 1, .sampleCount = 1, }; WGPUTexture intermediate_textures[2] = { wgpuDeviceCreateTexture(device, &intermediate_desc), wgpuDeviceCreateTexture(device, &intermediate_desc), }; // Create views for intermediate textures (RGBA16Float) const WGPUTextureViewDescriptor intermediate_view_desc = { .format = WGPUTextureFormat_RGBA16Float, .dimension = WGPUTextureViewDimension_2D, .baseMipLevel = 0, .mipLevelCount = 1, .baseArrayLayer = 0, .arrayLayerCount = 1, }; WGPUTextureView intermediate_views[2] = { wgpuTextureCreateView(intermediate_textures[0], &intermediate_view_desc), wgpuTextureCreateView(intermediate_textures[1], &intermediate_view_desc), }; // Create final output texture (BGRA8Unorm for readback) const WGPUTextureDescriptor final_desc = { .usage = WGPUTextureUsage_RenderAttachment | WGPUTextureUsage_CopySrc, .dimension = WGPUTextureDimension_2D, .size = {static_cast(width), static_cast(height), 1}, .format = WGPUTextureFormat_BGRA8Unorm, .mipLevelCount = 1, .sampleCount = 1, }; WGPUTexture final_output_texture = wgpuDeviceCreateTexture(device, &final_desc); const WGPUTextureViewDescriptor final_view_desc = { .format = WGPUTextureFormat_BGRA8Unorm, .dimension = WGPUTextureViewDimension_2D, .baseMipLevel = 0, .mipLevelCount = 1, .baseArrayLayer = 0, .arrayLayerCount = 1, }; WGPUTextureView final_output_view = wgpuTextureCreateView(final_output_texture, &final_view_desc); // Get sampler WGPUSampler sampler = SamplerCache::Get().get_or_create(device, SamplerCache::clamp()); // Multi-layer processing (fixed 3 layers) const int NUM_LAYERS = 3; int dst_idx = 0; // Index of texture to render to // First layer reads from input, subsequent layers read from previous output WGPUTextureView current_input = input_view; for (int layer = 0; layer < NUM_LAYERS; ++layer) { printf("Processing layer %d/%d...\n", layer + 1, NUM_LAYERS); // Update uniforms CommonPostProcessUniforms common_u = { .resolution = {static_cast(width), static_cast(height)}, ._pad = {0.0f, 0.0f}, .aspect_ratio = static_cast(width) / static_cast(height), .time = 0.0f, .beat = 0.0f, .audio_intensity = 0.0f, }; wgpuQueueWriteBuffer(queue, common_uniform_buffer, 0, &common_u, sizeof(common_u)); CNNLayerParams layer_params = { .layer_index = layer, .blend_amount = (layer == NUM_LAYERS - 1) ? args.blend : 1.0f, // Only final layer ._pad = {0.0f, 0.0f}, }; wgpuQueueWriteBuffer(queue, layer_params_buffer, 0, &layer_params, sizeof(layer_params)); // Build bind group WGPUBindGroup bind_group = BindGroupBuilder() .sampler(0, sampler) .texture(1, current_input) .buffer(2, common_uniform_buffer, sizeof(CommonPostProcessUniforms)) .buffer(3, layer_params_buffer, sizeof(CNNLayerParams)) .texture(4, original_view) .build(device, bgl); // Render to appropriate output texture with correct pipeline bool is_final = (layer == NUM_LAYERS - 1); WGPUTextureView output_view = is_final ? final_output_view : intermediate_views[dst_idx]; WGPURenderPipeline current_pipeline = is_final ? pipeline_final : pipeline_intermediate; WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr); WGPURenderPassEncoder pass = begin_render_pass(encoder, output_view); wgpuRenderPassEncoderSetPipeline(pass, current_pipeline); wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group, 0, nullptr); wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); // Fullscreen triangle wgpuRenderPassEncoderEnd(pass); WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr); wgpuQueueSubmit(queue, 1, &commands); wgpuCommandBufferRelease(commands); wgpuRenderPassEncoderRelease(pass); wgpuCommandEncoderRelease(encoder); wgpuBindGroupRelease(bind_group); // Update for next layer: output becomes input if (layer < NUM_LAYERS - 1) { // Use this layer's output as next layer's input current_input = intermediate_views[dst_idx]; dst_idx = 1 - dst_idx; // Flip ping-pong for next render } } printf("Reading pixels from GPU...\n"); // Read final output from GPU (always BGRA8Unorm) std::vector pixels = read_texture_pixels(instance, device, final_output_texture, width, height); if (pixels.empty()) { fprintf(stderr, "Error: failed to read pixels from GPU\n"); // Cleanup... wgpuTextureViewRelease(final_output_view); wgpuTextureRelease(final_output_texture); wgpuTextureViewRelease(intermediate_views[0]); wgpuTextureViewRelease(intermediate_views[1]); wgpuTextureRelease(intermediate_textures[0]); wgpuTextureRelease(intermediate_textures[1]); wgpuBufferRelease(layer_params_buffer); wgpuBufferRelease(common_uniform_buffer); wgpuBindGroupLayoutRelease(bgl); wgpuRenderPipelineRelease(pipeline_intermediate); wgpuRenderPipelineRelease(pipeline_final); wgpuTextureViewRelease(input_view); wgpuTextureRelease(input_texture); fixture.shutdown(); return 1; } // Save output bool success = false; if (args.output_png) { printf("Saving PNG to '%s'...\n", args.output_path); success = save_png(args.output_path, pixels, width, height); } else { printf("Saving PPM to '%s'...\n", args.output_path); success = save_ppm(args.output_path, pixels, width, height); } if (!success) { wgpuTextureViewRelease(final_output_view); wgpuTextureRelease(final_output_texture); wgpuTextureViewRelease(intermediate_views[0]); wgpuTextureViewRelease(intermediate_views[1]); wgpuTextureRelease(intermediate_textures[0]); wgpuTextureRelease(intermediate_textures[1]); wgpuBufferRelease(layer_params_buffer); wgpuBufferRelease(common_uniform_buffer); wgpuBindGroupLayoutRelease(bgl); wgpuRenderPipelineRelease(pipeline_intermediate); wgpuRenderPipelineRelease(pipeline_final); wgpuTextureViewRelease(input_view); wgpuTextureRelease(input_texture); fixture.shutdown(); return 1; } printf("Done! Output saved to '%s'\n", args.output_path); // Cleanup wgpuTextureViewRelease(intermediate_views[0]); wgpuTextureViewRelease(intermediate_views[1]); wgpuTextureRelease(intermediate_textures[0]); wgpuTextureRelease(intermediate_textures[1]); wgpuBufferRelease(layer_params_buffer); wgpuBufferRelease(common_uniform_buffer); wgpuBindGroupLayoutRelease(bgl); wgpuRenderPipelineRelease(pipeline_intermediate); wgpuRenderPipelineRelease(pipeline_final); wgpuTextureViewRelease(input_view); wgpuTextureRelease(input_texture); fixture.shutdown(); return 0; }