From 0f53ed1ed8ed7c07cd7ea8e88e21b5be5d5494e5 Mon Sep 17 00:00:00 2001 From: skal Date: Sat, 14 Feb 2026 07:22:17 +0100 Subject: CNN v2: bilinear mip-level sampling and UI improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **CNN v2 Changes:** - Replace point sampling with bilinear interpolation for mip-level features - Add linear sampler (binding 6) to static features shader - Update CNNv2Effect, cnn_test, and HTML tool **HTML Tool UI:** - Move controls to floating bottom bar in central view - Consolidate video controls + Blend/Depth/Save PNG in single container - Increase left panel width: 300px → 315px (+5%) - Remove per-frame debug messages (visualization, rendering logs) **Technical:** - WGSL: textureSample() with linear_sampler vs textureLoad() - C++: Create WGPUSampler with Linear filtering - HTML: Change sampler from 'nearest' to 'linear' handoff(Claude): CNN v2 now uses bilinear mip-level sampling across all tools --- .gitignore | 1 + src/gpu/effects/cnn_v2_effect.cc | 36 ++++++++-- src/gpu/effects/cnn_v2_effect.h | 1 + tools/cnn_test.cc | 30 ++++++-- tools/cnn_v2_test/index.html | 84 +++++++++++++--------- training/layers/chk_10000_3x5x3x3.pt | Bin 0 -> 5092 bytes workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl | 13 ++-- 7 files changed, 116 insertions(+), 49 deletions(-) create mode 100644 training/layers/chk_10000_3x5x3x3.pt diff --git a/.gitignore b/.gitignore index 4c783d7..41d0683 100644 --- a/.gitignore +++ b/.gitignore @@ -68,4 +68,5 @@ timeline.txt timeline.html Testing/ training/checkpoints/ +checkpoints/ validation_results/ diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc index 366a232..d412154 100644 --- a/src/gpu/effects/cnn_v2_effect.cc +++ b/src/gpu/effects/cnn_v2_effect.cc @@ -19,6 +19,7 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx) static_params_buffer_(nullptr), static_features_tex_(nullptr), static_features_view_(nullptr), + linear_sampler_(nullptr), layer_pipeline_(nullptr), weights_buffer_(nullptr), input_mip_tex_(nullptr), @@ -36,6 +37,7 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params) static_params_buffer_(nullptr), static_features_tex_(nullptr), static_features_view_(nullptr), + linear_sampler_(nullptr), layer_pipeline_(nullptr), weights_buffer_(nullptr), input_mip_tex_(nullptr), @@ -221,6 +223,20 @@ void CNNv2Effect::create_textures() { } void CNNv2Effect::create_pipelines() { + // Create linear sampler for bilinear interpolation + WGPUSamplerDescriptor sampler_desc = {}; + sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeW = WGPUAddressMode_ClampToEdge; + sampler_desc.magFilter = WGPUFilterMode_Linear; + sampler_desc.minFilter = WGPUFilterMode_Linear; + sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + sampler_desc.lodMinClamp = 0.0f; + sampler_desc.lodMaxClamp = 32.0f; + sampler_desc.maxAnisotropy = 1; + + linear_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &sampler_desc); + // Static features compute pipeline size_t shader_size = 0; const char* static_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size); @@ -238,8 +254,8 @@ void CNNv2Effect::create_pipelines() { shader_desc.nextInChain = &wgsl_src.chain; // Create bind group layout for static features compute - // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params - WGPUBindGroupLayoutEntry bgl_entries[6] = {}; + // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params, 6=linear_sampler + WGPUBindGroupLayoutEntry bgl_entries[7] = {}; // Binding 0: Input texture (mip 0) bgl_entries[0].binding = 0; @@ -278,8 +294,13 @@ void CNNv2Effect::create_pipelines() { bgl_entries[5].buffer.type = WGPUBufferBindingType_Uniform; bgl_entries[5].buffer.minBindingSize = sizeof(StaticFeatureParams); + // Binding 6: Linear sampler (for bilinear interpolation) + bgl_entries[6].binding = 6; + bgl_entries[6].visibility = WGPUShaderStage_Compute; + bgl_entries[6].sampler.type = WGPUSamplerBindingType_Filtering; + WGPUBindGroupLayoutDescriptor bgl_desc = {}; - bgl_desc.entryCount = 6; + bgl_desc.entryCount = 7; bgl_desc.entries = bgl_entries; WGPUBindGroupLayout static_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc); @@ -398,7 +419,7 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { } // Create bind group for static features compute - WGPUBindGroupEntry bg_entries[6] = {}; + WGPUBindGroupEntry bg_entries[7] = {}; // Binding 0: Input (mip 0) bg_entries[0].binding = 0; @@ -425,9 +446,13 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) { bg_entries[5].buffer = static_params_buffer_; bg_entries[5].size = sizeof(StaticFeatureParams); + // Binding 6: Linear sampler + bg_entries[6].binding = 6; + bg_entries[6].sampler = linear_sampler_; + WGPUBindGroupDescriptor bg_desc = {}; bg_desc.layout = wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0); - bg_desc.entryCount = 6; + bg_desc.entryCount = 7; bg_desc.entries = bg_entries; static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); @@ -563,6 +588,7 @@ void CNNv2Effect::cleanup() { if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_); if (static_params_buffer_) wgpuBufferRelease(static_params_buffer_); if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_); + if (linear_sampler_) wgpuSamplerRelease(linear_sampler_); if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_); if (weights_buffer_) wgpuBufferRelease(weights_buffer_); diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h index 8a2e1b6..d530d3b 100644 --- a/src/gpu/effects/cnn_v2_effect.h +++ b/src/gpu/effects/cnn_v2_effect.h @@ -64,6 +64,7 @@ private: WGPUBuffer static_params_buffer_; WGPUTexture static_features_tex_; WGPUTextureView static_features_view_; + WGPUSampler linear_sampler_; // CNN layers (storage buffer architecture) WGPUComputePipeline layer_pipeline_; // Single pipeline for all layers diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc index c504c3d..b4a4bdc 100644 --- a/tools/cnn_test.cc +++ b/tools/cnn_test.cc @@ -784,6 +784,20 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue, wgpuQueueWriteBuffer(queue, static_params_buffer, 0, &static_params, sizeof(static_params)); + // Create linear sampler for bilinear interpolation + WGPUSamplerDescriptor linear_sampler_desc = {}; + linear_sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; + linear_sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; + linear_sampler_desc.addressModeW = WGPUAddressMode_ClampToEdge; + linear_sampler_desc.magFilter = WGPUFilterMode_Linear; + linear_sampler_desc.minFilter = WGPUFilterMode_Linear; + linear_sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + linear_sampler_desc.lodMinClamp = 0.0f; + linear_sampler_desc.lodMaxClamp = 32.0f; + linear_sampler_desc.maxAnisotropy = 1; + + WGPUSampler linear_sampler = wgpuDeviceCreateSampler(device, &linear_sampler_desc); + // Create static features compute pipeline WGPUShaderSourceWGSL static_wgsl = {}; static_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL; @@ -796,8 +810,8 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue, wgpuDeviceCreateShaderModule(device, &static_module_desc); // Bind group layout: 0=input, 1=input_mip1, 2=input_mip2, 3=depth, 4=output, - // 5=params - WGPUBindGroupLayoutEntry static_bgl_entries[6] = {}; + // 5=params, 6=linear_sampler + WGPUBindGroupLayoutEntry static_bgl_entries[7] = {}; static_bgl_entries[0].binding = 0; static_bgl_entries[0].visibility = WGPUShaderStage_Compute; static_bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Float; @@ -832,8 +846,12 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue, static_bgl_entries[5].buffer.minBindingSize = sizeof(CNNv2StaticFeatureParams); + static_bgl_entries[6].binding = 6; + static_bgl_entries[6].visibility = WGPUShaderStage_Compute; + static_bgl_entries[6].sampler.type = WGPUSamplerBindingType_Filtering; + WGPUBindGroupLayoutDescriptor static_bgl_desc = {}; - static_bgl_desc.entryCount = 6; + static_bgl_desc.entryCount = 7; static_bgl_desc.entries = static_bgl_entries; WGPUBindGroupLayout static_bgl = @@ -858,7 +876,7 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue, wgpuPipelineLayoutRelease(static_pl); // Create static bind group (use input as all mips for simplicity) - WGPUBindGroupEntry static_bg_entries[6] = {}; + WGPUBindGroupEntry static_bg_entries[7] = {}; static_bg_entries[0].binding = 0; static_bg_entries[0].textureView = input_view; static_bg_entries[1].binding = 1; @@ -872,10 +890,12 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue, static_bg_entries[5].binding = 5; static_bg_entries[5].buffer = static_params_buffer; static_bg_entries[5].size = sizeof(CNNv2StaticFeatureParams); + static_bg_entries[6].binding = 6; + static_bg_entries[6].sampler = linear_sampler; WGPUBindGroupDescriptor static_bg_desc = {}; static_bg_desc.layout = static_bgl; - static_bg_desc.entryCount = 6; + static_bg_desc.entryCount = 7; static_bg_desc.entries = static_bg_entries; WGPUBindGroup static_bg = wgpuDeviceCreateBindGroup(device, &static_bg_desc); diff --git a/tools/cnn_v2_test/index.html b/tools/cnn_v2_test/index.html index 2ec934d..e226d0c 100644 --- a/tools/cnn_v2_test/index.html +++ b/tools/cnn_v2_test/index.html @@ -104,7 +104,7 @@ background: #404040; } .left-sidebar { - width: 300px; + width: 315px; background: #2a2a2a; overflow-y: auto; display: flex; @@ -135,6 +135,32 @@ border: 1px solid #404040; z-index: 100; } + .bottom-controls-float { + position: absolute; + bottom: 16px; + left: 50%; + transform: translateX(-50%); + display: flex; + gap: 16px; + align-items: center; + background: rgba(42, 42, 42, 0.95); + padding: 8px 16px; + border-radius: 4px; + border: 1px solid #404040; + z-index: 100; + } + .bottom-controls-float .control-group { + display: flex; + gap: 8px; + align-items: center; + } + .bottom-controls-float #videoControls { + display: flex; + gap: 8px; + align-items: center; + padding-right: 16px; + border-right: 1px solid #404040; + } .main.drop-active::after { content: 'Drop PNG/video here'; position: absolute; @@ -312,19 +338,6 @@

CNN v2 Testing Tool

-
-
- - - 1.0 -
-
- - - 1.0 -
- -
@@ -358,10 +371,23 @@
-
- - - +
+
+ + + +
+
+ + + 1.0 +
+
+ + + 1.0 +
+
@@ -409,7 +435,7 @@ fn vs_main(@builtin(vertex_index) idx: u32) -> @builtin(position) vec4 { // Static features: 7D parametric features (RGBD + UV + sin(10*uv_x) + bias) const STATIC_SHADER = ` @group(0) @binding(0) var input_tex: texture_2d; -@group(0) @binding(1) var point_sampler: sampler; +@group(0) @binding(1) var linear_sampler: sampler; @group(0) @binding(2) var depth_tex: texture_2d; @group(0) @binding(3) var output_tex: texture_storage_2d; @group(0) @binding(4) var mip_level: u32; @@ -420,9 +446,9 @@ fn main(@builtin(global_invocation_id) id: vec3) { let dims = textureDimensions(input_tex); if (coord.x >= i32(dims.x) || coord.y >= i32(dims.y)) { return; } - // Use normalized UV coords with point sampler (no filtering) + // Use normalized UV coords with linear sampler (bilinear filtering) let uv = (vec2(coord) + 0.5) / vec2(dims); - let rgba = textureSampleLevel(input_tex, point_sampler, uv, f32(mip_level)); + let rgba = textureSampleLevel(input_tex, linear_sampler, uv, f32(mip_level)); let p0 = rgba.r; let p1 = rgba.g; @@ -1114,8 +1140,6 @@ class CNNTester { if (!source) return; const { width, height } = this.getDimensions(); - this.log(`Running CNN pipeline (${this.weights.layers.length} layers)...`); - this.context.configure({ device: this.device, format: this.format }); // Create persistent input texture for original view with mipmaps @@ -1203,9 +1227,9 @@ class CNNTester { if (!this.pointSampler) { this.pointSampler = this.device.createSampler({ - magFilter: 'nearest', - minFilter: 'nearest', - mipmapFilter: 'nearest' + magFilter: 'linear', + minFilter: 'linear', + mipmapFilter: 'linear' }); } @@ -1464,8 +1488,6 @@ class CNNTester { const layerTex = this.layerOutputs[layerIdx]; const { width, height } = this.getDimensions(); - this.log(`Visualizing ${layerName} activations (${width}×${height})`); - // Update channel labels based on layer type // Static features (layerIdx=0): 8 channels split into two views // CNN layers (layerIdx≥1): 4 channels per layer @@ -1519,7 +1541,6 @@ class CNNTester { try { ctx.configure({ device: this.device, format: this.format }); - this.log(`Canvas ${c}: ${width}×${height}, WebGPU context configured`); } catch (e) { this.log(`Failed to configure canvas ${c}: ${e.message}`, 'error'); continue; @@ -1559,12 +1580,10 @@ class CNNTester { renderPass.end(); this.device.queue.submit([encoder.finish()]); - this.log(`Submitted render for channel ${c}`); } // Wait for all renders to complete await this.device.queue.onSubmittedWorkDone(); - this.log(`Rendered 4 channels for ${layerName}`); // Update active channel highlighting and preview this.updateChannelSelection(); @@ -1666,7 +1685,6 @@ class CNNTester { if (btn) btn.classList.add('active'); const { kernelSize, inChannels, outChannels, weightOffset, min, max } = layer; - this.log(`Visualizing Layer ${cnnLayerIdx} weights: ${inChannels}→${outChannels}, ${kernelSize}×${kernelSize}`); const canvas = document.getElementById('weightsCanvas'); const ctx = canvas.getContext('2d', { willReadFrequently: false }); @@ -1706,8 +1724,6 @@ class CNNTester { } } } - - this.log(`Rendered ${outChannels} output channels (${width}×${height}px)`); } getWeightValue(idx) { diff --git a/training/layers/chk_10000_3x5x3x3.pt b/training/layers/chk_10000_3x5x3x3.pt new file mode 100644 index 0000000..6e6750c Binary files /dev/null and b/training/layers/chk_10000_3x5x3x3.pt differ diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl index 7b08132..63fafa8 100644 --- a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl +++ b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl @@ -20,6 +20,7 @@ struct StaticFeatureParams { @group(0) @binding(3) var depth_tex: texture_2d; @group(0) @binding(4) var output_tex: texture_storage_2d; @group(0) @binding(5) var params: StaticFeatureParams; +@group(0) @binding(6) var linear_sampler: sampler; @compute @workgroup_size(8, 8) fn main(@builtin(global_invocation_id) id: vec3) { @@ -30,17 +31,19 @@ fn main(@builtin(global_invocation_id) id: vec3) { return; } - // Parametric features (p0-p3) - sample from specified mip level + // Parametric features (p0-p3) - bilinear sample from specified mip level + // Use UV coordinates for bilinear interpolation + let uv = (vec2(coord) + 0.5) / vec2(dims); var rgba: vec4; if (params.mip_level == 0u) { - rgba = textureLoad(input_tex, coord, 0); + rgba = textureSample(input_tex, linear_sampler, uv); } else if (params.mip_level == 1u) { - rgba = textureLoad(input_tex_mip1, coord, 0); + rgba = textureSample(input_tex_mip1, linear_sampler, uv); } else if (params.mip_level == 2u) { - rgba = textureLoad(input_tex_mip2, coord, 0); + rgba = textureSample(input_tex_mip2, linear_sampler, uv); } else { // Mip 3 or higher: use mip 2 as fallback - rgba = textureLoad(input_tex_mip2, coord, 0); + rgba = textureSample(input_tex_mip2, linear_sampler, uv); } let p0 = rgba.r; -- cgit v1.2.3