summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author skal <pascal.massimino@gmail.com> 2026-02-14 07:22:17 +0100
committer skal <pascal.massimino@gmail.com> 2026-02-14 07:24:51 +0100
commit 0f53ed1ed8ed7c07cd7ea8e88e21b5be5d5494e5 (patch)
tree 0e1a8426c16e7c89b83038d5b90bb9d94c6d06e5
parent 8dd77545b5ec2f45ce46b98dd7d94a3c4a13e290 (diff)
CNN v2: bilinear mip-level sampling and UI improvements
**CNN v2 Changes:**
- Replace point sampling with bilinear interpolation for mip-level features
- Add linear sampler (binding 6) to static features shader
- Update CNNv2Effect, cnn_test, and HTML tool

**HTML Tool UI:**
- Move controls to floating bottom bar in central view
- Consolidate video controls + Blend/Depth/Save PNG in single container
- Increase left panel width: 300px → 315px (+5%)
- Remove per-frame debug messages (visualization, rendering logs)

**Technical:**
- WGSL: textureSample() with linear_sampler vs textureLoad()
- C++: Create WGPUSampler with Linear filtering
- HTML: Change sampler from 'nearest' to 'linear'

handoff(Claude): CNN v2 now uses bilinear mip-level sampling across all tools
-rw-r--r-- .gitignore 1
-rw-r--r-- src/gpu/effects/cnn_v2_effect.cc 36
-rw-r--r-- src/gpu/effects/cnn_v2_effect.h 1
-rw-r--r-- tools/cnn_test.cc 30
-rw-r--r-- tools/cnn_v2_test/index.html 84
-rw-r--r-- training/layers/chk_10000_3x5x3x3.pt bin 0 -> 5092 bytes
-rw-r--r-- workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl 13
7 files changed, 116 insertions, 49 deletions
diff --git a/.gitignore b/.gitignore
index 4c783d7..41d0683 100644
--- a/.gitignore
+++ b/.gitignore
@@ -68,4 +68,5 @@ timeline.txt
timeline.html
Testing/
training/checkpoints/
+checkpoints/
validation_results/
diff --git a/src/gpu/effects/cnn_v2_effect.cc b/src/gpu/effects/cnn_v2_effect.cc
index 366a232..d412154 100644
--- a/src/gpu/effects/cnn_v2_effect.cc
+++ b/src/gpu/effects/cnn_v2_effect.cc
@@ -19,6 +19,7 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx)
static_params_buffer_(nullptr),
static_features_tex_(nullptr),
static_features_view_(nullptr),
+ linear_sampler_(nullptr),
layer_pipeline_(nullptr),
weights_buffer_(nullptr),
input_mip_tex_(nullptr),
@@ -36,6 +37,7 @@ CNNv2Effect::CNNv2Effect(const GpuContext& ctx, const CNNv2EffectParams& params)
static_params_buffer_(nullptr),
static_features_tex_(nullptr),
static_features_view_(nullptr),
+ linear_sampler_(nullptr),
layer_pipeline_(nullptr),
weights_buffer_(nullptr),
input_mip_tex_(nullptr),
@@ -221,6 +223,20 @@ void CNNv2Effect::create_textures() {
}
void CNNv2Effect::create_pipelines() {
+ // Create linear sampler for bilinear interpolation
+ WGPUSamplerDescriptor sampler_desc = {};
+ sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge;
+ sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge;
+ sampler_desc.addressModeW = WGPUAddressMode_ClampToEdge;
+ sampler_desc.magFilter = WGPUFilterMode_Linear;
+ sampler_desc.minFilter = WGPUFilterMode_Linear;
+ sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear;
+ sampler_desc.lodMinClamp = 0.0f;
+ sampler_desc.lodMaxClamp = 32.0f;
+ sampler_desc.maxAnisotropy = 1;
+
+ linear_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &sampler_desc);
+
// Static features compute pipeline
size_t shader_size = 0;
const char* static_code = (const char*)GetAsset(AssetId::ASSET_SHADER_CNN_V2_STATIC, &shader_size);
@@ -238,8 +254,8 @@ void CNNv2Effect::create_pipelines() {
shader_desc.nextInChain = &wgsl_src.chain;
// Create bind group layout for static features compute
- // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params
- WGPUBindGroupLayoutEntry bgl_entries[6] = {};
+ // Bindings: 0=input_tex, 1=input_mip1, 2=input_mip2, 3=depth_tex, 4=output, 5=params, 6=linear_sampler
+ WGPUBindGroupLayoutEntry bgl_entries[7] = {};
// Binding 0: Input texture (mip 0)
bgl_entries[0].binding = 0;
@@ -278,8 +294,13 @@ void CNNv2Effect::create_pipelines() {
bgl_entries[5].buffer.type = WGPUBufferBindingType_Uniform;
bgl_entries[5].buffer.minBindingSize = sizeof(StaticFeatureParams);
+ // Binding 6: Linear sampler (for bilinear interpolation)
+ bgl_entries[6].binding = 6;
+ bgl_entries[6].visibility = WGPUShaderStage_Compute;
+ bgl_entries[6].sampler.type = WGPUSamplerBindingType_Filtering;
+
WGPUBindGroupLayoutDescriptor bgl_desc = {};
- bgl_desc.entryCount = 6;
+ bgl_desc.entryCount = 7;
bgl_desc.entries = bgl_entries;
WGPUBindGroupLayout static_bgl = wgpuDeviceCreateBindGroupLayout(ctx_.device, &bgl_desc);
@@ -398,7 +419,7 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
}
// Create bind group for static features compute
- WGPUBindGroupEntry bg_entries[6] = {};
+ WGPUBindGroupEntry bg_entries[7] = {};
// Binding 0: Input (mip 0)
bg_entries[0].binding = 0;
@@ -425,9 +446,13 @@ void CNNv2Effect::update_bind_group(WGPUTextureView input_view) {
bg_entries[5].buffer = static_params_buffer_;
bg_entries[5].size = sizeof(StaticFeatureParams);
+ // Binding 6: Linear sampler
+ bg_entries[6].binding = 6;
+ bg_entries[6].sampler = linear_sampler_;
+
WGPUBindGroupDescriptor bg_desc = {};
bg_desc.layout = wgpuComputePipelineGetBindGroupLayout(static_pipeline_, 0);
- bg_desc.entryCount = 6;
+ bg_desc.entryCount = 7;
bg_desc.entries = bg_entries;
static_bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc);
@@ -563,6 +588,7 @@ void CNNv2Effect::cleanup() {
if (static_bind_group_) wgpuBindGroupRelease(static_bind_group_);
if (static_params_buffer_) wgpuBufferRelease(static_params_buffer_);
if (static_pipeline_) wgpuComputePipelineRelease(static_pipeline_);
+ if (linear_sampler_) wgpuSamplerRelease(linear_sampler_);
if (layer_pipeline_) wgpuComputePipelineRelease(layer_pipeline_);
if (weights_buffer_) wgpuBufferRelease(weights_buffer_);
diff --git a/src/gpu/effects/cnn_v2_effect.h b/src/gpu/effects/cnn_v2_effect.h
index 8a2e1b6..d530d3b 100644
--- a/src/gpu/effects/cnn_v2_effect.h
+++ b/src/gpu/effects/cnn_v2_effect.h
@@ -64,6 +64,7 @@ private:
WGPUBuffer static_params_buffer_;
WGPUTexture static_features_tex_;
WGPUTextureView static_features_view_;
+ WGPUSampler linear_sampler_;
// CNN layers (storage buffer architecture)
WGPUComputePipeline layer_pipeline_; // Single pipeline for all layers
diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc
index c504c3d..b4a4bdc 100644
--- a/tools/cnn_test.cc
+++ b/tools/cnn_test.cc
@@ -784,6 +784,20 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue,
wgpuQueueWriteBuffer(queue, static_params_buffer, 0, &static_params,
sizeof(static_params));
+ // Create linear sampler for bilinear interpolation
+ WGPUSamplerDescriptor linear_sampler_desc = {};
+ linear_sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge;
+ linear_sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge;
+ linear_sampler_desc.addressModeW = WGPUAddressMode_ClampToEdge;
+ linear_sampler_desc.magFilter = WGPUFilterMode_Linear;
+ linear_sampler_desc.minFilter = WGPUFilterMode_Linear;
+ linear_sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear;
+ linear_sampler_desc.lodMinClamp = 0.0f;
+ linear_sampler_desc.lodMaxClamp = 32.0f;
+ linear_sampler_desc.maxAnisotropy = 1;
+
+ WGPUSampler linear_sampler = wgpuDeviceCreateSampler(device, &linear_sampler_desc);
+
// Create static features compute pipeline
WGPUShaderSourceWGSL static_wgsl = {};
static_wgsl.chain.sType = WGPUSType_ShaderSourceWGSL;
@@ -796,8 +810,8 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue,
wgpuDeviceCreateShaderModule(device, &static_module_desc);
// Bind group layout: 0=input, 1=input_mip1, 2=input_mip2, 3=depth, 4=output,
- // 5=params
- WGPUBindGroupLayoutEntry static_bgl_entries[6] = {};
+ // 5=params, 6=linear_sampler
+ WGPUBindGroupLayoutEntry static_bgl_entries[7] = {};
static_bgl_entries[0].binding = 0;
static_bgl_entries[0].visibility = WGPUShaderStage_Compute;
static_bgl_entries[0].texture.sampleType = WGPUTextureSampleType_Float;
@@ -832,8 +846,12 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue,
static_bgl_entries[5].buffer.minBindingSize =
sizeof(CNNv2StaticFeatureParams);
+ static_bgl_entries[6].binding = 6;
+ static_bgl_entries[6].visibility = WGPUShaderStage_Compute;
+ static_bgl_entries[6].sampler.type = WGPUSamplerBindingType_Filtering;
+
WGPUBindGroupLayoutDescriptor static_bgl_desc = {};
- static_bgl_desc.entryCount = 6;
+ static_bgl_desc.entryCount = 7;
static_bgl_desc.entries = static_bgl_entries;
WGPUBindGroupLayout static_bgl =
@@ -858,7 +876,7 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue,
wgpuPipelineLayoutRelease(static_pl);
// Create static bind group (use input as all mips for simplicity)
- WGPUBindGroupEntry static_bg_entries[6] = {};
+ WGPUBindGroupEntry static_bg_entries[7] = {};
static_bg_entries[0].binding = 0;
static_bg_entries[0].textureView = input_view;
static_bg_entries[1].binding = 1;
@@ -872,10 +890,12 @@ static bool process_cnn_v2(WGPUDevice device, WGPUQueue queue,
static_bg_entries[5].binding = 5;
static_bg_entries[5].buffer = static_params_buffer;
static_bg_entries[5].size = sizeof(CNNv2StaticFeatureParams);
+ static_bg_entries[6].binding = 6;
+ static_bg_entries[6].sampler = linear_sampler;
WGPUBindGroupDescriptor static_bg_desc = {};
static_bg_desc.layout = static_bgl;
- static_bg_desc.entryCount = 6;
+ static_bg_desc.entryCount = 7;
static_bg_desc.entries = static_bg_entries;
WGPUBindGroup static_bg = wgpuDeviceCreateBindGroup(device, &static_bg_desc);
diff --git a/tools/cnn_v2_test/index.html b/tools/cnn_v2_test/index.html
index 2ec934d..e226d0c 100644
--- a/tools/cnn_v2_test/index.html
+++ b/tools/cnn_v2_test/index.html
@@ -104,7 +104,7 @@
background: #404040;
}
.left-sidebar {
- width: 300px;
+ width: 315px;
background: #2a2a2a;
overflow-y: auto;
display: flex;
@@ -135,6 +135,32 @@
border: 1px solid #404040;
z-index: 100;
}
+ .bottom-controls-float {
+ position: absolute;
+ bottom: 16px;
+ left: 50%;
+ transform: translateX(-50%);
+ display: flex;
+ gap: 16px;
+ align-items: center;
+ background: rgba(42, 42, 42, 0.95);
+ padding: 8px 16px;
+ border-radius: 4px;
+ border: 1px solid #404040;
+ z-index: 100;
+ }
+ .bottom-controls-float .control-group {
+ display: flex;
+ gap: 8px;
+ align-items: center;
+ }
+ .bottom-controls-float #videoControls {
+ display: flex;
+ gap: 8px;
+ align-items: center;
+ padding-right: 16px;
+ border-right: 1px solid #404040;
+ }
.main.drop-active::after {
content: 'Drop PNG/video here';
position: absolute;
@@ -312,19 +338,6 @@
<body>
<div class="header">
<h1>CNN v2 Testing Tool</h1>
- <div class="controls">
- <div class="control-group">
- <label>Blend:</label>
- <input type="range" id="blend" min="0" max="1" step="0.01" value="1.0">
- <span id="blendValue">1.0</span>
- </div>
- <div class="control-group">
- <label>Depth:</label>
- <input type="range" id="depth" min="0" max="1" step="0.01" value="1.0">
- <span id="depthValue">1.0</span>
- </div>
- <button id="savePngBtn">Save PNG</button>
- </div>
</div>
<video id="videoSource" muted loop></video>
<div class="content">
@@ -358,10 +371,23 @@
</div>
</div>
<div class="main" id="mainDrop">
- <div class="video-controls-float" id="videoControls">
- <button id="playPauseBtn" disabled>Play</button>
- <button id="stepBackBtn" disabled>◄ Frame</button>
- <button id="stepForwardBtn" disabled>Frame ►</button>
+ <div class="bottom-controls-float">
+ <div id="videoControls">
+ <button id="playPauseBtn" disabled>Play</button>
+ <button id="stepBackBtn" disabled>◄ Frame</button>
+ <button id="stepForwardBtn" disabled>Frame ►</button>
+ </div>
+ <div class="control-group">
+ <label>Blend:</label>
+ <input type="range" id="blend" min="0" max="1" step="0.01" value="1.0">
+ <span id="blendValue">1.0</span>
+ </div>
+ <div class="control-group">
+ <label>Depth:</label>
+ <input type="range" id="depth" min="0" max="1" step="0.01" value="1.0">
+ <span id="depthValue">1.0</span>
+ </div>
+ <button id="savePngBtn">Save PNG</button>
</div>
<canvas id="canvas"></canvas>
</div>
@@ -409,7 +435,7 @@ fn vs_main(@builtin(vertex_index) idx: u32) -> @builtin(position) vec4<f32> {
// Static features: 7D parametric features (RGBD + UV + sin(10*uv_x) + bias)
const STATIC_SHADER = `
@group(0) @binding(0) var input_tex: texture_2d<f32>;
-@group(0) @binding(1) var point_sampler: sampler;
+@group(0) @binding(1) var linear_sampler: sampler;
@group(0) @binding(2) var depth_tex: texture_2d<f32>;
@group(0) @binding(3) var output_tex: texture_storage_2d<rgba32uint, write>;
@group(0) @binding(4) var<uniform> mip_level: u32;
@@ -420,9 +446,9 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
let dims = textureDimensions(input_tex);
if (coord.x >= i32(dims.x) || coord.y >= i32(dims.y)) { return; }
- // Use normalized UV coords with point sampler (no filtering)
+ // Use normalized UV coords with linear sampler (bilinear filtering)
let uv = (vec2<f32>(coord) + 0.5) / vec2<f32>(dims);
- let rgba = textureSampleLevel(input_tex, point_sampler, uv, f32(mip_level));
+ let rgba = textureSampleLevel(input_tex, linear_sampler, uv, f32(mip_level));
let p0 = rgba.r;
let p1 = rgba.g;
@@ -1114,8 +1140,6 @@ class CNNTester {
if (!source) return;
const { width, height } = this.getDimensions();
- this.log(`Running CNN pipeline (${this.weights.layers.length} layers)...`);
-
this.context.configure({ device: this.device, format: this.format });
// Create persistent input texture for original view with mipmaps
@@ -1203,9 +1227,9 @@ class CNNTester {
if (!this.pointSampler) {
this.pointSampler = this.device.createSampler({
- magFilter: 'nearest',
- minFilter: 'nearest',
- mipmapFilter: 'nearest'
+ magFilter: 'linear',
+ minFilter: 'linear',
+ mipmapFilter: 'linear'
});
}
@@ -1464,8 +1488,6 @@ class CNNTester {
const layerTex = this.layerOutputs[layerIdx];
const { width, height } = this.getDimensions();
- this.log(`Visualizing ${layerName} activations (${width}×${height})`);
-
// Update channel labels based on layer type
// Static features (layerIdx=0): 8 channels split into two views
// CNN layers (layerIdx≥1): 4 channels per layer
@@ -1519,7 +1541,6 @@ class CNNTester {
try {
ctx.configure({ device: this.device, format: this.format });
- this.log(`Canvas ${c}: ${width}×${height}, WebGPU context configured`);
} catch (e) {
this.log(`Failed to configure canvas ${c}: ${e.message}`, 'error');
continue;
@@ -1559,12 +1580,10 @@ class CNNTester {
renderPass.end();
this.device.queue.submit([encoder.finish()]);
- this.log(`Submitted render for channel ${c}`);
}
// Wait for all renders to complete
await this.device.queue.onSubmittedWorkDone();
- this.log(`Rendered 4 channels for ${layerName}`);
// Update active channel highlighting and preview
this.updateChannelSelection();
@@ -1666,7 +1685,6 @@ class CNNTester {
if (btn) btn.classList.add('active');
const { kernelSize, inChannels, outChannels, weightOffset, min, max } = layer;
- this.log(`Visualizing Layer ${cnnLayerIdx} weights: ${inChannels}→${outChannels}, ${kernelSize}×${kernelSize}`);
const canvas = document.getElementById('weightsCanvas');
const ctx = canvas.getContext('2d', { willReadFrequently: false });
@@ -1706,8 +1724,6 @@ class CNNTester {
}
}
}
-
- this.log(`Rendered ${outChannels} output channels (${width}×${height}px)`);
}
getWeightValue(idx) {
diff --git a/training/layers/chk_10000_3x5x3x3.pt b/training/layers/chk_10000_3x5x3x3.pt
new file mode 100644
index 0000000..6e6750c
--- /dev/null
+++ b/training/layers/chk_10000_3x5x3x3.pt
Binary files differ
diff --git a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
index 7b08132..63fafa8 100644
--- a/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
+++ b/workspaces/main/shaders/cnn_v2/cnn_v2_static.wgsl
@@ -20,6 +20,7 @@ struct StaticFeatureParams {
@group(0) @binding(3) var depth_tex: texture_2d<f32>;
@group(0) @binding(4) var output_tex: texture_storage_2d<rgba32uint, write>;
@group(0) @binding(5) var<uniform> params: StaticFeatureParams;
+@group(0) @binding(6) var linear_sampler: sampler;
@compute @workgroup_size(8, 8)
fn main(@builtin(global_invocation_id) id: vec3<u32>) {
@@ -30,17 +31,19 @@ fn main(@builtin(global_invocation_id) id: vec3<u32>) {
return;
}
- // Parametric features (p0-p3) - sample from specified mip level
+ // Parametric features (p0-p3) - bilinear sample from the texture bound for params.mip_level.
+ // Explicit-LOD textureSampleLevel is required: textureSample needs implicit derivatives and is fragment-stage only.
+ let uv = (vec2<f32>(coord) + 0.5) / vec2<f32>(dims);
var rgba: vec4<f32>;
if (params.mip_level == 0u) {
- rgba = textureLoad(input_tex, coord, 0);
+ rgba = textureSampleLevel(input_tex, linear_sampler, uv, 0.0);
} else if (params.mip_level == 1u) {
- rgba = textureLoad(input_tex_mip1, coord, 0);
+ rgba = textureSampleLevel(input_tex_mip1, linear_sampler, uv, 0.0);
} else if (params.mip_level == 2u) {
- rgba = textureLoad(input_tex_mip2, coord, 0);
+ rgba = textureSampleLevel(input_tex_mip2, linear_sampler, uv, 0.0);
} else {
// Mip 3 or higher: use mip 2 as fallback
- rgba = textureLoad(input_tex_mip2, coord, 0);
+ rgba = textureSampleLevel(input_tex_mip2, linear_sampler, uv, 0.0);
}
let p0 = rgba.r;