author     skal <pascal.massimino@gmail.com>  2026-02-11 23:13:43 +0100
committer  skal <pascal.massimino@gmail.com>  2026-02-11 23:13:43 +0100
commit     8ff8c56cd68d9e785cf6cb36ce1fc2bdc54ac15a (patch)
tree       09f44369f1926a8315c7d06ce050cd51d49c91a6
parent     3530fcd7414ea24c8916adc1e490f71c02ac96f1 (diff)
fix: CNN bias accumulation and output format improvements
- Fix bias division bug: divide by num_positions to compensate for shader loop accumulation (affects all layers)
- train_cnn.py: Save RGBA output preserving alpha channel from input
- Add --debug-hex flag to both tools for pixel-level debugging
- Remove sRGB/linear_png debug code from cnn_test
- Regenerate weights with corrected bias export

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
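The bias fix is easiest to see in isolation: the generated WGSL packs each layer's bias into the last lane of the second weight vector, so the shader's per-tap accumulation loop adds it once per kernel position. Exporting bias / num_positions makes the summed contribution equal the true bias. The sketch below illustrates the arithmetic with NumPy under those assumptions; the names (conv_accumulate, taps) and random data are illustrative only, not code from this repository.

```python
# Minimal sketch of the bias accumulation issue fixed in this commit.
import numpy as np

rng = np.random.default_rng(0)
num_positions = 9                             # 3x3 kernel taps
taps = rng.normal(size=(num_positions, 7))    # per-tap features: rgba + uv, gray
weights = rng.normal(size=(num_positions, 7))
bias = 0.5

# Reference convolution: weighted sum over taps, bias added once.
reference = np.sum(taps * weights) + bias

def conv_accumulate(exported_bias):
    # Shader-style loop: the bias rides along in the packed weight vector
    # (multiplied by the appended constant 1.0), so it is added at every tap.
    acc = 0.0
    for p in range(num_positions):
        acc += taps[p] @ weights[p] + exported_bias * 1.0
    return acc

buggy = conv_accumulate(bias)                  # bias counted num_positions times
fixed = conv_accumulate(bias / num_positions)  # matches the reference

assert not np.isclose(buggy, reference)
assert np.isclose(fixed, reference)
```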
-rw-r--r--  tools/cnn_test.cc                                        |  28
-rwxr-xr-x  training/train_cnn.py                                    |  57
-rw-r--r--  workspaces/main/shaders/cnn/cnn_weights_generated.wgsl   | 162
3 files changed, 157 insertions(+), 90 deletions(-)
diff --git a/tools/cnn_test.cc b/tools/cnn_test.cc
index fa4394f..59f1d22 100644
--- a/tools/cnn_test.cc
+++ b/tools/cnn_test.cc
@@ -43,6 +43,7 @@ struct Args {
bool output_png = true; // Default to PNG
const char* save_intermediates = nullptr;
int num_layers = 3; // Default to 3 layers
+ bool debug_hex = false; // Print first 8 pixels as hex
};
// Parse command-line arguments
@@ -80,6 +81,8 @@ static bool parse_args(int argc, char** argv, Args* args) {
fprintf(stderr, "Error: layers must be in range [1, 10]\n");
return false;
}
+ } else if (strcmp(argv[i], "--debug-hex") == 0) {
+ args->debug_hex = true;
} else if (strcmp(argv[i], "--help") == 0) {
return false;
} else {
@@ -99,6 +102,7 @@ static void print_usage(const char* prog) {
fprintf(stderr, " --format ppm|png Output format (default: png)\n");
fprintf(stderr, " --layers N Number of CNN layers (1-10, default: 3)\n");
fprintf(stderr, " --save-intermediates DIR Save intermediate layers to directory\n");
+ fprintf(stderr, " --debug-hex Print first 8 pixels as hex (debug)\n");
fprintf(stderr, " --help Show this help\n");
}
@@ -437,6 +441,18 @@ int main(int argc, char** argv) {
printf("Reading pixels from GPU...\n");
std::vector<uint8_t> pixels = rt.read_pixels();
+ // Debug: print first 8 pixels as hex
+ if (args.debug_hex && !pixels.empty()) {
+ printf("First 8 pixels (BGRA hex):\n");
+ for (int i = 0; i < 8 && i < width * height; ++i) {
+ const uint8_t b = pixels[i * 4 + 0];
+ const uint8_t g = pixels[i * 4 + 1];
+ const uint8_t r = pixels[i * 4 + 2];
+ const uint8_t a = pixels[i * 4 + 3];
+ printf(" [%d] 0x%02X%02X%02X%02X (RGBA)\n", i, r, g, b, a);
+ }
+ }
+
if (pixels.empty()) {
fprintf(stderr, "Error: GPU readback failed\n");
wgpuTextureViewRelease(intermediate_views[0]);
@@ -513,6 +529,18 @@ int main(int argc, char** argv) {
std::vector<uint8_t> pixels = texture_readback_fp16_to_u8(
device, queue, intermediate_textures[dst_idx], width, height);
+ // Debug: print first 8 pixels as hex
+ if (args.debug_hex && !pixels.empty()) {
+ printf("Layer %d first 8 pixels (BGRA hex):\n", layer);
+ for (int i = 0; i < 8 && i < width * height; ++i) {
+ const uint8_t b = pixels[i * 4 + 0];
+ const uint8_t g = pixels[i * 4 + 1];
+ const uint8_t r = pixels[i * 4 + 2];
+ const uint8_t a = pixels[i * 4 + 3];
+ printf(" [%d] 0x%02X%02X%02X%02X (RGBA)\n", i, r, g, b, a);
+ }
+ }
+
if (!pixels.empty()) {
save_png(layer_path, pixels, width, height);
} else {
diff --git a/training/train_cnn.py b/training/train_cnn.py
index 1ea42a3..c775325 100755
--- a/training/train_cnn.py
+++ b/training/train_cnn.py
@@ -378,7 +378,7 @@ def export_weights_to_wgsl(model, output_path, kernel_sizes):
v0 = [f"{weights[0, in_c, row, col]:.6f}" for in_c in range(4)]
# Second vec4: [w4, w5, w6, bias] (uv, gray, 1)
v1 = [f"{weights[0, in_c, row, col]:.6f}" for in_c in range(4, 7)]
- v1.append(f"{bias[0]:.6f}")
+ v1.append(f"{bias[0] / num_positions:.6f}")
f.write(f" vec4<f32>({', '.join(v0)}),\n")
f.write(f" vec4<f32>({', '.join(v1)})")
f.write(",\n" if pos < num_positions-1 else "\n")
@@ -395,7 +395,7 @@ def export_weights_to_wgsl(model, output_path, kernel_sizes):
v0 = [f"{weights[out_c, in_c, row, col]:.6f}" for in_c in range(4)]
# Second vec4: [w4, w5, w6, bias] (uv, gray, 1)
v1 = [f"{weights[out_c, in_c, row, col]:.6f}" for in_c in range(4, 7)]
- v1.append(f"{bias[out_c]:.6f}")
+ v1.append(f"{bias[out_c] / num_positions:.6f}")
idx = (pos * 4 + out_c) * 2
f.write(f" vec4<f32>({', '.join(v0)}),\n")
f.write(f" vec4<f32>({', '.join(v1)})")
@@ -776,7 +776,7 @@ def export_from_checkpoint(checkpoint_path, output_path=None):
print("Export complete!")
-def infer_from_checkpoint(checkpoint_path, input_path, output_path, patch_size=32, save_intermediates=None):
+def infer_from_checkpoint(checkpoint_path, input_path, output_path, patch_size=32, save_intermediates=None, zero_weights=False, debug_hex=False):
"""Run sliding-window inference to match WGSL shader behavior"""
if not os.path.exists(checkpoint_path):
@@ -796,6 +796,15 @@ def infer_from_checkpoint(checkpoint_path, input_path, output_path, patch_size=3
kernel_sizes=checkpoint['kernel_sizes']
)
model.load_state_dict(checkpoint['model_state'])
+
+ # Debug: Zero out all weights and biases
+ if zero_weights:
+ print("DEBUG: Zeroing out all weights and biases")
+ for layer in model.layers:
+ with torch.no_grad():
+ layer.weight.zero_()
+ layer.bias.zero_()
+
model.eval()
# Load image
@@ -815,10 +824,23 @@ def infer_from_checkpoint(checkpoint_path, input_path, output_path, patch_size=3
# Convert to numpy
output = output_tensor.squeeze(0).permute(1, 2, 0).numpy()
- # Save final output
+ # Append alpha channel from input
+ alpha = img_tensor[0, 3:4, :, :].permute(1, 2, 0).numpy() # [H,W,1]
+ output_rgba = np.concatenate([output, alpha], axis=2) # [H,W,4]
+
+ # Debug: print first 8 pixels as hex
+ if debug_hex:
+ output_u8 = (output_rgba * 255).astype(np.uint8)
+ print("First 8 pixels (RGBA hex):")
+ for i in range(min(8, output_u8.shape[0] * output_u8.shape[1])):
+ y, x = i // output_u8.shape[1], i % output_u8.shape[1]
+ r, g, b, a = output_u8[y, x]
+ print(f" [{i}] 0x{r:02X}{g:02X}{b:02X}{a:02X}")
+
+ # Save final output as RGBA
print(f"Saving output to: {output_path}")
os.makedirs(os.path.dirname(output_path) if os.path.dirname(output_path) else '.', exist_ok=True)
- output_img = Image.fromarray((output * 255).astype(np.uint8))
+ output_img = Image.fromarray((output_rgba * 255).astype(np.uint8), mode='RGBA')
output_img.save(output_path)
# Save intermediates if requested
@@ -828,10 +850,25 @@ def infer_from_checkpoint(checkpoint_path, input_path, output_path, patch_size=3
for layer_idx, layer_tensor in enumerate(intermediates):
# Convert [-1,1] to [0,1] for visualization
layer_data = (layer_tensor.squeeze(0).permute(1, 2, 0).numpy() + 1.0) * 0.5
- # Take first channel for 4-channel intermediate layers
+ layer_u8 = (layer_data.clip(0, 1) * 255).astype(np.uint8)
+
+ # Debug: print first 8 pixels as hex
+ if debug_hex:
+ print(f"Layer {layer_idx} first 8 pixels (RGBA hex):")
+ for i in range(min(8, layer_u8.shape[0] * layer_u8.shape[1])):
+ y, x = i // layer_u8.shape[1], i % layer_u8.shape[1]
+ if layer_u8.shape[2] == 4:
+ r, g, b, a = layer_u8[y, x]
+ print(f" [{i}] 0x{r:02X}{g:02X}{b:02X}{a:02X}")
+ else:
+ r, g, b = layer_u8[y, x]
+ print(f" [{i}] 0x{r:02X}{g:02X}{b:02X}")
+
+ # Save all 4 channels for intermediate layers
if layer_data.shape[2] == 4:
- layer_data = layer_data[:, :, :3] # Show RGB only
- layer_img = Image.fromarray((layer_data.clip(0, 1) * 255).astype(np.uint8))
+ layer_img = Image.fromarray(layer_u8, mode='RGBA')
+ else:
+ layer_img = Image.fromarray(layer_u8)
layer_path = os.path.join(save_intermediates, f'layer_{layer_idx}.png')
layer_img.save(layer_path)
print(f" Saved layer {layer_idx} to {layer_path}")
@@ -861,6 +898,8 @@ def main():
parser.add_argument('--early-stop-patience', type=int, default=0, help='Stop if loss changes less than eps over N epochs (default: 0 = disabled)')
parser.add_argument('--early-stop-eps', type=float, default=1e-6, help='Loss change threshold for early stopping (default: 1e-6)')
parser.add_argument('--save-intermediates', help='Directory to save intermediate layer outputs (inference only)')
+ parser.add_argument('--zero-weights', action='store_true', help='Zero out all weights/biases during inference (debug only)')
+ parser.add_argument('--debug-hex', action='store_true', help='Print first 8 pixels as hex (debug only)')
args = parser.parse_args()
@@ -872,7 +911,7 @@ def main():
sys.exit(1)
output_path = args.output or 'inference_output.png'
patch_size = args.patch_size or 32
- infer_from_checkpoint(checkpoint, args.infer, output_path, patch_size, args.save_intermediates)
+ infer_from_checkpoint(checkpoint, args.infer, output_path, patch_size, args.save_intermediates, args.zero_weights, args.debug_hex)
return
# Export-only mode
diff --git a/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl b/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl
index a2a465c..89b5a06 100644
--- a/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl
+++ b/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl
@@ -3,172 +3,172 @@
const weights_layer0: array<vec4<f32>, 72> = array(
vec4<f32>(-0.064365, 0.036971, -0.002396, 0.109771),
- vec4<f32>(-0.072773, -0.119603, -0.123951, 0.273301),
+ vec4<f32>(-0.072773, -0.119603, -0.123951, 0.030367),
vec4<f32>(-0.236017, 0.095366, -0.091895, 0.268624),
- vec4<f32>(-0.079571, 0.007580, -0.058172, 0.170957),
+ vec4<f32>(-0.079571, 0.007580, -0.058172, 0.018995),
vec4<f32>(0.112710, 0.131221, 0.168938, -0.119702),
- vec4<f32>(0.007403, -0.219214, 0.131916, 0.006009),
+ vec4<f32>(0.007403, -0.219214, 0.131916, 0.000668),
vec4<f32>(-0.221084, -0.054396, 0.055713, 0.150345),
- vec4<f32>(0.032674, 0.016578, 0.033386, 0.260089),
+ vec4<f32>(0.032674, 0.016578, 0.033386, 0.028899),
vec4<f32>(0.169524, 0.374150, 0.225601, 0.213908),
- vec4<f32>(-0.137716, -0.103676, 0.367652, 0.273301),
+ vec4<f32>(-0.137716, -0.103676, 0.367652, 0.030367),
vec4<f32>(0.115163, 0.324975, 0.177730, 0.235134),
- vec4<f32>(0.027009, -0.008278, 0.299946, 0.170957),
+ vec4<f32>(0.027009, -0.008278, 0.299946, 0.018995),
vec4<f32>(-0.011976, -0.130614, 0.041849, -0.063980),
- vec4<f32>(-0.098651, -0.113425, -0.001081, 0.006009),
+ vec4<f32>(-0.098651, -0.113425, -0.001081, 0.000668),
vec4<f32>(0.078393, 0.067415, 0.032002, 0.036014),
- vec4<f32>(0.024718, -0.130110, 0.077101, 0.260089),
+ vec4<f32>(0.024718, -0.130110, 0.077101, 0.028899),
vec4<f32>(0.218265, 0.276989, 0.397049, 0.167719),
- vec4<f32>(0.103226, -0.044606, 0.215080, 0.273301),
+ vec4<f32>(0.103226, -0.044606, 0.215080, 0.030367),
vec4<f32>(-0.039886, 0.070243, 0.398847, 0.200997),
- vec4<f32>(0.065451, 0.010958, 0.123146, 0.170957),
+ vec4<f32>(0.065451, 0.010958, 0.123146, 0.018995),
vec4<f32>(-0.137112, -0.237932, -0.104421, 0.001620),
- vec4<f32>(-0.124844, -0.134461, -0.069051, 0.006009),
+ vec4<f32>(-0.124844, -0.134461, -0.069051, 0.000668),
vec4<f32>(0.208783, 0.066374, 0.311512, 0.247299),
- vec4<f32>(0.050053, 0.085183, 0.228604, 0.260089),
+ vec4<f32>(0.050053, 0.085183, 0.228604, 0.028899),
vec4<f32>(0.026044, -0.080923, -0.109507, 0.237600),
- vec4<f32>(-0.110202, 0.117810, -0.098755, 0.273301),
+ vec4<f32>(-0.110202, 0.117810, -0.098755, 0.030367),
vec4<f32>(-0.105352, -0.026309, 0.015819, 0.201850),
- vec4<f32>(0.071904, -0.018016, -0.059729, 0.170957),
+ vec4<f32>(0.071904, -0.018016, -0.059729, 0.018995),
vec4<f32>(0.184291, -0.040453, -0.005354, -0.180611),
- vec4<f32>(0.048572, -0.114292, 0.055505, 0.006009),
+ vec4<f32>(0.048572, -0.114292, 0.055505, 0.000668),
vec4<f32>(-0.188638, -0.003004, -0.117432, 0.148765),
- vec4<f32>(-0.014862, -0.119666, -0.033349, 0.260089),
+ vec4<f32>(-0.014862, -0.119666, -0.033349, 0.028899),
vec4<f32>(0.062853, 0.098261, 0.253355, 0.213134),
- vec4<f32>(-0.115418, 0.123752, 0.108521, 0.273301),
+ vec4<f32>(-0.115418, 0.123752, 0.108521, 0.030367),
vec4<f32>(0.341238, 0.300137, 0.254389, 0.272310),
- vec4<f32>(0.004007, 0.063720, 0.191673, 0.170957),
+ vec4<f32>(0.004007, 0.063720, 0.191673, 0.018995),
vec4<f32>(0.068615, -0.206535, -0.003347, -0.135647),
- vec4<f32>(0.067868, -0.211995, -0.024734, 0.006009),
+ vec4<f32>(0.067868, -0.211995, -0.024734, 0.000668),
vec4<f32>(0.251922, 0.099100, 0.270495, 0.051887),
- vec4<f32>(-0.006790, 0.086298, 0.195115, 0.260089),
+ vec4<f32>(-0.006790, 0.086298, 0.195115, 0.028899),
vec4<f32>(0.142018, 0.148073, 0.222099, 0.154902),
- vec4<f32>(0.127208, 0.015852, 0.117386, 0.273301),
+ vec4<f32>(0.127208, 0.015852, 0.117386, 0.030367),
vec4<f32>(0.225823, 0.268716, 0.337170, 0.085034),
- vec4<f32>(-0.019814, -0.022082, 0.102247, 0.170957),
+ vec4<f32>(-0.019814, -0.022082, 0.102247, 0.018995),
vec4<f32>(-0.033797, -0.054259, -0.045772, -0.171449),
- vec4<f32>(-0.088982, -0.147155, -0.081125, 0.006009),
+ vec4<f32>(-0.088982, -0.147155, -0.081125, 0.000668),
vec4<f32>(0.143216, 0.161192, 0.085559, 0.114788),
- vec4<f32>(0.019867, -0.094182, 0.070013, 0.260089),
+ vec4<f32>(0.019867, -0.094182, 0.070013, 0.028899),
vec4<f32>(-0.132244, -0.208928, -0.233332, 0.166794),
- vec4<f32>(0.009434, 0.046124, -0.176356, 0.273301),
+ vec4<f32>(0.009434, 0.046124, -0.176356, 0.030367),
vec4<f32>(-0.262216, -0.289356, -0.361782, 0.204387),
- vec4<f32>(-0.049280, -0.054182, -0.178049, 0.170957),
+ vec4<f32>(-0.049280, -0.054182, -0.178049, 0.018995),
vec4<f32>(0.289035, 0.027330, 0.288493, -0.185146),
- vec4<f32>(-0.078621, -0.097178, 0.092377, 0.006009),
+ vec4<f32>(-0.078621, -0.097178, 0.092377, 0.000668),
vec4<f32>(-0.149584, -0.037569, -0.080982, 0.130455),
- vec4<f32>(0.048648, -0.020057, -0.104357, 0.260089),
+ vec4<f32>(0.048648, -0.020057, -0.104357, 0.028899),
vec4<f32>(0.069915, -0.044736, -0.214734, 0.125458),
- vec4<f32>(0.080929, -0.040713, -0.096242, 0.273301),
+ vec4<f32>(0.080929, -0.040713, -0.096242, 0.030367),
vec4<f32>(0.125697, 0.169932, -0.069403, 0.099432),
- vec4<f32>(-0.097684, 0.046785, 0.010933, 0.170957),
+ vec4<f32>(-0.097684, 0.046785, 0.010933, 0.018995),
vec4<f32>(0.123145, 0.053711, 0.183947, -0.086836),
- vec4<f32>(-0.121956, -0.181722, 0.100617, 0.006009),
+ vec4<f32>(-0.121956, -0.181722, 0.100617, 0.000668),
vec4<f32>(0.027013, 0.153177, 0.023556, 0.151352),
- vec4<f32>(-0.154991, -0.001818, 0.136445, 0.260089),
+ vec4<f32>(-0.154991, -0.001818, 0.136445, 0.028899),
vec4<f32>(-0.036641, -0.003436, -0.066425, 0.123169),
- vec4<f32>(-0.038525, -0.054057, 0.067107, 0.273301),
+ vec4<f32>(-0.038525, -0.054057, 0.067107, 0.030367),
vec4<f32>(0.008664, -0.173642, 0.053332, 0.226703),
- vec4<f32>(-0.079667, -0.041206, -0.013324, 0.170957),
+ vec4<f32>(-0.079667, -0.041206, -0.013324, 0.018995),
vec4<f32>(-0.120826, -0.216758, -0.047136, -0.029416),
- vec4<f32>(-0.175910, -0.127141, 0.025666, 0.006009),
+ vec4<f32>(-0.175910, -0.127141, 0.025666, 0.000668),
vec4<f32>(0.033025, -0.033796, -0.034294, 0.249376),
- vec4<f32>(0.045212, 0.018668, -0.083031, 0.260089)
+ vec4<f32>(0.045212, 0.018668, -0.083031, 0.028899)
);
const weights_layer1: array<vec4<f32>, 72> = array(
vec4<f32>(0.254979, 0.244192, -0.324097, 0.193675),
- vec4<f32>(-0.216693, -0.037426, 0.137290, 0.124129),
+ vec4<f32>(-0.216693, -0.037426, 0.137290, 0.013792),
vec4<f32>(-0.237225, -0.270981, 0.141186, -0.200254),
- vec4<f32>(0.112698, -0.065897, -0.043789, -0.017097),
+ vec4<f32>(0.112698, -0.065897, -0.043789, -0.001900),
vec4<f32>(-0.003851, 0.048306, -0.180819, 0.001167),
- vec4<f32>(-0.096826, -0.121978, 0.068031, 0.155805),
+ vec4<f32>(-0.096826, -0.121978, 0.068031, 0.017312),
vec4<f32>(-0.020896, -0.321049, 0.236152, -0.117393),
- vec4<f32>(0.166722, 0.114709, -0.040971, -0.035307),
+ vec4<f32>(0.166722, 0.114709, -0.040971, -0.003923),
vec4<f32>(0.086793, 0.348764, -0.262425, 0.388516),
- vec4<f32>(-0.003484, 0.049151, 0.089976, 0.124129),
+ vec4<f32>(-0.003484, 0.049151, 0.089976, 0.013792),
vec4<f32>(-0.080785, -0.197224, 0.172738, -0.199285),
- vec4<f32>(-0.056783, -0.079022, 0.002687, -0.017097),
+ vec4<f32>(-0.056783, -0.079022, 0.002687, -0.001900),
vec4<f32>(0.230001, 0.195060, -0.237831, 0.213528),
- vec4<f32>(0.010828, -0.175885, 0.075520, 0.155805),
+ vec4<f32>(0.010828, -0.175885, 0.075520, 0.017312),
vec4<f32>(-0.048712, -0.175662, 0.176246, -0.067224),
- vec4<f32>(-0.048118, 0.067163, -0.114919, -0.035307),
+ vec4<f32>(-0.048118, 0.067163, -0.114919, -0.003923),
vec4<f32>(0.134012, 0.106994, -0.240137, 0.317381),
- vec4<f32>(-0.209563, 0.027607, 0.150277, 0.124129),
+ vec4<f32>(-0.209563, 0.027607, 0.150277, 0.013792),
vec4<f32>(0.009538, -0.103125, 0.068209, -0.046492),
- vec4<f32>(0.007195, -0.039193, -0.100942, -0.017097),
+ vec4<f32>(0.007195, -0.039193, -0.100942, -0.001900),
vec4<f32>(0.052864, 0.280180, -0.209586, 0.237769),
- vec4<f32>(0.023231, 0.038595, -0.071840, 0.155805),
+ vec4<f32>(0.023231, 0.038595, -0.071840, 0.017312),
vec4<f32>(-0.141548, -0.233184, 0.023925, -0.019451),
- vec4<f32>(0.067484, 0.057416, 0.051800, -0.035307),
+ vec4<f32>(0.067484, 0.057416, 0.051800, -0.003923),
vec4<f32>(0.087521, 0.214685, -0.282225, 0.424372),
- vec4<f32>(-0.056052, -0.156497, 0.023169, 0.124129),
+ vec4<f32>(-0.056052, -0.156497, 0.023169, 0.013792),
vec4<f32>(-0.258638, -0.149771, 0.183942, -0.260751),
- vec4<f32>(0.148103, 0.129853, 0.012112, -0.017097),
+ vec4<f32>(0.148103, 0.129853, 0.012112, -0.001900),
vec4<f32>(0.010897, 0.133480, -0.192718, 0.252913),
- vec4<f32>(-0.088200, -0.138182, -0.005718, 0.155805),
+ vec4<f32>(-0.088200, -0.138182, -0.005718, 0.017312),
vec4<f32>(-0.209273, -0.180554, 0.232975, -0.064025),
- vec4<f32>(0.040287, -0.104819, 0.049971, -0.035307),
+ vec4<f32>(0.040287, -0.104819, 0.049971, -0.003923),
vec4<f32>(0.107734, 0.238235, -0.326000, 0.247477),
- vec4<f32>(-0.181079, -0.113835, 0.103895, 0.124129),
+ vec4<f32>(-0.181079, -0.113835, 0.103895, 0.013792),
vec4<f32>(-0.101286, -0.185177, 0.277056, -0.084833),
- vec4<f32>(0.106763, 0.107663, -0.073146, -0.017097),
+ vec4<f32>(0.106763, 0.107663, -0.073146, -0.001900),
vec4<f32>(-0.005552, 0.177759, -0.029719, 0.060847),
- vec4<f32>(0.161847, -0.061512, 0.132996, 0.155805),
+ vec4<f32>(0.161847, -0.061512, 0.132996, 0.017312),
vec4<f32>(-0.257929, -0.377618, 0.130284, -0.159584),
- vec4<f32>(0.093874, -0.134273, -0.030173, -0.035307),
+ vec4<f32>(0.093874, -0.134273, -0.030173, -0.003923),
vec4<f32>(0.214204, 0.298440, -0.217456, 0.215751),
- vec4<f32>(-0.122887, -0.152684, 0.125991, 0.124129),
+ vec4<f32>(-0.122887, -0.152684, 0.125991, 0.013792),
vec4<f32>(-0.183399, -0.242007, 0.076115, -0.234678),
- vec4<f32>(0.097245, 0.104651, 0.037698, -0.017097),
+ vec4<f32>(0.097245, 0.104651, 0.037698, -0.001900),
vec4<f32>(0.119585, 0.291845, -0.081658, 0.209448),
- vec4<f32>(0.205421, -0.010635, -0.016784, 0.155805),
+ vec4<f32>(0.205421, -0.010635, -0.016784, 0.017312),
vec4<f32>(-0.097006, -0.123679, 0.222243, -0.211022),
- vec4<f32>(0.092569, -0.032077, -0.082684, -0.035307),
+ vec4<f32>(0.092569, -0.032077, -0.082684, -0.003923),
vec4<f32>(0.225746, 0.263205, -0.388231, 0.342453),
- vec4<f32>(-0.002536, 0.050069, 0.130625, 0.124129),
+ vec4<f32>(-0.002536, 0.050069, 0.130625, 0.013792),
vec4<f32>(-0.238043, -0.051000, 0.113924, -0.204495),
- vec4<f32>(0.020115, -0.106413, 0.127491, -0.017097),
+ vec4<f32>(0.020115, -0.106413, 0.127491, -0.001900),
vec4<f32>(0.194054, 0.126466, -0.102551, 0.263437),
- vec4<f32>(-0.072873, 0.018799, -0.015787, 0.155805),
+ vec4<f32>(-0.072873, 0.018799, -0.015787, 0.017312),
vec4<f32>(-0.042832, -0.173311, 0.147956, -0.126879),
- vec4<f32>(0.128747, -0.134294, 0.111303, -0.035307),
+ vec4<f32>(0.128747, -0.134294, 0.111303, -0.003923),
vec4<f32>(0.243550, 0.288997, -0.250508, 0.252519),
- vec4<f32>(-0.089251, -0.111297, 0.112490, 0.124129),
+ vec4<f32>(-0.089251, -0.111297, 0.112490, 0.013792),
vec4<f32>(-0.223589, -0.095261, 0.145935, -0.139099),
- vec4<f32>(0.156093, -0.096579, -0.006812, -0.017097),
+ vec4<f32>(0.156093, -0.096579, -0.006812, -0.001900),
vec4<f32>(0.005154, 0.251212, -0.107685, 0.143674),
- vec4<f32>(-0.017334, -0.082075, 0.088612, 0.155805),
+ vec4<f32>(-0.017334, -0.082075, 0.088612, 0.017312),
vec4<f32>(-0.199050, -0.333849, 0.219760, -0.079489),
- vec4<f32>(0.127082, 0.103959, -0.133810, -0.035307),
+ vec4<f32>(0.127082, 0.103959, -0.133810, -0.003923),
vec4<f32>(0.297919, 0.254741, -0.134594, 0.088785),
- vec4<f32>(-0.095642, -0.044997, 0.167123, 0.124129),
+ vec4<f32>(-0.095642, -0.044997, 0.167123, 0.013792),
vec4<f32>(-0.004141, -0.243894, 0.176584, -0.157082),
- vec4<f32>(0.068545, -0.112190, -0.117632, -0.017097),
+ vec4<f32>(0.068545, -0.112190, -0.117632, -0.001900),
vec4<f32>(0.045947, 0.055152, -0.161696, 0.018504),
- vec4<f32>(0.197872, 0.018991, -0.106403, 0.155805),
+ vec4<f32>(0.197872, 0.018991, -0.106403, 0.017312),
vec4<f32>(-0.190943, -0.075032, 0.069981, -0.145124),
- vec4<f32>(0.012619, 0.059838, -0.139603, -0.035307)
+ vec4<f32>(0.012619, 0.059838, -0.139603, -0.003923)
);
const weights_layer2: array<vec4<f32>, 18> = array(
vec4<f32>(0.245902, -0.217970, 0.056291, -0.070807),
- vec4<f32>(0.016448, -0.018901, -0.124061, -0.041984),
+ vec4<f32>(0.016448, -0.018901, -0.124061, -0.004665),
vec4<f32>(0.031777, -0.024354, 0.057418, -0.191855),
- vec4<f32>(0.066501, -0.005719, 0.060455, -0.041984),
+ vec4<f32>(0.066501, -0.005719, 0.060455, -0.004665),
vec4<f32>(0.152728, -0.171275, 0.042248, -0.104487),
- vec4<f32>(0.080702, -0.084644, 0.121606, -0.041984),
+ vec4<f32>(0.080702, -0.084644, 0.121606, -0.004665),
vec4<f32>(0.250396, -0.225428, 0.240979, -0.069204),
- vec4<f32>(-0.025812, -0.029050, 0.050136, -0.041984),
+ vec4<f32>(-0.025812, -0.029050, 0.050136, -0.004665),
vec4<f32>(0.042793, -0.011392, 0.222963, -0.241601),
- vec4<f32>(-0.134324, -0.071387, 0.108891, -0.041984),
+ vec4<f32>(-0.134324, -0.071387, 0.108891, -0.004665),
vec4<f32>(0.130457, -0.123356, 0.002928, -0.122289),
- vec4<f32>(-0.059852, -0.166258, -0.062116, -0.041984),
+ vec4<f32>(-0.059852, -0.166258, -0.062116, -0.004665),
vec4<f32>(0.171512, -0.030577, 0.234602, -0.193894),
- vec4<f32>(-0.020420, -0.071602, -0.056219, -0.041984),
+ vec4<f32>(-0.020420, -0.071602, -0.056219, -0.004665),
vec4<f32>(0.242776, -0.138304, 0.220627, -0.012853),
- vec4<f32>(0.020668, -0.104949, -0.013328, -0.041984),
+ vec4<f32>(0.020668, -0.104949, -0.013328, -0.004665),
vec4<f32>(0.013987, -0.195309, 0.083045, -0.189454),
- vec4<f32>(0.028755, -0.036818, 0.045813, -0.041984)
+ vec4<f32>(0.028755, -0.036818, 0.045813, -0.004665)
);