summaryrefslogtreecommitdiff
path: root/training/gen_identity_weights.py
diff options
context:
space:
mode:
author: skal <pascal.massimino@gmail.com> 2026-02-14 00:50:11 +0100
committer: skal <pascal.massimino@gmail.com> 2026-02-14 00:50:11 +0100
commit: 67ada21b34d87c780f42b1af7958dcf831d864ec (patch)
tree: 44654f744f80fb3b1e78fe7e243ab295f0b37d08 /training/gen_identity_weights.py
parent: c7ea4123efa2457e17a9894bba645914c065a190 (diff)
gen_identity_weights: Change --mix to 50-50 blend
Updates --mix mode to use 50-50 weighting to avoid overflow:
- Before: p0+p4, p1+p5, p2+p6, p3+p7
- After: 0.5*p0+0.5*p4, 0.5*p1+0.5*p5, etc.

Prevents saturation when blending input with static features.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
Diffstat (limited to 'training/gen_identity_weights.py')
-rwxr-xr-x training/gen_identity_weights.py 22
1 files changed, 11 insertions, 11 deletions
diff --git a/training/gen_identity_weights.py b/training/gen_identity_weights.py
index 0d79593..5756e67 100755
--- a/training/gen_identity_weights.py
+++ b/training/gen_identity_weights.py
@@ -4,8 +4,8 @@
Creates trivial .bin with 1 layer, 1×1 kernel, identity passthrough.
Output Ch{0,1,2,3} = Input Ch{0,1,2,3} (ignores static features).
-With --mix: Output Ch{i} = Input Ch{i} + Input Ch{i+4}
- (p0+p4, p1+p5, p2+p6, p3+p7)
+With --mix: Output Ch{i} = 0.5*Input Ch{i} + 0.5*Input Ch{i+4}
+ (50-50 blend, avoids overflow)
With --p47: Output Ch{i} = Input Ch{i+4} (static features only)
(p4→ch0, p5→ch1, p6→ch2, p7→ch3)
@@ -25,7 +25,7 @@ from pathlib import Path
def generate_identity_weights(output_path, kernel_size=1, mip_level=0, mix=False, p47=False):
"""Generate identity weights: output = input (ignores static features).
- If mix=True, adds p4→p0, p5→p1, p6→p2, p7→p3 (blends input with static).
+ If mix=True, 50-50 blend: 0.5*p0+0.5*p4, 0.5*p1+0.5*p5, etc (avoids overflow).
If p47=True, transfers p4→p0, p5→p1, p6→p2, p7→p3 (static features only).
Binary format:
@@ -63,16 +63,16 @@ def generate_identity_weights(output_path, kernel_size=1, mip_level=0, mix=False
# p47 mode: p4→ch0, p5→ch1, p6→ch2, p7→ch3 (static features only)
for i in range(out_channels):
weights[i, i + 4, center, center] = 1.0
+ elif mix:
+ # Mix mode: 50-50 blend to avoid overflow
+ for i in range(out_channels):
+ weights[i, i, center, center] = 0.5 # 0.5*p{i}
+ weights[i, i + 4, center, center] = 0.5 # 0.5*p{i+4}
else:
- # Set diagonal to 1.0 (output ch i = input ch i)
+ # Identity: output ch i = input ch i
for i in range(out_channels):
weights[i, i, center, center] = 1.0
- # If mix, add p4→p0, p5→p1, p6→p2, p7→p3
- if mix:
- for i in range(out_channels):
- weights[i, i + 4, center, center] = 1.0
-
# Flatten
weights_flat = weights.flatten()
weight_count = len(weights_flat)
@@ -84,7 +84,7 @@ def generate_identity_weights(output_path, kernel_size=1, mip_level=0, mix=False
print(f" Weights: {weight_count}")
print(f" Mip level: {mip_level}")
if mix:
- print(f" Mode: p0+p4, p1+p5, p2+p6, p3+p7")
+ print(f" Mode: 0.5*p0+0.5*p4, 0.5*p1+0.5*p5, 0.5*p2+0.5*p6, 0.5*p3+0.5*p7")
elif p47:
print(f" Mode: p4→ch0, p5→ch1, p6→ch2, p7→ch3")
@@ -150,7 +150,7 @@ def main():
parser.add_argument('--mip-level', type=int, default=0,
help='Mip level for p0-p3 features (default: 0)')
parser.add_argument('--mix', action='store_true',
- help='Mix mode: p0+p4, p1+p5, p2+p6, p3+p7')
+ help='Mix mode: 50-50 blend of p0-p3 and p4-p7')
parser.add_argument('--p47', action='store_true',
help='Static features only: p4→ch0, p5→ch1, p6→ch2, p7→ch3')