summaryrefslogtreecommitdiff
path: root/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl
blob: 48bb392f873e8f3faceb977d52f4036801931049 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// 3x3 convolution (vec4-optimized)

// Source layer: 3x3 convolution, 7 input channels -> 4 output channels.
// 'tex' is expected to hold values in [0,1] (not pre-normalized); every
// sample is remapped to [-1,1] here before being weighted.
// 'uv' is given in [0,1] and is remapped to [-1,1] internally.
// The 7 inputs are the sampled RGBD texel (4) plus uv.x, uv.y and the
// center pixel's gray value (3). A constant 1.0 fills the fourth lane of
// the second input vector, so that weight component is added unscaled.
// weights: array<vec4<f32>, 72> (9 pos × 4 ch × 2 vec4); for each tap and
// output channel, the first vec4 multiplies RGBD and the second multiplies
// (uv.x, uv.y, gray, 1.0).
fn cnn_conv3x3_7to4_src(
  tex: texture_2d<f32>,
  samp: sampler,
  uv: vec2<f32>,
  resolution: vec2<f32>,
  weights: array<vec4<f32>, 72>
) -> vec4<f32> {
  // Size of one texel in UV units.
  let texel = 1.0 / resolution;

  // Center sample remapped to [-1,1]; only used to derive the gray input
  // (Rec. 709 luma coefficients).
  let center = (textureSample(tex, samp, uv) - 0.5) * 2.0;
  let luma = dot(center.rgb, vec3<f32>(0.2126, 0.7152, 0.0722));

  // Per-pixel inputs shared by all nine taps: normalized UV, gray, 1.0.
  let extra = vec4<f32>((uv - 0.5) * 2.0, luma, 1.0);

  var acc = vec4<f32>(0.0);

  // Walk the 3x3 neighborhood row by row (dy outer, dx inner).
  for (var tap = 0; tap < 9; tap++) {
    let dx = tap % 3 - 1;
    let dy = tap / 3 - 1;
    // Each tap owns 8 consecutive vec4 weights (4 outputs × 2 vec4).
    let base = tap * 8;

    let shift = vec2<f32>(f32(dx), f32(dy)) * texel;
    let rgbd = (textureSample(tex, samp, uv + shift) - 0.5) * 2.0;

    acc += vec4<f32>(
      dot(weights[base + 0], rgbd) + dot(weights[base + 1], extra),
      dot(weights[base + 2], rgbd) + dot(weights[base + 3], extra),
      dot(weights[base + 4], rgbd) + dot(weights[base + 5], extra),
      dot(weights[base + 6], rgbd) + dot(weights[base + 7], extra)
    );
  }

  return acc;
}

// Inner layers: 3x3 convolution, 7 input channels -> 4 output channels
// (vec4-optimized).
// 'tex' is expected to be already normalized to [-1,1]; samples are used
// as-is, with no remapping.
// 'uv' is given in [0,1] and is remapped to [-1,1] internally.
// 'gray' is supplied by the caller and is used unchanged as the seventh input.
// weights: array<vec4<f32>, 72> (9 pos × 4 ch × 2 vec4); for each tap and
// output channel, the first vec4 multiplies the sampled RGBD texel and the
// second multiplies (uv.x, uv.y, gray, 1.0).
fn cnn_conv3x3_7to4(
  tex: texture_2d<f32>,
  samp: sampler,
  uv: vec2<f32>,
  resolution: vec2<f32>,
  gray: f32,
  weights: array<vec4<f32>, 72>
) -> vec4<f32> {
  // Size of one texel in UV units.
  let step = 1.0 / resolution;

  // Normalize UV to [-1,1]
  let uv_norm = (uv - 0.5) * 2.0;

  // Loop-invariant second input vector: it does not depend on the tap, so
  // it is built once here instead of once per iteration.
  let in1 = vec4<f32>(uv_norm, gray, 1.0);

  var sum = vec4<f32>(0.0);

  // 'pos' indexes the first of the 8 vec4 weights belonging to the current tap.
  var pos = 0;
  for (var dy = -1; dy <= 1; dy++) {
    for (var dx = -1; dx <= 1; dx++) {
      let offset = vec2<f32>(f32(dx), f32(dy)) * step;
      let rgbd = textureSample(tex, samp, uv + offset);

      sum.r += dot(weights[pos+0], rgbd) + dot(weights[pos+1], in1);
      sum.g += dot(weights[pos+2], rgbd) + dot(weights[pos+3], in1);
      sum.b += dot(weights[pos+4], rgbd) + dot(weights[pos+5], in1);
      sum.a += dot(weights[pos+6], rgbd) + dot(weights[pos+7], in1);
      pos += 8;
    }
  }

  return sum;
}

// Final layer: 3x3 convolution, 7 input channels -> 1 output channel
// (vec4-optimized).
// 'tex' is expected to be already normalized to [-1,1]; samples are used
// as-is, with no remapping.
// 'uv' is given in [0,1] and is remapped to [-1,1] internally.
// 'gray' is supplied by the caller and is used unchanged as the seventh input.
// weights: array<vec4<f32>, 18> (9 pos × 2 vec4); for each tap, the first
// vec4 multiplies the sampled RGBD texel and the second multiplies
// (uv.x, uv.y, gray, 1.0).
fn cnn_conv3x3_7to1(
  tex: texture_2d<f32>,
  samp: sampler,
  uv: vec2<f32>,
  resolution: vec2<f32>,
  gray: f32,
  weights: array<vec4<f32>, 18>
) -> f32 {
  // Size of one texel in UV units.
  let texel = 1.0 / resolution;

  // Per-pixel inputs shared by all nine taps: normalized UV, gray, 1.0.
  let extra = vec4<f32>((uv - 0.5) * 2.0, gray, 1.0);

  var acc = 0.0;

  // Walk the 3x3 neighborhood row by row (dy outer, dx inner).
  for (var tap = 0; tap < 9; tap++) {
    let dx = tap % 3 - 1;
    let dy = tap / 3 - 1;
    // Each tap owns 2 consecutive vec4 weights.
    let base = tap * 2;

    let shift = vec2<f32>(f32(dx), f32(dy)) * texel;
    let rgbd = textureSample(tex, samp, uv + shift);

    acc += dot(weights[base], rgbd) + dot(weights[base + 1], extra);
  }

  return acc;
}