From 15c4c0cf02d2b4ec8b3446bbc1805651ab312889 Mon Sep 17 00:00:00 2001
From: skal <pascal.massimino@gmail.com>
Date: Mon, 2 Feb 2026 13:34:36 +0100
Subject: feat(3d): Implement scene query shadows (POC)

- Updated Renderer3D shader to include 'map_scene' and 'calc_shadow' functions.
- Shader now iterates over all objects (up to kMaxObjects) to evaluate the scene SDF globally.
- Implemented hard/soft shadows from a fixed directional light.
- Updated GlobalUniforms struct with proper packing/padding for WebGPU compatibility.
- Captured 'GPU BVH & Shadows' task in PROJECT_CONTEXT.md for future optimization.
---
 PROJECT_CONTEXT.md |  1 +
 TODO.md            |  1 +
 src/3d/renderer.cc | 98 ++++++++++++++++++++++++++++--------------------------
 src/3d/renderer.h  |  4 +--
 4 files changed, 55 insertions(+), 49 deletions(-)
diff --git a/PROJECT_CONTEXT.md b/PROJECT_CONTEXT.md
index ee22a89..538d00e 100644
--- a/PROJECT_CONTEXT.md
+++ b/PROJECT_CONTEXT.md
@@ -38,6 +38,7 @@ Style:
 - **Task #18: 3D System Enhancements**
     - [ ] **Visual Debug Mode**: Implement a debug overlay (removable with `STRIP_ALL`) to render wireframe bounding volumes, object trajectories, and light source representations.
     - [ ] **Blender Exporter**: Create a tool to convert simple Blender scenes into the demo's internal asset format.
+    - [ ] **GPU BVH & Shadows**: Implement a GPU-based Bounding Volume Hierarchy (BVH) to optimize scene queries (shadows, AO) from the shader, replacing the current O(N) loop.
 - **Phase 2: Advanced Size Optimization**
     - [x] PC+Windows (.exe binary) via MinGW
     - [ ] Task #4a: Linux Cross-Compilation
diff --git a/TODO.md b/TODO.md
index b4c2f58..19097da 100644
--- a/TODO.md
+++ b/TODO.md
@@ -22,6 +22,7 @@ This file tracks the next set of immediate, actionable tasks for the project.
            == Can this std::vector<T> be replaced by a simple C-like "const T*" array?
            == are these std::string needed or can they be replaced by some 'const char*' ?
            == etc.
+    - [ ] The inclusion of gpu.h (either "gpu.h" or <webgpu/gpu.h>) is a recurring source of compilation and portability issues. Can we include gpu.h once, in a platform header, instead of scattering the inclusion across .cc files? That would confine include-related failures to a single place during compilation and portability checks.
 
 ## Future Goals
 
diff --git a/src/3d/renderer.cc b/src/3d/renderer.cc
index adc9a5f..031895d 100644
--- a/src/3d/renderer.cc
+++ b/src/3d/renderer.cc
@@ -14,8 +14,8 @@ bool Renderer3D::s_debug_enabled_ = false;
 static const char* kShaderCode = R"(
 struct GlobalUniforms {
     view_proj: mat4x4<f32>,
-    camera_pos: vec3<f32>,
-    time: f32,
+    camera_pos_time: vec4<f32>,
+    params: vec4<f32>,
 };
 
 struct ObjectData {
@@ -95,6 +95,44 @@ fn get_dist(p: vec3<f32>, obj_type: f32) -> f32 {
     return 100.0;
 }
 
+// Global scene SDF: smallest world-space distance from p over all SDF objects.
+fn map_scene(p: vec3<f32>) -> f32 {
+    let num_objects = u32(globals.params.x);
+    var nearest = 1000.0; // Returned as-is when no SDF object is evaluated
+    // Linear scan: every object is evaluated for every query
+    for (var idx = 0u; idx < num_objects; idx++) {
+        let obj = object_data.objects[idx];
+        let kind = obj.params.x;
+        if (kind <= 0.0) { continue; } // Non-positive type: not an SDF object
+
+        // World -> object space. One scale factor (length of the first basis
+        // column) is applied to all axes, i.e. uniform scaling is assumed.
+        let origin = obj.model[3].xyz;
+        let s = length(obj.model[0].xyz);
+        let basis = mat3x3<f32>(
+            obj.model[0].xyz / s, obj.model[1].xyz / s, obj.model[2].xyz / s);
+        let local_p = transpose(basis) * (p - origin) / s;
+        // get_dist() works in object space; rescale its result to world units
+        nearest = min(nearest, get_dist(local_p, kind) * s);
+    }
+    return nearest;
+}
+
+// Marches map_scene() from ro along rd over [tmin, tmax]; returns 0.0 (occluded) up to 1.0 (lit).
+fn calc_shadow(ro: vec3<f32>, rd: vec3<f32>, tmin: f32, tmax: f32) -> f32 {
+    var t = tmin;
+    var res = 1.0;
+    for (var i = 0; i < 30; i++) {
+        let h = map_scene(ro + rd * t);
+        if (h < 0.001) { return 0.0; } // Hard shadow hit
+        // Soft shadow k=8. Floor the divisor: t is 0.0 when tmin is 0.0, and h / 0.0 is indeterminate in WGSL.
+        res = min(res, 8.0 * h / max(t, 0.0001));
+        t = t + h;
+        if (t > tmax) { break; }
+    }
+    return res;
+}
+
 fn get_normal(p: vec3<f32>, obj_type: f32) -> vec3<f32> {
     if (obj_type == 1.0) { return normalize(p); }
     let e = vec2<f32>(0.001, 0.0);
@@ -127,8 +165,8 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
     
     let center = vec3<f32>(obj.model[3].x, obj.model[3].y, obj.model[3].z);
     let scale = length(vec3<f32>(obj.model[0].x, obj.model[0].y, obj.model[0].z));
-    let ro = globals.camera_pos;
-    let rd = normalize(in.world_pos - globals.camera_pos);
+    let ro = globals.camera_pos_time.xyz;
+    let rd = normalize(in.world_pos - ro);
     var t = length(in.world_pos - ro); 
     var p = ro + rd * t;
     
@@ -146,47 +184,10 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
     if (!hit) { discard; }
     
     // Shading
-    // Recompute local pos at hit
     let q_hit = transpose(mat3) * (p - center) / scale;
     
-    // Calculate normal with bump mapping (Displacement method)
-    // N = normalize(gradient( dist(p) - displacement(p) ))
-    // We do finite difference on the combined field.
-    
-    let e = vec2<f32>(0.005, 0.0); // Slightly larger epsilon for texture smoothness
-    
-    // Helper to get displaced distance
-    // Note: We only displace for normal calc, not for the raymarch hit (surface detail only)
-    // or we could refine the hit. For now, just lighting.
-    
-    // Inline helper for displacement
-    // We need UVs for any point q
-    // UV Mapping: Spherical
-    
-    var n_local = vec3<f32>(0.0);
-    
-    // Base normal
-    let n_base = get_normal(q_hit, obj_type);
-    
-    // Sample noise at center
-    let uv_c = vec2<f32>(atan2(q_hit.x, q_hit.z) / 6.28 + 0.5, acos(clamp(q_hit.y / length(q_hit), -1.0, 1.0)) / 3.14);
-    let h_c = textureSample(noise_tex, noise_sampler, uv_c).r;
-    
-    // Evaluate noise gradient via finite difference on UVs? 
-    // Or just 3D finite difference on pos?
-    // 3D FD is generic but requires 6 texture samples (or 4 tetra).
-    // Let's try a cheaper trick: Gradient of texture in UV space?
-    // textureSampleGrad? No, we want world space normal perturbation.
-    
-    // Standard tri-planar or 3D noise is better for SDFs, but we have 2D texture.
-    // Let's stick to the "Gradient by 2D finite difference on UVs" or simply perturb n_base with derivatives.
-    // simpler:
-    // float h = texture(...);
-    // vec3 bump = vec3(dFdx(h), dFdy(h), 0.0); // Screen space derivative? No.
-    
-    // Let's go with the robust 3D FD on the displacement field.
-    // dist_disp(q) = get_dist(q) - 0.02 * noise(q)
-    
+    // Calculate normal with bump mapping
+    let e = vec2<f32>(0.005, 0.0);
     let disp_strength = 0.05;
     
     let q_x1 = q_hit + e.xyy;
@@ -219,13 +220,15 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
     let h_z2 = textureSample(noise_tex, noise_sampler, uv_z2).r;
     let d_z2 = get_dist(q_z2, obj_type) - disp_strength * h_z2;
     
-    n_local = normalize(vec3<f32>(d_x1 - d_x2, d_y1 - d_y2, d_z1 - d_z2));
+    let n_local = normalize(vec3<f32>(d_x1 - d_x2, d_y1 - d_y2, d_z1 - d_z2));
     
     let n_world = mat3 * n_local; 
     let normal = normalize(n_world);
 
     let light_dir = normalize(vec3<f32>(1.0, 1.0, 1.0));
-    let lighting = max(dot(normal, light_dir), 0.0) + 0.1;
+    let shadow = calc_shadow(p + normal * 0.05, light_dir, 0.0, 20.0);
+    
+    let lighting = (max(dot(normal, light_dir), 0.0) * shadow) + 0.1;
     return vec4<f32>(in.color.rgb * lighting, 1.0);
 }
 )";
@@ -413,8 +416,9 @@ void Renderer3D::update_uniforms(const Scene& scene, const Camera& camera,
                                  float time) {
   GlobalUniforms globals;
   globals.view_proj = camera.get_projection_matrix() * camera.get_view_matrix();
-  globals.camera_pos = camera.position;
-  globals.time = time;
+  globals.camera_pos_time = vec4(camera.position.x, camera.position.y, camera.position.z, time);
+  globals.params = vec4((float)std::min((size_t)kMaxObjects, scene.objects.size()), 0.0f, 0.0f, 0.0f);
+  
   wgpuQueueWriteBuffer(queue_, global_uniform_buffer_, 0, &globals,
                        sizeof(GlobalUniforms));
 
diff --git a/src/3d/renderer.h b/src/3d/renderer.h
index 453daf2..c4fec06 100644
--- a/src/3d/renderer.h
+++ b/src/3d/renderer.h
@@ -16,8 +16,8 @@
 // Matches the GPU struct layout
 struct GlobalUniforms {
   mat4 view_proj;
-  vec3 camera_pos;
-  float time;
+  vec4 camera_pos_time; // xyz = camera_pos, w = time
+  vec4 params;          // x = num_objects, yzw = padding
 };
 
 // Matches the GPU struct layout
-- 
cgit v1.2.3