diff options
38 files changed, 2595 insertions, 578 deletions
@@ -11,6 +11,7 @@ @doc/HOWTO.md @doc/CONTRIBUTING.md @doc/AI_RULES.md +@doc/EFFECT_WORKFLOW.md # ============================================ # TIER 3: DESIGN DOCS (Load On-Demand) diff --git a/CMakeLists.txt b/CMakeLists.txt index 97b371a..48a46e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -149,6 +149,8 @@ if (DEMO_HEADLESS) src/gpu/effects/particle_spray_effect.cc src/gpu/effects/gaussian_blur_effect.cc src/gpu/effects/solarize_effect.cc +# Disabled: src/gpu/effects/cube_sphere_effect.cc (incomplete conversion) + src/gpu/effects/scene1_effect.cc src/gpu/effects/chroma_aberration_effect.cc src/gpu/effects/vignette_effect.cc src/gpu/effects/cnn_effect.cc @@ -179,6 +181,8 @@ else() src/gpu/effects/particle_spray_effect.cc src/gpu/effects/gaussian_blur_effect.cc src/gpu/effects/solarize_effect.cc +# Disabled: src/gpu/effects/cube_sphere_effect.cc (incomplete conversion) + src/gpu/effects/scene1_effect.cc src/gpu/effects/chroma_aberration_effect.cc src/gpu/effects/vignette_effect.cc src/gpu/effects/cnn_effect.cc @@ -16,5 +16,6 @@ cmake --build build -j4 - **TODO.md** - Active tasks and priorities - **doc/HOWTO.md** - Common operations (building, testing, assets) - **doc/CONTRIBUTING.md** - Development guidelines and protocols +- **doc/EFFECT_WORKFLOW.md** - Step-by-step guide for adding visual effects See `doc/` for detailed technical documentation. diff --git a/doc/AI_RULES.md b/doc/AI_RULES.md index d18a0cc..1a4ee78 100644 --- a/doc/AI_RULES.md +++ b/doc/AI_RULES.md @@ -5,3 +5,22 @@ - Prefer small, reviewable commits - All `cmake --build` commands must use the `-j4` option for parallel building. - after a task, a 'big' final commit should contain a short handoff tag like "handoff(Gemini):..." if you're gemini-cli, or "handoff(Claude): ..." if you're claude-code. + +## Adding Visual Effects + +**IMPORTANT:** When adding new visual effects, follow the complete workflow in `doc/EFFECT_WORKFLOW.md`. + +**Required steps (must complete ALL):** +1. 
Create effect files (.h, .cc, .wgsl) +2. Add shader to `workspaces/main/assets.txt` +3. Add `.cc` to CMakeLists.txt GPU_SOURCES (BOTH sections: headless and normal) +4. Include header in `src/gpu/demo_effects.h` +5. Add to timeline with `EFFECT +` (priority modifier is REQUIRED) +6. Add to test list in `src/tests/gpu/test_demo_effects.cc` +7. Build and verify: `cmake --build build -j4 && cd build && ./test_demo_effects` + +**Common mistakes to avoid:** +- Missing priority modifier in timeline (`EFFECT` must be `EFFECT +`, `EFFECT =`, or `EFFECT -`) +- Adding `.cc` to only one CMakeLists.txt section (need BOTH headless and normal) +- Wrong asset ID (check assets.txt entry name → `ASSET_SHADER_<NAME>`) +- Forgetting to add to test file diff --git a/doc/CNN_EFFECT.md b/doc/CNN_EFFECT.md index ae0f38a..4659fd3 100644 --- a/doc/CNN_EFFECT.md +++ b/doc/CNN_EFFECT.md @@ -21,27 +21,46 @@ Trainable convolutional neural network layers for artistic stylization (painterl ## Architecture -### Coordinate-Aware Layer 0 +### RGBD → Grayscale Pipeline -Layer 0 accepts normalized (x,y) patch center coordinates alongside RGBA samples: +**Input:** RGBD (RGB + inverse depth D=1/z) +**Output:** Grayscale (1 channel) +**Layer Input:** 7 channels = [RGBD, UV coords, grayscale] all normalized to [-1,1] + +**Architecture:** +- **Inner layers (0..N-2):** Conv2d(7→4) - output RGBD +- **Final layer (N-1):** Conv2d(7→1) - output grayscale ```wgsl -fn cnn_conv3x3_with_coord( +// Inner layers: 7→4 (RGBD output) +fn cnn_conv3x3_7to4( tex: texture_2d<f32>, samp: sampler, - uv: vec2<f32>, # Center position [0,1] + uv: vec2<f32>, resolution: vec2<f32>, - rgba_weights: array<mat4x4<f32>, 9>, # 9 samples × 4×4 matrix - coord_weights: mat2x4<f32>, # 2 coords → 4 outputs - bias: vec4<f32> + original: vec4<f32>, # Original RGBD [-1,1] + weights: array<array<f32, 8>, 36> # 9 pos × 4 out × (7 weights + bias) ) -> vec4<f32> -``` -**Input structure:** 9 RGBA samples (36 values) + 1 xy coordinate (2 values) = 
38 inputs → 4 outputs +// Final layer: 7→1 (grayscale output) +fn cnn_conv3x3_7to1( + tex: texture_2d<f32>, + samp: sampler, + uv: vec2<f32>, + resolution: vec2<f32>, + original: vec4<f32>, + weights: array<array<f32, 8>, 9> # 9 pos × (7 weights + bias) +) -> f32 +``` -**Size impact:** +32B coord weights, kernel-agnostic +**Input normalization:** +- **fs_main** normalizes textures once: `(tex - 0.5) * 2` → [-1,1] +- **Conv functions** normalize UV coords: `(uv - 0.5) * 2` → [-1,1] +- **Grayscale** computed from normalized RGBD: `0.2126*R + 0.7152*G + 0.0722*B` +- **Inter-layer data** stays in [-1,1] (no denormalization) +- **Final output** denormalized for display: `(result + 1.0) * 0.5` → [0,1] -**Use cases:** Position-dependent stylization (vignettes, corner darkening, radial gradients) +**Activation:** tanh for inner layers (output stays [-1,1]), none for final layer ### Multi-Layer Architecture @@ -80,18 +99,15 @@ workspaces/main/shaders/cnn/ ### 1. Prepare Training Data Collect input/target image pairs: -- **Input:** Raw 3D render -- **Target:** Artistic style (hand-painted, filtered, stylized) +- **Input:** RGBA (RGB + depth as alpha channel, D=1/z) +- **Target:** Grayscale stylized output ```bash -training/input/img_000.png # Raw render -training/output/img_000.png # Stylized target +training/input/img_000.png # RGBA render (RGB + depth) +training/output/img_000.png # Grayscale target ``` -Use `image_style_processor.py` to generate targets: -```bash -python3 training/image_style_processor.py input/ output/ pencil_sketch -``` +**Note:** Input images must be RGBA where alpha = inverse depth (1/z) ### 2. 
Train Network @@ -135,6 +151,14 @@ python3 training/train_cnn.py \ --output workspaces/main/shaders/cnn/cnn_weights_generated.wgsl ``` +**Generate ground truth (for shader validation):** +```bash +python3 training/train_cnn.py \ + --infer training/input/img_000.png \ + --export-only training/checkpoints/checkpoint_epoch_200.pth \ + --output training/ground_truth.png +``` + ### 3. Rebuild Demo Training script auto-generates both `cnn_weights_generated.wgsl` and `cnn_layer.wgsl`: @@ -245,20 +269,25 @@ Expands to: **Weight Storage:** -**Layer 0 (coordinate-aware):** +**Inner layers (7→4 RGBD output):** ```wgsl -const rgba_weights_layer0: array<mat4x4<f32>, 9> = array(...); -const coord_weights_layer0 = mat2x4<f32>( - 0.1, -0.2, 0.0, 0.0, # x-coord weights - -0.1, 0.0, 0.2, 0.0 # y-coord weights +// Structure: array<array<f32, 8>, 36> +// 9 positions × 4 output channels, each with 7 weights + bias +const weights_layer0: array<array<f32, 8>, 36> = array( + array<f32, 8>(w0_r, w0_g, w0_b, w0_d, w0_u, w0_v, w0_gray, bias0), // pos0_ch0 + array<f32, 8>(w1_r, w1_g, w1_b, w1_d, w1_u, w1_v, w1_gray, bias1), // pos0_ch1 + // ... 34 more entries ); -const bias_layer0 = vec4<f32>(0.0, 0.0, 0.0, 0.0); ``` -**Layers 1+ (standard):** +**Final layer (7→1 grayscale output):** ```wgsl -const weights_layer1: array<mat4x4<f32>, 9> = array(...); -const bias_layer1 = vec4<f32>(0.0, 0.0, 0.0, 0.0); +// Structure: array<array<f32, 8>, 9> +// 9 positions, each with 7 weights + bias +const weights_layerN: array<array<f32, 8>, 9> = array( + array<f32, 8>(w0_r, w0_g, w0_b, w0_d, w0_u, w0_v, w0_gray, bias0), // pos0 + // ... 
8 more entries +); ``` --- diff --git a/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md b/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md new file mode 100644 index 0000000..4c13693 --- /dev/null +++ b/doc/CNN_RGBD_GRAYSCALE_SUMMARY.md @@ -0,0 +1,134 @@ +# CNN RGBD→Grayscale Architecture Implementation + +## Summary + +Implemented CNN architecture upgrade: RGBD input → grayscale output with 7-channel augmented input. + +## Changes Made + +### Architecture + +**Input:** RGBD (4 channels: RGB + inverse depth D=1/z) +**Output:** Grayscale (1 channel) +**Layer Input:** 7 channels = [RGBD, UV coords, grayscale] all normalized to [-1,1] + +**Layer Configuration:** +- Inner layers (0..N-2): Conv2d(7→4) - output RGBD with tanh activation +- Final layer (N-1): Conv2d(7→1) - output grayscale, no activation + +### Input Normalization (all to [-1,1]) + +- **RGBD:** `(rgbd - 0.5) * 2` +- **UV coords:** `(uv - 0.5) * 2` +- **Grayscale:** `(0.2126*R + 0.7152*G + 0.0722*B - 0.5) * 2` + +**Rationale:** Zero-centered inputs for tanh activation, better gradient flow. + +### Modified Files + +**Training (`training/train_cnn.py`):** +1. Removed `CoordConv2d` class +2. Updated `SimpleCNN`: + - Inner layers: `Conv2d(7, 4)` - RGBD output + - Final layer: `Conv2d(7, 1)` - grayscale output +3. Updated `forward()`: + - Normalize RGBD/coords/gray to [-1,1] + - Concatenate 7-channel input for each layer + - Apply tanh (inner) or none (final) + - Denormalize final output +4. Updated `export_weights_to_wgsl()`: + - Inner: `array<array<f32, 8>, 36>` (9 pos × 4 ch × 8 values) + - Final: `array<array<f32, 8>, 9>` (9 pos × 8 values) +5. Updated `generate_layer_shader()`: + - Use `cnn_conv3x3_7to4` for inner layers + - Use `cnn_conv3x3_7to1` for final layer + - Denormalize outputs from [-1,1] to [0,1] +6. Updated `ImagePairDataset`: + - Load RGBA input (was RGB) + +**Shaders (`workspaces/main/shaders/cnn/cnn_conv3x3.wgsl`):** +1. 
Added `cnn_conv3x3_7to4()`: + - 7-channel input: [RGBD, uv_x, uv_y, gray] + - 4-channel output: RGBD + - Weights: `array<array<f32, 8>, 36>` +2. Added `cnn_conv3x3_7to1()`: + - 7-channel input: [RGBD, uv_x, uv_y, gray] + - 1-channel output: grayscale + - Weights: `array<array<f32, 8>, 9>` + +**Documentation (`doc/CNN_EFFECT.md`):** +1. Updated architecture section with RGBD→grayscale pipeline +2. Updated training data requirements (RGBA input) +3. Updated weight storage format + +### No C++ Changes + +CNNLayerParams and bind groups remain unchanged. + +## Data Flow + +1. Layer 0 captures original RGBD to `captured_frame` +2. Each layer: + - Samples previous layer output (RGBD in [0,1]) + - Normalizes RGBD to [-1,1] + - Computes UV coords and grayscale, normalizes to [-1,1] + - Concatenates 7-channel input + - Applies convolution with layer-specific weights + - Outputs RGBD (inner) or grayscale (final) in [-1,1] + - Applies tanh (inner only) + - Denormalizes to [0,1] for texture storage + - Blends with original + +## Next Steps + +1. **Prepare RGBD training data:** + - Input: RGBA images (RGB + depth in alpha) + - Target: Grayscale stylized output + +2. **Train network:** + ```bash + python3 training/train_cnn.py \ + --input training/input \ + --target training/output \ + --layers 3 \ + --epochs 1000 + ``` + +3. **Verify generated shaders:** + - Check `cnn_weights_generated.wgsl` structure + - Check `cnn_layer.wgsl` uses new conv functions + +4. 
**Test in demo:** + ```bash + cmake --build build -j4 + ./build/demo64k + ``` + +## Design Rationale + +**Why [-1,1] normalization?** +- Centered inputs for tanh (operates best around 0) +- Better gradient flow +- Standard ML practice for normalized data + +**Why RGBD throughout vs RGB?** +- Depth information propagates through network +- Enables depth-aware stylization +- Consistent 4-channel processing + +**Why 7-channel input?** +- Coordinates: position-dependent effects (vignettes) +- Grayscale: luminance-aware processing +- RGBD: full color+depth information +- Enables richer feature learning + +## Testing Checklist + +- [ ] Train network with RGBD input data +- [ ] Verify `cnn_weights_generated.wgsl` structure +- [ ] Verify `cnn_layer.wgsl` uses `7to4`/`7to1` functions +- [ ] Build demo without errors +- [ ] Visual test: inner layers show RGBD evolution +- [ ] Visual test: final layer produces grayscale +- [ ] Visual test: blending works correctly +- [ ] Compare quality with previous RGB→RGB architecture diff --git a/doc/COMPLETED.md b/doc/COMPLETED.md index d1c89af..2336f62 100644 --- a/doc/COMPLETED.md +++ b/doc/COMPLETED.md @@ -29,6 +29,22 @@ Detailed historical documents have been moved to `doc/archive/` for reference: Use `read @doc/archive/FILENAME.md` to access archived documents. 
+## Recently Completed (February 10, 2026) + +- [x] **WGPU Boilerplate Factorization** + - **Goal**: Reduce repetitive WGPU code via builder pattern helpers + - **Implementation**: + - Created `BindGroupLayoutBuilder` and `BindGroupBuilder` for declarative bind group creation + - Created `RenderPipelineBuilder` to simplify pipeline setup with ShaderComposer integration + - Created `SamplerCache` singleton to deduplicate sampler instances + - Refactored `post_process_helper.cc`, `cnn_effect.cc`, `rotating_cube_effect.cc` + - **Result**: + - Bind group creation: 19 instances reduced from 14→4 lines each + - Pipeline creation: 30-50 lines reduced to 8 lines + - Sampler deduplication: 6 instances → cached + - Total: -122 lines boilerplate, binary size unchanged (6.3M debug) + - Tests pass, prevents binding index errors + ## Recently Completed (February 9, 2026) - [x] **External Library Size Measurement (Task #76)** diff --git a/doc/CONTRIBUTING.md b/doc/CONTRIBUTING.md index 9cd785b..98df873 100644 --- a/doc/CONTRIBUTING.md +++ b/doc/CONTRIBUTING.md @@ -65,12 +65,15 @@ See `doc/CODING_STYLE.md` for detailed examples. ## Development Protocols ### Adding Visual Effect -1. Implement `Effect` subclass in `src/gpu/demo_effects.cc` -2. Add to workspace `timeline.seq` (e.g., `workspaces/main/timeline.seq`) -3. **Update `test_demo_effects.cc`**: - - Add to test list - - Increment `EXPECTED_*_COUNT` -4. Verify: +1. Create effect class files (use `tools/shadertoy/convert_shadertoy.py` or templates) +2. Add shader to `workspaces/main/assets.txt` +3. Add effect `.cc` file to `CMakeLists.txt` GPU_SOURCES (both sections) +4. Include header in `src/gpu/demo_effects.h` +5. Add to workspace `timeline.seq` (e.g., `workspaces/main/timeline.seq`) +6. **Update `src/tests/gpu/test_demo_effects.cc`**: + - Add to `post_process_effects` list (lines 80-93) or `scene_effects` list (lines 125-137) + - Example: `{"MyEffect", std::make_shared<MyEffect>(fixture.ctx())},` +7. 
Verify: ```bash cmake -S . -B build -DDEMO_BUILD_TESTS=ON cmake --build build -j4 --target test_demo_effects diff --git a/doc/EFFECT_WORKFLOW.md b/doc/EFFECT_WORKFLOW.md new file mode 100644 index 0000000..45c47b7 --- /dev/null +++ b/doc/EFFECT_WORKFLOW.md @@ -0,0 +1,228 @@ +# Effect Creation Workflow + +**Target Audience:** AI coding agents and developers + +Automated checklist for adding new visual effects to the demo. + +--- + +## Quick Reference + +**For ShaderToy conversions:** Use `tools/shadertoy/convert_shadertoy.py` then follow steps 3-8 below. + +**For custom effects:** Follow all steps 1-8. + +--- + +## Step-by-Step Workflow + +### 1. Create Effect Files + +**Location:** +- Header: `src/gpu/effects/<effect_name>_effect.h` +- Implementation: `src/gpu/effects/<effect_name>_effect.cc` +- Shader: `workspaces/main/shaders/<effect_name>.wgsl` + +**Naming Convention:** +- Class name: `<EffectName>Effect` (e.g., `TunnelEffect`, `PlasmaEffect`) +- Files: `<effect_name>_effect.*` (snake_case) + +**Base Class:** +- Post-process effects: inherit from `PostProcessEffect` +- Scene effects: inherit from `Effect` + +**Template:** See `tools/shadertoy/template.*` or use `convert_shadertoy.py` + +### 2. Add Shader to Assets + +**File:** `workspaces/main/assets.txt` + +**Format:** +``` +SHADER_<UPPER_SNAKE_NAME>, NONE, shaders/<effect_name>.wgsl, "Effect description" +``` + +**Example:** +``` +SHADER_TUNNEL, NONE, shaders/tunnel.wgsl, "Tunnel effect shader" +``` + +**Asset ID:** Will be `AssetId::ASSET_SHADER_<UPPER_SNAKE_NAME>` in C++ + +### 3. 
Add to CMakeLists.txt + +**File:** `CMakeLists.txt` + +**Action:** Add `src/gpu/effects/<effect_name>_effect.cc` to **BOTH** GPU_SOURCES sections: +- Headless mode section (around line 141-167) +- Normal mode section (around line 171-197) + +**Location:** After similar effects (post-process with post-process, scene with scene) + +**Example:** +```cmake +# In headless section (line ~152): + src/gpu/effects/solarize_effect.cc + src/gpu/effects/tunnel_effect.cc # <-- Add here + src/gpu/effects/chroma_aberration_effect.cc + +# In normal section (line ~183): + src/gpu/effects/solarize_effect.cc + src/gpu/effects/tunnel_effect.cc # <-- Add here + src/gpu/effects/chroma_aberration_effect.cc +``` + +### 4. Include in demo_effects.h + +**File:** `src/gpu/demo_effects.h` + +**Action:** Add include directive: +```cpp +#include "gpu/effects/<effect_name>_effect.h" +``` + +**Location:** Alphabetically with other effect includes + +### 5. Add to Timeline + +**File:** `workspaces/main/timeline.seq` + +**Format:** +``` +SEQUENCE <start_time> <priority> + EFFECT <+|=|-> <EffectName>Effect <local_start> <local_end> [params...] +``` + +**Priority Modifiers (REQUIRED):** +- `+` : Increment priority +- `=` : Same priority as previous effect +- `-` : Decrement priority (for backgrounds) + +**Example:** +``` +SEQUENCE 0.0 0 + EFFECT + TunnelEffect 0.0 10.0 +``` + +**Common Mistake:** Missing priority modifier (`+`, `=`, `-`) after EFFECT keyword + +### 6. Update Tests + +**File:** `src/tests/gpu/test_demo_effects.cc` + +**Action:** Add effect to appropriate list: + +**Post-Process Effects (lines 80-93):** +```cpp +{"TunnelEffect", std::make_shared<TunnelEffect>(fixture.ctx())}, +``` + +**Scene Effects (lines 125-137):** +```cpp +{"TunnelEffect", std::make_shared<TunnelEffect>(fixture.ctx())}, +``` + +**3D Effects:** If requires Renderer3D, add to `requires_3d` check (line 148-151) + +### 7. Build and Test + +```bash +# Full build +cmake --build build -j4 + +# Run effect tests +cmake -S . 
-B build -DDEMO_BUILD_TESTS=ON +cmake --build build -j4 --target test_demo_effects +(cd build && ./test_demo_effects) + +# Run all tests +(cd build && ctest) +``` + +### 8. Verify + +**Checklist:** +- [ ] Effect compiles without errors +- [ ] Effect appears in timeline +- [ ] test_demo_effects passes +- [ ] Effect renders correctly: `./build/demo64k` +- [ ] No shader compilation errors +- [ ] Follows naming conventions + +--- + +## Common Issues + +### Build Error: "no member named 'ASSET_..._SHADER'" + +**Cause:** Shader not in assets.txt or wrong asset ID name + +**Fix:** +1. Check `workspaces/main/assets.txt` has shader entry +2. Asset ID is `ASSET_` + uppercase entry name (e.g., `SHADER_TUNNEL` → `ASSET_SHADER_TUNNEL`) + +### Build Error: "undefined symbol for architecture" + +**Cause:** Effect not in CMakeLists.txt GPU_SOURCES + +**Fix:** Add `.cc` file to BOTH sections (headless and normal mode) + +### Timeline Parse Error: "Expected '+', '=', or '-'" + +**Cause:** Missing priority modifier after EFFECT keyword + +**Fix:** Use `EFFECT +`, `EFFECT =`, or `EFFECT -` (never just `EFFECT`) + +### Test Failure: Effect not in test list + +**Cause:** Effect not added to test_demo_effects.cc + +**Fix:** Add to `post_process_effects` or `scene_effects` list + +--- + +## Automation Script Example + +```bash +#!/bin/bash +# Example automation for AI agents + +EFFECT_NAME="$1" # CamelCase (e.g., "Tunnel") +SNAKE_NAME=$(echo "$EFFECT_NAME" | sed 's/\([A-Z]\)/_\1/g' | sed 's/^_//' | tr '[:upper:]' '[:lower:]') +UPPER_NAME=$(echo "$SNAKE_NAME" | tr '[:lower:]' '[:upper:]') + +echo "Creating effect: $EFFECT_NAME" +echo " Snake case: $SNAKE_NAME" +echo " Upper case: $UPPER_NAME" + +# 1. Generate files (if using ShaderToy) +# ./tools/shadertoy/convert_shadertoy.py shader.txt "$EFFECT_NAME" + +# 2. Add to assets.txt +echo "SHADER_${UPPER_NAME}, NONE, shaders/${SNAKE_NAME}.wgsl, \"${EFFECT_NAME} effect\"" \ + >> workspaces/main/assets.txt + +# 3. 
Add to CMakeLists.txt (both sections) +# Use Edit tool to add to both GPU_SOURCES sections + +# 4. Add include to demo_effects.h +# Use Edit tool to add #include line + +# 5. Add to timeline.seq +# Use Edit tool to add EFFECT line with priority modifier + +# 6. Add to test file +# Use Edit tool to add to appropriate test list + +# 7. Build +cmake --build build -j4 +``` + +--- + +## See Also + +- `tools/shadertoy/README.md` - ShaderToy conversion guide +- `doc/SEQUENCE.md` - Timeline format documentation +- `doc/CONTRIBUTING.md` - General contribution guidelines +- `src/gpu/effects/` - Existing effect examples diff --git a/doc/HOWTO.md b/doc/HOWTO.md index bdc0214..5ea6afd 100644 --- a/doc/HOWTO.md +++ b/doc/HOWTO.md @@ -86,12 +86,34 @@ make run_util_tests # Utility tests --- +## Training + +```bash +./training/train_cnn.py --layers 3 --kernel_sizes 3,5,3 --epochs 10000 --batch_size 8 --input training/input/ --target training/output/ --checkpoint-every 1000 +``` + +Generate shaders from checkpoint: +```bash +./training/train_cnn.py --export-only training/checkpoints/checkpoint_epoch_7000.pth +``` + +Generate ground truth (for shader validation): +```bash +./training/train_cnn.py --infer input.png --export-only checkpoints/checkpoint_epoch_7000.pth --output ground_truth.png +``` + +**Note:** Kernel sizes must match shader functions: +- 3×3 kernel → `cnn_conv3x3_7to4` (36 weights: 9 pos × 4 channels) +- 5×5 kernel → `cnn_conv5x5_7to4` (100 weights: 25 pos × 4 channels) + +--- + ## Timeline Edit `workspaces/main/timeline.seq`: ```text SEQUENCE 0.0 0 - EFFECT HeptagonEffect 0.0 60.0 0 + EFFECT + HeptagonEffect 0.0 60.0 0 ``` Rebuild to apply. See `doc/SEQUENCE.md`. 
diff --git a/doc/RECIPE.md b/doc/RECIPE.md index 6404391..d563027 100644 --- a/doc/RECIPE.md +++ b/doc/RECIPE.md @@ -157,8 +157,8 @@ void MyEffect::render(WGPUTextureView prev, WGPUTextureView target, **.seq syntax:** ``` -EFFECT MyEffect 0.0 10.0 strength=0.5 speed=3.0 -EFFECT MyEffect 10.0 20.0 strength=2.0 # speed keeps previous value +EFFECT + MyEffect 0.0 10.0 strength=0.5 speed=3.0 +EFFECT = MyEffect 10.0 20.0 strength=2.0 # speed keeps previous value ``` **Example:** `src/gpu/effects/flash_effect.cc`, `src/gpu/effects/chroma_aberration_effect.cc` diff --git a/src/gpu/bind_group_builder.h b/src/gpu/bind_group_builder.h new file mode 100644 index 0000000..d63f6e2 --- /dev/null +++ b/src/gpu/bind_group_builder.h @@ -0,0 +1,111 @@ +// WGPU bind group builder - reduces boilerplate for bind group creation +#pragma once +#include <vector> + +// Forward declarations (users must include gpu.h) +struct WGPUBindGroupLayoutEntry; +struct WGPUBindGroupEntry; +struct WGPUDeviceImpl; +typedef struct WGPUDeviceImpl* WGPUDevice; +struct WGPUBindGroupLayoutImpl; +typedef struct WGPUBindGroupLayoutImpl* WGPUBindGroupLayout; +struct WGPUBindGroupImpl; +typedef struct WGPUBindGroupImpl* WGPUBindGroup; +struct WGPUBufferImpl; +typedef struct WGPUBufferImpl* WGPUBuffer; +struct WGPUTextureViewImpl; +typedef struct WGPUTextureViewImpl* WGPUTextureView; +struct WGPUSamplerImpl; +typedef struct WGPUSamplerImpl* WGPUSampler; +typedef uint32_t WGPUShaderStageFlags; + +#include "platform/platform.h" + +class BindGroupLayoutBuilder { + std::vector<WGPUBindGroupLayoutEntry> entries_; + +public: + BindGroupLayoutBuilder& uniform(uint32_t binding, WGPUShaderStageFlags vis, size_t min_size = 0) { + WGPUBindGroupLayoutEntry e{}; + e.binding = binding; + e.visibility = vis; + e.buffer.type = WGPUBufferBindingType_Uniform; + if (min_size) e.buffer.minBindingSize = min_size; + entries_.push_back(e); + return *this; + } + + BindGroupLayoutBuilder& storage(uint32_t binding, WGPUShaderStageFlags 
vis, size_t min_size = 0) { + WGPUBindGroupLayoutEntry e{}; + e.binding = binding; + e.visibility = vis; + e.buffer.type = WGPUBufferBindingType_ReadOnlyStorage; + if (min_size) e.buffer.minBindingSize = min_size; + entries_.push_back(e); + return *this; + } + + BindGroupLayoutBuilder& texture(uint32_t binding, WGPUShaderStageFlags vis) { + WGPUBindGroupLayoutEntry e{}; + e.binding = binding; + e.visibility = vis; + e.texture.sampleType = WGPUTextureSampleType_Float; + e.texture.viewDimension = WGPUTextureViewDimension_2D; + entries_.push_back(e); + return *this; + } + + BindGroupLayoutBuilder& sampler(uint32_t binding, WGPUShaderStageFlags vis) { + WGPUBindGroupLayoutEntry e{}; + e.binding = binding; + e.visibility = vis; + e.sampler.type = WGPUSamplerBindingType_Filtering; + entries_.push_back(e); + return *this; + } + + WGPUBindGroupLayout build(WGPUDevice device) { + WGPUBindGroupLayoutDescriptor desc{}; + desc.entryCount = entries_.size(); + desc.entries = entries_.data(); + return wgpuDeviceCreateBindGroupLayout(device, &desc); + } +}; + +class BindGroupBuilder { + std::vector<WGPUBindGroupEntry> entries_; + +public: + BindGroupBuilder& buffer(uint32_t binding, WGPUBuffer buf, size_t size) { + WGPUBindGroupEntry e{}; + e.binding = binding; + e.buffer = buf; + e.size = size; + entries_.push_back(e); + return *this; + } + + BindGroupBuilder& texture(uint32_t binding, WGPUTextureView view) { + WGPUBindGroupEntry e{}; + e.binding = binding; + e.textureView = view; + entries_.push_back(e); + return *this; + } + + BindGroupBuilder& sampler(uint32_t binding, WGPUSampler samp) { + WGPUBindGroupEntry e{}; + e.binding = binding; + e.sampler = samp; + entries_.push_back(e); + return *this; + } + + WGPUBindGroup build(WGPUDevice device, WGPUBindGroupLayout layout) { + WGPUBindGroupDescriptor desc{}; + desc.layout = layout; + desc.entryCount = entries_.size(); + desc.entries = entries_.data(); + return wgpuDeviceCreateBindGroup(device, &desc); + } +}; diff --git 
a/src/gpu/demo_effects.h b/src/gpu/demo_effects.h index 72b3f65..1ccf930 100644 --- a/src/gpu/demo_effects.h +++ b/src/gpu/demo_effects.h @@ -15,6 +15,7 @@ #include "gpu/effects/theme_modulation_effect.h" // ThemeModulationEffect with full definition #include "gpu/effects/hybrid_3d_effect.h" #include "gpu/effects/flash_cube_effect.h" +#include "gpu/effects/scene1_effect.h" #include "gpu/gpu.h" #include "gpu/texture_manager.h" #include "gpu/uniform_helper.h" diff --git a/src/gpu/effects/cnn_effect.cc b/src/gpu/effects/cnn_effect.cc index 7107bea..d74187c 100644 --- a/src/gpu/effects/cnn_effect.cc +++ b/src/gpu/effects/cnn_effect.cc @@ -6,70 +6,30 @@ #include "gpu/effects/shaders.h" #include "gpu/effects/shader_composer.h" #include "gpu/effect.h" +#include "gpu/bind_group_builder.h" +#include "gpu/sampler_cache.h" +#include "gpu/pipeline_builder.h" // Create custom pipeline with 5 bindings (includes original texture) static WGPURenderPipeline create_cnn_pipeline(WGPUDevice device, WGPUTextureFormat format, const char* shader_code) { - std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code); + WGPUBindGroupLayout bgl = BindGroupLayoutBuilder() + .sampler(0, WGPUShaderStage_Fragment) + .texture(1, WGPUShaderStage_Fragment) + .uniform(2, WGPUShaderStage_Vertex | WGPUShaderStage_Fragment) + .uniform(3, WGPUShaderStage_Fragment) + .texture(4, WGPUShaderStage_Fragment) + .build(device); - WGPUShaderModuleDescriptor shader_desc = {}; - WGPUShaderSourceWGSL wgsl_src = {}; - wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_src.code = str_view(composed_shader.c_str()); - shader_desc.nextInChain = &wgsl_src.chain; - WGPUShaderModule shader_module = - wgpuDeviceCreateShaderModule(device, &shader_desc); + WGPURenderPipeline pipeline = RenderPipelineBuilder(device) + .shader(shader_code) + .bind_group_layout(bgl) + .format(format) + .build(); - WGPUBindGroupLayoutEntry bgl_entries[5] = {}; - bgl_entries[0].binding = 0; // sampler - 
bgl_entries[0].visibility = WGPUShaderStage_Fragment; - bgl_entries[0].sampler.type = WGPUSamplerBindingType_Filtering; - bgl_entries[1].binding = 1; // input texture - bgl_entries[1].visibility = WGPUShaderStage_Fragment; - bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float; - bgl_entries[1].texture.viewDimension = WGPUTextureViewDimension_2D; - bgl_entries[2].binding = 2; // uniforms - bgl_entries[2].visibility = WGPUShaderStage_Vertex | WGPUShaderStage_Fragment; - bgl_entries[2].buffer.type = WGPUBufferBindingType_Uniform; - bgl_entries[3].binding = 3; // effect params - bgl_entries[3].visibility = WGPUShaderStage_Fragment; - bgl_entries[3].buffer.type = WGPUBufferBindingType_Uniform; - bgl_entries[4].binding = 4; // original texture - bgl_entries[4].visibility = WGPUShaderStage_Fragment; - bgl_entries[4].texture.sampleType = WGPUTextureSampleType_Float; - bgl_entries[4].texture.viewDimension = WGPUTextureViewDimension_2D; - - WGPUBindGroupLayoutDescriptor bgl_desc = {}; - bgl_desc.entryCount = 5; - bgl_desc.entries = bgl_entries; - WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(device, &bgl_desc); - - WGPUPipelineLayoutDescriptor pl_desc = {}; - pl_desc.bindGroupLayoutCount = 1; - pl_desc.bindGroupLayouts = &bgl; - WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(device, &pl_desc); - - WGPUColorTargetState color_target = {}; - color_target.format = format; - color_target.writeMask = WGPUColorWriteMask_All; - - WGPUFragmentState fragment_state = {}; - fragment_state.module = shader_module; - fragment_state.entryPoint = str_view("fs_main"); - fragment_state.targetCount = 1; - fragment_state.targets = &color_target; - - WGPURenderPipelineDescriptor pipeline_desc = {}; - pipeline_desc.layout = pl; - pipeline_desc.vertex.module = shader_module; - pipeline_desc.vertex.entryPoint = str_view("vs_main"); - pipeline_desc.fragment = &fragment_state; - pipeline_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList; - 
pipeline_desc.multisample.count = 1; - pipeline_desc.multisample.mask = 0xFFFFFFFF; - - return wgpuDeviceCreateRenderPipeline(device, &pipeline_desc); + wgpuBindGroupLayoutRelease(bgl); + return pipeline; } CNNEffect::CNNEffect(const GpuContext& ctx) @@ -137,29 +97,13 @@ void CNNEffect::update_bind_group(WGPUTextureView input_view) { wgpuBindGroupRelease(bind_group_); WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline_, 0); - WGPUSamplerDescriptor sd = {}; - sd.magFilter = WGPUFilterMode_Linear; - sd.minFilter = WGPUFilterMode_Linear; - sd.maxAnisotropy = 1; - WGPUSampler sampler = wgpuDeviceCreateSampler(ctx_.device, &sd); - - WGPUBindGroupEntry bge[5] = {}; - bge[0].binding = 0; - bge[0].sampler = sampler; - bge[1].binding = 1; - bge[1].textureView = input_view_; - bge[2].binding = 2; - bge[2].buffer = uniforms_.get().buffer; - bge[2].size = uniforms_.get().size; - bge[3].binding = 3; - bge[3].buffer = params_buffer_.get().buffer; - bge[3].size = params_buffer_.get().size; - bge[4].binding = 4; - bge[4].textureView = original_view_ ? original_view_ : input_view_; + WGPUSampler sampler = SamplerCache::Get().get_or_create(ctx_.device, SamplerCache::linear()); - WGPUBindGroupDescriptor bgd = {}; - bgd.layout = bgl; - bgd.entryCount = 5; - bgd.entries = bge; - bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bgd); + bind_group_ = BindGroupBuilder() + .sampler(0, sampler) + .texture(1, input_view_) + .buffer(2, uniforms_.get().buffer, uniforms_.get().size) + .buffer(3, params_buffer_.get().buffer, params_buffer_.get().size) + .texture(4, original_view_ ? 
original_view_ : input_view_) + .build(ctx_.device, bgl); } diff --git a/src/gpu/effects/post_process_helper.cc b/src/gpu/effects/post_process_helper.cc index e99467f..0c339c7 100644 --- a/src/gpu/effects/post_process_helper.cc +++ b/src/gpu/effects/post_process_helper.cc @@ -5,69 +5,30 @@ #include "../demo_effects.h" #include "gpu/gpu.h" #include "gpu/effects/shader_composer.h" +#include "gpu/bind_group_builder.h" +#include "gpu/sampler_cache.h" +#include "gpu/pipeline_builder.h" #include <cstring> // Helper to create a standard post-processing pipeline WGPURenderPipeline create_post_process_pipeline(WGPUDevice device, WGPUTextureFormat format, const char* shader_code) { - std::string composed_shader = ShaderComposer::Get().Compose({}, shader_code); + WGPUBindGroupLayout bgl = BindGroupLayoutBuilder() + .sampler(PP_BINDING_SAMPLER, WGPUShaderStage_Fragment) + .texture(PP_BINDING_TEXTURE, WGPUShaderStage_Fragment) + .uniform(PP_BINDING_UNIFORMS, WGPUShaderStage_Vertex | WGPUShaderStage_Fragment) + .uniform(PP_BINDING_EFFECT_PARAMS, WGPUShaderStage_Fragment) + .build(device); - WGPUShaderModuleDescriptor shader_desc = {}; - WGPUShaderSourceWGSL wgsl_src = {}; - wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; - wgsl_src.code = str_view(composed_shader.c_str()); - shader_desc.nextInChain = &wgsl_src.chain; - WGPUShaderModule shader_module = - wgpuDeviceCreateShaderModule(device, &shader_desc); + WGPURenderPipeline pipeline = RenderPipelineBuilder(device) + .shader(shader_code) + .bind_group_layout(bgl) + .format(format) + .build(); - WGPUBindGroupLayoutEntry bgl_entries[4] = {}; - bgl_entries[0].binding = PP_BINDING_SAMPLER; - bgl_entries[0].visibility = WGPUShaderStage_Fragment; - bgl_entries[0].sampler.type = WGPUSamplerBindingType_Filtering; - bgl_entries[1].binding = PP_BINDING_TEXTURE; - bgl_entries[1].visibility = WGPUShaderStage_Fragment; - bgl_entries[1].texture.sampleType = WGPUTextureSampleType_Float; - bgl_entries[1].texture.viewDimension = 
WGPUTextureViewDimension_2D; - bgl_entries[2].binding = PP_BINDING_UNIFORMS; - bgl_entries[2].visibility = WGPUShaderStage_Vertex | WGPUShaderStage_Fragment; - bgl_entries[2].buffer.type = WGPUBufferBindingType_Uniform; - - // Add an entry for effect-specific parameters - bgl_entries[3].binding = PP_BINDING_EFFECT_PARAMS; - bgl_entries[3].visibility = WGPUShaderStage_Fragment; - bgl_entries[3].buffer.type = WGPUBufferBindingType_Uniform; - - WGPUBindGroupLayoutDescriptor bgl_desc = {}; - bgl_desc.entryCount = 4; - bgl_desc.entries = bgl_entries; - WGPUBindGroupLayout bgl = wgpuDeviceCreateBindGroupLayout(device, &bgl_desc); - - WGPUPipelineLayoutDescriptor pl_desc = {}; - pl_desc.bindGroupLayoutCount = 1; - pl_desc.bindGroupLayouts = &bgl; - WGPUPipelineLayout pl = wgpuDeviceCreatePipelineLayout(device, &pl_desc); - - WGPUColorTargetState color_target = {}; - color_target.format = format; - color_target.writeMask = WGPUColorWriteMask_All; - - WGPUFragmentState fragment_state = {}; - fragment_state.module = shader_module; - fragment_state.entryPoint = str_view("fs_main"); - fragment_state.targetCount = 1; - fragment_state.targets = &color_target; - - WGPURenderPipelineDescriptor pipeline_desc = {}; - pipeline_desc.layout = pl; - pipeline_desc.vertex.module = shader_module; - pipeline_desc.vertex.entryPoint = str_view("vs_main"); - pipeline_desc.fragment = &fragment_state; - pipeline_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList; - pipeline_desc.multisample.count = 1; - pipeline_desc.multisample.mask = 0xFFFFFFFF; - - return wgpuDeviceCreateRenderPipeline(device, &pipeline_desc); + wgpuBindGroupLayoutRelease(bgl); + return pipeline; } // --- PostProcess Implementation Helper --- @@ -82,25 +43,16 @@ void pp_update_bind_group(WGPUDevice device, WGPURenderPipeline pipeline, if (*bind_group) wgpuBindGroupRelease(*bind_group); + WGPUBindGroupLayout bgl = wgpuRenderPipelineGetBindGroupLayout(pipeline, 0); - WGPUSamplerDescriptor sd = {}; - sd.magFilter = 
WGPUFilterMode_Linear; - sd.minFilter = WGPUFilterMode_Linear; - sd.maxAnisotropy = 1; - WGPUSampler sampler = wgpuDeviceCreateSampler(device, &sd); - WGPUBindGroupEntry bge[4] = {}; - bge[0].binding = PP_BINDING_SAMPLER; - bge[0].sampler = sampler; - bge[1].binding = PP_BINDING_TEXTURE; - bge[1].textureView = input_view; - bge[2].binding = PP_BINDING_UNIFORMS; - bge[2].buffer = uniforms.buffer; - bge[2].size = uniforms.size; - bge[3].binding = PP_BINDING_EFFECT_PARAMS; - bge[3].buffer = - effect_params.buffer ? effect_params.buffer : g_dummy_buffer.buffer; - bge[3].size = effect_params.buffer ? effect_params.size : g_dummy_buffer.size; - WGPUBindGroupDescriptor bgd = { - .layout = bgl, .entryCount = 4, .entries = bge}; - *bind_group = wgpuDeviceCreateBindGroup(device, &bgd); + WGPUSampler sampler = SamplerCache::Get().get_or_create(device, SamplerCache::linear()); + + *bind_group = BindGroupBuilder() + .sampler(PP_BINDING_SAMPLER, sampler) + .texture(PP_BINDING_TEXTURE, input_view) + .buffer(PP_BINDING_UNIFORMS, uniforms.buffer, uniforms.size) + .buffer(PP_BINDING_EFFECT_PARAMS, + effect_params.buffer ? effect_params.buffer : g_dummy_buffer.buffer, + effect_params.buffer ? 
effect_params.size : g_dummy_buffer.size) + .build(device, bgl); } diff --git a/src/gpu/effects/rotating_cube_effect.cc b/src/gpu/effects/rotating_cube_effect.cc index 8d1f05a..da973e5 100644 --- a/src/gpu/effects/rotating_cube_effect.cc +++ b/src/gpu/effects/rotating_cube_effect.cc @@ -5,16 +5,14 @@ #include "gpu/effects/rotating_cube_effect.h" #include "generated/assets.h" #include "gpu/effects/shader_composer.h" +#include "gpu/sampler_cache.h" #include "util/asset_manager_utils.h" RotatingCubeEffect::RotatingCubeEffect(const GpuContext& ctx) : Effect(ctx) { } RotatingCubeEffect::~RotatingCubeEffect() { - if (mask_sampler_) - wgpuSamplerRelease(mask_sampler_); - if (noise_sampler_) - wgpuSamplerRelease(noise_sampler_); + // Samplers owned by SamplerCache - don't release if (noise_view_) wgpuTextureViewRelease(noise_view_); if (noise_texture_) @@ -49,21 +47,8 @@ void RotatingCubeEffect::init(MainSequence* demo) { noise_texture_ = wgpuDeviceCreateTexture(ctx_.device, &tex_desc); noise_view_ = wgpuTextureCreateView(noise_texture_, nullptr); - WGPUSamplerDescriptor sampler_desc = {}; - sampler_desc.addressModeU = WGPUAddressMode_Repeat; - sampler_desc.addressModeV = WGPUAddressMode_Repeat; - sampler_desc.magFilter = WGPUFilterMode_Linear; - sampler_desc.minFilter = WGPUFilterMode_Linear; - sampler_desc.maxAnisotropy = 1; - noise_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &sampler_desc); - - WGPUSamplerDescriptor mask_sampler_desc = {}; - mask_sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; - mask_sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; - mask_sampler_desc.magFilter = WGPUFilterMode_Linear; - mask_sampler_desc.minFilter = WGPUFilterMode_Linear; - mask_sampler_desc.maxAnisotropy = 1; - mask_sampler_ = wgpuDeviceCreateSampler(ctx_.device, &mask_sampler_desc); + noise_sampler_ = SamplerCache::Get().get_or_create(ctx_.device, SamplerCache::linear()); + mask_sampler_ = SamplerCache::Get().get_or_create(ctx_.device, SamplerCache::clamp()); 
size_t shader_size; const char* shader_code = diff --git a/src/gpu/effects/scene1_effect.cc b/src/gpu/effects/scene1_effect.cc new file mode 100644 index 0000000..a6733b7 --- /dev/null +++ b/src/gpu/effects/scene1_effect.cc @@ -0,0 +1,28 @@ +// This file is part of the 64k demo project. +// Scene1 effect - ShaderToy conversion (raymarching scene) + +#include "gpu/demo_effects.h" +#include "gpu/gpu.h" + +Scene1Effect::Scene1Effect(const GpuContext& ctx) : Effect(ctx) { + ResourceBinding bindings[] = {{uniforms_.get(), WGPUBufferBindingType_Uniform}}; + pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, scene1_shader_wgsl, + bindings, 1); + pass_.vertex_count = 3; +} + +void Scene1Effect::render(WGPURenderPassEncoder pass, float t, float b, + float i, float a) { + CommonPostProcessUniforms u = { + .resolution = {(float)width_, (float)height_}, + ._pad = {0.0f, 0.0f}, + .aspect_ratio = a, + .time = t, + .beat = b, + .audio_intensity = i, + }; + uniforms_.update(ctx_.queue, u); + wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline); + wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, pass_.vertex_count, 1, 0, 0); +} diff --git a/src/gpu/effects/scene1_effect.h b/src/gpu/effects/scene1_effect.h new file mode 100644 index 0000000..dc5c747 --- /dev/null +++ b/src/gpu/effects/scene1_effect.h @@ -0,0 +1,19 @@ +// This file is part of the 64k demo project. 
+// Scene1 effect - ShaderToy conversion (raymarching scene) + +#ifndef SCENE1_EFFECT_H_ +#define SCENE1_EFFECT_H_ + +#include "gpu/effect.h" + +class Scene1Effect : public Effect { + public: + Scene1Effect(const GpuContext& ctx); + void render(WGPURenderPassEncoder pass, float time, float beat, + float intensity, float aspect_ratio) override; + + private: + RenderPass pass_; +}; + +#endif /* SCENE1_EFFECT_H_ */ diff --git a/src/gpu/effects/shaders.cc b/src/gpu/effects/shaders.cc index 6559bf5..5f78298 100644 --- a/src/gpu/effects/shaders.cc +++ b/src/gpu/effects/shaders.cc @@ -98,6 +98,10 @@ const char* solarize_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_SOLARIZE); +const char* scene1_shader_wgsl = + + SafeGetAsset(AssetId::ASSET_SHADER_SCENE1); + const char* distort_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_DISTORT); diff --git a/src/gpu/effects/shaders.h b/src/gpu/effects/shaders.h index 7acc2a6..03fa48c 100644 --- a/src/gpu/effects/shaders.h +++ b/src/gpu/effects/shaders.h @@ -15,6 +15,7 @@ extern const char* ellipse_shader_wgsl; extern const char* particle_spray_compute_wgsl; extern const char* gaussian_blur_shader_wgsl; extern const char* solarize_shader_wgsl; +extern const char* scene1_shader_wgsl; extern const char* distort_shader_wgsl; extern const char* chroma_aberration_shader_wgsl; extern const char* vignette_shader_wgsl; diff --git a/src/gpu/pipeline_builder.h b/src/gpu/pipeline_builder.h new file mode 100644 index 0000000..06b4ceb --- /dev/null +++ b/src/gpu/pipeline_builder.h @@ -0,0 +1,109 @@ +// WGPU render pipeline builder - reduces pipeline creation boilerplate +#pragma once +#include <vector> +#include <string> + +// Forward declarations (users must include gpu.h and shader_composer.h) +struct WGPUDeviceImpl; +typedef struct WGPUDeviceImpl* WGPUDevice; +struct WGPUBindGroupLayoutImpl; +typedef struct WGPUBindGroupLayoutImpl* WGPUBindGroupLayout; +struct WGPURenderPipelineImpl; +typedef struct WGPURenderPipelineImpl* 
WGPURenderPipeline; +struct WGPUShaderModuleImpl; +typedef struct WGPUShaderModuleImpl* WGPUShaderModule; + +#include "platform/platform.h" +#include "gpu/effects/shader_composer.h" + +class RenderPipelineBuilder { + WGPUDevice device_; + WGPURenderPipelineDescriptor desc_{}; + WGPUColorTargetState color_{}; + WGPUBlendState blend_{}; + WGPUDepthStencilState depth_{}; + std::vector<WGPUBindGroupLayout> layouts_; + std::string shader_text_; + WGPUShaderModule shader_module_ = nullptr; + bool has_blend_ = false; + bool has_depth_ = false; + +public: + explicit RenderPipelineBuilder(WGPUDevice device) : device_(device) { + desc_.primitive.topology = WGPUPrimitiveTopology_TriangleList; + desc_.primitive.cullMode = WGPUCullMode_None; + desc_.multisample.count = 1; + desc_.multisample.mask = 0xFFFFFFFF; + } + + RenderPipelineBuilder& shader(const char* wgsl, bool compose = true) { + shader_text_ = compose ? ShaderComposer::Get().Compose({}, wgsl) : wgsl; + WGPUShaderSourceWGSL wgsl_src{}; + wgsl_src.chain.sType = WGPUSType_ShaderSourceWGSL; + wgsl_src.code = str_view(shader_text_.c_str()); + WGPUShaderModuleDescriptor shader_desc{}; + shader_desc.nextInChain = &wgsl_src.chain; + shader_module_ = wgpuDeviceCreateShaderModule(device_, &shader_desc); + desc_.vertex.module = shader_module_; + desc_.vertex.entryPoint = str_view("vs_main"); + return *this; + } + + RenderPipelineBuilder& bind_group_layout(WGPUBindGroupLayout layout) { + layouts_.push_back(layout); + return *this; + } + + RenderPipelineBuilder& format(WGPUTextureFormat fmt) { + color_.format = fmt; + return *this; + } + + RenderPipelineBuilder& blend_alpha() { + has_blend_ = true; + blend_.color.operation = WGPUBlendOperation_Add; + blend_.color.srcFactor = WGPUBlendFactor_SrcAlpha; + blend_.color.dstFactor = WGPUBlendFactor_OneMinusSrcAlpha; + blend_.alpha.operation = WGPUBlendOperation_Add; + blend_.alpha.srcFactor = WGPUBlendFactor_One; + blend_.alpha.dstFactor = WGPUBlendFactor_OneMinusSrcAlpha; + return 
*this; + } + + RenderPipelineBuilder& depth(WGPUTextureFormat depth_fmt = WGPUTextureFormat_Depth24Plus) { + has_depth_ = true; + depth_.format = depth_fmt; + depth_.depthWriteEnabled = WGPUOptionalBool_True; + depth_.depthCompare = WGPUCompareFunction_Less; + return *this; + } + + RenderPipelineBuilder& cull_back() { + desc_.primitive.cullMode = WGPUCullMode_Back; + return *this; + } + + WGPURenderPipeline build() { + color_.writeMask = WGPUColorWriteMask_All; + if (has_blend_) color_.blend = &blend_; + + WGPUFragmentState fragment{}; + fragment.module = shader_module_; + fragment.entryPoint = str_view("fs_main"); + fragment.targetCount = 1; + fragment.targets = &color_; + + WGPUPipelineLayoutDescriptor pl_desc{}; + pl_desc.bindGroupLayoutCount = layouts_.size(); + pl_desc.bindGroupLayouts = layouts_.data(); + WGPUPipelineLayout layout = wgpuDeviceCreatePipelineLayout(device_, &pl_desc); + + desc_.layout = layout; + desc_.fragment = &fragment; + if (has_depth_) desc_.depthStencil = &depth_; + + WGPURenderPipeline pipeline = wgpuDeviceCreateRenderPipeline(device_, &desc_); + wgpuPipelineLayoutRelease(layout); + return pipeline; + } +}; diff --git a/src/gpu/sampler_cache.h b/src/gpu/sampler_cache.h new file mode 100644 index 0000000..0f012a8 --- /dev/null +++ b/src/gpu/sampler_cache.h @@ -0,0 +1,61 @@ +// Sampler cache - deduplicates samplers across effects +#pragma once +#include <map> + +// Forward declarations (users must include gpu.h) +struct WGPUDeviceImpl; +typedef struct WGPUDeviceImpl* WGPUDevice; +struct WGPUSamplerImpl; +typedef struct WGPUSamplerImpl* WGPUSampler; + +#include "platform/platform.h" + +struct SamplerSpec { + WGPUAddressMode u, v; + WGPUFilterMode mag, min; + uint16_t anisotropy; + + bool operator<(const SamplerSpec& o) const { + if (u != o.u) return u < o.u; + if (v != o.v) return v < o.v; + if (mag != o.mag) return mag < o.mag; + if (min != o.min) return min < o.min; + return anisotropy < o.anisotropy; + } +}; + +class SamplerCache { + 
std::map<SamplerSpec, WGPUSampler> cache_; + SamplerCache() = default; + +public: + static SamplerCache& Get() { + static SamplerCache instance; + return instance; + } + + WGPUSampler get_or_create(WGPUDevice device, const SamplerSpec& spec) { + auto it = cache_.find(spec); + if (it != cache_.end()) return it->second; + + WGPUSamplerDescriptor desc{}; + desc.addressModeU = spec.u; + desc.addressModeV = spec.v; + desc.magFilter = spec.mag; + desc.minFilter = spec.min; + desc.maxAnisotropy = spec.anisotropy; + WGPUSampler sampler = wgpuDeviceCreateSampler(device, &desc); + cache_[spec] = sampler; + return sampler; + } + + // Common presets + static SamplerSpec linear() { + return {WGPUAddressMode_Repeat, WGPUAddressMode_Repeat, + WGPUFilterMode_Linear, WGPUFilterMode_Linear, 1}; + } + static SamplerSpec clamp() { + return {WGPUAddressMode_ClampToEdge, WGPUAddressMode_ClampToEdge, + WGPUFilterMode_Linear, WGPUFilterMode_Linear, 1}; + } +}; diff --git a/src/tests/gpu/test_demo_effects.cc b/src/tests/gpu/test_demo_effects.cc index 619b9c9..01e6678 100644 --- a/src/tests/gpu/test_demo_effects.cc +++ b/src/tests/gpu/test_demo_effects.cc @@ -134,6 +134,7 @@ static void test_scene_effects() { {"CircleMaskEffect", std::make_shared<CircleMaskEffect>(fixture.ctx())}, {"RotatingCubeEffect", std::make_shared<RotatingCubeEffect>(fixture.ctx())}, + {"Scene1Effect", std::make_shared<Scene1Effect>(fixture.ctx())}, }; int passed = 0; diff --git a/tools/shadertoy/README.md b/tools/shadertoy/README.md new file mode 100644 index 0000000..283a65f --- /dev/null +++ b/tools/shadertoy/README.md @@ -0,0 +1,204 @@ +# ShaderToy Conversion Guide + +Quick guide to convert ShaderToy shaders to demo effects. + +**For complete workflow:** See `doc/EFFECT_WORKFLOW.md` for full integration checklist. 
+ +## Quick Start (Automated) + +```bash +# Save ShaderToy code to a file +cat > tunnel.txt << 'EOF' +void mainImage(out vec4 fragColor, in vec2 fragCoord) { + vec2 uv = fragCoord / iResolution.xy; + vec3 col = 0.5 + 0.5 * cos(iTime + uv.xyx + vec3(0,2,4)); + fragColor = vec4(col, 1.0); +} +EOF + +# Generate effect files +./tools/shadertoy/convert_shadertoy.py tunnel.txt Tunnel + +# Regenerate only shader (if .h/.cc already exist) +./tools/shadertoy/convert_shadertoy.py tunnel.txt Tunnel --shader-only + +# Follow printed instructions to integrate +``` + +## Files + +**Automated Script:** +- `convert_shadertoy.py` - Generates all files from ShaderToy code +- `example.txt` - Example ShaderToy shader for testing + +**Manual Templates:** +- `template.h` - Header boilerplate +- `template.cc` - Implementation boilerplate +- `template.wgsl` - Shader boilerplate with conversion notes + +## Manual Steps + +### 1. Copy Templates + +```bash +# Choose effect name (e.g., "tunnel", "plasma", "warp") +EFFECT_NAME="myeffect" + +cp tools/shadertoy/template.h src/gpu/effects/${EFFECT_NAME}_effect.h +cp tools/shadertoy/template.cc src/gpu/effects/${EFFECT_NAME}_effect.cc +cp tools/shadertoy/template.wgsl workspaces/main/shaders/${EFFECT_NAME}.wgsl +``` + +### 2. Rename Class + +In both `.h` and `.cc`: +- `ShaderToyEffect` → `MyEffectEffect` +- `SHADERTOY_EFFECT_H_` → `MYEFFECT_EFFECT_H_` +- `shadertoy_effect.h` → `myeffect_effect.h` + +### 3. Convert Shader + +In `.wgsl`, paste ShaderToy `mainImage()` into `fs_main()`: + +**ShaderToy:** +```glsl +void mainImage(out vec4 fragColor, in vec2 fragCoord) { + vec2 uv = fragCoord / iResolution.xy; + fragColor = vec4(uv, 0.5, 1.0); +} +``` + +**WGSL:** +```wgsl +@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> { + let uv = p.xy / uniforms.resolution; + return vec4<f32>(uv, 0.5, 1.0); +} +``` + +### 4. 
Update Asset Name + +In `.cc`, update `AssetId::ASSET_SHADERTOY_SHADER` to match the asset ID generated for your shader: +```cpp +AssetId::ASSET_SHADER_MYEFFECT +``` + +### 5. Add to Assets + +In `workspaces/main/assets.txt`: +``` +SHADER_MYEFFECT, NONE, shaders/myeffect.wgsl, "MyEffect effect" +``` + +### 6. Register Effect + +In `src/gpu/demo_effects.h`: +```cpp +#include "gpu/effects/myeffect_effect.h" +``` + +In `workspaces/main/timeline.seq`: +``` +SEQUENCE 0.0 0 + EFFECT + MyEffectEffect 0.0 10.0 +``` + +### 7. Update CMakeLists.txt + +Add effect source to `CMakeLists.txt` GPU_SOURCES (both headless and normal mode sections): +```cmake +src/gpu/effects/myeffect_effect.cc +``` + +### 8. Update Tests + +In `src/tests/gpu/test_demo_effects.cc`: +- Add to `post_process_effects` list (lines 80-93) if it's a post-process effect +- OR add to `scene_effects` list (lines 125-137) if it's a scene effect +- Example: `{"MyEffectEffect", std::make_shared<MyEffectEffect>(fixture.ctx())},` + +### 9. Build & Test + +```bash +cmake --build build -j4 +./build/demo64k + +# Run tests +cmake -S . 
-B build -DDEMO_BUILD_TESTS=ON +cmake --build build -j4 +cd build && ctest +``` + +## Example Conversion + +**Input ShaderToy:** +```glsl +void mainImage(out vec4 fragColor, in vec2 fragCoord) { + vec2 uv = fragCoord / iResolution.xy; + vec3 col = 0.5 + 0.5 * cos(iTime + uv.xyx + vec3(0,2,4)); + fragColor = vec4(col, 1.0); +} +``` + +**Generated WGSL (after script + manual fixes):** +```wgsl +@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> { + let uv = p.xy / uniforms.resolution; + let col = vec3<f32>(0.5) + 0.5 * cos(uniforms.time + uv.xyx + vec3<f32>(0.0, 2.0, 4.0)); + return vec4<f32>(col, 1.0); +} +``` + +## Common Conversions + +| ShaderToy | WGSL | +|-----------|------| +| `iResolution.xy` | `uniforms.resolution` | +| `iTime` | `uniforms.time` | +| `fragCoord` | `p.xy` | +| `float` | `f32` | +| `vec2` | `vec2<f32>` | +| `mod(x, y)` | `x % y` (truncating; for negative operands use `x - y * floor(x / y)`) | +| `texture(iChannel0, uv)` | `textureSample(txt, smplr, uv)` | +| `fragColor = ...` | `return ...` | +| `vec2 p = ...` | `let p = vec2<f32>(...)` or `var p: vec2<f32> = ...` | + +## Custom Parameters + +For tunable values: + +**C++ (`.h`):** +```cpp +struct MyEffectParams { + float speed; + float scale; + float _pad[2]; +}; +static_assert(sizeof(MyEffectParams) == 16, "..."); +``` + +**WGSL:** +```wgsl +struct MyEffectParams { + speed: f32, + scale: f32, + _pad0: f32, + _pad1: f32, +} +@group(0) @binding(3) var<uniform> params: MyEffectParams; +``` + +## Available Uniforms + +Always available in `uniforms: CommonUniforms`: +- `resolution: vec2<f32>` - Screen resolution +- `aspect_ratio: f32` - Width/height +- `time: f32` - Demo time (seconds) +- `beat: f32` - Music beat sync (0-1) +- `audio_intensity: f32` - Audio reactivity + +## Next Steps + +- See `doc/CONTRIBUTING.md` for commit policy +- See `doc/SEQUENCE.md` for timeline syntax +- See existing effects in `src/gpu/effects/` for examples diff --git a/tools/shadertoy/convert_shadertoy.py b/tools/shadertoy/convert_shadertoy.py new file 
mode 100755 index 0000000..e85f384 --- /dev/null +++ b/tools/shadertoy/convert_shadertoy.py @@ -0,0 +1,399 @@ +#!/usr/bin/env python3 +# This file is part of the 64k demo project. +# Converts ShaderToy shader to demo effect boilerplate. +# +# Usage: +# ./tools/shadertoy/convert_shadertoy.py <shader.txt> <EffectName> +# +# Example: +# ./tools/shadertoy/convert_shadertoy.py tunnel.txt Tunnel +# ./tools/shadertoy/convert_shadertoy.py tools/shadertoy/example.txt Rainbow +# +# Generates: +# - src/gpu/effects/<effect>_effect.h +# - src/gpu/effects/<effect>_effect.cc +# - workspaces/main/shaders/<effect>.wgsl +# +# The script performs basic ShaderToy→WGSL conversion: +# - Converts types (float→f32, vec2→vec2<f32>, etc.) +# - Converts uniforms (iTime→uniforms.time, etc.) +# - Extracts mainImage() body into fs_main() +# - Generates boilerplate C++ effect class +# +# Manual fixes usually needed: +# - fragColor assignments → return statements +# - Variable name conflicts (e.g., shadowing 'p') +# - Complex type inference +# - Texture channel mappings +# - Helper function signatures + +import sys +import os +import re +from pathlib import Path + +def to_snake_case(name): + """Convert CamelCase to snake_case.""" + s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + +def to_upper_snake_case(name): + """Convert CamelCase to UPPER_SNAKE_CASE.""" + return to_snake_case(name).upper() + +def to_camel_case(name): + """Convert snake_case to CamelCase.""" + return ''.join(word.capitalize() for word in name.split('_')) + +def convert_shadertoy_to_wgsl(shader_code): + """Basic ShaderToy to WGSL conversion.""" + # Extract mainImage first + main_match = re.search(r'void\s+mainImage\s*\([^)]+\)\s*\{(.*)\}', shader_code, re.DOTALL) + if main_match: + main_body = main_match.group(1).strip() + helpers = shader_code[:main_match.start()] + else: + main_body = "" + helpers = shader_code + + # Replace common ShaderToy defines + conversions = [ 
+ (r'#define\s+TIME\s+iTime', ''), + (r'#define\s+RESOLUTION\s+iResolution', ''), + (r'#define\s+PI\s+[\d.]+', 'const PI: f32 = 3.141592654;'), + (r'#define\s+TAU\s+\([^)]+\)', 'const TAU: f32 = 6.283185307;'), + (r'#define\s+ROT\(a\)\s+mat2\([^)]+\)', ''), # Will be converted to function + + # Common ShaderToy uniforms + (r'\bTIME\b', 'uniforms.time'), + (r'\biTime\b', 'uniforms.time'), + (r'\bRESOLUTION\b', 'uniforms.resolution'), + (r'\biResolution\b', 'uniforms.resolution'), + (r'\bfragCoord\b', 'p.xy'), + + # Type conversions + (r'\bfloat\b', 'f32'), + (r'\bvec2\b', 'vec2<f32>'), + (r'\bvec3\b', 'vec3<f32>'), + (r'\bvec4\b', 'vec4<f32>'), + (r'\bmat2\b', 'mat2x2<f32>'), + (r'\bmat3\b', 'mat3x3<f32>'), + (r'\bmat4\b', 'mat4x4<f32>'), + + # Function declarations (preserve return type context) + (r'\bf32\s+(\w+)\s*\(', r'fn \1('), + (r'\bvec2<f32>\s+(\w+)\s*\(', r'fn \1('), + (r'\bvec3<f32>\s+(\w+)\s*\(', r'fn \1('), + (r'\bvec4<f32>\s+(\w+)\s*\(', r'fn \1('), + (r'\bvoid\s+(\w+)\s*\(', r'fn \1('), + + # Const declarations + (r'\bconst\s+f32\s+(\w+)\s*=', r'const \1: f32 ='), + (r'\bconst\s+vec2<f32>\s+(\w+)\s*=', r'const \1 ='), + (r'\bconst\s+vec3<f32>\s+(\w+)\s*=', r'const \1 ='), + (r'\bconst\s+vec4<f32>\s+(\w+)\s*=', r'const \1 ='), + + # Function calls that need fixing + (r'\bfract\s*\(', 'fract('), + (r'\bmod\s*\(([^,]+),\s*([^)]+)\)', r'(\1 % \2)'), + ] + + converted_helpers = helpers + for pattern, replacement in conversions: + converted_helpers = re.sub(pattern, replacement, converted_helpers) + + # Convert mainImage body + converted_main = main_body + for pattern, replacement in conversions: + converted_main = re.sub(pattern, replacement, converted_main) + + # Fix fragColor assignments -> returns + converted_main = re.sub(r'\bfragColor\s*=\s*([^;]+);', r'return \1;', converted_main) + + # Indent main body + indented_main = '\n'.join(' ' + line if line.strip() else '' for line in converted_main.split('\n')) + + # Build fragment function with Y-flip for 
ShaderToy convention + fragment = f"""@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> {{ + // Flip Y to match ShaderToy convention (origin at bottom-left) + let flipped = vec2<f32>(p.x, uniforms.resolution.y - p.y); + let q = flipped / uniforms.resolution; + var coord = -1.0 + 2.0 * q; + coord.x *= uniforms.resolution.x / uniforms.resolution.y; + +{indented_main} +}}""" + + return converted_helpers + '\n\n' + fragment + +def extract_main_image(shader_code): + """Extract mainImage function body from ShaderToy code.""" + # Try to find mainImage function + match = re.search(r'void\s+mainImage\s*\([^)]+\)\s*\{(.*)\}', shader_code, re.DOTALL) + if match: + return match.group(1).strip() + + # If no mainImage found, return whole shader + return shader_code + +def generate_header(effect_name, is_post_process=False): + """Generate .h file content.""" + class_name = f"{effect_name}Effect" + upper_name = to_upper_snake_case(effect_name) + + if is_post_process: + return f"""// This file is part of the 64k demo project. +// {effect_name} effect - ShaderToy conversion (post-process) +// Generated by convert_shadertoy.py + +#ifndef {upper_name}_EFFECT_H_ +#define {upper_name}_EFFECT_H_ + +#include "gpu/effect.h" +#include "gpu/effects/post_process_helper.h" + +class {class_name} : public PostProcessEffect {{ + public: + {class_name}(const GpuContext& ctx); + void render(WGPURenderPassEncoder pass, float time, float beat, + float intensity, float aspect_ratio) override; + void update_bind_group(WGPUTextureView input_view) override; +}}; + +#endif /* {upper_name}_EFFECT_H_ */ +""" + else: + # Scene effect (simpler, like HeptagonEffect) + return f"""// This file is part of the 64k demo project. 
+// {effect_name} effect - ShaderToy conversion (scene) +// Generated by convert_shadertoy.py + +#ifndef {upper_name}_EFFECT_H_ +#define {upper_name}_EFFECT_H_ + +#include "gpu/effect.h" + +class {class_name} : public Effect {{ + public: + {class_name}(const GpuContext& ctx); + void render(WGPURenderPassEncoder pass, float time, float beat, + float intensity, float aspect_ratio) override; + + private: + RenderPass pass_; +}}; + +#endif /* {upper_name}_EFFECT_H_ */ +""" + +def generate_implementation(effect_name, is_post_process=False): + """Generate .cc file content.""" + class_name = f"{effect_name}Effect" + snake_name = to_snake_case(effect_name) + + if is_post_process: + return f"""// This file is part of the 64k demo project. +// {effect_name} effect - ShaderToy conversion (post-process) +// Generated by convert_shadertoy.py + +#include "gpu/demo_effects.h" +#include "gpu/effects/post_process_helper.h" +#include "gpu/gpu.h" + +{class_name}::{class_name}(const GpuContext& ctx) : PostProcessEffect(ctx) {{ + pipeline_ = create_post_process_pipeline(ctx_.device, ctx_.format, {snake_name}_shader_wgsl); +}} + +void {class_name}::render(WGPURenderPassEncoder pass, float time, float beat, + float intensity, float aspect_ratio) {{ + const CommonPostProcessUniforms u = {{ + .resolution = {{(float)width_, (float)height_}}, + ._pad = {{0.0f, 0.0f}}, + .aspect_ratio = aspect_ratio, + .time = time, + .beat = beat, + .audio_intensity = intensity, + }}; + uniforms_.update(ctx_.queue, u); + + wgpuRenderPassEncoderSetPipeline(pass, pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +}} + +void {class_name}::update_bind_group(WGPUTextureView input_view) {{ + pp_update_bind_group(ctx_.device, pipeline_, &bind_group_, input_view, uniforms_.get()); +}} +""" + else: + # Scene effect (simpler pattern like HeptagonEffect) + return f"""// This file is part of the 64k demo project. 
+// {effect_name} effect - ShaderToy conversion (scene) +// Generated by convert_shadertoy.py + +#include "gpu/demo_effects.h" +#include "gpu/gpu.h" + +{class_name}::{class_name}(const GpuContext& ctx) : Effect(ctx) {{ + ResourceBinding bindings[] = {{{{uniforms_.get(), WGPUBufferBindingType_Uniform}}}}; + pass_ = gpu_create_render_pass(ctx_.device, ctx_.format, {snake_name}_shader_wgsl, + bindings, 1); + pass_.vertex_count = 3; +}} + +void {class_name}::render(WGPURenderPassEncoder pass, float t, float b, + float i, float a) {{ + CommonPostProcessUniforms u = {{ + .resolution = {{(float)width_, (float)height_}}, + ._pad = {{0.0f, 0.0f}}, + .aspect_ratio = a, + .time = t, + .beat = b, + .audio_intensity = i, + }}; + uniforms_.update(ctx_.queue, u); + wgpuRenderPassEncoderSetPipeline(pass, pass_.pipeline); + wgpuRenderPassEncoderSetBindGroup(pass, 0, pass_.bind_group, 0, nullptr); + wgpuRenderPassEncoderDraw(pass, pass_.vertex_count, 1, 0, 0); +}} +""" + +def generate_shader(effect_name, shadertoy_code, is_post_process=False): + """Generate .wgsl file content.""" + # Convert to WGSL (full shader, not just mainImage) + converted = convert_shadertoy_to_wgsl(shadertoy_code) + + if is_post_process: + bindings = """@group(0) @binding(0) var smplr: sampler; +@group(0) @binding(1) var txt: texture_2d<f32>; + +#include "common_uniforms" + +@group(0) @binding(2) var<uniform> uniforms: CommonUniforms;""" + else: + # Scene effect - only uniforms, no texture input + bindings = """#include "common_uniforms" + +@group(0) @binding(0) var<uniform> uniforms: CommonUniforms;""" + + return f"""// {effect_name} effect shader - ShaderToy conversion +// Generated by convert_shadertoy.py +// NOTE: Manual review recommended - conversion is basic + +{bindings} + +@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> {{ + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, -1.0), + vec2<f32>(3.0, -1.0), + vec2<f32>(-1.0, 3.0) + ); + return vec4<f32>(pos[i], 0.0, 
1.0); +}} + +{converted} +""" + +def main(): + if len(sys.argv) < 3: + print("Usage: convert_shadertoy.py <shader.txt> <EffectName> [--post-process] [--shader-only]") + print() + print("Examples:") + print(" ./tools/shadertoy/convert_shadertoy.py tunnel.txt Tunnel") + print(" ./tools/shadertoy/convert_shadertoy.py blur.txt Blur --post-process") + print(" ./tools/shadertoy/convert_shadertoy.py tunnel.txt Tunnel --shader-only") + print() + print("Options:") + print(" --post-process Generate post-process effect (operates on previous frame)") + print(" Default: scene effect (renders geometry)") + print(" --shader-only Only regenerate .wgsl shader (skip .h/.cc files)") + print() + print("This will generate:") + print(" src/gpu/effects/<effect>_effect.h") + print(" src/gpu/effects/<effect>_effect.cc") + print(" workspaces/main/shaders/<effect>.wgsl") + sys.exit(1) + + shader_file = sys.argv[1] + effect_name = sys.argv[2] + is_post_process = '--post-process' in sys.argv + shader_only = '--shader-only' in sys.argv + + # Ensure effect name is CamelCase + if '_' in effect_name: + effect_name = to_camel_case(effect_name) + + # Read shader code + if not os.path.exists(shader_file): + print(f"Error: {shader_file} not found") + sys.exit(1) + + with open(shader_file, 'r') as f: + shadertoy_code = f.read() + + # Generate file names + snake_name = to_snake_case(effect_name) + upper_name = to_upper_snake_case(effect_name) + + # Script is in tools/shadertoy/, so go up two levels to repo root + repo_root = Path(__file__).parent.parent.parent + header_path = repo_root / "src" / "gpu" / "effects" / f"{snake_name}_effect.h" + impl_path = repo_root / "src" / "gpu" / "effects" / f"{snake_name}_effect.cc" + shader_path = repo_root / "workspaces" / "main" / "shaders" / f"{snake_name}.wgsl" + + # Generate files + if shader_only: + print(f"Regenerating shader only: {effect_name}") + print(f" Shader: {shader_path}") + print() + shader_path.write_text(generate_shader(effect_name, shadertoy_code, 
is_post_process)) + print(f"✓ Shader regenerated") + return + + print(f"Generating effect: {effect_name}") + print(f" Header: {header_path}") + print(f" Impl: {impl_path}") + print(f" Shader: {shader_path}") + print() + + # Write files + header_path.write_text(generate_header(effect_name, is_post_process)) + impl_path.write_text(generate_implementation(effect_name, is_post_process)) + shader_path.write_text(generate_shader(effect_name, shadertoy_code, is_post_process)) + + effect_type = "post-process" if is_post_process else "scene" + print(f"✓ Files generated ({effect_type} effect)") + print() + print("Next steps (see doc/EFFECT_WORKFLOW.md for details):") + print() + print("1. Add shader to workspaces/main/assets.txt:") + print(f" SHADER_{upper_name}, NONE, shaders/{snake_name}.wgsl, \"{effect_name} effect\"") + print() + print() + print("2. Add shader declaration to src/gpu/effects/shaders.h:") + print(f" extern const char* {snake_name}_shader_wgsl;") + print() + print("3. Add shader definition to src/gpu/effects/shaders.cc:") + print(f" const char* {snake_name}_shader_wgsl = SafeGetAsset(AssetId::ASSET_SHADER_{upper_name});") + print() + print("4. Include header in src/gpu/demo_effects.h:") + print(f' #include "gpu/effects/{snake_name}_effect.h"') + print() + print("5. Add to timeline in workspaces/main/timeline.seq:") + print(f" EFFECT + {effect_name}Effect 0.0 10.0") + print() + print("6. Add to CMakeLists.txt GPU_SOURCES (both headless and normal mode):") + print(f" src/gpu/effects/{snake_name}_effect.cc") + print() + print("7. Update src/tests/gpu/test_demo_effects.cc:") + test_list = "post_process_effects" if is_post_process else "scene_effects" + print(f' - Add "{{{effect_name}Effect", std::make_shared<{effect_name}Effect>(fixture.ctx())}}" to {test_list} list') + print() + print("8. 
Build and test:") + print(" cmake --build build -j4") + print(" ./build/demo64k") + print() + print("Note: Review generated shader for const expression issues (normalize, etc)") + +if __name__ == '__main__': + main() diff --git a/tools/shadertoy/example.txt b/tools/shadertoy/example.txt new file mode 100644 index 0000000..e0287de --- /dev/null +++ b/tools/shadertoy/example.txt @@ -0,0 +1,25 @@ +// Example ShaderToy shader for testing convert_shadertoy.py +// Simple animated gradient effect +// +// Test with: +// ./tools/shadertoy/convert_shadertoy.py tools/shadertoy/example.txt Rainbow + +void mainImage(out vec4 fragColor, in vec2 fragCoord) { + // Normalized pixel coordinates (from 0 to 1) + vec2 uv = fragCoord / iResolution.xy; + + // Center coordinates + vec2 center = uv - 0.5; + + // Distance from center + float dist = length(center); + + // Animated rainbow colors + vec3 col = 0.5 + 0.5 * cos(iTime + dist * 10.0 + vec3(0.0, 2.0, 4.0)); + + // Pulsing effect + col *= 1.0 + 0.2 * sin(iTime * 2.0); + + // Output to screen + fragColor = vec4(col, 1.0); +} diff --git a/tools/shadertoy/template.cc b/tools/shadertoy/template.cc new file mode 100644 index 0000000..288283d --- /dev/null +++ b/tools/shadertoy/template.cc @@ -0,0 +1,120 @@ +// This file is part of the 64k demo project. 
+// ShaderToy effect implementation - REPLACE THIS LINE +// TODO: Update description, rename class + +#include "gpu/effects/shadertoy_effect.h" +#include "gpu/effects/shader_composer.h" +#include "generated/assets.h" + +// TODO: Rename class and adjust constructor parameters +ShaderToyEffect::ShaderToyEffect(const GpuContext& ctx) : Effect(ctx) { +} + +ShaderToyEffect::~ShaderToyEffect() { + if (sampler_) + wgpuSamplerRelease(sampler_); + if (bind_group_) + wgpuBindGroupRelease(bind_group_); + if (pipeline_) + wgpuRenderPipelineRelease(pipeline_); +} + +void ShaderToyEffect::init(MainSequence* demo) { + demo_ = demo; + params_.init(ctx_.device); + + WGPUSamplerDescriptor sampler_desc = {}; + sampler_desc.addressModeU = WGPUAddressMode_ClampToEdge; + sampler_desc.addressModeV = WGPUAddressMode_ClampToEdge; + sampler_desc.magFilter = WGPUFilterMode_Linear; + sampler_desc.minFilter = WGPUFilterMode_Linear; + sampler_desc.mipmapFilter = WGPUMipmapFilterMode_Linear; + sampler_desc.maxAnisotropy = 1; + sampler_ = wgpuDeviceCreateSampler(ctx_.device, &sampler_desc); + + // TODO: Update asset name to match your shader file + size_t shader_size; + const char* shader_code = (const char*)GetAsset( + AssetId::ASSET_SHADERTOY_SHADER, &shader_size); + + std::string composed = ShaderComposer::Get().Compose({}, shader_code); + + WGPUShaderSourceWGSL wgsl = {}; + wgsl.chain.sType = WGPUSType_ShaderSourceWGSL; + wgsl.code = str_view(composed.c_str()); + + WGPUShaderModuleDescriptor desc = {}; + desc.nextInChain = &wgsl.chain; + WGPUShaderModule module = wgpuDeviceCreateShaderModule(ctx_.device, &desc); + + const WGPUColorTargetState target = { + .format = ctx_.format, + .writeMask = WGPUColorWriteMask_All, + }; + WGPUFragmentState frag = {}; + frag.module = module; + frag.entryPoint = str_view("fs_main"); + frag.targetCount = 1; + frag.targets = ⌖ + + const WGPUDepthStencilState depth_stencil = { + .format = WGPUTextureFormat_Depth24Plus, + .depthWriteEnabled = 
WGPUOptionalBool_False, + .depthCompare = WGPUCompareFunction_Always, + }; + + WGPURenderPipelineDescriptor pipeline_desc = {}; + pipeline_desc.label = label_view("ShaderToyEffect"); + pipeline_desc.vertex.module = module; + pipeline_desc.vertex.entryPoint = str_view("vs_main"); + pipeline_desc.primitive.topology = WGPUPrimitiveTopology_TriangleList; + pipeline_desc.primitive.cullMode = WGPUCullMode_None; + pipeline_desc.depthStencil = &depth_stencil; + pipeline_desc.multisample.count = 1; + pipeline_desc.multisample.mask = 0xFFFFFFFF; + pipeline_desc.fragment = &frag; + + pipeline_ = wgpuDeviceCreateRenderPipeline(ctx_.device, &pipeline_desc); + wgpuShaderModuleRelease(module); + + WGPUTextureView prev_view = demo_->get_prev_texture_view(); + const WGPUBindGroupEntry entries[] = { + {.binding = 0, .sampler = sampler_}, + {.binding = 1, .textureView = prev_view}, + {.binding = 2, + .buffer = uniforms_.get().buffer, + .size = sizeof(CommonPostProcessUniforms)}, + {.binding = 3, + .buffer = params_.get().buffer, + .size = sizeof(ShaderToyParams)}, + }; + const WGPUBindGroupDescriptor bg_desc = { + .layout = wgpuRenderPipelineGetBindGroupLayout(pipeline_, 0), + .entryCount = 4, + .entries = entries, + }; + bind_group_ = wgpuDeviceCreateBindGroup(ctx_.device, &bg_desc); +} + +void ShaderToyEffect::render(WGPURenderPassEncoder pass, float time, + float beat, float intensity, float aspect_ratio) { + const CommonPostProcessUniforms uniforms = { + .resolution = {static_cast<float>(width_), static_cast<float>(height_)}, + .aspect_ratio = aspect_ratio, + .time = time, + .beat = beat, + .audio_intensity = intensity, + }; + uniforms_.update(ctx_.queue, uniforms); + + // TODO: Update parameters based on your effect + const ShaderToyParams params = { + .param1 = 1.0f, + .param2 = beat, + }; + params_.update(ctx_.queue, params); + + wgpuRenderPassEncoderSetPipeline(pass, pipeline_); + wgpuRenderPassEncoderSetBindGroup(pass, 0, bind_group_, 0, nullptr); + 
wgpuRenderPassEncoderDraw(pass, 3, 1, 0, 0); +} diff --git a/tools/shadertoy/template.h b/tools/shadertoy/template.h new file mode 100644 index 0000000..2e4af5f --- /dev/null +++ b/tools/shadertoy/template.h @@ -0,0 +1,41 @@ +// This file is part of the 64k demo project. +// ShaderToy effect boilerplate - REPLACE THIS LINE WITH DESCRIPTION +// TODO: Update description, rename class, adjust parameters + +#ifndef SHADERTOY_EFFECT_H_ +#define SHADERTOY_EFFECT_H_ + +#include "gpu/effect.h" +#include "gpu/effects/post_process_helper.h" +#include "gpu/uniform_helper.h" + +// TODO: Rename class to match your effect (e.g., TunnelEffect, PlasmaEffect) +class ShaderToyEffect : public Effect { + public: + // TODO: Add constructor parameters for tunable values + ShaderToyEffect(const GpuContext& ctx); + ~ShaderToyEffect() override; + + void init(MainSequence* demo) override; + void render(WGPURenderPassEncoder pass, float time, float beat, + float intensity, float aspect_ratio) override; + + private: + // TODO: Add effect-specific parameters here + // Must match WGSL struct exactly - use padding for 16-byte alignment + struct ShaderToyParams { + float param1; + float param2; + float _pad[2]; // Padding to 16 bytes + }; + static_assert(sizeof(ShaderToyParams) == 16, + "ShaderToyParams must be 16 bytes for WGSL alignment"); + + MainSequence* demo_ = nullptr; + WGPURenderPipeline pipeline_ = nullptr; + WGPUBindGroup bind_group_ = nullptr; + WGPUSampler sampler_ = nullptr; + UniformBuffer<ShaderToyParams> params_; +}; + +#endif /* SHADERTOY_EFFECT_H_ */ diff --git a/tools/shadertoy/template.wgsl b/tools/shadertoy/template.wgsl new file mode 100644 index 0000000..37e7def --- /dev/null +++ b/tools/shadertoy/template.wgsl @@ -0,0 +1,90 @@ +// ShaderToy conversion template for 64k demo project +// TODO: Paste ShaderToy mainImage() function below and adapt + +@group(0) @binding(0) var smplr: sampler; +@group(0) @binding(1) var txt: texture_2d<f32>; + +#include "common_uniforms" + 
+@group(0) @binding(2) var<uniform> uniforms: CommonUniforms; + +// TODO: Define your effect parameters (must match C++ struct) +struct ShaderToyParams { + param1: f32, + param2: f32, + _pad0: f32, + _pad1: f32, +} + +@group(0) @binding(3) var<uniform> params: ShaderToyParams; + +// Standard fullscreen triangle vertex shader +@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> { + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, -1.0), + vec2<f32>(3.0, -1.0), + vec2<f32>(-1.0, 3.0) + ); + return vec4<f32>(pos[i], 0.0, 1.0); +} + +// ============================================================================ +// PASTE SHADERTOY CODE HERE +// ============================================================================ +// ShaderToy → WGSL conversion notes: +// +// 1. Replace ShaderToy uniforms: +// iResolution.xy → uniforms.resolution +// iTime → uniforms.time +// fragCoord → p.xy (from @builtin(position)) +// fragColor → return value +// +// 2. Coordinate conversion: +// vec2 uv = fragCoord / iResolution.xy; +// becomes: +// let uv = p.xy / uniforms.resolution; +// +// 3. Type syntax changes: +// float → f32 +// vec2/vec3/vec4 → vec2<f32>, vec3<f32>, vec4<f32> +// mat2/mat3/mat4 → mat2x2<f32>, mat3x3<f32>, mat4x4<f32> +// +// 4. Function syntax: +// float foo(vec2 p) → fn foo(p: vec2<f32>) -> f32 +// +// 5. Common functions (mostly same): +// mix, sin, cos, length, normalize, dot, cross, etc. +// fract() → fract() +// mod(x, y) → x % y OR x - y * floor(x / y) +// +// 6. Texture sampling: +// texture(iChannel0, uv) → textureSample(txt, smplr, uv) +// +// 7. Variable declarations: +// float x = 1.0; → var x: f32 = 1.0; OR let x = 1.0; +// const float x = 1.0; → const x: f32 = 1.0; +// +// 8. Swizzling is the same: col.rgb, uv.xy, etc. 
+// +// ============================================================================ + +@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> { + // TODO: Paste and adapt ShaderToy mainImage() body here + + // Example coordinate setup (typical ShaderToy pattern): + let uv = p.xy / uniforms.resolution; + + // TODO: Your effect code here + var col = vec3<f32>(uv.x, uv.y, 0.5); + + // Optional: Sample previous frame + // var prev_col = textureSample(txt, smplr, uv); + + // Optional: Audio reactivity + // col *= 1.0 + uniforms.audio_intensity * 0.2; + + // Optional: Beat sync + // col *= 1.0 + uniforms.beat * 0.1; + + return vec4<f32>(col, 1.0); +} diff --git a/training/ground_truth.png b/training/ground_truth.png Binary files differnew file mode 100644 index 0000000..6e1f2aa --- /dev/null +++ b/training/ground_truth.png diff --git a/training/train_cnn.py b/training/train_cnn.py index 1cd6579..16f8e7a 100755 --- a/training/train_cnn.py +++ b/training/train_cnn.py @@ -5,10 +5,15 @@ CNN Training Script for Image-to-Image Transformation Trains a convolutional neural network on multiple input/target image pairs. 
Usage: + # Training python3 train_cnn.py --input input_dir/ --target target_dir/ [options] + # Inference (generate ground truth) + python3 train_cnn.py --infer image.png --export-only checkpoint.pth --output result.png + Example: python3 train_cnn.py --input ./input --target ./output --layers 3 --epochs 100 + python3 train_cnn.py --infer input.png --export-only checkpoints/checkpoint_epoch_10000.pth """ import torch @@ -62,7 +67,8 @@ class ImagePairDataset(Dataset): def __getitem__(self, idx): input_path, target_path = self.image_pairs[idx] - input_img = Image.open(input_path).convert('RGB') + # Load RGBD input (4 channels: RGB + Depth) + input_img = Image.open(input_path).convert('RGBA') target_img = Image.open(target_path).convert('RGB') if self.transform: @@ -72,27 +78,8 @@ class ImagePairDataset(Dataset): return input_img, target_img -class CoordConv2d(nn.Module): - """Conv2d that accepts coordinate input separate from spatial patches""" - - def __init__(self, in_channels, out_channels, kernel_size, padding=0): - super().__init__() - self.conv_rgba = nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, bias=False) - self.coord_weights = nn.Parameter(torch.randn(out_channels, 2) * 0.01) - self.bias = nn.Parameter(torch.zeros(out_channels)) - - def forward(self, x, coords): - # x: [B, C, H, W] image - # coords: [B, 2, H, W] coordinate grid - out = self.conv_rgba(x) - B, C, H, W = out.shape - coord_contrib = torch.einsum('bchw,oc->bohw', coords, self.coord_weights) - out = out + coord_contrib + self.bias.view(1, -1, 1, 1) - return out - - class SimpleCNN(nn.Module): - """Simple CNN for image-to-image transformation""" + """CNN for RGBD→grayscale with 7-channel input (RGBD + UV + gray)""" def __init__(self, num_layers=1, kernel_sizes=None): super(SimpleCNN, self).__init__() @@ -107,26 +94,46 @@ class SimpleCNN(nn.Module): for i, kernel_size in enumerate(kernel_sizes): padding = kernel_size // 2 - if i == 0: - self.layers.append(CoordConv2d(3, 3, 
kernel_size, padding=padding)) + if i < num_layers - 1: + # Inner layers: 7→4 (RGBD output) + self.layers.append(nn.Conv2d(7, 4, kernel_size=kernel_size, padding=padding, bias=True)) else: - self.layers.append(nn.Conv2d(3, 3, kernel_size=kernel_size, padding=padding, bias=True)) + # Final layer: 7→1 (grayscale output) + self.layers.append(nn.Conv2d(7, 1, kernel_size=kernel_size, padding=padding, bias=True)) def forward(self, x): + # x: [B,4,H,W] - RGBD input (D = 1/z) B, C, H, W = x.shape + + # Normalize RGBD to [-1,1] + x_norm = (x - 0.5) * 2.0 + + # Compute coordinates [0,1] then normalize to [-1,1] y_coords = torch.linspace(0, 1, H, device=x.device).view(1,1,H,1).expand(B,1,H,W) x_coords = torch.linspace(0, 1, W, device=x.device).view(1,1,1,W).expand(B,1,H,W) - coords = torch.cat([x_coords, y_coords], dim=1) + y_coords = (y_coords - 0.5) * 2.0 # [-1,1] + x_coords = (x_coords - 0.5) * 2.0 # [-1,1] - out = self.layers[0](x, coords) - out = torch.tanh(out) + # Compute grayscale from original RGB (Rec.709) and normalize to [-1,1] + gray = 0.2126*x[:,0:1] + 0.7152*x[:,1:2] + 0.0722*x[:,2:3] # [B,1,H,W] in [0,1] + gray = (gray - 0.5) * 2.0 # [-1,1] - for i in range(1, len(self.layers)): - out = self.layers[i](out) - if i < len(self.layers) - 1: - out = torch.tanh(out) + # Layer 0 + layer0_input = torch.cat([x_norm, x_coords, y_coords, gray], dim=1) # [B,7,H,W] + out = self.layers[0](layer0_input) # [B,4,H,W] + out = torch.tanh(out) # [-1,1] - return out + # Inner layers + for i in range(1, len(self.layers)-1): + layer_input = torch.cat([out, x_coords, y_coords, gray], dim=1) + out = self.layers[i](layer_input) + out = torch.tanh(out) + + # Final layer (grayscale output) + final_input = torch.cat([out, x_coords, y_coords, gray], dim=1) + out = self.layers[-1](final_input) # [B,1,H,W] + out = torch.clamp(out, 0.0, 1.0) # Clip to [0,1] + return out.expand(-1, 3, -1, -1) def generate_layer_shader(output_path, num_layers, kernel_sizes): @@ -163,37 +170,49 @@ def 
generate_layer_shader(output_path, num_layers, kernel_sizes): f.write("}\n\n") f.write("@fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> {\n") f.write(" let uv = p.xy / uniforms.resolution;\n") - f.write(" let input = textureSample(txt, smplr, uv);\n") - f.write(" let original = textureSample(original_input, smplr, uv);\n") + f.write(" let original_raw = textureSample(original_input, smplr, uv);\n") + f.write(" let original = (original_raw - 0.5) * 2.0; // Normalize to [-1,1]\n") f.write(" var result = vec4<f32>(0.0);\n\n") # Generate layer switches for layer_idx in range(num_layers): + is_final = layer_idx == num_layers - 1 ks = kernel_sizes[layer_idx] + conv_fn = f"cnn_conv{ks}x{ks}_7to4" if not is_final else f"cnn_conv{ks}x{ks}_7to1" + if layer_idx == 0: - f.write(f" // Layer 0 uses coordinate-aware convolution\n") + conv_fn_src = f"cnn_conv{ks}x{ks}_7to4_src" + f.write(f" // Layer 0: 7→4 (RGBD output, normalizes [0,1] input)\n") f.write(f" if (params.layer_index == {layer_idx}) {{\n") - f.write(f" result = cnn_conv{ks}x{ks}_with_coord(txt, smplr, uv, uniforms.resolution,\n") - f.write(f" rgba_weights_layer{layer_idx}, coord_weights_layer{layer_idx}, bias_layer{layer_idx});\n") + f.write(f" result = {conv_fn_src}(txt, smplr, uv, uniforms.resolution,\n") + f.write(f" weights_layer{layer_idx});\n") f.write(f" result = cnn_tanh(result);\n") f.write(f" }}\n") + elif not is_final: + f.write(f" else if (params.layer_index == {layer_idx}) {{\n") + f.write(f" result = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n") + f.write(f" original, weights_layer{layer_idx});\n") + f.write(f" result = cnn_tanh(result); // Keep in [-1,1]\n") + f.write(f" }}\n") else: - is_last = layer_idx == num_layers - 1 - f.write(f" {'else ' if layer_idx > 0 else ''}if (params.layer_index == {layer_idx}) {{\n") - f.write(f" result = cnn_conv{ks}x{ks}(txt, smplr, uv, uniforms.resolution,\n") - f.write(f" weights_layer{layer_idx}, bias_layer{layer_idx});\n") - if 
not is_last: - f.write(f" result = cnn_tanh(result);\n") + f.write(f" else if (params.layer_index == {layer_idx}) {{\n") + f.write(f" let gray_out = {conv_fn}(txt, smplr, uv, uniforms.resolution,\n") + f.write(f" original, weights_layer{layer_idx});\n") + f.write(f" // gray_out already in [0,1] from clipped training\n") + f.write(f" let original_denorm = (original + 1.0) * 0.5;\n") + f.write(f" result = vec4<f32>(gray_out, gray_out, gray_out, 1.0);\n") + f.write(f" let blended = mix(original_denorm, result, params.blend_amount);\n") + f.write(f" return blended; // [0,1]\n") f.write(f" }}\n") # Add else clause for invalid layer index - if num_layers > 1: + if num_layers > 0: f.write(f" else {{\n") - f.write(f" result = input;\n") + f.write(f" return textureSample(txt, smplr, uv);\n") f.write(f" }}\n") - f.write("\n // Blend with ORIGINAL input from layer 0\n") - f.write(" return mix(original, result, params.blend_amount);\n") + f.write("\n // Non-final layers: denormalize for display\n") + f.write(" return (result + 1.0) * 0.5; // [-1,1] → [0,1]\n") f.write("}\n") @@ -204,95 +223,95 @@ def export_weights_to_wgsl(model, output_path, kernel_sizes): f.write("// Auto-generated CNN weights\n") f.write("// DO NOT EDIT - Generated by train_cnn.py\n\n") - layer_idx = 0 for i, layer in enumerate(model.layers): - if isinstance(layer, CoordConv2d): - # Export RGBA weights - weights = layer.conv_rgba.weight.data.cpu().numpy() - kernel_size = kernel_sizes[layer_idx] - out_ch, in_ch, kh, kw = weights.shape - num_positions = kh * kw + weights = layer.weight.data.cpu().numpy() + bias = layer.bias.data.cpu().numpy() + out_ch, in_ch, kh, kw = weights.shape + num_positions = kh * kw - f.write(f"const rgba_weights_layer{layer_idx}: array<mat4x4<f32>, {num_positions}> = array(\n") + is_final = (i == len(model.layers) - 1) + + if is_final: + # Final layer: 7→1, structure: array<array<f32, 8>, 9> + # [w0, w1, w2, w3, w4, w5, w6, bias] + f.write(f"const weights_layer{i}: array<array<f32, 
8>, {num_positions}> = array(\n") for pos in range(num_positions): - row = pos // kw - col = pos % kw - f.write(" mat4x4<f32>(\n") - for out_c in range(4): - vals = [] - for in_c in range(4): - if out_c < out_ch and in_c < in_ch: - vals.append(f"{weights[out_c, in_c, row, col]:.6f}") - else: - vals.append("0.0") - f.write(f" {', '.join(vals)},\n") - f.write(" )") - if pos < num_positions - 1: - f.write(",\n") - else: - f.write("\n") + row, col = pos // kw, pos % kw + vals = [f"{weights[0, in_c, row, col]:.6f}" for in_c in range(7)] + vals.append(f"{bias[0]:.6f}") # Append bias as 8th element + f.write(f" array<f32, 8>({', '.join(vals)})") + f.write(",\n" if pos < num_positions-1 else "\n") f.write(");\n\n") - - # Export coordinate weights - coord_w = layer.coord_weights.data.cpu().numpy() - f.write(f"const coord_weights_layer{layer_idx} = mat2x4<f32>(\n") - for c in range(2): - vals = [] + else: + # Inner layers: 7→4, structure: array<array<f32, 8>, 36> + # Flattened: [pos0_ch0[7w+bias], pos0_ch1[7w+bias], ..., pos8_ch3[7w+bias]] + num_entries = num_positions * 4 + f.write(f"const weights_layer{i}: array<array<f32, 8>, {num_entries}> = array(\n") + for pos in range(num_positions): + row, col = pos // kw, pos % kw for out_c in range(4): - if out_c < coord_w.shape[0]: - vals.append(f"{coord_w[out_c, c]:.6f}") - else: - vals.append("0.0") - f.write(f" {', '.join(vals)}") - if c < 1: - f.write(",\n") - else: - f.write("\n") + vals = [f"{weights[out_c, in_c, row, col]:.6f}" for in_c in range(7)] + vals.append(f"{bias[out_c]:.6f}") # Append bias + idx = pos * 4 + out_c + f.write(f" array<f32, 8>({', '.join(vals)})") + f.write(",\n" if idx < num_entries-1 else "\n") f.write(");\n\n") - # Export bias - bias = layer.bias.data.cpu().numpy() - bias_vals = [f"{bias[i]:.6f}" if i < len(bias) else "0.0" for i in range(4)] - f.write(f"const bias_layer{layer_idx} = vec4<f32>(") - f.write(", ".join(bias_vals)) - f.write(");\n\n") - layer_idx += 1 - elif isinstance(layer, 
nn.Conv2d): - # Standard conv layer - weights = layer.weight.data.cpu().numpy() - kernel_size = kernel_sizes[layer_idx] - out_ch, in_ch, kh, kw = weights.shape - num_positions = kh * kw +def generate_conv_src_function(kernel_size, output_path): + """Generate cnn_conv{K}x{K}_7to4_src() function for layer 0""" - f.write(f"const weights_layer{layer_idx}: array<mat4x4<f32>, {num_positions}> = array(\n") - for pos in range(num_positions): - row = pos // kw - col = pos % kw - f.write(" mat4x4<f32>(\n") - for out_c in range(4): - vals = [] - for in_c in range(4): - if out_c < out_ch and in_c < in_ch: - vals.append(f"{weights[out_c, in_c, row, col]:.6f}") - else: - vals.append("0.0") - f.write(f" {', '.join(vals)},\n") - f.write(" )") - if pos < num_positions - 1: - f.write(",\n") - else: - f.write("\n") - f.write(");\n\n") + k = kernel_size + num_positions = k * k + radius = k // 2 - # Export bias - bias = layer.bias.data.cpu().numpy() - bias_vals = [f"{bias[i]:.6f}" if i < len(bias) else "0.0" for i in range(4)] - f.write(f"const bias_layer{layer_idx} = vec4<f32>(") - f.write(", ".join(bias_vals)) - f.write(");\n\n") + with open(output_path, 'a') as f: + f.write(f"\n// Source layer: 7→4 channels (RGBD output)\n") + f.write(f"// Normalizes [0,1] input to [-1,1] internally\n") + f.write(f"fn cnn_conv{k}x{k}_7to4_src(\n") + f.write(f" tex: texture_2d<f32>,\n") + f.write(f" samp: sampler,\n") + f.write(f" uv: vec2<f32>,\n") + f.write(f" resolution: vec2<f32>,\n") + f.write(f" weights: array<array<f32, 8>, {num_positions * 4}>\n") + f.write(f") -> vec4<f32> {{\n") + f.write(f" let step = 1.0 / resolution;\n\n") + + # Normalize center pixel for gray channel + f.write(f" let original = (textureSample(tex, samp, uv) - 0.5) * 2.0;\n") + f.write(f" let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b;\n") + f.write(f" let uv_norm = (uv - 0.5) * 2.0;\n\n") + + f.write(f" var sum = vec4<f32>(0.0);\n") + f.write(f" var pos = 0;\n\n") + + # Convolution loop + 
f.write(f" for (var dy = -{radius}; dy <= {radius}; dy++) {{\n") + f.write(f" for (var dx = -{radius}; dx <= {radius}; dx++) {{\n") + f.write(f" let offset = vec2<f32>(f32(dx), f32(dy)) * step;\n") + f.write(f" let rgbd = (textureSample(tex, samp, uv + offset) - 0.5) * 2.0;\n\n") + + # 7-channel input + f.write(f" let inputs = array<f32, 7>(\n") + f.write(f" rgbd.r, rgbd.g, rgbd.b, rgbd.a,\n") + f.write(f" uv_norm.x, uv_norm.y, gray\n") + f.write(f" );\n\n") + + # Accumulate + f.write(f" for (var out_c = 0; out_c < 4; out_c++) {{\n") + f.write(f" let idx = pos * 4 + out_c;\n") + f.write(f" var channel_sum = weights[idx][7];\n") + f.write(f" for (var in_c = 0; in_c < 7; in_c++) {{\n") + f.write(f" channel_sum += weights[idx][in_c] * inputs[in_c];\n") + f.write(f" }}\n") + f.write(f" sum[out_c] += channel_sum;\n") + f.write(f" }}\n") + f.write(f" pos++;\n") + f.write(f" }}\n") + f.write(f" }}\n\n") - layer_idx += 1 + f.write(f" return sum;\n") + f.write(f"}}\n") def train(args): @@ -382,6 +401,24 @@ def train(args): print(f"Generating layer shader to {shader_path}...") generate_layer_shader(shader_path, args.layers, kernel_sizes) + # Generate _src variants for kernel sizes (skip 3x3, already exists) + for ks in set(kernel_sizes): + if ks == 3: + continue + conv_path = os.path.join(shader_dir, f'cnn_conv{ks}x{ks}.wgsl') + if not os.path.exists(conv_path): + print(f"Warning: {conv_path} not found, skipping _src generation") + continue + + # Check if _src already exists + with open(conv_path, 'r') as f: + content = f.read() + if f"cnn_conv{ks}x{ks}_7to4_src" in content: + continue + + generate_conv_src_function(ks, conv_path) + print(f"Added _src variant to {conv_path}") + print("Training complete!") @@ -414,26 +451,94 @@ def export_from_checkpoint(checkpoint_path, output_path=None): print(f"Generating layer shader to {shader_path}...") generate_layer_shader(shader_path, num_layers, kernel_sizes) + # Generate _src variants for kernel sizes (skip 3x3, already exists) + 
for ks in set(kernel_sizes): + if ks == 3: + continue + conv_path = os.path.join(shader_dir, f'cnn_conv{ks}x{ks}.wgsl') + if not os.path.exists(conv_path): + print(f"Warning: {conv_path} not found, skipping _src generation") + continue + + # Check if _src already exists + with open(conv_path, 'r') as f: + content = f.read() + if f"cnn_conv{ks}x{ks}_7to4_src" in content: + continue + + generate_conv_src_function(ks, conv_path) + print(f"Added _src variant to {conv_path}") + print("Export complete!") +def infer_from_checkpoint(checkpoint_path, input_path, output_path): + """Run inference on single image to generate ground truth""" + + if not os.path.exists(checkpoint_path): + print(f"Error: Checkpoint '{checkpoint_path}' not found") + sys.exit(1) + + if not os.path.exists(input_path): + print(f"Error: Input image '{input_path}' not found") + sys.exit(1) + + print(f"Loading checkpoint from {checkpoint_path}...") + checkpoint = torch.load(checkpoint_path, map_location='cpu') + + # Reconstruct model + model = SimpleCNN( + num_layers=checkpoint['num_layers'], + kernel_sizes=checkpoint['kernel_sizes'] + ) + model.load_state_dict(checkpoint['model_state']) + model.eval() + + # Load image [0,1] + print(f"Loading input image: {input_path}") + img = Image.open(input_path).convert('RGBA') + img_tensor = transforms.ToTensor()(img).unsqueeze(0) # [1,4,H,W] + + # Inference + print("Running inference...") + with torch.no_grad(): + out = model(img_tensor) # [1,3,H,W] in [0,1] + + # Save + print(f"Saving output to: {output_path}") + os.makedirs(os.path.dirname(output_path), exist_ok=True) + transforms.ToPILImage()(out.squeeze(0)).save(output_path) + print("Done!") + + def main(): parser = argparse.ArgumentParser(description='Train CNN for image-to-image transformation') - parser.add_argument('--input', help='Input image directory') + parser.add_argument('--input', help='Input image directory (training) or single image (inference)') parser.add_argument('--target', help='Target image 
directory') parser.add_argument('--layers', type=int, default=1, help='Number of CNN layers (default: 1)') parser.add_argument('--kernel_sizes', default='3', help='Comma-separated kernel sizes (default: 3)') parser.add_argument('--epochs', type=int, default=100, help='Number of training epochs (default: 100)') parser.add_argument('--batch_size', type=int, default=4, help='Batch size (default: 4)') parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning rate (default: 0.001)') - parser.add_argument('--output', help='Output WGSL file path (default: workspaces/main/shaders/cnn/cnn_weights_generated.wgsl)') + parser.add_argument('--output', help='Output path (WGSL for training/export, PNG for inference)') parser.add_argument('--checkpoint-every', type=int, default=0, help='Save checkpoint every N epochs (default: 0 = disabled)') parser.add_argument('--checkpoint-dir', help='Checkpoint directory (default: training/checkpoints)') parser.add_argument('--resume', help='Resume from checkpoint file') parser.add_argument('--export-only', help='Export WGSL from checkpoint without training') + parser.add_argument('--infer', help='Run inference on single image (requires --export-only for checkpoint)') args = parser.parse_args() + # Inference mode + if args.infer: + checkpoint = args.export_only + if not checkpoint: + print("Error: --infer requires --export-only <checkpoint>") + sys.exit(1) + output_path = args.output or 'inference_output.png' + infer_from_checkpoint(checkpoint, args.infer, output_path) + return + # Export-only mode if args.export_only: export_from_checkpoint(args.export_only, args.output) diff --git a/workspaces/main/assets.txt b/workspaces/main/assets.txt index 53c8b3e..af8b9e9 100644 --- a/workspaces/main/assets.txt +++ b/workspaces/main/assets.txt @@ -67,3 +67,4 @@ SHADER_COMPUTE_GEN_MASK, NONE, shaders/compute/gen_mask.wgsl, "GPU Mask Composit CIRCLE_MASK_COMPUTE_SHADER, NONE, shaders/circle_mask_compute.wgsl, "Circle mask compute 
shader" CIRCLE_MASK_RENDER_SHADER, NONE, shaders/circle_mask_render.wgsl, "Circle mask render shader" MASKED_CUBE_SHADER, NONE, shaders/masked_cube.wgsl, "Masked cube shader" +SHADER_SCENE1, NONE, shaders/scene1.wgsl, "Scene1 effect shader" diff --git a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl index 168c9e2..96ddf5b 100644 --- a/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl +++ b/workspaces/main/shaders/cnn/cnn_conv3x3.wgsl @@ -1,53 +1,148 @@ // 3x3 convolution with weight indexing -// Samples 9 pixels, applies mat4 weights per sample -fn cnn_conv3x3( +// Source layers: 7→4 channels (RGBD output) +// Assumes 'tex' (the input) is *not* normalized to [-1,1], but is [0,1] +// UV coordinates remain in [0,1] and are normalized internally +// weights: array<array<f32, 8>, 36> (9 positions × 4 channels, each with 7 weights + bias) +fn cnn_conv3x3_7to4_src( tex: texture_2d<f32>, samp: sampler, uv: vec2<f32>, resolution: vec2<f32>, - weights: array<mat4x4<f32>, 9>, - bias: vec4<f32> + weights: array<array<f32, 8>, 36> ) -> vec4<f32> { let step = 1.0 / resolution; - var sum = bias; - var idx = 0; + // Compute grayscale from original (converted in [-1,1]) + let original = (textureSample(tex, samp, uv) - 0.5) * 2.0; + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; + + // Normalize UV to [-1,1] + let uv_norm = (uv - 0.5) * 2.0; + + var sum = vec4<f32>(0.0); + + var pos = 0; for (var dy = -1; dy <= 1; dy++) { for (var dx = -1; dx <= 1; dx++) { let offset = vec2<f32>(f32(dx), f32(dy)) * step; - let sample = textureSample(tex, samp, uv + offset); - sum += weights[idx] * sample; - idx++; + let rgbd = (textureSample(tex, samp, uv + offset) - .5) * 2.0; // convert to [-1,1] + + // 7-channel input: [R,G,B,D, uv.x, uv.y, gray] all in [-1,1] + let inputs = array<f32, 7>( + rgbd.r, rgbd.g, rgbd.b, rgbd.a, + uv_norm.x, uv_norm.y, gray + ); + + // Accumulate for each output channel (RGBD) + for (var out_c = 0; out_c < 
4; out_c++) { + let idx = pos * 4 + out_c; + var channel_sum = weights[idx][7]; // Bias (8th element) + for (var in_c = 0; in_c < 7; in_c++) { + channel_sum += weights[idx][in_c] * inputs[in_c]; + } + sum[out_c] += channel_sum; + } + + pos++; } } - return sum; + return sum; // Output in [-1,1] range } -fn cnn_conv3x3_with_coord( +// Inner layers: 7→4 channels (RGBD output) +// Assumes 'tex' and 'original' are already normalized to [-1,1] +// UV coordinates remain in [0,1] and are normalized internally +// weights: array<array<f32, 8>, 36> (9 positions × 4 channels, each with 7 weights + bias) +fn cnn_conv3x3_7to4( tex: texture_2d<f32>, samp: sampler, uv: vec2<f32>, resolution: vec2<f32>, - rgba_weights: array<mat4x4<f32>, 9>, - coord_weights: mat2x4<f32>, - bias: vec4<f32> + original: vec4<f32>, + weights: array<array<f32, 8>, 36> ) -> vec4<f32> { let step = 1.0 / resolution; - var sum = bias; - sum += coord_weights * uv; + // Compute grayscale from original (already in [-1,1]) + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; + + // Normalize UV to [-1,1] + let uv_norm = (uv - 0.5) * 2.0; + + var sum = vec4<f32>(0.0); + + var pos = 0; + for (var dy = -1; dy <= 1; dy++) { + for (var dx = -1; dx <= 1; dx++) { + let offset = vec2<f32>(f32(dx), f32(dy)) * step; + let rgbd = textureSample(tex, samp, uv + offset); // Already in [-1,1] + + // 7-channel input: [R,G,B,D, uv.x, uv.y, gray] all in [-1,1] + let inputs = array<f32, 7>( + rgbd.r, rgbd.g, rgbd.b, rgbd.a, + uv_norm.x, uv_norm.y, gray + ); + + // Accumulate for each output channel (RGBD) + for (var out_c = 0; out_c < 4; out_c++) { + let idx = pos * 4 + out_c; + var channel_sum = weights[idx][7]; // Bias (8th element) + for (var in_c = 0; in_c < 7; in_c++) { + channel_sum += weights[idx][in_c] * inputs[in_c]; + } + sum[out_c] += channel_sum; + } + + pos++; + } + } + + return sum; // Output in [-1,1] range +} + +// Final layer: 7→1 channel (scalar output) +// Assumes 'tex' and 'original' are 
already normalized to [-1,1] +// UV coordinates remain in [0,1] and are normalized internally +// weights: array<array<f32, 8>, 9> (9 positions, each with 7 weights + bias) +fn cnn_conv3x3_7to1( + tex: texture_2d<f32>, + samp: sampler, + uv: vec2<f32>, + resolution: vec2<f32>, + original: vec4<f32>, + weights: array<array<f32, 8>, 9> +) -> f32 { + let step = 1.0 / resolution; + + // Compute grayscale from original (already in [-1,1]) + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; + + // Normalize UV to [-1,1] + let uv_norm = (uv - 0.5) * 2.0; - var idx = 0; + var sum = 0.0; + + var pos = 0; for (var dy = -1; dy <= 1; dy++) { for (var dx = -1; dx <= 1; dx++) { let offset = vec2<f32>(f32(dx), f32(dy)) * step; - let rgba = textureSample(tex, samp, uv + offset); - sum += rgba_weights[idx] * rgba; - idx++; + let rgbd = textureSample(tex, samp, uv + offset); // Already in [-1,1] + + // 7-channel input all in [-1,1] + sum += weights[pos][0] * rgbd.r; + sum += weights[pos][1] * rgbd.g; + sum += weights[pos][2] * rgbd.b; + sum += weights[pos][3] * rgbd.a; + sum += weights[pos][4] * uv_norm.x; + sum += weights[pos][5] * uv_norm.y; + sum += weights[pos][6] * gray; + sum += weights[pos][7]; // Bias + + pos++; } } - return sum; + return sum; // Output in [-1,1] } diff --git a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl index bd9abfa..5136740 100644 --- a/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl +++ b/workspaces/main/shaders/cnn/cnn_conv5x5.wgsl @@ -1,53 +1,85 @@ -// 5x5 convolution with 25 samples -// Applies mat4 weights per sample - -fn cnn_conv5x5( +// 5×5 variant for 7→4 channels (RGBD output) +// Assumes 'tex' and 'original' are already normalized to [-1,1] +// UV coordinates remain in [0,1] and are normalized internally +// weights: array<array<f32, 8>, 100> (25 positions × 4 channels, each with 7 weights + bias) +fn cnn_conv5x5_7to4( tex: texture_2d<f32>, samp: sampler, uv: vec2<f32>, 
resolution: vec2<f32>, - weights: array<mat4x4<f32>, 25>, - bias: vec4<f32> + original: vec4<f32>, + weights: array<array<f32, 8>, 100> ) -> vec4<f32> { let step = 1.0 / resolution; - var sum = bias; - var idx = 0; + + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; + let uv_norm = (uv - 0.5) * 2.0; + + var sum = vec4<f32>(0.0); + var pos = 0; for (var dy = -2; dy <= 2; dy++) { for (var dx = -2; dx <= 2; dx++) { let offset = vec2<f32>(f32(dx), f32(dy)) * step; - let sample = textureSample(tex, samp, uv + offset); - sum += weights[idx] * sample; - idx++; + let rgbd = textureSample(tex, samp, uv + offset); // Already in [-1,1] + + let inputs = array<f32, 7>( + rgbd.r, rgbd.g, rgbd.b, rgbd.a, + uv_norm.x, uv_norm.y, gray + ); + + for (var out_c = 0; out_c < 4; out_c++) { + let idx = pos * 4 + out_c; + var channel_sum = weights[idx][7]; + for (var in_c = 0; in_c < 7; in_c++) { + channel_sum += weights[idx][in_c] * inputs[in_c]; + } + sum[out_c] += channel_sum; + } + pos++; } } return sum; } -fn cnn_conv5x5_with_coord( +// 5×5 variant for 7→1 channel (scalar output) +// Assumes 'tex' and 'original' are already normalized to [-1,1] +// UV coordinates remain in [0,1] and are normalized internally +// weights: array<array<f32, 8>, 25> (25 positions, each with 7 weights + bias) +fn cnn_conv5x5_7to1( tex: texture_2d<f32>, samp: sampler, uv: vec2<f32>, resolution: vec2<f32>, - rgba_weights: array<mat4x4<f32>, 25>, - coord_weights: mat2x4<f32>, - bias: vec4<f32> -) -> vec4<f32> { + original: vec4<f32>, + weights: array<array<f32, 8>, 25> +) -> f32 { let step = 1.0 / resolution; - var sum = bias; - sum += coord_weights * uv; + let gray = 0.2126*original.r + 0.7152*original.g + 0.0722*original.b; + let uv_norm = (uv - 0.5) * 2.0; + + var sum = 0.0; + var pos = 0; - var idx = 0; for (var dy = -2; dy <= 2; dy++) { for (var dx = -2; dx <= 2; dx++) { let offset = vec2<f32>(f32(dx), f32(dy)) * step; - let rgba = textureSample(tex, samp, uv + offset); - sum += 
rgba_weights[idx] * rgba; - idx++; + let rgbd = textureSample(tex, samp, uv + offset); // Already in [-1,1] + + sum += weights[pos][0] * rgbd.r; + sum += weights[pos][1] * rgbd.g; + sum += weights[pos][2] * rgbd.b; + sum += weights[pos][3] * rgbd.a; + sum += weights[pos][4] * uv_norm.x; + sum += weights[pos][5] * uv_norm.y; + sum += weights[pos][6] * gray; + sum += weights[pos][7]; // Bias + + pos++; } } - return sum; + return sum; // Output in [-1,1] } diff --git a/workspaces/main/shaders/cnn/cnn_layer.wgsl b/workspaces/main/shaders/cnn/cnn_layer.wgsl index 5834f78..1b1b539 100644 --- a/workspaces/main/shaders/cnn/cnn_layer.wgsl +++ b/workspaces/main/shaders/cnn/cnn_layer.wgsl @@ -8,6 +8,7 @@ #include "common_uniforms" #include "cnn_activation" #include "cnn_conv3x3" +#include "cnn_conv5x5" #include "cnn_weights_generated" struct CNNLayerParams { @@ -29,28 +30,28 @@ struct CNNLayerParams { @fragment fn fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> { let uv = p.xy / uniforms.resolution; - let input = textureSample(txt, smplr, uv); - let original = textureSample(original_input, smplr, uv); + let original = (textureSample(original_input, smplr, uv) - 0.5) * 2.0; // Normalize to [-1,1] var result = vec4<f32>(0.0); - // Layer 0 uses coordinate-aware convolution + // Layer 0: 7→4 (RGBD output) if (params.layer_index == 0) { - result = cnn_conv3x3_with_coord(txt, smplr, uv, uniforms.resolution, - rgba_weights_layer0, coord_weights_layer0, bias_layer0); - result = cnn_tanh(result); + result = cnn_conv3x3_7to4_src(txt, smplr, uv, uniforms.resolution, weights_layer0); + result = cnn_tanh(result); // Keep in [-1,1] } else if (params.layer_index == 1) { - result = cnn_conv3x3(txt, smplr, uv, uniforms.resolution, - weights_layer1, bias_layer1); - result = cnn_tanh(result); - } - else if (params.layer_index == 2) { - result = cnn_conv3x3(txt, smplr, uv, uniforms.resolution, - weights_layer2, bias_layer2); - } - else { - result = input; + result = 
cnn_conv5x5_7to4(txt, smplr, uv, uniforms.resolution, + original, weights_layer1); + result = cnn_tanh(result); // Keep in [-1,1] } + else if (params.layer_index == 2) { // last layer + let gray_out = cnn_conv3x3_7to1(txt, smplr, uv, uniforms.resolution, + original, weights_layer2); - return mix(original, result, params.blend_amount); + // At this point here, 'gray_out' is what the training script should have learned. + // Below is some extra code for visual output, excluded from training: + result = vec4<f32>(gray_out, gray_out, gray_out, 1.0); // Keep in [-1,1] + let blended = mix(original, result, params.blend_amount); + return (blended + 1.0) * 0.5; // Denormalize to [0,1] for display + } + return result; } diff --git a/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl b/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl index 6052ac5..e38669f 100644 --- a/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl +++ b/workspaces/main/shaders/cnn/cnn_weights_generated.wgsl @@ -1,185 +1,157 @@ // Auto-generated CNN weights // DO NOT EDIT - Generated by train_cnn.py -const rgba_weights_layer0: array<mat4x4<f32>, 9> = array( - mat4x4<f32>( - -0.181929, -0.244329, -0.354404, 0.0, - -0.291597, -0.195653, 0.081896, 0.0, - 0.081595, 0.164081, -0.236318, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 0.731888, 0.717648, 0.524081, 0.0, - -0.029760, -0.208000, 0.008438, 0.0, - 0.442082, 0.354681, 0.049288, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.623141, -0.695759, -0.087885, 0.0, - 0.043135, 0.071979, 0.213065, 0.0, - 0.011581, 0.110995, 0.034100, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 0.170016, 0.188298, 0.134083, 0.0, - -0.222954, -0.088011, 0.015668, 0.0, - 0.921836, 0.437158, 0.061577, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 1.431940, 1.148113, 1.238067, 0.0, - -0.212535, 0.366860, 0.320956, 0.0, - 0.771192, 0.765570, 0.029189, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 0.171088, 0.000155, 0.212552, 0.0, - 0.029536, 
0.447892, 0.041381, 0.0, - 0.011807, -0.167281, -0.200702, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.668151, -0.813927, -0.132108, 0.0, - -0.156250, 0.179112, -0.069585, 0.0, - 0.403347, 0.482877, 0.182611, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.609871, -0.768480, -0.590538, 0.0, - -0.171854, 0.150167, 0.105694, 0.0, - -0.059052, 0.066999, -0.244222, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.112983, -0.066299, 0.117696, 0.0, - -0.172541, 0.095008, -0.160754, 0.0, - -0.369667, -0.000628, 0.163602, 0.0, - 0.0, 0.0, 0.0, 0.0, - ) +const weights_layer0: array<array<f32, 8>, 36> = array( + array<f32, 8>(0.074911, 0.143202, 0.086903, 0.070680, -0.031904, 0.122884, 0.191824, 0.071112), + array<f32, 8>(0.081964, 0.033505, 0.058371, -0.015971, -0.069732, -0.014956, 0.142861, 0.119666), + array<f32, 8>(0.231883, -0.160763, -0.147218, 0.161321, -0.031718, -0.065766, 0.093359, 0.171734), + array<f32, 8>(0.082047, 0.288492, 0.121087, 0.001740, -0.104745, -0.071150, 0.031105, 0.037989), + array<f32, 8>(0.139236, 0.160690, 0.022091, 0.070994, 0.008793, 0.059247, 0.215077, 0.071112), + array<f32, 8>(0.128842, 0.268017, -0.031546, 0.068152, -0.073793, 0.124100, 0.252295, 0.119666), + array<f32, 8>(0.077193, -0.080009, -0.160674, 0.101131, -0.152167, -0.035271, 0.067397, 0.171734), + array<f32, 8>(-0.073119, 0.204309, 0.005654, 0.101254, -0.063530, -0.040801, 0.213393, 0.037989), + array<f32, 8>(-0.024175, 0.018739, 0.095518, 0.096945, 0.088315, 0.079085, -0.069127, 0.071112), + array<f32, 8>(0.219014, 0.218505, 0.014228, 0.014379, 0.075954, -0.001065, 0.201142, 0.119666), + array<f32, 8>(0.182743, -0.041270, -0.085458, 0.092904, 0.020316, 0.036077, 0.020220, 0.171734), + array<f32, 8>(-0.210247, -0.072180, 0.017628, 0.084834, 0.050409, -0.067274, -0.130565, 0.037989), + array<f32, 8>(0.071649, -0.072076, -0.109385, -0.012436, 0.041505, -0.013451, -0.068780, 0.071112), + array<f32, 8>(0.083389, 0.133852, -0.018137, 0.086250, -0.006205, 0.052853, 
0.137369, 0.119666), + array<f32, 8>(0.023275, 0.036871, -0.092898, -0.059569, -0.029758, -0.089218, -0.031705, 0.171734), + array<f32, 8>(0.054874, 0.290596, 0.157026, -0.127200, 0.054010, -0.163627, 0.185273, 0.037989), + array<f32, 8>(0.069455, -0.122527, 0.010922, -0.051404, -0.067941, 0.122001, 0.034784, 0.071112), + array<f32, 8>(0.263187, 0.346644, 0.094376, 0.080049, -0.013980, -0.020629, 0.287019, 0.119666), + array<f32, 8>(0.078601, -0.045813, 0.048391, 0.107248, -0.001537, 0.003619, 0.040853, 0.171734), + array<f32, 8>(-0.052910, 0.333324, -0.028273, 0.111413, 0.059925, 0.054957, 0.257592, 0.037989), + array<f32, 8>(0.037894, 0.001266, 0.039858, 0.027731, 0.156182, 0.094188, 0.021791, 0.071112), + array<f32, 8>(0.220401, 0.241493, 0.138405, 0.082160, 0.144517, -0.050410, 0.257101, 0.119666), + array<f32, 8>(0.055409, -0.103410, 0.049778, -0.023193, -0.116368, -0.085046, 0.047003, 0.171734), + array<f32, 8>(0.019721, 0.099621, 0.005697, -0.069641, -0.100712, 0.044279, -0.104894, 0.037989), + array<f32, 8>(0.132833, 0.144224, 0.075612, -0.052095, -0.027924, 0.029124, -0.012077, 0.071112), + array<f32, 8>(0.146387, 0.098381, 0.131536, 0.034274, -0.073611, 0.080596, 0.124333, 0.119666), + array<f32, 8>(0.118243, -0.165692, -0.091107, 0.001822, 0.003771, -0.053877, -0.045592, 0.171734), + array<f32, 8>(-0.146034, 0.167379, 0.036433, -0.074485, 0.047772, 0.007719, -0.057026, 0.037989), + array<f32, 8>(-0.105517, -0.143677, 0.006013, 0.038752, 0.082525, -0.070290, -0.082964, 0.071112), + array<f32, 8>(0.084325, 0.192342, 0.005734, 0.083787, 0.010618, 0.076732, 0.206159, 0.119666), + array<f32, 8>(0.025873, -0.002030, -0.008453, 0.189578, 0.077363, 0.014099, 0.086760, 0.171734), + array<f32, 8>(-0.040145, 0.209639, 0.131112, 0.021154, -0.046391, -0.055185, 0.110424, 0.037989), + array<f32, 8>(-0.091272, -0.149872, -0.018825, 0.109157, 0.037674, -0.067088, -0.199940, 0.071112), + array<f32, 8>(0.170814, 0.171591, -0.039657, 0.146638, -0.054918, -0.043451, 
0.262821, 0.119666), + array<f32, 8>(0.183810, -0.147660, -0.144689, 0.045301, 0.055273, 0.017425, 0.136362, 0.171734), + array<f32, 8>(-0.078196, 0.116630, -0.138657, -0.140199, -0.052198, -0.040295, -0.093252, 0.037989) ); -const coord_weights_layer0 = mat2x4<f32>( - 0.059076, -0.026617, -0.005155, 0.0, - 0.135407, -0.090329, 0.058216, 0.0 +const weights_layer1: array<array<f32, 8>, 100> = array( + array<f32, 8>(0.016434, 0.032528, 0.014184, -0.048073, 0.017625, 0.025898, 0.035017, -0.024743), + array<f32, 8>(-0.086195, 0.041682, 0.071182, -0.062423, -0.016809, -0.004450, -0.035094, 0.087283), + array<f32, 8>(-0.070627, 0.033625, 0.025104, -0.086014, -0.037153, -0.019897, 0.046995, -0.025117), + array<f32, 8>(-0.042999, 0.043590, -0.107547, 0.114733, -0.006566, 0.067189, 0.042215, -0.019137), + array<f32, 8>(-0.105321, 0.188555, -0.033070, 0.005478, -0.019701, -0.006125, -0.006207, -0.024743), + array<f32, 8>(-0.018644, 0.021491, 0.042713, 0.047051, 0.009930, -0.074932, 0.016032, 0.087283), + array<f32, 8>(-0.036977, 0.022728, -0.031943, -0.134525, -0.024105, 0.022550, 0.038872, -0.025117), + array<f32, 8>(-0.017196, 0.102869, -0.028182, 0.153466, -0.024727, 0.008610, -0.029993, -0.019137), + array<f32, 8>(-0.135262, 0.264086, 0.052894, 0.104268, -0.044918, 0.085902, 0.119113, -0.024743), + array<f32, 8>(0.052648, 0.081481, 0.063582, 0.016832, 0.100333, -0.095727, 0.022089, 0.087283), + array<f32, 8>(0.028176, 0.006417, -0.010806, -0.049843, 0.010670, 0.058400, 0.051595, -0.025117), + array<f32, 8>(-0.078976, 0.040644, -0.116569, 0.145770, 0.019023, 0.071229, 0.056151, -0.019137), + array<f32, 8>(-0.028693, 0.154285, -0.019369, 0.111634, 0.022241, -0.015484, 0.039056, -0.024743), + array<f32, 8>(-0.052688, -0.046999, -0.000280, -0.024856, 0.012262, 0.028524, -0.028633, 0.087283), + array<f32, 8>(-0.004525, 0.052883, 0.002108, -0.096774, 0.052697, -0.055029, -0.022623, -0.025117), + array<f32, 8>(-0.076488, 0.013246, -0.097773, 0.023400, 0.027572, 0.041318, 
0.012556, -0.019137), + array<f32, 8>(0.028093, 0.007624, 0.021861, -0.079392, 0.053487, 0.065200, -0.084020, -0.024743), + array<f32, 8>(-0.027503, 0.010973, 0.077242, 0.105956, 0.003837, -0.032827, 0.062214, 0.087283), + array<f32, 8>(0.028159, 0.036260, 0.051032, -0.057339, -0.032511, -0.019800, -0.113611, -0.025117), + array<f32, 8>(-0.004438, 0.024692, -0.151404, 0.097579, -0.031042, 0.067771, -0.062624, -0.019137), + array<f32, 8>(-0.053284, 0.062195, 0.018403, -0.145339, 0.008091, -0.048359, 0.060338, -0.024743), + array<f32, 8>(0.035264, 0.022147, 0.014877, -0.010450, 0.048411, -0.011475, -0.025409, 0.087283), + array<f32, 8>(-0.095181, 0.095906, 0.022414, -0.068326, -0.035929, 0.041247, -0.066456, -0.025117), + array<f32, 8>(0.011500, 0.097427, -0.072423, 0.068691, 0.006129, 0.025585, -0.066149, -0.019137), + array<f32, 8>(0.000253, 0.207033, 0.041903, -0.018208, 0.080300, 0.029738, 0.170740, -0.024743), + array<f32, 8>(0.118473, -0.002532, 0.082055, 0.029355, -0.017353, -0.094582, -0.028445, 0.087283), + array<f32, 8>(-0.167765, 0.166992, -0.051393, 0.018985, 0.000246, -0.060339, -0.036368, -0.025117), + array<f32, 8>(-0.037902, 0.123576, -0.135429, 0.018780, 0.069222, -0.048750, 0.010303, -0.019137), + array<f32, 8>(0.092400, 0.317862, 0.056507, 0.269526, 0.015330, -0.078774, 0.213070, -0.024743), + array<f32, 8>(0.147994, -0.056838, -0.046159, 0.069406, -0.025076, -0.018648, 0.019698, 0.087283), + array<f32, 8>(-0.063516, 0.051390, -0.043280, 0.053602, 0.046148, 0.032013, -0.012079, -0.025117), + array<f32, 8>(-0.069387, 0.008554, -0.016392, 0.041428, 0.069626, -0.028865, 0.031068, -0.019137), + array<f32, 8>(0.001597, 0.092924, 0.064679, 0.242996, 0.070280, -0.047444, 0.155082, -0.024743), + array<f32, 8>(0.003761, -0.067148, 0.020808, -0.009994, 0.064026, -0.023521, -0.061335, 0.087283), + array<f32, 8>(0.013300, 0.048670, -0.058611, -0.104133, 0.060389, 0.022588, -0.085768, -0.025117), + array<f32, 8>(0.001996, 0.035599, -0.067395, 0.113355, 
-0.054467, 0.021354, -0.020545, -0.019137), + array<f32, 8>(0.024443, 0.016439, 0.095606, -0.006610, 0.056457, 0.009034, 0.048181, -0.024743), + array<f32, 8>(-0.081707, 0.089380, 0.012570, 0.040154, 0.006970, -0.097259, -0.003088, 0.087283), + array<f32, 8>(0.037347, -0.012520, -0.009110, -0.164514, -0.052337, 0.031441, -0.117828, -0.025117), + array<f32, 8>(-0.050695, 0.023007, -0.086370, 0.106721, -0.022698, -0.063039, 0.007639, -0.019137), + array<f32, 8>(-0.032690, 0.100637, 0.090612, -0.170336, -0.013709, 0.096891, -0.064632, -0.024743), + array<f32, 8>(0.005479, 0.068678, -0.014147, -0.117601, 0.033542, -0.026603, -0.034334, 0.087283), + array<f32, 8>(-0.049645, 0.161140, 0.019592, -0.020424, 0.021700, 0.046387, 0.070111, -0.025117), + array<f32, 8>(-0.075219, -0.030338, -0.042611, 0.045346, -0.012298, -0.029272, -0.048395, -0.019137), + array<f32, 8>(0.110303, 0.091954, 0.026566, -0.013034, -0.001918, 0.025677, -0.003027, -0.024743), + array<f32, 8>(0.084352, 0.004527, 0.042981, 0.040333, 0.011019, 0.011699, 0.053396, 0.087283), + array<f32, 8>(-0.151306, 0.282692, 0.038388, 0.199704, -0.024410, -0.021070, 0.135509, -0.025117), + array<f32, 8>(0.008868, 0.058833, -0.035204, 0.017617, 0.036727, -0.084137, 0.008426, -0.019137), + array<f32, 8>(0.111690, 0.202555, 0.002230, 0.104773, 0.043414, 0.094714, 0.024386, -0.024743), + array<f32, 8>(0.109470, -0.130369, -0.049615, 0.027567, 0.015618, 0.010219, -0.035927, 0.087283), + array<f32, 8>(0.013092, 0.191465, -0.022463, 0.306655, 0.046994, 0.023051, 0.114596, -0.025117), + array<f32, 8>(-0.095580, 0.067644, -0.069810, 0.058185, 0.079298, 0.042359, 0.102818, -0.019137), + array<f32, 8>(0.163902, 0.060505, 0.020250, 0.151637, -0.041346, 0.079968, -0.066609, -0.024743), + array<f32, 8>(0.007401, -0.119463, 0.029195, -0.118251, -0.057537, 0.057136, -0.162722, 0.087283), + array<f32, 8>(-0.036401, 0.152383, -0.049404, 0.188484, 0.069434, -0.056077, -0.041920, -0.025117), + array<f32, 8>(-0.070811, 0.042628, 
-0.080224, 0.133910, 0.054912, -0.086587, 0.104432, -0.019137), + array<f32, 8>(0.045319, 0.031249, -0.007304, -0.008136, 0.001678, 0.019408, -0.016683, -0.024743), + array<f32, 8>(-0.054316, -0.005207, -0.003794, -0.009173, -0.015797, 0.088869, -0.054766, 0.087283), + array<f32, 8>(0.036646, 0.049626, -0.038869, -0.049720, 0.012847, -0.054911, -0.012426, -0.025117), + array<f32, 8>(-0.002965, 0.087409, -0.027885, 0.089920, 0.013074, -0.106163, 0.065504, -0.019137), + array<f32, 8>(-0.004488, 0.102517, 0.092916, -0.079512, 0.001532, -0.048995, -0.041429, -0.024743), + array<f32, 8>(-0.062161, -0.027813, 0.037159, -0.030745, -0.017068, 0.084630, -0.046134, 0.087283), + array<f32, 8>(-0.017315, 0.191771, -0.050660, -0.140278, 0.038320, 0.037753, -0.043447, -0.025117), + array<f32, 8>(-0.079621, 0.091290, -0.098575, 0.055638, 0.007634, -0.051456, -0.011530, -0.019137), + array<f32, 8>(-0.044260, 0.010435, 0.104869, -0.029082, 0.038487, 0.004167, 0.020321, -0.024743), + array<f32, 8>(0.004107, -0.049898, -0.011912, 0.126974, 0.074958, 0.038876, 0.027066, 0.087283), + array<f32, 8>(0.022312, 0.332216, -0.028889, 0.171475, 0.052267, -0.023821, 0.193472, -0.025117), + array<f32, 8>(0.009104, -0.027289, -0.016718, 0.092231, 0.023904, -0.034162, 0.004693, -0.019137), + array<f32, 8>(0.022922, -0.036846, 0.071670, -0.118853, -0.046374, 0.005972, -0.079006, -0.024743), + array<f32, 8>(-0.086613, -0.033065, 0.032719, 0.081925, -0.025818, -0.065103, 0.010425, 0.087283), + array<f32, 8>(0.014945, 0.330249, -0.062079, 0.408858, 0.044895, -0.036703, 0.195226, -0.025117), + array<f32, 8>(0.021647, 0.086135, -0.013491, 0.027627, -0.033652, -0.016643, -0.037425, -0.019137), + array<f32, 8>(-0.028124, 0.039691, 0.108537, -0.123861, -0.071841, -0.034232, 0.009737, -0.024743), + array<f32, 8>(-0.095938, -0.080740, 0.047554, -0.145590, -0.041365, 0.031658, -0.027601, 0.087283), + array<f32, 8>(-0.050837, 0.179578, 0.020990, 0.240896, -0.038067, 0.007052, 0.036244, -0.025117), + 
array<f32, 8>(-0.100474, 0.012669, -0.123589, 0.147449, -0.056871, 0.029335, -0.041989, -0.019137), + array<f32, 8>(0.000809, 0.020182, 0.123381, 0.009990, 0.061892, -0.056804, 0.049866, -0.024743), + array<f32, 8>(-0.006123, 0.085572, -0.065080, -0.003607, -0.100605, -0.015746, 0.045932, 0.087283), + array<f32, 8>(-0.068945, 0.037700, -0.068738, 0.088604, 0.034364, -0.027429, -0.023157, -0.025117), + array<f32, 8>(-0.028689, 0.018089, -0.144344, 0.097751, -0.022261, 0.004934, 0.044538, -0.019137), + array<f32, 8>(-0.072695, 0.099329, 0.037965, -0.007148, -0.061809, -0.014461, -0.050644, -0.024743), + array<f32, 8>(-0.043364, -0.019908, 0.033602, -0.011686, -0.046646, -0.005387, 0.057703, 0.087283), + array<f32, 8>(0.020640, 0.058992, 0.042389, -0.111803, -0.000105, -0.069637, -0.058816, -0.025117), + array<f32, 8>(-0.090411, -0.034394, -0.135574, 0.085031, -0.020320, -0.002235, 0.079036, -0.019137), + array<f32, 8>(-0.035238, 0.052656, 0.011918, -0.032684, 0.067555, -0.047663, -0.013151, -0.024743), + array<f32, 8>(0.077223, 0.067583, -0.053024, 0.063017, -0.023909, -0.041936, 0.039041, 0.087283), + array<f32, 8>(-0.011154, 0.253355, 0.006886, 0.066990, -0.018613, -0.033851, 0.022408, -0.025117), + array<f32, 8>(-0.042376, 0.097067, -0.107170, 0.053378, 0.081423, -0.059980, -0.019982, -0.019137), + array<f32, 8>(-0.086462, 0.042703, 0.052655, -0.129460, -0.073930, -0.004732, -0.089001, -0.024743), + array<f32, 8>(0.019294, 0.036932, -0.046783, 0.172396, -0.003345, 0.029704, -0.013067, 0.087283), + array<f32, 8>(0.142370, 0.248269, -0.072705, 0.188676, 0.028917, -0.058974, -0.007950, -0.025117), + array<f32, 8>(-0.021378, 0.064055, -0.103605, -0.015491, -0.002155, -0.048161, -0.045529, -0.019137), + array<f32, 8>(0.006191, 0.063159, 0.005143, -0.101334, -0.020484, 0.038330, 0.010742, -0.024743), + array<f32, 8>(-0.123413, 0.027806, -0.063111, 0.060050, -0.087346, 0.080827, 0.016499, 0.087283), + array<f32, 8>(0.054552, 0.047349, 0.029259, 0.152502, -0.013689, 
-0.035447, -0.006584, -0.025117), + array<f32, 8>(-0.034984, 0.059972, -0.147872, 0.096835, 0.055766, -0.001973, -0.033631, -0.019137), + array<f32, 8>(0.004488, -0.060204, 0.120817, -0.095007, 0.040546, 0.026207, -0.011824, -0.024743), + array<f32, 8>(0.000380, 0.102988, 0.010112, -0.011668, 0.004855, -0.019988, -0.035633, 0.087283), + array<f32, 8>(0.003894, -0.083172, -0.046051, -0.005485, 0.017347, -0.057191, -0.085077, -0.025117), + array<f32, 8>(-0.066185, 0.092341, -0.135679, 0.009092, -0.015954, 0.003226, -0.010182, -0.019137) ); -const bias_layer0 = vec4<f32>(-0.526177, -0.569862, -1.370040, 0.0); - -const weights_layer1: array<mat4x4<f32>, 9> = array( - mat4x4<f32>( - 0.180029, -1.107249, 0.570741, 0.0, - -0.098536, 0.079545, -0.083257, 0.0, - -0.020066, 0.333084, 0.039506, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 3.068946, -1.783570, -0.550517, 0.0, - -0.296369, -0.080958, 0.040260, 0.0, - -0.093713, -0.212577, -0.110011, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 2.282564, -0.538192, -0.793214, 0.0, - -0.395788, 0.130881, 0.078571, 0.0, - -0.041375, 0.061666, 0.045651, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.267284, -1.971639, -0.099616, 0.0, - -0.084432, 0.139794, 0.007091, 0.0, - -0.103042, -0.104340, 0.067299, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -5.233469, -2.252747, -3.555217, 0.0, - 0.647940, -0.178858, 0.351633, 0.0, - -0.014237, -0.505881, 0.165940, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.121700, -0.677386, -2.435040, 0.0, - 0.084806, -0.028000, 0.380387, 0.0, - -0.020906, -0.279161, 0.041915, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 2.982562, -0.298441, -0.147775, 0.0, - -0.291832, 0.102875, -0.128590, 0.0, - -0.091786, 0.104389, -0.188678, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -4.434978, -0.261830, -2.436411, 0.0, - 0.349188, -0.245908, 0.272592, 0.0, - 0.010322, -0.148525, -0.031531, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 0.129886, 1.516168, -0.755576, 0.0, - 
0.133138, -0.260276, 0.028059, 0.0, - 0.001185, 0.141547, -0.003606, 0.0, - 0.0, 0.0, 0.0, 0.0, - ) +const weights_layer2: array<array<f32, 8>, 9> = array( + array<f32, 8>(0.071600, -0.118269, 0.093769, 0.096974, -0.002193, -0.065924, -0.125094, 0.018248), + array<f32, 8>(-0.089131, -0.053007, 0.150626, -0.051485, 0.087371, -0.078030, -0.045468, 0.018248), + array<f32, 8>(0.042144, 0.146191, 0.152445, 0.028572, 0.064491, -0.061860, 0.037828, 0.018248), + array<f32, 8>(-0.084747, -0.133062, -0.030736, 0.061174, -0.055809, -0.012031, 0.126923, 0.018248), + array<f32, 8>(-0.017155, -0.105189, 0.003457, 0.105491, 0.003587, 0.089110, -0.001623, 0.018248), + array<f32, 8>(-0.028012, -0.066691, 0.125358, -0.027705, 0.032134, 0.044475, -0.036991, 0.018248), + array<f32, 8>(0.094536, -0.038367, -0.009421, 0.027049, -0.103427, -0.065209, -0.110071, 0.018248), + array<f32, 8>(0.147956, 0.028446, 0.031066, 0.055667, -0.039952, 0.069251, 0.020060, 0.018248), + array<f32, 8>(0.067507, 0.154407, -0.017526, 0.064009, -0.014328, 0.022175, 0.015376, 0.018248) ); -const bias_layer1 = vec4<f32>(1.367986, -1.148709, -0.650040, 0.0); - -const weights_layer2: array<mat4x4<f32>, 9> = array( - mat4x4<f32>( - -0.137003, -0.289376, 0.625000, 0.0, - -0.120120, -0.238968, 0.448432, 0.0, - -0.142094, -0.253706, 0.458181, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.337017, -0.757585, 0.135953, 0.0, - -0.304432, -0.553491, 0.419907, 0.0, - -0.313585, -0.467667, 0.615326, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.161089, -0.328735, 0.612679, 0.0, - -0.137144, -0.172882, 0.176362, 0.0, - -0.153195, -0.061571, 0.173977, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.227814, -0.544193, -0.564658, 0.0, - -0.211743, -0.430586, 0.080349, 0.0, - -0.214442, -0.417501, 0.880266, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.435370, -0.295169, -0.865976, 0.0, - -0.423147, -0.274780, 0.323049, 0.0, - -0.411180, -0.062517, 1.099769, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - 
-0.199573, -0.488030, -0.396440, 0.0, - -0.187844, -0.360516, -0.156646, 0.0, - -0.188681, -0.292304, -0.134645, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.123218, -0.287990, 0.154656, 0.0, - -0.112954, -0.282778, 0.498742, 0.0, - -0.139083, -0.319337, 1.112621, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.267477, -0.691374, -0.028960, 0.0, - -0.246348, -0.585583, 0.401194, 0.0, - -0.253279, -0.562875, 1.105818, 0.0, - 0.0, 0.0, 0.0, 0.0, - ), - mat4x4<f32>( - -0.083133, -0.131627, 0.460039, 0.0, - -0.071126, -0.108601, 0.163545, 0.0, - -0.092579, -0.110020, 0.131282, 0.0, - 0.0, 0.0, 0.0, 0.0, - ) -); - -const bias_layer2 = vec4<f32>(-1.805686, -0.798340, 0.462318, 0.0); - diff --git a/workspaces/main/shaders/scene1.wgsl b/workspaces/main/shaders/scene1.wgsl new file mode 100644 index 0000000..7af3811 --- /dev/null +++ b/workspaces/main/shaders/scene1.wgsl @@ -0,0 +1,258 @@ +// Scene1 effect shader - ShaderToy conversion (raymarching cube & sphere) +// Source: Saturday cubism experiment by skal + +#include "common_uniforms" + +@group(0) @binding(0) var<uniform> uniforms: CommonUniforms; + +const PI: f32 = 3.141592654; +const TAU: f32 = 6.283185307; +const TOLERANCE: f32 = 0.0005; +const MAX_RAY_LENGTH: f32 = 20.0; +const MAX_RAY_MARCHES: i32 = 80; +const MAX_SHD_MARCHES: i32 = 20; +const NORM_OFF: f32 = 0.005; + +fn rot(a: f32) -> mat2x2<f32> { + let c = cos(a); + let s = sin(a); + return mat2x2<f32>(c, s, -s, c); +} + +// HSV to RGB conversion +const hsv2rgb_K = vec4<f32>(1.0, 2.0 / 3.0, 1.0 / 3.0, 3.0); +fn hsv2rgb(c: vec3<f32>) -> vec3<f32> { + let p = abs(fract(c.xxx + hsv2rgb_K.xyz) * 6.0 - hsv2rgb_K.www); + return c.z * mix(hsv2rgb_K.xxx, clamp(p - hsv2rgb_K.xxx, vec3<f32>(0.0), vec3<f32>(1.0)), c.y); +} + +// Colors (precomputed HSV conversions) +const skyCol = vec3<f32>(0.176, 0.235, 0.25); // HSV(0.57, 0.90, 0.25) +const skylineCol = vec3<f32>(0.5, 0.125, 0.025); // HSV(0.02, 0.95, 0.5) +const sunCol = vec3<f32>(0.5, 0.163, 0.025); // 
HSV(0.07, 0.95, 0.5) +const diffCol1 = vec3<f32>(0.4, 1.0, 1.0); // HSV(0.60, 0.90, 1.0) +const diffCol2 = vec3<f32>(0.325, 1.0, 0.975); // HSV(0.55, 0.90, 1.0) + +// Lighting (normalized manually) +const sunDir1 = vec3<f32>(0.0, 0.04997, -0.99875); // normalize(0, 0.05, -1) +const lightPos1 = vec3<f32>(10.0, 10.0, 10.0); +const lightPos2 = vec3<f32>(-10.0, 10.0, -10.0); + +fn sRGB(t: vec3<f32>) -> vec3<f32> { + return mix(1.055 * pow(t, vec3<f32>(1.0/2.4)) - 0.055, 12.92 * t, step(t, vec3<f32>(0.0031308))); +} + +fn aces_approx(v_in: vec3<f32>) -> vec3<f32> { + var v = max(v_in, vec3<f32>(0.0)); + v *= 0.6; + let a = 2.51; + let b = 0.03; + let c = 2.43; + let d = 0.59; + let e = 0.14; + return clamp((v * (a * v + b)) / (v * (c * v + d) + e), vec3<f32>(0.0), vec3<f32>(1.0)); +} + +fn tanh_approx(x: f32) -> f32 { + let x2 = x * x; + return clamp(x * (27.0 + x2) / (27.0 + 9.0 * x2), -1.0, 1.0); +} + +fn rayPlane(ro: vec3<f32>, rd: vec3<f32>, plane: vec4<f32>) -> f32 { + return -(dot(ro, plane.xyz) + plane.w) / dot(rd, plane.xyz); +} + +fn box2d(p: vec2<f32>, b: vec2<f32>) -> f32 { + let d = abs(p) - b; + return length(max(d, vec2<f32>(0.0))) + min(max(d.x, d.y), 0.0); +} + +fn box3d(p: vec3<f32>, b: vec3<f32>) -> f32 { + let q = abs(p) - b; + return length(max(q, vec3<f32>(0.0))) + min(max(q.x, max(q.y, q.z)), 0.0); +} + +fn sphere(p: vec3<f32>, r: f32) -> f32 { + return length(p) - r; +} + +var<private> g_rot0: mat2x2<f32>; + +fn render0(ro: vec3<f32>, rd: vec3<f32>) -> vec3<f32> { + var col = vec3<f32>(0.0); + var sf = 1.0001 - max(dot(sunDir1, rd), 0.0); + col += skyCol * pow((1.0 - abs(rd.y)), 8.0); + col += clamp(vec3<f32>(mix(0.0025, 0.125, tanh_approx(0.005 / sf)) / abs(rd.y)) * skylineCol, vec3<f32>(0.0), vec3<f32>(10.0)); + sf *= sf; + col += sunCol * 0.00005 / sf; + + let tp1 = rayPlane(ro, rd, vec4<f32>(0.0, -1.0, 0.0, 6.0)); + if (tp1 > 0.0) { + let pos = ro + tp1 * rd; + let pp = pos.xz; + let db = box2d(pp, vec2<f32>(5.0, 9.0)) - 3.0; + col += 
vec3<f32>(4.0) * skyCol * rd.y * rd.y * smoothstep(0.25, 0.0, db); + col += vec3<f32>(0.8) * skyCol * exp(-0.5 * max(db, 0.0)); + } + + return clamp(col, vec3<f32>(0.0), vec3<f32>(10.0)); +} + +fn df(p_in: vec3<f32>) -> f32 { + var p = p_in; + p.x = p_in.x * g_rot0[0][0] + p_in.z * g_rot0[0][1]; + p.z = p_in.x * g_rot0[1][0] + p_in.z * g_rot0[1][1]; + + // Cube + var pc = p; + pc -= vec3<f32>(-1.9, 0.0, 0.0); + let dCube = box3d(pc, vec3<f32>(1.6)); + + // Sphere + var ps = p; + ps -= vec3<f32>(1.3, 0.0, 0.0); + let dSphere = sphere(ps, 1.2); + + // Ground plane + let dPlane = p.y + 1.0; + + // Union + var d = min(dCube, dSphere); + d = min(d, dPlane); + + return d; +} + +fn normal(pos: vec3<f32>) -> vec3<f32> { + let eps = vec2<f32>(NORM_OFF, 0.0); + var nor: vec3<f32>; + nor.x = df(pos + eps.xyy) - df(pos - eps.xyy); + nor.y = df(pos + eps.yxy) - df(pos - eps.yxy); + nor.z = df(pos + eps.yyx) - df(pos - eps.yyx); + return normalize(nor); +} + +fn rayMarch(ro: vec3<f32>, rd: vec3<f32>, initt: f32) -> f32 { + var t = initt; + for (var i = 0; i < MAX_RAY_MARCHES; i++) { + if (t > MAX_RAY_LENGTH) { + t = MAX_RAY_LENGTH; + break; + } + let d = df(ro + rd * t); + if (d < TOLERANCE) { + break; + } + t += d; + } + return t; +} + +fn shadow(lp: vec3<f32>, ld: vec3<f32>, mint: f32, maxt: f32) -> f32 { + let ds = 1.0 - 0.4; + var t = mint; + var nd = 1e6; + let soff = 0.05; + let smul = 1.5; + for (var i = 0; i < MAX_SHD_MARCHES; i++) { + let p = lp + ld * t; + let d = df(p); + if (d < TOLERANCE || t >= maxt) { + let sd = 1.0 - exp(-smul * max(t / maxt - soff, 0.0)); + return select(mix(sd, 1.0, smoothstep(0.0, 0.025, nd)), sd, t >= maxt); + } + nd = min(nd, d); + t += ds * d; + } + let sd = 1.0 - exp(-smul * max(t / maxt - soff, 0.0)); + return sd; +} + +fn boxCol(col: vec3<f32>, nsp: vec3<f32>, rd: vec3<f32>, nnor: vec3<f32>, nrcol: vec3<f32>, nshd1: f32, nshd2: f32) -> vec3<f32> { + var nfre = 1.0 + dot(rd, nnor); + nfre *= nfre; + + let nld1 = normalize(lightPos1 - 
nsp); + let nld2 = normalize(lightPos2 - nsp); + + var ndif1 = max(dot(nld1, nnor), 0.0); + ndif1 *= ndif1; + + var ndif2 = max(dot(nld2, nnor), 0.0); + ndif2 *= ndif2; + + var scol = vec3<f32>(0.0); + let rf = smoothstep(1.0, 0.9, nfre); + scol += diffCol1 * ndif1 * nshd1; + scol += diffCol2 * ndif2 * nshd2; + scol += 0.1 * (skyCol + skylineCol); + scol += nrcol * 0.75 * mix(vec3<f32>(0.25), vec3<f32>(0.5, 0.5, 1.0), nfre); + + return mix(col, scol, rf * smoothstep(90.0, 20.0, dot(nsp, nsp))); +} + +fn render1(ro: vec3<f32>, rd: vec3<f32>) -> vec3<f32> { + let skyCol_local = render0(ro, rd); + var col = skyCol_local; + + let nt = rayMarch(ro, rd, 0.0); + if (nt < MAX_RAY_LENGTH) { + let nsp = ro + rd * nt; + let nnor = normal(nsp); + + let nref = reflect(rd, nnor); + let nrt = rayMarch(nsp, nref, 0.2); + var nrcol = render0(nsp, nref); + + if (nrt < MAX_RAY_LENGTH) { + let nrsp = nsp + nref * nrt; + let nrnor = normal(nrsp); + let nrref = reflect(nref, nrnor); + nrcol = boxCol(nrcol, nrsp, nref, nrnor, render0(nrsp, nrref), 1.0, 1.0); + } + + let nshd1 = mix(0.0, 1.0, shadow(nsp, normalize(lightPos1 - nsp), 0.1, distance(lightPos1, nsp))); + let nshd2 = mix(0.0, 1.0, shadow(nsp, normalize(lightPos2 - nsp), 0.1, distance(lightPos2, nsp))); + + col = boxCol(col, nsp, rd, nnor, nrcol, nshd1, nshd2); + } + + return col; +} + +fn effect(p: vec2<f32>) -> vec3<f32> { + g_rot0 = rot(-0.2 * uniforms.time); + + let fov = tan(TAU / 6.0); + let ro = vec3<f32>(0.0, 2.5, 5.0); + let la = vec3<f32>(0.0, 0.0, 0.0); + let up = vec3<f32>(0.1, 1.0, 0.0); + + let ww = normalize(la - ro); + let uu = normalize(cross(up, ww)); + let vv = cross(ww, uu); + let rd = normalize(-p.x * uu + p.y * vv + fov * ww); + + return render1(ro, rd); +} + +@vertex fn vs_main(@builtin(vertex_index) i: u32) -> @builtin(position) vec4<f32> { + var pos = array<vec2<f32>, 3>( + vec2<f32>(-1.0, -1.0), + vec2<f32>(3.0, -1.0), + vec2<f32>(-1.0, 3.0) + ); + return vec4<f32>(pos[i], 0.0, 1.0); +} + +@fragment fn 
fs_main(@builtin(position) p: vec4<f32>) -> @location(0) vec4<f32> { + // Flip Y to match ShaderToy convention (origin at bottom-left) + let flipped = vec2<f32>(p.x, uniforms.resolution.y - p.y); + let q = flipped / uniforms.resolution; + var coord = -1.0 + 2.0 * q; + coord.x *= uniforms.resolution.x / uniforms.resolution.y; + var col = effect(coord); + col = aces_approx(col); + col = sRGB(col); + return vec4<f32>(col, 1.0); +} diff --git a/workspaces/main/timeline.seq b/workspaces/main/timeline.seq index 8f7eea6..42d81a0 100644 --- a/workspaces/main/timeline.seq +++ b/workspaces/main/timeline.seq @@ -36,8 +36,9 @@ SEQUENCE 8.50 2 "Hybrid3D" SEQUENCE 10.50 0 "CNN effect" EFFECT + HeptagonEffect 0.0 12.00 # EFFECT + RotatingCubeEffect 0.00 12.0 - EFFECT + Hybrid3DEffect 0.00 12.00 - EFFECT + CNNEffect 1.0 12.0 layers=3 blend=1.5 +# EFFECT + Hybrid3DEffect 0.00 12.00 + EFFECT + Scene1Effect 0.0 12.0 + EFFECT + CNNEffect 1.0 12.0 layers=3 blend=.5 SEQUENCE 22.0 0 "buggy" EFFECT + HeptagonEffect 0.00 0.20 |
