diff options
Diffstat (limited to 'src/gpu')
| -rw-r--r-- | src/gpu/texture_readback.cc | 158 | ||||
| -rw-r--r-- | src/gpu/texture_readback.h | 10 |
2 files changed, 168 insertions, 0 deletions
diff --git a/src/gpu/texture_readback.cc b/src/gpu/texture_readback.cc index 0eb63d7..f3e4056 100644 --- a/src/gpu/texture_readback.cc +++ b/src/gpu/texture_readback.cc @@ -142,4 +142,162 @@ std::vector<uint8_t> read_texture_pixels( return pixels; } +// Half-float (FP16) to float conversion +static float fp16_to_float(uint16_t h) { + uint32_t sign = (h & 0x8000) << 16; + uint32_t exp = (h & 0x7C00) >> 10; + uint32_t mant = (h & 0x03FF); + + if (exp == 0) { + if (mant == 0) { + // Zero + uint32_t bits = sign; + float result; + memcpy(&result, &bits, sizeof(float)); + return result; + } + // Denormalized + exp = 1; + while ((mant & 0x400) == 0) { + mant <<= 1; + exp--; + } + mant &= 0x3FF; + } else if (exp == 31) { + // Inf or NaN + uint32_t bits = sign | 0x7F800000 | (mant << 13); + float result; + memcpy(&result, &bits, sizeof(float)); + return result; + } + + uint32_t bits = sign | ((exp + 112) << 23) | (mant << 13); + float result; + memcpy(&result, &bits, sizeof(float)); + return result; +} + +std::vector<uint8_t> texture_readback_fp16_to_u8( + WGPUDevice device, + WGPUQueue queue, + WGPUTexture texture, + int width, + int height) { + + // Align bytes per row to 256 + const uint32_t bytes_per_pixel = 8; // RGBA16Float = 4 × 2 bytes + const uint32_t unaligned_bytes_per_row = width * bytes_per_pixel; + const uint32_t aligned_bytes_per_row = + ((unaligned_bytes_per_row + 255) / 256) * 256; + + const size_t buffer_size = aligned_bytes_per_row * height; + + // Create staging buffer + const WGPUBufferDescriptor buffer_desc = { + .usage = WGPUBufferUsage_CopyDst | WGPUBufferUsage_MapRead, + .size = buffer_size, + }; + WGPUBuffer staging = wgpuDeviceCreateBuffer(device, &buffer_desc); + if (!staging) { + return {}; + } + + // Copy texture to buffer + WGPUCommandEncoder encoder = wgpuDeviceCreateCommandEncoder(device, nullptr); + const WGPUTexelCopyTextureInfo src = { + .texture = texture, + .mipLevel = 0, + .origin = {0, 0, 0}, + }; + const WGPUTexelCopyBufferInfo dst = { + .buffer = staging, + .layout = + { + .bytesPerRow = aligned_bytes_per_row, + .rowsPerImage = static_cast<uint32_t>(height), + }, + }; + const WGPUExtent3D copy_size = {static_cast<uint32_t>(width), + static_cast<uint32_t>(height), 1}; + wgpuCommandEncoderCopyTextureToBuffer(encoder, &src, &dst, ©_size); + + WGPUCommandBuffer commands = wgpuCommandEncoderFinish(encoder, nullptr); + wgpuQueueSubmit(queue, 1, &commands); + wgpuCommandBufferRelease(commands); + wgpuCommandEncoderRelease(encoder); + wgpuDevicePoll(device, true, nullptr); + + // Map buffer +#if defined(DEMO_CROSS_COMPILE_WIN32) + MapState map_state = {}; + auto map_cb = [](WGPUBufferMapAsyncStatus status, void* userdata) { + MapState* state = static_cast<MapState*>(userdata); + state->status = status; + state->done = true; + }; + wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buffer_size, map_cb, + &map_state); +#else + MapState map_state = {}; + auto map_cb = [](WGPUMapAsyncStatus status, WGPUStringView message, + void* userdata, void* user2) { + (void)message; + (void)user2; + MapState* state = static_cast<MapState*>(userdata); + state->status = status; + state->done = true; + }; + WGPUBufferMapCallbackInfo map_info = {}; + map_info.mode = WGPUCallbackMode_AllowProcessEvents; + map_info.callback = map_cb; + map_info.userdata1 = &map_state; + wgpuBufferMapAsync(staging, WGPUMapMode_Read, 0, buffer_size, map_info); +#endif + + for (int i = 0; i < 100 && !map_state.done; ++i) { + wgpuDevicePoll(device, true, nullptr); + } + + if (!map_state.done || map_state.status != WGPUMapAsyncStatus_Success) { + wgpuBufferRelease(staging); + return {}; + } + + // Convert FP16 to U8 ([-1,1] → [0,255]) + const uint16_t* mapped_data = static_cast<const uint16_t*>( + wgpuBufferGetConstMappedRange(staging, 0, buffer_size)); + + std::vector<uint8_t> pixels(width * height * 4); + if (mapped_data) { + for (int y = 0; y < height; ++y) { + const uint16_t* src_row = + reinterpret_cast<const uint16_t*>( + reinterpret_cast<const uint8_t*>(mapped_data) + + y * aligned_bytes_per_row); + for (int x = 0; x < width; ++x) { + float r = fp16_to_float(src_row[x * 4 + 0]); + float g = fp16_to_float(src_row[x * 4 + 1]); + float b = fp16_to_float(src_row[x * 4 + 2]); + float a = fp16_to_float(src_row[x * 4 + 3]); + + // Convert [-1,1] → [0,1] → [0,255] + r = (r + 1.0f) * 0.5f; + g = (g + 1.0f) * 0.5f; + b = (b + 1.0f) * 0.5f; + a = (a + 1.0f) * 0.5f; + + int idx = (y * width + x) * 4; + pixels[idx + 0] = static_cast<uint8_t>(b * 255.0f); // B + pixels[idx + 1] = static_cast<uint8_t>(g * 255.0f); // G + pixels[idx + 2] = static_cast<uint8_t>(r * 255.0f); // R + pixels[idx + 3] = static_cast<uint8_t>(a * 255.0f); // A + } + } + } + + wgpuBufferUnmap(staging); + wgpuBufferRelease(staging); + return pixels; +} + #endif // !defined(STRIP_ALL) diff --git a/src/gpu/texture_readback.h b/src/gpu/texture_readback.h index 1bf770f..8230e13 100644 --- a/src/gpu/texture_readback.h +++ b/src/gpu/texture_readback.h @@ -20,4 +20,14 @@ std::vector<uint8_t> read_texture_pixels( int width, int height); +// Read RGBA16Float texture and convert to BGRA8Unorm for saving +// Converts [-1,1] float range to [0,255] uint8 range +// Returns: width * height * 4 bytes (BGRA8) +std::vector<uint8_t> texture_readback_fp16_to_u8( + WGPUDevice device, + WGPUQueue queue, + WGPUTexture texture, + int width, + int height); + #endif // !defined(STRIP_ALL) |
