2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -165,6 +165,7 @@ By @cwfitzgerald in [#8162](https://github.com/gfx-rs/wgpu/pull/8162).
- Added mesh shader support to `wgpu`, with examples. Requires passthrough. By @SupaMaggie70Incorporated in [#7345](https://github.com/gfx-rs/wgpu/pull/7345).

- Added support for external textures based on WebGPU's [`GPUExternalTexture`](https://www.w3.org/TR/webgpu/#gpuexternaltexture). These allow shaders to transparently operate on potentially multiplanar source texture data in either RGB or YCbCr formats via WGSL's `texture_external` type. This is gated behind the `Features::EXTERNAL_TEXTURE` feature, which is currently only supported on DX12. By @jamienicol in [#4386](https://github.com/gfx-rs/wgpu/issues/4386).
- `wgpu::Device::poll` can now specify a timeout via `wgpu::PollType::WaitWithTimeout`/`wgpu::PollType::WaitForSubmissionIndexWithTimeout`. By @wumpf in [#8282](https://github.com/gfx-rs/wgpu/pull/8282)

#### naga

@@ -194,6 +195,7 @@ By @cwfitzgerald in [#8162](https://github.com/gfx-rs/wgpu/pull/8162).
- Require new `F16_IN_F32` downlevel flag for `quantizeToF16`, `pack2x16float`, and `unpack2x16float` in WGSL input. By @aleiserson in [#8130](https://github.com/gfx-rs/wgpu/pull/8130).
- The error message for non-copyable depth/stencil formats no longer mentions the aspect when it is not relevant. By @reima in [#8156](https://github.com/gfx-rs/wgpu/pull/8156).
- Track the initialization status of buffer memory correctly when `copy_texture_to_buffer` skips over padding space between rows or layers, or when the start/end of a texture-buffer transfer is not 4B aligned. By @andyleiserson in [#8099](https://github.com/gfx-rs/wgpu/pull/8099).
- `wgpu::PollType::Wait`/`wgpu::PollType::WaitForSubmissionIndex` will no longer time out after 60 seconds, but instead wait indefinitely or (depending on the backend implementation) until an error is encountered. Use `wgpu::PollType::WaitWithTimeout`/`wgpu::PollType::WaitForSubmissionIndexWithTimeout` if you need a timeout. By @wumpf in [#8282](https://github.com/gfx-rs/wgpu/pull/8282)

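A minimal usage sketch of the new poll variants described above (variant names as they appear in these entries and in the tests later in this diff; the surrounding `device`/`queue` setup is assumed and error handling is elided):

```rust
use std::time::Duration;

// Sketch only: bounded waits on previously submitted GPU work.
fn wait_with_deadline(device: &wgpu::Device, queue: &wgpu::Queue, cmd_buf: wgpu::CommandBuffer) {
    // Wait for a specific submission, giving up after one second.
    let index = queue.submit(Some(cmd_buf));
    let _ = device.poll(wgpu::PollType::WaitForSubmissionIndexWithTimeout {
        submission_index: index,
        timeout: Duration::from_secs(1),
    });

    // Or wait for everything submitted so far, with a five-second cap.
    let _ = device.poll(wgpu::PollType::WaitWithTimeout(Duration::from_secs(5)));
}
```
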
#### naga

31 changes: 30 additions & 1 deletion tests/tests/wgpu-gpu/poll.rs
@@ -1,4 +1,4 @@
use std::num::NonZeroU64;
use std::{num::NonZeroU64, time::Duration};

use wgpu::{
BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry,
@@ -13,8 +13,10 @@ use wgpu_test::{
pub fn all_tests(vec: &mut Vec<GpuTestInitializer>) {
vec.extend([
WAIT,
WAIT_WITH_TIMEOUT,
DOUBLE_WAIT,
WAIT_ON_SUBMISSION,
WAIT_ON_SUBMISSION_WITH_TIMEOUT,
DOUBLE_WAIT_ON_SUBMISSION,
WAIT_OUT_OF_ORDER,
WAIT_AFTER_BAD_SUBMISSION,
@@ -75,6 +77,18 @@ static WAIT: GpuTestConfiguration = GpuTestConfiguration::new()
ctx.async_poll(PollType::wait()).await.unwrap();
});

#[gpu_test]
static WAIT_WITH_TIMEOUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().enable_noop())
.run_async(|ctx| async move {
let cmd_buf = generate_dummy_work(&ctx);

ctx.queue.submit(Some(cmd_buf));
ctx.async_poll(PollType::WaitWithTimeout(Duration::from_secs(1)))
.await
.unwrap();
});

#[gpu_test]
static DOUBLE_WAIT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().enable_noop())
@@ -96,6 +110,21 @@ static WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new()
ctx.async_poll(PollType::wait_for(index)).await.unwrap();
});

#[gpu_test]
static WAIT_ON_SUBMISSION_WITH_TIMEOUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().enable_noop())
.run_async(|ctx| async move {
let cmd_buf = generate_dummy_work(&ctx);

let index = ctx.queue.submit(Some(cmd_buf));
ctx.async_poll(PollType::WaitForSubmissionIndexWithTimeout {
submission_index: index,
timeout: Duration::from_secs(1),
})
.await
.unwrap();
});

#[gpu_test]
static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().enable_noop())
4 changes: 0 additions & 4 deletions wgpu-core/src/device/mod.rs
@@ -35,10 +35,6 @@ pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
// value is enough for a 16k texture with float4 format.
pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;

// If a submission is not completed within this time, we go off into UB land.
// See https://github.com/gfx-rs/wgpu/issues/4589. 60s to reduce the chances of this.
const CLEANUP_WAIT_MS: u32 = 60000;

pub(crate) const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid";

pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
4 changes: 2 additions & 2 deletions wgpu-core/src/device/queue.rs
@@ -183,9 +183,9 @@ impl Drop for Queue {
fence.as_ref(),
last_successful_submission_index,
#[cfg(not(target_arch = "wasm32"))]
timeout_ms,
Some(core::time::Duration::from_millis(timeout_ms)),
#[cfg(target_arch = "wasm32")]
0, // WebKit and Chromium don't support a non-0 timeout
Some(core::time::Duration::ZERO), // WebKit and Chromium don't support a non-0 timeout
)
};
// Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU).
12 changes: 7 additions & 5 deletions wgpu-core/src/device/resource.rs
@@ -29,7 +29,6 @@ use crate::{
device::{
bgl, create_validator, life::WaitIdleError, map_buffer, AttachmentData,
DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, RenderPassContext,
CLEANUP_WAIT_MS,
},
hal_label,
init_tracker::{
@@ -712,7 +711,10 @@ impl Device {

// If a wait was requested, determine which submission index to wait for.
let wait_submission_index = match poll_type {
wgt::PollType::WaitForSubmissionIndex(submission_index) => {
wgt::PollType::WaitForSubmissionIndex(submission_index)
| wgt::PollType::WaitForSubmissionIndexWithTimeout {
submission_index, ..
} => {
let last_successful_submission_index = self
.last_successful_submission_index
.load(Ordering::Acquire);
@@ -728,7 +730,7 @@

Some(submission_index)
}
wgt::PollType::Wait => Some(
wgt::PollType::Wait | wgt::PollType::WaitWithTimeout { .. } => Some(
self.last_successful_submission_index
.load(Ordering::Acquire),
),
@@ -741,7 +743,7 @@

let wait_result = unsafe {
self.raw()
.wait(fence.as_ref(), target_submission_index, CLEANUP_WAIT_MS)
.wait(fence.as_ref(), target_submission_index, poll_type.timeout())
};

// This error match is only about `DeviceErrors`. At this stage we do not care if
@@ -4499,7 +4501,7 @@ impl Device {
let last_done_index = unsafe { self.raw().get_fence_value(fence.as_ref()) }
.map_err(|e| self.handle_hal_error(e))?;
if last_done_index < submission_index {
unsafe { self.raw().wait(fence.as_ref(), submission_index, !0) }
unsafe { self.raw().wait(fence.as_ref(), submission_index, None) }
.map_err(|e| self.handle_hal_error(e))?;
drop(fence);
if let Some(queue) = self.get_queue() {
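The `poll` path above now forwards `poll_type.timeout()` straight into the hal `wait` call. The helper itself lives in `wgpu-types` and is not part of this diff; the sketch below is a hypothetical illustration of the mapping it performs (the real enum is generic over the submission index type):

```rust
use core::time::Duration;

// Hypothetical stand-in for wgpu_types::PollType, for illustration only.
enum PollType<T> {
    Poll,
    Wait,
    WaitWithTimeout(Duration),
    WaitForSubmissionIndex(T),
    WaitForSubmissionIndexWithTimeout { submission_index: T, timeout: Duration },
}

impl<T> PollType<T> {
    /// `None` means "wait without a timeout"; `Some(d)` bounds the wait.
    fn timeout(&self) -> Option<Duration> {
        match self {
            Self::WaitWithTimeout(t)
            | Self::WaitForSubmissionIndexWithTimeout { timeout: t, .. } => Some(*t),
            _ => None,
        }
    }
}
```
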
4 changes: 2 additions & 2 deletions wgpu-hal/examples/halmark/main.rs
@@ -54,7 +54,7 @@ struct ExecutionContext<A: hal::Api> {

impl<A: hal::Api> ExecutionContext<A> {
unsafe fn wait_and_clear(&mut self, device: &A::Device) {
device.wait(&self.fence, self.fence_value, !0).unwrap();
device.wait(&self.fence, self.fence_value, None).unwrap();
self.encoder.reset_all(self.used_cmd_bufs.drain(..));
for view in self.used_views.drain(..) {
device.destroy_texture_view(view);
@@ -519,7 +519,7 @@ impl<A: hal::Api> Example<A> {
queue
.submit(&[&init_cmd], &[], (&mut fence, init_fence_value))
.unwrap();
device.wait(&fence, init_fence_value, !0).unwrap();
device.wait(&fence, init_fence_value, None).unwrap();
device.destroy_buffer(staging_buffer);
cmd_encoder.reset_all(iter::once(init_cmd));
fence
4 changes: 2 additions & 2 deletions wgpu-hal/examples/ray-traced-triangle/main.rs
@@ -181,7 +181,7 @@ struct ExecutionContext<A: hal::Api> {

impl<A: hal::Api> ExecutionContext<A> {
unsafe fn wait_and_clear(&mut self, device: &A::Device) {
device.wait(&self.fence, self.fence_value, !0).unwrap();
device.wait(&self.fence, self.fence_value, None).unwrap();
self.encoder.reset_all(self.used_cmd_bufs.drain(..));
for view in self.used_views.drain(..) {
device.destroy_texture_view(view);
@@ -816,7 +816,7 @@ impl<A: hal::Api> Example<A> {
queue
.submit(&[&init_cmd], &[], (&mut fence, init_fence_value))
.unwrap();
device.wait(&fence, init_fence_value, !0).unwrap();
device.wait(&fence, init_fence_value, None).unwrap();
cmd_encoder.reset_all(iter::once(init_cmd));
fence
};
8 changes: 4 additions & 4 deletions wgpu-hal/src/dx12/device.rs
@@ -2237,9 +2237,9 @@ impl crate::Device for super::Device {
&self,
fence: &super::Fence,
value: crate::FenceValue,
timeout_ms: u32,
timeout: Option<Duration>,
) -> Result<bool, crate::DeviceError> {
let timeout_duration = Duration::from_millis(timeout_ms as u64);
let timeout = timeout.unwrap_or(Duration::MAX);
@emilk (Contributor) commented on Oct 1, 2025:

If `None` means the same as `Duration::MAX`, why pass the duration by `Option` at all?

From just the function declaration it is far from obvious that `None` means MAX. It could just as well mean "use the default timeout".

@Wumpf (Member, Author) replied on Oct 1, 2025:

None of the implementations do this right now, but there is often a simpler fast path for no timeout. I think this is fine for the low-level hal API, but a docstring is missing on the trait this implements.

// We first check if the fence has already reached the value we're waiting for.
let mut fence_value = unsafe { fence.raw.GetCompletedValue() };
@@ -2273,7 +2273,7 @@ impl crate::Device for super::Device {
//
// This happens when a previous iteration WaitForSingleObject succeeded with a previous fence value,
// right before the timeout would have been hit.
let remaining_wait_duration = match timeout_duration.checked_sub(elapsed) {
let remaining_wait_duration = match timeout.checked_sub(elapsed) {
Some(remaining) => remaining,
None => {
log::trace!("Timeout elapsed in between waits!");
Expand All @@ -2286,7 +2286,7 @@ impl crate::Device for super::Device {
match unsafe {
Threading::WaitForSingleObject(
event.0,
remaining_wait_duration.as_millis().try_into().unwrap(),
remaining_wait_duration.as_millis().min(u32::MAX as u128) as u32,
)
} {
Foundation::WAIT_OBJECT_0 => {}
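As the review exchange above notes, a `None` timeout means "wait as long as it takes", which each backend then clamps to whatever unit its native wait call accepts (`u32` milliseconds for `WaitForSingleObject` here, nanoseconds for GLES and Vulkan below). A minimal sketch of that conversion pattern, using nothing beyond `core`:

```rust
use core::time::Duration;

/// Milliseconds for Win32-style waits that take a `u32`.
/// Note: 0xFFFF_FFFF is Win32's INFINITE, so saturating here means "wait forever".
fn timeout_to_millis_u32(timeout: Option<Duration>) -> u32 {
    timeout
        .unwrap_or(Duration::MAX)
        .as_millis()
        .min(u32::MAX as u128) as u32
}

/// Nanoseconds for Vulkan-style waits that take a `u64`.
fn timeout_to_nanos_u64(timeout: Option<Duration>) -> u64 {
    timeout
        .unwrap_or(Duration::MAX)
        .as_nanos()
        .min(u64::MAX as u128) as u64
}
```
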
6 changes: 3 additions & 3 deletions wgpu-hal/src/dynamic/device.rs
@@ -135,7 +135,7 @@ pub trait DynDevice: DynResource {
&self,
fence: &dyn DynFence,
value: FenceValue,
timeout_ms: u32,
timeout: Option<core::time::Duration>,
) -> Result<bool, DeviceError>;

unsafe fn start_graphics_debugger_capture(&self) -> bool;
@@ -486,10 +486,10 @@ impl<D: Device + DynResource> DynDevice for D {
&self,
fence: &dyn DynFence,
value: FenceValue,
timeout_ms: u32,
timeout: Option<core::time::Duration>,
) -> Result<bool, DeviceError> {
let fence = fence.expect_downcast_ref();
unsafe { D::wait(self, fence, value, timeout_ms) }
unsafe { D::wait(self, fence, value, timeout) }
}

unsafe fn start_graphics_debugger_capture(&self) -> bool {
6 changes: 4 additions & 2 deletions wgpu-hal/src/gles/device.rs
@@ -1564,7 +1564,7 @@ impl crate::Device for super::Device {
&self,
fence: &super::Fence,
wait_value: crate::FenceValue,
timeout_ms: u32,
timeout: Option<core::time::Duration>,
) -> Result<bool, crate::DeviceError> {
if fence.satisfied(wait_value) {
return Ok(true);
@@ -1578,7 +1578,9 @@
let timeout_ns = if cfg!(any(webgl, Emscripten)) {
0
} else {
(timeout_ms as u64 * 1_000_000).min(!0u32 as u64)
timeout
.map(|t| t.as_nanos().min(u32::MAX as u128) as u32)
.unwrap_or(u32::MAX)
};
fence.wait(gl, wait_value, timeout_ns)
}
4 changes: 2 additions & 2 deletions wgpu-hal/src/gles/fence.rs
@@ -102,7 +102,7 @@ impl Fence {
&self,
gl: &glow::Context,
wait_value: crate::FenceValue,
timeout_ns: u64,
timeout_ns: u32,
) -> Result<bool, crate::DeviceError> {
let last_completed = self.last_completed.load(Ordering::Acquire);

@@ -134,7 +134,7 @@
gl.client_wait_sync(
gl_fence.sync,
glow::SYNC_FLUSH_COMMANDS_BIT,
timeout_ns as i32,
timeout_ns.min(i32::MAX as u32) as i32,
)
};

5 changes: 4 additions & 1 deletion wgpu-hal/src/lib.rs
@@ -981,6 +981,9 @@ pub trait Device: WasmNotSendSync {
/// Calling `wait` with a lower [`FenceValue`] than `fence`'s current value
/// returns immediately.
///
/// If no `timeout` is provided, the function will block indefinitely, or until
/// an error is encountered.
///
/// Returns `Ok(true)` on success and `Ok(false)` on timeout.
///
/// [`Fence`]: Api::Fence
@@ -989,7 +992,7 @@
&self,
fence: &<Self::A as Api>::Fence,
value: FenceValue,
timeout_ms: u32,
timeout: Option<core::time::Duration>,
) -> Result<bool, DeviceError>;

/// Start a graphics debugger capture.
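Given the contract above (`Ok(true)` when the fence reached `value`, `Ok(false)` when the timeout expired first), here is a hedged sketch of a caller of this trait method; the generic bounds mirror the signature shown above, and the one-second deadline is arbitrary:

```rust
use core::time::Duration;

/// Sketch: wait at most one second, reporting whether the fence reached `value`.
unsafe fn wait_briefly<D: wgpu_hal::Device>(
    device: &D,
    fence: &<D::A as wgpu_hal::Api>::Fence,
    value: wgpu_hal::FenceValue,
) -> Result<bool, wgpu_hal::DeviceError> {
    // `Some(..)` bounds the wait; `None` would block until the value is reached
    // (or an error occurs).
    unsafe { device.wait(fence, value, Some(Duration::from_secs(1))) }
}
```
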
8 changes: 5 additions & 3 deletions wgpu-hal/src/metal/device.rs
@@ -1580,7 +1580,7 @@ impl crate::Device for super::Device {
&self,
fence: &super::Fence,
wait_value: crate::FenceValue,
timeout_ms: u32,
timeout: Option<core::time::Duration>,
) -> DeviceResult<bool> {
if wait_value <= fence.completed_value.load(atomic::Ordering::Acquire) {
return Ok(true);
@@ -1603,8 +1603,10 @@
if let MTLCommandBufferStatus::Completed = cmd_buf.status() {
return Ok(true);
}
if start.elapsed().as_millis() >= timeout_ms as u128 {
return Ok(false);
if let Some(timeout) = timeout {
if start.elapsed() >= timeout {
return Ok(false);
}
}
thread::sleep(core::time::Duration::from_millis(1));
}
2 changes: 1 addition & 1 deletion wgpu-hal/src/noop/mod.rs
@@ -429,7 +429,7 @@ impl crate::Device for Context {
&self,
fence: &Fence,
value: crate::FenceValue,
timeout_ms: u32,
timeout: Option<Duration>,
) -> DeviceResult<bool> {
// The relevant commands must have already been submitted, and noop-backend commands are
// executed synchronously, so there is no waiting — either it is already done,
8 changes: 6 additions & 2 deletions wgpu-hal/src/vulkan/device.rs
@@ -4,6 +4,7 @@ use core::{
mem::{self, MaybeUninit},
num::NonZeroU32,
ptr,
time::Duration,
};

use arrayvec::ArrayVec;
@@ -2443,9 +2444,12 @@ impl crate::Device for super::Device {
&self,
fence: &super::Fence,
wait_value: crate::FenceValue,
timeout_ms: u32,
timeout: Option<Duration>,
) -> Result<bool, crate::DeviceError> {
let timeout_ns = timeout_ms as u64 * super::MILLIS_TO_NANOS;
let timeout_ns = timeout
.unwrap_or(Duration::MAX)
.as_nanos()
.min(u64::MAX as _) as u64;
self.shared.wait_for_fence(fence, wait_value, timeout_ns)
}

1 change: 0 additions & 1 deletion wgpu-hal/src/vulkan/mod.rs
@@ -49,7 +49,6 @@ use wgt::InternalCounter;

use semaphore_list::SemaphoreList;

const MILLIS_TO_NANOS: u64 = 1_000_000;
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]