diff --git a/CHANGELOG.md b/CHANGELOG.md index efd9a2c5e02..54db2d4b59b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -165,6 +165,7 @@ By @cwfitzgerald in [#8162](https://github.com/gfx-rs/wgpu/pull/8162). - Added mesh shader support to `wgpu`, with examples. Requires passthrough. By @SupaMaggie70Incorporated in [#7345](https://github.com/gfx-rs/wgpu/pull/7345). - Added support for external textures based on WebGPU's [`GPUExternalTexture`](https://www.w3.org/TR/webgpu/#gpuexternaltexture). These allow shaders to transparently operate on potentially multiplanar source texture data in either RGB or YCbCr formats via WGSL's `texture_external` type. This is gated behind the `Features::EXTERNAL_TEXTURE` feature, which is currently only supported on DX12. By @jamienicol in [#4386](https://github.com/gfx-rs/wgpu/issues/4386). +- `wgpu::Device::poll` can now specify a timeout via `wgpu::PollType::WaitWithTimeout`/`wgpu::PollType::WaitForSubmissionIndexWithTimeout`. By @wumpf in [#8282](https://github.com/gfx-rs/wgpu/pull/8282). #### naga @@ -194,6 +195,7 @@ By @cwfitzgerald in [#8162](https://github.com/gfx-rs/wgpu/pull/8162). - Require new `F16_IN_F32` downlevel flag for `quantizeToF16`, `pack2x16float`, and `unpack2x16float` in WGSL input. By @aleiserson in [#8130](https://github.com/gfx-rs/wgpu/pull/8130). - The error message for non-copyable depth/stencil formats no longer mentions the aspect when it is not relevant. By @reima in [#8156](https://github.com/gfx-rs/wgpu/pull/8156). - Track the initialization status of buffer memory correctly when `copy_texture_to_buffer` skips over padding space between rows or layers, or when the start/end of a texture-buffer transfer is not 4B aligned. By @andyleiserson in [#8099](https://github.com/gfx-rs/wgpu/pull/8099). +- `wgpu::PollType::Wait`/`wgpu::PollType::WaitForSubmissionIndex` will no longer time out after 60 seconds, but instead wait indefinitely or (depending on backend implementation) until an error is encountered. 
Use `wgpu::PollType::WaitWithTimeout`/`wgpu::PollType::WaitForSubmissionIndexWithTimeout` if you need a timeout. By @wumpf in [#8282](https://github.com/gfx-rs/wgpu/pull/8282) #### naga diff --git a/tests/tests/wgpu-gpu/poll.rs b/tests/tests/wgpu-gpu/poll.rs index 70d3b393c7e..6f29313fdcc 100644 --- a/tests/tests/wgpu-gpu/poll.rs +++ b/tests/tests/wgpu-gpu/poll.rs @@ -1,4 +1,4 @@ -use std::num::NonZeroU64; +use std::{num::NonZeroU64, time::Duration}; use wgpu::{ BindGroupDescriptor, BindGroupEntry, BindGroupLayoutDescriptor, BindGroupLayoutEntry, @@ -13,8 +13,10 @@ use wgpu_test::{ pub fn all_tests(vec: &mut Vec) { vec.extend([ WAIT, + WAIT_WITH_TIMEOUT, DOUBLE_WAIT, WAIT_ON_SUBMISSION, + WAIT_ON_SUBMISSION_WITH_TIMEOUT, DOUBLE_WAIT_ON_SUBMISSION, WAIT_OUT_OF_ORDER, WAIT_AFTER_BAD_SUBMISSION, @@ -75,6 +77,18 @@ static WAIT: GpuTestConfiguration = GpuTestConfiguration::new() ctx.async_poll(PollType::wait()).await.unwrap(); }); +#[gpu_test] +static WAIT_WITH_TIMEOUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().enable_noop()) + .run_async(|ctx| async move { + let cmd_buf = generate_dummy_work(&ctx); + + ctx.queue.submit(Some(cmd_buf)); + ctx.async_poll(PollType::WaitWithTimeout(Duration::from_secs(1))) + .await + .unwrap(); + }); + #[gpu_test] static DOUBLE_WAIT: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().enable_noop()) @@ -96,6 +110,21 @@ static WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new() ctx.async_poll(PollType::wait_for(index)).await.unwrap(); }); +#[gpu_test] +static WAIT_ON_SUBMISSION_WITH_TIMEOUT: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters(TestParameters::default().enable_noop()) + .run_async(|ctx| async move { + let cmd_buf = generate_dummy_work(&ctx); + + let index = ctx.queue.submit(Some(cmd_buf)); + ctx.async_poll(PollType::WaitForSubmissionIndexWithTimeout { + submission_index: index, + timeout: 
Duration::from_secs(1), + }) + .await + .unwrap(); + }); + #[gpu_test] static DOUBLE_WAIT_ON_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().enable_noop()) diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 643b07c8f3b..38e1b1d08f8 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -35,10 +35,6 @@ pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES; // value is enough for a 16k texture with float4 format. pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10; -// If a submission is not completed within this time, we go off into UB land. -// See https://github.com/gfx-rs/wgpu/issues/4589. 60s to reduce the chances of this. -const CLEANUP_WAIT_MS: u32 = 60000; - pub(crate) const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid"; pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor>; diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index eb936ce7095..0f36ad1e5f7 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -183,9 +183,9 @@ impl Drop for Queue { fence.as_ref(), last_successful_submission_index, #[cfg(not(target_arch = "wasm32"))] - timeout_ms, + Some(core::time::Duration::from_millis(timeout_ms)), #[cfg(target_arch = "wasm32")] - 0, // WebKit and Chromium don't support a non-0 timeout + Some(core::time::Duration::ZERO), // WebKit and Chromium don't support a non-0 timeout ) }; // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU). 
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 7d5898ecc2d..f1f0a4c4fb1 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -29,7 +29,6 @@ use crate::{ device::{ bgl, create_validator, life::WaitIdleError, map_buffer, AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, RenderPassContext, - CLEANUP_WAIT_MS, }, hal_label, init_tracker::{ @@ -712,7 +711,10 @@ impl Device { // If a wait was requested, determine which submission index to wait for. let wait_submission_index = match poll_type { - wgt::PollType::WaitForSubmissionIndex(submission_index) => { + wgt::PollType::WaitForSubmissionIndex(submission_index) + | wgt::PollType::WaitForSubmissionIndexWithTimeout { + submission_index, .. + } => { let last_successful_submission_index = self .last_successful_submission_index .load(Ordering::Acquire); @@ -728,7 +730,7 @@ impl Device { Some(submission_index) } - wgt::PollType::Wait => Some( + wgt::PollType::Wait | wgt::PollType::WaitWithTimeout { .. } => Some( self.last_successful_submission_index .load(Ordering::Acquire), ), @@ -741,7 +743,7 @@ impl Device { let wait_result = unsafe { self.raw() - .wait(fence.as_ref(), target_submission_index, CLEANUP_WAIT_MS) + .wait(fence.as_ref(), target_submission_index, poll_type.timeout()) }; // This error match is only about `DeviceErrors`. 
At this stage we do not care if @@ -4499,7 +4501,7 @@ impl Device { let last_done_index = unsafe { self.raw().get_fence_value(fence.as_ref()) } .map_err(|e| self.handle_hal_error(e))?; if last_done_index < submission_index { - unsafe { self.raw().wait(fence.as_ref(), submission_index, !0) } + unsafe { self.raw().wait(fence.as_ref(), submission_index, None) } .map_err(|e| self.handle_hal_error(e))?; drop(fence); if let Some(queue) = self.get_queue() { diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 22f211c909b..c70e37bc12b 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -54,7 +54,7 @@ struct ExecutionContext { impl ExecutionContext { unsafe fn wait_and_clear(&mut self, device: &A::Device) { - device.wait(&self.fence, self.fence_value, !0).unwrap(); + device.wait(&self.fence, self.fence_value, None).unwrap(); self.encoder.reset_all(self.used_cmd_bufs.drain(..)); for view in self.used_views.drain(..) { device.destroy_texture_view(view); @@ -519,7 +519,7 @@ impl Example { queue .submit(&[&init_cmd], &[], (&mut fence, init_fence_value)) .unwrap(); - device.wait(&fence, init_fence_value, !0).unwrap(); + device.wait(&fence, init_fence_value, None).unwrap(); device.destroy_buffer(staging_buffer); cmd_encoder.reset_all(iter::once(init_cmd)); fence diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 43cb405ef14..69c4ff2cf4e 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -181,7 +181,7 @@ struct ExecutionContext { impl ExecutionContext { unsafe fn wait_and_clear(&mut self, device: &A::Device) { - device.wait(&self.fence, self.fence_value, !0).unwrap(); + device.wait(&self.fence, self.fence_value, None).unwrap(); self.encoder.reset_all(self.used_cmd_bufs.drain(..)); for view in self.used_views.drain(..) 
{ device.destroy_texture_view(view); @@ -816,7 +816,7 @@ impl Example { queue .submit(&[&init_cmd], &[], (&mut fence, init_fence_value)) .unwrap(); - device.wait(&fence, init_fence_value, !0).unwrap(); + device.wait(&fence, init_fence_value, None).unwrap(); cmd_encoder.reset_all(iter::once(init_cmd)); fence }; diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index a135478e38f..6cd959e7114 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -2237,9 +2237,9 @@ impl crate::Device for super::Device { &self, fence: &super::Fence, value: crate::FenceValue, - timeout_ms: u32, + timeout: Option, ) -> Result { - let timeout_duration = Duration::from_millis(timeout_ms as u64); + let timeout = timeout.unwrap_or(Duration::MAX); // We first check if the fence has already reached the value we're waiting for. let mut fence_value = unsafe { fence.raw.GetCompletedValue() }; @@ -2273,7 +2273,7 @@ impl crate::Device for super::Device { // // This happens when a previous iteration WaitForSingleObject succeeded with a previous fence value, // right before the timeout would have been hit. 
- let remaining_wait_duration = match timeout_duration.checked_sub(elapsed) { + let remaining_wait_duration = match timeout.checked_sub(elapsed) { Some(remaining) => remaining, None => { log::trace!("Timeout elapsed in between waits!"); @@ -2286,7 +2286,7 @@ impl crate::Device for super::Device { match unsafe { Threading::WaitForSingleObject( event.0, - remaining_wait_duration.as_millis().try_into().unwrap(), + remaining_wait_duration.as_millis().min(u32::MAX as u128) as u32, ) } { Foundation::WAIT_OBJECT_0 => {} diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs index 1f6ed912689..20263143808 100644 --- a/wgpu-hal/src/dynamic/device.rs +++ b/wgpu-hal/src/dynamic/device.rs @@ -135,7 +135,7 @@ pub trait DynDevice: DynResource { &self, fence: &dyn DynFence, value: FenceValue, - timeout_ms: u32, + timeout: Option, ) -> Result; unsafe fn start_graphics_debugger_capture(&self) -> bool; @@ -486,10 +486,10 @@ impl DynDevice for D { &self, fence: &dyn DynFence, value: FenceValue, - timeout_ms: u32, + timeout: Option, ) -> Result { let fence = fence.expect_downcast_ref(); - unsafe { D::wait(self, fence, value, timeout_ms) } + unsafe { D::wait(self, fence, value, timeout) } } unsafe fn start_graphics_debugger_capture(&self) -> bool { diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index dda5525c61c..b45d53cc23e 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1564,7 +1564,7 @@ impl crate::Device for super::Device { &self, fence: &super::Fence, wait_value: crate::FenceValue, - timeout_ms: u32, + timeout: Option, ) -> Result { if fence.satisfied(wait_value) { return Ok(true); @@ -1578,7 +1578,9 @@ impl crate::Device for super::Device { let timeout_ns = if cfg!(any(webgl, Emscripten)) { 0 } else { - (timeout_ms as u64 * 1_000_000).min(!0u32 as u64) + timeout + .map(|t| t.as_nanos().min(u32::MAX as u128) as u32) + .unwrap_or(u32::MAX) }; fence.wait(gl, wait_value, timeout_ns) } diff --git 
a/wgpu-hal/src/gles/fence.rs b/wgpu-hal/src/gles/fence.rs index b29a6324290..d5cd0ec42c9 100644 --- a/wgpu-hal/src/gles/fence.rs +++ b/wgpu-hal/src/gles/fence.rs @@ -102,7 +102,7 @@ impl Fence { &self, gl: &glow::Context, wait_value: crate::FenceValue, - timeout_ns: u64, + timeout_ns: u32, ) -> Result { let last_completed = self.last_completed.load(Ordering::Acquire); @@ -134,7 +134,7 @@ impl Fence { gl.client_wait_sync( gl_fence.sync, glow::SYNC_FLUSH_COMMANDS_BIT, - timeout_ns as i32, + timeout_ns.min(i32::MAX as u32) as i32, ) }; diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index b4255a6c811..a614098240d 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -981,6 +981,9 @@ pub trait Device: WasmNotSendSync { /// Calling `wait` with a lower [`FenceValue`] than `fence`'s current value /// returns immediately. /// + /// If `timeout` is `None`, the function will block indefinitely or until + /// an error is encountered. + /// /// Returns `Ok(true)` on success and `Ok(false)` on timeout. /// /// [`Fence`]: Api::Fence @@ -989,7 +992,7 @@ pub trait Device: WasmNotSendSync { &self, fence: &::Fence, value: FenceValue, - timeout_ms: u32, + timeout: Option, ) -> Result; /// Start a graphics debugger capture. 
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 6b94f43fb1b..9f994e6302f 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -1580,7 +1580,7 @@ impl crate::Device for super::Device { &self, fence: &super::Fence, wait_value: crate::FenceValue, - timeout_ms: u32, + timeout: Option, ) -> DeviceResult { if wait_value <= fence.completed_value.load(atomic::Ordering::Acquire) { return Ok(true); @@ -1603,8 +1603,10 @@ impl crate::Device for super::Device { if let MTLCommandBufferStatus::Completed = cmd_buf.status() { return Ok(true); } - if start.elapsed().as_millis() >= timeout_ms as u128 { - return Ok(false); + if let Some(timeout) = timeout { + if start.elapsed() >= timeout { + return Ok(false); + } } thread::sleep(core::time::Duration::from_millis(1)); } diff --git a/wgpu-hal/src/noop/mod.rs b/wgpu-hal/src/noop/mod.rs index abd7c628a98..4d06e04331e 100644 --- a/wgpu-hal/src/noop/mod.rs +++ b/wgpu-hal/src/noop/mod.rs @@ -429,7 +429,7 @@ impl crate::Device for Context { &self, fence: &Fence, value: crate::FenceValue, - timeout_ms: u32, + timeout: Option, ) -> DeviceResult { // The relevant commands must have already been submitted, and noop-backend commands are // executed synchronously, so there is no waiting — either it is already done, diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index f04bce4954e..17d075c3810 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -4,6 +4,7 @@ use core::{ mem::{self, MaybeUninit}, num::NonZeroU32, ptr, + time::Duration, }; use arrayvec::ArrayVec; @@ -2443,9 +2444,12 @@ impl crate::Device for super::Device { &self, fence: &super::Fence, wait_value: crate::FenceValue, - timeout_ms: u32, + timeout: Option, ) -> Result { - let timeout_ns = timeout_ms as u64 * super::MILLIS_TO_NANOS; + let timeout_ns = timeout + .unwrap_or(Duration::MAX) + .as_nanos() + .min(u64::MAX as _) as u64; self.shared.wait_for_fence(fence, wait_value, 
timeout_ns) } diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index e5b2dd49a91..183950b71f0 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -49,7 +49,6 @@ use wgt::InternalCounter; use semaphore_list::SemaphoreList; -const MILLIS_TO_NANOS: u64 = 1_000_000; const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1; #[derive(Clone, Debug)] diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index aba8542c520..c2c70e67c66 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -16,13 +16,14 @@ extern crate alloc; use alloc::borrow::Cow; use alloc::{string::String, vec, vec::Vec}; -use core::cmp::Ordering; use core::{ + cmp::Ordering, fmt, hash::{Hash, Hasher}, mem, num::NonZeroU32, ops::Range, + time::Duration, }; use bytemuck::{Pod, Zeroable}; @@ -4503,8 +4504,26 @@ pub enum PollType { /// On WebGPU, this has no effect. Callbacks are invoked from the /// window event loop. WaitForSubmissionIndex(T), - /// Same as `WaitForSubmissionIndex` but waits for the most recent submission. + + /// Same as [`Self::WaitForSubmissionIndex`] but with a timeout. + WaitForSubmissionIndexWithTimeout { + /// Submission index to wait for. + submission_index: T, + + /// Max time to wait for the submission to complete. + /// + /// If waiting for the GPU device takes this long or longer, the poll will return [`PollError::Timeout`]. + timeout: Duration, + }, + + /// Same as [`Self::WaitForSubmissionIndex`] but waits for the most recent submission. Wait, + + /// Same as [`Self::Wait`], but with a timeout. + /// + /// If waiting for the GPU device takes this long or longer, the poll will return [`PollError::Timeout`]. + WaitWithTimeout(Duration), + /// Check the device for a single time without blocking. Poll, } @@ -4532,7 +4551,10 @@ impl PollType { #[must_use] pub fn is_wait(&self) -> bool { match *self { - Self::WaitForSubmissionIndex(..) | Self::Wait => true, + Self::WaitForSubmissionIndex(..) 
+ | Self::Wait + | Self::WaitForSubmissionIndexWithTimeout { .. } + | Self::WaitWithTimeout { .. } => true, Self::Poll => false, } } @@ -4546,9 +4568,27 @@ impl PollType { match self { Self::WaitForSubmissionIndex(i) => PollType::WaitForSubmissionIndex(func(i)), Self::Wait => PollType::Wait, + Self::WaitForSubmissionIndexWithTimeout { + submission_index, + timeout, + } => PollType::WaitForSubmissionIndexWithTimeout { + submission_index: func(submission_index), + timeout, + }, + Self::WaitWithTimeout(timeout) => PollType::WaitWithTimeout(timeout), Self::Poll => PollType::Poll, } } + + /// Returns the timeout as a [`Duration`] if the poll type has one, or `None` otherwise. + #[must_use] + pub fn timeout(&self) -> Option { + match self { + Self::WaitForSubmissionIndexWithTimeout { timeout, .. } + | Self::WaitWithTimeout(timeout) => Some(*timeout), + _ => None, + } + } } /// Error states after a device poll