Skip to content

Commit ffcb0f3

Browse files
authored
Implement DecInt without itoa (#1245)
Using `itoa` is unlikely to have any actual benefits for any reasonable usecase. You are unlikely to format lots of integers in a tight loop without performing IO operations in between, so any speed-benefits of `itoa` are likely moot. The buffer `itoa` writes to is not re-used by `DecInt`, which makes the generated code use a `memcpy` call. `itoa`'s buffer is twice the size as we need, because it is implemented to be able to hold an `i128`/`u128`. The `i128`/`u128` implementation of `DecInt` would panic if the input was outside of `-10**19 + 1 ..= 10**20 - 1`. This PR implements the integer stringification naively using a div-mod-10 loop. Users will be able to use integers as path arguments without opting-in to any feature. This is a breaking change because the feature `itoa` was removed, and because the implementation for `i128`/`u128` was removed.
1 parent b6970cb commit ffcb0f3

File tree

9 files changed

+171
-62
lines changed

9 files changed

+171
-62
lines changed

Cargo.toml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ rust-version = "1.63"
1717

1818
[dependencies]
1919
bitflags = { version = "2.4.0", default-features = false }
20-
itoa = { version = "1.0.13", default-features = false, optional = true }
2120

2221
# Special dependencies used in rustc-dep-of-std mode.
2322
core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
@@ -159,10 +158,10 @@ time = []
159158
param = ["fs"]
160159

161160
# Enable this to enable `rustix::io::proc_self_*` (on Linux) and `ttyname`.
162-
procfs = ["once_cell", "itoa", "fs"]
161+
procfs = ["once_cell", "fs"]
163162

164163
# Enable `rustix::pty::*`.
165-
pty = ["itoa", "fs"]
164+
pty = ["fs"]
166165

167166
# Enable `rustix::termios::*`.
168167
termios = []

src/path/arg.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@
88
99
use crate::ffi::CStr;
1010
use crate::io;
11-
#[cfg(feature = "itoa")]
1211
use crate::path::DecInt;
1312
use crate::path::SMALL_PATH_BUFFER_SIZE;
14-
#[cfg(all(feature = "alloc", feature = "itoa"))]
13+
#[cfg(feature = "alloc")]
1514
use alloc::borrow::ToOwned;
1615
use core::mem::MaybeUninit;
1716
use core::{ptr, slice, str};
@@ -982,7 +981,6 @@ impl Arg for Vec<u8> {
982981
}
983982
}
984983

985-
#[cfg(feature = "itoa")]
986984
impl Arg for DecInt {
987985
#[inline]
988986
fn as_str(&self) -> io::Result<&str> {

src/path/dec_int.rs

Lines changed: 151 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88

99
use crate::backend::fd::{AsFd, AsRawFd};
1010
use crate::ffi::CStr;
11+
use core::hint::unreachable_unchecked;
1112
use core::mem::{self, MaybeUninit};
12-
use itoa::{Buffer, Integer};
13+
use core::num::NonZeroU8;
1314
#[cfg(all(feature = "std", unix))]
1415
use std::os::unix::ffi::OsStrExt;
1516
#[cfg(all(feature = "std", target_os = "wasi"))]
@@ -36,35 +37,155 @@ use {core::fmt, std::ffi::OsStr, std::path::Path};
3637
/// ```
3738
#[derive(Clone)]
3839
pub struct DecInt {
39-
// Enough to hold an {u,i}64 and NUL terminator.
40-
buf: [MaybeUninit<u8>; u64::MAX_STR_LEN + 1],
41-
len: usize,
40+
buf: [MaybeUninit<u8>; BUF_LEN],
41+
len: NonZeroU8,
4242
}
43-
const _: () = assert!(u64::MAX_STR_LEN == i64::MAX_STR_LEN);
43+
44+
/// Enough to hold an {u,i}64 and NUL terminator.
45+
const BUF_LEN: usize = U64_MAX_STR_LEN + 1;
46+
47+
/// Maximum length of a formatted [`u64`].
48+
const U64_MAX_STR_LEN: usize = "18446744073709551615".len();
49+
50+
/// Maximum length of a formatted [`i64`].
51+
#[allow(dead_code)]
52+
const I64_MAX_STR_LEN: usize = "-9223372036854775808".len();
53+
54+
const _: () = assert!(U64_MAX_STR_LEN == I64_MAX_STR_LEN);
55+
56+
mod private {
57+
pub trait Sealed: Copy {
58+
type Unsigned: super::Integer;
59+
60+
fn as_unsigned(self) -> (bool, Self::Unsigned);
61+
fn eq_zero(self) -> bool;
62+
fn div_mod_10(&mut self) -> u8;
63+
}
64+
65+
macro_rules! impl_unsigned {
66+
($($ty:ty)+) => { $(
67+
impl Sealed for $ty {
68+
type Unsigned = $ty;
69+
70+
#[inline]
71+
fn as_unsigned(self) -> (bool, $ty) {
72+
(false, self)
73+
}
74+
75+
#[inline]
76+
fn eq_zero(self) -> bool {
77+
self == 0
78+
}
79+
80+
#[inline]
81+
fn div_mod_10(&mut self) -> u8 {
82+
let result = (*self % 10) as u8;
83+
*self /= 10;
84+
result
85+
}
86+
}
87+
)+ }
88+
}
89+
90+
macro_rules! impl_signed {
91+
($($signed:ty : $unsigned:ty)+) => { $(
92+
impl Sealed for $signed {
93+
type Unsigned = $unsigned;
94+
95+
#[inline]
96+
fn as_unsigned(self) -> (bool, $unsigned) {
97+
if self >= 0 {
98+
(false, self as $unsigned)
99+
} else {
100+
(true, !(self as $unsigned) + 1)
101+
}
102+
}
103+
104+
#[inline]
105+
fn eq_zero(self) -> bool {
106+
unimplemented!()
107+
}
108+
109+
#[inline]
110+
fn div_mod_10(&mut self) -> u8 {
111+
unimplemented!()
112+
}
113+
}
114+
)+ }
115+
}
116+
117+
impl_unsigned!(u8 u16 u32 u64);
118+
impl_signed!(i8:u8 i16:u16 i32:u32 i64:u64);
119+
120+
#[cfg(any(
121+
target_pointer_width = "16",
122+
target_pointer_width = "32",
123+
target_pointer_width = "64"
124+
))]
125+
const _: () = {
126+
impl_unsigned!(usize);
127+
impl_signed!(isize:usize);
128+
};
129+
}
130+
131+
/// An integer that can be used by [`DecInt::new`].
132+
pub trait Integer: private::Sealed {}
133+
134+
impl Integer for i8 {}
135+
impl Integer for i16 {}
136+
impl Integer for i32 {}
137+
impl Integer for i64 {}
138+
impl Integer for u8 {}
139+
impl Integer for u16 {}
140+
impl Integer for u32 {}
141+
impl Integer for u64 {}
142+
143+
#[cfg(any(
144+
target_pointer_width = "16",
145+
target_pointer_width = "32",
146+
target_pointer_width = "64"
147+
))]
148+
const _: () = {
149+
impl Integer for isize {}
150+
impl Integer for usize {}
151+
};
44152

45153
impl DecInt {
46154
/// Construct a new path component from an integer.
47-
#[inline]
48155
pub fn new<Int: Integer>(i: Int) -> Self {
49-
let mut buf = [MaybeUninit::uninit(); 21];
50-
51-
let mut str_buf = Buffer::new();
52-
let str_buf = str_buf.format(i);
53-
assert!(
54-
str_buf.len() < buf.len(),
55-
"{str_buf}{} unsupported.",
56-
core::any::type_name::<Int>()
57-
);
58-
59-
buf[..str_buf.len()].copy_from_slice(unsafe {
60-
// SAFETY: you can always go from init to uninit
61-
mem::transmute::<&[u8], &[MaybeUninit<u8>]>(str_buf.as_bytes())
62-
});
63-
buf[str_buf.len()] = MaybeUninit::new(0);
64-
65-
Self {
156+
use private::Sealed;
157+
158+
let (is_neg, mut i) = i.as_unsigned();
159+
let mut len = 1;
160+
let mut buf = [MaybeUninit::uninit(); BUF_LEN];
161+
buf[BUF_LEN - 1] = MaybeUninit::new(b'\0');
162+
163+
// We use `loop { …; if cond { break } }` instead of `while !cond { … }` so the loop is
164+
// entered at least once. This way `0` does not need a special handling.
165+
loop {
166+
len += 1;
167+
if len > BUF_LEN {
168+
// SAFETY: a stringified i64/u64 cannot be longer than `U64_MAX_STR_LEN` bytes
169+
unsafe { unreachable_unchecked() };
170+
}
171+
buf[BUF_LEN - len] = MaybeUninit::new(b'0' + i.div_mod_10());
172+
if i.eq_zero() {
173+
break;
174+
}
175+
}
176+
177+
if is_neg {
178+
len += 1;
179+
if len > BUF_LEN {
180+
// SAFETY: a stringified i64/u64 cannot be longer than `U64_MAX_STR_LEN` bytes
181+
unsafe { unreachable_unchecked() };
182+
}
183+
buf[BUF_LEN - len] = MaybeUninit::new(b'-');
184+
}
185+
186+
DecInt {
66187
buf,
67-
len: str_buf.len(),
188+
len: NonZeroU8::new(len as u8).unwrap(),
68189
}
69190
}
70191

@@ -96,7 +217,12 @@ impl DecInt {
96217
/// Return the raw byte buffer including the NUL byte.
97218
#[inline]
98219
pub fn as_bytes_with_nul(&self) -> &[u8] {
99-
let init = &self.buf[..=self.len];
220+
let len = usize::from(self.len.get());
221+
if len > BUF_LEN {
222+
// SAFETY: a stringified i64/u64 cannot be longer than `U64_MAX_STR_LEN` bytes
223+
unsafe { unreachable_unchecked() };
224+
}
225+
let init = &self.buf[(self.buf.len() - len)..];
100226
// SAFETY: we're guaranteed to have initialized len+1 bytes.
101227
unsafe { mem::transmute::<&[MaybeUninit<u8>], &[u8]>(init) }
102228
}

src/path/mod.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
//! Filesystem path operations.
22
33
mod arg;
4-
#[cfg(feature = "itoa")]
54
mod dec_int;
65

76
pub use arg::{option_into_with_c_str, Arg};
8-
#[cfg(feature = "itoa")]
9-
#[cfg_attr(docsrs, doc(cfg(feature = "itoa")))]
10-
pub use dec_int::DecInt;
7+
pub use dec_int::{DecInt, Integer};
118

129
pub(crate) const SMALL_PATH_BUFFER_SIZE: usize = 256;

tests/net/unix.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
// This test uses `AF_UNIX` with `SOCK_SEQPACKET` which is unsupported on
66
// macOS.
77
#![cfg(not(any(apple, target_os = "espidf", target_os = "redox", target_os = "wasi")))]
8-
// This test uses `DecInt`.
9-
#![cfg(feature = "itoa")]
108
#![cfg(feature = "fs")]
119

1210
use rustix::fs::{unlinkat, AtFlags, CWD};

tests/net/unix_alloc.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
// This test uses `AF_UNIX` with `SOCK_SEQPACKET` which is unsupported on
44
// macOS.
55
#![cfg(not(any(apple, target_os = "espidf", target_os = "redox", target_os = "wasi")))]
6-
// This test uses `DecInt`.
7-
#![cfg(feature = "itoa")]
86
#![cfg(feature = "fs")]
97

108
use rustix::fs::{unlinkat, AtFlags, CWD};

tests/path/arg.rs

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
use rustix::ffi::{CStr, CString};
55
use rustix::io;
66
use rustix::path::Arg;
7-
#[cfg(feature = "itoa")]
87
use rustix::path::DecInt;
98
use std::borrow::Cow;
109
use std::ffi::{OsStr, OsString};
@@ -132,15 +131,12 @@ fn test_arg() {
132131
assert_eq!(cstr!("hello"), Borrow::borrow(&t.as_cow_c_str().unwrap()));
133132
assert_eq!(cstr!("hello"), Borrow::borrow(&t.into_c_str().unwrap()));
134133

135-
#[cfg(feature = "itoa")]
136-
{
137-
let t: DecInt = DecInt::new(43110);
138-
assert_eq!("43110", t.as_str());
139-
assert_eq!("43110".to_owned(), Arg::to_string_lossy(&t));
140-
assert_eq!(cstr!("43110"), Borrow::borrow(&t.as_cow_c_str().unwrap()));
141-
assert_eq!(cstr!("43110"), t.as_c_str());
142-
assert_eq!(cstr!("43110"), Borrow::borrow(&t.into_c_str().unwrap()));
143-
}
134+
let t: DecInt = DecInt::new(43110);
135+
assert_eq!("43110", t.as_str());
136+
assert_eq!("43110".to_owned(), Arg::to_string_lossy(&t));
137+
assert_eq!(cstr!("43110"), Borrow::borrow(&t.as_cow_c_str().unwrap()));
138+
assert_eq!(cstr!("43110"), t.as_c_str());
139+
assert_eq!(cstr!("43110"), Borrow::borrow(&t.into_c_str().unwrap()));
144140
}
145141

146142
#[test]

tests/path/dec_int.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ use rustix::path::DecInt;
33
macro_rules! check {
44
($i:expr) => {
55
let i = $i;
6-
assert_eq!(DecInt::new(i).as_ref().to_str().unwrap(), i.to_string());
6+
assert_eq!(
7+
DecInt::new(i).as_c_str().to_bytes_with_nul(),
8+
format!("{i}\0").as_bytes(),
9+
);
710
};
811
}
912

@@ -30,16 +33,11 @@ fn test_dec_int() {
3033
check!(usize::MAX);
3134
check!(isize::MIN);
3235
}
33-
}
34-
35-
#[test]
36-
#[should_panic]
37-
fn test_unsupported_max_u128_dec_int() {
38-
check!(u128::MAX);
39-
}
4036

41-
#[test]
42-
#[should_panic]
43-
fn test_unsupported_min_u128_dec_int() {
44-
check!(i128::MIN);
37+
for i in u16::MIN..=u16::MAX {
38+
check!(i);
39+
}
40+
for i in i16::MIN..=i16::MAX {
41+
check!(i);
42+
}
4543
}

tests/path/main.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,4 @@
77

88
#[cfg(not(feature = "rustc-dep-of-std"))]
99
mod arg;
10-
#[cfg(feature = "itoa")]
1110
mod dec_int;

0 commit comments

Comments
 (0)