Skip to content

Commit 5e55346

Browse files
author
Joseph Rafael Ferrer
committed
Directory traversal library
1 parent 7f7b8c7 commit 5e55346

File tree

12 files changed

+2693
-906
lines changed

12 files changed

+2693
-906
lines changed

Cargo.lock

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ members = [
99
"display",
1010
"file",
1111
"fs",
12+
"ftw",
1213
"m4",
1314
"m4/test-manager",
1415
"gettext-rs",
@@ -33,3 +34,4 @@ chrono = { version = "0.4", default-features = false, features = ["clock"] }
3334
libc = "0.2"
3435
regex = "1.10"
3536
gettext-rs = { path = "./gettext-rs" }
37+
errno = "0.3"

ftw/Cargo.toml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[package]
2+
name = "ftw"
3+
version = "0.2.0"
4+
edition = "2021"
5+
authors = ["Jeff Garzik"]
6+
license = "MIT"
7+
repository = "https://github.com/rustcoreutils/posixutils-rs.git"
8+
9+
[dependencies]
10+
libc.workspace = true
11+
errno.workspace = true
12+
13+
[dev-dependencies]
14+
rand = "0.8.5"
15+
16+
[lib]
17+
doctest = false

ftw/src/dir.rs

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
use crate::{open_long_filename, Error, ErrorKind, FileDescriptor};
2+
use std::{
3+
cell::{RefCell, RefMut},
4+
collections::HashSet,
5+
ffi::{CStr, CString},
6+
io,
7+
marker::PhantomData,
8+
os::unix::ffi::OsStrExt as _,
9+
path::PathBuf,
10+
rc::Rc,
11+
};
12+
13+
// Not to be used publically. The public interface for a directory entry is `Entry`.
14+
pub struct EntryInternal<'a> {
15+
dirent: *mut libc::dirent,
16+
phantom: PhantomData<&'a libc::dirent>,
17+
}
18+
19+
impl<'a> EntryInternal<'a> {
20+
pub fn name_cstr(&self) -> &CStr {
21+
// Avoid dereferencing `dirent` when getting its fields. See note at:
22+
// https://github.com/rust-lang/rust/blob/1.80.1/library/std/src/sys/pal/unix/fs.rs#L725-L742
23+
const OFFSET: isize = std::mem::offset_of!(libc::dirent, d_name) as isize;
24+
unsafe { CStr::from_ptr(self.dirent.byte_offset(OFFSET).cast()) }
25+
}
26+
27+
pub fn ino(&self) -> libc::ino_t {
28+
const OFFSET: isize = std::mem::offset_of!(libc::dirent, d_ino) as isize;
29+
unsafe {
30+
self.dirent
31+
.byte_offset(OFFSET)
32+
.cast::<libc::ino_t>()
33+
.read_unaligned()
34+
}
35+
}
36+
37+
pub fn is_dot_or_double_dot(&self) -> bool {
38+
const DOT: u8 = b'.';
39+
40+
let slice = self.name_cstr().to_bytes_with_nul();
41+
slice.get(..2) == Some(&[DOT, 0]) || slice.get(..3) == Some(&[DOT, DOT, 0])
42+
}
43+
}
44+
45+
/// RAII wrapper for a `*mut libc::DIR`.
46+
///
47+
/// The state of the directory entry listing is preserved so this is more efficient than
48+
/// `DeferredDir`.
49+
#[derive(Debug)]
50+
pub struct OwnedDir {
51+
dirp: *mut libc::DIR,
52+
file_descriptor: std::mem::ManuallyDrop<FileDescriptor>,
53+
}
54+
55+
impl Drop for OwnedDir {
56+
fn drop(&mut self) {
57+
unsafe {
58+
// Also closes `self.dir_file_descriptor`
59+
libc::closedir(self.dirp);
60+
}
61+
}
62+
}
63+
64+
impl OwnedDir {
65+
pub fn new(file_descriptor: FileDescriptor) -> io::Result<Self> {
66+
unsafe {
67+
let dirp = libc::fdopendir(file_descriptor.fd);
68+
if dirp.is_null() {
69+
return Err(io::Error::last_os_error());
70+
}
71+
72+
Ok(Self {
73+
dirp,
74+
file_descriptor: std::mem::ManuallyDrop::new(file_descriptor),
75+
})
76+
}
77+
}
78+
79+
pub fn open_at(
80+
dir_file_descriptor: &FileDescriptor,
81+
filename: *const libc::c_char,
82+
) -> Result<Self, Error> {
83+
let file_descriptor =
84+
FileDescriptor::open_at(dir_file_descriptor, filename, libc::O_RDONLY)
85+
.map_err(|e| Error::new(e, ErrorKind::Open))?;
86+
let dir = OwnedDir::new(file_descriptor).map_err(|e| Error::new(e, ErrorKind::OpenDir))?;
87+
Ok(dir)
88+
}
89+
90+
pub fn iter<'a>(&'a self) -> OwnedDirIterator<'a> {
91+
OwnedDirIterator {
92+
dirp: self.dirp,
93+
phantom: PhantomData,
94+
}
95+
}
96+
97+
pub fn file_descriptor(&self) -> &FileDescriptor {
98+
&self.file_descriptor
99+
}
100+
}
101+
102+
pub struct OwnedDirIterator<'a> {
103+
dirp: *mut libc::DIR,
104+
phantom: PhantomData<&'a OwnedDir>,
105+
}
106+
107+
impl<'a> Iterator for OwnedDirIterator<'a> {
108+
type Item = io::Result<EntryInternal<'a>>;
109+
110+
fn next(&mut self) -> Option<Self::Item> {
111+
unsafe {
112+
errno::set_errno(errno::Errno(0));
113+
114+
let dirent = libc::readdir(self.dirp);
115+
if dirent.is_null() {
116+
let last_err = io::Error::last_os_error();
117+
let errno = last_err.raw_os_error().unwrap();
118+
if errno == 0 {
119+
None
120+
} else {
121+
Some(Err(last_err))
122+
}
123+
} else {
124+
Some(Ok(EntryInternal {
125+
dirent,
126+
phantom: PhantomData,
127+
}))
128+
}
129+
}
130+
}
131+
}
132+
133+
/// Used when conserving file descriptors.
134+
///
135+
/// Its `iter` method returns `DeferredDirIterator` which has to recreate the directory state with
136+
/// every instantiation.
137+
#[derive(Debug)]
138+
pub struct DeferredDir {
139+
parent: Rc<(FileDescriptor, PathBuf)>,
140+
path: PathBuf,
141+
visited: RefCell<HashSet<libc::ino_t>>,
142+
}
143+
144+
impl DeferredDir {
145+
pub fn new(parent: Rc<(FileDescriptor, PathBuf)>, path: PathBuf) -> Self {
146+
Self {
147+
parent,
148+
path,
149+
visited: RefCell::new(HashSet::new()),
150+
}
151+
}
152+
153+
pub fn iter<'a>(&'a self) -> DeferredDirIterator<'a> {
154+
let file_descriptor = self.open_file_descriptor();
155+
let dir = OwnedDir::new(file_descriptor).unwrap();
156+
let dirp = dir.dirp;
157+
158+
// Passing ownership of `dirp` to `SlowDirIterator`
159+
std::mem::forget(dir);
160+
161+
DeferredDirIterator {
162+
dirp,
163+
visited: self.visited.borrow_mut(),
164+
}
165+
}
166+
167+
pub fn open_file_descriptor(&self) -> FileDescriptor {
168+
// e.g.:
169+
// self.parent.1 - foo
170+
// self.path - foo/bar/baz
171+
// remainder - bar/baz
172+
let remainder = self.path.strip_prefix(&self.parent.1).unwrap();
173+
174+
// `remainder` is not guaranteed to be shorter than `libc::PATH_MAX`
175+
let (starting_dir, components) =
176+
open_long_filename(self.parent.0.clone(), remainder, None, &mut |_, _| {}).unwrap();
177+
178+
let filename_cstr = CString::new(components.as_path().as_os_str().as_bytes()).unwrap();
179+
180+
FileDescriptor::open_at(&starting_dir, filename_cstr.as_ptr(), libc::O_RDONLY).unwrap()
181+
}
182+
183+
pub fn parent(&self) -> &Rc<(FileDescriptor, PathBuf)> {
184+
&self.parent
185+
}
186+
}
187+
188+
pub struct DeferredDirIterator<'a> {
189+
dirp: *mut libc::DIR,
190+
visited: RefMut<'a, HashSet<libc::ino_t>>,
191+
}
192+
193+
impl<'a> Drop for DeferredDirIterator<'a> {
194+
fn drop(&mut self) {
195+
unsafe {
196+
libc::closedir(self.dirp);
197+
}
198+
}
199+
}
200+
201+
impl<'a> Iterator for DeferredDirIterator<'a> {
202+
type Item = io::Result<EntryInternal<'a>>;
203+
204+
fn next(&mut self) -> Option<Self::Item> {
205+
loop {
206+
unsafe {
207+
errno::set_errno(errno::Errno(0));
208+
209+
let dirent = libc::readdir(self.dirp);
210+
211+
if dirent.is_null() {
212+
let last_err = io::Error::last_os_error();
213+
let errno = last_err.raw_os_error().unwrap();
214+
if errno == 0 {
215+
break None;
216+
} else {
217+
break Some(Err(last_err));
218+
}
219+
} else {
220+
let entry = EntryInternal {
221+
dirent,
222+
phantom: PhantomData,
223+
};
224+
let ino = entry.ino();
225+
if self.visited.contains(&ino) {
226+
continue;
227+
} else {
228+
self.visited.insert(ino);
229+
}
230+
231+
break Some(Ok(entry));
232+
}
233+
}
234+
}
235+
}
236+
}
237+
238+
#[derive(Debug)]
239+
pub enum HybridDir {
240+
Owned(OwnedDir),
241+
Deferred(DeferredDir),
242+
}
243+
244+
impl HybridDir {
245+
pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = io::Result<EntryInternal<'a>>> + 'a> {
246+
match self {
247+
HybridDir::Owned(d) => Box::new(d.iter()),
248+
HybridDir::Deferred(d) => Box::new(d.iter()),
249+
}
250+
}
251+
}

0 commit comments

Comments
 (0)