diff --git a/src/cargo/core/source/mod.rs b/src/cargo/core/source/mod.rs index bdf5d4cda4a..6ca614d3430 100644 --- a/src/cargo/core/source/mod.rs +++ b/src/cargo/core/source/mod.rs @@ -62,6 +62,10 @@ pub trait Source { /// Attempts to find the packages that match a dependency request. /// + /// Usually you should call [`Source::block_until_ready`] somewhere and + /// wait until package information becomes available. Otherwise any query + /// may return a [`Poll::Pending`]. + /// /// The `f` argument is expected to get called when any [`Summary`] becomes available. fn query( &mut self, @@ -70,8 +74,8 @@ pub trait Source { f: &mut dyn FnMut(Summary), ) -> Poll>; - /// A helper function that collects and returns the result from - /// [`Source::query`] as a list of [`Summary`] items when available. + /// Gathers the result from [`Source::query`] as a list of [`Summary`] items + /// when they become available. fn query_vec(&mut self, dep: &Dependency, kind: QueryKind) -> Poll>> { let mut ret = Vec::new(); self.query(dep, kind, &mut |s| ret.push(s)).map_ok(|_| ret) diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index 034d7ed590c..c369dab16eb 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -20,16 +20,31 @@ lazy_static::lazy_static! { } /// Unique identifier for a source of packages. +/// +/// Cargo uniquely identifies packages using [`PackageId`], a combination of the +/// package name, version, and the code source. `SourceId` exactly represents +/// the "code source" in `PackageId`. See [`SourceId::hash`] to learn what is +/// taken into account for the uniqueness of a source. +/// +/// `SourceId` is usually associated with an instance of [`Source`], which is +/// supposed to provide a `SourceId` via the [`Source::source_id`] method. 
+/// +/// [`Source`]: super::Source +/// [`Source::source_id`]: super::Source::source_id +/// [`PackageId`]: super::super::PackageId #[derive(Clone, Copy, Eq, Debug)] pub struct SourceId { inner: &'static SourceIdInner, } +/// The interned version of [`SourceId`] to avoid excessive clones and borrows. +/// Values are cached in `SOURCE_ID_CACHE` once created. #[derive(Eq, Clone, Debug)] struct SourceIdInner { /// The source URL. url: Url, - /// The canonical version of the above url + /// The canonical version of the above url. See [`CanonicalUrl`] to learn + /// why it is needed and how it normalizes a URL. canonical_url: CanonicalUrl, /// The source kind. kind: SourceKind, @@ -45,8 +60,8 @@ struct SourceIdInner { alt_registry_key: Option, } -/// The possible kinds of code source. Along with `SourceIdInner`, this fully defines the -/// source. +/// The possible kinds of code source. +/// Along with [`SourceIdInner`], this fully defines the source. #[derive(Debug, Clone, PartialEq, Eq, Hash)] enum SourceKind { /// A git repository. @@ -70,7 +85,8 @@ pub enum GitReference { Tag(String), /// From a branch. Branch(String), - /// From a specific revision. + /// From a specific revision. Can be a commit hash (either short or full), + /// or a named reference like `refs/pull/493/head`. Rev(String), /// The default branch of the repository, the reference named `HEAD`. DefaultBranch, @@ -100,6 +116,7 @@ impl SourceId { Ok(source_id) } + /// Interns the value and returns the wrapped type. fn wrap(inner: SourceIdInner) -> SourceId { let mut cache = SOURCE_ID_CACHE.lock().unwrap(); let inner = cache.get(&inner).cloned().unwrap_or_else(|| { @@ -172,7 +189,7 @@ impl SourceId { } } - /// A view of the `SourceId` that can be `Display`ed as a URL. + /// A view of the [`SourceId`] that can be `Display`ed as a URL. 
pub fn as_url(&self) -> SourceIdAsUrl<'_> { SourceIdAsUrl { inner: &*self.inner, @@ -208,7 +225,7 @@ impl SourceId { SourceId::new(kind, url.to_owned(), Some(name)) } - /// Creates a SourceId from a local registry path. + /// Creates a `SourceId` from a local registry path. pub fn for_local_registry(path: &Path) -> CargoResult { let url = path.into_url()?; SourceId::new(SourceKind::LocalRegistry, url, None) @@ -287,6 +304,7 @@ impl SourceId { &self.inner.canonical_url } + /// Displays the text "crates.io index" for Cargo shell status output. pub fn display_index(self) -> String { if self.is_crates_io() { format!("{} index", CRATES_IO_DOMAIN) @@ -295,6 +313,7 @@ impl SourceId { } } + /// Displays the name of a registry if it has one. Otherwise just the URL. pub fn display_registry_name(self) -> String { if self.is_crates_io() { CRATES_IO_REGISTRY.to_string() @@ -360,6 +379,8 @@ impl SourceId { } /// Creates an implementation of `Source` corresponding to this ID. + /// + /// * `yanked_whitelist` --- Packages allowed to be used, even if they are yanked. pub fn load<'a>( self, config: &'a Config, @@ -434,7 +455,7 @@ impl SourceId { /// Hashes `self`. /// /// For paths, remove the workspace prefix so the same source will give the - /// same hash in different locations. + /// same hash in different locations, helping reproducible builds. pub fn stable_hash(self, workspace: &Path, into: &mut S) { if self.is_path() { if let Ok(p) = self @@ -563,9 +584,9 @@ impl fmt::Display for SourceId { } } -// The hash of SourceId is used in the name of some Cargo folders, so shouldn't -// vary. `as_str` gives the serialisation of a url (which has a spec) and so -// insulates against possible changes in how the url crate does hashing. +/// The hash of SourceId is used in the name of some Cargo folders, so shouldn't +/// vary. `as_str` gives the serialisation of a url (which has a spec) and so +/// insulates against possible changes in how the url crate does hashing. 
impl Hash for SourceId { fn hash(&self, into: &mut S) { self.inner.kind.hash(into); @@ -576,13 +597,14 @@ impl Hash for SourceId { } } +/// The hash of `SourceIdInner` is used to retrieve its interned value from +/// `SOURCE_ID_CACHE`. We only care about fields that make `SourceIdInner` +/// unique. Optional fields not affecting the uniqueness must be excluded, +/// such as [`name`] and [`alt_registry_key`]. That's why this is not derived. +/// +/// [`name`]: SourceIdInner::name +/// [`alt_registry_key`]: SourceIdInner::alt_registry_key impl Hash for SourceIdInner { - /// The hash of `SourceIdInner` is used to retrieve its interned value. We - /// only care about fields that make `SourceIdInner` unique, which are: - /// - /// - `kind` - /// - `precise` - /// - `canonical_url` fn hash(&self, into: &mut S) { self.kind.hash(into); self.precise.hash(into); @@ -590,8 +612,8 @@ impl Hash for SourceIdInner { } } +/// This implementation must be synced with [`SourceIdInner::hash`]. impl PartialEq for SourceIdInner { - /// This implementation must be synced with [`SourceIdInner::hash`]. fn eq(&self, other: &Self) -> bool { self.kind == other.kind && self.precise == other.precise @@ -599,66 +621,66 @@ impl PartialEq for SourceIdInner { } } -// forward to `Ord` +/// Forwards to `Ord` impl PartialOrd for SourceKind { fn partial_cmp(&self, other: &SourceKind) -> Option { Some(self.cmp(other)) } } -// Note that this is specifically not derived on `SourceKind` although the -// implementation here is very similar to what it might look like if it were -// otherwise derived. -// -// The reason for this is somewhat obtuse. First of all the hash value of -// `SourceKind` makes its way into `~/.cargo/registry/index/gitproxy.zycloud.tk-XXXX` -// which means that changes to the hash means that all Rust users need to -// redownload the crates.io index and all their crates. If possible we strive to -// not change this to make this redownloading behavior happen as little as -// possible. 
How is this connected to `Ord` you might ask? That's a good -// question! -// -// Since the beginning of time `SourceKind` has had `#[derive(Hash)]`. It for -// the longest time *also* derived the `Ord` and `PartialOrd` traits. In #8522, -// however, the implementation of `Ord` changed. This handwritten implementation -// forgot to sync itself with the originally derived implementation, namely -// placing git dependencies as sorted after all other dependencies instead of -// first as before. -// -// This regression in #8522 (Rust 1.47) went unnoticed. When we switched back -// to a derived implementation in #9133 (Rust 1.52 beta) we only then ironically -// saw an issue (#9334). In #9334 it was observed that stable Rust at the time -// (1.51) was sorting git dependencies last, whereas Rust 1.52 beta would sort -// git dependencies first. This is because the `PartialOrd` implementation in -// 1.51 used #8522, the buggy implementation, which put git deps last. In 1.52 -// it was (unknowingly) restored to the pre-1.47 behavior with git dependencies -// first. -// -// Because the breakage was only witnessed after the original breakage, this -// trait implementation is preserving the "broken" behavior. Put a different way: -// -// * Rust pre-1.47 sorted git deps first. -// * Rust 1.47 to Rust 1.51 sorted git deps last, a breaking change (#8522) that -// was never noticed. -// * Rust 1.52 restored the pre-1.47 behavior (#9133, without knowing it did -// so), and breakage was witnessed by actual users due to difference with -// 1.51. -// * Rust 1.52 (the source as it lives now) was fixed to match the 1.47-1.51 -// behavior (#9383), which is now considered intentionally breaking from the -// pre-1.47 behavior. -// -// Note that this was all discovered when Rust 1.53 was in nightly and 1.52 was -// in beta. #9133 was in both beta and nightly at the time of discovery. For -// 1.52 #9383 reverted #9133, meaning 1.52 is the same as 1.51. 
On nightly -// (1.53) #9397 was created to fix the regression introduced by #9133 relative -// to the current stable (1.51). -// -// That's all a long winded way of saying "it's weird that git deps hash first -// and are sorted last, but it's the way it is right now". The author of this -// comment chose to handwrite the `Ord` implementation instead of the `Hash` -// implementation, but it's only required that at most one of them is -// hand-written because the other can be derived. Perhaps one day in -// the future someone can figure out how to remove this behavior. +/// Note that this is specifically not derived on `SourceKind` although the +/// implementation here is very similar to what it might look like if it were +/// otherwise derived. +/// +/// The reason for this is somewhat obtuse. First of all the hash value of +/// `SourceKind` makes its way into `~/.cargo/registry/index/gitproxy.zycloud.tk-XXXX` +/// which means that changes to the hash means that all Rust users need to +/// redownload the crates.io index and all their crates. If possible we strive +/// to not change this to make this redownloading behavior happen as little as +/// possible. How is this connected to `Ord` you might ask? That's a good +/// question! +/// +/// Since the beginning of time `SourceKind` has had `#[derive(Hash)]`. It for +/// the longest time *also* derived the `Ord` and `PartialOrd` traits. In #8522, +/// however, the implementation of `Ord` changed. This handwritten implementation +/// forgot to sync itself with the originally derived implementation, namely +/// placing git dependencies as sorted after all other dependencies instead of +/// first as before. +/// +/// This regression in #8522 (Rust 1.47) went unnoticed. When we switched back +/// to a derived implementation in #9133 (Rust 1.52 beta) we only then ironically +/// saw an issue (#9334). 
In #9334 it was observed that stable Rust at the time +/// (1.51) was sorting git dependencies last, whereas Rust 1.52 beta would sort +/// git dependencies first. This is because the `PartialOrd` implementation in +/// 1.51 used #8522, the buggy implementation, which put git deps last. In 1.52 +/// it was (unknowingly) restored to the pre-1.47 behavior with git dependencies +/// first. +/// +/// Because the breakage was only witnessed after the original breakage, this +/// trait implementation is preserving the "broken" behavior. Put a different way: +/// +/// * Rust pre-1.47 sorted git deps first. +/// * Rust 1.47 to Rust 1.51 sorted git deps last, a breaking change (#8522) that +/// was never noticed. +/// * Rust 1.52 restored the pre-1.47 behavior (#9133, without knowing it did +/// so), and breakage was witnessed by actual users due to difference with +/// 1.51. +/// * Rust 1.52 (the source as it lives now) was fixed to match the 1.47-1.51 +/// behavior (#9383), which is now considered intentionally breaking from the +/// pre-1.47 behavior. +/// +/// Note that this was all discovered when Rust 1.53 was in nightly and 1.52 was +/// in beta. #9133 was in both beta and nightly at the time of discovery. For +/// 1.52 #9383 reverted #9133, meaning 1.52 is the same as 1.51. On nightly +/// (1.53) #9397 was created to fix the regression introduced by #9133 relative +/// to the current stable (1.51). +/// +/// That's all a long winded way of saying "it's weird that git deps hash first +/// and are sorted last, but it's the way it is right now". The author of this +/// comment chose to handwrite the `Ord` implementation instead of the `Hash` +/// implementation, but it's only required that at most one of them is +/// hand-written because the other can be derived. Perhaps one day in +/// the future someone can figure out how to remove this behavior. 
impl Ord for SourceKind { fn cmp(&self, other: &SourceKind) -> Ordering { match (self, other) { diff --git a/src/cargo/sources/config.rs b/src/cargo/sources/config.rs index 97a23a0b41c..5d5a4e8dbdf 100644 --- a/src/cargo/sources/config.rs +++ b/src/cargo/sources/config.rs @@ -1,4 +1,4 @@ -//! Implementation of configuration for various sources +//! Implementation of configuration for various sources. //! //! This module will parse the various `source.*` TOML configuration keys into a //! structure usable by Cargo itself. Currently this is primarily used to map @@ -14,11 +14,12 @@ use log::debug; use std::collections::{HashMap, HashSet}; use url::Url; +/// Represents the entire `[source]` table in Cargo configuration. #[derive(Clone)] pub struct SourceConfigMap<'cfg> { /// Mapping of source name to the toml configuration. cfgs: HashMap, - /// Mapping of `SourceId` to the source name. + /// Mapping of [`SourceId`] to the source name. id2name: HashMap, config: &'cfg Config, } @@ -67,6 +68,8 @@ struct SourceConfig { } impl<'cfg> SourceConfigMap<'cfg> { + /// Like [`SourceConfigMap::empty`] but includes sources from source + /// replacement configurations. pub fn new(config: &'cfg Config) -> CargoResult> { let mut base = SourceConfigMap::empty(config)?; let sources: Option> = config.get("source")?; @@ -78,6 +81,8 @@ impl<'cfg> SourceConfigMap<'cfg> { Ok(base) } + /// Creates the default set of sources that doesn't take `[source]` + /// replacement into account. pub fn empty(config: &'cfg Config) -> CargoResult> { let mut base = SourceConfigMap { cfgs: HashMap::new(), @@ -112,11 +117,14 @@ impl<'cfg> SourceConfigMap<'cfg> { Ok(base) } + /// Returns the `Config` this source config map is associated with. pub fn config(&self) -> &'cfg Config { self.config } - /// Get the `Source` for a given `SourceId`. + /// Gets the [`Source`] for a given [`SourceId`]. + /// + /// * `yanked_whitelist` --- Packages allowed to be used, even if they are yanked. 
pub fn load( &self, id: SourceId, @@ -208,6 +216,7 @@ restore the source replacement configuration to continue the build Ok(Box::new(ReplacedSource::new(id, new_id, new_src))) } + /// Adds a source config with an associated name. fn add(&mut self, name: &str, cfg: SourceConfig) -> CargoResult<()> { if let Some(old_name) = self.id2name.insert(cfg.id, name.to_string()) { // The user is allowed to redefine the built-in crates-io @@ -226,6 +235,7 @@ restore the source replacement configuration to continue the build Ok(()) } + /// Adds a source config from a TOML definition. fn add_config(&mut self, name: String, def: SourceConfigDef) -> CargoResult<()> { let mut srcs = Vec::new(); if let Some(registry) = def.registry { diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 37e1e1f0f9d..2f147b19ea1 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -14,13 +14,28 @@ use ignore::gitignore::GitignoreBuilder; use log::{trace, warn}; use walkdir::WalkDir; +/// A source that represents one or multiple packages gathered from a given root +/// path on the filesystem. +/// +/// It's the cornerstone of every other source --- other implementations +/// eventually need to call `PathSource` to read local packages somewhere on +/// the filesystem. +/// +/// It also provides convenient methods like [`PathSource::list_files`] to +/// list all files in a package, given its ability to walk the filesystem. pub struct PathSource<'cfg> { + /// The unique identifier of this source. source_id: SourceId, + /// The root path of this source. path: PathBuf, + /// Whether this source has updated all package information it may contain. updated: bool, + /// Packages that this source has discovered. packages: Vec, - config: &'cfg Config, + /// Whether this source should discover nested packages recursively. + /// See [`PathSource::new_recursive`] for more. 
recursive: bool, + config: &'cfg Config, } impl<'cfg> PathSource<'cfg> { @@ -41,9 +56,9 @@ impl<'cfg> PathSource<'cfg> { /// Creates a new source which is walked recursively to discover packages. /// - /// This is similar to the `new` method except that instead of requiring a - /// valid package to be present at `root` the folder is walked entirely to - /// crawl for packages. + /// This is similar to the [`PathSource::new`] method except that instead + /// of requiring a valid package to be present at `root` the folder is + /// walked entirely to crawl for packages. /// /// Note that this should be used with care and likely shouldn't be chosen /// by default! @@ -54,6 +69,8 @@ impl<'cfg> PathSource<'cfg> { } } + /// Preloads a package for this source. The source is assumed not to have + /// loaded any other packages yet. pub fn preload_with(&mut self, pkg: Package) { assert!(!self.updated); assert!(!self.recursive); @@ -62,6 +79,7 @@ impl<'cfg> PathSource<'cfg> { self.packages.push(pkg); } + /// Gets the package on the root path. pub fn root_package(&mut self) -> CargoResult { trace!("root_package; source={:?}", self); @@ -76,6 +94,8 @@ impl<'cfg> PathSource<'cfg> { } } + /// Returns the packages discovered by this source. It may walk the + /// filesystem if package information hasn't yet been updated. pub fn read_packages(&self) -> CargoResult> { if self.updated { Ok(self.packages.clone()) @@ -96,7 +116,8 @@ impl<'cfg> PathSource<'cfg> { /// /// The basic assumption of this method is that all files in the directory /// are relevant for building this package, but it also contains logic to - /// use other methods like .gitignore to filter the list of files. + /// use other methods like `.gitignore`, `package.include`, or + /// `package.exclude` to filter the list of files. 
pub fn list_files(&self, pkg: &Package) -> CargoResult> { self._list_files(pkg).with_context(|| { format!( @@ -106,6 +127,7 @@ impl<'cfg> PathSource<'cfg> { }) } + /// See [`PathSource::list_files`]. fn _list_files(&self, pkg: &Package) -> CargoResult> { let root = pkg.root(); let no_include_option = pkg.manifest().include().is_empty(); @@ -218,6 +240,11 @@ impl<'cfg> PathSource<'cfg> { Ok(None) } + /// Lists files relevant to building this package inside this source by + /// consulting both the Git index (tracked) and status (untracked) under + /// a given Git repository. + /// + /// This looks into Git submodules as well. fn list_files_git( &self, pkg: &Package, @@ -373,6 +400,11 @@ impl<'cfg> PathSource<'cfg> { } } + /// Lists files relevant to building this package inside this source by + /// walking the filesystem from the package root path. + /// + /// This is a fallback for [`PathSource::list_files_git`] when the package + /// is not tracked under a Git repository. fn list_files_walk( &self, pkg: &Package, @@ -383,6 +415,7 @@ impl<'cfg> PathSource<'cfg> { Ok(ret) } + /// Helper recursive function for [`PathSource::list_files_walk`]. fn walk( &self, path: &Path, @@ -448,6 +481,7 @@ impl<'cfg> PathSource<'cfg> { Ok(()) } + /// Gets the last modified file in a package. pub fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> { if !self.updated { return Err(internal(format!( @@ -479,10 +513,12 @@ impl<'cfg> PathSource<'cfg> { Ok((max, max_path)) } + /// Returns the root path of this source. pub fn path(&self) -> &Path { &self.path } + /// Discovers packages inside this source if it hasn't done so yet. pub fn update(&mut self) -> CargoResult<()> { if !self.updated { let packages = self.read_packages()?;