From 341962ac8bbe457c080c34c2764ddc7e253245c4 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 10 Oct 2025 10:58:20 -0400 Subject: [PATCH 1/4] refactor(gctx): remove duplicate function --- src/cargo/util/context/de.rs | 2 +- src/cargo/util/context/mod.rs | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/cargo/util/context/de.rs b/src/cargo/util/context/de.rs index 5f6325b4b2a..c5288794391 100644 --- a/src/cargo/util/context/de.rs +++ b/src/cargo/util/context/de.rs @@ -387,7 +387,7 @@ impl<'de, 'gctx> de::MapAccess<'de> for ConfigMapAccess<'gctx> { .gctx .get_cv_with_env(&self.de.key) .ok() - .and_then(|cv| cv.map(|cv| cv.get_definition().clone())), + .and_then(|cv| cv.map(|cv| cv.definition().clone())), ) }); self.de.key.pop(); diff --git a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index bea9eb19914..80222c1c6f1 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -2165,16 +2165,6 @@ impl fmt::Debug for ConfigValue { } impl ConfigValue { - fn get_definition(&self) -> &Definition { - match self { - CV::Boolean(_, def) - | CV::Integer(_, def) - | CV::String(_, def) - | CV::List(_, def) - | CV::Table(_, def) => def, - } - } - fn from_toml(def: Definition, toml: toml::Value) -> CargoResult { let mut error_path = Vec::new(); Self::from_toml_inner(def, toml, &mut error_path).with_context(|| { From 185002901f3bb080c00000652111864583e32bdf Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Tue, 7 Oct 2025 22:11:45 -0400 Subject: [PATCH 2/4] refactor: switch to `Vec` for `ConfigValue::List` This is a preparation for supporting list of types other than string. --- src/cargo/ops/cargo_config.rs | 20 +++++++++-- src/cargo/util/context/de.rs | 20 ++++++++--- src/cargo/util/context/mod.rs | 58 +++++++++++++++++++++----------- src/cargo/util/context/target.rs | 8 ++--- 4 files changed, 75 insertions(+), 31 deletions(-) diff --git a/src/cargo/ops/cargo_config.rs b/src/cargo/ops/cargo_config.rs index ea49cd79fa4..95a1e3c879d 100644 --- a/src/cargo/ops/cargo_config.rs +++ b/src/cargo/ops/cargo_config.rs @@ -128,7 +128,13 @@ fn print_toml(gctx: &GlobalContext, opts: &GetOptions<'_>, key: &ConfigKey, cv: CV::List(vals, _def) => { if opts.show_origin { drop_println!(gctx, "{} = [", key); - for (val, def) in vals { + for cv in vals { + let (val, def) = match cv { + CV::String(s, def) => (s.as_str(), def), + // This is actually unreachable until we start supporting list of different types. + // It should be validated already during the deserialization. + v => todo!("support {} type ", v.desc()), + }; drop_println!( gctx, " {}, # {}", @@ -139,7 +145,15 @@ fn print_toml(gctx: &GlobalContext, opts: &GetOptions<'_>, key: &ConfigKey, cv: } drop_println!(gctx, "]"); } else { - let vals: toml_edit::Array = vals.iter().map(|x| &x.0).collect(); + let vals: toml_edit::Array = vals + .iter() + .map(|cv| match cv { + CV::String(s, _) => toml_edit::Value::from(s.as_str()), + // This is actually unreachable until we start supporting list of different types. + // It should be validated already during the deserialization. + v => todo!("support {} type ", v.desc()), + }) + .collect(); drop_println!(gctx, "{} = {}", key, vals); } } @@ -204,7 +218,7 @@ fn print_json(gctx: &GlobalContext, key: &ConfigKey, cv: &CV, include_key: bool) CV::Integer(val, _def) => json!(val), CV::String(val, _def) => json!(val), CV::List(vals, _def) => { - let jvals: Vec<_> = vals.iter().map(|(val, _def)| json!(val)).collect(); + let jvals: Vec<_> = vals.iter().map(cv_to_json).collect(); json!(jvals) } CV::Table(map, _def) => { diff --git a/src/cargo/util/context/de.rs b/src/cargo/util/context/de.rs index c5288794391..789779c04d9 100644 --- a/src/cargo/util/context/de.rs +++ b/src/cargo/util/context/de.rs @@ -160,7 +160,9 @@ impl<'de, 'gctx> de::Deserializer<'de> for Deserializer<'gctx> { match self.gctx.get_cv(&self.key)? { Some(CV::List(val, _def)) => res.extend(val), Some(CV::String(val, def)) => { - let split_vs = val.split_whitespace().map(|s| (s.to_string(), def.clone())); + let split_vs = val + .split_whitespace() + .map(|s| CV::String(s.to_string(), def.clone())); res.extend(split_vs); } Some(val) => { @@ -172,7 +174,13 @@ impl<'de, 'gctx> de::Deserializer<'de> for Deserializer<'gctx> { self.gctx.get_env_list(&self.key, &mut res)?; - let vals: Vec = res.into_iter().map(|vd| vd.0).collect(); + let vals: Vec = res + .into_iter() + .map(|val| match val { + CV::String(s, _defintion) => Ok(s), + other => Err(ConfigError::expected(&self.key, "string", &other)), + }) + .collect::>()?; visitor.visit_newtype_struct(vals.into_deserializer()) } else { visitor.visit_newtype_struct(self) @@ -396,7 +404,9 @@ impl<'de, 'gctx> de::MapAccess<'de> for ConfigMapAccess<'gctx> { } struct ConfigSeqAccess { - list_iter: vec::IntoIter<(String, Definition)>, + /// The config key to the sequence. + key: ConfigKey, + list_iter: vec::IntoIter, } impl ConfigSeqAccess { @@ -416,6 +426,7 @@ impl ConfigSeqAccess { de.gctx.get_env_list(&de.key, &mut res)?; Ok(ConfigSeqAccess { + key: de.key, list_iter: res.into_iter(), }) } @@ -430,12 +441,13 @@ impl<'de> de::SeqAccess<'de> for ConfigSeqAccess { { match self.list_iter.next() { // TODO: add `def` to error? - Some((value, def)) => { + Some(CV::String(value, def)) => { // This might be a String or a Value. // ValueDeserializer will handle figuring out which one it is. let maybe_value_de = ValueDeserializer::new_with_string(value, def); seed.deserialize(maybe_value_de).map(Some) } + Some(val) => Err(ConfigError::expected(&self.key, "list of string", &val)), None => Ok(None), } } diff --git a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index 80222c1c6f1..8196d14d469 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -989,11 +989,7 @@ impl GlobalContext { /// Internal method for getting an environment variable as a list. /// If the key is a non-mergeable list and a value is found in the environment, existing values are cleared. - fn get_env_list( - &self, - key: &ConfigKey, - output: &mut Vec<(String, Definition)>, - ) -> CargoResult<()> { + fn get_env_list(&self, key: &ConfigKey, output: &mut Vec) -> CargoResult<()> { let Some(env_val) = self.env.get_str(key.as_env_key()) else { self.check_environment_key_case_mismatch(key); return Ok(()); @@ -1018,16 +1014,16 @@ impl GlobalContext { def.clone(), ) })?; - output.push((s.to_string(), def.clone())); + output.push(CV::String(s.to_string(), def.clone())) } } else { output.extend( env_val .split_whitespace() - .map(|s| (s.to_string(), def.clone())), + .map(|s| CV::String(s.to_string(), def.clone())), ); } - output.sort_by(|a, b| a.1.cmp(&b.1)); + output.sort_by(|a, b| a.definition().cmp(b.definition())); Ok(()) } @@ -1386,7 +1382,16 @@ impl GlobalContext { Some(CV::String(s, def)) => { vec![abs(s, def)] } - Some(CV::List(list, _def)) => list.iter().map(|(s, def)| abs(s, def)).collect(), + Some(CV::List(list, _def)) => list + .iter() + .map(|cv| match cv { + CV::String(s, def) => Ok(abs(s, def)), + other => bail!( + "`include` expected a string or list of strings, but found {} in list", + other.desc() + ), + }) + .collect::>>()?, Some(other) => bail!( "`include` expected a string or list, but found {} in `{}`", other.desc(), @@ -1774,7 +1779,16 @@ impl GlobalContext { let key = ConfigKey::from_str("paths"); // paths overrides cannot be set via env config, so use get_cv here. match self.get_cv(&key)? { - Some(CV::List(val, definition)) => Ok(Some(Value { val, definition })), + Some(CV::List(val, definition)) => { + let val = val + .into_iter() + .map(|cv| match cv { + CV::String(s, def) => Ok((s, def)), + other => self.expected("string", &key, &other), + }) + .collect::>>()?; + Ok(Some(Value { val, definition })) + } Some(val) => self.expected("list", &key, &val), None => Ok(None), } @@ -2138,7 +2152,7 @@ enum KeyOrIdx { pub enum ConfigValue { Integer(i64, Definition), String(String, Definition), - List(Vec<(String, Definition)>, Definition), + List(Vec, Definition), Table(HashMap, Definition), Boolean(bool, Definition), } @@ -2151,11 +2165,11 @@ impl fmt::Debug for ConfigValue { CV::String(s, def) => write!(f, "{} (from {})", s, def), CV::List(list, def) => { write!(f, "[")?; - for (i, (s, def)) in list.iter().enumerate() { + for (i, item) in list.iter().enumerate() { if i > 0 { write!(f, ", ")?; } - write!(f, "{} (from {})", s, def)?; + write!(f, "{item:?}")?; } write!(f, "] (from {})", def) } @@ -2196,7 +2210,7 @@ impl ConfigValue { val.into_iter() .enumerate() .map(|(i, toml)| match toml { - toml::Value::String(val) => Ok((val, def.clone())), + toml::Value::String(val) => Ok(CV::String(val, def.clone())), v => { path.push(KeyOrIdx::Idx(i)); bail!("expected string but found {} at index {i}", v.type_str()) @@ -2231,9 +2245,7 @@ impl ConfigValue { CV::Boolean(s, _) => toml::Value::Boolean(s), CV::String(s, _) => toml::Value::String(s), CV::Integer(i, _) => toml::Value::Integer(i), - CV::List(l, _) => { - toml::Value::Array(l.into_iter().map(|(s, _)| toml::Value::String(s)).collect()) - } + CV::List(l, _) => toml::Value::Array(l.into_iter().map(|cv| cv.into_toml()).collect()), CV::Table(l, _) => { toml::Value::Table(l.into_iter().map(|(k, v)| (k, v.into_toml())).collect()) } @@ -2275,7 +2287,7 @@ impl ConfigValue { mem::swap(new, old); } } - old.sort_by(|a, b| a.1.cmp(&b.1)); + old.sort_by(|a, b| a.definition().cmp(b.definition())); } (&mut CV::Table(ref mut old, _), CV::Table(ref mut new, _)) => { for (key, value) in mem::take(new) { @@ -2344,9 +2356,15 @@ impl ConfigValue { } } - pub fn list(&self, key: &str) -> CargoResult<&[(String, Definition)]> { + pub fn list(&self, key: &str) -> CargoResult> { match self { - CV::List(list, _) => Ok(list), + CV::List(list, _) => list + .iter() + .map(|cv| match cv { + CV::String(s, def) => Ok((s.clone(), def.clone())), + _ => self.expected("string", key), + }) + .collect::>(), _ => self.expected("list", key), } } diff --git a/src/cargo/util/context/target.rs b/src/cargo/util/context/target.rs index 85955e61dc1..32c1fffef84 100644 --- a/src/cargo/util/context/target.rs +++ b/src/cargo/util/context/target.rs @@ -238,11 +238,11 @@ fn parse_links_overrides( Ok(links_overrides) } -fn extra_link_args<'a>( +fn extra_link_args( link_type: LinkArgTarget, key: &str, - value: &'a CV, -) -> CargoResult + 'a> { + value: &CV, +) -> CargoResult> { let args = value.list(key)?; - Ok(args.iter().map(move |v| (link_type.clone(), v.0.clone()))) + Ok(args.into_iter().map(|v| (link_type.clone(), v.0)).collect()) } From df4f7fc93192b5bcdde19ca535285530e938fff4 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Wed, 8 Oct 2025 14:15:46 -0400 Subject: [PATCH 3/4] refactor: rename `ConfigValue::list` to `string_list` It is only used for retriving string list for now. If we have needs for a generalized `list`, we add it later. --- src/cargo/util/context/mod.rs | 2 +- src/cargo/util/context/target.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cargo/util/context/mod.rs b/src/cargo/util/context/mod.rs index 8196d14d469..694c27260b9 100644 --- a/src/cargo/util/context/mod.rs +++ b/src/cargo/util/context/mod.rs @@ -2356,7 +2356,7 @@ impl ConfigValue { } } - pub fn list(&self, key: &str) -> CargoResult> { + pub fn string_list(&self, key: &str) -> CargoResult> { match self { CV::List(list, _) => list .iter() diff --git a/src/cargo/util/context/target.rs b/src/cargo/util/context/target.rs index 32c1fffef84..38dc55afc72 100644 --- a/src/cargo/util/context/target.rs +++ b/src/cargo/util/context/target.rs @@ -173,13 +173,13 @@ fn parse_links_overrides( output.library_links.extend(links); } "rustc-link-lib" => { - let list = value.list(key)?; + let list = value.string_list(key)?; output .library_links .extend(list.iter().map(|v| v.0.clone())); } "rustc-link-search" => { - let list = value.list(key)?; + let list = value.string_list(key)?; output.library_paths.extend( list.iter() .map(|v| PathBuf::from(&v.0)) @@ -211,11 +211,11 @@ fn parse_links_overrides( output.linker_args.extend(args); } "rustc-cfg" => { - let list = value.list(key)?; + let list = value.string_list(key)?; output.cfgs.extend(list.iter().map(|v| v.0.clone())); } "rustc-check-cfg" => { - let list = value.list(key)?; + let list = value.string_list(key)?; output.check_cfgs.extend(list.iter().map(|v| v.0.clone())); } "rustc-env" => { @@ -243,6 +243,6 @@ fn extra_link_args( key: &str, value: &CV, ) -> CargoResult> { - let args = value.list(key)?; + let args = value.string_list(key)?; Ok(args.into_iter().map(|v| (link_type.clone(), v.0)).collect()) } From c762238aac576be5f770809583d3081d501045c6 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Fri, 10 Oct 2025 10:32:16 -0400 Subject: [PATCH 4/4] refactor: split two code paths in ValueDeserializer to enum The MapAccess and Deserializer impls in ValueDeserializer are actually tow distinct code paths. It is better to extract them to two different value sources. --- src/cargo/util/context/de.rs | 109 ++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 40 deletions(-) diff --git a/src/cargo/util/context/de.rs b/src/cargo/util/context/de.rs index 789779c04d9..2413e9f1289 100644 --- a/src/cargo/util/context/de.rs +++ b/src/cargo/util/context/de.rs @@ -441,11 +441,10 @@ impl<'de> de::SeqAccess<'de> for ConfigSeqAccess { { match self.list_iter.next() { // TODO: add `def` to error? - Some(CV::String(value, def)) => { + Some(val @ CV::String(..)) => { // This might be a String or a Value. - // ValueDeserializer will handle figuring out which one it is. - let maybe_value_de = ValueDeserializer::new_with_string(value, def); - seed.deserialize(maybe_value_de).map(Some) + // ArrayItemDeserializer will handle figuring out which one it is. + seed.deserialize(ArrayItemDeserializer::new(val)).map(Some) } Some(val) => Err(ConfigError::expected(&self.key, "list of string", &val)), None => Ok(None), @@ -453,6 +452,23 @@ impl<'de> de::SeqAccess<'de> for ConfigSeqAccess { } } +/// Source of data for [`ValueDeserializer`] +enum ValueSource<'gctx> { + /// The deserializer used to actually deserialize a Value struct. + Deserializer { + de: Deserializer<'gctx>, + definition: Definition, + }, + /// A [`ConfigValue`](CV). + /// + /// This is used for situations where you can't address a string via a TOML key, + /// such as a string inside an array. + /// The [`ConfigSeqAccess`] doesn't know if the type it should deserialize to + /// is a `String` or `Value`, + /// so [`ArrayItemDeserializer`] needs to be able to handle both. + ConfigValue(CV), +} + /// This is a deserializer that deserializes into a `Value` for /// configuration. /// @@ -462,18 +478,7 @@ impl<'de> de::SeqAccess<'de> for ConfigSeqAccess { /// See more comments in `value.rs` for the protocol used here. struct ValueDeserializer<'gctx> { hits: u32, - definition: Definition, - /// The deserializer, used to actually deserialize a Value struct. - /// This is `None` if deserializing a string. - de: Option>, - /// A string value to deserialize. - /// - /// This is used for situations where you can't address a string via a - /// TOML key, such as a string inside an array. The `ConfigSeqAccess` - /// doesn't know if the type it should deserialize to is a `String` or - /// `Value`, so `ValueDeserializer` needs to be able to handle - /// both. - str_value: Option, + source: ValueSource<'gctx>, } impl<'gctx> ValueDeserializer<'gctx> { @@ -498,20 +503,24 @@ impl<'gctx> ValueDeserializer<'gctx> { (_, None) => env_def, } }; + Ok(ValueDeserializer { hits: 0, - definition, - de: Some(de), - str_value: None, + source: ValueSource::Deserializer { de, definition }, }) } - fn new_with_string(s: String, definition: Definition) -> ValueDeserializer<'gctx> { + fn with_cv(cv: CV) -> ValueDeserializer<'gctx> { ValueDeserializer { hits: 0, - definition, - de: None, - str_value: Some(s), + source: ValueSource::ConfigValue(cv), + } + } + + fn definition(&self) -> &Definition { + match &self.source { + ValueSource::Deserializer { definition, .. } => definition, + ValueSource::ConfigValue(cv) => cv.definition(), } } } @@ -542,19 +551,19 @@ impl<'de, 'gctx> de::MapAccess<'de> for ValueDeserializer<'gctx> { // If this is the first time around we deserialize the `value` field // which is the actual deserializer if self.hits == 1 { - if let Some(de) = &self.de { - return seed + return match &self.source { + ValueSource::Deserializer { de, definition } => seed .deserialize(de.clone()) - .map_err(|e| e.with_key_context(&de.key, Some(self.definition.clone()))); - } else { - return seed - .deserialize(self.str_value.as_ref().unwrap().clone().into_deserializer()); - } + .map_err(|e| e.with_key_context(&de.key, Some(definition.clone()))), + ValueSource::ConfigValue(cv) => { + seed.deserialize(ArrayItemDeserializer::new(cv.clone())) + } + }; } // ... otherwise we're deserializing the `definition` field, so we need // to figure out where the field we just deserialized was defined at. - match &self.definition { + match self.definition() { Definition::Path(path) => { seed.deserialize(Tuple2Deserializer(0i32, path.to_string_lossy())) } @@ -572,25 +581,45 @@ impl<'de, 'gctx> de::MapAccess<'de> for ValueDeserializer<'gctx> { } } -// Deserializer is only implemented to handle deserializing a String inside a -// sequence (like `Vec` or `Vec>`). `Value` is -// handled by deserialize_struct, and the plain `String` is handled by all the -// other functions here. -impl<'de, 'gctx> de::Deserializer<'de> for ValueDeserializer<'gctx> { +/// A deserializer for individual [`ConfigValue`](CV) items in arrays +/// +/// This deserializer is only implemented to handle deserializing a String +/// inside a sequence (like `Vec` or `Vec>`). +/// `Value` is handled by `deserialize_struct` in [`ValueDeserializer`], +/// and the plain `String` is handled by all the other functions here. +#[derive(Clone)] +struct ArrayItemDeserializer { + cv: CV, +} + +impl ArrayItemDeserializer { + fn new(cv: CV) -> Self { + Self { cv } + } + + fn into_inner(self) -> String { + match self.cv { + CV::String(s, _def) => s, + _ => unreachable!("string expected"), + } + } +} + +impl<'de> de::Deserializer<'de> for ArrayItemDeserializer { type Error = ConfigError; fn deserialize_str(self, visitor: V) -> Result where V: de::Visitor<'de>, { - visitor.visit_str(&self.str_value.expect("string expected")) + visitor.visit_str(&self.into_inner()) } fn deserialize_string(self, visitor: V) -> Result where V: de::Visitor<'de>, { - visitor.visit_string(self.str_value.expect("string expected")) + visitor.visit_string(self.into_inner()) } fn deserialize_struct( @@ -607,7 +636,7 @@ impl<'de, 'gctx> de::Deserializer<'de> for ValueDeserializer<'gctx> { // // See more comments in `value.rs` for the protocol used here. if name == value::NAME && fields == value::FIELDS { - return visitor.visit_map(self); + return visitor.visit_map(ValueDeserializer::with_cv(self.cv)); } unimplemented!("only strings and Value can be deserialized from a sequence"); } @@ -616,7 +645,7 @@ impl<'de, 'gctx> de::Deserializer<'de> for ValueDeserializer<'gctx> { where V: de::Visitor<'de>, { - visitor.visit_string(self.str_value.expect("string expected")) + visitor.visit_string(self.into_inner()) } fn deserialize_ignored_any(self, visitor: V) -> Result