From 9fdb1564f6fd05168b02b488ead751cb25de24bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Sch=C3=A4fer?= Date: Sun, 27 Apr 2025 09:26:56 +0200 Subject: [PATCH] xso: reject attempts to match the same XML attribute in different fields This was a bit tricky to build, because it is possible to have an indirection through a `static` there. Thanks to Rust's extensive const-fn capabilities, though, it's in fact possible to cover all cases. We still do two different checks to improve user experience. If we can, from within the proc macro, determine that two fields refer to the same XML attribute (because their namespace/name values use the same Rust tokens), then we reject the fields with a clear error message pointing at both fields. In the other case, when there's e.g. `#[xml(lang)]` and `#[xml(attribute(namespace = rxml::XMLNS_XML, name = "lang"))]`, the macro cannot be sure that XMLNS_XML is in fact the XML namespace. For that case, we generate code which is evaluated at compile time (and has no runtime impact) which panics if the namespace and name of two attribute-matching fields is the same. The error message will be less clear (because it contains extra, unchangeable wording like "evaluation of constant value failed" and "the evaluated program panicked at", which may be a bit confusing) than the message generated by the macros themselves, but it's a price we have to pay unfortunately. Note that this check may seem cosmetic and purely for better user experience, but it is in fact needed to avoid generating not-well-formed and/or not-namespace-well-formed XML: As `AsXml` generates `xso::Item`, where each attribute is emitted separated (and not aggregated in a map structure), a naive (and efficient) implementation of a writer might not double-check that no duplicate attributes are generated. --- xso-proc/Cargo.toml | 2 + xso-proc/src/compound.rs | 116 +++++++++++++++++++++++++++++++- xso-proc/src/error_message.rs | 18 +++++ xso-proc/src/field/attribute.rs | 9 ++- xso-proc/src/field/lang.rs | 11 +++ xso-proc/src/field/mod.rs | 12 +++- xso-proc/src/meta.rs | 33 ++++++++- xso/ChangeLog | 2 + xso/src/from_xml_doc.md | 64 +++++++++++++----- xso/src/lib.rs | 23 +++++++ 10 files changed, 267 insertions(+), 23 deletions(-) diff --git a/xso-proc/Cargo.toml b/xso-proc/Cargo.toml index 5f801b1bb0b45b28871a9810d809d25ae49614bf..37f6837ea6c91412bb8c143009839091157b75e6 100644 --- a/xso-proc/Cargo.toml +++ b/xso-proc/Cargo.toml @@ -10,6 +10,8 @@ repository = "https://gitlab.com/xmpp-rs/xmpp-rs" keywords = ["xso", "derive", "serialization"] license = "MPL-2.0" edition = "2021" +# we need const_refs_to_static, stabilized with 1.83 +rust-version = "1.83" [lib] proc-macro = true diff --git a/xso-proc/src/compound.rs b/xso-proc/src/compound.rs index cc4af1a5fa57aa309b3374116cb66bd1e9919e2f..b4b31da6ee8341b169a1bcf221ef4739ccc5d2a6 100644 --- a/xso-proc/src/compound.rs +++ b/xso-proc/src/compound.rs @@ -7,10 +7,12 @@ //! Handling of the insides of compound structures (structs and enum variants) use proc_macro2::{Span, TokenStream}; -use quote::{quote, ToTokens}; +use quote::{quote, quote_spanned, ToTokens}; use syn::{spanned::Spanned, *}; -use crate::error_message::ParentRef; +use std::collections::{hash_map::Entry, HashMap}; + +use crate::error_message::{FieldName, ParentRef}; use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit, NestedMatcher}; use crate::meta::{DiscardSpec, Flag, NameRef, NamespaceRef, QNameRef}; use crate::scope::{mangle_member, AsItemsScope, FromEventsScope}; @@ -61,6 +63,12 @@ pub(crate) struct Compound { /// Text to discard. discard_text: Flag, + + /// Attribute qualified names which are selected by fields. + /// + /// This is used to generate code which asserts, at compile time, that no + /// two fields select the same XML attribute. + selected_attributes: Vec<(QNameRef, Member)>, } impl Compound { @@ -83,6 +91,7 @@ impl Compound { let size_hint = compound_fields.size_hint(); let mut fields = Vec::with_capacity(size_hint.1.unwrap_or(size_hint.0)); let mut text_field = None; + let mut selected_attributes: HashMap = HashMap::new(); for field in compound_fields { let field = field?; @@ -101,6 +110,26 @@ impl Compound { text_field = Some(field.member().span()) } + if let Some(qname) = field.captures_attribute() { + let span = field.span(); + match selected_attributes.entry(qname) { + Entry::Occupied(o) => { + let mut err = Error::new( + span, + "this field XML field matches the same attribute as another field", + ); + err.combine(Error::new( + o.get().span(), + "the other field matching the same attribute is here", + )); + return Err(err); + } + Entry::Vacant(v) => { + v.insert(field.member().clone()); + } + } + } + fields.push(field); } @@ -157,6 +186,7 @@ impl Compound { unknown_child_policy, discard_attr, discard_text, + selected_attributes: selected_attributes.into_iter().collect(), }) } @@ -189,6 +219,82 @@ impl Compound { ) } + /// Generate code which, at compile time, asserts that all attributes + /// which are selected by this compound are disjunct. + /// + /// NOTE: this needs rustc 1.83 or newer for `const_refs_to_static`. + fn assert_disjunct_attributes(&self) -> TokenStream { + let mut checks = TokenStream::default(); + + // Comparison is commutative, so we *could* reduce this to n^2/2 + // comparisons instead of n*(n-1). However, by comparing every field + // with every other field and emitting check code for that, we can + // point at both fields in the error messages. + for (i, (qname_a, member_a)) in self.selected_attributes.iter().enumerate() { + for (j, (qname_b, member_b)) in self.selected_attributes.iter().enumerate() { + if i == j { + continue; + } + // Flip a and b around if a is later than b. + // This way, the error message is the same for both + // conflicting fields. Note that we always take the span of + // `a` though, so that the two errors point at different + // fields. + let span = member_a.span(); + let (member_a, member_b) = if i > j { + (member_b, member_a) + } else { + (member_a, member_b) + }; + if qname_a.namespace.is_some() != qname_b.namespace.is_some() { + // cannot ever match. + continue; + } + let Some((name_a, name_b)) = qname_a.name.as_ref().zip(qname_b.name.as_ref()) + else { + panic!("selected attribute has no XML local name"); + }; + + let mut check = quote! { + ::xso::exports::const_str_eq(#name_a.as_str(), #name_b.as_str()) + }; + + let namespaces = qname_a.namespace.as_ref().zip(qname_b.namespace.as_ref()); + if let Some((ns_a, ns_b)) = namespaces { + check.extend(quote! { + && ::xso::exports::const_str_eq(#ns_a, #ns_b) + }); + }; + + let attr_a = if let Some(namespace_a) = qname_a.namespace.as_ref() { + format!("{{{}}}{}", namespace_a, name_a) + } else { + format!("{}", name_a) + }; + + let attr_b = if let Some(namespace_b) = qname_b.namespace.as_ref() { + format!("{{{}}}{}", namespace_b, name_b) + } else { + format!("{}", name_b) + }; + + let field_a = FieldName(&member_a).to_string(); + let field_b = FieldName(&member_b).to_string(); + + // By assigning the checks to a `const`, we ensure that they + // are in fact evaluated at compile time, even if that constant + // is never used. + checks.extend(quote_spanned! {span=> + const _: () = { if #check { + panic!("member {} and member {} match the same XML attribute: {} == {}", #field_a, #field_b, #attr_a, #attr_b); + } }; + }) + } + } + + checks + } + /// Make and return a set of states which is used to construct the target /// type from XML events. /// @@ -513,9 +619,12 @@ impl Compound { let unknown_attribute_policy = &self.unknown_attribute_policy; + let checks = self.assert_disjunct_attributes(); + Ok(FromEventsSubmachine { defs: quote! { #extra_defs + #checks struct #builder_data_ty { #builder_data_def @@ -733,6 +842,9 @@ impl Compound { }, }; + let checks = self.assert_disjunct_attributes(); + extra_defs.extend(checks); + Ok(AsItemsSubmachine { defs: extra_defs, states, diff --git a/xso-proc/src/error_message.rs b/xso-proc/src/error_message.rs index db70ab7a7abc7f8a13c13c8691938469f0451f02..600defa79035161a199750abada7670edef23d1f 100644 --- a/xso-proc/src/error_message.rs +++ b/xso-proc/src/error_message.rs @@ -96,6 +96,24 @@ impl ParentRef { } } +/// Wrapper around `Path` with a more human-readable, collapsed version of +/// `Path`. +pub(crate) struct PrettyPath<'x>(pub &'x Path); + +impl fmt::Display for PrettyPath<'_> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut first = self.0.leading_colon.is_none(); + for segment in self.0.segments.iter() { + if !first { + f.write_str("::")?; + } + write!(f, "{}", segment.ident)?; + first = false; + } + Ok(()) + } +} + /// Ephemeral struct to create a nice human-readable representation of /// [`syn::Member`]. /// diff --git a/xso-proc/src/field/attribute.rs b/xso-proc/src/field/attribute.rs index 47ab601f826cb40d22dfea35f070affde66fdf20..80b72d733f8f1a932269b3aa0b667832653a7f5b 100644 --- a/xso-proc/src/field/attribute.rs +++ b/xso-proc/src/field/attribute.rs @@ -12,7 +12,7 @@ use quote::{quote, ToTokens}; use syn::*; use crate::error_message::{self, ParentRef}; -use crate::meta::{Flag, NameRef, NamespaceRef}; +use crate::meta::{Flag, NameRef, NamespaceRef, QNameRef}; use crate::scope::{AsItemsScope, FromEventsScope}; use crate::types::{ as_optional_xml_text_fn, default_fn, from_xml_text_fn, text_codec_decode_fn, @@ -134,4 +134,11 @@ impl Field for AttributeField { }, }) } + + fn captures_attribute(&self) -> Option { + Some(QNameRef { + namespace: self.xml_namespace.clone(), + name: Some(self.xml_name.clone()), + }) + } } diff --git a/xso-proc/src/field/lang.rs b/xso-proc/src/field/lang.rs index 8390eea1a65c1f460b429be0f40dc4a2a201d25e..fa75b8b87c25031f19edc72f081bd459805da616 100644 --- a/xso-proc/src/field/lang.rs +++ b/xso-proc/src/field/lang.rs @@ -13,6 +13,7 @@ use quote::quote; use syn::*; use crate::error_message::ParentRef; +use crate::meta::{NameRef, NamespaceRef, QNameRef, XMLNS_XML}; use crate::scope::{AsItemsScope, FromEventsScope}; use crate::types::{as_optional_xml_text_fn, option_ty, string_ty}; @@ -69,4 +70,14 @@ impl Field for LangField { }, }) } + + fn captures_attribute(&self) -> Option { + Some(QNameRef { + namespace: Some(NamespaceRef::fudge(XMLNS_XML, Span::call_site())), + name: Some(NameRef::fudge( + rxml_validation::NcName::try_from("lang").unwrap(), + Span::call_site(), + )), + }) + } } diff --git a/xso-proc/src/field/mod.rs b/xso-proc/src/field/mod.rs index c3c59c3c999ea7ccde6290671ae286c427eb157c..e6552cd8c8617ab83f3b2786f9f28ac75d41a28c 100644 --- a/xso-proc/src/field/mod.rs +++ b/xso-proc/src/field/mod.rs @@ -229,6 +229,11 @@ trait Field { fn captures_text(&self) -> bool { false } + + /// Return a QNameRef if the field captures an attribute. + fn captures_attribute(&self) -> Option { + None + } } fn default_name(span: Span, name: Option, field_ident: Option<&Ident>) -> Result { @@ -571,7 +576,12 @@ impl FieldDef { self.inner.captures_text() } - /// Return a span which points at the field's definition.' + /// Return a QNameRef if the field captures an attribute. + pub(crate) fn captures_attribute(&self) -> Option { + self.inner.captures_attribute() + } + + /// Return a span which points at the field's definition. pub(crate) fn span(&self) -> Span { self.span } diff --git a/xso-proc/src/meta.rs b/xso-proc/src/meta.rs index 291b0432a06c5103e0d68d7eb198782bb3385d81..615e8b86d6b67c81a116c400b938287a90bec511 100644 --- a/xso-proc/src/meta.rs +++ b/xso-proc/src/meta.rs @@ -9,6 +9,7 @@ //! This module is concerned with parsing attributes from the Rust "meta" //! annotations on structs, enums, enum variants and fields. +use core::fmt; use core::hash::{Hash, Hasher}; use proc_macro2::{Span, TokenStream}; @@ -17,6 +18,8 @@ use syn::{meta::ParseNestedMeta, spanned::Spanned, *}; use rxml_validation::NcName; +use crate::error_message::PrettyPath; + /// XML core namespace URI (for the `xml:` prefix) pub const XMLNS_XML: &str = "http://www.w3.org/XML/1998/namespace"; /// XML namespace URI (for the `xmlns:` prefix) @@ -84,7 +87,7 @@ macro_rules! reject_key { pub(crate) use reject_key; /// Value for the `#[xml(namespace = ..)]` attribute. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub(crate) enum NamespaceRef { /// The XML namespace is specified as a string literal. LitStr(LitStr), @@ -94,11 +97,20 @@ pub(crate) enum NamespaceRef { } impl NamespaceRef { - fn fudge(value: &str, span: Span) -> Self { + pub(crate) fn fudge(value: &str, span: Span) -> Self { Self::LitStr(LitStr::new(value, span)) } } +impl fmt::Display for NamespaceRef { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::LitStr(s) => write!(f, "{}", s.value()), + Self::Path(ref p) => write!(f, "<{}>", PrettyPath(p)), + } + } +} + impl syn::parse::Parse for NamespaceRef { fn parse(input: syn::parse::ParseStream<'_>) -> Result { if input.peek(syn::LitStr) { @@ -134,6 +146,21 @@ pub(crate) enum NameRef { Path(Path), } +impl NameRef { + pub(crate) fn fudge(value: NcName, span: Span) -> Self { + Self::Literal { value, span } + } +} + +impl fmt::Display for NameRef { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Literal { value, .. } => write!(f, "{}", value.as_str()), + Self::Path(ref p) => write!(f, "<{}>", PrettyPath(p)), + } + } +} + impl Hash for NameRef { fn hash(&self, h: &mut H) { match self { @@ -291,7 +318,7 @@ impl From for Flag { } /// A pair of `namespace` and `name` keys. -#[derive(Debug, Default)] +#[derive(Debug, Default, PartialEq, Eq, Hash)] pub(crate) struct QNameRef { /// The XML namespace supplied. pub(crate) namespace: Option, diff --git a/xso/ChangeLog b/xso/ChangeLog index f1662d165db02015b8c09f975c81c8681177ff45..cb55b56de663c0096fdae61321ab4a13659b71fa 100644 --- a/xso/ChangeLog +++ b/xso/ChangeLog @@ -59,6 +59,8 @@ Version NEXT: field in a struct definition would cause a compile-time error when deriving `AsXml`. - Update rxml dependency to 0.13. + - xso now rejects conflicting `#[xml(attribute)]` (and `#[xml(lang)]`) + specifications at compile time. Version 0.1.2: 2024-07-26 Jonas Schäfer diff --git a/xso/src/from_xml_doc.md b/xso/src/from_xml_doc.md index a4a6854c156579a49ff416519b236203b3bd8391..00e17bbf18659e8c18804c63506927e9037e720a 100644 --- a/xso/src/from_xml_doc.md +++ b/xso/src/from_xml_doc.md @@ -323,14 +323,6 @@ field type on which the `extract` is declared. If `codec` is given, the given `codec` value must implement [`TextCodec`][`TextCodec`] where `T` is the type of the field. -If two (or more) `#[xml(attribute)]` metas match the same XML attribute, -unspecified behavior occurs during serialisation: only one of the values will -be in the output, but it is unspecified which of the two. (Due to indirections -when refering to `static` items for attribute namespaces and names, it is not -possible to check this at compile-time.) This behaviour also affects -attribute fields which match the special `xml:lang` attribute when used in -conjuction with a `#[xml(lang)]` field. - #### Example ```rust @@ -365,6 +357,37 @@ assert_eq!(foo, Foo { }); ``` +Note that it is not possible to have two `#[xml(attribute)]` fields which +match the same XML attribute: + +```compile_fail +# use xso::FromXml; +#[derive(FromXml)] +#[xml(namespace = "urn:example", name = "dup")] +struct Dup { + #[xml(attribute)] + a: String, + + #[xml(attribute = "a")] + b: String, +} +``` + +```compile_fail +# use xso::FromXml; +static A: &str = "a"; + +#[derive(FromXml)] +#[xml(namespace = "urn:example", name = "dup")] +struct Dup { + #[xml(attribute)] + a: String, + + #[xml(attribute = A)] + b: String, +} +``` + ### `child` meta The `child` meta causes the field to be mapped to a child element of the @@ -666,14 +689,6 @@ The `lang` meta allows to access the (potentially inherited) logical This meta supports no arguments and can only be used on fields of type `Option`. -This meta should not be used alongsite `#[xml(attribute)]` meta which match -the `xml:lang` attribute. Doing so causes unspecified behavior during -serialisation: only one of the values will be in the output, but it is -unspecified which of the two. This is the same as when having two -`#[xml(attribute)]` field which match the same attribute. (Due to indirections -when refering to `static` items for attribute namespaces and names, it is not -possible to check this at compile-time.) - Unlike `#[xml(attribute = "xml:lang")]`, the `#[xml(lang)]` meta takes inheritance into account. @@ -715,6 +730,23 @@ assert_eq!(foo, Foo { }); ``` +Note that it is not possible to use `#[xml(lang)]` and an `#[xml(attribute)]` +which also matches `xml:lang` in the same struct: + +```compile_fail +# use xso::FromXml; +# use xso::exports::rxml::XMLNS_XML; +#[derive(FromXml)] +#[xml(namespace = "urn:example", name = "dup")] +struct Dup { + #[xml(attribute(namespace = XMLNS_XML, name = "lang"))] + a: String, + + #[xml(lang)] + b: Option, +} +``` + ### `text` meta The `text` meta causes the field to be mapped to the text content of the diff --git a/xso/src/lib.rs b/xso/src/lib.rs index cb66ae97b974f1c770e112e383e95a6add668cbb..50396f103aa03efd189b0b77d96e49dad32732a8 100644 --- a/xso/src/lib.rs +++ b/xso/src/lib.rs @@ -72,6 +72,29 @@ pub mod exports { /// This is re-exported for use by macros in cases where we cannot rely on /// people not having done `type u8 = str` or some similar shenanigans. pub type CoreU8 = u8; + + /// Compile-time comparison of two strings. + /// + /// Used by macro-generated code. + /// + /// This is necessary because `::eq` is not `const`. + pub const fn const_str_eq(a: &'static str, b: &'static str) -> bool { + let a = a.as_bytes(); + let b = b.as_bytes(); + if a.len() != b.len() { + return false; + } + + let mut i = 0; + while i < a.len() { + if a[i] != b[i] { + return false; + } + i += 1; + } + + true + } } use alloc::{