xso: re-implement `#[xml(lang)]` on top of `#[xml(attribute)]`

Jonas Schäfer created

This gives us all the goodies of `default`, `type_` and `codec` without
having to duplicate lots of code (and I think the `match`-iness of the
new macro code is still within limits).

However, we still keep `lang` and `attribute` as separate `#[xml(..)]`
attributes, because their semantics are very different and it is sensible
to make them stand out.

skip-changelog, because `#[xml(lang)]` was introduced in this version.

Change summary

parsers/src/util/macro_tests.rs |  2 
xso-proc/src/field/attribute.rs | 94 +++++++++++++++++++++++++++-------
xso-proc/src/field/lang.rs      | 83 ------------------------------
xso-proc/src/field/mod.rs       | 42 ++++++++++++---
xso-proc/src/meta.rs            | 82 +++++++++++++++++++++++------
xso/src/from_xml_doc.md         | 33 ++++++++++--
6 files changed, 198 insertions(+), 138 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs 🔗

@@ -2467,7 +2467,7 @@ struct Language {
     #[xml(child(default))]
     child: core::option::Option<Box<Language>>,
 
-    #[xml(lang)]
+    #[xml(lang(default))]
     lang: core::option::Option<String>,
 }
 

xso-proc/src/field/attribute.rs 🔗

@@ -8,11 +8,14 @@
 //!
 //! In particular, it provides the `#[xml(attribute)]` implementation.
 
+use proc_macro2::Span;
 use quote::{quote, ToTokens};
 use syn::*;
 
+use std::borrow::Cow;
+
 use crate::error_message::{self, ParentRef};
-use crate::meta::{Flag, NameRef, NamespaceRef, QNameRef};
+use crate::meta::{Flag, NameRef, NamespaceRef, QNameRef, XMLNS_XML};
 use crate::scope::{AsItemsScope, FromEventsScope};
 use crate::types::{
     as_optional_xml_text_fn, default_fn, from_xml_text_fn, text_codec_decode_fn,
@@ -21,13 +24,51 @@ use crate::types::{
 
 use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit};
 
+/// Subtype for attribute-matching fields.
+pub(super) enum AttributeFieldKind {
+    /// Matches the attribute identified by `xml_namespace` and `xml_name`.
+    Generic {
+        /// The optional XML namespace of the attribute.
+        xml_namespace: Option<NamespaceRef>,
+
+        /// The XML name of the attribute.
+        xml_name: NameRef,
+    },
+
+    /// Matches `xml:lang`
+    XmlLang,
+}
+
+impl AttributeFieldKind {
+    fn matcher(&self) -> (Cow<'_, Option<NamespaceRef>>, Cow<'_, NameRef>) {
+        match self {
+            Self::Generic {
+                ref xml_namespace,
+                ref xml_name,
+            } => (Cow::Borrowed(xml_namespace), Cow::Borrowed(xml_name)),
+            Self::XmlLang => (
+                Cow::Owned(Some(NamespaceRef::fudge(XMLNS_XML, Span::call_site()))),
+                Cow::Owned(NameRef::fudge(
+                    rxml_validation::NcName::try_from("lang").unwrap(),
+                    Span::call_site(),
+                )),
+            ),
+        }
+    }
+
+    fn qname_ref(&self) -> QNameRef {
+        let (namespace, name) = self.matcher();
+        QNameRef {
+            namespace: namespace.into_owned(),
+            name: Some(name.into_owned()),
+        }
+    }
+}
+
 /// The field maps to an attribute.
 pub(super) struct AttributeField {
-    /// The optional XML namespace of the attribute.
-    pub(super) xml_namespace: Option<NamespaceRef>,
-
-    /// The XML name of the attribute.
-    pub(super) xml_name: NameRef,
+    /// Which attribute this field maps to: a generic one or `xml:lang`.
+    pub(super) kind: AttributeFieldKind,
 
     /// Flag indicating whether the value should be defaulted if the
     /// attribute is absent.
@@ -47,16 +88,29 @@ impl Field for AttributeField {
     ) -> Result<FieldBuilderPart> {
         let FromEventsScope { ref attrs, .. } = scope;
         let ty = ty.clone();
-        let xml_namespace = &self.xml_namespace;
-        let xml_name = &self.xml_name;
 
-        let missing_msg = error_message::on_missing_attribute(container_name, member);
+        let fetch = match self.kind {
+            AttributeFieldKind::Generic {
+                ref xml_namespace,
+                ref xml_name,
+            } => {
+                let xml_namespace = match xml_namespace {
+                    Some(v) => v.to_token_stream(),
+                    None => quote! {
+                        ::xso::exports::rxml::Namespace::none()
+                    },
+                };
 
-        let xml_namespace = match xml_namespace {
-            Some(v) => v.to_token_stream(),
-            None => quote! {
-                ::xso::exports::rxml::Namespace::none()
-            },
+                quote! {
+                    #attrs.remove(#xml_namespace, #xml_name)
+                }
+            }
+
+            AttributeFieldKind::XmlLang => {
+                quote! {
+                    ctx.language().map(::xso::exports::alloc::borrow::ToOwned::to_owned)
+                }
+            }
         };
 
         let finalize = match self.codec {
@@ -72,6 +126,7 @@ impl Field for AttributeField {
             }
         };
 
+        let missing_msg = error_message::on_missing_attribute(container_name, member);
         let on_absent = match self.default_ {
             Flag::Absent => quote! {
                 return ::core::result::Result::Err(::xso::error::Error::Other(#missing_msg).into())
@@ -87,7 +142,7 @@ impl Field for AttributeField {
         Ok(FieldBuilderPart::Init {
             value: FieldTempInit {
                 init: quote! {
-                    match #attrs.remove(#xml_namespace, #xml_name).map(#finalize).transpose()? {
+                    match #fetch.map(#finalize).transpose()? {
                         ::core::option::Option::Some(v) => v,
                         ::core::option::Option::None => #on_absent,
                     }
@@ -105,13 +160,13 @@ impl Field for AttributeField {
         _member: &Member,
         ty: &Type,
     ) -> Result<FieldIteratorPart> {
-        let xml_namespace = match self.xml_namespace {
+        let (xml_namespace, xml_name) = self.kind.matcher();
+        let xml_namespace = match xml_namespace.as_ref() {
             Some(ref v) => quote! { ::xso::exports::rxml::Namespace::from(#v) },
             None => quote! {
                 ::xso::exports::rxml::Namespace::NONE
             },
         };
-        let xml_name = &self.xml_name;
 
         let generator = match self.codec {
             Some(ref codec) => {
@@ -136,9 +191,6 @@ impl Field for AttributeField {
     }
 
     fn captures_attribute(&self) -> Option<QNameRef> {
-        Some(QNameRef {
-            namespace: self.xml_namespace.clone(),
-            name: Some(self.xml_name.clone()),
-        })
+        Some(self.kind.qname_ref())
     }
 }

xso-proc/src/field/lang.rs 🔗

@@ -1,83 +0,0 @@
-// Copyright (c) 2025 Jonas Schäfer <jonas@zombofant.net>
-//
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-//! This module concerns the processing of inherited `xml:lang` values.
-//!
-//! In particular, it provides the `#[xml(lang)]` implementation.
-
-use proc_macro2::Span;
-use quote::quote;
-use syn::*;
-
-use crate::error_message::ParentRef;
-use crate::meta::{NameRef, NamespaceRef, QNameRef, XMLNS_XML};
-use crate::scope::{AsItemsScope, FromEventsScope};
-use crate::types::{as_optional_xml_text_fn, option_ty, string_ty};
-
-use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit};
-
-/// The field maps to a potentially inherited `xml:lang` value.
-pub(super) struct LangField;
-
-impl Field for LangField {
-    fn make_builder_part(
-        &self,
-        _scope: &FromEventsScope,
-        _container_name: &ParentRef,
-        _member: &Member,
-        _ty: &Type,
-    ) -> Result<FieldBuilderPart> {
-        let string_ty = string_ty(Span::call_site());
-        let ty = option_ty(string_ty.clone());
-
-        Ok(FieldBuilderPart::Init {
-            value: FieldTempInit {
-                ty,
-                init: quote! {
-                    ctx.language().map(#string_ty::from).into()
-                },
-            },
-        })
-    }
-
-    fn make_iterator_part(
-        &self,
-        _scope: &AsItemsScope,
-        _container_name: &ParentRef,
-        bound_name: &Ident,
-        _member: &Member,
-        ty: &Type,
-    ) -> Result<FieldIteratorPart> {
-        let as_optional_xml_text = as_optional_xml_text_fn(ty.clone());
-
-        Ok(FieldIteratorPart::Header {
-            generator: quote! {
-                #as_optional_xml_text(#bound_name)?.map(|#bound_name|
-                    ::xso::Item::Attribute(
-                        ::xso::exports::rxml::Namespace::XML,
-                        // SAFETY: `lang` is a known-good NcName
-                        unsafe {
-                            ::xso::exports::alloc::borrow::Cow::Borrowed(
-                                ::xso::exports::rxml::NcNameStr::from_str_unchecked("lang"),
-                            )
-                        },
-                        #bound_name,
-                    )
-                )
-            },
-        })
-    }
-
-    fn captures_attribute(&self) -> Option<QNameRef> {
-        Some(QNameRef {
-            namespace: Some(NamespaceRef::fudge(XMLNS_XML, Span::call_site())),
-            name: Some(NameRef::fudge(
-                rxml_validation::NcName::try_from("lang").unwrap(),
-                Span::call_site(),
-            )),
-        })
-    }
-}

xso-proc/src/field/mod.rs 🔗

@@ -13,7 +13,9 @@ use rxml_validation::NcName;
 
 use crate::compound::Compound;
 use crate::error_message::ParentRef;
-use crate::meta::{AmountConstraint, Flag, NameRef, NamespaceRef, QNameRef, XmlFieldMeta};
+use crate::meta::{
+    AmountConstraint, AttributeKind, Flag, NameRef, NamespaceRef, QNameRef, XmlFieldMeta,
+};
 use crate::scope::{AsItemsScope, FromEventsScope};
 
 mod attribute;
@@ -21,15 +23,13 @@ mod child;
 #[cfg(feature = "minidom")]
 mod element;
 mod flag;
-mod lang;
 mod text;
 
-use self::attribute::AttributeField;
+use self::attribute::{AttributeField, AttributeFieldKind};
 use self::child::{ChildField, ExtractDef};
 #[cfg(feature = "minidom")]
 use self::element::ElementField;
 use self::flag::FlagField;
-use self::lang::LangField;
 use self::text::TextField;
 
 /// Code slices necessary for declaring and initializing a temporary variable
@@ -276,7 +276,7 @@ fn new_field(
     match meta {
         XmlFieldMeta::Attribute {
             span,
-            qname: QNameRef { namespace, name },
+            kind: AttributeKind::Generic(QNameRef { name, namespace }),
             default_,
             type_,
             codec,
@@ -294,8 +294,34 @@ fn new_field(
             }
 
             Ok(Box::new(AttributeField {
-                xml_name,
-                xml_namespace: namespace,
+                kind: AttributeFieldKind::Generic {
+                    xml_name,
+                    xml_namespace: namespace,
+                },
+                default_,
+                codec,
+            }))
+        }
+
+        XmlFieldMeta::Attribute {
+            span: _,
+            kind: AttributeKind::XmlLang,
+            default_,
+            type_,
+            codec,
+        } => {
+            // This would've been taken via `XmlFieldMeta::take_type` if
+            // this field was within an extract where a `type_` is legal
+            // to have.
+            if let Some(type_) = type_ {
+                return Err(Error::new_spanned(
+                    type_,
+                    "specifying `type_` on fields inside structs and enum variants is redundant and not allowed."
+                ));
+            }
+
+            Ok(Box::new(AttributeField {
+                kind: AttributeFieldKind::XmlLang,
                 default_,
                 codec,
             }))
@@ -451,8 +477,6 @@ fn new_field(
                 xml_name,
             }))
         }
-
-        XmlFieldMeta::Language { span: _ } => Ok(Box::new(LangField)),
     }
 }
 

xso-proc/src/meta.rs 🔗

@@ -414,7 +414,7 @@ impl TryFrom<XmlFieldMeta> for DiscardSpec {
         match other {
             XmlFieldMeta::Attribute {
                 span,
-                qname,
+                kind: AttributeKind::Generic(qname),
                 default_,
                 type_,
                 codec,
@@ -800,18 +800,28 @@ fn parse_prefixed_name(
     }
 }
 
+/// XML attribute subtypes for `#[xml(attribute)]` and `#[xml(lang)]`.
+#[derive(Debug)]
+pub(crate) enum AttributeKind {
+    /// Any generic attribute (`#[xml(attribute)]`).
+    Generic(QNameRef),
+
+    /// The special `xml:lang` attribute (`#[xml(lang)]`).
+    XmlLang,
+}
+
 /// Contents of an `#[xml(..)]` attribute on a struct or enum variant member.
 #[derive(Debug)]
 pub(crate) enum XmlFieldMeta {
-    /// `#[xml(attribute)]`, `#[xml(attribute = ..)]` or `#[xml(attribute(..))]`
+    /// `#[xml(attribute)]`, `#[xml(attribute = ..)]`, `#[xml(attribute(..))]`, `#[xml(lang)]` or `#[xml(lang(..))]`
     Attribute {
         /// The span of the `#[xml(attribute)]` meta from which this was parsed.
         ///
         /// This is useful for error messages.
         span: Span,
 
-        /// The namespace/name keys.
-        qname: QNameRef,
+        /// Attribute subtype (normal vs. `xml:lang`).
+        kind: AttributeKind,
 
         /// The `default` flag.
         default_: Flag,
@@ -901,14 +911,6 @@ pub(crate) enum XmlFieldMeta {
         /// The namespace/name keys.
         qname: QNameRef,
     },
-
-    /// `#[xml(lang)]`
-    Language {
-        /// The span of the `#[xml(lang)]` meta from which this was parsed.
-        ///
-        /// This is useful for error messages.
-        span: Span,
-    },
 }
 
 impl XmlFieldMeta {
@@ -924,10 +926,10 @@ impl XmlFieldMeta {
             let (namespace, name) = parse_prefixed_name(meta.value()?)?;
             Ok(Self::Attribute {
                 span: meta.path.span(),
-                qname: QNameRef {
+                kind: AttributeKind::Generic(QNameRef {
                     name: Some(name),
                     namespace,
-                },
+                }),
                 default_: Flag::Absent,
                 type_: None,
                 codec: None,
@@ -977,7 +979,7 @@ impl XmlFieldMeta {
             })?;
             Ok(Self::Attribute {
                 span: meta.path.span(),
-                qname,
+                kind: AttributeKind::Generic(qname),
                 default_,
                 type_,
                 codec,
@@ -986,7 +988,7 @@ impl XmlFieldMeta {
             // argument-less syntax
             Ok(Self::Attribute {
                 span: meta.path.span(),
-                qname: QNameRef::default(),
+                kind: AttributeKind::Generic(QNameRef::default()),
                 default_: Flag::Absent,
                 type_: None,
                 codec: None,
@@ -1219,8 +1221,53 @@ impl XmlFieldMeta {
 
     /// Parse a `#[xml(lang)]` meta.
     fn lang_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
-        Ok(Self::Language {
+        let mut default_ = Flag::Absent;
+        let mut type_ = None;
+        let mut codec = None;
+
+        if meta.input.peek(syn::token::Paren) {
+            meta.parse_nested_meta(|meta| {
+                if meta.path.is_ident("default") {
+                    if default_.is_set() {
+                        return Err(Error::new_spanned(meta.path, "duplicate `default` key"));
+                    }
+                    default_ = (&meta.path).into();
+                    Ok(())
+                } else if meta.path.is_ident("type_") {
+                    if type_.is_some() {
+                        return Err(Error::new_spanned(meta.path, "duplicate `type_` key"));
+                    }
+                    type_ = Some(meta.value()?.parse()?);
+                    Ok(())
+                } else if meta.path.is_ident("codec") {
+                    if codec.is_some() {
+                        return Err(Error::new_spanned(meta.path, "duplicate `codec` key"));
+                    }
+                    let (new_codec, helpful_error) = parse_codec_expr(meta.value()?)?;
+                    // See the comment at the top of text_from_meta() below for why we
+                    // do this.
+                    let lookahead = meta.input.lookahead1();
+                    if !lookahead.peek(Token![,]) && !meta.input.is_empty() {
+                        if let Some(helpful_error) = helpful_error {
+                            let mut e = lookahead.error();
+                            e.combine(helpful_error);
+                            return Err(e);
+                        }
+                    }
+                    codec = Some(new_codec);
+                    Ok(())
+                } else {
+                    Err(Error::new_spanned(meta.path, "unsupported key"))
+                }
+            })?;
+        }
+
+        Ok(Self::Attribute {
             span: meta.path.span(),
+            kind: AttributeKind::XmlLang,
+            default_,
+            type_,
+            codec,
         })
     }
 
@@ -1324,7 +1371,6 @@ impl XmlFieldMeta {
             Self::Extract { ref span, .. } => *span,
             Self::Element { ref span, .. } => *span,
             Self::Flag { ref span, .. } => *span,
-            Self::Language { ref span, .. } => *span,
         }
     }
 

xso/src/from_xml_doc.md 🔗

@@ -684,17 +684,38 @@ assert_eq!(foo, Foo {
 
 The `lang` meta allows to access the (potentially inherited) logical
 `xml:lang` value as defined in
-[XML 1.0 § 2.12](https://www.w3.org/TR/REC-xml/#sec-lang-tag).
+[XML 1.0 § 2.12](https://www.w3.org/TR/REC-xml/#sec-lang-tag). For `FromXml`,
+the field's type must implement [`FromXmlText`] and for `AsXml`, the field's
+type must implement [`AsOptionalXmlText`].
 
-This meta supports no arguments and can only be used on fields of type
-`Option<String>`.
+| Key | Value type | Description |
+| --- | --- | --- |
+| `default` | *flag* | If present, the default value is substituted when no `xml:lang` value is in effect, instead of raising an error. |
+| `type_` | *type* | Optional explicit type specification. Only allowed within `#[xml(extract(fields(..)))]`. |
+| `codec` | optional *expression* | [`TextCodec`] implementation which is used to encode or decode the field. |
 
-Unlike `#[xml(attribute = "xml:lang")]`, the `#[xml(lang)]` meta takes
-inheritance into account.
+Unlike `#[xml(attribute = "xml:lang")]`, using `#[xml(lang)]` takes
+the inheritance of the `xml:lang` attribute into account.
 
-**Note:** Using this meta is not roundtrip-safe. `rxml` will always emit its
+**Note:** Using this meta is not roundtrip-safe. `xso` will always emit its
 value on serialisation, even if it was inherited during deserialisation.
 
+If `default` is specified and there is no `xml:lang` specified at the point of
+the element, the value is generated using [`core::default::Default`],
+requiring the field type to implement the `Default` trait for a `FromXml`
+derivation. `default` has no influence on `AsXml`. If `default` is not
+specified, an error is raised if `xml:lang` has not been set on the element
+or any of its ancestors.
+
+Note that no error is generated (by `xso`) for `xml:lang` values of `""`.
+
+If `type_` is specified and the `lang` meta is used within an
+`#[xml(extract(fields(..)))]` meta, the specified type is used instead of the
+field type on which the `extract` is declared.
+
+If `codec` is given, its value must implement
+[`TextCodec<T>`][`TextCodec`] where `T` is the type of the field.
+
 #### Example
 
 ```rust