xso: implement catch-all for unknown elements

Jonas Schäfer created

Change summary

parsers/src/util/macro_tests.rs |  19 ++++++
xso-proc/src/compound.rs        |  60 ++++++++++++++----
xso-proc/src/field/child.rs     |   8 +-
xso-proc/src/field/element.rs   | 110 +++++++++++++++++++++++++++++++++++
xso-proc/src/field/mod.rs       |  75 +++++++++++++++++++++-
xso-proc/src/meta.rs            |  34 ++++++++++
xso-proc/src/types.rs           |  33 ++++++++++
xso/ChangeLog                   |   2 
xso/src/from_xml_doc.md         |  58 ++++++++++++++++++
xso/src/minidom_compat.rs       |  61 +++++++++++-------
10 files changed, 412 insertions(+), 48 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs 🔗

@@ -1551,3 +1551,22 @@ fn optional_attribute_optional_extract_double_option_roundtrip_absent_child() {
         "<parent xmlns='urn:example:ns1'/>",
     )
 }
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct ElementCatchall {
+    #[xml(element(n = ..))]
+    children: Vec<::minidom::Element>,
+}
+
+#[test]
+fn element_catchall_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<ElementCatchall>(
+        "<parent xmlns='urn:example:ns1'><child><deeper/></child><child xmlns='urn:example:ns2'/><more-children/><yet-another-child/><child/></parent>",
+    )
+}

xso-proc/src/compound.rs 🔗

@@ -11,7 +11,7 @@ use quote::quote;
 use syn::{spanned::Spanned, *};
 
 use crate::error_message::ParentRef;
-use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit};
+use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit, NestedMatcher};
 use crate::meta::NamespaceRef;
 use crate::scope::{mangle_member, AsItemsScope, FromEventsScope};
 use crate::state::{AsItemsSubmachine, FromEventsSubmachine, State};
@@ -109,6 +109,7 @@ impl Compound {
         let mut builder_data_init = TokenStream::default();
         let mut output_cons = TokenStream::default();
         let mut child_matchers = TokenStream::default();
+        let mut fallback_child_matcher = None;
         let mut text_handler = None;
         let mut extra_defs = TokenStream::default();
         let is_tuple = !output_name.is_path();
@@ -219,18 +220,44 @@ impl Compound {
                         #builder_field_name: #init,
                     });
 
-                    child_matchers.extend(quote! {
-                        let (name, attrs) = match #matcher {
-                            ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
-                            ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(e),
-                            ::core::result::Result::Ok(#substate_data) => {
-                                return ::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name {
+                    match matcher {
+                        NestedMatcher::Selective(matcher) => {
+                            child_matchers.extend(quote! {
+                                let (name, attrs) = match #matcher {
+                                    ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs }) => (name, attrs),
+                                    ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(e)) => return ::core::result::Result::Err(e),
+                                    ::core::result::Result::Ok(#substate_data) => {
+                                        return ::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name {
+                                            #builder_data_ident,
+                                            #substate_data,
+                                        }))
+                                    }
+                                };
+                            });
+                        }
+                        NestedMatcher::Fallback(matcher) => {
+                            if let Some((span, _)) = fallback_child_matcher.as_ref() {
+                                let mut err = Error::new(
+                                    field.span(),
+                                    "more than one field is attempting to consume all unmatched child elements"
+                                );
+                                err.combine(Error::new(
+                                    *span,
+                                    "the previous field collecting all unmatched child elements is here"
+                                ));
+                                return Err(err);
+                            }
+
+                            let matcher = quote! {
+                                ::core::result::Result::Ok(::core::ops::ControlFlow::Break(Self::#state_name {
                                     #builder_data_ident,
-                                    #substate_data,
+                                    #substate_data: { #matcher },
                                 }))
-                            }
-                        };
-                    });
+                            };
+
+                            fallback_child_matcher = Some((field.span(), matcher));
+                        }
+                    }
 
                     if is_tuple {
                         output_cons.extend(quote! {
@@ -278,6 +305,14 @@ impl Compound {
             }
         };
 
+        let child_fallback = match fallback_child_matcher {
+            Some((_, matcher)) => matcher,
+            None => quote! {
+                let _ = (name, attrs);
+                ::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err))
+            },
+        };
+
         states.push(State::new_with_builder(
             default_state_ident.clone(),
             builder_data_ident,
@@ -292,8 +327,7 @@ impl Compound {
                 }
                 ::xso::exports::rxml::Event::StartElement(_, name, attrs) => {
                     #child_matchers
-                    let _ = (name, attrs);
-                    ::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err))
+                    #child_fallback
                 }
                 ::xso::exports::rxml::Event::Text(_, #text) => {
                     #text_handler

xso-proc/src/field/child.rs 🔗

@@ -23,7 +23,7 @@ use crate::types::{
     option_as_xml_ty, option_ty, ref_ty, ty_from_ident,
 };
 
-use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit};
+use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit, NestedMatcher};
 
 /// The field maps to a child
 pub(super) struct ChildField {
@@ -101,7 +101,7 @@ impl Field for ChildField {
                         init: quote! { ::core::option::Option::None },
                         ty: option_ty(ty.clone()),
                     },
-                    matcher: quote! {
+                    matcher: NestedMatcher::Selective(quote! {
                         match #matcher {
                             ::core::result::Result::Ok(v) => if #field_access.is_some() {
                                 ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(::xso::error::Error::Other(#duplicate_msg)))
@@ -110,7 +110,7 @@ impl Field for ChildField {
                             },
                             ::core::result::Result::Err(e) => ::core::result::Result::Err(e),
                         }
-                    },
+                    }),
                     builder,
                     collect: quote! {
                         #field_access = ::core::option::Option::Some(#fetch);
@@ -132,7 +132,7 @@ impl Field for ChildField {
                         init: quote! { #ty_default() },
                         ty: ty.clone(),
                     },
-                    matcher,
+                    matcher: NestedMatcher::Selective(matcher),
                     builder,
                     collect: quote! {
                         #ty_extend(&mut #field_access, [#fetch]);

xso-proc/src/field/element.rs 🔗

@@ -0,0 +1,110 @@
+// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+//! This module concerns the processing of untyped `minidom::Element`
+//! children.
+//!
+//! In particular, it provides the `#[xml(element)]` implementation.
+
+use proc_macro2::{Span, TokenStream};
+use quote::quote;
+use syn::*;
+
+use crate::error_message::ParentRef;
+use crate::scope::{AsItemsScope, FromEventsScope};
+use crate::types::{
+    default_fn, element_ty, from_xml_builder_ty, into_iterator_into_iter_fn, into_iterator_iter_ty,
+    item_iter_ty, option_ty, ref_ty,
+};
+
+use super::{Field, FieldBuilderPart, FieldIteratorPart, FieldTempInit, NestedMatcher};
+
+pub(super) struct ElementField;
+
+impl Field for ElementField {
+    fn make_builder_part(
+        &self,
+        scope: &FromEventsScope,
+        _container_name: &ParentRef,
+        member: &Member,
+        ty: &Type,
+    ) -> Result<FieldBuilderPart> {
+        let FromEventsScope {
+            ref substate_result,
+            ..
+        } = scope;
+        let field_access = scope.access_field(member);
+
+        let element_ty = element_ty(Span::call_site());
+        let default_fn = default_fn(ty.clone());
+        let builder = from_xml_builder_ty(element_ty.clone());
+
+        Ok(FieldBuilderPart::Nested {
+            extra_defs: TokenStream::default(),
+            value: FieldTempInit {
+                init: quote! { #default_fn() },
+                ty: ty.clone(),
+            },
+            matcher: NestedMatcher::Fallback(quote! {
+                #builder::new(name, attrs)
+            }),
+            builder,
+            collect: quote! {
+                <#ty as ::core::iter::Extend::<#element_ty>>::extend(&mut #field_access, [#substate_result]);
+            },
+            finalize: quote! {
+                #field_access
+            },
+        })
+    }
+
+    fn make_iterator_part(
+        &self,
+        scope: &AsItemsScope,
+        _container_name: &ParentRef,
+        bound_name: &Ident,
+        _member: &Member,
+        ty: &Type,
+    ) -> Result<FieldIteratorPart> {
+        let AsItemsScope { ref lifetime, .. } = scope;
+
+        let element_ty = element_ty(Span::call_site());
+        let iter_ty = item_iter_ty(element_ty.clone(), lifetime.clone());
+        let element_iter = into_iterator_iter_ty(ref_ty(ty.clone(), lifetime.clone()));
+        let into_iter = into_iterator_into_iter_fn(ref_ty(ty.clone(), lifetime.clone()));
+
+        let state_ty = Type::Tuple(TypeTuple {
+            paren_token: token::Paren::default(),
+            elems: [element_iter, option_ty(iter_ty)].into_iter().collect(),
+        });
+
+        Ok(FieldIteratorPart::Content {
+            extra_defs: TokenStream::default(),
+            value: FieldTempInit {
+                init: quote! {
+                    (#into_iter(#bound_name), ::core::option::Option::None)
+                },
+                ty: state_ty,
+            },
+            generator: quote! {
+                loop {
+                    if let ::core::option::Option::Some(current) = #bound_name.1.as_mut() {
+                        if let ::core::option::Option::Some(item) = current.next() {
+                            break ::core::option::Option::Some(item).transpose();
+                        }
+                    }
+                    if let ::core::option::Option::Some(item) = #bound_name.0.next() {
+                        #bound_name.1 = ::core::option::Option::Some(
+                            <#element_ty as ::xso::AsXml>::as_xml_iter(item)?
+                        );
+                    } else {
+                        break ::core::result::Result::Ok(::core::option::Option::None)
+                    }
+                }
+            },
+        })
+    }
+}

xso-proc/src/field/mod.rs 🔗

@@ -18,10 +18,14 @@ use crate::scope::{AsItemsScope, FromEventsScope};
 
 mod attribute;
 mod child;
+#[cfg(feature = "minidom")]
+mod element;
 mod text;
 
 use self::attribute::AttributeField;
 use self::child::{ChildField, ExtractDef};
+#[cfg(feature = "minidom")]
+use self::element::ElementField;
 use self::text::TextField;
 
 /// Code slices necessary for declaring and initializing a temporary variable
@@ -34,6 +38,33 @@ pub(crate) struct FieldTempInit {
     pub(crate) init: TokenStream,
 }
 
+/// Configure how a nested field builder selects child elements.
+pub(crate) enum NestedMatcher {
+    /// Matches a specific child element fallibly.
+    Selective(
+        /// Expression which evaluates to `Result<T, FromEventsError>`,
+        /// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`.
+        ///
+        /// `T` must be the type specified in the
+        /// [`FieldBuilderPart::Nested::builder`]  field.
+        TokenStream,
+    ),
+
+    #[cfg_attr(not(feature = "minidom"), allow(dead_code))]
+    /// Matches any child element not matched by another matcher.
+    ///
+    /// Only a single field may use this variant, otherwise an error is
+    /// raised during execution of the proc macro.
+    Fallback(
+        /// Expression which evaluates to `T` (or `return`s an error),
+        /// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`.
+        ///
+        /// `T` must be the type specified in the
+        /// [`FieldBuilderPart::Nested::builder`]  field.
+        TokenStream,
+    ),
+}
+
 /// Describe how a struct or enum variant's member is parsed from XML data.
 ///
 /// This struct is returned from [`FieldDef::make_builder_part`] and
@@ -73,12 +104,9 @@ pub(crate) enum FieldBuilderPart {
         /// parsing.
         value: FieldTempInit,
 
-        /// Expression which evaluates to `Result<T, FromEventsError>`,
-        /// consuming `name: rxml::QName` and `attrs: rxml::AttrMap`.
-        ///
-        /// `T` must be the type specified in the
-        /// [`Self::Nested::builder`]  field.
-        matcher: TokenStream,
+        /// Configure child matching behaviour for this field. See
+        /// [`NestedMatcher`] for options.
+        matcher: NestedMatcher,
 
         /// Type implementing `xso::FromEventsBuilder` which parses the child
         /// element.
@@ -343,6 +371,31 @@ fn new_field(
                 }),
             }))
         }
+
+        #[cfg(feature = "minidom")]
+        XmlFieldMeta::Element { span, amount } => {
+            match amount {
+                Some(AmountConstraint::Any(_)) => (),
+                Some(AmountConstraint::FixedSingle(span)) => {
+                    return Err(Error::new(
+                        span,
+                        "only `n = ..` is supported for #[xml(element)]` currently",
+                    ))
+                }
+                None => return Err(Error::new(span, "`n` must be set to `..` currently")),
+            }
+
+            Ok(Box::new(ElementField))
+        }
+
+        #[cfg(not(feature = "minidom"))]
+        XmlFieldMeta::Element { span, amount } => {
+            let _ = amount;
+            Err(Error::new(
+                span,
+                "#[xml(element)] requires xso to be built with the \"minidom\" feature.",
+            ))
+        }
     }
 }
 
@@ -351,6 +404,9 @@ fn new_field(
 /// See [`Compound`][`crate::compound::Compound`] for more information on
 /// compounds in general.
 pub(crate) struct FieldDef {
+    /// A span which refers to the field's definition.
+    span: Span,
+
     /// The member identifying the field.
     member: Member,
 
@@ -388,6 +444,7 @@ impl FieldDef {
         let ty = field.ty.clone();
 
         Ok(Self {
+            span: field_span,
             inner: new_field(meta, ident, &ty, container_namespace)?,
             member,
             ty,
@@ -406,6 +463,7 @@ impl FieldDef {
     ) -> Result<Self> {
         let span = meta.span();
         Ok(Self {
+            span,
             member: Member::Unnamed(Index { index, span }),
             ty: ty.clone(),
             inner: new_field(meta, None, ty, container_namespace)?,
@@ -454,4 +512,9 @@ impl FieldDef {
     pub(crate) fn is_text_field(&self) -> bool {
         self.inner.captures_text()
     }
+
+    /// Return a span which points at the field's definition.
+    pub(crate) fn span(&self) -> Span {
+        self.span
+    }
 }

xso-proc/src/meta.rs 🔗

@@ -686,6 +686,17 @@ pub(crate) enum XmlFieldMeta {
         /// The `fields` nested meta.
         fields: Vec<XmlFieldMeta>,
     },
+
+    /// `#[xml(element)]`
+    Element {
+        /// The span of the `#[xml(element)]` meta from which this was parsed.
+        ///
+        /// This is useful for error messages.
+        span: Span,
+
+        /// The `n` flag.
+        amount: Option<AmountConstraint>,
+    },
 }
 
 impl XmlFieldMeta {
@@ -906,6 +917,26 @@ impl XmlFieldMeta {
         })
     }
 
+    /// Parse a `#[xml(element)]` meta.
+    fn element_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
+        let mut amount = None;
+        meta.parse_nested_meta(|meta| {
+            if meta.path.is_ident("n") {
+                if amount.is_some() {
+                    return Err(Error::new_spanned(meta.path, "duplicate `n` key"));
+                }
+                amount = Some(meta.value()?.parse()?);
+                Ok(())
+            } else {
+                Err(Error::new_spanned(meta.path, "unsupported key"))
+            }
+        })?;
+        Ok(Self::Element {
+            span: meta.path.span(),
+            amount,
+        })
+    }
+
     /// Parse [`Self`] from a nestd meta, switching on the identifier
     /// of that nested meta.
     fn parse_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
@@ -917,6 +948,8 @@ impl XmlFieldMeta {
             Self::child_from_meta(meta)
         } else if meta.path.is_ident("extract") {
             Self::extract_from_meta(meta)
+        } else if meta.path.is_ident("element") {
+            Self::element_from_meta(meta)
         } else {
             Err(Error::new_spanned(meta.path, "unsupported field meta"))
         }
@@ -998,6 +1031,7 @@ impl XmlFieldMeta {
             Self::Child { ref span, .. } => *span,
             Self::Text { ref span, .. } => *span,
             Self::Extract { ref span, .. } => *span,
+            Self::Element { ref span, .. } => *span,
         }
     }
 

xso-proc/src/types.rs 🔗

@@ -783,3 +783,36 @@ pub(crate) fn option_as_xml_ty(inner_ty: Type) -> Type {
         },
     })
 }
+
+/// Construct a [`syn::Type`] referring to `::xso::exports::minidom::Element`.
+#[cfg(feature = "minidom")]
+pub(crate) fn element_ty(span: Span) -> Type {
+    Type::Path(TypePath {
+        qself: None,
+        path: Path {
+            leading_colon: Some(syn::token::PathSep {
+                spans: [span, span],
+            }),
+            segments: [
+                PathSegment {
+                    ident: Ident::new("xso", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("exports", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("minidom", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("Element", span),
+                    arguments: PathArguments::None,
+                },
+            ]
+            .into_iter()
+            .collect(),
+        },
+    })
+}

xso/ChangeLog 🔗

@@ -20,6 +20,8 @@ Version NEXT:
       - Support for deriving FromXml and AsXml on enums.
       - Support for extracting data from child elements without intermediate
         structs.
+      - Support for collecting all unknown children in a single field as a
+        collection of `minidom::Element`.
 
 Version 0.1.2:
 2024-07-26 Jonas Schäfer <jonas@zombofant.net>

xso/src/from_xml_doc.md 🔗

@@ -150,6 +150,7 @@ The following mapping types are defined:
 | --- | --- |
 | [`attribute`](#attribute-meta) | Map the field to an XML attribute on the struct's element |
 | [`child`](#child-meta) | Map the field to a child element |
+| [`element`](#element-meta) | Map the field to a child element as [`minidom::Element`] |
 | [`extract`](#extract-meta) | Map the field to contents of a child element of specified structure |
 | [`text`](#text-meta) | Map the field to the text content of the struct's element |
 
@@ -307,6 +308,63 @@ assert_eq!(parent, Parent {
 });
 ```
 
+#### `element` meta
+
+The `element` meta causes the field to be mapped to child elements, stored as
+a container containing [`minidom::Element`] instances.
+
+This meta is only available if `xso` is being built with the `"minidom"`
+feature.
+
+The following keys can be used inside the `#[xml(element(..))]` meta:
+
+| Key | Value type | Description |
+| --- | --- | --- |
+| `n` | `..` | Must be set to the value `..`. |
+
+The `n` parameter will, in the future, support values other than `..`. In
+order to provide a non-breaking path into that future, it must be set to the
+value `..` right now, indicating that an arbitrary number of elements may be
+collected by this meta.
+
+The field's type must be a collection of `minidom::Element`. It must thus
+implement
+[`IntoIterator<Item = minidom::Element>`][`core::iter::IntoIterator`]. In
+addition, the field's type must implement
+[`Extend<minidom::Element>`][`core::iter::Extend`] to derive `FromXml` and the
+field's reference type must implement
+`IntoIterator<Item = &'_ minidom::Element>` to derive `AsXml`.
+
+Fields with the `element` meta are deserialised with the lowest priority.
+While other fields are processed in the order they are declared, `element`
+fields may capture arbitrary child elements, so they are considered as the
+last choice when no other field matched a given child element. In addition,
+it is not allowed to have more than one field in any given struct with the
+`#[xml(element)]` meta.
+
+##### Example
+
+```rust
+# #[cfg(feature = "minidom")]
+# {
+# use xso::FromXml;
+# use xso::exports::minidom;
+#[derive(FromXml, Debug, PartialEq)]
+#[xml(namespace = "urn:example", name = "parent")]
+struct Parent {
+    #[xml(element(n = ..))]
+    misc: Vec<minidom::Element>,
+}
+
+let parent: Parent = xso::from_bytes(b"<parent
+    xmlns='urn:example'
+><child-a/><child-b/><child-a/></parent>").unwrap();
+assert_eq!(parent.misc[0].name(), "child-a");
+assert_eq!(parent.misc[1].name(), "child-b");
+assert_eq!(parent.misc[2].name(), "child-a");
+# }
+```
+
 #### `extract` meta
 
 The `extract` meta causes the field to be mapped to the *contents* of a child

xso/src/minidom_compat.rs 🔗

@@ -309,6 +309,41 @@ pub struct ElementFromEvents {
     nested: Option<Box<ElementFromEvents>>,
 }
 
+impl ElementFromEvents {
+    /// Construct a new builder from an element header.
+    ///
+    /// Unlike the [`FromXml::from_events`] implementation on
+    /// [`minidom::Element`], this is contractually infallible. Using this may
+    /// thus save you an `unwrap()` call.
+    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Self {
+        let mut prefixes = SimpleNamespaces::new();
+        let mut builder = Element::builder(qname.1, qname.0);
+        for ((namespace, name), value) in attrs.into_iter() {
+            if namespace.is_none() {
+                builder = builder.attr(name, value);
+            } else {
+                let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
+                let name = prefix.with_suffix(&name);
+                if is_new {
+                    builder = builder
+                        .prefix(
+                            Some(prefix.as_str().to_owned()),
+                            namespace.as_str().to_owned(),
+                        )
+                        .unwrap();
+                }
+                builder = builder.attr(name, value);
+            }
+        }
+
+        let element = builder.build();
+        Self {
+            inner: Some(element),
+            nested: None,
+        }
+    }
+}
+
 impl FromEventsBuilder for ElementFromEvents {
     type Output = minidom::Element;
 
@@ -356,31 +391,7 @@ impl FromXml for Element {
         qname: rxml::QName,
         attrs: rxml::AttrMap,
     ) -> Result<Self::Builder, FromEventsError> {
-        let mut prefixes = SimpleNamespaces::new();
-        let mut builder = Element::builder(qname.1, qname.0);
-        for ((namespace, name), value) in attrs.into_iter() {
-            if namespace.is_none() {
-                builder = builder.attr(name, value);
-            } else {
-                let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
-                let name = prefix.with_suffix(&name);
-                if is_new {
-                    builder = builder
-                        .prefix(
-                            Some(prefix.as_str().to_owned()),
-                            namespace.as_str().to_owned(),
-                        )
-                        .unwrap();
-                }
-                builder = builder.attr(name, value);
-            }
-        }
-
-        let element = builder.build();
-        Ok(Self::Builder {
-            inner: Some(element),
-            nested: None,
-        })
+        Ok(Self::Builder::new(qname, attrs))
     }
 }