xso: add support for extracting tuples

Jonas SchΓ€fer created

Change summary

parsers/src/util/macro_tests.rs | 107 ++++++++++++++++++++++++++++++
xso-proc/src/compound.rs        |  16 ++++
xso-proc/src/field/child.rs     | 122 ++++++++++++++++++++++++++--------
xso-proc/src/field/mod.rs       |  71 +++++++++++---------
xso/src/from_xml_doc.md         |   5 
5 files changed, 259 insertions(+), 62 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs πŸ”—

@@ -1672,3 +1672,110 @@ fn fallible_parse_positive_err() {
         other => panic!("unexpected result: {:?}", other),
     }
 }
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct ExtractTupleToCollection {
+    #[xml(extract(n = .., namespace = NS1, name = "text", fields(
+        attribute(name = "xml:lang", type_ = ::core::option::Option<String>, default),
+        text(type_ = String),
+    )))]
+    contents: Vec<(::core::option::Option<String>, String)>,
+}
+
+#[test]
+fn extract_tuple_to_collection_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<ExtractTupleToCollection>(
+        "<parent xmlns='urn:example:ns1'><text>hello world</text><text xml:lang='de'>hallo welt</text></parent>",
+    );
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct ExtractTuple {
+    #[xml(extract(namespace = NS1, name = "text", fields(
+        attribute(name = "xml:lang", type_ = ::core::option::Option<String>, default),
+        text(type_ = String),
+    )))]
+    contents: (::core::option::Option<String>, String),
+}
+
+#[test]
+fn extract_tuple_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<ExtractTuple>(
+        "<parent xmlns='urn:example:ns1'><text xml:lang='de'>hallo welt</text></parent>",
+    );
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct ExtractOptionalTuple {
+    #[xml(extract(namespace = NS1, name = "text", default, fields(
+        attribute(name = "xml:lang", type_ = ::core::option::Option<String>, default),
+        text(type_ = String),
+    )))]
+    contents: ::core::option::Option<(::core::option::Option<String>, String)>,
+}
+
+#[test]
+fn extract_optional_tuple_roundtrip_present() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<ExtractOptionalTuple>(
+        "<parent xmlns='urn:example:ns1'><text xml:lang='de'>hallo welt</text></parent>",
+    );
+    // Cannot test without text body here right now due to a limitation in the
+    // `#[xml(text)]` serialiser: It will create an empty text node in the
+    // minidom output, which will then fail the comparison.
+    roundtrip_full::<ExtractOptionalTuple>(
+        "<parent xmlns='urn:example:ns1'><text xml:lang='de'>x</text></parent>",
+    );
+    roundtrip_full::<ExtractOptionalTuple>(
+        "<parent xmlns='urn:example:ns1'><text xml:lang=''>x</text></parent>",
+    );
+}
+
+#[test]
+fn extract_optional_tuple_roundtrip_absent() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<ExtractOptionalTuple>("<parent xmlns='urn:example:ns1'/>");
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct ExtractTupleToMap {
+    #[xml(extract(n = .., namespace = NS1, name = "text", fields(
+        attribute(name = "xml:lang", type_ = ::core::option::Option<String>, default),
+        text(type_ = String),
+    )))]
+    contents: std::collections::BTreeMap<::core::option::Option<String>, String>,
+}
+
+#[test]
+fn extract_tuple_to_map_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<ExtractTupleToMap>(
+        "<parent xmlns='urn:example:ns1'><text>hello world</text><text xml:lang='de'>hallo welt</text></parent>",
+    );
+}

xso-proc/src/compound.rs πŸ”—

@@ -552,6 +552,17 @@ impl Compound {
         })
     }
 
+    /// Return a reference to this compound's only field's type.
+    ///
+    /// If the compound does not have exactly one field, this function returns
+    /// None.
+    pub(crate) fn single_ty(&self) -> Option<&Type> {
+        if self.fields.len() > 1 {
+            return None;
+        }
+        self.fields.get(0).map(|x| x.ty())
+    }
+
     /// Construct a tuple type with this compound's field's types in the same
     /// order as they appear in the compound.
     pub(crate) fn to_tuple_ty(&self) -> TypeTuple {
@@ -574,4 +585,9 @@ impl Compound {
                 .collect(),
         }
     }
+
+    /// Return the number of fields in this compound.
+    pub(crate) fn field_count(&self) -> usize {
+        self.fields.len()
+    }
 }

xso-proc/src/field/child.rs πŸ”—

@@ -9,7 +9,7 @@
 //! In particular, it provides both `#[xml(extract)]` and `#[xml(child)]`
 //! implementations in a single type.
 
-use proc_macro2::TokenStream;
+use proc_macro2::{Span, TokenStream};
 use quote::quote;
 use syn::*;
 
@@ -47,14 +47,14 @@ impl Field for ChildField {
         member: &Member,
         ty: &Type,
     ) -> Result<FieldBuilderPart> {
-        let element_ty = match self.amount {
-            AmountConstraint::FixedSingle(_) => ty.clone(),
-            AmountConstraint::Any(_) => into_iterator_item_ty(ty.clone()),
+        let (element_ty, is_container) = match self.amount {
+            AmountConstraint::FixedSingle(_) => (ty.clone(), false),
+            AmountConstraint::Any(_) => (into_iterator_item_ty(ty.clone()), true),
         };
 
         let (extra_defs, matcher, fetch, builder) = match self.extract {
             Some(ref extract) => {
-                extract.make_from_xml_builder_parts(scope, container_name, member)?
+                extract.make_from_xml_builder_parts(scope, container_name, member, is_container)?
             }
             None => {
                 let FromEventsScope {
@@ -153,19 +153,23 @@ impl Field for ChildField {
     ) -> Result<FieldIteratorPart> {
         let AsItemsScope { ref lifetime, .. } = scope;
 
-        let item_ty = match self.amount {
-            AmountConstraint::FixedSingle(_) => ty.clone(),
+        let (item_ty, is_container) = match self.amount {
+            AmountConstraint::FixedSingle(_) => (ty.clone(), false),
             AmountConstraint::Any(_) => {
                 // This should give us the type of element stored in the
                 // collection.
-                into_iterator_item_ty(ty.clone())
+                (into_iterator_item_ty(ty.clone()), true)
             }
         };
 
         let (extra_defs, init, iter_ty) = match self.extract {
-            Some(ref extract) => {
-                extract.make_as_item_iter_parts(scope, container_name, bound_name, member)?
-            }
+            Some(ref extract) => extract.make_as_item_iter_parts(
+                scope,
+                container_name,
+                bound_name,
+                member,
+                is_container,
+            )?,
             None => {
                 let as_xml_iter = as_xml_iter_fn(item_ty.clone());
                 let item_iter = item_iter_ty(item_ty.clone(), lifetime.clone());
@@ -266,6 +270,7 @@ impl ExtractDef {
         scope: &FromEventsScope,
         container_name: &ParentRef,
         member: &Member,
+        collecting_into_container: bool,
     ) -> Result<(TokenStream, TokenStream, TokenStream, Type)> {
         let FromEventsScope {
             ref substate_result,
@@ -298,9 +303,22 @@ impl ExtractDef {
 
         let matcher = quote! { #state_ty_ident::new(name, attrs).map(|x| #from_xml_builder_ty_ident(::core::option::Option::Some(x))) };
 
-        Ok((
-            extra_defs,
-            matcher,
+        let fetch = if self.parts.field_count() == 1 {
+            // If we extract only a single field, we automatically unwrap the
+            // tuple, because that behaviour is more obvious to users.
+            quote! { #substate_result.0 }
+        } else {
+            // If we extract more than one field, we pass the value down as
+            // the tuple that it is.
+            quote! { #substate_result }
+        };
+
+        let fetch = if collecting_into_container {
+            // This is for symmetry with the AsXml implementation part. Please
+            // see there for why we cannot do option magic in the collection
+            // case.
+            fetch
+        } else {
             // This little ".into()" here goes a long way. It relies on one of
             // the most underrated trait implementations in the standard
             // library: `impl From<T> for Option<T>`, which creates a
@@ -319,9 +337,12 @@ impl ExtractDef {
             // the type constraint imposed by the place the value is *used*,
             // which is strictly bound by the field's type (so there is, in
             // fact, no ambiguity). So this works all kinds of magic.
-            quote! { #substate_result.0.into() },
-            from_xml_builder_ty,
-        ))
+            quote! {
+                #fetch.into()
+            }
+        };
+
+        Ok((extra_defs, matcher, fetch, from_xml_builder_ty))
     }
 
     /// Construct
@@ -334,6 +355,7 @@ impl ExtractDef {
         container_name: &ParentRef,
         bound_name: &Ident,
         member: &Member,
+        iterating_container: bool,
     ) -> Result<(TokenStream, TokenStream, Type)> {
         let AsItemsScope { ref lifetime, .. } = scope;
 
@@ -353,6 +375,31 @@ impl ExtractDef {
                 gt_token: token::Gt::default(),
             });
         let item_iter_ty = item_iter_ty.into();
+        let tuple_ty = self.parts.to_ref_tuple_ty(lifetime);
+
+        let (repack, inner_ty) = match self.parts.single_ty() {
+            Some(single_ty) => (
+                quote! { #bound_name, },
+                ref_ty(single_ty.clone(), lifetime.clone()),
+            ),
+            None => {
+                let mut repack_tuple = TokenStream::default();
+                // The cast here is sound, because the constructor of Compound
+                // already asserts that there are less than 2^32 fields (with
+                // what I think is a great error message, go check it out).
+                for i in 0..(tuple_ty.elems.len() as u32) {
+                    let index = Index {
+                        index: i,
+                        span: Span::call_site(),
+                    };
+                    repack_tuple.extend(quote! {
+                        &#bound_name.#index,
+                    })
+                }
+                let ref_tuple_ty = ref_ty(self.parts.to_tuple_ty().into(), lifetime.clone());
+                (repack_tuple, ref_tuple_ty)
+            }
+        };
 
         let extra_defs = self
             .parts
@@ -374,26 +421,45 @@ impl ExtractDef {
             .compile()
             .render(
                 &Visibility::Inherited,
-                &self.parts.to_ref_tuple_ty(lifetime).into(),
+                &tuple_ty.into(),
                 &state_ty_ident,
                 lifetime,
                 &item_iter_ty,
             )?;
 
-        let item_iter_ty = option_as_xml_ty(item_iter_ty);
-        Ok((
-            extra_defs,
+        let (make_iter, item_iter_ty) = if iterating_container {
+            // When we are iterating a container, the container's iterator's
+            // item type may either be `&(A, B, ...)` or `(&A, &B, ...)`.
+            // Unfortunately, to be able to handle both, we need to omit the
+            // magic Option cast, because we cannot specify the type of the
+            // argument of the `.map(...)` closure and rust is not able to
+            // infer that type because the repacking is too opaque.
+            //
+            // However, this is not much of a loss, because it doesn't really
+            // make sense to have the option cast there, anyway: optional
+            // elements in a container would be weird.
+            (
+                quote! {
+                    #item_iter_ty_ident::new((#repack))?
+                },
+                item_iter_ty,
+            )
+        } else {
             // Again we exploit the extreme usefulness of the
             // `impl From<T> for Option<T>`. We already wrote extensively
             // about that in [`make_from_xml_builder_parts`] implementation
             // corresponding to this code above, and we will not repeat it
             // here.
-            quote! {
-                ::xso::asxml::OptionAsXml::new(::core::option::Option::from(#bound_name).map(|#bound_name| {
-                    #item_iter_ty_ident::new((#bound_name,))
-                }).transpose()?)
-            },
-            item_iter_ty,
-        ))
+            (
+                quote! {
+                    ::xso::asxml::OptionAsXml::new(::core::option::Option::from(#bound_name).map(|#bound_name: #inner_ty| {
+                        #item_iter_ty_ident::new((#repack))
+                    }).transpose()?)
+                },
+                option_as_xml_ty(item_iter_ty),
+            )
+        };
+
+        Ok((extra_defs, make_iter, item_iter_ty))
     }
 }

xso-proc/src/field/mod.rs πŸ”—

@@ -323,43 +323,50 @@ fn new_field(
             let xml_namespace = namespace.unwrap_or_else(|| container_namespace.clone());
             let xml_name = default_name(span, name, field_ident)?;
 
-            let mut field = {
-                let mut fields = fields.into_iter();
-                let Some(field) = fields.next() else {
-                    return Err(Error::new(
-                        span,
-                        "`#[xml(extract(..))]` must contain one `fields(..)` nested meta which contains at least one field meta."
-                    ));
-                };
-
-                if let Some(field) = fields.next() {
-                    return Err(Error::new(
-                        field.span(),
-                        "more than one extracted piece of data is currently not supported",
-                    ));
+            let amount = amount.unwrap_or(AmountConstraint::FixedSingle(Span::call_site()));
+            match amount {
+                AmountConstraint::Any(ref amount) => {
+                    if let Flag::Present(default_) = default_ {
+                        let mut err = Error::new(
+                            default_,
+                            "default cannot be set when collecting into a collection",
+                        );
+                        err.combine(Error::new(
+                            *amount,
+                            "`n` was set to a non-1 value here, which enables connection logic",
+                        ));
+                        return Err(err);
+                    }
                 }
+                AmountConstraint::FixedSingle(_) => (),
+            }
 
-                field
-            };
-
-            let amount = amount.unwrap_or(AmountConstraint::FixedSingle(Span::call_site()));
-            let field_ty = match field.take_type() {
-                Some(v) => v,
-                None => match amount {
-                    // Only allow inferrence for single values: inferrence
-                    // for collections will always be wrong.
-                    AmountConstraint::FixedSingle(_) => field_ty.clone(),
-                    _ => {
-                        return Err(Error::new(
+            let mut field_defs = Vec::new();
+            let allow_inference =
+                matches!(amount, AmountConstraint::FixedSingle(_)) && fields.len() == 1;
+            for (i, mut field) in fields.into_iter().enumerate() {
+                let field_ty = match field.take_type() {
+                    Some(v) => v,
+                    None => {
+                        if allow_inference {
+                            field_ty.clone()
+                        } else {
+                            return Err(Error::new(
                             field.span(),
-                            "extracted field must specify a type explicitly when extracting into a collection."
+                            "extracted field must specify a type explicitly when extracting into a collection or when extracting more than one field."
                         ));
+                        }
                     }
-                },
-            };
-            let parts = Compound::from_field_defs(
-                [FieldDef::from_extract(field, 0, &field_ty, &xml_namespace)].into_iter(),
-            )?;
+                };
+
+                field_defs.push(FieldDef::from_extract(
+                    field,
+                    i as u32,
+                    &field_ty,
+                    &xml_namespace,
+                ));
+            }
+            let parts = Compound::from_field_defs(field_defs)?;
 
             Ok(Box::new(ChildField {
                 default_,

xso/src/from_xml_doc.md πŸ”—

@@ -517,8 +517,9 @@ serialisation to mismatch the deserialisation, too (i.e. the struct is then
 not roundtrip-safe), because the deserialisation does not compare the value
 against `default` (but has special provisions to work with `Option<T>`).
 
-**Note:** Currently, only a single field can be extracted. This restriction
-will be lifted in the future.
+If more than one single field is contained in `fields`, the fields will be
+extracted as a tuple in the order they are given in the meta. In addition, it
+is required to explicitly specify each extracted field's type in that case.
 
 Using `extract` instead of `child` combined with a specific struct declaration
 comes with trade-offs. On the one hand, using `extract` gives you flexibility