xso: implement support for defaulting extracts

Jonas SchΓ€fer created

Change summary

parsers/src/util/macro_tests.rs | 233 +++++++++++++++++++++++++++++++++++
xso-proc/src/compound.rs        |  25 +++
xso-proc/src/field.rs           |  77 +++++++----
xso-proc/src/meta.rs            |  13 +
xso-proc/src/types.rs           |  30 ++++
xso/src/from_xml_doc.md         |  30 ++++
xso/src/lib.rs                  |  11 +
7 files changed, 388 insertions(+), 31 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs πŸ”—

@@ -1287,3 +1287,236 @@ fn text_extract_attribute_vec_roundtrip() {
         "<parent xmlns='urn:example:ns1'><child attr='hello'/><child attr='world'/></parent>",
     )
 }
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct TextOptionalExtract {
+    #[xml(extract(namespace = NS1, name = "child", default, fields(text(type_ = String))))]
+    contents: ::std::option::Option<String>,
+}
+
+#[test]
+fn text_optional_extract_positive_present() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<TextOptionalExtract>(
+        "<parent xmlns='urn:example:ns1'><child>hello world</child></parent>",
+    ) {
+        Ok(TextOptionalExtract {
+            contents: Some(contents),
+        }) => {
+            assert_eq!(contents, "hello world");
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn text_optional_extract_positive_absent_child() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<TextOptionalExtract>("<parent xmlns='urn:example:ns1'/>") {
+        Ok(TextOptionalExtract { contents: None }) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn text_optional_extract_roundtrip_present() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<TextOptionalExtract>(
+        "<parent xmlns='urn:example:ns1'><child>hello world!</child></parent>",
+    )
+}
+
+#[test]
+fn text_optional_extract_roundtrip_absent() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<TextOptionalExtract>("<parent xmlns='urn:example:ns1'/>")
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct OptionalAttributeOptionalExtract {
+    #[xml(extract(namespace = NS1, name = "child", default, fields(attribute(name = "foo", default))))]
+    contents: ::std::option::Option<String>,
+}
+
+#[test]
+fn optional_attribute_optional_extract_positive_present() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<OptionalAttributeOptionalExtract>(
+        "<parent xmlns='urn:example:ns1'><child foo='hello world'/></parent>",
+    ) {
+        Ok(OptionalAttributeOptionalExtract {
+            contents: Some(contents),
+        }) => {
+            assert_eq!(contents, "hello world");
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn optional_attribute_optional_extract_positive_absent_attribute() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<OptionalAttributeOptionalExtract>(
+        "<parent xmlns='urn:example:ns1'><child/></parent>",
+    ) {
+        Ok(OptionalAttributeOptionalExtract { contents: None }) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn optional_attribute_optional_extract_positive_absent_element() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<OptionalAttributeOptionalExtract>("<parent xmlns='urn:example:ns1'/>") {
+        Ok(OptionalAttributeOptionalExtract { contents: None }) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn optional_attribute_optional_extract_roundtrip_present() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<OptionalAttributeOptionalExtract>(
+        "<parent xmlns='urn:example:ns1'><child foo='hello world'/></parent>",
+    )
+}
+
+#[test]
+fn optional_attribute_optional_extract_roundtrip_absent_attribute() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<OptionalAttributeOptionalExtract>(
+        "<parent xmlns='urn:example:ns1'><child/></parent>",
+    )
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct OptionalAttributeOptionalExtractDoubleOption {
+    #[xml(extract(namespace = NS1, name = "child", default, fields(attribute(name = "foo", type_ = ::std::option::Option<String>, default))))]
+    contents: ::std::option::Option<::std::option::Option<String>>,
+}
+
+#[test]
+fn optional_attribute_optional_extract_double_option_positive_present() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<OptionalAttributeOptionalExtractDoubleOption>(
+        "<parent xmlns='urn:example:ns1'><child foo='hello world'/></parent>",
+    ) {
+        Ok(OptionalAttributeOptionalExtractDoubleOption {
+            contents: Some(Some(contents)),
+        }) => {
+            assert_eq!(contents, "hello world");
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn optional_attribute_optional_extract_double_option_positive_absent_attribute() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<OptionalAttributeOptionalExtractDoubleOption>(
+        "<parent xmlns='urn:example:ns1'><child/></parent>",
+    ) {
+        Ok(OptionalAttributeOptionalExtractDoubleOption {
+            contents: Some(None),
+        }) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn optional_attribute_optional_extract_double_option_positive_absent_element() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<OptionalAttributeOptionalExtractDoubleOption>(
+        "<parent xmlns='urn:example:ns1'/>",
+    ) {
+        Ok(OptionalAttributeOptionalExtractDoubleOption { contents: None }) => (),
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn optional_attribute_optional_extract_double_option_roundtrip_present() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<OptionalAttributeOptionalExtractDoubleOption>(
+        "<parent xmlns='urn:example:ns1'><child foo='hello world'/></parent>",
+    )
+}
+
+#[test]
+fn optional_attribute_optional_extract_double_option_roundtrip_absent_attribute() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<OptionalAttributeOptionalExtractDoubleOption>(
+        "<parent xmlns='urn:example:ns1'><child/></parent>",
+    )
+}
+
+#[test]
+fn optional_attribute_optional_extract_double_option_roundtrip_absent_child() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<OptionalAttributeOptionalExtractDoubleOption>(
+        "<parent xmlns='urn:example:ns1'/>",
+    )
+}

xso-proc/src/compound.rs πŸ”—

@@ -15,7 +15,7 @@ use crate::field::{FieldBuilderPart, FieldDef, FieldIteratorPart, FieldTempInit}
 use crate::meta::NamespaceRef;
 use crate::scope::{mangle_member, AsItemsScope, FromEventsScope};
 use crate::state::{AsItemsSubmachine, FromEventsSubmachine, State};
-use crate::types::{feed_fn, namespace_ty, ncnamestr_cow_ty, phantom_lifetime_ty};
+use crate::types::{feed_fn, namespace_ty, ncnamestr_cow_ty, phantom_lifetime_ty, ref_ty};
 
 /// A struct or enum variant's contents.
 pub(crate) struct Compound {
@@ -517,4 +517,27 @@ impl Compound {
             },
         })
     }
+
+    /// Construct a tuple type with this compound's field's types in the same
+    /// order as they appear in the compound.
+    pub(crate) fn to_tuple_ty(&self) -> TypeTuple {
+        TypeTuple {
+            paren_token: token::Paren::default(),
+            elems: self.fields.iter().map(|x| x.ty().clone()).collect(),
+        }
+    }
+
+    /// Construct a tuple type with references to this compound's field's
+    /// types in the same order as they appear in the compound, with the given
+    /// lifetime.
+    pub(crate) fn to_ref_tuple_ty(&self, lifetime: &Lifetime) -> TypeTuple {
+        TypeTuple {
+            paren_token: token::Paren::default(),
+            elems: self
+                .fields
+                .iter()
+                .map(|x| ref_ty(x.ty().clone(), lifetime.clone()))
+                .collect(),
+        }
+    }
 }

xso-proc/src/field.rs πŸ”—

@@ -19,8 +19,8 @@ use crate::scope::{AsItemsScope, FromEventsScope};
 use crate::types::{
     as_optional_xml_text_fn, as_xml_iter_fn, as_xml_text_fn, default_fn, extend_fn, from_events_fn,
     from_xml_builder_ty, from_xml_text_fn, into_iterator_into_iter_fn, into_iterator_item_ty,
-    into_iterator_iter_ty, item_iter_ty, option_ty, ref_ty, string_ty, text_codec_decode_fn,
-    text_codec_encode_fn, ty_from_ident,
+    into_iterator_iter_ty, item_iter_ty, option_as_xml_ty, option_ty, ref_ty, string_ty,
+    text_codec_decode_fn, text_codec_encode_fn, ty_from_ident,
 };
 
 /// Code slices necessary for declaring and initializing a temporary variable
@@ -306,6 +306,7 @@ impl FieldKind {
 
             XmlFieldMeta::Extract {
                 span,
+                default_,
                 qname: QNameRef { namespace, name },
                 amount,
                 fields,
@@ -352,7 +353,7 @@ impl FieldKind {
                 )?;
 
                 Ok(Self::Child {
-                    default_: Flag::Absent,
+                    default_,
                     amount,
                     extract: Some(ExtractDef {
                         xml_namespace,
@@ -565,12 +566,7 @@ impl FieldDef {
                             &Visibility::Inherited,
                             &from_xml_builder_ty_ident,
                             &state_ty_ident,
-                            &Type::Tuple(TypeTuple {
-                                paren_token: token::Paren::default(),
-                                elems: [
-                                    element_ty.clone(),
-                                ].into_iter().collect(),
-                            })
+                            &parts.to_tuple_ty().into(),
                         )?;
                         let from_xml_builder_ty =
                             ty_from_ident(from_xml_builder_ty_ident.clone()).into();
@@ -580,7 +576,31 @@ impl FieldDef {
                         (
                             extra_defs,
                             matcher,
-                            quote! { #substate_result.0 },
+                            // This little ".into()" here goes a long way. It
+                            // relies on one of the most underrated trait
+                            // implementations in the standard library:
+                            // `impl From<T> for Option<T>`, which creates a
+                            // `Some(_)` from a `T`. Why is it so great?
+                            // Because there is also `impl From<Option<T>> for
+                            // Option<T>` (obviously), which is just a move.
+                            // So even without knowing the exact type of the
+                            // substate result and the field, we can make an
+                            // "downcast" to `Option<T>` if the field is of
+                            // type `Option<T>`, and it does the right thing
+                            // no matter whether the extracted field is of
+                            // type `Option<T>` or `T`.
+                            //
+                            // And then, type inferrence does the rest: There
+                            // is ambiguity there, of course, if we call
+                            // `.into()` on a value of type `Option<T>`:
+                            // Should Rust wrap it into another layer of
+                            // `Option`, or should it just move the value? The
+                            // answer lies in the type constraint imposed by
+                            // the place the value is *used*, which is
+                            // strictly bound by the field's type (so there
+                            // is, in fact, no ambiguity). So this works all
+                            // kinds of magic.
+                            quote! { #substate_result.0.into() },
                             from_xml_builder_ty,
                         )
                     }
@@ -736,7 +756,7 @@ impl FieldDef {
                     }
                 };
 
-                let (extra_defs, fetch, as_xml_iter, iter_ty) = match extract {
+                let (extra_defs, init, iter_ty) = match extract {
                     Some(ExtractDef {
                         ref xml_namespace,
                         ref xml_name,
@@ -776,21 +796,25 @@ impl FieldDef {
                             .compile()
                             .render(
                                 &Visibility::Inherited,
-                                &Type::Tuple(TypeTuple {
-                                    paren_token: token::Paren::default(),
-                                    elems: [ref_ty(item_ty.clone(), lifetime.clone())]
-                                        .into_iter()
-                                        .collect(),
-                                }),
+                                &parts.to_ref_tuple_ty(lifetime).into(),
                                 &state_ty_ident,
                                 lifetime,
                                 &item_iter_ty,
                             )?;
 
+                        let item_iter_ty = option_as_xml_ty(item_iter_ty);
                         (
                             extra_defs,
-                            quote! { (&#bound_name,) },
-                            quote! { #item_iter_ty_ident::new },
+                            // Again we exploit the extreme usefulness of the
+                            // `impl From<T> for Option<T>`. We already wrote
+                            // extensively about that in the FromXml
+                            // implementation corresponding to this code
+                            // above, and we will not repeat it here.
+                            quote! {
+                                ::xso::OptionAsXml::new(::core::option::Option::from(#bound_name).map(|#bound_name| {
+                                    #item_iter_ty_ident::new((#bound_name,))
+                                }).transpose()?)
+                            },
                             item_iter_ty,
                         )
                     }
@@ -800,8 +824,7 @@ impl FieldDef {
 
                         (
                             TokenStream::default(),
-                            quote! { #bound_name },
-                            quote! { #as_xml_iter },
+                            quote! { #as_xml_iter(#bound_name)? },
                             item_iter,
                         )
                     }
@@ -810,12 +833,7 @@ impl FieldDef {
                 match amount {
                     AmountConstraint::FixedSingle(_) => Ok(FieldIteratorPart::Content {
                         extra_defs,
-                        value: FieldTempInit {
-                            init: quote! {
-                                #as_xml_iter(#fetch)?
-                            },
-                            ty: iter_ty,
-                        },
+                        value: FieldTempInit { init, ty: iter_ty },
                         generator: quote! {
                             #bound_name.next().transpose()
                         },
@@ -853,7 +871,10 @@ impl FieldDef {
                                         }
                                     }
                                     if let ::core::option::Option::Some(item) = #bound_name.0.next() {
-                                        #bound_name.1 = ::core::option::Option::Some(#as_xml_iter({ let #bound_name = item; #fetch })?)
+                                        #bound_name.1 = ::core::option::Option::Some({
+                                            let #bound_name = item;
+                                            #init
+                                        });
                                     } else {
                                         break ::core::result::Result::Ok(::core::option::Option::None)
                                     }

xso-proc/src/meta.rs πŸ”—

@@ -680,6 +680,9 @@ pub(crate) enum XmlFieldMeta {
         /// The `n` flag.
         amount: Option<AmountConstraint>,
 
+        /// The `default` flag.
+        default_: Flag,
+
         /// The `fields` nested meta.
         fields: Vec<XmlFieldMeta>,
     },
@@ -859,8 +862,15 @@ impl XmlFieldMeta {
         let mut qname = QNameRef::default();
         let mut fields = None;
         let mut amount = None;
+        let mut default_ = Flag::Absent;
         meta.parse_nested_meta(|meta| {
-            if meta.path.is_ident("fields") {
+            if meta.path.is_ident("default") {
+                if default_.is_set() {
+                    return Err(Error::new_spanned(meta.path, "duplicate `default` key"));
+                }
+                default_ = (&meta.path).into();
+                Ok(())
+            } else if meta.path.is_ident("fields") {
                 if let Some((fields_span, _)) = fields.as_ref() {
                     let mut error = Error::new_spanned(meta.path, "duplicate `fields` meta");
                     error.combine(Error::new(*fields_span, "previous `fields` meta was here"));
@@ -889,6 +899,7 @@ impl XmlFieldMeta {
         let fields = fields.map(|(_, x)| x).unwrap_or_else(Vec::new);
         Ok(Self::Extract {
             span: meta.path.span(),
+            default_,
             qname,
             fields,
             amount,

xso-proc/src/types.rs πŸ”—

@@ -753,3 +753,33 @@ pub(crate) fn ty_from_ident(ident: Ident) -> TypePath {
         path: ident.into(),
     }
 }
+
+/// Construct a [`syn::Type`] referring to `xso::OptionAsXml<#inner_ty>`.
+pub(crate) fn option_as_xml_ty(inner_ty: Type) -> Type {
+    let span = inner_ty.span();
+    Type::Path(TypePath {
+        qself: None,
+        path: Path {
+            leading_colon: Some(syn::token::PathSep {
+                spans: [span, span],
+            }),
+            segments: [
+                PathSegment {
+                    ident: Ident::new("xso", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("OptionAsXml", span),
+                    arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments {
+                        colon2_token: None,
+                        lt_token: token::Lt { spans: [span] },
+                        args: [GenericArgument::Type(inner_ty)].into_iter().collect(),
+                        gt_token: token::Gt { spans: [span] },
+                    }),
+                },
+            ]
+            .into_iter()
+            .collect(),
+        },
+    })
+}

xso/src/from_xml_doc.md πŸ”—

@@ -251,6 +251,11 @@ implement the `Default` trait for a `FromXml` derivation. `default` has no
 influence on `AsXml`. Combining `default` and `n` where `n` is not set to `1`
 is not supported and will cause a compile-time error.
 
+Using `default` with a type other than `Option<T>` will cause the
+serialisation to mismatch the deserialisation (i.e. the struct is then not
+roundtrip-safe), because the deserialisation does not compare the value
+against `default` (but has special provisions to work with `Option<T>`).
+
 ##### Example
 
 ```rust
@@ -313,6 +318,7 @@ The following keys can be used inside the `#[xml(extract(..))]` meta:
 | --- | --- | --- |
 | `namespace` | *string literal* or *path* | The XML namespace of the child element. |
 | `name` | *string literal* or *path* | The XML name of the child element. If it is a *path*, it must point at a `&'static NcNameStr`. |
+| `default` | flag | If present, an absent child will substitute the default value instead of raising an error. |
 | `n` | `1` or `..` | If `1`, a single element is parsed. If `..`, a collection is parsed. Defaults to `1`. |
 | `fields` | *nested* | A list of [field meta](#field-meta) which describe the contents of the child element. |
 
@@ -341,8 +347,30 @@ tuple-style struct. The macro generates serialisation/deserialisation code
 for that nameless tuple-style struct and uses it to serialise/deserialise
 the field.
 
+If `default` is specified and the child is absent in the source, the value
+is generated using [`core::default::Default`], requiring the field type to
+implement the `Default` trait for a `FromXml` derivation. `default` has no
+influence on `AsXml`. Combining `default` and `n` where `n` is not set to `1`
+is not supported and will cause a compile-time error.
+
+Mixing `default` on the `#[xml(extract)]` itself with `default` on the
+extracted inner fields creates non-roundtrip-safe parsing, unless you also
+use twice-nested [`core::option::Option`] types. That means that when
+deserialising a piece of XML and reserialising it without changing the
+contents of the struct in Rust, the resulting XML may not match the input.
+This is because to the serialiser, if only one layer of
+[`core::option::Option`] is used, it is not possible to distinguish which of
+the two layers were defaulted. The exact behaviour on serialisation in such a
+situation is *not guaranteed* and may change between versions of the `xso`
+crate, its dependencies, the standard library, or even rustc itself.
+
+Using `default` with a type other than `Option<T>` will cause the
+serialisation to mismatch the deserialisation, too (i.e. the struct is then
+not roundtrip-safe), because the deserialisation does not compare the value
+against `default` (but has special provisions to work with `Option<T>`).
+
 **Note:** Currently, only a single field can be extracted. This restriction
-will be lifted in the future. Collections are not supported yet, either.
+will be lifted in the future.
 
 Using `extract` instead of `child` combined with a specific struct declaration
 comes with trade-offs. On the one hand, using `extract` gives you flexibility

xso/src/lib.rs πŸ”—

@@ -86,6 +86,17 @@ pub trait AsXml {
 /// Helper iterator to convert an `Option<T>` to XML.
 pub struct OptionAsXml<T: Iterator>(Option<T>);
 
+impl<T: Iterator> OptionAsXml<T> {
+    /// Construct a new iterator, wrapping the given iterator.
+    ///
+    /// If `inner` is `None`, this iterator terminates immediately. Otherwise,
+    /// it yields the elements yielded by `inner` until `inner` finishes,
+    /// after which this iterator completes, too.
+    pub fn new(inner: Option<T>) -> Self {
+        Self(inner)
+    }
+}
+
 impl<'x, T: Iterator<Item = Result<Item<'x>, self::error::Error>>> Iterator for OptionAsXml<T> {
     type Item = Result<Item<'x>, self::error::Error>;