xso: add support for extracting into collections

Jonas Schäfer created

Change summary

parsers/src/util/macro_tests.rs | 53 +++++++++++++++++++++++++++++++++++
xso-proc/src/field.rs           | 40 +++++++++++++++++++++++--
xso-proc/src/meta.rs            | 32 +++++++++++++++++++++
xso/src/from_xml_doc.md         | 14 +++++++++
4 files changed, 135 insertions(+), 4 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs 🔗

@@ -1181,3 +1181,56 @@ fn extract_omit_name_and_namespace_roundtrip() {
         "<parent xmlns='urn:example:ns1'><contents>hello world!</contents></parent>",
     )
 }
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct TextExtractVec {
+    #[xml(extract(n = .., namespace = NS1, name = "child", fields(text(type_ = String))))]
+    contents: Vec<String>,
+}
+
+#[test]
+fn text_extract_vec_positive_nonempty() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<TextExtractVec>(
+        "<parent xmlns='urn:example:ns1'><child>hello</child><child>world</child></parent>",
+    ) {
+        Ok(TextExtractVec { contents }) => {
+            assert_eq!(contents[0], "hello");
+            assert_eq!(contents[1], "world");
+            assert_eq!(contents.len(), 2);
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn text_extract_vec_positive_empty() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<TextExtractVec>("<parent xmlns='urn:example:ns1'/>") {
+        Ok(TextExtractVec { contents }) => {
+            assert_eq!(contents.len(), 0);
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn text_extract_vec_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<TextExtractVec>(
+        "<parent xmlns='urn:example:ns1'><child>hello</child><child>world</child></parent>",
+    )
+}

xso-proc/src/field.rs 🔗

@@ -249,7 +249,23 @@ impl FieldKind {
                 })
             }
 
-            XmlFieldMeta::Text { span: _, codec } => Ok(Self::Text { codec }),
+            XmlFieldMeta::Text {
+                span: _,
+                codec,
+                type_,
+            } => {
+                // This would've been taken via `XmlFieldMeta::take_type` if
+                // this field was within an extract where a `type_` is legal
+                // to have.
+                if let Some(type_) = type_ {
+                    return Err(Error::new_spanned(
+                        type_,
+                        "specifying `type_` on fields inside structs and enum variants is redundant and not allowed."
+                    ));
+                }
+
+                Ok(Self::Text { codec })
+            }
 
             XmlFieldMeta::Child {
                 span: _,
@@ -280,12 +296,13 @@ impl FieldKind {
             XmlFieldMeta::Extract {
                 span,
                 qname: QNameRef { namespace, name },
+                amount,
                 fields,
             } => {
                 let xml_namespace = namespace.unwrap_or_else(|| container_namespace.clone());
                 let xml_name = default_name(span, name, field_ident)?;
 
-                let field = {
+                let mut field = {
                     let mut fields = fields.into_iter();
                     let Some(field) = fields.next() else {
                         return Err(Error::new(
@@ -304,13 +321,28 @@ impl FieldKind {
                     field
                 };
 
+                let amount = amount.unwrap_or(AmountConstraint::FixedSingle(Span::call_site()));
+                let field_ty = match field.take_type() {
+                    Some(v) => v,
+                    None => match amount {
+                        // Only allow inferrence for single values: inferrence
+                        // for collections will always be wrong.
+                        AmountConstraint::FixedSingle(_) => field_ty.clone(),
+                        _ => {
+                            return Err(Error::new(
+                                field.span(),
+                                "extracted field must specify a type explicitly when extracting into a collection."
+                            ));
+                        }
+                    },
+                };
                 let parts = Compound::from_field_defs(
-                    [FieldDef::from_extract(field, 0, field_ty, &xml_namespace)].into_iter(),
+                    [FieldDef::from_extract(field, 0, &field_ty, &xml_namespace)].into_iter(),
                 )?;
 
                 Ok(Self::Child {
                     default_: Flag::Absent,
-                    amount: AmountConstraint::FixedSingle(Span::call_site()),
+                    amount,
                     extract: Some(ExtractDef {
                         xml_namespace,
                         xml_name,

xso-proc/src/meta.rs 🔗

@@ -645,6 +645,9 @@ pub(crate) enum XmlFieldMeta {
 
         /// The path to the optional codec type.
         codec: Option<Expr>,
+
+        /// An explicit type override, only usable within extracts.
+        type_: Option<Type>,
     },
 
     /// `#[xml(child)`
@@ -671,6 +674,9 @@ pub(crate) enum XmlFieldMeta {
         /// The namespace/name keys.
         qname: QNameRef,
 
+        /// The `n` flag.
+        amount: Option<AmountConstraint>,
+
         /// The `fields` nested meta.
         fields: Vec<XmlFieldMeta>,
     },
@@ -751,10 +757,12 @@ impl XmlFieldMeta {
             }
             Ok(Self::Text {
                 span: meta.path.span(),
+                type_: None,
                 codec: Some(codec),
             })
         } else if meta.input.peek(syn::token::Paren) {
             let mut codec: Option<Expr> = None;
+            let mut type_: Option<Type> = None;
             meta.parse_nested_meta(|meta| {
                 if meta.path.is_ident("codec") {
                     if codec.is_some() {
@@ -773,17 +781,25 @@ impl XmlFieldMeta {
                     }
                     codec = Some(new_codec);
                     Ok(())
+                } else if meta.path.is_ident("type_") {
+                    if type_.is_some() {
+                        return Err(Error::new_spanned(meta.path, "duplicate `type_` key"));
+                    }
+                    type_ = Some(meta.value()?.parse()?);
+                    Ok(())
                 } else {
                     Err(Error::new_spanned(meta.path, "unsupported key"))
                 }
             })?;
             Ok(Self::Text {
                 span: meta.path.span(),
+                type_,
                 codec,
             })
         } else {
             Ok(Self::Text {
                 span: meta.path.span(),
+                type_: None,
                 codec: None,
             })
         }
@@ -829,6 +845,7 @@ impl XmlFieldMeta {
     fn extract_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
         let mut qname = QNameRef::default();
         let mut fields = None;
+        let mut amount = None;
         meta.parse_nested_meta(|meta| {
             if meta.path.is_ident("fields") {
                 if let Some((fields_span, _)) = fields.as_ref() {
@@ -843,6 +860,12 @@ impl XmlFieldMeta {
                 })?;
                 fields = Some((meta.path.span(), new_fields));
                 Ok(())
+            } else if meta.path.is_ident("n") {
+                if amount.is_some() {
+                    return Err(Error::new_spanned(meta.path, "duplicate `n` key"));
+                }
+                amount = Some(meta.value()?.parse()?);
+                Ok(())
             } else {
                 match qname.parse_incremental_from_meta(meta)? {
                     None => Ok(()),
@@ -855,6 +878,7 @@ impl XmlFieldMeta {
             span: meta.path.span(),
             qname,
             fields,
+            amount,
         })
     }
 
@@ -952,4 +976,12 @@ impl XmlFieldMeta {
             Self::Extract { ref span, .. } => *span,
         }
     }
+
+    /// Extract an explicit type specification if it exists.
+    pub(crate) fn take_type(&mut self) -> Option<Type> {
+        match self {
+            Self::Text { ref mut type_, .. } => type_.take(),
+            _ => None,
+        }
+    }
 }

xso/src/from_xml_doc.md 🔗

@@ -308,6 +308,7 @@ The following keys can be used inside the `#[xml(extract(..))]` meta:
 | --- | --- | --- |
 | `namespace` | *string literal* or *path* | The XML namespace of the child element. |
 | `name` | *string literal* or *path* | The XML name of the child element. If it is a *path*, it must point at a `&'static NcNameStr`. |
+| `n` | `1` or `..` | If `1`, a single element is parsed. If `..`, a collection is parsed. Defaults to `1`. |
 | `fields` | *nested* | A list of [field meta](#field-meta) which describe the contents of the child element. |
 
 If the `name` key contains a namespace prefix, it must be one of the prefixes
@@ -322,6 +323,14 @@ and the `extract` meta is being used on a named field, that field's name is
 used. If `name` is omitted and `extract` is not used on a named field, an
 error is emitted.
 
+When parsing a single child element (i.e. `n = 1` or no `n` value set at all),
+the extracted field's type is set to be the same type as the field on which
+the extract is declared, unless overridden in the extracted field's meta.
+
+When parsing a collection (with `n = ..`), the extracted fields within
+`fields()` must all have type specifications. Not all field kinds support
+that.
+
 The sequence of field meta inside `fields` can be thought of as a nameless
 tuple-style struct. The macro generates serialisation/deserialisation code
 for that nameless tuple-style struct and uses it to serialise/deserialise
@@ -367,6 +376,7 @@ element.
 | Key | Value type | Description |
 | --- | --- | --- |
 | `codec` | *expression* | Optional [`TextCodec`] implementation which is used to encode or decode the field. |
+| `type_` | *type* | Optional explicit type specification. Only allowed within `#[xml(extract(fields(..)))]`. |
 
 If `codec` is given, the given `codec` value must implement
 [`TextCodec<T>`][`TextCodec`] where `T` is the type of the field.
@@ -374,6 +384,10 @@ If `codec` is given, the given `codec` value must implement
 If `codec` is *not* given, the field's type must implement [`FromXmlText`] for
 `FromXml` and for `AsXml`, the field's type must implement [`AsXmlText`].
 
+If `type_` is specified and the `text` meta is used within an
+`#[xml(extract(fields(..)))]` meta, the specified type is used instead of the
+field type on which the `extract` is declared.
+
 The `text` meta also supports a shorthand syntax, `#[xml(text = ..)]`, where
 the value is treated as the value for the `codec` key (with optional prefix as
 described above, and unnamespaced otherwise).