xso: implement support for extracting data from child elements

Jonas Schäfer created

Change summary

parsers/src/util/macro_tests.rs | 127 +++++++++++++++++
xso-proc/src/compound.rs        | 112 ++++++++++++---
xso-proc/src/enums.rs           |  13 +
xso-proc/src/error_message.rs   |  45 ++++++
xso-proc/src/field.rs           | 250 ++++++++++++++++++++++++++++++++++
xso-proc/src/meta.rs            |  92 ++++++++++++
xso-proc/src/scope.rs           |  50 ++++++
xso-proc/src/state.rs           |  11 
xso-proc/src/structs.rs         |  11 
xso-proc/src/types.rs           |   8 +
xso/ChangeLog                   |   2 
xso/src/from_xml_doc.md         |  59 ++++++++
12 files changed, 728 insertions(+), 52 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs 🔗

@@ -827,3 +827,130 @@ fn children_roundtrip() {
         "<parent xmlns='urn:example:ns1'><attr foo='X'/><attr foo='Y'/><attr foo='Z'/></parent>",
     )
 }
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct TextExtract {
+    #[xml(extract(namespace = NS1, name = "child", fields(text)))]
+    contents: String,
+}
+
+#[test]
+fn text_extract_positive() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<TextExtract>(
+        "<parent xmlns='urn:example:ns1'><child>hello world</child></parent>",
+    ) {
+        Ok(TextExtract { contents }) => {
+            assert_eq!(contents, "hello world");
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn text_extract_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<TextExtract>(
+        "<parent xmlns='urn:example:ns1'><child>hello world!</child></parent>",
+    )
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct AttributeExtract {
+    #[xml(extract(namespace = NS1, name = "child", fields(attribute = "foo")))]
+    contents: String,
+}
+
+#[test]
+fn attribute_extract_positive() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<AttributeExtract>(
+        "<parent xmlns='urn:example:ns1'><child foo='hello world'/></parent>",
+    ) {
+        Ok(AttributeExtract { contents }) => {
+            assert_eq!(contents, "hello world");
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn attribute_extract_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<AttributeExtract>(
+        "<parent xmlns='urn:example:ns1'><child foo='hello world'/></parent>",
+    )
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct ChildExtract {
+    #[xml(extract(namespace = NS1, name = "child", fields(child)))]
+    contents: RequiredAttribute,
+}
+
+#[test]
+fn child_extract_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<ChildExtract>(
+        "<parent xmlns='urn:example:ns1'><child><attr foo='hello world!'/></child></parent>",
+    )
+}
+
+#[derive(FromXml, AsXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "parent")]
+struct NestedExtract {
+    #[xml(extract(namespace = NS1, name = "child", fields(
+        extract(namespace = NS1, name = "grandchild", fields(text))
+    )))]
+    contents: String,
+}
+
+#[test]
+fn nested_extract_positive() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    match parse_str::<NestedExtract>(
+        "<parent xmlns='urn:example:ns1'><child><grandchild>hello world</grandchild></child></parent>",
+    ) {
+        Ok(NestedExtract { contents }) => {
+            assert_eq!(contents, "hello world");
+        }
+        other => panic!("unexpected result: {:?}", other),
+    }
+}
+
+#[test]
+fn nested_extract_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<NestedExtract>("<parent xmlns='urn:example:ns1'><child><grandchild>hello world</grandchild></child></parent>")
+}

xso-proc/src/compound.rs 🔗

@@ -83,7 +83,7 @@ impl Compound {
         output_name: &ParentRef,
         state_prefix: &str,
     ) -> Result<FromEventsSubmachine> {
-        let scope = FromEventsScope::new();
+        let scope = FromEventsScope::new(state_ty_ident.clone());
         let FromEventsScope {
             ref attrs,
             ref builder_data_ident,
@@ -106,6 +106,8 @@ impl Compound {
         let mut output_cons = TokenStream::default();
         let mut child_matchers = TokenStream::default();
         let mut text_handler = None;
+        let mut extra_defs = TokenStream::default();
+        let is_tuple = !output_name.is_path();
 
         for (i, field) in self.fields.iter().enumerate() {
             let member = field.member();
@@ -125,9 +127,15 @@ impl Compound {
                         #builder_field_name: #init,
                     });
 
-                    output_cons.extend(quote! {
-                        #member: #builder_data_ident.#builder_field_name,
-                    });
+                    if is_tuple {
+                        output_cons.extend(quote! {
+                            #builder_data_ident.#builder_field_name,
+                        });
+                    } else {
+                        output_cons.extend(quote! {
+                            #member: #builder_data_ident.#builder_field_name,
+                        });
+                    }
                 }
 
                 FieldBuilderPart::Text {
@@ -153,12 +161,20 @@ impl Compound {
                             Self::#default_state_ident { #builder_data_ident }
                         ))
                     });
-                    output_cons.extend(quote! {
-                        #member: #finalize,
-                    });
+
+                    if is_tuple {
+                        output_cons.extend(quote! {
+                            #finalize,
+                        });
+                    } else {
+                        output_cons.extend(quote! {
+                            #member: #finalize,
+                        });
+                    }
                 }
 
                 FieldBuilderPart::Nested {
+                    extra_defs: field_extra_defs,
                     value: FieldTempInit { ty, init },
                     matcher,
                     builder,
@@ -212,9 +228,17 @@ impl Compound {
                         };
                     });
 
-                    output_cons.extend(quote! {
-                        #member: #finalize,
-                    });
+                    if is_tuple {
+                        output_cons.extend(quote! {
+                            #finalize,
+                        });
+                    } else {
+                        output_cons.extend(quote! {
+                            #member: #finalize,
+                        });
+                    }
+
+                    extra_defs.extend(field_extra_defs);
                 }
             }
         }
@@ -243,6 +267,11 @@ impl Compound {
                     #path { #output_cons }
                 }
             }
+            ParentRef::Unnamed { .. } => {
+                quote! {
+                    ( #output_cons )
+                }
+            }
         };
 
         states.push(State::new_with_builder(
@@ -277,6 +306,8 @@ impl Compound {
 
         Ok(FromEventsSubmachine {
             defs: quote! {
+                #extra_defs
+
                 struct #builder_data_ty {
                     #builder_data_def
                 }
@@ -308,10 +339,11 @@ impl Compound {
     pub(crate) fn make_as_item_iter_statemachine(
         &self,
         input_name: &ParentRef,
+        state_ty_ident: &Ident,
         state_prefix: &str,
         lifetime: &Lifetime,
     ) -> Result<AsItemsSubmachine> {
-        let scope = AsItemsScope::new(lifetime);
+        let scope = AsItemsScope::new(lifetime, state_ty_ident.clone());
 
         let element_head_start_state_ident =
             quote::format_ident!("{}ElementHeadStart", state_prefix);
@@ -322,8 +354,10 @@ impl Compound {
         let dummy_ident = quote::format_ident!("dummy");
         let mut states = Vec::new();
 
+        let is_tuple = !input_name.is_path();
         let mut destructure = TokenStream::default();
         let mut start_init = TokenStream::default();
+        let mut extra_defs = TokenStream::default();
 
         states.push(
             State::new(element_head_start_state_ident.clone())
@@ -345,7 +379,7 @@ impl Compound {
         for (i, field) in self.fields.iter().enumerate() {
             let member = field.member();
             let bound_name = mangle_member(member);
-            let part = field.make_iterator_part(&scope, &bound_name)?;
+            let part = field.make_iterator_part(&scope, input_name, &bound_name)?;
             let state_name = quote::format_ident!("{}Field{}", state_prefix, i);
             let ty = scope.borrow(field.ty().clone());
 
@@ -366,9 +400,15 @@ impl Compound {
                     );
                     element_head_end_idx += 1;
 
-                    destructure.extend(quote! {
-                        #member: ref #bound_name,
-                    });
+                    if is_tuple {
+                        destructure.extend(quote! {
+                            ref #bound_name,
+                        });
+                    } else {
+                        destructure.extend(quote! {
+                            #member: ref #bound_name,
+                        });
+                    }
                     start_init.extend(quote! {
                         #bound_name,
                     });
@@ -389,15 +429,22 @@ impl Compound {
                                 ))
                             }),
                     );
-                    destructure.extend(quote! {
-                        #member: #bound_name,
-                    });
+                    if is_tuple {
+                        destructure.extend(quote! {
+                            #bound_name,
+                        });
+                    } else {
+                        destructure.extend(quote! {
+                            #member: #bound_name,
+                        });
+                    }
                     start_init.extend(quote! {
                         #bound_name,
                     });
                 }
 
                 FieldIteratorPart::Content {
+                    extra_defs: field_extra_defs,
                     value: FieldTempInit { ty, init },
                     generator,
                 } => {
@@ -415,12 +462,20 @@ impl Compound {
                                 #generator?
                             }),
                     );
-                    destructure.extend(quote! {
-                        #member: #bound_name,
-                    });
+                    if is_tuple {
+                        destructure.extend(quote! {
+                            #bound_name,
+                        });
+                    } else {
+                        destructure.extend(quote! {
+                            #member: #bound_name,
+                        });
+                    }
                     start_init.extend(quote! {
                         #bound_name: #init,
                     });
+
+                    extra_defs.extend(field_extra_defs);
                 }
             }
         }
@@ -440,14 +495,19 @@ impl Compound {
             }),
         );
 
-        let ParentRef::Named(input_path) = input_name;
+        let destructure = match input_name {
+            ParentRef::Named(ref input_path) => quote! {
+                #input_path { #destructure }
+            },
+            ParentRef::Unnamed { .. } => quote! {
+                ( #destructure )
+            },
+        };
 
         Ok(AsItemsSubmachine {
-            defs: TokenStream::default(),
+            defs: extra_defs,
             states,
-            destructure: quote! {
-                #input_path { #destructure }
-            },
+            destructure,
             init: quote! {
                 Self::#element_head_start_state_ident { #dummy_ident: ::std::marker::PhantomData, #name_ident: name.1, #ns_ident: name.0, #start_init }
             },

xso-proc/src/enums.rs 🔗

@@ -17,6 +17,7 @@ use crate::compound::Compound;
 use crate::error_message::ParentRef;
 use crate::meta::{reject_key, Flag, NameRef, NamespaceRef, QNameRef, XmlCompoundMeta};
 use crate::state::{AsItemsStateMachine, FromEventsStateMachine};
+use crate::types::{ref_ty, ty_from_ident};
 
 /// The definition of an enum variant, switched on the XML element's name.
 struct NameVariant {
@@ -103,6 +104,7 @@ impl NameVariant {
         &self,
         xml_namespace: &NamespaceRef,
         enum_ident: &Ident,
+        state_ty_ident: &Ident,
         item_iter_ty_lifetime: &Lifetime,
     ) -> Result<AsItemsStateMachine> {
         let xml_name = &self.name;
@@ -119,6 +121,7 @@ impl NameVariant {
                     .into_iter()
                     .collect(),
                 }),
+                state_ty_ident,
                 &self.ident.to_string(),
                 &item_iter_ty_lifetime,
             )?
@@ -316,17 +319,17 @@ impl ItemDef for EnumDef {
             statemachine.merge(variant.make_as_item_iter_statemachine(
                 &self.namespace,
                 target_ty_ident,
+                &state_ty_ident,
                 &item_iter_ty_lifetime,
             )?);
         }
 
         let defs = statemachine.render(
             vis,
-            &TypePath {
-                qself: None,
-                path: target_ty_ident.clone().into(),
-            }
-            .into(),
+            &ref_ty(
+                ty_from_ident(target_ty_ident.clone()).into(),
+                item_iter_ty_lifetime.clone(),
+            ),
             &state_ty_ident,
             &item_iter_ty_lifetime,
             &item_iter_ty,

xso-proc/src/error_message.rs 🔗

@@ -19,6 +19,25 @@ pub(super) enum ParentRef {
     /// The parent is addressable by a path, e.g. a struct type or enum
     /// variant.
     Named(Path),
+
+    /// The parent is not addressable by a path, because it is in fact an
+    /// ephemeral structure.
+    ///
+    /// Used to reference the ephemeral structures used by fields declared
+    /// with `#[xml(extract(..))]`.
+    Unnamed {
+        /// The parent's ref.
+        ///
+        /// For extracts, this refers to the compound where the field with
+        /// the extract is declared.
+        parent: Box<ParentRef>,
+
+        /// The field inside that parent.
+        ///
+        /// For extracts, this refers to the compound field where the extract
+        /// is declared.
+        field: Member,
+    },
 }
 
 impl From<Path> for ParentRef {
@@ -47,6 +66,32 @@ impl fmt::Display for ParentRef {
                 }
                 write!(f, " element")
             }
+            Self::Unnamed { parent, field } => {
+                write!(f, "extraction for {} in {}", FieldName(field), parent)
+            }
+        }
+    }
+}
+
+impl ParentRef {
+    /// Create a new `ParentRef` for a member inside this one.
+    ///
+    /// Returns a [`Self::Unnamed`] with `self` as parent and `member` as
+    /// field.
+    pub(crate) fn child(&self, member: Member) -> Self {
+        match self {
+            Self::Named { .. } | Self::Unnamed { .. } => Self::Unnamed {
+                parent: Box::new(self.clone()),
+                field: member,
+            },
+        }
+    }
+
+    /// Return true if and only if this ParentRef can be addressed as a path.
+    pub(crate) fn is_path(&self) -> bool {
+        match self {
+            Self::Named { .. } => true,
+            Self::Unnamed { .. } => false,
         }
     }
 }

xso-proc/src/field.rs 🔗

@@ -12,6 +12,7 @@ use syn::{spanned::Spanned, *};
 
 use rxml_validation::NcName;
 
+use crate::compound::Compound;
 use crate::error_message::{self, ParentRef};
 use crate::meta::{AmountConstraint, Flag, NameRef, NamespaceRef, QNameRef, XmlFieldMeta};
 use crate::scope::{AsItemsScope, FromEventsScope};
@@ -19,7 +20,7 @@ use crate::types::{
     as_optional_xml_text_fn, as_xml_iter_fn, as_xml_text_fn, default_fn, extend_fn, from_events_fn,
     from_xml_builder_ty, from_xml_text_fn, into_iterator_into_iter_fn, into_iterator_item_ty,
     into_iterator_iter_ty, item_iter_ty, option_ty, ref_ty, string_ty, text_codec_decode_fn,
-    text_codec_encode_fn,
+    text_codec_encode_fn, ty_from_ident,
 };
 
 /// Code slices necessary for declaring and initializing a temporary variable
@@ -63,6 +64,10 @@ pub(crate) enum FieldBuilderPart {
 
     /// Parse a field from child element events.
     Nested {
+        /// Additional definition items which need to be inserted at module
+        /// level for the rest of the implementation to work.
+        extra_defs: TokenStream,
+
         /// Expression and type which initializes a buffer to use during
         /// parsing.
         value: FieldTempInit,
@@ -117,6 +122,10 @@ pub(crate) enum FieldIteratorPart {
 
     /// The field is emitted as series of items which form a child element.
     Content {
+        /// Additional definition items which need to be inserted at module
+        /// level for the rest of the implementation to work.
+        extra_defs: TokenStream,
+
         /// Expression and type which initializes the nested iterator.
         ///
         /// Note that this is evaluated at construction time of the iterator.
@@ -161,6 +170,26 @@ enum FieldKind {
         /// Number of child elements allowed.
         amount: AmountConstraint,
     },
+
+    /// Extract contents from a child element.
+    Extract {
+        /// The XML namespace of the child to extract data from.
+        xml_namespace: NamespaceRef,
+
+        /// The XML name of the child to extract data from.
+        xml_name: NameRef,
+
+        /// Compound which contains the arguments of the `extract(..)` meta
+        /// (except the `from`), transformed into a struct with unnamed
+        /// fields.
+        ///
+        /// This is used to generate the parsing/serialisation code, by
+        /// essentially "declaring" a shim struct, as if it were a real Rust
+        /// struct, and using the result of the parsing process directly for
+        /// the field on which the `extract(..)` option was used, instead of
+        /// putting it into a Rust struct.
+        parts: Compound,
+    },
 }
 
 impl FieldKind {
@@ -168,7 +197,9 @@ impl FieldKind {
     ///
     /// `field_ident` is, for some field types, used to infer an XML name if
     /// it is not specified explicitly.
-    fn from_meta(meta: XmlFieldMeta, field_ident: Option<&Ident>) -> Result<Self> {
+    ///
+    /// `field_ty` is needed for type inference on extracted fields.
+    fn from_meta(meta: XmlFieldMeta, field_ident: Option<&Ident>, field_ty: &Type) -> Result<Self> {
         match meta {
             XmlFieldMeta::Attribute {
                 span,
@@ -204,9 +235,13 @@ impl FieldKind {
                 })
             }
 
-            XmlFieldMeta::Text { codec } => Ok(Self::Text { codec }),
+            XmlFieldMeta::Text { span: _, codec } => Ok(Self::Text { codec }),
 
-            XmlFieldMeta::Child { default_, amount } => {
+            XmlFieldMeta::Child {
+                span: _,
+                default_,
+                amount,
+            } => {
                 if let Some(AmountConstraint::Any(ref amount_span)) = amount {
                     if let Flag::Present(ref flag_span) = default_ {
                         let mut err = Error::new(
@@ -226,6 +261,55 @@ impl FieldKind {
                     amount: amount.unwrap_or(AmountConstraint::FixedSingle(Span::call_site())),
                 })
             }
+
+            XmlFieldMeta::Extract {
+                span,
+                qname: QNameRef { namespace, name },
+                fields,
+            } => {
+                let Some(xml_namespace) = namespace else {
+                    return Err(Error::new(
+                        span,
+                        "`#[xml(extract(..))]` must contain a `namespace` key.",
+                    ));
+                };
+
+                let Some(xml_name) = name else {
+                    return Err(Error::new(
+                        span,
+                        "`#[xml(extract(..))]` must contain a `name` key.",
+                    ));
+                };
+
+                let field = {
+                    let mut fields = fields.into_iter();
+                    let Some(field) = fields.next() else {
+                        return Err(Error::new(
+                            span,
+                            "`#[xml(extract(..))]` must contain one `fields(..)` nested meta which contains at least one field meta."
+                        ));
+                    };
+
+                    if let Some(field) = fields.next() {
+                        return Err(Error::new(
+                            field.span(),
+                            "more than one extracted piece of data is currently not supported",
+                        ));
+                    }
+
+                    field
+                };
+
+                let parts = Compound::from_field_defs(
+                    [FieldDef::from_extract(field, 0, field_ty)].into_iter(),
+                )?;
+
+                Ok(Self::Extract {
+                    xml_namespace,
+                    xml_name,
+                    parts,
+                })
+            }
         }
     }
 }
@@ -268,9 +352,22 @@ impl FieldDef {
         let ty = field.ty.clone();
 
         Ok(Self {
+            kind: FieldKind::from_meta(meta, ident, &ty)?,
             member,
             ty,
-            kind: FieldKind::from_meta(meta, ident)?,
+        })
+    }
+
+    /// Create a new field definition from a field meta nested inside an `extract(..)` meta.
+    ///
+    /// The `index` must be the zero-based index of the field even for named
+    /// fields.
+    pub(crate) fn from_extract(meta: XmlFieldMeta, index: u32, ty: &Type) -> Result<Self> {
+        let span = meta.span();
+        Ok(Self {
+            member: Member::Unnamed(Index { index, span }),
+            ty: ty.clone(),
+            kind: FieldKind::from_meta(meta, None, ty)?,
         })
     }
 
@@ -408,6 +505,7 @@ impl FieldDef {
                         };
 
                         Ok(FieldBuilderPart::Nested {
+                            extra_defs: TokenStream::default(),
                             value: FieldTempInit {
                                 init: quote! { ::std::option::Option::None },
                                 ty: option_ty(self.ty.clone()),
@@ -438,6 +536,7 @@ impl FieldDef {
                         let ty_extend = extend_fn(self.ty.clone(), element_ty.clone());
                         let ty_default = default_fn(self.ty.clone());
                         Ok(FieldBuilderPart::Nested {
+                            extra_defs: TokenStream::default(),
                             value: FieldTempInit {
                                 init: quote! { #ty_default() },
                                 ty: self.ty.clone(),
@@ -452,6 +551,80 @@ impl FieldDef {
                     }
                 }
             }
+
+            FieldKind::Extract {
+                ref xml_namespace,
+                ref xml_name,
+                ref parts,
+            } => {
+                let FromEventsScope {
+                    ref substate_result,
+                    ..
+                } = scope;
+                let field_access = scope.access_field(&self.member);
+
+                let missing_msg = error_message::on_missing_child(container_name, &self.member);
+                let duplicate_msg = error_message::on_duplicate_child(container_name, &self.member);
+
+                let on_absent = quote! {
+                    return ::core::result::Result::Err(::xso::error::Error::Other(#missing_msg).into())
+                };
+
+                let from_xml_builder_ty_ident =
+                    scope.make_member_type_name(&self.member, "FromXmlBuilder");
+                let state_ty_ident = quote::format_ident!("{}State", from_xml_builder_ty_ident,);
+
+                let extra_defs = parts.make_from_events_statemachine(
+                    &state_ty_ident,
+                    &container_name.child(self.member.clone()),
+                    "",
+                )?.with_augmented_init(|init| quote! {
+                    if name.0 == #xml_namespace && name.1 == #xml_name {
+                        #init
+                    } else {
+                        ::core::result::Result::Err(::xso::error::FromEventsError::Mismatch { name, attrs })
+                    }
+                }).compile().render(
+                    &Visibility::Inherited,
+                    &from_xml_builder_ty_ident,
+                    &state_ty_ident,
+                    &Type::Tuple(TypeTuple {
+                        paren_token: token::Paren::default(),
+                        elems: [
+                            self.ty.clone(),
+                        ].into_iter().collect(),
+                    })
+                )?;
+                let from_xml_builder_ty = ty_from_ident(from_xml_builder_ty_ident.clone()).into();
+
+                Ok(FieldBuilderPart::Nested {
+                    extra_defs,
+                    value: FieldTempInit {
+                        init: quote! { ::std::option::Option::None },
+                        ty: option_ty(self.ty.clone()),
+                    },
+                    matcher: quote! {
+                        match #state_ty_ident::new(name, attrs) {
+                            ::core::result::Result::Ok(v) => if #field_access.is_some() {
+                                ::core::result::Result::Err(::xso::error::FromEventsError::Invalid(::xso::error::Error::Other(#duplicate_msg)))
+                            } else {
+                                ::core::result::Result::Ok(#from_xml_builder_ty_ident(::core::option::Option::Some(v)))
+                            },
+                            ::core::result::Result::Err(e) => ::core::result::Result::Err(e),
+                        }
+                    },
+                    builder: from_xml_builder_ty,
+                    collect: quote! {
+                        #field_access = ::std::option::Option::Some(#substate_result.0);
+                    },
+                    finalize: quote! {
+                        match #field_access {
+                            ::std::option::Option::Some(value) => value,
+                            ::std::option::Option::None => #on_absent,
+                        }
+                    },
+                })
+            }
         }
     }
 
@@ -462,6 +635,7 @@ impl FieldDef {
     pub(crate) fn make_iterator_part(
         &self,
         scope: &AsItemsScope,
+        container_name: &ParentRef,
         bound_name: &Ident,
     ) -> Result<FieldIteratorPart> {
         match self.kind {
@@ -515,6 +689,7 @@ impl FieldDef {
                 let item_iter = item_iter_ty(self.ty.clone(), lifetime.clone());
 
                 Ok(FieldIteratorPart::Content {
+                    extra_defs: TokenStream::default(),
                     value: FieldTempInit {
                         init: quote! {
                             #as_xml_iter(#bound_name)?
@@ -561,6 +736,7 @@ impl FieldDef {
                 });
 
                 Ok(FieldIteratorPart::Content {
+                    extra_defs: TokenStream::default(),
                     value: FieldTempInit {
                         init: quote! {
                             (#into_iter(#bound_name), ::core::option::Option::None)
@@ -583,6 +759,70 @@ impl FieldDef {
                     },
                 })
             }
+
+            FieldKind::Extract {
+                ref xml_namespace,
+                ref xml_name,
+                ref parts,
+            } => {
+                let AsItemsScope { ref lifetime, .. } = scope;
+                let item_iter_ty_ident = scope.make_member_type_name(&self.member, "AsXmlIterator");
+                let state_ty_ident = quote::format_ident!("{}State", item_iter_ty_ident,);
+                let mut item_iter_ty = ty_from_ident(item_iter_ty_ident.clone());
+                item_iter_ty.path.segments[0].arguments =
+                    PathArguments::AngleBracketed(AngleBracketedGenericArguments {
+                        colon2_token: None,
+                        lt_token: token::Lt::default(),
+                        args: [GenericArgument::Lifetime(lifetime.clone())]
+                            .into_iter()
+                            .collect(),
+                        gt_token: token::Gt::default(),
+                    });
+                let item_iter_ty = item_iter_ty.into();
+
+                let extra_defs = parts
+                    .make_as_item_iter_statemachine(
+                        &container_name.child(self.member.clone()),
+                        &state_ty_ident,
+                        "",
+                        lifetime,
+                    )?
+                    .with_augmented_init(|init| {
+                        quote! {
+                            let name = (
+                                ::xso::exports::rxml::Namespace::from(#xml_namespace),
+                                ::std::borrow::Cow::Borrowed(#xml_name),
+                            );
+                            #init
+                        }
+                    })
+                    .compile()
+                    .render(
+                        &Visibility::Inherited,
+                        &Type::Tuple(TypeTuple {
+                            paren_token: token::Paren::default(),
+                            elems: [ref_ty(self.ty.clone(), lifetime.clone())]
+                                .into_iter()
+                                .collect(),
+                        }),
+                        &state_ty_ident,
+                        lifetime,
+                        &item_iter_ty,
+                    )?;
+
+                Ok(FieldIteratorPart::Content {
+                    extra_defs,
+                    value: FieldTempInit {
+                        init: quote! {
+                            #item_iter_ty_ident::new((&#bound_name,))?
+                        },
+                        ty: item_iter_ty,
+                    },
+                    generator: quote! {
+                        #bound_name.next().transpose()
+                    },
+                })
+            }
         }
     }
 

xso-proc/src/meta.rs 🔗

@@ -638,18 +638,42 @@ pub(crate) enum XmlFieldMeta {
 
     /// `#[xml(text)]`
     Text {
+        /// The span of the `#[xml(text)]` meta from which this was parsed.
+        ///
+        /// This is useful for error messages.
+        span: Span,
+
         /// The path to the optional codec type.
         codec: Option<Expr>,
     },
 
     /// `#[xml(child)`
     Child {
+        /// The span of the `#[xml(child)]` meta from which this was parsed.
+        ///
+        /// This is useful for error messages.
+        span: Span,
+
         /// The `default` flag.
         default_: Flag,
 
         /// The `n` flag.
         amount: Option<AmountConstraint>,
     },
+
+    /// `#[xml(extract)]`
+    Extract {
+        /// The span of the `#[xml(extract)]` meta from which this was parsed.
+        ///
+        /// This is useful for error messages.
+        span: Span,
+
+        /// The namespace/name keys.
+        qname: QNameRef,
+
+        /// The `fields` nested meta.
+        fields: Vec<XmlFieldMeta>,
+    },
 }
 
 impl XmlFieldMeta {
@@ -725,7 +749,10 @@ impl XmlFieldMeta {
                     return Err(e);
                 }
             }
-            Ok(Self::Text { codec: Some(codec) })
+            Ok(Self::Text {
+                span: meta.path.span(),
+                codec: Some(codec),
+            })
         } else if meta.input.peek(syn::token::Paren) {
             let mut codec: Option<Expr> = None;
             meta.parse_nested_meta(|meta| {
@@ -750,9 +777,15 @@ impl XmlFieldMeta {
                     Err(Error::new_spanned(meta.path, "unsupported key"))
                 }
             })?;
-            Ok(Self::Text { codec })
+            Ok(Self::Text {
+                span: meta.path.span(),
+                codec,
+            })
         } else {
-            Ok(Self::Text { codec: None })
+            Ok(Self::Text {
+                span: meta.path.span(),
+                codec: None,
+            })
         }
     }
 
@@ -778,15 +811,53 @@ impl XmlFieldMeta {
                     Err(Error::new_spanned(meta.path, "unsupported key"))
                 }
             })?;
-            Ok(Self::Child { default_, amount })
+            Ok(Self::Child {
+                span: meta.path.span(),
+                default_,
+                amount,
+            })
         } else {
             Ok(Self::Child {
+                span: meta.path.span(),
                 default_: Flag::Absent,
                 amount: None,
             })
         }
     }
 
+    /// Parse a `#[xml(extract)]` meta.
+    fn extract_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
+        let mut qname = QNameRef::default();
+        let mut fields = None;
+        meta.parse_nested_meta(|meta| {
+            if meta.path.is_ident("fields") {
+                if let Some((fields_span, _)) = fields.as_ref() {
+                    let mut error = Error::new_spanned(meta.path, "duplicate `fields` meta");
+                    error.combine(Error::new(*fields_span, "previous `fields` meta was here"));
+                    return Err(error);
+                }
+                let mut new_fields = Vec::new();
+                meta.parse_nested_meta(|meta| {
+                    new_fields.push(XmlFieldMeta::parse_from_meta(meta)?);
+                    Ok(())
+                })?;
+                fields = Some((meta.path.span(), new_fields));
+                Ok(())
+            } else {
+                match qname.parse_incremental_from_meta(meta)? {
+                    None => Ok(()),
+                    Some(meta) => Err(Error::new_spanned(meta.path, "unsupported key")),
+                }
+            }
+        })?;
+        let fields = fields.map(|(_, x)| x).unwrap_or_else(Vec::new);
+        Ok(Self::Extract {
+            span: meta.path.span(),
+            qname,
+            fields,
+        })
+    }
+
     /// Parse [`Self`] from a nested meta, switching on the identifier
     /// of that nested meta.
     fn parse_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
@@ -796,6 +867,8 @@ impl XmlFieldMeta {
             Self::text_from_meta(meta)
         } else if meta.path.is_ident("child") {
             Self::child_from_meta(meta)
+        } else if meta.path.is_ident("extract") {
+            Self::extract_from_meta(meta)
         } else {
             Err(Error::new_spanned(meta.path, "unsupported field meta"))
         }
@@ -868,4 +941,15 @@ impl XmlFieldMeta {
             Err(Error::new(*err_span, "missing #[xml(..)] meta on field"))
         }
     }
+
+    /// Return a span which points at the meta which constructed this
+    /// XmlFieldMeta.
+    pub(crate) fn span(&self) -> Span {
+        match self {
+            Self::Attribute { ref span, .. } => *span,
+            Self::Child { ref span, .. } => *span,
+            Self::Text { ref span, .. } => *span,
+            Self::Extract { ref span, .. } => *span,
+        }
+    }
 }

xso-proc/src/scope.rs 🔗

@@ -52,11 +52,15 @@ pub(crate) struct FromEventsScope {
     ///
     /// See [`crate::field::FieldBuilderPart::Nested`].
     pub(crate) substate_result: Ident,
+
+    /// Prefix which should be used for any types which are declared, to
+    /// ensure they don't collide with other names.
+    pub(crate) type_prefix: Ident,
 }
 
 impl FromEventsScope {
     /// Create a fresh scope with all necessary identifiers.
-    pub(crate) fn new() -> Self {
+    pub(crate) fn new(type_prefix: Ident) -> Self {
         // Sadly, `Ident::new` is not `const`, so we have to create even the
         // well-known identifiers from scratch all the time.
         Self {
@@ -65,6 +69,7 @@ impl FromEventsScope {
             builder_data_ident: Ident::new("__xso_proc_macro_builder_data", Span::call_site()),
             substate_data: Ident::new("__xso_proc_macro_substate_data", Span::call_site()),
             substate_result: Ident::new("__xso_proc_macro_substate_result", Span::call_site()),
+            type_prefix,
         }
     }
 
@@ -84,6 +89,24 @@ impl FromEventsScope {
             member: Member::Named(mangle_member(member)),
         })
     }
+
+    /// Generate an ident with proper scope and span from the type prefix and
+    /// the given member and actual type name.
+    ///
+    /// Due to being merged with the type prefix of this scope and the given
+    /// member, this type name is guaranteed to be unique for unique values of
+    /// `name`.
+    pub(crate) fn make_member_type_name(&self, member: &Member, name: &str) -> Ident {
+        quote::format_ident!(
+            "{}Member{}{}",
+            self.type_prefix,
+            match member {
+                Member::Named(ref ident) => ident.to_string(),
+                Member::Unnamed(Index { index, .. }) => index.to_string(),
+            },
+            name,
+        )
+    }
 }
 
 /// Container struct for various identifiers used throughout the generator
@@ -97,13 +120,18 @@ impl FromEventsScope {
 pub(crate) struct AsItemsScope {
     /// Lifetime for data borrowed by the implementation.
     pub(crate) lifetime: Lifetime,
+
+    /// Prefix which should be used for any types which are declared, to
+    /// ensure they don't collide with other names.
+    pub(crate) type_prefix: Ident,
 }
 
 impl AsItemsScope {
     /// Create a fresh scope with all necessary identifiers.
-    pub(crate) fn new(lifetime: &Lifetime) -> Self {
+    pub(crate) fn new(lifetime: &Lifetime, type_prefix: Ident) -> Self {
         Self {
             lifetime: lifetime.clone(),
+            type_prefix,
         }
     }
 
@@ -112,6 +140,24 @@ impl AsItemsScope {
     pub(crate) fn borrow(&self, ty: Type) -> Type {
         ref_ty(ty, self.lifetime.clone())
     }
+
+    /// Generate an ident with proper scope and span from the type prefix and
+    /// the given member and actual type name.
+    ///
+    /// Due to being merged with the type prefix of this scope and the given
+    /// member, this type name is guaranteed to be unique for unique values of
+    /// `name`.
+    pub(crate) fn make_member_type_name(&self, member: &Member, name: &str) -> Ident {
+        quote::format_ident!(
+            "{}Member{}{}",
+            self.type_prefix,
+            match member {
+                Member::Named(ref ident) => ident.to_string(),
+                Member::Unnamed(Index { index, .. }) => index.to_string(),
+            },
+            name,
+        )
+    }
 }
 
 pub(crate) fn mangle_member(member: &Member) -> Ident {

xso-proc/src/state.rs 🔗

@@ -638,7 +638,7 @@ impl AsItemsStateMachine {
     pub(crate) fn render(
         self,
         vis: &Visibility,
-        input_ty: &Type,
+        input_ty_ref: &Type,
         state_ty_ident: &Ident,
         item_iter_ty_lifetime: &Lifetime,
         item_iter_ty: &Type,
@@ -650,8 +650,8 @@ impl AsItemsStateMachine {
             mut variants,
         } = self;
 
-        let input_ty_ref = make_ty_ref(input_ty);
-        let docstr = format!("Convert a {0} into XML events.\n\nThis type is generated using the [`macro@xso::AsXml`] derive macro and implements [`std::iter:Iterator`] for {0}.", input_ty_ref);
+        let input_ty_ref_text = make_ty_ref(input_ty_ref);
+        let docstr = format!("Convert a {0} into XML events.\n\nThis type is generated using the [`macro@xso::AsXml`] derive macro and implements [`std::iter:Iterator`] for {0}.", input_ty_ref_text);
 
         let init_body = if variants.len() == 1 {
             let AsItemsEntryPoint { destructure, init } = variants.remove(0);
@@ -691,7 +691,7 @@ impl AsItemsStateMachine {
                 }
 
                 fn new(
-                    value: &#item_iter_ty_lifetime #input_ty,
+                    value: #input_ty_ref,
                 ) -> ::core::result::Result<Self, ::xso::error::Error> {
                     ::core::result::Result::Ok(#init_body)
                 }
@@ -729,7 +729,7 @@ impl AsItemsStateMachine {
             }
 
             impl<#item_iter_ty_lifetime> #item_iter_ty {
-                fn new(value: &#item_iter_ty_lifetime #input_ty) -> ::core::result::Result<Self, ::xso::error::Error> {
+                fn new(value: #input_ty_ref) -> ::core::result::Result<Self, ::xso::error::Error> {
                     #state_ty_ident::new(value).map(|ok| Self(::core::option::Option::Some(ok)))
                 }
             }
@@ -765,6 +765,7 @@ fn doc_link_path(ty: &Type) -> Option<String> {
             }
             Some(buf)
         }
+        Type::Reference(TypeReference { ref elem, .. }) => doc_link_path(elem),
         _ => None,
     }
 }

xso-proc/src/structs.rs 🔗

@@ -13,6 +13,7 @@ use syn::*;
 use crate::common::{AsXmlParts, FromXmlParts, ItemDef};
 use crate::compound::Compound;
 use crate::meta::{reject_key, Flag, NameRef, NamespaceRef, QNameRef, XmlCompoundMeta};
+use crate::types::{ref_ty, ty_from_ident};
 
 /// Definition of a struct and how to parse it.
 pub(crate) struct StructDef {
@@ -178,6 +179,7 @@ impl ItemDef for StructDef {
             .inner
             .make_as_item_iter_statemachine(
                 &Path::from(target_ty_ident.clone()).into(),
+                &state_ty_ident,
                 "Struct",
                 &item_iter_ty_lifetime,
             )?
@@ -193,11 +195,10 @@ impl ItemDef for StructDef {
             .compile()
             .render(
                 vis,
-                &TypePath {
-                    qself: None,
-                    path: target_ty_ident.clone().into(),
-                }
-                .into(),
+                &ref_ty(
+                    ty_from_ident(target_ty_ident.clone()).into(),
+                    item_iter_ty_lifetime.clone(),
+                ),
                 &state_ty_ident,
                 &item_iter_ty_lifetime,
                 &item_iter_ty,

xso-proc/src/types.rs 🔗

@@ -745,3 +745,11 @@ pub(crate) fn extend_fn(of_ty: Type, item_ty: Type) -> Expr {
         },
     })
 }
+
+/// Construct a [`syn::TypePath`] which references the given type name.
+pub(crate) fn ty_from_ident(ident: Ident) -> TypePath {
+    TypePath {
+        qself: None,
+        path: ident.into(),
+    }
+}

xso/ChangeLog 🔗

@@ -18,6 +18,8 @@ Version NEXT:
       - Support for overriding the names of the types generated by the derive
         macros.
       - Support for deriving FromXml and AsXml on enums.
+      - Support for extracting data from child elements without intermediate
+        structs.
 
 Version 0.1.2:
 2024-07-26 Jonas Schäfer <jonas@zombofant.net>

xso/src/from_xml_doc.md 🔗

@@ -37,6 +37,8 @@ such:
 - *type*: A Rust type.
 - *expression*: A Rust expression.
 - *ident*: A Rust identifier.
+- *nested*: The meta is followed by parentheses, inside of which meta-specific
+  additional keys are present.
 - flag: Has no value. The key's mere presence has relevance and it must not be
   followed by a `=` sign.
 
@@ -148,6 +150,7 @@ The following mapping types are defined:
 | --- | --- |
 | [`attribute`](#attribute-meta) | Map the field to an XML attribute on the struct's element |
 | [`child`](#child-meta) | Map the field to a child element |
+| [`extract`](#extract-meta) | Map the field to contents of a child element of specified structure |
 | [`text`](#text-meta) | Map the field to the text content of the struct's element |
 
 #### `attribute` meta
@@ -294,6 +297,62 @@ assert_eq!(parent, Parent {
 });
 ```
 
+#### `extract` meta
+
+The `extract` meta causes the field to be mapped to the *contents* of a child
+element.
+
+The following keys can be used inside the `#[xml(extract(..))]` meta:
+
+| Key | Value type | Description |
+| --- | --- | --- |
+| `namespace` | *string literal* or *path* | The XML namespace of the child element. |
+| `name` | *string literal* or *path* | The XML name of the child element. If it is a *path*, it must point at a `&'static NcNameStr`. |
+| `fields` | *nested* | A list of [field meta](#field-meta) which describe the contents of the child element. |
+
+If the `name` key contains a namespace prefix, it must be one of the prefixes
+defined as built-in in the XML specifications. That prefix will then be
+expanded to the corresponding namespace URI and the value for the `namespace`
+key is implied. Mixing a prefixed name with an explicit `namespace` key is
+not allowed.
+
+The sequence of field meta inside `fields` can be thought of as a nameless
+tuple-style struct. The macro generates serialisation/deserialisation code
+for that nameless tuple-style struct and uses it to serialise/deserialise
+the field.
+
+**Note:** Currently, only a single field can be extracted. This restriction
+will be lifted in the future. Collections are not supported yet, either.
+
+Using `extract` instead of `child` combined with a specific struct declaration
+comes with trade-offs. On the one hand, using `extract` gives you flexibility
+with regard to the specific serialisation of a field: it is possible to exchange
+a nested child element for an attribute without changing the Rust interface
+of the struct.
+
+On the other hand, `extract` meta declarations can quickly become unwieldy
+and they may not support all configuration options which may in the future be
+added on structs (such as configuring handling of undeclared attributes) and
+they cannot be used for enumerations.
+
+##### Example
+
+```rust
+# use xso::FromXml;
+#[derive(FromXml, Debug, PartialEq)]
+#[xml(namespace = "urn:example", name = "foo")]
+struct Foo {
+    #[xml(extract(namespace = "urn:example", name = "bar", fields(attribute = "a")))]
+    a: String,
+}
+
+let foo: Foo = xso::from_bytes(b"<foo
+    xmlns='urn:example'><bar a='xyz'/></foo>").unwrap();
+assert_eq!(foo, Foo {
+    a: "xyz".to_string(),
+});
+```
+
 #### `text` meta
 
 The `text` meta causes the field to be mapped to the text content of the