xso-proc: add support for parsing text content

Jonas Schäfer created

Change summary

parsers/src/util/macro_tests.rs | 34 +++++++++++++++
xso-proc/src/compound.rs        | 76 ++++++++++++++++++++++++++++++++--
xso-proc/src/field.rs           | 70 +++++++++++++++++++++++++++++--
xso-proc/src/meta.rs            | 11 +++++
xso-proc/src/scope.rs           | 30 +++++++++++++
xso-proc/src/types.rs           | 65 +++++++++++++++++++++++++++++
xso/src/from_xml_doc.md         | 26 +++++++++++
7 files changed, 301 insertions(+), 11 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs 🔗

@@ -380,3 +380,37 @@ fn default_attribute_roundtrip_pp() {
     };
     roundtrip_full::<DefaultAttribute>("<attr xmlns='urn:example:ns1' foo='xyz' bar='16'/>");
 }
+
+#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "text")]
+struct TextString {
+    #[xml(text)]
+    text: String,
+}
+
+#[test]
+fn text_string_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<TextString>("<text xmlns='urn:example:ns1'>hello world!</text>");
+}
+
+#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "text")]
+struct TextNonString {
+    #[xml(text)]
+    text: u32,
+}
+
+#[test]
+fn text_non_string_roundtrip() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<TextNonString>("<text xmlns='urn:example:ns1'>123456</text>");
+}

xso-proc/src/compound.rs 🔗

@@ -56,10 +56,14 @@ impl Compound {
         state_prefix: &str,
     ) -> Result<FromEventsSubmachine> {
         let scope = FromEventsScope::new();
-        let FromEventsScope { ref attrs, .. } = scope;
+        let FromEventsScope {
+            ref attrs,
+            ref builder_data_ident,
+            ref text,
+            ..
+        } = scope;
 
         let default_state_ident = quote::format_ident!("{}Default", state_prefix);
-        let builder_data_ident = quote::format_ident!("__data");
         let builder_data_ty: Type = TypePath {
             qself: None,
             path: quote::format_ident!("{}Data{}", state_ty_ident, state_prefix).into(),
@@ -70,6 +74,7 @@ impl Compound {
         let mut builder_data_def = TokenStream::default();
         let mut builder_data_init = TokenStream::default();
         let mut output_cons = TokenStream::default();
+        let mut text_handler = None;
 
         for field in self.fields.iter() {
             let member = field.member();
@@ -92,9 +97,45 @@ impl Compound {
                         #member: #builder_data_ident.#builder_field_name,
                     });
                 }
+
+                FieldBuilderPart::Text {
+                    value: FieldTempInit { ty, init },
+                    collect,
+                    finalize,
+                } => {
+                    if text_handler.is_some() {
+                        return Err(Error::new_spanned(
+                            field.member(),
+                            "more than one field attempts to collect text data",
+                        ));
+                    }
+
+                    builder_data_def.extend(quote! {
+                        #builder_field_name: #ty,
+                    });
+                    builder_data_init.extend(quote! {
+                        #builder_field_name: #init,
+                    });
+                    text_handler = Some(quote! {
+                        #collect
+                        ::core::result::Result::Ok(::std::ops::ControlFlow::Break(
+                            Self::#default_state_ident { #builder_data_ident }
+                        ))
+                    });
+                    output_cons.extend(quote! {
+                        #member: #finalize,
+                    });
+                }
             }
         }
 
+        let text_handler = match text_handler {
+            Some(v) => v,
+            None => quote! {
+                ::core::result::Result::Err(::xso::error::Error::Other("Unexpected text content".into()))
+            },
+        };
+
         let unknown_attr_err = format!("Unknown attribute in {}.", output_name);
         let unknown_child_err = format!("Unknown child in {}.", output_name);
 
@@ -121,8 +162,8 @@ impl Compound {
                 ::xso::exports::rxml::Event::StartElement(..) => {
                     ::core::result::Result::Err(::xso::error::Error::Other(#unknown_child_err))
                 }
-                ::xso::exports::rxml::Event::Text(..) => {
-                    ::core::result::Result::Err(::xso::error::Error::Other("Unexpected text content".into()))
+                ::xso::exports::rxml::Event::Text(_, #text) => {
+                    #text_handler
                 }
                 // we ignore these: a correct parser only generates
                 // them at document start, and there we want to indeed
@@ -186,10 +227,11 @@ impl Compound {
                 .with_field(&name_ident, &qname_ty(Span::call_site())),
         );
 
-        for field in self.fields.iter() {
+        for (i, field) in self.fields.iter().enumerate() {
             let member = field.member();
             let bound_name = mangle_member(member);
             let part = field.make_iterator_part(&scope, &bound_name)?;
+            let state_name = quote::format_ident!("{}Field{}", state_prefix, i);
 
             match part {
                 FieldIteratorPart::Header { setter } => {
@@ -202,6 +244,30 @@ impl Compound {
                     });
                     states[0].add_field(&bound_name, field.ty());
                 }
+
+                FieldIteratorPart::Text { generator } => {
+                    // we have to make sure that we carry our data around in
+                    // all the previous states.
+                    for state in states.iter_mut() {
+                        state.add_field(&bound_name, field.ty());
+                    }
+                    states.push(
+                        State::new(state_name)
+                            .with_field(&bound_name, field.ty())
+                            .with_impl(quote! {
+                                ::core::option::Option::Some(::xso::exports::rxml::Event::Text(
+                                    ::xso::exports::rxml::parser::EventMetrics::zero(),
+                                    #generator,
+                                ))
+                            }),
+                    );
+                    destructure.extend(quote! {
+                        #member: #bound_name,
+                    });
+                    start_init.extend(quote! {
+                        #bound_name,
+                    });
+                }
             }
         }
 

xso-proc/src/field.rs 🔗

@@ -6,7 +6,7 @@
 
 //! Compound (struct or enum variant) field types
 
-use proc_macro2::TokenStream;
+use proc_macro2::{Span, TokenStream};
 use quote::{quote, ToTokens};
 use syn::{spanned::Spanned, *};
 
@@ -15,7 +15,9 @@ use rxml_validation::NcName;
 use crate::error_message::{self, ParentRef};
 use crate::meta::{Flag, NameRef, NamespaceRef, XmlFieldMeta};
 use crate::scope::{FromEventsScope, IntoEventsScope};
-use crate::types::{default_fn, from_xml_text_fn, into_optional_xml_text_fn};
+use crate::types::{
+    default_fn, from_xml_text_fn, into_optional_xml_text_fn, into_xml_text_fn, string_ty,
+};
 
 /// Code slices necessary for declaring and initializing a temporary variable
 /// for parsing purposes.
@@ -40,6 +42,21 @@ pub(crate) enum FieldBuilderPart {
         /// element's start event.
         value: FieldTempInit,
     },
+
+    /// Parse a field from text events.
+    Text {
+        /// Expression and type which initializes a buffer to use during
+        /// parsing.
+        value: FieldTempInit,
+
+        /// Statement which takes text and accumulates it into the temporary
+        /// value declared via `value`.
+        collect: TokenStream,
+
+        /// Expression which evaluates to the field's type, consuming the
+        /// temporary value.
+        finalize: TokenStream,
+    },
 }
 
 /// Describe how a struct or enum variant's member is converted to XML data.
@@ -56,6 +73,13 @@ pub(crate) enum FieldIteratorPart {
         /// field's value.
         setter: TokenStream,
     },
+
+    /// The field is emitted as a text event.
+    Text {
+        /// An expression which consumes the field's value and returns a
+        /// String, which is then emitted as text data.
+        generator: TokenStream,
+    },
 }
 
 /// Specify how the field is mapped to XML.
@@ -72,6 +96,9 @@ enum FieldKind {
         // attribute is absent.
         default_: Flag,
     },
+
+    /// The field maps to the character data of the element.
+    Text,
 }
 
 impl FieldKind {
@@ -115,6 +142,8 @@ impl FieldKind {
                     default_,
                 })
             }
+
+            XmlFieldMeta::Text => Ok(Self::Text),
         }
     }
 }
@@ -215,7 +244,7 @@ impl FieldDef {
                     }
                 };
 
-                return Ok(FieldBuilderPart::Init {
+                Ok(FieldBuilderPart::Init {
                     value: FieldTempInit {
                         init: quote! {
                             match #attrs.remove(#xml_namespace, #xml_name).map(#from_xml_text).transpose()? {
@@ -225,7 +254,26 @@ impl FieldDef {
                         },
                         ty: self.ty.clone(),
                     },
-                });
+                })
+            }
+
+            FieldKind::Text => {
+                let FromEventsScope { ref text, .. } = scope;
+                let field_access = scope.access_field(&self.member);
+                let from_xml_text = from_xml_text_fn(self.ty.clone());
+
+                Ok(FieldBuilderPart::Text {
+                    value: FieldTempInit {
+                        init: quote! { ::std::string::String::new() },
+                        ty: string_ty(Span::call_site()),
+                    },
+                    collect: quote! {
+                        #field_access.push_str(#text.as_str());
+                    },
+                    finalize: quote! {
+                        #from_xml_text(#field_access)?
+                    },
+                })
             }
         }
     }
@@ -256,7 +304,7 @@ impl FieldDef {
 
                 let into_optional_xml_text = into_optional_xml_text_fn(self.ty.clone());
 
-                return Ok(FieldIteratorPart::Header {
+                Ok(FieldIteratorPart::Header {
                     // This is a neat little trick:
                     // Option::from(x) converts x to an Option<T> *unless* it
                     // already is an Option<_>.
@@ -267,7 +315,17 @@ impl FieldDef {
                             #bound_name,
                         ));
                     },
-                });
+                })
+            }
+
+            FieldKind::Text => {
+                let into_xml_text = into_xml_text_fn(self.ty.clone());
+
+                Ok(FieldIteratorPart::Text {
+                    generator: quote! {
+                        #into_xml_text(#bound_name)?
+                    },
+                })
             }
         }
     }

xso-proc/src/meta.rs 🔗

@@ -294,6 +294,7 @@ fn parse_prefixed_name(
 /// Contents of an `#[xml(..)]` attribute on a struct or enum variant member.
 #[derive(Debug)]
 pub(crate) enum XmlFieldMeta {
+    /// `#[xml(attribute)]`, `#[xml(attribute = ..)]` or `#[xml(attribute(..))]`
     Attribute {
         /// The span of the `#[xml(attribute)]` meta from which this was parsed.
         ///
@@ -309,6 +310,9 @@ pub(crate) enum XmlFieldMeta {
         /// The `default` flag.
         default_: Flag,
     },
+
+    /// `#[xml(text)]`
+    Text,
 }
 
 impl XmlFieldMeta {
@@ -388,11 +392,18 @@ impl XmlFieldMeta {
         }
     }
 
+    /// Parse a `#[xml(text)]` meta.
+    fn text_from_meta(_: ParseNestedMeta<'_>) -> Result<Self> {
+        Ok(Self::Text)
+    }
+
     /// Parse [`Self`] from a nestd meta, switching on the identifier
     /// of that nested meta.
     fn parse_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
         if meta.path.is_ident("attribute") {
             Self::attribute_from_meta(meta)
+        } else if meta.path.is_ident("text") {
+            Self::text_from_meta(meta)
         } else {
             Err(Error::new_spanned(meta.path, "unsupported field meta"))
         }

xso-proc/src/scope.rs 🔗

@@ -29,6 +29,17 @@ pub(crate) struct FromEventsScope {
     /// Accesses the `AttrMap` from code in
     /// [`crate::field::FieldBuilderPart::Init`].
     pub(crate) attrs: Ident,
+
+    /// Accesses the `String` of a `rxml::Event::Text` event from code in
+    /// [`crate::field::FieldBuilderPart::Text`].
+    pub(crate) text: Ident,
+
+    /// Accesses the builder data during parsing.
+    ///
+    /// This should not be used directly outside [`crate::compound`]. Most of
+    /// the time, using [`Self::access_field`] is the correct way to access
+    /// the builder data.
+    pub(crate) builder_data_ident: Ident,
 }
 
 impl FromEventsScope {
@@ -38,8 +49,27 @@ impl FromEventsScope {
         // well-known identifiers from scratch all the time.
         Self {
             attrs: Ident::new("attrs", Span::call_site()),
+            text: Ident::new("__xso_proc_macro_text_data", Span::call_site()),
+            builder_data_ident: Ident::new("__xso_proc_macro_builder_data", Span::call_site()),
         }
     }
+
+    /// Generate an expression which accesses the temporary value for the
+    /// given `member` during parsing.
+    pub(crate) fn access_field(&self, member: &Member) -> Expr {
+        Expr::Field(ExprField {
+            attrs: Vec::new(),
+            base: Box::new(Expr::Path(ExprPath {
+                attrs: Vec::new(),
+                qself: None,
+                path: self.builder_data_ident.clone().into(),
+            })),
+            dot_token: syn::token::Dot {
+                spans: [Span::call_site()],
+            },
+            member: Member::Named(mangle_member(member)),
+        })
+    }
 }
 
 /// Container struct for various identifiers used throughout the generator

xso-proc/src/types.rs 🔗

@@ -155,3 +155,68 @@ pub(crate) fn default_fn(of_ty: Type) -> Expr {
         },
     })
 }
+
+/// Construct a [`syn::Type`] referring to `::std::string::String`.
+pub(crate) fn string_ty(span: Span) -> Type {
+    Type::Path(TypePath {
+        qself: None,
+        path: Path {
+            leading_colon: Some(syn::token::PathSep {
+                spans: [span, span],
+            }),
+            segments: [
+                PathSegment {
+                    ident: Ident::new("std", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("string", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("String", span),
+                    arguments: PathArguments::None,
+                },
+            ]
+            .into_iter()
+            .collect(),
+        },
+    })
+}
+
+/// Construct a [`syn::Expr`] referring to
+/// `<#ty as ::xso::IntoXmlText>::into_xml_text`.
+pub(crate) fn into_xml_text_fn(ty: Type) -> Expr {
+    let span = ty.span();
+    Expr::Path(ExprPath {
+        attrs: Vec::new(),
+        qself: Some(QSelf {
+            lt_token: syn::token::Lt { spans: [span] },
+            ty: Box::new(ty),
+            position: 2,
+            as_token: Some(syn::token::As { span }),
+            gt_token: syn::token::Gt { spans: [span] },
+        }),
+        path: Path {
+            leading_colon: Some(syn::token::PathSep {
+                spans: [span, span],
+            }),
+            segments: [
+                PathSegment {
+                    ident: Ident::new("xso", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("IntoXmlText", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("into_xml_text", span),
+                    arguments: PathArguments::None,
+                },
+            ]
+            .into_iter()
+            .collect(),
+        },
+    })
+}

xso/src/from_xml_doc.md 🔗

@@ -68,6 +68,7 @@ The following mapping types are defined:
 | Type | Description |
 | --- | --- |
 | [`attribute`](#attribute-meta) | Map the field to an XML attribute on the struct's element |
+| [`text`](#text-meta) | Map the field to the text content of the struct's element |
 
 #### `attribute` meta
 
@@ -132,3 +133,28 @@ assert_eq!(foo, Foo {
     e: "5".to_string(),
 });
 ```
+
+#### `text` meta
+
+The `text` meta causes the field to be mapped to the text content of the
+element. For `FromXml`, the field's type must implement [`FromXmlText`] and
+for `IntoXml`, the field's type must implement [`IntoXmlText`].
+
+The `text` meta supports no options or value.
+
+##### Example
+
+```rust
+# use xso::FromXml;
+#[derive(FromXml, Debug, PartialEq)]
+#[xml(namespace = "urn:example", name = "foo")]
+struct Foo {
+    #[xml(text)]
+    a: String,
+};
+
+let foo: Foo = xso::from_bytes(b"<foo xmlns='urn:example'>hello</foo>").unwrap();
+assert_eq!(foo, Foo {
+    a: "hello".to_string(),
+});
+```