xso-proc: add support for text codecs

Jonas Schäfer created

Text codecs allow customizing the conversion of data from/to XML,
in particular in two scenarios:

1. When the type for which the behaviour is to be defined comes from a
   foreign crate, preventing the implementation of
   FromXmlText/IntoXmlText.

2. When there is not one obvious, or more than one sensible, way to
   convert a value to XML text and back.

Change summary

parsers/src/util/macro_tests.rs | 27 ++++++++++++
xso-proc/src/compound.rs        |  4 
xso-proc/src/field.rs           | 46 +++++++++++++++------
xso-proc/src/meta.rs            | 29 ++++++++++++-
xso-proc/src/types.rs           | 73 +++++++++++++++++++++++++++++++++++
xso/src/from_xml_doc.md         | 43 +++++++++++++++++--
xso/src/lib.rs                  |  5 +
xso/src/text.rs                 | 57 +++++++++++++++++++++++++++
8 files changed, 258 insertions(+), 26 deletions(-)

Detailed changes

parsers/src/util/macro_tests.rs 🔗

@@ -464,3 +464,30 @@ fn fails_text_without_text_consumer_positive() {
         other => panic!("unexpected result: {:?}", other),
     }
 }
+
+#[derive(FromXml, IntoXml, PartialEq, Debug, Clone)]
+#[xml(namespace = NS1, name = "text")]
+struct TextWithCodec {
+    #[xml(text(codec = xso::text::EmptyAsNone))]
+    text: std::option::Option<String>,
+}
+
+#[test]
+fn text_with_codec_roundtrip_empty() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<TextWithCodec>("<text xmlns='urn:example:ns1'/>");
+}
+
+#[test]
+fn text_with_codec_roundtrip_non_empty() {
+    #[allow(unused_imports)]
+    use std::{
+        option::Option::{None, Some},
+        result::Result::{Err, Ok},
+    };
+    roundtrip_full::<TextWithCodec>("<text xmlns='urn:example:ns1'>hello</text>");
+}

xso-proc/src/compound.rs 🔗

@@ -280,9 +280,9 @@ impl Compound {
                         State::new(state_name)
                             .with_field(&bound_name, field.ty())
                             .with_impl(quote! {
-                                ::core::option::Option::Some(::xso::exports::rxml::Event::Text(
+                                #generator.map(|value| ::xso::exports::rxml::Event::Text(
                                     ::xso::exports::rxml::parser::EventMetrics::zero(),
-                                    #generator,
+                                    value,
                                 ))
                             }),
                     );

xso-proc/src/field.rs 🔗

@@ -17,6 +17,7 @@ use crate::meta::{Flag, NameRef, NamespaceRef, XmlFieldMeta};
 use crate::scope::{FromEventsScope, IntoEventsScope};
 use crate::types::{
     default_fn, from_xml_text_fn, into_optional_xml_text_fn, into_xml_text_fn, string_ty,
+    text_codec_decode_fn, text_codec_encode_fn,
 };
 
 /// Code slices necessary for declaring and initializing a temporary variable
@@ -98,7 +99,10 @@ enum FieldKind {
     },
 
     /// The field maps to the character data of the element.
-    Text,
+    Text {
+        /// Optional codec to use
+        codec: Option<Type>,
+    },
 }
 
 impl FieldKind {
@@ -143,7 +147,7 @@ impl FieldKind {
                 })
             }
 
-            XmlFieldMeta::Text => Ok(Self::Text),
+            XmlFieldMeta::Text { codec } => Ok(Self::Text { codec }),
         }
     }
 }
@@ -257,10 +261,21 @@ impl FieldDef {
                 })
             }
 
-            FieldKind::Text => {
+            FieldKind::Text { ref codec } => {
                 let FromEventsScope { ref text, .. } = scope;
                 let field_access = scope.access_field(&self.member);
-                let from_xml_text = from_xml_text_fn(self.ty.clone());
+                let finalize = match codec {
+                    Some(codec_ty) => {
+                        let decode = text_codec_decode_fn(codec_ty.clone(), self.ty.clone());
+                        quote! {
+                            #decode(#field_access)?
+                        }
+                    }
+                    None => {
+                        let from_xml_text = from_xml_text_fn(self.ty.clone());
+                        quote! { #from_xml_text(#field_access)? }
+                    }
+                };
 
                 Ok(FieldBuilderPart::Text {
                     value: FieldTempInit {
@@ -270,9 +285,7 @@ impl FieldDef {
                     collect: quote! {
                         #field_access.push_str(#text.as_str());
                     },
-                    finalize: quote! {
-                        #from_xml_text(#field_access)?
-                    },
+                    finalize,
                 })
             }
         }
@@ -318,14 +331,19 @@ impl FieldDef {
                 })
             }
 
-            FieldKind::Text => {
-                let into_xml_text = into_xml_text_fn(self.ty.clone());
+            FieldKind::Text { ref codec } => {
+                let generator = match codec {
+                    Some(codec_ty) => {
+                        let encode = text_codec_encode_fn(codec_ty.clone(), self.ty.clone());
+                        quote! { #encode(#bound_name)? }
+                    }
+                    None => {
+                        let into_xml_text = into_xml_text_fn(self.ty.clone());
+                        quote! { ::core::option::Option::Some(#into_xml_text(#bound_name)?) }
+                    }
+                };
 
-                Ok(FieldIteratorPart::Text {
-                    generator: quote! {
-                        #into_xml_text(#bound_name)?
-                    },
-                })
+                Ok(FieldIteratorPart::Text { generator })
             }
         }
     }

xso-proc/src/meta.rs 🔗

@@ -312,7 +312,10 @@ pub(crate) enum XmlFieldMeta {
     },
 
     /// `#[xml(text)]`
-    Text,
+    Text {
+        /// The path to the optional codec type.
+        codec: Option<Type>,
+    },
 }
 
 impl XmlFieldMeta {
@@ -393,8 +396,28 @@ impl XmlFieldMeta {
     }
 
     /// Parse a `#[xml(text)]` meta.
-    fn text_from_meta(_: ParseNestedMeta<'_>) -> Result<Self> {
-        Ok(Self::Text)
+    fn text_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
+        let mut codec: Option<Type> = None;
+        if meta.input.peek(Token![=]) {
+            Ok(Self::Text {
+                codec: Some(meta.value()?.parse()?),
+            })
+        } else if meta.input.peek(syn::token::Paren) {
+            meta.parse_nested_meta(|meta| {
+                if meta.path.is_ident("codec") {
+                    if codec.is_some() {
+                        return Err(Error::new_spanned(meta.path, "duplicate `codec` key"));
+                    }
+                    codec = Some(meta.value()?.parse()?);
+                    Ok(())
+                } else {
+                    Err(Error::new_spanned(meta.path, "unsupported key"))
+                }
+            })?;
+            Ok(Self::Text { codec })
+        } else {
+            Ok(Self::Text { codec: None })
+        }
     }
 
     /// Parse [`Self`] from a nested meta, switching on the identifier

xso-proc/src/types.rs 🔗

@@ -220,3 +220,76 @@ pub(crate) fn into_xml_text_fn(ty: Type) -> Expr {
         },
     })
 }
+
+/// Construct a [`syn::TypePath`] referring to
+/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>` and return the
+/// [`syn::Span`] of the `codec_ty` alongside it.
+fn text_codec_of(codec_ty: Type, for_ty: Type) -> (Span, TypePath) {
+    let span = codec_ty.span();
+    (
+        span,
+        TypePath {
+            qself: Some(QSelf {
+                lt_token: syn::token::Lt { spans: [span] },
+                ty: Box::new(codec_ty),
+                position: 2,
+                as_token: Some(syn::token::As { span }),
+                gt_token: syn::token::Gt { spans: [span] },
+            }),
+            path: Path {
+                leading_colon: Some(syn::token::PathSep {
+                    spans: [span, span],
+                }),
+                segments: [
+                    PathSegment {
+                        ident: Ident::new("xso", span),
+                        arguments: PathArguments::None,
+                    },
+                    PathSegment {
+                        ident: Ident::new("TextCodec", span),
+                        arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments {
+                            colon2_token: Some(syn::token::PathSep {
+                                spans: [span, span],
+                            }),
+                            lt_token: syn::token::Lt { spans: [span] },
+                            args: [GenericArgument::Type(for_ty)].into_iter().collect(),
+                            gt_token: syn::token::Gt { spans: [span] },
+                        }),
+                    },
+                ]
+                .into_iter()
+                .collect(),
+            },
+        },
+    )
+}
+
+/// Construct a [`syn::Expr`] referring to
+/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::encode`.
+pub(crate) fn text_codec_encode_fn(codec_ty: Type, for_ty: Type) -> Expr {
+    let (span, mut ty) = text_codec_of(codec_ty, for_ty);
+    ty.path.segments.push(PathSegment {
+        ident: Ident::new("encode", span),
+        arguments: PathArguments::None,
+    });
+    Expr::Path(ExprPath {
+        attrs: Vec::new(),
+        qself: ty.qself,
+        path: ty.path,
+    })
+}
+
+/// Construct a [`syn::Expr`] referring to
+/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::decode`.
+pub(crate) fn text_codec_decode_fn(codec_ty: Type, for_ty: Type) -> Expr {
+    let (span, mut ty) = text_codec_of(codec_ty, for_ty);
+    ty.path.segments.push(PathSegment {
+        ident: Ident::new("decode", span),
+        arguments: PathArguments::None,
+    });
+    Expr::Path(ExprPath {
+        attrs: Vec::new(),
+        qself: ty.qself,
+        path: ty.path,
+    })
+}

xso/src/from_xml_doc.md 🔗

@@ -34,6 +34,7 @@ such:
 - *path*: A Rust path, like `some_crate::foo::Bar`. Note that `foo` on its own
   is also a path.
 - *string literal*: A string literal, like `"hello world!"`.
+- *type*: A Rust type.
 - flag: Has no value. The key's mere presence has relevance and it must not be
   followed by a `=` sign.
 
@@ -137,14 +138,27 @@ assert_eq!(foo, Foo {
 #### `text` meta
 
 The `text` meta causes the field to be mapped to the text content of the
-element. For `FromXml`, the field's type must implement [`FromXmlText`] and
-for `IntoXml`, the field's type must implement [`IntoXmlText`].
+element.
 
-The `text` meta supports no options or value. Only a single field per struct
-may be annotated with `#[xml(text)]` at a time, to avoid parsing ambiguities.
-This is also true if only `IntoXml` is derived on a field, for consistency.
+| Key | Value type | Description |
+| --- | --- | --- |
+| `codec` | *type* | Optional [`TextCodec`] implementation which is used to encode or decode the field. |
 
-##### Example
+If `codec` is given, the given `codec` must implement
+[`TextCodec<T>`][`TextCodec`] where `T` is the type of the field.
+
+If `codec` is *not* given, the field's type must implement [`FromXmlText`] for
+`FromXml` and for `IntoXml`, the field's type must implement [`IntoXmlText`].
+
+The `text` meta also supports a shorthand syntax, `#[xml(text = ..)]`, where
+the value is treated as the value for the `codec` key (with optional prefix as
+described above, and unnamespaced otherwise).
+
+Only a single field per struct may be annotated with `#[xml(text)]` at a time,
+to avoid parsing ambiguities. This is also true if only `IntoXml` is derived on
+a field, for consistency.
+
+##### Example without codec
 
 ```rust
 # use xso::FromXml;
@@ -160,3 +174,20 @@ assert_eq!(foo, Foo {
     a: "hello".to_string(),
 });
 ```
+
+##### Example with codec
+
+```rust
+# use xso::FromXml;
+#[derive(FromXml, Debug, PartialEq)]
+#[xml(namespace = "urn:example", name = "foo")]
+struct Foo {
+    #[xml(text = xso::text::EmptyAsNone)]
+    a: Option<String>,
+};
+
+let foo: Foo = xso::from_bytes(b"<foo xmlns='urn:example'/>").unwrap();
+assert_eq!(foo, Foo {
+    a: None,
+});
+```

xso/src/lib.rs 🔗

@@ -24,7 +24,7 @@ pub mod error;
 #[cfg(feature = "minidom")]
 #[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
 pub mod minidom_compat;
-mod text;
+pub mod text;
 
 #[doc(hidden)]
 pub mod exports {
@@ -35,6 +35,9 @@ pub mod exports {
 
 use std::borrow::Cow;
 
+#[doc(inline)]
+pub use text::TextCodec;
+
 #[doc = include_str!("from_xml_doc.md")]
 #[doc(inline)]
 #[cfg(feature = "macros")]

xso/src/text.rs 🔗

@@ -103,3 +103,60 @@ convert_via_fromstr_and_display! {
     #[cfg(feature = "jid")]
     jid::BareJid,
 }
+
+/// Represent a way to encode/decode text data into a Rust type.
+///
+/// This trait can be used in scenarios where implementing [`FromXmlText`]
+/// and/or [`IntoXmlText`] on a type is not feasible or sensible, such as the
+/// following:
+///
+/// 1. The type originates in a foreign crate, preventing the implementation
+///    of foreign traits.
+///
+/// 2. There is more than one way to convert a value to/from XML.
+///
+/// The codec to use for a text can be specified in the attributes understood
+/// by `FromXml` and `IntoXml` derive macros. See the documentation of the
+/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
+pub trait TextCodec<T> {
+    /// Decode a string value into the type.
+    fn decode(s: String) -> Result<T, Error>;
+
+    /// Encode the type as string value.
+    ///
+    /// If this returns `None`, the string value is not emitted at all.
+    fn encode(value: T) -> Result<Option<String>, Error>;
+}
+
+/// Text codec which does no transform.
+pub struct Plain;
+
+impl TextCodec<String> for Plain {
+    fn decode(s: String) -> Result<String, Error> {
+        Ok(s)
+    }
+
+    fn encode(value: String) -> Result<Option<String>, Error> {
+        Ok(Some(value))
+    }
+}
+
+/// Text codec which returns None instead of the empty string.
+pub struct EmptyAsNone;
+
+impl TextCodec<Option<String>> for EmptyAsNone {
+    fn decode(s: String) -> Result<Option<String>, Error> {
+        if s.len() == 0 {
+            Ok(None)
+        } else {
+            Ok(Some(s))
+        }
+    }
+
+    fn encode(value: Option<String>) -> Result<Option<String>, Error> {
+        Ok(match value {
+            Some(v) if v.len() > 0 => Some(v),
+            Some(_) | None => None,
+        })
+    }
+}