xso: use values instead of types for text codecs

Jonas Schäfer created

This allows stateful or configurable codecs without having to express
all configuration in the type name itself. For example, we could have a
Base64 type with configurable Base64 engines without having to duplicate
the Base64 type itself.

(Note that the different engines in the Base64 crate are values, not
types.)

Change summary

parsers/src/avatar.rs       |   4 
parsers/src/component.rs    |   2 
parsers/src/vcard.rs        |   4 
parsers/src/vcard_update.rs |   2 
xso-proc/src/field.rs       |  14 ++--
xso-proc/src/meta.rs        |   4 
xso-proc/src/types.rs       |  84 ++++++++++++---------------
xso/ChangeLog               |  17 +++++
xso/src/from_xml_doc.md     |   5 +
xso/src/text.rs             | 115 ++++++++++++++++++++++++++------------
10 files changed, 150 insertions(+), 101 deletions(-)

Detailed changes

parsers/src/avatar.rs 🔗

@@ -5,7 +5,7 @@
 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 use xso::{
-    text::{Base64, StripWhitespace},
+    text::{Base64, StripWhitespace, TextCodec},
     AsXml, FromXml,
 };
 
@@ -58,7 +58,7 @@ pub struct Info {
 #[xml(namespace = ns::AVATAR_DATA, name = "data")]
 pub struct Data {
     /// Vector of bytes representing the avatar’s image.
-    #[xml(text(codec = Base64<StripWhitespace>))]
+    #[xml(text(codec = Base64.filtered(StripWhitespace)))]
     pub data: Vec<u8>,
 }
 

parsers/src/component.rs 🔗

@@ -20,7 +20,7 @@ pub struct Handshake {
     ///
     /// If None, it is the successful reply from the server, the stream is now
     /// fully established and both sides can now exchange stanzas.
-    #[xml(text(codec = FixedHex<20>))]
+    #[xml(text(codec = FixedHex::<20>))]
     pub data: Option<[u8; 20]>,
 }
 

parsers/src/vcard.rs 🔗

@@ -15,7 +15,7 @@
 
 use xso::{
     error::Error,
-    text::{Base64, StripWhitespace},
+    text::{Base64, StripWhitespace, TextCodec},
     AsXml, FromXml,
 };
 
@@ -50,7 +50,7 @@ pub struct Type {
 #[xml(namespace = ns::VCARD, name = "BINVAL")]
 pub struct Binval {
     /// The actual data.
-    #[xml(text(codec = Base64<StripWhitespace>))]
+    #[xml(text(codec = Base64.filtered(StripWhitespace)))]
     pub data: Vec<u8>,
 }
 

parsers/src/vcard_update.rs 🔗

@@ -30,7 +30,7 @@ pub struct VCardUpdate {
 #[xml(namespace = ns::VCARD_UPDATE, name = "photo")]
 pub struct Photo {
     /// The SHA1 hash of the avatar. Empty when there is no photo.
-    #[xml(text(codec = FixedHex<20>))]
+    #[xml(text(codec = FixedHex::<20>))]
     pub data: Option<[u8; 20]>,
 }
 

xso-proc/src/field.rs 🔗

@@ -148,7 +148,7 @@ enum FieldKind {
     /// The field maps to the character data of the element.
     Text {
         /// Optional codec to use
-        codec: Option<Type>,
+        codec: Option<Expr>,
     },
 
     /// The field maps to a child
@@ -321,10 +321,10 @@ impl FieldDef {
                 let FromEventsScope { ref text, .. } = scope;
                 let field_access = scope.access_field(&self.member);
                 let finalize = match codec {
-                    Some(codec_ty) => {
-                        let decode = text_codec_decode_fn(codec_ty.clone(), self.ty.clone());
+                    Some(codec) => {
+                        let decode = text_codec_decode_fn(self.ty.clone());
                         quote! {
-                            #decode(#field_access)?
+                            #decode(&#codec, #field_access)?
                         }
                     }
                     None => {
@@ -429,9 +429,9 @@ impl FieldDef {
 
             FieldKind::Text { ref codec } => {
                 let generator = match codec {
-                    Some(codec_ty) => {
-                        let encode = text_codec_encode_fn(codec_ty.clone(), self.ty.clone());
-                        quote! { #encode(#bound_name)? }
+                    Some(codec) => {
+                        let encode = text_codec_encode_fn(self.ty.clone());
+                        quote! { #encode(&#codec, #bound_name)? }
                     }
                     None => {
                         let as_xml_text = as_xml_text_fn(self.ty.clone());

xso-proc/src/meta.rs 🔗

@@ -408,7 +408,7 @@ pub(crate) enum XmlFieldMeta {
     /// `#[xml(text)]`
     Text {
         /// The path to the optional codec type.
-        codec: Option<Type>,
+        codec: Option<Expr>,
     },
 
     /// `#[xml(child)`
@@ -497,7 +497,7 @@ impl XmlFieldMeta {
 
     /// Parse a `#[xml(text)]` meta.
     fn text_from_meta(meta: ParseNestedMeta<'_>) -> Result<Self> {
-        let mut codec: Option<Type> = None;
+        let mut codec: Option<Expr> = None;
         if meta.input.peek(Token![=]) {
             Ok(Self::Text {
                 codec: Some(meta.value()?.parse()?),

xso-proc/src/types.rs 🔗

@@ -298,76 +298,66 @@ pub(crate) fn as_xml_text_fn(ty: Type) -> Expr {
     })
 }
 
-/// Construct a [`syn::TypePath`] referring to
-/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>` and return the
-/// [`syn::Span`] of the `codec_ty` alongside it.
-fn text_codec_of(codec_ty: Type, for_ty: Type) -> (Span, TypePath) {
-    let span = codec_ty.span();
+/// Construct a [`syn::Path`] referring to `::xso::TextCodec::<#for_ty>`,
+/// returning the span of `for_ty` alongside it.
+fn text_codec_of(for_ty: Type) -> (Span, Path) {
+    let span = for_ty.span();
     (
         span,
-        TypePath {
-            qself: Some(QSelf {
-                lt_token: syn::token::Lt { spans: [span] },
-                ty: Box::new(codec_ty),
-                position: 2,
-                as_token: Some(syn::token::As { span }),
-                gt_token: syn::token::Gt { spans: [span] },
+        Path {
+            leading_colon: Some(syn::token::PathSep {
+                spans: [span, span],
             }),
-            path: Path {
-                leading_colon: Some(syn::token::PathSep {
-                    spans: [span, span],
-                }),
-                segments: [
-                    PathSegment {
-                        ident: Ident::new("xso", span),
-                        arguments: PathArguments::None,
-                    },
-                    PathSegment {
-                        ident: Ident::new("TextCodec", span),
-                        arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments {
-                            colon2_token: Some(syn::token::PathSep {
-                                spans: [span, span],
-                            }),
-                            lt_token: syn::token::Lt { spans: [span] },
-                            args: [GenericArgument::Type(for_ty)].into_iter().collect(),
-                            gt_token: syn::token::Gt { spans: [span] },
+            segments: [
+                PathSegment {
+                    ident: Ident::new("xso", span),
+                    arguments: PathArguments::None,
+                },
+                PathSegment {
+                    ident: Ident::new("TextCodec", span),
+                    arguments: PathArguments::AngleBracketed(AngleBracketedGenericArguments {
+                        colon2_token: Some(syn::token::PathSep {
+                            spans: [span, span],
                         }),
-                    },
-                ]
-                .into_iter()
-                .collect(),
-            },
+                        lt_token: syn::token::Lt { spans: [span] },
+                        args: [GenericArgument::Type(for_ty)].into_iter().collect(),
+                        gt_token: syn::token::Gt { spans: [span] },
+                    }),
+                },
+            ]
+            .into_iter()
+            .collect(),
         },
     )
 }
 
 /// Construct a [`syn::Expr`] referring to
-/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::encode`.
-pub(crate) fn text_codec_encode_fn(codec_ty: Type, for_ty: Type) -> Expr {
-    let (span, mut ty) = text_codec_of(codec_ty, for_ty);
-    ty.path.segments.push(PathSegment {
+/// `::xso::TextCodec::<#for_ty>::encode`.
+pub(crate) fn text_codec_encode_fn(for_ty: Type) -> Expr {
+    let (span, mut path) = text_codec_of(for_ty);
+    path.segments.push(PathSegment {
         ident: Ident::new("encode", span),
         arguments: PathArguments::None,
     });
     Expr::Path(ExprPath {
         attrs: Vec::new(),
-        qself: ty.qself,
-        path: ty.path,
+        qself: None,
+        path: path,
     })
 }
 
 /// Construct a [`syn::Expr`] referring to
-/// `<#codec_ty as ::xso::TextCodec::<#for_ty>>::decode`.
-pub(crate) fn text_codec_decode_fn(codec_ty: Type, for_ty: Type) -> Expr {
-    let (span, mut ty) = text_codec_of(codec_ty, for_ty);
-    ty.path.segments.push(PathSegment {
+/// `::xso::TextCodec::<#for_ty>::decode`.
+pub(crate) fn text_codec_decode_fn(for_ty: Type) -> Expr {
+    let (span, mut path) = text_codec_of(for_ty);
+    path.segments.push(PathSegment {
         ident: Ident::new("decode", span),
         arguments: PathArguments::None,
     });
     Expr::Path(ExprPath {
         attrs: Vec::new(),
-        qself: ty.qself,
-        path: ty.path,
+        qself: None,
+        path: path,
     })
 }
 

xso/ChangeLog 🔗

@@ -1,5 +1,22 @@
 Version NEXT:
 0000-00-00 Jonas Schäfer <jonas@zombofant.net>
+    * Breaking
+      - The methods of `TextCodec<T>` now have `&self` receivers. This also
+        implies that `#[xml(text(codec = ..))]` now takes expressions instead
+        of type paths.
+
+        Because all implementations provided by `xso` were in fact unit
+        structs, this should not change most invocations, with two exceptions:
+
+        1. The type argument of `Base64` was removed. Replace all
+           `Base64<Foo>` references with `Base64.filtered(Foo)` to update
+           your code.
+
+        2. `FixedHex<N>` is not a valid expression. You will have to update
+           your code to use `FixedHex::<N>` instead.
+
+        This change overall allows for more flexibility in the implementation
+        of text codecs.
     * Added
       - Support for child elements in derive macros. Child elements may also
         be wrapped in Option or Box.

xso/src/from_xml_doc.md 🔗

@@ -35,6 +35,7 @@ such:
   is also a path.
 - *string literal*: A string literal, like `"hello world!"`.
 - *type*: A Rust type.
+- *expression*: A Rust expression.
 - *ident*: A Rust identifier.
 - flag: Has no value. The key's mere presence has relevance and it must not be
   followed by a `=` sign.
@@ -258,9 +259,9 @@ element.
 
 | Key | Value type | Description |
 | --- | --- | --- |
-| `codec` | *type* | Optional [`TextCodec`] implementation which is used to encode or decode the field. |
+| `codec` | *expression* | Optional [`TextCodec`] implementation which is used to encode or decode the field. |
 
-If `codec` is given, the given `codec` must implement
+If `codec` is given, the given `codec` value must implement
 [`TextCodec<T>`][`TextCodec`] where `T` is the type of the field.
 
 If `codec` is *not* given, the field's type must implement [`FromXmlText`] for

xso/src/text.rs 🔗

@@ -6,7 +6,6 @@
 
 //! Module containing implementations for conversions to/from XML text.
 
-#[cfg(feature = "base64")]
 use core::marker::PhantomData;
 
 use std::borrow::Cow;
@@ -138,23 +137,67 @@ convert_via_fromstr_and_display! {
 /// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
 pub trait TextCodec<T> {
     /// Decode a string value into the type.
-    fn decode(s: String) -> Result<T, Error>;
+    fn decode(&self, s: String) -> Result<T, Error>;
 
     /// Encode the type as string value.
     ///
     /// If this returns `None`, the string value is not emitted at all.
-    fn encode(value: &T) -> Result<Option<Cow<'_, str>>, Error>;
+    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
+
+    /// Apply a filter to this codec.
+    ///
+    /// Filters preprocess strings before they are handed to the codec for
+    /// parsing, allowing to, for example, make the codec ignore irrelevant
+    /// content by stripping it.
+    // NOTE: The bound on T is needed because any given type A may implement
+    // TextCodec for any number of types. If we pass T down to the `Filtered`
+    // struct, rustc can do type inference on which `TextCodec`
+    // implementation the `filtered` method is supposed to have been called
+    // on.
+    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
+    where
+        // placing the bound here (instead of on the `TextCodec<T>` trait
+        // itself) preserves object-safety of TextCodec<T>.
+        Self: Sized,
+    {
+        Filtered {
+            filter,
+            codec: self,
+            bound: PhantomData,
+        }
+    }
+}
+
+/// Wrapper struct to apply a filter to a codec.
+///
+/// You can construct a value of this type via [`TextCodec::filtered`].
+// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
+pub struct Filtered<F, C, T> {
+    filter: F,
+    codec: C,
+    bound: PhantomData<T>,
+}
+
+impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
+    fn decode(&self, s: String) -> Result<T, Error> {
+        let s = self.filter.preprocess(s);
+        self.codec.decode(s)
+    }
+
+    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
+        self.codec.encode(value)
+    }
 }
 
 /// Text codec which does no transform.
 pub struct Plain;
 
 impl TextCodec<String> for Plain {
-    fn decode(s: String) -> Result<String, Error> {
+    fn decode(&self, s: String) -> Result<String, Error> {
         Ok(s)
     }
 
-    fn encode(value: &String) -> Result<Option<Cow<'_, str>>, Error> {
+    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
         Ok(Some(Cow::Borrowed(value.as_str())))
     }
 }
@@ -163,7 +206,7 @@ impl TextCodec<String> for Plain {
 pub struct EmptyAsNone;
 
 impl TextCodec<Option<String>> for EmptyAsNone {
-    fn decode(s: String) -> Result<Option<String>, Error> {
+    fn decode(&self, s: String) -> Result<Option<String>, Error> {
         if s.is_empty() {
             Ok(None)
         } else {
@@ -171,7 +214,7 @@ impl TextCodec<Option<String>> for EmptyAsNone {
         }
     }
 
-    fn encode(value: &Option<String>) -> Result<Option<Cow<'_, str>>, Error> {
+    fn encode<'x>(&self, value: &'x Option<String>) -> Result<Option<Cow<'x, str>>, Error> {
         Ok(match value.as_ref() {
             Some(v) if !v.is_empty() => Some(Cow::Borrowed(v.as_str())),
             Some(_) | None => None,
@@ -183,7 +226,7 @@ impl TextCodec<Option<String>> for EmptyAsNone {
 pub struct EmptyAsError;
 
 impl TextCodec<String> for EmptyAsError {
-    fn decode(s: String) -> Result<String, Error> {
+    fn decode(&self, s: String) -> Result<String, Error> {
         if s.is_empty() {
             Err(Error::Other("Empty text node."))
         } else {
@@ -191,7 +234,7 @@ impl TextCodec<String> for EmptyAsError {
         }
     }
 
-    fn encode(value: &String) -> Result<Option<Cow<'_, str>>, Error> {
+    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
         if value.is_empty() {
             Err(Error::Other("Empty text node."))
         } else {
@@ -205,14 +248,14 @@ impl TextCodec<String> for EmptyAsError {
 /// This may be used by codecs to allow to customize some of their behaviour.
 pub trait TextFilter {
     /// Process the incoming string and return the result of the processing.
-    fn preprocess(s: String) -> String;
+    fn preprocess(&self, s: String) -> String;
 }
 
 /// Text preprocessor which returns the input unchanged.
 pub struct NoFilter;
 
 impl TextFilter for NoFilter {
-    fn preprocess(s: String) -> String {
+    fn preprocess(&self, s: String) -> String {
         s
     }
 }
@@ -221,7 +264,7 @@ impl TextFilter for NoFilter {
 pub struct StripWhitespace;
 
 impl TextFilter for StripWhitespace {
-    fn preprocess(s: String) -> String {
+    fn preprocess(&self, s: String) -> String {
         let s: String = s
             .chars()
             .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
@@ -237,56 +280,54 @@ impl TextFilter for StripWhitespace {
 /// will make the implementation ignore any whitespace within the text.
 #[cfg(feature = "base64")]
 #[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
-pub struct Base64<Filter: TextFilter = NoFilter>(PhantomData<Filter>);
+pub struct Base64;
 
 #[cfg(feature = "base64")]
 #[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
-impl<Filter: TextFilter> TextCodec<Vec<u8>> for Base64<Filter> {
-    fn decode(s: String) -> Result<Vec<u8>, Error> {
-        let value = Filter::preprocess(s);
+impl TextCodec<Vec<u8>> for Base64 {
+    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
         StandardBase64Engine
-            .decode(value.as_bytes())
+            .decode(s.as_bytes())
             .map_err(Error::text_parse_error)
     }
 
-    fn encode(value: &Vec<u8>) -> Result<Option<Cow<'_, str>>, Error> {
+    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
         Ok(Some(Cow::Owned(StandardBase64Engine.encode(&value))))
     }
 }
 
 #[cfg(feature = "base64")]
 #[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
-impl<'x, Filter: TextFilter> TextCodec<Cow<'x, [u8]>> for Base64<Filter> {
-    fn decode(s: String) -> Result<Cow<'x, [u8]>, Error> {
-        let value = Filter::preprocess(s);
+impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
+    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
         StandardBase64Engine
-            .decode(value.as_bytes())
+            .decode(s.as_bytes())
             .map_err(Error::text_parse_error)
             .map(Cow::Owned)
     }
 
-    fn encode<'a>(value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
+    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
         Ok(Some(Cow::Owned(StandardBase64Engine.encode(&value))))
     }
 }
 
 #[cfg(feature = "base64")]
 #[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
-impl<T, Filter: TextFilter> TextCodec<Option<T>> for Base64<Filter>
+impl<T> TextCodec<Option<T>> for Base64
 where
-    Base64<Filter>: TextCodec<T>,
+    Base64: TextCodec<T>,
 {
-    fn decode(s: String) -> Result<Option<T>, Error> {
+    fn decode(&self, s: String) -> Result<Option<T>, Error> {
         if s.is_empty() {
             return Ok(None);
         }
-        Ok(Some(Self::decode(s)?))
+        Ok(Some(self.decode(s)?))
     }
 
-    fn encode(decoded: &Option<T>) -> Result<Option<Cow<'_, str>>, Error> {
+    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
         decoded
             .as_ref()
-            .map(Self::encode)
+            .map(|x| self.encode(x))
             .transpose()
             .map(Option::flatten)
     }
@@ -298,7 +339,7 @@ where
 pub struct FixedHex<const N: usize>;
 
 impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
-    fn decode(s: String) -> Result<[u8; N], Error> {
+    fn decode(&self, s: String) -> Result<[u8; N], Error> {
         if s.len() != 2 * N {
             return Err(Error::Other("Invalid length"));
         }
@@ -312,7 +353,7 @@ impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
         Ok(bytes)
     }
 
-    fn encode(value: &[u8; N]) -> Result<Option<Cow<'_, str>>, Error> {
+    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
         let mut bytes = String::with_capacity(N * 2);
         for byte in value {
             bytes.extend(format!("{:02x}", byte).chars());
@@ -325,17 +366,17 @@ impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
 where
     FixedHex<N>: TextCodec<T>,
 {
-    fn decode(s: String) -> Result<Option<T>, Error> {
+    fn decode(&self, s: String) -> Result<Option<T>, Error> {
         if s.is_empty() {
             return Ok(None);
         }
-        Ok(Some(Self::decode(s)?))
+        Ok(Some(self.decode(s)?))
     }
 
-    fn encode(decoded: &Option<T>) -> Result<Option<Cow<'_, str>>, Error> {
+    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
         decoded
             .as_ref()
-            .map(Self::encode)
+            .map(|x| self.encode(x))
             .transpose()
             .map(Option::flatten)
     }
@@ -345,7 +386,7 @@ where
 pub struct ColonSeparatedHex;
 
 impl TextCodec<Vec<u8>> for ColonSeparatedHex {
-    fn decode(s: String) -> Result<Vec<u8>, Error> {
+    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
         assert_eq!((s.len() + 1) % 3, 0);
         let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
         for i in 0..(1 + s.len()) / 3 {
@@ -359,7 +400,7 @@ impl TextCodec<Vec<u8>> for ColonSeparatedHex {
         Ok(bytes)
     }
 
-    fn encode(decoded: &Vec<u8>) -> Result<Option<Cow<'_, str>>, Error> {
+    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
         // TODO: Super inefficient!
         let mut bytes = Vec::with_capacity(decoded.len());
         for byte in decoded {