text.rs

  1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
  2//
  3// This Source Code Form is subject to the terms of the Mozilla Public
  4// License, v. 2.0. If a copy of the MPL was not distributed with this
  5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7//! Module containing implementations for conversions to/from XML text.
  8
  9use core::marker::PhantomData;
 10
 11use std::borrow::Cow;
 12
 13use crate::{error::Error, AsXmlText, FromXmlText};
 14
 15#[cfg(feature = "base64")]
 16use base64::engine::{general_purpose::STANDARD as StandardBase64Engine, Engine as _};
 17
// Implements `FromXmlText` and `AsXmlText` for every listed type by
// delegating to the type's `FromStr` and `Display` implementations.
//
// Each entry is a type followed by a comma. An entry may be preceded by a
// single `#[cfg(...)]` attribute, which is copied onto both generated impls
// so that the impls are only compiled when the condition holds.
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $cfg:tt])?$t:ty,)+) => {
        $(
            $(
                #[cfg $cfg]
            )?
            impl FromXmlText for $t {
                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(s: String) -> Result<Self, Error> {
                    // Parse failures are wrapped via `Error::text_parse_error`.
                    s.parse().map_err(Error::text_parse_error)
                }
            }

            $(
                #[cfg $cfg]
            )?
            impl AsXmlText for $t {
                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
                    // `to_string` allocates, hence the owned variant.
                    Ok(Cow::Owned(self.to_string()))
                }
            }
        )+
    }
}
 43
 44/// This provides an implementation compliant with xsd::bool.
 45impl FromXmlText for bool {
 46    /// Parse a boolean from XML text.
 47    ///
 48    /// The values `"1"` and `"true"` are considered true. The values `"0"`
 49    /// and `"false"` are considered `false`. Any other value is invalid and
 50    /// will return an error.
 51    fn from_xml_text(s: String) -> Result<Self, Error> {
 52        match s.as_str() {
 53            "1" => "true",
 54            "0" => "false",
 55            other => other,
 56        }
 57        .parse()
 58        .map_err(Error::text_parse_error)
 59    }
 60}
 61
 62/// This provides an implementation compliant with xsd::bool.
 63impl AsXmlText for bool {
 64    /// Convert a boolean to XML text.
 65    ///
 66    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
 67    /// This implementation never fails.
 68    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
 69        match self {
 70            true => Ok(Cow::Borrowed("true")),
 71            false => Ok(Cow::Borrowed("false")),
 72        }
 73    }
 74}
 75
// Types whose XML text representation is exactly their `FromStr` /
// `Display` representation. Entries carrying a `#[cfg(feature = "...")]`
// attribute are only implemented when that feature is enabled.
convert_via_fromstr_and_display! {
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    f32,
    f64,
    char,
    core::net::IpAddr,
    core::net::Ipv4Addr,
    core::net::Ipv6Addr,
    core::net::SocketAddr,
    core::net::SocketAddrV4,
    core::net::SocketAddrV6,
    core::num::NonZeroU8,
    core::num::NonZeroU16,
    core::num::NonZeroU32,
    core::num::NonZeroU64,
    core::num::NonZeroU128,
    core::num::NonZeroUsize,
    core::num::NonZeroI8,
    core::num::NonZeroI16,
    core::num::NonZeroI32,
    core::num::NonZeroI64,
    core::num::NonZeroI128,
    core::num::NonZeroIsize,

    #[cfg(feature = "uuid")]
    uuid::Uuid,

    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
}
121
/// Represent a way to encode/decode text data into a Rust type.
///
/// This trait can be used in scenarios where implementing [`FromXmlText`]
/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
/// following:
///
/// 1. The type originates in a foreign crate, preventing the implementation
///    of foreign traits.
///
/// 2. There is more than one way to convert a value to/from XML.
///
/// The codec to use for a text can be specified in the attributes understood
/// by `FromXml` and `AsXml` derive macros. See the documentation of the
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
pub trait TextCodec<T> {
    /// Decode a string value into the type.
    ///
    /// Returns an error if `s` is not a valid representation of `T` for this
    /// codec.
    fn decode(&self, s: String) -> Result<T, Error>;

    /// Encode the type as string value.
    ///
    /// If this returns `None`, the string value is not emitted at all.
    ///
    /// The returned text may borrow from `value`.
    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;

    /// Apply a filter to this codec.
    ///
    /// Filters preprocess strings before they are handed to the codec for
    /// parsing, allowing to, for example, make the codec ignore irrelevant
    /// content by stripping it.
    // NOTE: The bound on T is needed because any given type A may implement
    // TextCodec for any number of types. If we pass T down to the `Filtered`
    // struct, rustc can do type inference on which `TextCodec`
    // implementation the `filtered` method is supposed to have been called
    // on.
    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
    where
        // placing the bound here (instead of on the `TextCodec<T>` trait
        // itself) preserves object-safety of TextCodec<T>.
        Self: Sized,
    {
        Filtered {
            filter,
            codec: self,
            // Zero-sized marker; it only records `T` in the wrapper's type.
            bound: PhantomData,
        }
    }
}
168
/// Wrapper struct to apply a filter to a codec.
///
/// You can construct a value of this type via [`TextCodec::filtered`].
// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
pub struct Filtered<F, C, T> {
    /// Filter applied to incoming text before it reaches `codec`.
    filter: F,
    /// The wrapped codec which performs the actual conversion.
    codec: C,
    /// Zero-sized marker tying the wrapper to the decoded type `T`.
    bound: PhantomData<T>,
}
178
179impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
180    fn decode(&self, s: String) -> Result<T, Error> {
181        let s = self.filter.preprocess(s);
182        self.codec.decode(s)
183    }
184
185    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
186        self.codec.encode(value)
187    }
188}
189
190/// Text codec which does no transform.
191pub struct Plain;
192
193impl TextCodec<String> for Plain {
194    fn decode(&self, s: String) -> Result<String, Error> {
195        Ok(s)
196    }
197
198    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
199        Ok(Some(Cow::Borrowed(value.as_str())))
200    }
201}
202
203/// Text codec which returns `None` if the input to decode is the empty string, instead of
204/// attempting to decode it.
205///
206/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
207/// `Option<_>` otherwise.
208pub struct EmptyAsNone;
209
210impl<T> TextCodec<Option<T>> for EmptyAsNone
211where
212    T: FromXmlText + AsXmlText,
213{
214    fn decode(&self, s: String) -> Result<Option<T>, Error> {
215        if s.is_empty() {
216            Ok(None)
217        } else {
218            Some(T::from_xml_text(s)).transpose()
219        }
220    }
221
222    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
223        Ok(value
224            .as_ref()
225            .map(AsXmlText::as_xml_text)
226            .transpose()?
227            .map(|v| (!v.is_empty()).then_some(v))
228            .flatten())
229    }
230}
231
232/// Text codec which returns None instead of the empty string.
233pub struct EmptyAsError;
234
235impl TextCodec<String> for EmptyAsError {
236    fn decode(&self, s: String) -> Result<String, Error> {
237        if s.is_empty() {
238            Err(Error::Other("Empty text node."))
239        } else {
240            Ok(s)
241        }
242    }
243
244    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
245        if value.is_empty() {
246            Err(Error::Other("Empty text node."))
247        } else {
248            Ok(Some(Cow::Borrowed(value.as_str())))
249        }
250    }
251}
252
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
/// A filter is attached to a codec via [`TextCodec::filtered`].
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The string is passed by value, so implementations may modify and
    /// return it without copying.
    fn preprocess(&self, s: String) -> String;
}
260
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Return `s` as-is; the string is moved through without copying.
    fn preprocess(&self, s: String) -> String {
        s
    }
}
269
270/// Text preprocessor to remove all whitespace.
271pub struct StripWhitespace;
272
273impl TextFilter for StripWhitespace {
274    fn preprocess(&self, s: String) -> String {
275        let s: String = s
276            .chars()
277            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
278            .collect();
279        s
280    }
281}
282
/// Text codec transforming text to binary using standard base64.
///
/// Additional preprocessing of incoming text data can be added by wrapping
/// the codec with [`TextCodec::filtered`]. Most interestingly, passing
/// [`StripWhitespace`] as the filter will make the codec ignore any
/// whitespace within the text.
#[cfg(feature = "base64")]
pub struct Base64;
290
#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode standard base64 text into bytes.
    ///
    /// Returns a text parse error if the input is not accepted by the
    /// standard base64 engine.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        match StandardBase64Engine.decode(s.as_bytes()) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode bytes as standard base64 text. This never fails and always
    /// produces text.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = StandardBase64Engine.encode(value);
        Ok(Some(Cow::Owned(text)))
    }
}
303
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode standard base64 text into bytes.
    ///
    /// The result is always the owned variant. Returns a text parse error if
    /// the input is not accepted by the standard base64 engine.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        let bytes = StandardBase64Engine
            .decode(s.as_bytes())
            .map_err(Error::text_parse_error)?;
        Ok(Cow::Owned(bytes))
    }

    /// Encode bytes as standard base64 text. This never fails and always
    /// produces text.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let text = StandardBase64Engine.encode(value);
        Ok(Some(Cow::Owned(text)))
    }
}
317
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode `s` with the codec for `T`, mapping the empty string to
    /// `None`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            // Explicitly dispatch to the codec for `T`, not to this impl.
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Encode the contained value with the codec for `T`; `None` emits
    /// nothing.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
338
339/// Text codec transforming text to binary using hexadecimal nibbles.
340///
341/// The length must be known at compile-time.
342pub struct FixedHex<const N: usize>;
343
344impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
345    fn decode(&self, s: String) -> Result<[u8; N], Error> {
346        if s.len() != 2 * N {
347            return Err(Error::Other("Invalid length"));
348        }
349
350        let mut bytes = [0u8; N];
351        for i in 0..N {
352            bytes[i] =
353                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
354        }
355
356        Ok(bytes)
357    }
358
359    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
360        let mut bytes = String::with_capacity(N * 2);
361        for byte in value {
362            bytes.extend(format!("{:02x}", byte).chars());
363        }
364        Ok(Some(Cow::Owned(bytes)))
365    }
366}
367
368impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
369where
370    FixedHex<N>: TextCodec<T>,
371{
372    fn decode(&self, s: String) -> Result<Option<T>, Error> {
373        if s.is_empty() {
374            return Ok(None);
375        }
376        Ok(Some(self.decode(s)?))
377    }
378
379    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
380        decoded
381            .as_ref()
382            .map(|x| self.encode(x))
383            .transpose()
384            .map(Option::flatten)
385    }
386}
387
388/// Text codec for colon-separated bytes of uppercase hexadecimal.
389pub struct ColonSeparatedHex;
390
391impl TextCodec<Vec<u8>> for ColonSeparatedHex {
392    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
393        assert_eq!((s.len() + 1) % 3, 0);
394        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
395        for i in 0..(1 + s.len()) / 3 {
396            let byte =
397                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
398            if 3 * i + 2 < s.len() {
399                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
400            }
401            bytes.push(byte);
402        }
403        Ok(bytes)
404    }
405
406    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
407        // TODO: Super inefficient!
408        let mut bytes = Vec::with_capacity(decoded.len());
409        for byte in decoded {
410            bytes.push(format!("{:02X}", byte));
411        }
412        Ok(Some(Cow::Owned(bytes.join(":"))))
413    }
414}