text.rs

  1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
  2//
  3// This Source Code Form is subject to the terms of the Mozilla Public
  4// License, v. 2.0. If a copy of the MPL was not distributed with this
  5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7//! Module containing implementations for conversions to/from XML text.
  8
  9use core::marker::PhantomData;
 10
 11use std::borrow::Cow;
 12
 13use crate::{error::Error, AsXmlText, FromXmlText};
 14
 15#[cfg(feature = "base64")]
 16use base64::engine::{general_purpose::STANDARD as StandardBase64Engine, Engine as _};
 17
// Implement `FromXmlText` and `AsXmlText` for each listed type by delegating
// to the type's `FromStr` (parsing) and `Display` (serialisation)
// implementations.
//
// Each entry is a type followed by a comma. An entry may be preceded by one
// `#[cfg(...)]` attribute; that attribute is then applied to both generated
// impls, together with a matching `doc(cfg(...))` when building with the
// `docsrs` cfg.
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $cfg:tt])?$t:ty,)+) => {
        $(
            $(
                #[cfg $cfg]
                #[cfg_attr(docsrs, doc(cfg $cfg))]
            )?
            impl FromXmlText for $t {
                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(s: String) -> Result<Self, Error> {
                    // Any `FromStr` failure is wrapped into the crate's error type.
                    s.parse().map_err(Error::text_parse_error)
                }
            }

            $(
                #[cfg $cfg]
                #[cfg_attr(docsrs, doc(cfg $cfg))]
            )?
            impl AsXmlText for $t {
                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
                    // `to_string` allocates, hence the owned variant.
                    Ok(Cow::Owned(self.to_string()))
                }
            }
        )+
    }
}
 45
 46/// This provides an implementation compliant with xsd::bool.
 47impl FromXmlText for bool {
 48    /// Parse a boolean from XML text.
 49    ///
 50    /// The values `"1"` and `"true"` are considered true. The values `"0"`
 51    /// and `"false"` are considered `false`. Any other value is invalid and
 52    /// will return an error.
 53    fn from_xml_text(s: String) -> Result<Self, Error> {
 54        match s.as_str() {
 55            "1" => "true",
 56            "0" => "false",
 57            other => other,
 58        }
 59        .parse()
 60        .map_err(Error::text_parse_error)
 61    }
 62}
 63
 64/// This provides an implementation compliant with xsd::bool.
 65impl AsXmlText for bool {
 66    /// Convert a boolean to XML text.
 67    ///
 68    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
 69    /// This implementation never fails.
 70    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
 71        match self {
 72            true => Ok(Cow::Borrowed("true")),
 73            false => Ok(Cow::Borrowed("false")),
 74        }
 75    }
 76}
 77
// Types whose XML text representation is exactly their `FromStr` / `Display`
// representation.
convert_via_fromstr_and_display! {
    // Primitive integers.
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    // Floating point numbers and single characters.
    f32,
    f64,
    char,
    // Network addresses.
    std::net::IpAddr,
    std::net::Ipv4Addr,
    std::net::Ipv6Addr,
    std::net::SocketAddr,
    std::net::SocketAddrV4,
    std::net::SocketAddrV6,
    // Non-zero integers.
    std::num::NonZeroU8,
    std::num::NonZeroU16,
    std::num::NonZeroU32,
    std::num::NonZeroU64,
    std::num::NonZeroU128,
    std::num::NonZeroUsize,
    std::num::NonZeroI8,
    std::num::NonZeroI16,
    std::num::NonZeroI32,
    std::num::NonZeroI64,
    std::num::NonZeroI128,
    std::num::NonZeroIsize,

    // Types from optional dependencies, gated on the respective feature.
    #[cfg(feature = "uuid")]
    uuid::Uuid,

    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
}
123
/// Represent a way to encode/decode text data into a Rust type.
///
/// This trait can be used in scenarios where implementing [`FromXmlText`]
/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
/// following:
///
/// 1. The type originates in a foreign crate, preventing the implementation
///    of foreign traits.
///
/// 2. There is more than one way to convert a value to/from XML.
///
/// The codec to use for a text can be specified in the attributes understood
/// by `FromXml` and `AsXml` derive macros. See the documentation of the
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
pub trait TextCodec<T> {
    /// Decode a string value into the type.
    ///
    /// # Errors
    ///
    /// Returns an [`Error`] if `s` cannot be converted into a `T`.
    fn decode(&self, s: String) -> Result<T, Error>;

    /// Encode the type as string value.
    ///
    /// If this returns `None`, the string value is not emitted at all.
    ///
    /// # Errors
    ///
    /// Returns an [`Error`] if `value` cannot be represented as text.
    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;

    /// Apply a filter to this codec.
    ///
    /// Filters preprocess strings before they are handed to the codec for
    /// parsing, allowing to, for example, make the codec ignore irrelevant
    /// content by stripping it.
    ///
    /// Only decoding is affected: the returned [`Filtered`] codec encodes
    /// exactly like the codec it wraps.
    // NOTE: The bound on T is needed because any given type A may implement
    // TextCodec for any number of types. If we pass T down to the `Filtered`
    // struct, rustc can do type inference on which `TextCodec`
    // implementation the `filtered` method is supposed to have been called
    // on.
    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
    where
        // placing the bound here (instead of on the `TextCodec<T>` trait
        // itself) preserves object-safety of TextCodec<T>.
        Self: Sized,
    {
        Filtered {
            filter,
            codec: self,
            bound: PhantomData,
        }
    }
}
170
/// Wrapper struct to apply a filter to a codec.
///
/// You can construct a value of this type via [`TextCodec::filtered`].
///
/// The type parameters are the filter `F`, the wrapped codec `C` and the
/// decoded type `T` which the wrapped codec is used for.
// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
pub struct Filtered<F, C, T> {
    // Preprocessor applied to incoming text before decoding.
    filter: F,
    // The codec doing the actual conversion.
    codec: C,
    // Zero-sized marker tying this wrapper to one decoded type `T`.
    bound: PhantomData<T>,
}
180
181impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
182    fn decode(&self, s: String) -> Result<T, Error> {
183        let s = self.filter.preprocess(s);
184        self.codec.decode(s)
185    }
186
187    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
188        self.codec.encode(value)
189    }
190}
191
192/// Text codec which does no transform.
193pub struct Plain;
194
195impl TextCodec<String> for Plain {
196    fn decode(&self, s: String) -> Result<String, Error> {
197        Ok(s)
198    }
199
200    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
201        Ok(Some(Cow::Borrowed(value.as_str())))
202    }
203}
204
205/// Text codec which returns `None` if the input to decode is the empty string, instead of
206/// attempting to decode it.
207///
208/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
209/// `Option<_>` otherwise.
210pub struct EmptyAsNone;
211
212impl<T> TextCodec<Option<T>> for EmptyAsNone
213where
214    T: FromXmlText + AsXmlText,
215{
216    fn decode(&self, s: String) -> Result<Option<T>, Error> {
217        if s.is_empty() {
218            Ok(None)
219        } else {
220            Some(T::from_xml_text(s)).transpose()
221        }
222    }
223
224    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
225        Ok(value
226            .as_ref()
227            .map(AsXmlText::as_xml_text)
228            .transpose()?
229            .map(|v| (!v.is_empty()).then_some(v))
230            .flatten())
231    }
232}
233
234/// Text codec which returns None instead of the empty string.
235pub struct EmptyAsError;
236
237impl TextCodec<String> for EmptyAsError {
238    fn decode(&self, s: String) -> Result<String, Error> {
239        if s.is_empty() {
240            Err(Error::Other("Empty text node."))
241        } else {
242            Ok(s)
243        }
244    }
245
246    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
247        if value.is_empty() {
248            Err(Error::Other("Empty text node."))
249        } else {
250            Ok(Some(Cow::Borrowed(value.as_str())))
251        }
252    }
253}
254
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
/// A filter is attached to a codec via [`TextCodec::filtered`].
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The string is taken by value and an owned string is returned; the
    /// operation has no way to report failure.
    fn preprocess(&self, s: String) -> String;
}
262
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Return `s` as-is.
    fn preprocess(&self, s: String) -> String {
        s
    }
}
271
272/// Text preprocessor to remove all whitespace.
273pub struct StripWhitespace;
274
275impl TextFilter for StripWhitespace {
276    fn preprocess(&self, s: String) -> String {
277        let s: String = s
278            .chars()
279            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
280            .collect();
281        s
282    }
283}
284
/// Text codec transforming text to binary using standard base64.
///
/// Incoming text is handed to the base64 engine as-is. To preprocess it
/// first, attach a filter via [`TextCodec::filtered`]. Most interestingly,
/// passing [`StripWhitespace`] will make the codec ignore any whitespace
/// within the text.
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
pub struct Base64;

#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode base64 text into bytes. A decoding failure is reported as a
    /// text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = StandardBase64Engine.decode(s);
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as base64 text. This never fails and always emits a
    /// value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let raw: &[u8] = value;
        let text = StandardBase64Engine.encode(raw);
        Ok(Some(Cow::Owned(text)))
    }
}
307
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode base64 text into bytes. The result is always the owned variant
    /// of the `Cow`. A decoding failure is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        match StandardBase64Engine.decode(s) {
            Ok(bytes) => Ok(Cow::Owned(bytes)),
            Err(e) => Err(Error::text_parse_error(e)),
        }
    }

    /// Encode bytes as base64 text. This never fails and always emits a
    /// value.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let raw: &[u8] = value;
        let text = StandardBase64Engine.encode(raw);
        Ok(Some(Cow::Owned(text)))
    }
}
322
#[cfg(feature = "base64")]
#[cfg_attr(docsrs, doc(cfg(feature = "base64")))]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode the empty string as `None`; anything else is decoded via the
    /// codec for `T` and wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            // Name the non-optional impl explicitly; this is not a recursive
            // call.
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Emit nothing for `None`; otherwise encode the contained value via the
    /// codec for `T`.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
344
345/// Text codec transforming text to binary using hexadecimal nibbles.
346///
347/// The length must be known at compile-time.
348pub struct FixedHex<const N: usize>;
349
350impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
351    fn decode(&self, s: String) -> Result<[u8; N], Error> {
352        if s.len() != 2 * N {
353            return Err(Error::Other("Invalid length"));
354        }
355
356        let mut bytes = [0u8; N];
357        for i in 0..N {
358            bytes[i] =
359                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
360        }
361
362        Ok(bytes)
363    }
364
365    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
366        let mut bytes = String::with_capacity(N * 2);
367        for byte in value {
368            bytes.extend(format!("{:02x}", byte).chars());
369        }
370        Ok(Some(Cow::Owned(bytes)))
371    }
372}
373
374impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
375where
376    FixedHex<N>: TextCodec<T>,
377{
378    fn decode(&self, s: String) -> Result<Option<T>, Error> {
379        if s.is_empty() {
380            return Ok(None);
381        }
382        Ok(Some(self.decode(s)?))
383    }
384
385    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
386        decoded
387            .as_ref()
388            .map(|x| self.encode(x))
389            .transpose()
390            .map(Option::flatten)
391    }
392}
393
394/// Text codec for colon-separated bytes of uppercase hexadecimal.
395pub struct ColonSeparatedHex;
396
397impl TextCodec<Vec<u8>> for ColonSeparatedHex {
398    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
399        assert_eq!((s.len() + 1) % 3, 0);
400        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
401        for i in 0..(1 + s.len()) / 3 {
402            let byte =
403                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
404            if 3 * i + 2 < s.len() {
405                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
406            }
407            bytes.push(byte);
408        }
409        Ok(bytes)
410    }
411
412    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
413        // TODO: Super inefficient!
414        let mut bytes = Vec::with_capacity(decoded.len());
415        for byte in decoded {
416            bytes.push(format!("{:02X}", byte));
417        }
418        Ok(Some(Cow::Owned(bytes.join(":"))))
419    }
420}