text.rs

  1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
  2//
  3// This Source Code Form is subject to the terms of the Mozilla Public
  4// License, v. 2.0. If a copy of the MPL was not distributed with this
  5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7//! Module containing implementations for conversions to/from XML text.
  8
  9use core::marker::PhantomData;
 10
 11use std::borrow::Cow;
 12
 13use crate::{error::Error, AsXmlText, FromXmlText};
 14
 15#[cfg(feature = "base64")]
 16use base64::engine::{general_purpose::STANDARD as StandardBase64Engine, Engine as _};
 17
 18macro_rules! convert_via_fromstr_and_display {
 19    ($($(#[cfg $cfg:tt])?$t:ty,)+) => {
 20        $(
 21            $(
 22                #[cfg $cfg]
 23            )?
 24            impl FromXmlText for $t {
 25                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
 26                fn from_xml_text(s: String) -> Result<Self, Error> {
 27                    s.parse().map_err(Error::text_parse_error)
 28                }
 29            }
 30
 31            $(
 32                #[cfg $cfg]
 33            )?
 34            impl AsXmlText for $t {
 35                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
 36                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
 37                    Ok(Cow::Owned(self.to_string()))
 38                }
 39            }
 40        )+
 41    }
 42}
 43
 44/// This provides an implementation compliant with xsd::bool.
 45impl FromXmlText for bool {
 46    /// Parse a boolean from XML text.
 47    ///
 48    /// The values `"1"` and `"true"` are considered true. The values `"0"`
 49    /// and `"false"` are considered `false`. Any other value is invalid and
 50    /// will return an error.
 51    fn from_xml_text(s: String) -> Result<Self, Error> {
 52        match s.as_str() {
 53            "1" => "true",
 54            "0" => "false",
 55            other => other,
 56        }
 57        .parse()
 58        .map_err(Error::text_parse_error)
 59    }
 60}
 61
 62/// This provides an implementation compliant with xsd::bool.
 63impl AsXmlText for bool {
 64    /// Convert a boolean to XML text.
 65    ///
 66    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
 67    /// This implementation never fails.
 68    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
 69        match self {
 70            true => Ok(Cow::Borrowed("true")),
 71            false => Ok(Cow::Borrowed("false")),
 72        }
 73    }
 74}
 75
// Types whose XML text form is simply their `FromStr`/`Display` form.
convert_via_fromstr_and_display! {
    // Primitive integers, floats and `char`.
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    f32,
    f64,
    char,
    // Network address types.
    core::net::IpAddr,
    core::net::Ipv4Addr,
    core::net::Ipv6Addr,
    core::net::SocketAddr,
    core::net::SocketAddrV4,
    core::net::SocketAddrV6,
    // Non-zero integers.
    core::num::NonZeroU8,
    core::num::NonZeroU16,
    core::num::NonZeroU32,
    core::num::NonZeroU64,
    core::num::NonZeroU128,
    core::num::NonZeroUsize,
    core::num::NonZeroI8,
    core::num::NonZeroI16,
    core::num::NonZeroI32,
    core::num::NonZeroI64,
    core::num::NonZeroI128,
    core::num::NonZeroIsize,

    // Only compiled when the `uuid` feature is enabled.
    #[cfg(feature = "uuid")]
    uuid::Uuid,

    // Only compiled when the `jid` feature is enabled.
    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
    #[cfg(feature = "jid")]
    jid::NodePart,
    #[cfg(feature = "jid")]
    jid::DomainPart,
    #[cfg(feature = "jid")]
    jid::ResourcePart,
}
127
128/// Represent a way to encode/decode text data into a Rust type.
129///
130/// This trait can be used in scenarios where implementing [`FromXmlText`]
131/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
132/// following:
133///
134/// 1. The type originates in a foreign crate, preventing the implementation
135///    of foreign traits.
136///
137/// 2. There is more than one way to convert a value to/from XML.
138///
139/// The codec to use for a text can be specified in the attributes understood
140/// by `FromXml` and `AsXml` derive macros. See the documentation of the
141/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
142pub trait TextCodec<T> {
143    /// Decode a string value into the type.
144    fn decode(&self, s: String) -> Result<T, Error>;
145
146    /// Encode the type as string value.
147    ///
148    /// If this returns `None`, the string value is not emitted at all.
149    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
150
151    /// Apply a filter to this codec.
152    ///
153    /// Filters preprocess strings before they are handed to the codec for
154    /// parsing, allowing to, for example, make the codec ignore irrelevant
155    /// content by stripping it.
156    // NOTE: The bound on T is needed because any given type A may implement
157    // TextCodec for any number of types. If we pass T down to the `Filtered`
158    // struct, rustc can do type inference on which `TextCodec`
159    // implementation the `filtered` method is supposed to have been called
160    // on.
161    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
162    where
163        // placing the bound here (instead of on the `TextCodec<T>` trait
164        // itself) preserves object-safety of TextCodec<T>.
165        Self: Sized,
166    {
167        Filtered {
168            filter,
169            codec: self,
170            bound: PhantomData,
171        }
172    }
173}
174
175/// Wrapper struct to apply a filter to a codec.
176///
177/// You can construct a value of this type via [`TextCodec::filtered`].
178// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
179pub struct Filtered<F, C, T> {
180    filter: F,
181    codec: C,
182    bound: PhantomData<T>,
183}
184
185impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
186    fn decode(&self, s: String) -> Result<T, Error> {
187        let s = self.filter.preprocess(s);
188        self.codec.decode(s)
189    }
190
191    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
192        self.codec.encode(value)
193    }
194}
195
196/// Text codec which does no transform.
197pub struct Plain;
198
199impl TextCodec<String> for Plain {
200    fn decode(&self, s: String) -> Result<String, Error> {
201        Ok(s)
202    }
203
204    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
205        Ok(Some(Cow::Borrowed(value.as_str())))
206    }
207}
208
209/// Text codec which returns `None` if the input to decode is the empty string, instead of
210/// attempting to decode it.
211///
212/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
213/// `Option<_>` otherwise.
214pub struct EmptyAsNone;
215
216impl<T> TextCodec<Option<T>> for EmptyAsNone
217where
218    T: FromXmlText + AsXmlText,
219{
220    fn decode(&self, s: String) -> Result<Option<T>, Error> {
221        if s.is_empty() {
222            Ok(None)
223        } else {
224            Some(T::from_xml_text(s)).transpose()
225        }
226    }
227
228    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
229        Ok(value
230            .as_ref()
231            .map(AsXmlText::as_xml_text)
232            .transpose()?
233            .map(|v| (!v.is_empty()).then_some(v))
234            .flatten())
235    }
236}
237
238/// Text codec which returns None instead of the empty string.
239pub struct EmptyAsError;
240
241impl TextCodec<String> for EmptyAsError {
242    fn decode(&self, s: String) -> Result<String, Error> {
243        if s.is_empty() {
244            Err(Error::Other("Empty text node."))
245        } else {
246            Ok(s)
247        }
248    }
249
250    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
251        if value.is_empty() {
252            Err(Error::Other("Empty text node."))
253        } else {
254            Ok(Some(Cow::Borrowed(value.as_str())))
255        }
256    }
257}
258
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow customizing some of their behaviour:
/// [`Filtered`] calls [`preprocess`][`TextFilter::preprocess`] on incoming
/// text before handing it to the wrapped codec's `decode`.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The string is taken by value, so implementations may modify it in
    /// place and return it.
    fn preprocess(&self, s: String) -> String;
}
266
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Hand `s` back without inspecting or modifying it.
    fn preprocess(&self, s: String) -> String {
        s
    }
}
275
276/// Text preprocessor to remove all whitespace.
277pub struct StripWhitespace;
278
279impl TextFilter for StripWhitespace {
280    fn preprocess(&self, s: String) -> String {
281        let s: String = s
282            .chars()
283            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
284            .collect();
285        s
286    }
287}
288
/// Text codec transforming text to binary using standard base64.
///
/// Incoming text is decoded exactly as received. To preprocess it first,
/// wrap the codec with [`TextCodec::filtered`]; most interestingly, passing
/// [`StripWhitespace`] there will make the codec ignore any whitespace
/// within the text.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode standard base64 text into bytes; malformed input yields a text
    /// parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = StandardBase64Engine.decode(s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode the bytes as standard base64 text. Always emits a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = StandardBase64Engine.encode(value.as_slice());
        Ok(Some(Cow::Owned(text)))
    }
}
309
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode standard base64 text into an owned byte buffer; malformed
    /// input yields a text parse error.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        match StandardBase64Engine.decode(s.as_bytes()) {
            Ok(bytes) => Ok(Cow::Owned(bytes)),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode the bytes as standard base64 text. Always emits a value.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let raw: &[u8] = value;
        Ok(Some(Cow::Owned(StandardBase64Engine.encode(raw))))
    }
}
323
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode empty text as `None`; anything else is decoded via the
    /// `TextCodec<T>` implementation and wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Encode `None` as "no text"; `Some` values are encoded via the
    /// `TextCodec<T>` implementation.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
344
345/// Text codec transforming text to binary using hexadecimal nibbles.
346///
347/// The length must be known at compile-time.
348pub struct FixedHex<const N: usize>;
349
350impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
351    fn decode(&self, s: String) -> Result<[u8; N], Error> {
352        if s.len() != 2 * N {
353            return Err(Error::Other("Invalid length"));
354        }
355
356        let mut bytes = [0u8; N];
357        for i in 0..N {
358            bytes[i] =
359                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
360        }
361
362        Ok(bytes)
363    }
364
365    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
366        let mut bytes = String::with_capacity(N * 2);
367        for byte in value {
368            bytes.extend(format!("{:02x}", byte).chars());
369        }
370        Ok(Some(Cow::Owned(bytes)))
371    }
372}
373
374impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
375where
376    FixedHex<N>: TextCodec<T>,
377{
378    fn decode(&self, s: String) -> Result<Option<T>, Error> {
379        if s.is_empty() {
380            return Ok(None);
381        }
382        Ok(Some(self.decode(s)?))
383    }
384
385    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
386        decoded
387            .as_ref()
388            .map(|x| self.encode(x))
389            .transpose()
390            .map(Option::flatten)
391    }
392}
393
394/// Text codec for colon-separated bytes of uppercase hexadecimal.
395pub struct ColonSeparatedHex;
396
397impl TextCodec<Vec<u8>> for ColonSeparatedHex {
398    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
399        assert_eq!((s.len() + 1) % 3, 0);
400        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
401        for i in 0..(1 + s.len()) / 3 {
402            let byte =
403                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
404            if 3 * i + 2 < s.len() {
405                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
406            }
407            bytes.push(byte);
408        }
409        Ok(bytes)
410    }
411
412    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
413        // TODO: Super inefficient!
414        let mut bytes = Vec::with_capacity(decoded.len());
415        for byte in decoded {
416            bytes.push(format!("{:02X}", byte));
417        }
418        Ok(Some(Cow::Owned(bytes.join(":"))))
419    }
420}