text.rs

  1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
  2//
  3// This Source Code Form is subject to the terms of the Mozilla Public
  4// License, v. 2.0. If a copy of the MPL was not distributed with this
  5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7//! Module containing implementations for conversions to/from XML text.
  8
  9use core::marker::PhantomData;
 10
 11use alloc::{
 12    borrow::Cow,
 13    format,
 14    string::{String, ToString},
 15    vec::Vec,
 16};
 17
 18use crate::{error::Error, AsXmlText, FromXmlText};
 19
 20#[cfg(feature = "base64")]
 21use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
 22
 23macro_rules! convert_via_fromstr_and_display {
 24    ($($(#[cfg $cfg:tt])?$t:ty,)+) => {
 25        $(
 26            $(
 27                #[cfg $cfg]
 28            )?
 29            impl FromXmlText for $t {
 30                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
 31                fn from_xml_text(s: String) -> Result<Self, Error> {
 32                    s.parse().map_err(Error::text_parse_error)
 33                }
 34            }
 35
 36            $(
 37                #[cfg $cfg]
 38            )?
 39            impl AsXmlText for $t {
 40                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
 41                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
 42                    Ok(Cow::Owned(self.to_string()))
 43                }
 44            }
 45        )+
 46    }
 47}
 48
 49/// This provides an implementation compliant with xsd::bool.
 50impl FromXmlText for bool {
 51    /// Parse a boolean from XML text.
 52    ///
 53    /// The values `"1"` and `"true"` are considered true. The values `"0"`
 54    /// and `"false"` are considered `false`. Any other value is invalid and
 55    /// will return an error.
 56    fn from_xml_text(s: String) -> Result<Self, Error> {
 57        match s.as_str() {
 58            "1" => "true",
 59            "0" => "false",
 60            other => other,
 61        }
 62        .parse()
 63        .map_err(Error::text_parse_error)
 64    }
 65}
 66
 67/// This provides an implementation compliant with xsd::bool.
 68impl AsXmlText for bool {
 69    /// Convert a boolean to XML text.
 70    ///
 71    /// `true` is converted to `"true"` and `false` is converted to `"false"`.
 72    /// This implementation never fails.
 73    fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
 74        match self {
 75            true => Ok(Cow::Borrowed("true")),
 76            false => Ok(Cow::Borrowed("false")),
 77        }
 78    }
 79}
 80
// Types whose XML text form is exactly their `FromStr` / `Display` form.
//
// Entries carrying a `#[cfg(feature = "...")]` attribute are only implemented
// when that crate feature is enabled.
convert_via_fromstr_and_display! {
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    f32,
    f64,
    char,
    core::net::IpAddr,
    core::net::Ipv4Addr,
    core::net::Ipv6Addr,
    core::net::SocketAddr,
    core::net::SocketAddrV4,
    core::net::SocketAddrV6,
    core::num::NonZeroU8,
    core::num::NonZeroU16,
    core::num::NonZeroU32,
    core::num::NonZeroU64,
    core::num::NonZeroU128,
    core::num::NonZeroUsize,
    core::num::NonZeroI8,
    core::num::NonZeroI16,
    core::num::NonZeroI32,
    core::num::NonZeroI64,
    core::num::NonZeroI128,
    core::num::NonZeroIsize,

    #[cfg(feature = "uuid")]
    uuid::Uuid,

    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
    #[cfg(feature = "jid")]
    jid::NodePart,
    #[cfg(feature = "jid")]
    jid::DomainPart,
    #[cfg(feature = "jid")]
    jid::ResourcePart,
}
132
133/// Represent a way to encode/decode text data into a Rust type.
134///
135/// This trait can be used in scenarios where implementing [`FromXmlText`]
136/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
137/// following:
138///
139/// 1. The type originates in a foreign crate, preventing the implementation
140///    of foreign traits.
141///
142/// 2. There is more than one way to convert a value to/from XML.
143///
144/// The codec to use for a text can be specified in the attributes understood
145/// by `FromXml` and `AsXml` derive macros. See the documentation of the
146/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
147pub trait TextCodec<T> {
148    /// Decode a string value into the type.
149    fn decode(&self, s: String) -> Result<T, Error>;
150
151    /// Encode the type as string value.
152    ///
153    /// If this returns `None`, the string value is not emitted at all.
154    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
155
156    /// Apply a filter to this codec.
157    ///
158    /// Filters preprocess strings before they are handed to the codec for
159    /// parsing, allowing to, for example, make the codec ignore irrelevant
160    /// content by stripping it.
161    // NOTE: The bound on T is needed because any given type A may implement
162    // TextCodec for any number of types. If we pass T down to the `Filtered`
163    // struct, rustc can do type inference on which `TextCodec`
164    // implementation the `filtered` method is supposed to have been called
165    // on.
166    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
167    where
168        // placing the bound here (instead of on the `TextCodec<T>` trait
169        // itself) preserves object-safety of TextCodec<T>.
170        Self: Sized,
171    {
172        Filtered {
173            filter,
174            codec: self,
175            bound: PhantomData,
176        }
177    }
178}
179
/// Wrapper struct to apply a filter to a codec.
///
/// Decoding first passes the text through the filter and then through the
/// wrapped codec; encoding is delegated to the wrapped codec unchanged.
///
/// You can construct a value of this type via [`TextCodec::filtered`].
// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
pub struct Filtered<F, C, T> {
    // Preprocessing step applied to incoming text before decoding.
    filter: F,
    // The codec doing the actual conversion.
    codec: C,
    // Ties the wrapper to one target type `T` without storing a value of it.
    bound: PhantomData<T>,
}
189
190impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
191    fn decode(&self, s: String) -> Result<T, Error> {
192        let s = self.filter.preprocess(s);
193        self.codec.decode(s)
194    }
195
196    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
197        self.codec.encode(value)
198    }
199}
200
201/// Text codec which does no transform.
202pub struct Plain;
203
204impl TextCodec<String> for Plain {
205    fn decode(&self, s: String) -> Result<String, Error> {
206        Ok(s)
207    }
208
209    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
210        Ok(Some(Cow::Borrowed(value.as_str())))
211    }
212}
213
214/// Text codec which returns `None` if the input to decode is the empty string, instead of
215/// attempting to decode it.
216///
217/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
218/// `Option<_>` otherwise.
219pub struct EmptyAsNone;
220
221impl<T> TextCodec<Option<T>> for EmptyAsNone
222where
223    T: FromXmlText + AsXmlText,
224{
225    fn decode(&self, s: String) -> Result<Option<T>, Error> {
226        if s.is_empty() {
227            Ok(None)
228        } else {
229            Some(T::from_xml_text(s)).transpose()
230        }
231    }
232
233    fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
234        Ok(value
235            .as_ref()
236            .map(AsXmlText::as_xml_text)
237            .transpose()?
238            .map(|v| (!v.is_empty()).then_some(v))
239            .flatten())
240    }
241}
242
243/// Text codec which returns None instead of the empty string.
244pub struct EmptyAsError;
245
246impl TextCodec<String> for EmptyAsError {
247    fn decode(&self, s: String) -> Result<String, Error> {
248        if s.is_empty() {
249            Err(Error::Other("Empty text node."))
250        } else {
251            Ok(s)
252        }
253    }
254
255    fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
256        if value.is_empty() {
257            Err(Error::Other("Empty text node."))
258        } else {
259            Ok(Some(Cow::Borrowed(value.as_str())))
260        }
261    }
262}
263
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
/// A filter is attached to a codec via [`TextCodec::filtered`] and is then
/// applied to the text before the codec decodes it.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The input is taken by value, so implementations may modify and return
    /// it instead of allocating a new string.
    fn preprocess(&self, s: String) -> String;
}
271
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Hand the string back as-is.
    fn preprocess(&self, s: String) -> String {
        s
    }
}
280
281/// Text preprocessor to remove all whitespace.
282pub struct StripWhitespace;
283
284impl TextFilter for StripWhitespace {
285    fn preprocess(&self, s: String) -> String {
286        let s: String = s
287            .chars()
288            .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
289            .collect();
290        s
291    }
292}
293
/// Text codec transforming text to binary using standard `base64`.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode base64 text into bytes; a decoding failure is reported as a
    /// text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode the bytes as base64 text. Always yields `Some`.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value.as_slice());
        Ok(Some(Cow::Owned(text)))
    }
}
317
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode base64 text into an owned byte buffer; a decoding failure is
    /// reported as a text parse error.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        match base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes()) {
            Ok(raw) => Ok(Cow::Owned(raw)),
            Err(err) => Err(Error::text_parse_error(err)),
        }
    }

    /// Encode the bytes as base64 text. Always yields `Some`.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let raw: &[u8] = value;
        let text = base64::engine::Engine::encode(&StandardBase64Engine, raw);
        Ok(Some(Cow::Owned(text)))
    }
}
333
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode `""` as `None`; any other text is decoded through the
    /// `TextCodec<T>` implementation and wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        match s.is_empty() {
            true => Ok(None),
            false => <Base64 as TextCodec<T>>::decode(self, s).map(Some),
        }
    }

    /// Encode the inner value, if any; `None` emits nothing.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Base64 as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
354
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine> TextCodec<Vec<u8>> for T {
    /// Decode text into bytes using this engine; a decoding failure is
    /// reported as a text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = base64::engine::Engine::decode(self, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode the bytes using this engine. Always yields `Some`.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(self, value.as_slice());
        Ok(Some(Cow::Owned(text)))
    }
}
367
// The impl previously declared a lifetime parameter that was never used; it
// has been dropped, which does not change which types this impl covers.
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine, U> TextCodec<Option<U>> for T
where
    T: TextCodec<U>,
{
    /// Decode `""` as `None`; any other text is decoded through the
    /// engine's `TextCodec<U>` implementation and wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            return Ok(None);
        }
        <T as TextCodec<U>>::decode(self, s).map(Some)
    }

    /// Encode the inner value, if any; `None` emits nothing.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <T as TextCodec<U>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
388
389/// Text codec transforming text to binary using hexadecimal nibbles.
390///
391/// The length must be known at compile-time.
392pub struct FixedHex<const N: usize>;
393
394impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
395    fn decode(&self, s: String) -> Result<[u8; N], Error> {
396        if s.len() != 2 * N {
397            return Err(Error::Other("Invalid length"));
398        }
399
400        let mut bytes = [0u8; N];
401        for i in 0..N {
402            bytes[i] =
403                u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
404        }
405
406        Ok(bytes)
407    }
408
409    fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
410        let mut bytes = String::with_capacity(N * 2);
411        for byte in value {
412            bytes.extend(format!("{:02x}", byte).chars());
413        }
414        Ok(Some(Cow::Owned(bytes)))
415    }
416}
417
418impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
419where
420    FixedHex<N>: TextCodec<T>,
421{
422    fn decode(&self, s: String) -> Result<Option<T>, Error> {
423        if s.is_empty() {
424            return Ok(None);
425        }
426        Ok(Some(self.decode(s)?))
427    }
428
429    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
430        decoded
431            .as_ref()
432            .map(|x| self.encode(x))
433            .transpose()
434            .map(Option::flatten)
435    }
436}
437
438/// Text codec for colon-separated bytes of uppercase hexadecimal.
439pub struct ColonSeparatedHex;
440
441impl TextCodec<Vec<u8>> for ColonSeparatedHex {
442    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
443        assert_eq!((s.len() + 1) % 3, 0);
444        let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
445        for i in 0..(1 + s.len()) / 3 {
446            let byte =
447                u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
448            if 3 * i + 2 < s.len() {
449                assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
450            }
451            bytes.push(byte);
452        }
453        Ok(bytes)
454    }
455
456    fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
457        // TODO: Super inefficient!
458        let mut bytes = Vec::with_capacity(decoded.len());
459        for byte in decoded {
460            bytes.push(format!("{:02X}", byte));
461        }
462        Ok(Some(Cow::Owned(bytes.join(":"))))
463    }
464}