1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! # Convert data to and from XML text
8//!
9//! This module provides traits and types related to conversion of XML text
10//! data to and from Rust types, as well as the [`AsXmlText`],
11//! [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] and [`FromXmlText`]
12//! implementations for foreign and standard-library types.
13//!
14//! ## Support for types from third-party crates
15//!
16//! Beyond the standard library types, the following additional types are
17//! supported:
18//!
19//! | Feature gate | Types |
20//! | --- | --- |
//! | `jid` | `jid::Jid`, `jid::BareJid`, `jid::FullJid`, `jid::NodePart`, `jid::DomainPart`, `jid::ResourcePart` |
22//! | `serde_json` | `serde_json::Value` |
23//! | `uuid` | `uuid::Uuid` |
24//!
25//! ### Adding support for more types
26//!
27//! Due to the orphan rule, it is not possible for applications to implement
28//! [`AsXmlText`], [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] or
29//! [`FromXmlText`] on types which originate from third-party crates. Because
30//! of that, we are **extremely liberal** at accepting merge requests for
31//! implementations of these traits for types from third-party crates.
32//!
33//! The only requirement is that the implementation is gated behind a feature
34//! flag which is disabled-by-default.
35//!
36//! ### Workaround for unsupported types
37//!
38//! If making a merge request against `xso` and waiting for a release is not
39//! an option, you can use newtype wrappers in almost all cases, for example:
40//!
41#![cfg_attr(
42 not(all(feature = "std", feature = "macros")),
43 doc = "Because the std or macros feature was not enabled at doc build time, the example cannot be tested.\n\n```ignore\n"
44)]
45#![cfg_attr(all(feature = "std", feature = "macros"), doc = "\n```\n")]
46//! # use xso::{AsXml, FromXml, AsXmlText, FromXmlText, error::Error};
47//! # use std::borrow::Cow;
48//! use std::process::ExitCode;
49//!
50//! struct MyExitCode(ExitCode);
51//!
52//! impl AsXmlText for MyExitCode {
53//! fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
54//! match self.0 {
55//! ExitCode::FAILURE => Ok(Cow::Borrowed("failure")),
56//! ExitCode::SUCCESS => Ok(Cow::Borrowed("success")),
57//! _ => Err(Error::Other("unknown exit code")),
58//! }
59//! }
60//! }
61//!
62//! impl FromXmlText for MyExitCode {
63//! fn from_xml_text(s: String) -> Result<Self, Error> {
64//! match s.as_str() {
65//! "failure" => Ok(Self(ExitCode::FAILURE)),
66//! "success" => Ok(Self(ExitCode::SUCCESS)),
67//! _ => Err(Error::Other("unknown exit code")),
68//! }
69//! }
70//! }
71//!
72//! #[derive(AsXml, FromXml)]
73//! #[xml(namespace = "urn:example", name = "process-result")]
74//! struct ProcessResult {
75//! #[xml(attribute)]
76//! code: MyExitCode,
77//! #[xml(text)]
78//! stdout: String,
79//! }
80//! ```
81//!
82//! Of course, such an approach reduces the usability of your struct (and
83//! comes with issues once references are needed), so making a merge request
84//! against `xso` is generally preferable.
85
86use core::marker::PhantomData;
87
88use alloc::{
89 borrow::{Cow, ToOwned},
90 boxed::Box,
91 format,
92 string::{String, ToString},
93 vec::Vec,
94};
95
96use crate::{error::Error, AsOptionalXmlText, AsXmlText, FromXmlText};
97
98#[cfg(feature = "base64")]
99use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
100
/// # Generate `AsXmlText` and `FromXmlText` implementations
///
/// This macro generates an `AsXmlText` implementation which uses
/// [`Display`][`core::fmt::Display`] and a `FromXmlText` implementation
/// which uses [`FromStr`][`core::str::FromStr`] for the types it is called
/// on.
///
/// The generated code names `String` and `ToString` through
/// `$crate::exports::alloc`, so the macro can be invoked from modules which
/// do not have these names in scope (for instance in `no_std` crates) or
/// which shadow them with their own items.
///
/// ## Syntax
///
/// The macro accepts a comma-separated list of types. Optionally, each type
/// can be preceded by a `#[cfg(..)]` attribute to make the implementations
/// conditional on a feature.
///
/// ## Example
///
#[cfg_attr(
    not(feature = "macros"),
    doc = "Because the macros feature was not enabled at doc build time, the example cannot be tested.\n\n```ignore\n"
)]
#[cfg_attr(feature = "macros", doc = "\n```\n")]
/// # use xso::convert_via_fromstr_and_display;
/// # use core::fmt::{self, Display};
/// # use core::str::FromStr;
/// struct Foo;
///
/// impl FromStr for Foo {
/// #    type Err = core::convert::Infallible;
/// #
/// #    fn from_str(s: &str) -> Result<Self, Self::Err> { todo!() }
///     /* ... */
/// }
///
/// impl Display for Foo {
/// #    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { todo!() }
///     /* ... */
/// }
///
/// convert_via_fromstr_and_display!(
///     Foo,
/// );
/// ```
#[macro_export]
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $cfg:tt])?$t:ty),+ $(,)?) => {
        $(
            $(
                #[cfg $cfg]
            )?
            impl $crate::FromXmlText for $t {
                #[doc = concat!("Parse [`", stringify!($t), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                // `String` is spelled out via `$crate` because identifiers in
                // item position resolve at the call site, not here.
                fn from_xml_text(
                    s: $crate::exports::alloc::string::String,
                ) -> Result<Self, $crate::error::Error> {
                    s.parse().map_err($crate::error::Error::text_parse_error)
                }
            }

            $(
                #[cfg $cfg]
            )?
            impl $crate::AsXmlText for $t {
                #[doc = concat!("Convert [`", stringify!($t), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<$crate::exports::alloc::borrow::Cow<'_, str>, $crate::error::Error> {
                    // Fully qualified so that the caller does not need the
                    // `ToString` trait in scope.
                    Ok($crate::exports::alloc::borrow::Cow::Owned(
                        $crate::exports::alloc::string::ToString::to_string(self),
                    ))
                }
            }
        )+
    }
}
167
168/// This provides an implementation compliant with xsd::bool.
169impl FromXmlText for bool {
170 /// Parse a boolean from XML text.
171 ///
172 /// The values `"1"` and `"true"` are considered true. The values `"0"`
173 /// and `"false"` are considered `false`. Any other value is invalid and
174 /// will return an error.
175 fn from_xml_text(s: String) -> Result<Self, Error> {
176 match s.as_str() {
177 "1" => "true",
178 "0" => "false",
179 other => other,
180 }
181 .parse()
182 .map_err(Error::text_parse_error)
183 }
184}
185
186/// This provides an implementation compliant with xsd::bool.
187impl AsXmlText for bool {
188 /// Convert a boolean to XML text.
189 ///
190 /// `true` is converted to `"true"` and `false` is converted to `"false"`.
191 /// This implementation never fails.
192 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
193 match self {
194 true => Ok(Cow::Borrowed("true")),
195 false => Ok(Cow::Borrowed("false")),
196 }
197 }
198}
199
// Implement `FromXmlText` (via `FromStr`) and `AsXmlText` (via `Display`)
// for every type listed below.
//
// NOTE: only plain `//` comments may be used inside the invocation; doc
// comments would be turned into attributes and not match the macro pattern.
convert_via_fromstr_and_display! {
    // Primitive integers, floats and `char`.
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    f32,
    f64,
    char,
    // Network address types.
    core::net::IpAddr,
    core::net::Ipv4Addr,
    core::net::Ipv6Addr,
    core::net::SocketAddr,
    core::net::SocketAddrV4,
    core::net::SocketAddrV6,
    // Non-zero integers.
    core::num::NonZeroU8,
    core::num::NonZeroU16,
    core::num::NonZeroU32,
    core::num::NonZeroU64,
    core::num::NonZeroU128,
    core::num::NonZeroUsize,
    core::num::NonZeroI8,
    core::num::NonZeroI16,
    core::num::NonZeroI32,
    core::num::NonZeroI64,
    core::num::NonZeroI128,
    core::num::NonZeroIsize,

    // Types from third-party crates, each behind its own feature flag.
    #[cfg(feature = "uuid")]
    uuid::Uuid,

    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
    #[cfg(feature = "jid")]
    jid::NodePart,
    #[cfg(feature = "jid")]
    jid::DomainPart,
    #[cfg(feature = "jid")]
    jid::ResourcePart,

    #[cfg(feature = "serde_json")]
    serde_json::Value,
}
254
/// Identity conversion: XML text is already a `String`.
impl FromXmlText for String {
    /// Return the string unchanged.
    ///
    /// The input is moved into the result; this never fails.
    fn from_xml_text(data: String) -> Result<Self, Error> {
        Ok(data)
    }
}
261
262impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
263 /// Return a [`Cow::Owned`] containing the parsed value.
264 fn from_xml_text(data: String) -> Result<Self, Error> {
265 Ok(Cow::Owned(T::from_xml_text(data)?))
266 }
267}
268
269impl<T: FromXmlText> FromXmlText for Option<T> {
270 /// Return a [`Some`] containing the parsed value.
271 fn from_xml_text(data: String) -> Result<Self, Error> {
272 Ok(Some(T::from_xml_text(data)?))
273 }
274}
275
276impl<T: FromXmlText> FromXmlText for Box<T> {
277 /// Return a [`Box`] containing the parsed value.
278 fn from_xml_text(data: String) -> Result<Self, Error> {
279 Ok(Box::new(T::from_xml_text(data)?))
280 }
281}
282
283impl AsXmlText for String {
284 /// Return the borrowed string contents.
285 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
286 Ok(Cow::Borrowed(self))
287 }
288}
289
290impl AsXmlText for str {
291 /// Return the borrowed string contents.
292 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
293 Ok(Cow::Borrowed(self))
294 }
295}
296
297impl AsXmlText for &str {
298 /// Return the borrowed string contents.
299 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
300 Ok(Cow::Borrowed(self))
301 }
302}
303
304impl<T: AsXmlText> AsXmlText for Box<T> {
305 /// Return the borrowed [`Box`] contents.
306 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
307 T::as_xml_text(self)
308 }
309}
310
311impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
312 /// Return the borrowed [`Cow`] contents.
313 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
314 B::as_xml_text(self)
315 }
316}
317
318impl<T: AsXmlText> AsXmlText for &T {
319 /// Delegate to the `AsXmlText` implementation on `T`.
320 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
321 T::as_xml_text(*self)
322 }
323}
324
325impl<T: AsXmlText> AsOptionalXmlText for T {
326 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, Error> {
327 <Self as AsXmlText>::as_optional_xml_text(self)
328 }
329}
330
331impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
332 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, Error> {
333 self.as_ref()
334 .map(T::as_optional_xml_text)
335 .transpose()
336 .map(Option::flatten)
337 }
338}
339
/// Represent a way to encode/decode text data into a Rust type.
///
/// This trait can be used in scenarios where implementing [`FromXmlText`]
/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
/// following:
///
/// 1. The type originates in a foreign crate, preventing the implementation
///    of foreign traits.
///
/// 2. There is more than one way to convert a value to/from XML.
///
/// The codec to use for a text can be specified in the attributes understood
/// by `FromXml` and `AsXml` derive macros. See the documentation of the
/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
///
/// The type parameter `T` is the Rust type the codec converts to and from;
/// a single codec type may implement `TextCodec<T>` for several `T`.
#[diagnostic::on_unimplemented(
    message = "`{Self}` cannot be used as XML text codec for values of type `{T}`."
)]
pub trait TextCodec<T> {
    /// Decode a string value into the type.
    ///
    /// The string is passed by value, so implementations may reuse its
    /// allocation. Returns an error if `s` is not a valid encoding of `T`.
    fn decode(&self, s: String) -> Result<T, Error>;

    /// Encode the type as string value.
    ///
    /// The returned text may borrow from `value`.
    ///
    /// If this returns `None`, the string value is not emitted at all.
    fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;

    /// Apply a filter to this codec.
    ///
    /// Filters preprocess strings before they are handed to the codec for
    /// parsing, allowing to, for example, make the codec ignore irrelevant
    /// content by stripping it.
    ///
    /// Consumes the codec and returns a [`Filtered`] wrapping it together
    /// with `filter`.
    // NOTE: The bound on T is needed because any given type A may implement
    // TextCodec for any number of types. If we pass T down to the `Filtered`
    // struct, rustc can do type inference on which `TextCodec`
    // implementation the `filtered` method is supposed to have been called
    // on.
    fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
    where
        // placing the bound here (instead of on the `TextCodec<T>` trait
        // itself) preserves object-safety of TextCodec<T>.
        Self: Sized,
    {
        Filtered {
            filter,
            codec: self,
            // Zero-sized marker; only records `T` in the type.
            bound: PhantomData,
        }
    }
}
389
390/// Wrapper struct to apply a filter to a codec.
391///
392/// You can construct a value of this type via [`TextCodec::filtered`].
393// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
394pub struct Filtered<F, C, T> {
395 filter: F,
396 codec: C,
397 bound: PhantomData<T>,
398}
399
400impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
401 fn decode(&self, s: String) -> Result<T, Error> {
402 let s = self.filter.preprocess(s);
403 self.codec.decode(s)
404 }
405
406 fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
407 self.codec.encode(value)
408 }
409}
410
411/// Text codec which does no transform.
412pub struct Plain;
413
414impl TextCodec<String> for Plain {
415 fn decode(&self, s: String) -> Result<String, Error> {
416 Ok(s)
417 }
418
419 fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
420 Ok(Some(Cow::Borrowed(value.as_str())))
421 }
422}
423
424/// Text codec which returns `None` if the input to decode is the empty string, instead of
425/// attempting to decode it.
426///
427/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
428/// `Option<_>` otherwise.
429pub struct EmptyAsNone;
430
431impl<T> TextCodec<Option<T>> for EmptyAsNone
432where
433 T: FromXmlText + AsXmlText,
434{
435 fn decode(&self, s: String) -> Result<Option<T>, Error> {
436 if s.is_empty() {
437 Ok(None)
438 } else {
439 Some(T::from_xml_text(s)).transpose()
440 }
441 }
442
443 fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
444 Ok(value
445 .as_ref()
446 .map(AsXmlText::as_xml_text)
447 .transpose()?
448 .and_then(|v| (!v.is_empty()).then_some(v)))
449 }
450}
451
452/// Text codec which returns None instead of the empty string.
453pub struct EmptyAsError;
454
455impl TextCodec<String> for EmptyAsError {
456 fn decode(&self, s: String) -> Result<String, Error> {
457 if s.is_empty() {
458 Err(Error::Other("Empty text node."))
459 } else {
460 Ok(s)
461 }
462 }
463
464 fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
465 if value.is_empty() {
466 Err(Error::Other("Empty text node."))
467 } else {
468 Ok(Some(Cow::Borrowed(value.as_str())))
469 }
470 }
471}
472
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
/// A filter is attached to a codec via [`TextCodec::filtered`]; the
/// resulting [`Filtered`] codec applies it to the input of `decode` only.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The string is passed by value, so implementations may modify it in
    /// place and return it.
    fn preprocess(&self, s: String) -> String;
}
480
/// Text preprocessor which returns the input unchanged.
pub struct NoFilter;

impl TextFilter for NoFilter {
    /// Return `s` as-is, without copying.
    fn preprocess(&self, s: String) -> String {
        s
    }
}
489
490/// Text preprocessor to remove all whitespace.
491pub struct StripWhitespace;
492
493impl TextFilter for StripWhitespace {
494 fn preprocess(&self, s: String) -> String {
495 let s: String = s
496 .chars()
497 .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
498 .collect();
499 s
500 }
501}
502
/// Text codec converting between binary data and standard `base64` text.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode standard base64 text into bytes.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        // The engine method is called by path: the engine type also has a
        // `TextCodec::decode` method via the blanket implementation.
        let decoded = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as standard base64 text; always emits text.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
526
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode standard base64 text into an owned byte buffer.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        let bytes = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes())
            .map_err(Error::text_parse_error)?;
        Ok(Cow::Owned(bytes))
    }

    /// Encode the bytes as standard base64 text; always emits text.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
542
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Return `None` for empty input; otherwise decode as `T` and wrap the
    /// result in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Emit nothing for `None`; otherwise encode the contained value.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
563
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine> TextCodec<Vec<u8>> for T {
    /// Decode text into bytes using this engine.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = <T as base64::engine::Engine>::decode(self, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as text using this engine; always emits text.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = <T as base64::engine::Engine>::encode(self, value);
        Ok(Some(Cow::Owned(text)))
    }
}
576
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine, U> TextCodec<Option<U>> for T
where
    T: TextCodec<U>,
{
    /// Return `None` for empty input; otherwise decode as `U` and wrap the
    /// result in `Some`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <T as TextCodec<U>>::decode(self, s).map(Some)
        }
    }

    /// Emit nothing for `None`; otherwise encode the contained value.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <T as TextCodec<U>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
597
598/// Text codec transforming text to binary using hexadecimal nibbles.
599///
600/// The length must be known at compile-time.
601pub struct FixedHex<const N: usize>;
602
603impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
604 fn decode(&self, s: String) -> Result<[u8; N], Error> {
605 if s.len() != 2 * N {
606 return Err(Error::Other("Invalid length"));
607 }
608
609 let mut bytes = [0u8; N];
610 for i in 0..N {
611 bytes[i] =
612 u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
613 }
614
615 Ok(bytes)
616 }
617
618 fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
619 let mut bytes = String::with_capacity(N * 2);
620 for byte in value {
621 bytes.extend(format!("{:02x}", byte).chars());
622 }
623 Ok(Some(Cow::Owned(bytes)))
624 }
625}
626
627impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
628where
629 FixedHex<N>: TextCodec<T>,
630{
631 fn decode(&self, s: String) -> Result<Option<T>, Error> {
632 if s.is_empty() {
633 return Ok(None);
634 }
635 Ok(Some(self.decode(s)?))
636 }
637
638 fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
639 decoded
640 .as_ref()
641 .map(|x| self.encode(x))
642 .transpose()
643 .map(Option::flatten)
644 }
645}
646
647/// Text codec for colon-separated bytes of uppercase hexadecimal.
648pub struct ColonSeparatedHex;
649
650impl TextCodec<Vec<u8>> for ColonSeparatedHex {
651 fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
652 assert_eq!((s.len() + 1) % 3, 0);
653 let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
654 for i in 0..(1 + s.len()) / 3 {
655 let byte =
656 u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
657 if 3 * i + 2 < s.len() {
658 assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
659 }
660 bytes.push(byte);
661 }
662 Ok(bytes)
663 }
664
665 fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
666 // TODO: Super inefficient!
667 let mut bytes = Vec::with_capacity(decoded.len());
668 for byte in decoded {
669 bytes.push(format!("{:02X}", byte));
670 }
671 Ok(Some(Cow::Owned(bytes.join(":"))))
672 }
673}