1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! # Convert data to and from XML text
8//!
9//! This module provides traits and types related to conversion of XML text
10//! data to and from Rust types, as well as the [`AsXmlText`],
11//! [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] and [`FromXmlText`]
12//! implementations for foreign and standard-library types.
13//!
14//! ## Support for types from third-party crates
15//!
16//! Beyond the standard library types, the following additional types are
17//! supported:
18//!
19//! | Feature gate | Types |
20//! | --- | --- |
21//! | `jid` | `jid::Jid`, `jid::BareJid`, `jid::FullJid` |
22//! | `serde_json` | `serde_json::Value` |
23//! | `uuid` | `uuid::Uuid` |
24//!
25//! ### Adding support for more types
26//!
27//! Due to the orphan rule, it is not possible for applications to implement
28//! [`AsXmlText`], [`AsOptionalXmlText`][`crate::AsOptionalXmlText`] or
29//! [`FromXmlText`] on types which originate from third-party crates. Because
30//! of that, we are **extremely liberal** at accepting merge requests for
31//! implementations of these traits for types from third-party crates.
32//!
33//! The only requirement is that the implementation is gated behind a feature
34//! flag which is disabled-by-default.
35//!
36//! ### Workaround for unsupported types
37//!
38//! If making a merge request against `xso` and waiting for a release is not
39//! an option, you can use newtype wrappers in almost all cases, for example:
40//!
41#![cfg_attr(
42 not(all(feature = "std", feature = "macros")),
43 doc = "Because the std or macros feature was not enabled at doc build time, the example cannot be tested.\n\n```ignore\n"
44)]
45#![cfg_attr(all(feature = "std", feature = "macros"), doc = "\n```\n")]
46//! # use xso::{AsXml, FromXml, AsXmlText, FromXmlText, error::Error};
47//! # use std::borrow::Cow;
48//! use std::process::ExitCode;
49//!
50//! struct MyExitCode(ExitCode);
51//!
52//! impl AsXmlText for MyExitCode {
53//! fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
54//! match self.0 {
55//! ExitCode::FAILURE => Ok(Cow::Borrowed("failure")),
56//! ExitCode::SUCCESS => Ok(Cow::Borrowed("success")),
57//! _ => Err(Error::Other("unknown exit code")),
58//! }
59//! }
60//! }
61//!
62//! impl FromXmlText for MyExitCode {
63//! fn from_xml_text(s: String) -> Result<Self, Error> {
64//! match s.as_str() {
65//! "failure" => Ok(Self(ExitCode::FAILURE)),
66//! "success" => Ok(Self(ExitCode::SUCCESS)),
67//! _ => Err(Error::Other("unknown exit code")),
68//! }
69//! }
70//! }
71//!
72//! #[derive(AsXml, FromXml)]
73//! #[xml(namespace = "urn:example", name = "process-result")]
74//! struct ProcessResult {
75//! #[xml(attribute)]
76//! code: MyExitCode,
77//! #[xml(text)]
78//! stdout: String,
79//! }
80//! ```
81//!
82//! Of course, such an approach reduces the usability of your struct (and
83//! comes with issues once references are needed), so making a merge request
84//! against `xso` is generally preferable.
85
86use core::marker::PhantomData;
87
88use alloc::{
89 borrow::Cow,
90 format,
91 string::{String, ToString},
92 vec::Vec,
93};
94
95use crate::{error::Error, AsXmlText, FromXmlText};
96
97#[cfg(feature = "base64")]
98use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
99
// Implements `FromXmlText` and `AsXmlText` for every listed type by
// delegating to the type's `FromStr` and `Display` implementations.
//
// Each entry may be preceded by one `#[cfg(..)]` attribute, which is then
// attached to both generated `impl` blocks. Every entry, including the last
// one, must be terminated by a comma.
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $gate:tt])? $ty:ty,)+) => {
        $(
            $(#[cfg $gate])?
            impl FromXmlText for $ty {
                #[doc = concat!("Parse [`", stringify!($ty), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(s: String) -> Result<Self, Error> {
                    let parsed = s.parse::<Self>();
                    parsed.map_err(Error::text_parse_error)
                }
            }

            $(#[cfg $gate])?
            impl AsXmlText for $ty {
                #[doc = concat!("Convert [`", stringify!($ty), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
                    let rendered = self.to_string();
                    Ok(Cow::Owned(rendered))
                }
            }
        )+
    }
}
125
126/// This provides an implementation compliant with xsd::bool.
127impl FromXmlText for bool {
128 /// Parse a boolean from XML text.
129 ///
130 /// The values `"1"` and `"true"` are considered true. The values `"0"`
131 /// and `"false"` are considered `false`. Any other value is invalid and
132 /// will return an error.
133 fn from_xml_text(s: String) -> Result<Self, Error> {
134 match s.as_str() {
135 "1" => "true",
136 "0" => "false",
137 other => other,
138 }
139 .parse()
140 .map_err(Error::text_parse_error)
141 }
142}
143
144/// This provides an implementation compliant with xsd::bool.
145impl AsXmlText for bool {
146 /// Convert a boolean to XML text.
147 ///
148 /// `true` is converted to `"true"` and `false` is converted to `"false"`.
149 /// This implementation never fails.
150 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
151 match self {
152 true => Ok(Cow::Borrowed("true")),
153 false => Ok(Cow::Borrowed("false")),
154 }
155 }
156}
157
// Generate `FromXmlText` and `AsXmlText` implementations, based on `FromStr`
// and `Display`, for every type listed below.
convert_via_fromstr_and_display! {
    // Primitive integers, floats and `char`.
    u8,
    u16,
    u32,
    u64,
    u128,
    usize,
    i8,
    i16,
    i32,
    i64,
    i128,
    isize,
    f32,
    f64,
    char,
    // IP and socket address types.
    core::net::IpAddr,
    core::net::Ipv4Addr,
    core::net::Ipv6Addr,
    core::net::SocketAddr,
    core::net::SocketAddrV4,
    core::net::SocketAddrV6,
    // Non-zero integer types.
    core::num::NonZeroU8,
    core::num::NonZeroU16,
    core::num::NonZeroU32,
    core::num::NonZeroU64,
    core::num::NonZeroU128,
    core::num::NonZeroUsize,
    core::num::NonZeroI8,
    core::num::NonZeroI16,
    core::num::NonZeroI32,
    core::num::NonZeroI64,
    core::num::NonZeroI128,
    core::num::NonZeroIsize,

    // Types from optional third-party crates; each entry is only compiled
    // when the corresponding feature is enabled.
    #[cfg(feature = "uuid")]
    uuid::Uuid,

    #[cfg(feature = "jid")]
    jid::Jid,
    #[cfg(feature = "jid")]
    jid::FullJid,
    #[cfg(feature = "jid")]
    jid::BareJid,
    #[cfg(feature = "jid")]
    jid::NodePart,
    #[cfg(feature = "jid")]
    jid::DomainPart,
    #[cfg(feature = "jid")]
    jid::ResourcePart,

    #[cfg(feature = "serde_json")]
    serde_json::Value,
}
212
213/// Represent a way to encode/decode text data into a Rust type.
214///
215/// This trait can be used in scenarios where implementing [`FromXmlText`]
216/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
217/// following:
218///
219/// 1. The type originates in a foreign crate, preventing the implementation
220/// of foreign traits.
221///
222/// 2. There is more than one way to convert a value to/from XML.
223///
224/// The codec to use for a text can be specified in the attributes understood
225/// by `FromXml` and `AsXml` derive macros. See the documentation of the
226/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
227pub trait TextCodec<T> {
228 /// Decode a string value into the type.
229 fn decode(&self, s: String) -> Result<T, Error>;
230
231 /// Encode the type as string value.
232 ///
233 /// If this returns `None`, the string value is not emitted at all.
234 fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
235
236 /// Apply a filter to this codec.
237 ///
238 /// Filters preprocess strings before they are handed to the codec for
239 /// parsing, allowing to, for example, make the codec ignore irrelevant
240 /// content by stripping it.
241 // NOTE: The bound on T is needed because any given type A may implement
242 // TextCodec for any number of types. If we pass T down to the `Filtered`
243 // struct, rustc can do type inference on which `TextCodec`
244 // implementation the `filtered` method is supposed to have been called
245 // on.
246 fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
247 where
248 // placing the bound here (instead of on the `TextCodec<T>` trait
249 // itself) preserves object-safety of TextCodec<T>.
250 Self: Sized,
251 {
252 Filtered {
253 filter,
254 codec: self,
255 bound: PhantomData,
256 }
257 }
258}
259
/// Wrapper struct to apply a filter to a codec.
///
/// When decoding, the text is first passed through the filter and the result
/// is handed to the wrapped codec. Encoding is forwarded to the wrapped codec
/// unchanged.
///
/// You can construct a value of this type via [`TextCodec::filtered`].
// NOTE: see the note on TextCodec::filtered for why we bind `T` here, too.
pub struct Filtered<F, C, T> {
    /// Filter applied to incoming text before it is decoded.
    filter: F,
    /// Codec which performs the actual conversion.
    codec: C,
    /// Ties the wrapper to the value type `T` without storing a `T`.
    bound: PhantomData<T>,
}
269
270impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
271 fn decode(&self, s: String) -> Result<T, Error> {
272 let s = self.filter.preprocess(s);
273 self.codec.decode(s)
274 }
275
276 fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
277 self.codec.encode(value)
278 }
279}
280
281/// Text codec which does no transform.
282pub struct Plain;
283
284impl TextCodec<String> for Plain {
285 fn decode(&self, s: String) -> Result<String, Error> {
286 Ok(s)
287 }
288
289 fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
290 Ok(Some(Cow::Borrowed(value.as_str())))
291 }
292}
293
294/// Text codec which returns `None` if the input to decode is the empty string, instead of
295/// attempting to decode it.
296///
297/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
298/// `Option<_>` otherwise.
299pub struct EmptyAsNone;
300
301impl<T> TextCodec<Option<T>> for EmptyAsNone
302where
303 T: FromXmlText + AsXmlText,
304{
305 fn decode(&self, s: String) -> Result<Option<T>, Error> {
306 if s.is_empty() {
307 Ok(None)
308 } else {
309 Some(T::from_xml_text(s)).transpose()
310 }
311 }
312
313 fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
314 Ok(value
315 .as_ref()
316 .map(AsXmlText::as_xml_text)
317 .transpose()?
318 .and_then(|v| (!v.is_empty()).then_some(v)))
319 }
320}
321
322/// Text codec which returns None instead of the empty string.
323pub struct EmptyAsError;
324
325impl TextCodec<String> for EmptyAsError {
326 fn decode(&self, s: String) -> Result<String, Error> {
327 if s.is_empty() {
328 Err(Error::Other("Empty text node."))
329 } else {
330 Ok(s)
331 }
332 }
333
334 fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
335 if value.is_empty() {
336 Err(Error::Other("Empty text node."))
337 } else {
338 Ok(Some(Cow::Borrowed(value.as_str())))
339 }
340 }
341}
342
/// Trait for preprocessing text data from XML.
///
/// This may be used by codecs to allow to customize some of their behaviour.
/// A filter is attached to a codec via [`TextCodec::filtered`]; the resulting
/// [`Filtered`] codec calls [`preprocess`][`TextFilter::preprocess`] on the
/// incoming text before decoding it.
pub trait TextFilter {
    /// Process the incoming string and return the result of the processing.
    ///
    /// The string is taken by value, so implementations may modify it in
    /// place and hand it back.
    fn preprocess(&self, s: String) -> String;
}
350
351/// Text preprocessor which returns the input unchanged.
352pub struct NoFilter;
353
354impl TextFilter for NoFilter {
355 fn preprocess(&self, s: String) -> String {
356 s
357 }
358}
359
360/// Text preprocessor to remove all whitespace.
361pub struct StripWhitespace;
362
363impl TextFilter for StripWhitespace {
364 fn preprocess(&self, s: String) -> String {
365 let s: String = s
366 .chars()
367 .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
368 .collect();
369 s
370 }
371}
372
/// Text codec converting between text and binary data using standard
/// `base64`.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode base64 text into bytes; malformed input yields a text parse
    /// error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        // The fully qualified call is deliberate: `Engine` types also
        // implement `TextCodec`, which has a `decode` method of its own.
        let decoded = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as base64 text; this always emits text and never fails.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
396
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode base64 text into an owned byte buffer; malformed input yields
    /// a text parse error.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        let bytes = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes())
            .map_err(Error::text_parse_error)?;
        Ok(Cow::Owned(bytes))
    }

    /// Encode the bytes as base64 text; this always emits text and never
    /// fails.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value);
        Ok(Some(Cow::Owned(text)))
    }
}
412
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode empty text as `None`; anything else is decoded as `T` and
    /// wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Base64 as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Encode `None` as "no text"; a `Some` value is encoded via the codec
    /// for `T`.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Base64 as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
433
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine> TextCodec<Vec<u8>> for T {
    /// Decode text into bytes using this engine; malformed input yields a
    /// text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        // Fully qualified on purpose: `T` has a `decode` method from both
        // `Engine` and `TextCodec`.
        let decoded = base64::engine::Engine::decode(self, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode bytes as text using this engine; this always emits text and
    /// never fails.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(self, value);
        Ok(Some(Cow::Owned(text)))
    }
}
446
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine, U> TextCodec<Option<U>> for T
where
    T: TextCodec<U>,
{
    /// Decode empty text as `None`; anything else is decoded as `U` and
    /// wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <T as TextCodec<U>>::decode(self, s).map(Some)
        }
    }

    /// Encode `None` as "no text"; a `Some` value is encoded via the codec
    /// for `U`.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <T as TextCodec<U>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
467
468/// Text codec transforming text to binary using hexadecimal nibbles.
469///
470/// The length must be known at compile-time.
471pub struct FixedHex<const N: usize>;
472
473impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
474 fn decode(&self, s: String) -> Result<[u8; N], Error> {
475 if s.len() != 2 * N {
476 return Err(Error::Other("Invalid length"));
477 }
478
479 let mut bytes = [0u8; N];
480 for i in 0..N {
481 bytes[i] =
482 u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
483 }
484
485 Ok(bytes)
486 }
487
488 fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
489 let mut bytes = String::with_capacity(N * 2);
490 for byte in value {
491 bytes.extend(format!("{:02x}", byte).chars());
492 }
493 Ok(Some(Cow::Owned(bytes)))
494 }
495}
496
497impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
498where
499 FixedHex<N>: TextCodec<T>,
500{
501 fn decode(&self, s: String) -> Result<Option<T>, Error> {
502 if s.is_empty() {
503 return Ok(None);
504 }
505 Ok(Some(self.decode(s)?))
506 }
507
508 fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
509 decoded
510 .as_ref()
511 .map(|x| self.encode(x))
512 .transpose()
513 .map(Option::flatten)
514 }
515}
516
517/// Text codec for colon-separated bytes of uppercase hexadecimal.
518pub struct ColonSeparatedHex;
519
520impl TextCodec<Vec<u8>> for ColonSeparatedHex {
521 fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
522 assert_eq!((s.len() + 1) % 3, 0);
523 let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
524 for i in 0..(1 + s.len()) / 3 {
525 let byte =
526 u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
527 if 3 * i + 2 < s.len() {
528 assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
529 }
530 bytes.push(byte);
531 }
532 Ok(bytes)
533 }
534
535 fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
536 // TODO: Super inefficient!
537 let mut bytes = Vec::with_capacity(decoded.len());
538 for byte in decoded {
539 bytes.push(format!("{:02X}", byte));
540 }
541 Ok(Some(Cow::Owned(bytes.join(":"))))
542 }
543}