1// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7//! Module containing implementations for conversions to/from XML text.
8
9use core::marker::PhantomData;
10
11use alloc::{
12 borrow::Cow,
13 format,
14 string::{String, ToString},
15 vec::Vec,
16};
17
18use crate::{error::Error, AsXmlText, FromXmlText};
19
20#[cfg(feature = "base64")]
21use base64::engine::general_purpose::STANDARD as StandardBase64Engine;
22
// Generates `FromXmlText` and `AsXmlText` implementations for every listed
// type by delegating to the type's `FromStr` and `Display` implementations.
//
// Each entry may be preceded by one `#[cfg(..)]` attribute; that attribute is
// copied onto both generated `impl` blocks.
macro_rules! convert_via_fromstr_and_display {
    ($($(#[cfg $gate:tt])? $target:ty,)+) => {
        $(
            $(#[cfg $gate])?
            impl FromXmlText for $target {
                #[doc = concat!("Parse [`", stringify!($target), "`] from XML text via [`FromStr`][`core::str::FromStr`].")]
                fn from_xml_text(s: String) -> Result<Self, Error> {
                    s.parse::<Self>().map_err(Error::text_parse_error)
                }
            }

            $(#[cfg $gate])?
            impl AsXmlText for $target {
                #[doc = concat!("Convert [`", stringify!($target), "`] to XML text via [`Display`][`core::fmt::Display`].\n\nThis implementation never fails.")]
                fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
                    let rendered = ToString::to_string(self);
                    Ok(Cow::Owned(rendered))
                }
            }
        )+
    }
}
48
49/// This provides an implementation compliant with xsd::bool.
50impl FromXmlText for bool {
51 /// Parse a boolean from XML text.
52 ///
53 /// The values `"1"` and `"true"` are considered true. The values `"0"`
54 /// and `"false"` are considered `false`. Any other value is invalid and
55 /// will return an error.
56 fn from_xml_text(s: String) -> Result<Self, Error> {
57 match s.as_str() {
58 "1" => "true",
59 "0" => "false",
60 other => other,
61 }
62 .parse()
63 .map_err(Error::text_parse_error)
64 }
65}
66
67/// This provides an implementation compliant with xsd::bool.
68impl AsXmlText for bool {
69 /// Convert a boolean to XML text.
70 ///
71 /// `true` is converted to `"true"` and `false` is converted to `"false"`.
72 /// This implementation never fails.
73 fn as_xml_text(&self) -> Result<Cow<'_, str>, Error> {
74 match self {
75 true => Ok(Cow::Borrowed("true")),
76 false => Ok(Cow::Borrowed("false")),
77 }
78 }
79}
80
81convert_via_fromstr_and_display! {
82 u8,
83 u16,
84 u32,
85 u64,
86 u128,
87 usize,
88 i8,
89 i16,
90 i32,
91 i64,
92 i128,
93 isize,
94 f32,
95 f64,
96 char,
97 core::net::IpAddr,
98 core::net::Ipv4Addr,
99 core::net::Ipv6Addr,
100 core::net::SocketAddr,
101 core::net::SocketAddrV4,
102 core::net::SocketAddrV6,
103 core::num::NonZeroU8,
104 core::num::NonZeroU16,
105 core::num::NonZeroU32,
106 core::num::NonZeroU64,
107 core::num::NonZeroU128,
108 core::num::NonZeroUsize,
109 core::num::NonZeroI8,
110 core::num::NonZeroI16,
111 core::num::NonZeroI32,
112 core::num::NonZeroI64,
113 core::num::NonZeroI128,
114 core::num::NonZeroIsize,
115
116 #[cfg(feature = "uuid")]
117 uuid::Uuid,
118
119 #[cfg(feature = "jid")]
120 jid::Jid,
121 #[cfg(feature = "jid")]
122 jid::FullJid,
123 #[cfg(feature = "jid")]
124 jid::BareJid,
125 #[cfg(feature = "jid")]
126 jid::NodePart,
127 #[cfg(feature = "jid")]
128 jid::DomainPart,
129 #[cfg(feature = "jid")]
130 jid::ResourcePart,
131}
132
133/// Represent a way to encode/decode text data into a Rust type.
134///
135/// This trait can be used in scenarios where implementing [`FromXmlText`]
136/// and/or [`AsXmlText`] on a type is not feasible or sensible, such as the
137/// following:
138///
139/// 1. The type originates in a foreign crate, preventing the implementation
140/// of foreign traits.
141///
142/// 2. There is more than one way to convert a value to/from XML.
143///
144/// The codec to use for a text can be specified in the attributes understood
145/// by `FromXml` and `AsXml` derive macros. See the documentation of the
146/// [`FromXml`][`macro@crate::FromXml`] derive macro for details.
147pub trait TextCodec<T> {
148 /// Decode a string value into the type.
149 fn decode(&self, s: String) -> Result<T, Error>;
150
151 /// Encode the type as string value.
152 ///
153 /// If this returns `None`, the string value is not emitted at all.
154 fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error>;
155
156 /// Apply a filter to this codec.
157 ///
158 /// Filters preprocess strings before they are handed to the codec for
159 /// parsing, allowing to, for example, make the codec ignore irrelevant
160 /// content by stripping it.
161 // NOTE: The bound on T is needed because any given type A may implement
162 // TextCodec for any number of types. If we pass T down to the `Filtered`
163 // struct, rustc can do type inference on which `TextCodec`
164 // implementation the `filtered` method is supposed to have been called
165 // on.
166 fn filtered<F: TextFilter>(self, filter: F) -> Filtered<F, Self, T>
167 where
168 // placing the bound here (instead of on the `TextCodec<T>` trait
169 // itself) preserves object-safety of TextCodec<T>.
170 Self: Sized,
171 {
172 Filtered {
173 filter,
174 codec: self,
175 bound: PhantomData,
176 }
177 }
178}
179
/// Codec adapter which runs a filter on the text before decoding it.
///
/// Values of this type are created through [`TextCodec::filtered`].
// NOTE: `T` is bound here for the same reason it is passed along by
// `TextCodec::filtered`: a type may implement `TextCodec` for several target
// types, and carrying `T` lets rustc infer which implementation is meant.
pub struct Filtered<F, C, T> {
    /// Preprocessor applied to incoming text.
    filter: F,
    /// The wrapped codec which does the actual conversion.
    codec: C,
    /// Ties the wrapper to the codec's target type without storing a `T`.
    bound: PhantomData<T>,
}
189
190impl<T, F: TextFilter, C: TextCodec<T>> TextCodec<T> for Filtered<F, C, T> {
191 fn decode(&self, s: String) -> Result<T, Error> {
192 let s = self.filter.preprocess(s);
193 self.codec.decode(s)
194 }
195
196 fn encode<'x>(&self, value: &'x T) -> Result<Option<Cow<'x, str>>, Error> {
197 self.codec.encode(value)
198 }
199}
200
201/// Text codec which does no transform.
202pub struct Plain;
203
204impl TextCodec<String> for Plain {
205 fn decode(&self, s: String) -> Result<String, Error> {
206 Ok(s)
207 }
208
209 fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
210 Ok(Some(Cow::Borrowed(value.as_str())))
211 }
212}
213
214/// Text codec which returns `None` if the input to decode is the empty string, instead of
215/// attempting to decode it.
216///
217/// Particularly useful when parsing `Option<T>` on `#[xml(text)]`, which does not support
218/// `Option<_>` otherwise.
219pub struct EmptyAsNone;
220
221impl<T> TextCodec<Option<T>> for EmptyAsNone
222where
223 T: FromXmlText + AsXmlText,
224{
225 fn decode(&self, s: String) -> Result<Option<T>, Error> {
226 if s.is_empty() {
227 Ok(None)
228 } else {
229 Some(T::from_xml_text(s)).transpose()
230 }
231 }
232
233 fn encode<'x>(&self, value: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
234 Ok(value
235 .as_ref()
236 .map(AsXmlText::as_xml_text)
237 .transpose()?
238 .map(|v| (!v.is_empty()).then_some(v))
239 .flatten())
240 }
241}
242
243/// Text codec which returns None instead of the empty string.
244pub struct EmptyAsError;
245
246impl TextCodec<String> for EmptyAsError {
247 fn decode(&self, s: String) -> Result<String, Error> {
248 if s.is_empty() {
249 Err(Error::Other("Empty text node."))
250 } else {
251 Ok(s)
252 }
253 }
254
255 fn encode<'x>(&self, value: &'x String) -> Result<Option<Cow<'x, str>>, Error> {
256 if value.is_empty() {
257 Err(Error::Other("Empty text node."))
258 } else {
259 Ok(Some(Cow::Borrowed(value.as_str())))
260 }
261 }
262}
263
/// Preprocessing step applied to text data coming from XML.
///
/// Codecs may use a filter to let callers customize part of their behaviour,
/// such as discarding characters before the text is parsed.
pub trait TextFilter {
    /// Consume the incoming string and return the processed text.
    fn preprocess(&self, s: String) -> String;
}
271
272/// Text preprocessor which returns the input unchanged.
273pub struct NoFilter;
274
275impl TextFilter for NoFilter {
276 fn preprocess(&self, s: String) -> String {
277 s
278 }
279}
280
281/// Text preprocessor to remove all whitespace.
282pub struct StripWhitespace;
283
284impl TextFilter for StripWhitespace {
285 fn preprocess(&self, s: String) -> String {
286 let s: String = s
287 .chars()
288 .filter(|ch| *ch != ' ' && *ch != '\n' && *ch != '\t')
289 .collect();
290 s
291 }
292}
293
/// Text codec converting between text and binary data using standard
/// `base64`.
///
/// `Base64` uses the [`base64::engine::general_purpose::STANDARD`] engine.
/// [`TextCodec`] is also automatically implemented for any value which
/// implements [`base64::engine::Engine`], allowing you to choose different
/// alphabets easily.
#[cfg(feature = "base64")]
pub struct Base64;

#[cfg(feature = "base64")]
impl TextCodec<Vec<u8>> for Base64 {
    /// Decode standard base64 text into bytes; invalid input is reported as
    /// a text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode the bytes as standard base64 text. Never fails and always
    /// emits a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = base64::engine::Engine::encode(&StandardBase64Engine, value.as_slice());
        Ok(Some(Cow::Owned(text)))
    }
}
317
#[cfg(feature = "base64")]
impl<'x> TextCodec<Cow<'x, [u8]>> for Base64 {
    /// Decode standard base64 text into an owned byte buffer; invalid input
    /// is reported as a text parse error.
    fn decode(&self, s: String) -> Result<Cow<'x, [u8]>, Error> {
        match base64::engine::Engine::decode(&StandardBase64Engine, s.as_bytes()) {
            Ok(bytes) => Ok(Cow::Owned(bytes)),
            Err(e) => Err(Error::text_parse_error(e)),
        }
    }

    /// Encode the bytes as standard base64 text. Never fails and always
    /// emits a value.
    fn encode<'a>(&self, value: &'a Cow<'x, [u8]>) -> Result<Option<Cow<'a, str>>, Error> {
        let bytes: &[u8] = value;
        let text = base64::engine::Engine::encode(&StandardBase64Engine, bytes);
        Ok(Some(Cow::Owned(text)))
    }
}
333
#[cfg(feature = "base64")]
impl<T> TextCodec<Option<T>> for Base64
where
    Base64: TextCodec<T>,
{
    /// Decode the empty string as `None`; anything else is decoded as `T`
    /// and wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<T>, Error> {
        if s.is_empty() {
            Ok(None)
        } else {
            <Self as TextCodec<T>>::decode(self, s).map(Some)
        }
    }

    /// Encode `None` as "emit nothing"; a `Some` value is encoded as `T`.
    fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <Self as TextCodec<T>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
354
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine> TextCodec<Vec<u8>> for T {
    /// Decode text into bytes using this engine; invalid input is reported
    /// as a text parse error.
    fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
        let decoded = <T as base64::engine::Engine>::decode(self, s.as_bytes());
        decoded.map_err(Error::text_parse_error)
    }

    /// Encode the bytes as text using this engine. Never fails and always
    /// emits a value.
    fn encode<'x>(&self, value: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
        let text = <T as base64::engine::Engine>::encode(self, value.as_slice());
        Ok(Some(Cow::Owned(text)))
    }
}
367
// Optional values for any base64 engine: the empty string stands for `None`.
#[cfg(feature = "base64")]
impl<T: base64::engine::Engine, U> TextCodec<Option<U>> for T
where
    T: TextCodec<U>,
{
    /// Decode the empty string as `None`; anything else is decoded as `U`
    /// and wrapped in `Some`.
    fn decode(&self, s: String) -> Result<Option<U>, Error> {
        if s.is_empty() {
            return Ok(None);
        }
        <T as TextCodec<U>>::decode(self, s).map(Some)
    }

    /// Encode `None` as "emit nothing"; a `Some` value is encoded as `U`.
    fn encode<'x>(&self, decoded: &'x Option<U>) -> Result<Option<Cow<'x, str>>, Error> {
        match decoded {
            Some(inner) => <T as TextCodec<U>>::encode(self, inner),
            None => Ok(None),
        }
    }
}
388
389/// Text codec transforming text to binary using hexadecimal nibbles.
390///
391/// The length must be known at compile-time.
392pub struct FixedHex<const N: usize>;
393
394impl<const N: usize> TextCodec<[u8; N]> for FixedHex<N> {
395 fn decode(&self, s: String) -> Result<[u8; N], Error> {
396 if s.len() != 2 * N {
397 return Err(Error::Other("Invalid length"));
398 }
399
400 let mut bytes = [0u8; N];
401 for i in 0..N {
402 bytes[i] =
403 u8::from_str_radix(&s[2 * i..2 * i + 2], 16).map_err(Error::text_parse_error)?;
404 }
405
406 Ok(bytes)
407 }
408
409 fn encode<'x>(&self, value: &'x [u8; N]) -> Result<Option<Cow<'x, str>>, Error> {
410 let mut bytes = String::with_capacity(N * 2);
411 for byte in value {
412 bytes.extend(format!("{:02x}", byte).chars());
413 }
414 Ok(Some(Cow::Owned(bytes)))
415 }
416}
417
418impl<T, const N: usize> TextCodec<Option<T>> for FixedHex<N>
419where
420 FixedHex<N>: TextCodec<T>,
421{
422 fn decode(&self, s: String) -> Result<Option<T>, Error> {
423 if s.is_empty() {
424 return Ok(None);
425 }
426 Ok(Some(self.decode(s)?))
427 }
428
429 fn encode<'x>(&self, decoded: &'x Option<T>) -> Result<Option<Cow<'x, str>>, Error> {
430 decoded
431 .as_ref()
432 .map(|x| self.encode(x))
433 .transpose()
434 .map(Option::flatten)
435 }
436}
437
438/// Text codec for colon-separated bytes of uppercase hexadecimal.
439pub struct ColonSeparatedHex;
440
441impl TextCodec<Vec<u8>> for ColonSeparatedHex {
442 fn decode(&self, s: String) -> Result<Vec<u8>, Error> {
443 assert_eq!((s.len() + 1) % 3, 0);
444 let mut bytes = Vec::with_capacity((s.len() + 1) / 3);
445 for i in 0..(1 + s.len()) / 3 {
446 let byte =
447 u8::from_str_radix(&s[3 * i..3 * i + 2], 16).map_err(Error::text_parse_error)?;
448 if 3 * i + 2 < s.len() {
449 assert_eq!(&s[3 * i + 2..3 * i + 3], ":");
450 }
451 bytes.push(byte);
452 }
453 Ok(bytes)
454 }
455
456 fn encode<'x>(&self, decoded: &'x Vec<u8>) -> Result<Option<Cow<'x, str>>, Error> {
457 // TODO: Super inefficient!
458 let mut bytes = Vec::with_capacity(decoded.len());
459 for byte in decoded {
460 bytes.push(format!("{:02X}", byte));
461 }
462 Ok(Some(Cow::Owned(bytes.join(":"))))
463 }
464}