1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![forbid(unsafe_code)]
4#![warn(missing_docs)]
5/*!
6# XML Streamed Objects -- serde-like parsing for XML
7
8This crate provides the traits for parsing XML data into Rust structs, and
9vice versa.
10
11While it is in 0.0.x versions, many features still need to be developed, but
12rest assured that there is a solid plan to get it fully usable for even
13advanced XML scenarios.
14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library in parsing XML streams as specified in RFC 6120.
17*/
18
19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
20//
21// This Source Code Form is subject to the terms of the Mozilla Public
22// License, v. 2.0. If a copy of the MPL was not distributed with this
23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
24
25#![no_std]
26
27extern crate alloc;
28#[cfg(feature = "std")]
29extern crate std;
30#[cfg(feature = "std")]
31use std::io;
32
33pub mod asxml;
34pub mod error;
35pub mod fromxml;
36#[cfg(feature = "minidom")]
37pub mod minidom_compat;
38mod rxml_util;
39pub mod text;
40
/// Items re-exported for use by macros.
///
/// This module is hidden from the documentation and only compiled when the
/// `macros` feature is enabled.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub mod exports {
    // `minidom` is only re-exported when the `minidom` feature is enabled;
    // `rxml` is always re-exported.
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;

    /// The built-in `bool` type.
    ///
    /// This is re-exported for use by macros in cases where we cannot rely on
    /// people not having done `type bool = str` or some similar shenanigans.
    pub type CoreBool = bool;

    /// The built-in `u8` type.
    ///
    /// This is re-exported for use by macros in cases where we cannot rely on
    /// people not having done `type u8 = str` or some similar shenanigans.
    pub type CoreU8 = u8;
}
60
61use alloc::{
62 borrow::{Cow, ToOwned},
63 boxed::Box,
64 string::String,
65 vec::Vec,
66};
67
68#[doc(inline)]
69pub use text::TextCodec;
70
71#[doc(inline)]
72pub use rxml_util::Item;
73
74#[doc = include_str!("from_xml_doc.md")]
75#[doc(inline)]
76#[cfg(feature = "macros")]
77pub use xso_proc::FromXml;
78
79/// # Make a struct or enum serialisable to XML
80///
81/// This derives the [`AsXml`] trait on a struct or enum. It is the
82/// counterpart to [`macro@FromXml`].
83///
84/// The attributes necessary and available for the derivation to work are
85/// documented on [`macro@FromXml`].
86#[doc(inline)]
87#[cfg(feature = "macros")]
88pub use xso_proc::AsXml;
89
/// Trait allowing to iterate a struct's contents as serialisable
/// [`Item`]s.
///
/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::AsXml>::ItemIter`.
pub trait AsXml {
    /// The iterator type.
    ///
    /// Each element yielded by the iterator is a `Result`, so errors may
    /// surface while iterating, not only when the iterator is created.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::AsXml>::ItemIter`.
    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
    where
        Self: 'x;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`Item`] items.
    ///
    /// The returned iterator borrows from `self`.
    ///
    /// # Errors
    ///
    /// Returns an error if the iterator cannot be created.
    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
}
112
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    ///
    /// A value of this type is handed out by [`feed`][`Self::feed`] as
    /// `Ok(Some(_))` once the construction has completed.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// # Errors
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it may never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
138
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// The builder's [`FromEventsBuilder::Output`] is required to be `Self`.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// Otherwise, an appropriate error is returned.
    ///
    /// # Errors
    ///
    /// Returns a [`FromEventsError`][`error::FromEventsError`] if no builder
    /// can be created. The helper functions in this crate distinguish its
    /// `Mismatch` variant, which they report as a type mismatch, from its
    /// `Invalid` variant, which carries an [`error::Error`] that is passed
    /// on to the caller.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
175
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`core::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed by value, which allows implementations to reuse
    /// its allocation (as the implementation for `String` does).
    ///
    /// # Errors
    ///
    /// Returns an error if `data` cannot be converted into `Self`.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
194
impl FromXmlText for String {
    /// Return the string unchanged.
    ///
    /// This conversion never fails; the passed string is moved into the
    /// result without copying.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
        Ok(data)
    }
}
201
202impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
203 /// Return a [`Cow::Owned`] containing the parsed value.
204 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
205 Ok(Cow::Owned(T::from_xml_text(data)?))
206 }
207}
208
209impl<T: FromXmlText> FromXmlText for Option<T> {
210 /// Return a [`Some`] containing the parsed value.
211 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
212 Ok(Some(T::from_xml_text(data)?))
213 }
214}
215
216impl<T: FromXmlText> FromXmlText for Box<T> {
217 /// Return a [`Box`] containing the parsed value.
218 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
219 Ok(Box::new(T::from_xml_text(data)?))
220 }
221}
222
223/// Trait to convert a value to an XML text string.
224///
225/// Implementing this trait for a type allows it to be used both for XML
226/// character data within elements and for XML attributes. For XML attributes,
227/// the behaviour is defined by [`AsXmlText::as_optional_xml_text`], while
228/// XML element text content uses [`AsXmlText::as_xml_text`]. Implementing
229/// [`AsXmlText`] automatically provides an implementation of
230/// [`AsOptionalXmlText`].
231///
232/// If your type should only be used in XML attributes and has no correct
233/// serialisation in XML text, you should *only* implement
234/// [`AsOptionalXmlText`] and omit the [`AsXmlText`] implementation.
235///
236/// This trait is implemented for many standard library types implementing
237/// [`core::fmt::Display`]. In addition, the following feature flags can enable
238/// more implementations:
239///
240/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
241/// - `uuid`: `uuid::Uuid`
242///
243/// Because of the unfortunate situation as described in [`FromXmlText`], we
244/// are **extremely liberal** with accepting optional dependencies for this
245/// purpose. You are very welcome to make merge requests against this crate
246/// adding support for parsing third-party crates.
247pub trait AsXmlText {
248 /// Convert the value to an XML string in a context where an absent value
249 /// cannot be represented.
250 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
251
252 /// Convert the value to an XML string in a context where an absent value
253 /// can be represented.
254 ///
255 /// The provided implementation will always return the result of
256 /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
257 /// this method, implementors can customize the behaviour for certain
258 /// values.
259 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
260 Ok(Some(self.as_xml_text()?))
261 }
262}
263
264impl AsXmlText for String {
265 /// Return the borrowed string contents.
266 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
267 Ok(Cow::Borrowed(self.as_str()))
268 }
269}
270
271impl AsXmlText for str {
272 /// Return the borrowed string contents.
273 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
274 Ok(Cow::Borrowed(&*self))
275 }
276}
277
278impl<T: AsXmlText> AsXmlText for Box<T> {
279 /// Return the borrowed [`Box`] contents.
280 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
281 T::as_xml_text(self)
282 }
283}
284
285impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
286 /// Return the borrowed [`Cow`] contents.
287 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
288 B::as_xml_text(self.as_ref())
289 }
290}
291
292impl<T: AsXmlText> AsXmlText for &T {
293 /// Delegate to the `AsXmlText` implementation on `T`.
294 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
295 T::as_xml_text(*self)
296 }
297}
298
/// Specialized variant of [`AsXmlText`].
///
/// Normally, it should not be necessary to implement this trait as it is
/// automatically implemented for all types implementing [`AsXmlText`].
/// However, if your type can only be serialised as an XML attribute (for
/// example because an absent value has a particular meaning), it is correct
/// to implement [`AsOptionalXmlText`] **instead of** [`AsXmlText`].
///
/// If your type can be serialised as both (text and attribute) but needs
/// special handling in attributes, implement [`AsXmlText`] but provide a
/// custom implementation of [`AsXmlText::as_optional_xml_text`].
pub trait AsOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(Some(_))` carries the text for the value, while `Ok(None)` stands
    /// for an absent value.
    ///
    /// # Errors
    ///
    /// Returns an error if the value cannot be converted.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
}
315
316impl<T: AsXmlText> AsOptionalXmlText for T {
317 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
318 <Self as AsXmlText>::as_optional_xml_text(self)
319 }
320}
321
322impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
323 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
324 self.as_ref()
325 .map(T::as_optional_xml_text)
326 .transpose()
327 .map(Option::flatten)
328 }
329}
330
/// Control how unknown attributes are handled.
///
/// The variants of this enum are referenced in the
/// `#[xml(on_unknown_attribute = ..)]` attribute, which can be used on
/// structs and enum variants. The specified variant controls how attributes,
/// which are not handled by any member of the compound, are handled during
/// parsing.
///
/// Which variant is the [`Default`] depends on whether the crate is built
/// with the `non-pedantic` feature; see the documentation of the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownAttributePolicy {
    /// All unknown attributes are discarded.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// The first unknown attribute which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
354
355impl UnknownAttributePolicy {
356 #[doc(hidden)]
357 /// Implementation of the policy.
358 ///
359 /// This is an internal API and not subject to semver versioning.
360 pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
361 match self {
362 Self::Fail => Err(self::error::Error::Other(msg)),
363 Self::Discard => Ok(()),
364 }
365 }
366}
367
/// Control how unknown children are handled.
///
/// The variants of this enum are referenced in the
/// `#[xml(on_unknown_child = ..)]` attribute, which can be used on structs
/// and enum variants. The specified variant controls how children, which are
/// not handled by any member of the compound, are handled during parsing.
///
/// Which variant is the [`Default`] depends on whether the crate is built
/// with the `non-pedantic` feature; see the documentation of the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownChildPolicy {
    /// All unknown children are discarded.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// The first unknown child which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
391
392impl UnknownChildPolicy {
393 #[doc(hidden)]
394 /// Implementation of the policy.
395 ///
396 /// This is an internal API and not subject to semver versioning.
397 pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
398 match self {
399 Self::Fail => Err(self::error::Error::Other(msg)),
400 Self::Discard => Ok(()),
401 }
402 }
403}
404
405/// Attempt to transform a type implementing [`AsXml`] into another
406/// type which implements [`FromXml`].
407pub fn transform<T: FromXml, F: AsXml>(from: &F) -> Result<T, self::error::Error> {
408 let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
409 let (qname, attrs) = match iter.next() {
410 Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
411 Some(Err(e)) => return Err(e),
412 _ => panic!("into_event_iter did not start with StartElement event!"),
413 };
414 let mut sink = match T::from_events(qname, attrs) {
415 Ok(v) => v,
416 Err(self::error::FromEventsError::Mismatch { .. }) => {
417 return Err(self::error::Error::TypeMismatch)
418 }
419 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
420 };
421 for event in iter {
422 let event = event?;
423 if let Some(v) = sink.feed(event)? {
424 return Ok(v);
425 }
426 }
427 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
428}
429
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Errors
///
/// - `FromElementError::Mismatch` hands back the original element if `T`
///   reports a mismatch for the element header.
/// - `FromElementError::Invalid` is returned if `T` rejects the element
///   header as invalid.
/// - Errors raised while serialising the element or while feeding the
///   builder are converted into the return error type.
///
/// # Panics
///
/// Panics if the element's item stream yields text or an element foot before
/// the end of the element header, or if the stream ends before the builder
/// has produced a value.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            // Hand the untouched element back to the caller.
            return Err(self::error::FromElementError::Mismatch(from));
        }
    };

    let mut items = from.as_xml_iter()?;
    // The header has already been passed to `from_events` above, so it is
    // skipped here; only what follows the header is fed into the builder.
    for item in items.by_ref() {
        match item? {
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => {}
            Item::ElementHeadEnd => break,
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }

    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
480
/// Convert the error of a reader result into an [`error::Error`].
///
/// The `io::Error` is expected to wrap an [`rxml::Error`], which is
/// extracted and converted. `Ok(_)` values are passed through untouched.
///
/// # Panics
///
/// Panics if the `io::Error` does not wrap an [`rxml::Error`]. The function
/// is meant for readers over in-memory byte slices, which cannot fail with a
/// genuine I/O error.
#[cfg(feature = "std")]
fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
    r.map_err(|io_err| match io_err.downcast::<rxml::Error>() {
        Ok(xml_err) => xml_err.into(),
        Err(_) => unreachable!("I/O error cannot be caused by &[]"),
    })
}
491
/// Read events from `r` up to and including the first `StartElement` and
/// return that element's name and attributes.
///
/// An XML declaration (version 1.0) before the start element is skipped.
///
/// # Errors
///
/// - Reader errors are converted via [`map_nonio_error`].
/// - Any other event before the start element yields an `Other` error.
/// - If the reader ends before a start element was seen, an "invalid EOF"
///   error with the `DocumentBegin` context is returned.
#[cfg(feature = "std")]
fn read_start_event<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
    for raw in r {
        match map_nonio_error(raw)? {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                let unexpected =
                    self::error::Error::Other("Unexpected event at start of document");
                return Err(unexpected);
            }
        }
    }

    // The input ended before any element was opened.
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(self::error::Error::XmlError(eof))
}
511
/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
/// containing XML data.
///
/// Parsing stops as soon as the builder of `T` has produced a value.
///
/// # Errors
///
/// - Errors from reading the start of the document, from the XML parser or
///   from the builder are passed through.
/// - [`error::Error::TypeMismatch`] is returned if `T` reports a mismatch
///   for the root element header.
/// - An XML "invalid EOF" error is returned if the data ends before the
///   builder has produced a value.
#[cfg(feature = "std")]
pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
    let mut reader = rxml::Reader::new(&mut buf);
    let (name, attrs) = read_start_event(&mut reader)?;

    let mut builder = T::from_events(name, attrs).map_err(|err| match err {
        self::error::FromEventsError::Mismatch { .. } => self::error::Error::TypeMismatch,
        self::error::FromEventsError::Invalid(inner) => inner,
    })?;

    for raw in reader {
        let event = map_nonio_error(raw)?;
        if let Some(value) = builder.feed(event)? {
            return Ok(value);
        }
    }

    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
}
532
/// Read events from `r` up to and including the first `StartElement` and
/// return that element's name and attributes, reporting failures as
/// [`io::Error`].
///
/// An XML declaration (version 1.0) before the start element is skipped.
///
/// # Errors
///
/// - Errors from the reader are passed through unchanged.
/// - Any other event before the start element, as well as the end of input
///   before a start element was seen, are reported with
///   [`io::ErrorKind::InvalidData`], wrapping the corresponding
///   [`error::Error`].
#[cfg(feature = "std")]
fn read_start_event_io<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> io::Result<(rxml::QName, rxml::AttrMap)> {
    // Every failure originating in this function is reported as `InvalidData`.
    let invalid_data = |e: self::error::Error| io::Error::new(io::ErrorKind::InvalidData, e);

    for raw in r {
        match raw? {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                return Err(invalid_data(self::error::Error::Other(
                    "Unexpected event at start of document",
                )))
            }
        }
    }

    // The input ended before any element was opened.
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(invalid_data(self::error::Error::XmlError(eof)))
}
556
/// Attempt to parse a type implementing [`FromXml`] from a reader.
///
/// Parsing stops as soon as the builder of `T` has produced a value.
///
/// # Errors
///
/// - Errors from the reader are passed through unchanged.
/// - A mismatching or invalid root element header, as well as errors from
///   the builder, are reported with [`io::ErrorKind::InvalidData`], wrapping
///   the corresponding [`error::Error`].
/// - If the input ends before the builder has produced a value, an error
///   with [`io::ErrorKind::UnexpectedEof`] is returned.
#[cfg(feature = "std")]
pub fn from_reader<T: FromXml, R: io::BufRead>(r: R) -> io::Result<T> {
    // Failures of the builder are wrapped into `InvalidData` I/O errors.
    let invalid_data = |e: self::error::Error| io::Error::new(io::ErrorKind::InvalidData, e);

    let mut reader = rxml::Reader::new(r);
    let (name, attrs) = read_start_event_io(&mut reader)?;

    let mut builder = T::from_events(name, attrs).map_err(|err| {
        invalid_data(match err {
            self::error::FromEventsError::Mismatch { .. } => self::error::Error::TypeMismatch,
            self::error::FromEventsError::Invalid(inner) => inner,
        })
    })?;

    for raw in reader {
        let event = raw?;
        if let Some(value) = builder.feed(event).map_err(invalid_data)? {
            return Ok(value);
        }
    }

    Err(io::Error::new(
        io::ErrorKind::UnexpectedEof,
        self::error::Error::XmlError(rxml::Error::InvalidEof(None)),
    ))
}
585
586/// Attempt to serialise a type implementing [`AsXml`] to a vector of bytes.
587pub fn to_vec<T: AsXml>(xso: &T) -> Result<Vec<u8>, self::error::Error> {
588 let iter = xso.as_xml_iter()?;
589 let mut writer = rxml::writer::Encoder::new();
590 let mut buf = Vec::new();
591 for item in iter {
592 let item = item?;
593 writer.encode(item.as_rxml_item(), &mut buf)?;
594 }
595 Ok(buf)
596}
597
/// Return true if the string contains exclusively XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return).
///
/// The check operates on the bytes of `s`. Empty input contains no
/// non-whitespace byte and thus yields `true`.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    let bytes: &[u8] = s.as_ref();
    bytes
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
}