lib.rs

  1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
  2#![cfg_attr(docsrs, feature(doc_cfg))]
  3#![forbid(unsafe_code)]
  4#![warn(missing_docs)]
  5/*!
  6# XML Streamed Objects -- serde-like parsing for XML
  7
  8This crate provides the traits for parsing XML data into Rust structs, and
  9vice versa.
 10
 11While it is in 0.0.x versions, many features still need to be developed, but
 12rest assured that there is a solid plan to get it fully usable for even
 13advanced XML scenarios.
 14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
 17*/
 18
 19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 20//
 21// This Source Code Form is subject to the terms of the Mozilla Public
 22// License, v. 2.0. If a copy of the MPL was not distributed with this
 23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 24
 25#![no_std]
 26
 27extern crate alloc;
 28#[cfg(feature = "std")]
 29extern crate std;
 30#[cfg(feature = "std")]
 31use std::io;
 32
 33pub mod asxml;
 34pub mod error;
 35pub mod fromxml;
 36#[cfg(feature = "minidom")]
 37pub mod minidom_compat;
 38mod rxml_util;
 39pub mod text;
 40
// Support items for macro use: crate re-exports plus aliases of built-in
// types. Hidden from the rendered documentation and only compiled when the
// `macros` feature is enabled.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub mod exports {
    // `minidom` is only re-exported when the `minidom` feature is enabled.
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;

    /// The built-in `bool` type.
    ///
    /// This is re-exported for use by macros in cases where we cannot rely on
    /// people not having done `type bool = str` or some similar shenanigans.
    pub type CoreBool = bool;

    /// The built-in `u8` type.
    ///
    /// This is re-exported for use by macros in cases where we cannot rely on
    /// people not having done `type u8 = str` or some similar shenanigans.
    pub type CoreU8 = u8;
}
 60
 61use alloc::{
 62    borrow::{Cow, ToOwned},
 63    boxed::Box,
 64    string::String,
 65    vec::Vec,
 66};
 67
 68#[doc(inline)]
 69pub use text::TextCodec;
 70
 71#[doc(inline)]
 72pub use rxml_util::Item;
 73
 74#[doc = include_str!("from_xml_doc.md")]
 75#[doc(inline)]
 76#[cfg(feature = "macros")]
 77pub use xso_proc::FromXml;
 78
 79/// # Make a struct or enum serialisable to XML
 80///
 81/// This derives the [`AsXml`] trait on a struct or enum. It is the
 82/// counterpart to [`macro@FromXml`].
 83///
 84/// The attributes necessary and available for the derivation to work are
 85/// documented on [`macro@FromXml`].
 86#[doc(inline)]
 87#[cfg(feature = "macros")]
 88pub use xso_proc::AsXml;
 89
/// Trait allowing to iterate a struct's contents as serialisable
/// [`Item`]s.
///
/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::AsXml>::ItemIter`.
pub trait AsXml {
    /// The iterator type.
    ///
    /// The iterator is parametrised over the lifetime `'x` of the borrow of
    /// the value being serialised (`Self: 'x`). Every element it yields is a
    /// `Result`, so producing an individual [`Item`] may fail.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::AsXml>::ItemIter`.
    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
    where
        Self: 'x;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`Item`] items.
    ///
    /// # Errors
    ///
    /// Creating the iterator may itself fail, in which case `Err(_)` is
    /// returned before any item has been produced.
    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
}
112
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// # Errors
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it may never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
138
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// The builder's [`FromEventsBuilder::Output`] is required to be `Self`.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// Both `name` and `attrs` are taken by value.
    ///
    /// # Errors
    ///
    /// Otherwise, an appropriate error is returned.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
175
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`core::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed as an owned [`String`], so implementations may
    /// reuse the buffer instead of copying it.
    ///
    /// # Errors
    ///
    /// Returns `Err(_)` if `data` cannot be converted into `Self`.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
194
impl FromXmlText for String {
    /// Return the string unchanged.
    ///
    /// The owned input is handed back as-is, so this conversion always
    /// returns `Ok(_)`.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
        Ok(data)
    }
}
201
202impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
203    /// Return a [`Cow::Owned`] containing the parsed value.
204    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
205        Ok(Cow::Owned(T::from_xml_text(data)?))
206    }
207}
208
209impl<T: FromXmlText> FromXmlText for Option<T> {
210    /// Return a [`Some`] containing the parsed value.
211    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
212        Ok(Some(T::from_xml_text(data)?))
213    }
214}
215
216impl<T: FromXmlText> FromXmlText for Box<T> {
217    /// Return a [`Box`] containing the parsed value.
218    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
219        Ok(Box::new(T::from_xml_text(data)?))
220    }
221}
222
223/// Trait to convert a value to an XML text string.
224///
225/// Implementing this trait for a type allows it to be used both for XML
226/// character data within elements and for XML attributes. For XML attributes,
227/// the behaviour is defined by [`AsXmlText::as_optional_xml_text`], while
228/// XML element text content uses [`AsXmlText::as_xml_text`]. Implementing
229/// [`AsXmlText`] automatically provides an implementation of
230/// [`AsOptionalXmlText`].
231///
232/// If your type should only be used in XML attributes and has no correct
233/// serialisation in XML text, you should *only* implement
234/// [`AsOptionalXmlText`] and omit the [`AsXmlText`] implementation.
235///
236/// This trait is implemented for many standard library types implementing
237/// [`core::fmt::Display`]. In addition, the following feature flags can enable
238/// more implementations:
239///
240/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
241/// - `uuid`: `uuid::Uuid`
242///
243/// Because of the unfortunate situation as described in [`FromXmlText`], we
244/// are **extremely liberal** with accepting optional dependencies for this
245/// purpose. You are very welcome to make merge requests against this crate
246/// adding support for parsing third-party crates.
247pub trait AsXmlText {
248    /// Convert the value to an XML string in a context where an absent value
249    /// cannot be represented.
250    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
251
252    /// Convert the value to an XML string in a context where an absent value
253    /// can be represented.
254    ///
255    /// The provided implementation will always return the result of
256    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
257    /// this method, implementors can customize the behaviour for certain
258    /// values.
259    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
260        Ok(Some(self.as_xml_text()?))
261    }
262}
263
264impl AsXmlText for String {
265    /// Return the borrowed string contents.
266    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
267        Ok(Cow::Borrowed(self.as_str()))
268    }
269}
270
271impl AsXmlText for str {
272    /// Return the borrowed string contents.
273    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
274        Ok(Cow::Borrowed(&*self))
275    }
276}
277
278impl<T: AsXmlText> AsXmlText for Box<T> {
279    /// Return the borrowed [`Box`] contents.
280    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
281        T::as_xml_text(self)
282    }
283}
284
285impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
286    /// Return the borrowed [`Cow`] contents.
287    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
288        B::as_xml_text(self.as_ref())
289    }
290}
291
292impl<T: AsXmlText> AsXmlText for &T {
293    /// Delegate to the `AsXmlText` implementation on `T`.
294    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
295        T::as_xml_text(*self)
296    }
297}
298
/// Specialized variant of [`AsXmlText`].
///
/// Normally, it should not be necessary to implement this trait as it is
/// automatically implemented for all types implementing [`AsXmlText`].
/// However, if your type can only be serialised as an XML attribute (for
/// example because an absent value has a particular meaning), it is correct
/// to implement [`AsOptionalXmlText`] **instead of** [`AsXmlText`].
///
/// If your type can be serialised as both (text and attribute) but needs
/// special handling in attributes, implement [`AsXmlText`] but provide a
/// custom implementation of [`AsXmlText::as_optional_xml_text`].
pub trait AsOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` stands for an absent value, `Ok(Some(_))` carries the
    /// text to emit.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
}
315
316impl<T: AsXmlText> AsOptionalXmlText for T {
317    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
318        <Self as AsXmlText>::as_optional_xml_text(self)
319    }
320}
321
322impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
323    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
324        self.as_ref()
325            .map(T::as_optional_xml_text)
326            .transpose()
327            .map(Option::flatten)
328    }
329}
330
/// Control how unknown attributes are handled.
///
/// The variants of this enum are referenced in the
/// `#[xml(on_unknown_attribute = ..)]` which can be used on structs and
/// enum variants. The specified variant controls how attributes, which are
/// not handled by any member of the compound, are handled during parsing.
///
/// Which variant is the [`Default`] is decided at compile time by the
/// `non-pedantic` feature; see the individual variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownAttributePolicy {
    /// All unknown attributes are discarded.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// The first unknown attribute which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
354
355impl UnknownAttributePolicy {
356    #[doc(hidden)]
357    /// Implementation of the policy.
358    ///
359    /// This is an internal API and not subject to semver versioning.
360    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
361        match self {
362            Self::Fail => Err(self::error::Error::Other(msg)),
363            Self::Discard => Ok(()),
364        }
365    }
366}
367
/// Control how unknown children are handled.
///
/// The variants of this enum are referenced in the
/// `#[xml(on_unknown_child = ..)]` which can be used on structs and
/// enum variants. The specified variant controls how children, which are not
/// handled by any member of the compound, are handled during parsing.
///
/// Which variant is the [`Default`] is decided at compile time by the
/// `non-pedantic` feature; see the individual variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownChildPolicy {
    /// All unknown children are discarded.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// The first unknown child which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
391
392impl UnknownChildPolicy {
393    #[doc(hidden)]
394    /// Implementation of the policy.
395    ///
396    /// This is an internal API and not subject to semver versioning.
397    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
398        match self {
399            Self::Fail => Err(self::error::Error::Other(msg)),
400            Self::Discard => Ok(()),
401        }
402    }
403}
404
405/// Attempt to transform a type implementing [`AsXml`] into another
406/// type which implements [`FromXml`].
407pub fn transform<T: FromXml, F: AsXml>(from: &F) -> Result<T, self::error::Error> {
408    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
409    let (qname, attrs) = match iter.next() {
410        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
411        Some(Err(e)) => return Err(e),
412        _ => panic!("into_event_iter did not start with StartElement event!"),
413    };
414    let mut sink = match T::from_events(qname, attrs) {
415        Ok(v) => v,
416        Err(self::error::FromEventsError::Mismatch { .. }) => {
417            return Err(self::error::Error::TypeMismatch)
418        }
419        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
420    };
421    for event in iter {
422        let event = event?;
423        if let Some(v) = sink.feed(event)? {
424            return Ok(v);
425        }
426    }
427    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
428}
429
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function hands the element back untouched (inside
/// `FromElementError::Mismatch`) if its element header does not match the
/// expectations of `T`.
///
/// # Panics
///
/// Panics if the element's item stream emits text or an element foot before
/// the end of the element header, or if it ends before `T`'s builder has
/// produced a value.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        // Header mismatch: give the element back to the caller.
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
    };

    let mut items = from.as_xml_iter()?;
    // The header was already handed to `from_events` above, so skip all
    // items up to and including the end of the element header. `by_ref`
    // keeps `items` usable for the remaining content afterwards.
    for item in items.by_ref() {
        match item? {
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::ElementHeadEnd => break,
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // Everything after the header is converted to events and fed to the
    // builder.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
480
// Convert the `io::Error` of a reader result into this crate's error type.
//
// The `io::Error` is expected to wrap an `rxml::Error`; any other error is
// treated as impossible (see the `unreachable!` message).
#[cfg(feature = "std")]
fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
    r.map_err(|io_err| match io_err.downcast::<rxml::Error>() {
        Ok(xml_err) => xml_err.into(),
        Err(_) => unreachable!("I/O error cannot be caused by &[]"),
    })
}
491
// Read events from `r` until the first `StartElement` and return its
// qualified name and attributes.
//
// XML declarations (version 1.0) before the start element are skipped; any
// other event is rejected. If the reader ends without a start element, an
// `InvalidEof` error with the `DocumentBegin` context is returned.
#[cfg(feature = "std")]
fn read_start_event<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
    for ev in r {
        let ev = map_nonio_error(ev)?;
        match ev {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => {}
            _ => {
                return Err(self::error::Error::Other(
                    "Unexpected event at start of document",
                ))
            }
        }
    }
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(self::error::Error::XmlError(eof))
}
511
/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
/// containing XML data.
///
/// # Errors
///
/// - XML-level errors reported by the reader are returned.
/// - `Error::TypeMismatch` is returned if the root element does not match
///   the expectations of `T`.
/// - Errors raised by `T`'s builder are returned as-is.
/// - An `InvalidEof` XML error is returned if the buffer ends before the
///   value is complete.
#[cfg(feature = "std")]
pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
    let mut reader = rxml::Reader::new(&mut buf);
    let (name, attrs) = read_start_event(&mut reader)?;
    let mut builder = T::from_events(name, attrs).map_err(|err| match err {
        self::error::FromEventsError::Mismatch { .. } => self::error::Error::TypeMismatch,
        self::error::FromEventsError::Invalid(e) => e,
    })?;
    for ev in reader {
        let ev = map_nonio_error(ev)?;
        if let Some(value) = builder.feed(ev)? {
            return Ok(value);
        }
    }
    // The reader ended before the builder completed the value.
    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
}
532
// Variant of `read_start_event` which reports failures as `io::Error`.
//
// Reader errors are passed through unchanged; errors raised here are
// wrapped into an `io::Error` of kind `InvalidData`.
#[cfg(feature = "std")]
fn read_start_event_io<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> io::Result<(rxml::QName, rxml::AttrMap)> {
    let invalid_data = |e: self::error::Error| io::Error::new(io::ErrorKind::InvalidData, e);
    for ev in r {
        match ev? {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                return Err(invalid_data(self::error::Error::Other(
                    "Unexpected event at start of document",
                )))
            }
        }
    }
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(invalid_data(self::error::Error::XmlError(eof)))
}
556
/// Attempt to parse a type implementing [`FromXml`] from a reader.
///
/// # Errors
///
/// - Errors reported by the underlying XML reader are returned unchanged.
/// - A root element mismatch or an error raised by `T`'s builder is wrapped
///   into an `io::Error` of kind `InvalidData`.
/// - If the reader ends before the value is complete, an `io::Error` of kind
///   `UnexpectedEof` is returned.
#[cfg(feature = "std")]
pub fn from_reader<T: FromXml, R: io::BufRead>(r: R) -> io::Result<T> {
    // Wraps a crate error into an `InvalidData` I/O error.
    let invalid_data = |e: self::error::Error| io::Error::new(io::ErrorKind::InvalidData, e);

    let mut reader = rxml::Reader::new(r);
    let (name, attrs) = read_start_event_io(&mut reader)?;
    let mut builder = match T::from_events(name, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(invalid_data(self::error::Error::TypeMismatch))
        }
        Err(self::error::FromEventsError::Invalid(e)) => return Err(invalid_data(e)),
    };
    for ev in reader {
        match builder.feed(ev?).map_err(invalid_data)? {
            Some(value) => return Ok(value),
            None => continue,
        }
    }
    Err(io::Error::new(
        io::ErrorKind::UnexpectedEof,
        self::error::Error::XmlError(rxml::Error::InvalidEof(None)),
    ))
}
585
586/// Attempt to serialise a type implementing [`AsXml`] to a vector of bytes.
587pub fn to_vec<T: AsXml>(xso: &T) -> Result<Vec<u8>, self::error::Error> {
588    let iter = xso.as_xml_iter()?;
589    let mut writer = rxml::writer::Encoder::new();
590    let mut buf = Vec::new();
591    for item in iter {
592        let item = item?;
593        writer.encode(item.as_rxml_item(), &mut buf)?;
594    }
595    Ok(buf)
596}
597
/// Return true if the string contains exclusively XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return).
///
/// The check is performed on the bytes of `s`; an empty input returns true.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    let bytes = s.as_ref();
    bytes
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
}