lib.rs

  1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
  2#![cfg_attr(docsrs, feature(doc_cfg))]
  3#![forbid(unsafe_code)]
  4#![warn(missing_docs)]
  5/*!
  6# XML Streamed Objects -- serde-like parsing for XML
  7
  8This crate provides the traits for parsing XML data into Rust structs, and
  9vice versa.
 10
 11While it is in 0.0.x versions, many features still need to be developed, but
 12rest assured that there is a solid plan to get it fully usable for even
 13advanced XML scenarios.
 14
 15XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
 16use of this library in parsing XML streams like specified in RFC 6120.
 17*/
 18
 19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 20//
 21// This Source Code Form is subject to the terms of the Mozilla Public
 22// License, v. 2.0. If a copy of the MPL was not distributed with this
 23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 24
 25#![no_std]
 26
 27extern crate alloc;
 28#[cfg(feature = "std")]
 29extern crate std;
 30#[cfg(feature = "std")]
 31use std::io;
 32
 33pub mod asxml;
 34pub mod error;
 35pub mod fromxml;
 36#[cfg(feature = "minidom")]
 37pub mod minidom_compat;
 38mod rxml_util;
 39pub mod text;
 40
/// Re-exports for use by code generated by the derive macros.
///
/// This module only exists when the `macros` feature is enabled and is
/// hidden from the rendered documentation.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub mod exports {
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;

    // These re-exports are necessary to support both std and no_std in code
    // generated by the macros.
    //
    // If we attempted to use ::alloc directly from macros, std builds would
    // not work because alloc is not generally present in builds using std.
    // If we used ::std, no_std builds would obviously not work. By exporting
    // std as alloc in std builds, we can safely use the alloc types from
    // there.
    //
    // Obviously, we have to be careful in xso-proc to not refer to types
    // which are not in alloc.
    //
    // Exactly one of the two `extern crate` items below is active, depending
    // on the `std` feature; both expose the name `alloc`.
    #[cfg(not(feature = "std"))]
    pub extern crate alloc;
    #[cfg(feature = "std")]
    pub extern crate std as alloc;

    /// The built-in `bool` type.
    ///
    /// This is re-exported for use by macros in cases where we cannot rely on
    /// people not having done `type bool = str` or some similar shenanigans.
    pub type CoreBool = bool;

    /// The built-in `u8` type.
    ///
    /// This is re-exported for use by macros in cases where we cannot rely on
    /// people not having done `type u8 = str` or some similar shenanigans.
    pub type CoreU8 = u8;
}
 76
 77use alloc::{
 78    borrow::{Cow, ToOwned},
 79    boxed::Box,
 80    string::String,
 81    vec::Vec,
 82};
 83
 84#[doc(inline)]
 85pub use text::TextCodec;
 86
 87#[doc(inline)]
 88pub use rxml_util::Item;
 89
 90#[doc = include_str!("from_xml_doc.md")]
 91#[doc(inline)]
 92#[cfg(feature = "macros")]
 93pub use xso_proc::FromXml;
 94
 95/// # Make a struct or enum serialisable to XML
 96///
 97/// This derives the [`AsXml`] trait on a struct or enum. It is the
 98/// counterpart to [`macro@FromXml`].
 99///
100/// The attributes necessary and available for the derivation to work are
101/// documented on [`macro@FromXml`].
102#[doc(inline)]
103#[cfg(feature = "macros")]
104pub use xso_proc::AsXml;
105
/// Trait allowing to iterate a struct's contents as serialisable
/// [`Item`]s.
///
/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::AsXml>::ItemIter`.
pub trait AsXml {
    /// The iterator type.
    ///
    /// The iterator yields `Result`s: each element is either an [`Item`]
    /// valid for the lifetime `'x`, or an error. `Self` is required to
    /// outlive `'x`.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::AsXml>::ItemIter`.
    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
    where
        Self: 'x;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`Item`] items.
    ///
    /// The returned iterator is tied to the lifetime of the `&self` borrow.
    /// Creating the iterator may itself fail, in which case the error is
    /// returned instead of an iterator.
    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
}
128
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// # Errors
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it may never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
154
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// The builder's [`FromEventsBuilder::Output`] is `Self`.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// # Errors
    ///
    /// Otherwise, an appropriate
    /// [`FromEventsError`][`self::error::FromEventsError`] is returned.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
191
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`core::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// types from third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed by value, so implementations may reuse the
    /// allocation. An error is returned if the text cannot be converted.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
210
211impl FromXmlText for String {
212    /// Return the string unchanged.
213    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
214        Ok(data)
215    }
216}
217
218impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
219    /// Return a [`Cow::Owned`] containing the parsed value.
220    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
221        Ok(Cow::Owned(T::from_xml_text(data)?))
222    }
223}
224
225impl<T: FromXmlText> FromXmlText for Option<T> {
226    /// Return a [`Some`] containing the parsed value.
227    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
228        Ok(Some(T::from_xml_text(data)?))
229    }
230}
231
232impl<T: FromXmlText> FromXmlText for Box<T> {
233    /// Return a [`Box`] containing the parsed value.
234    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
235        Ok(Box::new(T::from_xml_text(data)?))
236    }
237}
238
239/// Trait to convert a value to an XML text string.
240///
241/// Implementing this trait for a type allows it to be used both for XML
242/// character data within elements and for XML attributes. For XML attributes,
243/// the behaviour is defined by [`AsXmlText::as_optional_xml_text`], while
244/// XML element text content uses [`AsXmlText::as_xml_text`]. Implementing
245/// [`AsXmlText`] automatically provides an implementation of
246/// [`AsOptionalXmlText`].
247///
248/// If your type should only be used in XML attributes and has no correct
249/// serialisation in XML text, you should *only* implement
250/// [`AsOptionalXmlText`] and omit the [`AsXmlText`] implementation.
251///
252/// This trait is implemented for many standard library types implementing
253/// [`core::fmt::Display`]. In addition, the following feature flags can enable
254/// more implementations:
255///
256/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
257/// - `uuid`: `uuid::Uuid`
258///
259/// Because of the unfortunate situation as described in [`FromXmlText`], we
260/// are **extremely liberal** with accepting optional dependencies for this
261/// purpose. You are very welcome to make merge requests against this crate
262/// adding support for parsing third-party crates.
263pub trait AsXmlText {
264    /// Convert the value to an XML string in a context where an absent value
265    /// cannot be represented.
266    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
267
268    /// Convert the value to an XML string in a context where an absent value
269    /// can be represented.
270    ///
271    /// The provided implementation will always return the result of
272    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
273    /// this method, implementors can customize the behaviour for certain
274    /// values.
275    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
276        Ok(Some(self.as_xml_text()?))
277    }
278}
279
280impl AsXmlText for String {
281    /// Return the borrowed string contents.
282    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
283        Ok(Cow::Borrowed(self.as_str()))
284    }
285}
286
287impl AsXmlText for str {
288    /// Return the borrowed string contents.
289    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
290        Ok(Cow::Borrowed(&*self))
291    }
292}
293
294impl<T: AsXmlText> AsXmlText for Box<T> {
295    /// Return the borrowed [`Box`] contents.
296    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
297        T::as_xml_text(self)
298    }
299}
300
301impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
302    /// Return the borrowed [`Cow`] contents.
303    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
304        B::as_xml_text(self.as_ref())
305    }
306}
307
308impl<T: AsXmlText> AsXmlText for &T {
309    /// Delegate to the `AsXmlText` implementation on `T`.
310    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
311        T::as_xml_text(*self)
312    }
313}
314
/// Specialized variant of [`AsXmlText`].
///
/// Normally, it should not be necessary to implement this trait as it is
/// automatically implemented for all types implementing [`AsXmlText`].
/// However, if your type can only be serialised as an XML attribute (for
/// example because an absent value has a particular meaning), it is correct
/// to implement [`AsOptionalXmlText`] **instead of** [`AsXmlText`].
///
/// If your type can be serialised as both (text and attribute) but needs
/// special handling in attributes, implement [`AsXmlText`] but provide a
/// custom implementation of [`AsXmlText::as_optional_xml_text`].
pub trait AsOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` stands for the absent value; `Ok(Some(_))` carries the
    /// text to emit.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
}
331
332impl<T: AsXmlText> AsOptionalXmlText for T {
333    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
334        <Self as AsXmlText>::as_optional_xml_text(self)
335    }
336}
337
338impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
339    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
340        self.as_ref()
341            .map(T::as_optional_xml_text)
342            .transpose()
343            .map(Option::flatten)
344    }
345}
346
/// Control how unknown attributes are handled.
///
/// The variants of this enum are referenced in the
/// `#[xml(on_unknown_attribute = ..)]` meta, which can be used on structs
/// and enum variants. The specified variant controls how attributes which
/// are not handled by any member of the compound are treated during
/// parsing.
///
/// The [`Default`] variant depends on the `non-pedantic` crate feature; see
/// the documentation of the individual variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownAttributePolicy {
    /// All unknown attributes are discarded.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// The first unknown attribute which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
370
371impl UnknownAttributePolicy {
372    #[doc(hidden)]
373    /// Implementation of the policy.
374    ///
375    /// This is an internal API and not subject to semver versioning.
376    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
377        match self {
378            Self::Fail => Err(self::error::Error::Other(msg)),
379            Self::Discard => Ok(()),
380        }
381    }
382}
383
/// Control how unknown children are handled.
///
/// The variants of this enum are referenced in the
/// `#[xml(on_unknown_child = ..)]` meta, which can be used on structs and
/// enum variants. The specified variant controls how children which are not
/// handled by any member of the compound are treated during parsing.
///
/// The [`Default`] variant depends on the `non-pedantic` crate feature; see
/// the documentation of the individual variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownChildPolicy {
    /// All unknown children are discarded.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// The first unknown child which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
407
408impl UnknownChildPolicy {
409    #[doc(hidden)]
410    /// Implementation of the policy.
411    ///
412    /// This is an internal API and not subject to semver versioning.
413    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
414        match self {
415            Self::Fail => Err(self::error::Error::Other(msg)),
416            Self::Discard => Ok(()),
417        }
418    }
419}
420
421/// Attempt to transform a type implementing [`AsXml`] into another
422/// type which implements [`FromXml`].
423pub fn transform<T: FromXml, F: AsXml>(from: &F) -> Result<T, self::error::Error> {
424    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
425    let (qname, attrs) = match iter.next() {
426        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
427        Some(Err(e)) => return Err(e),
428        _ => panic!("into_event_iter did not start with StartElement event!"),
429    };
430    let mut sink = match T::from_events(qname, attrs) {
431        Ok(v) => v,
432        Err(self::error::FromEventsError::Mismatch { .. }) => {
433            return Err(self::error::Error::TypeMismatch)
434        }
435        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
436    };
437    for event in iter {
438        let event = event?;
439        if let Some(v) = sink.feed(event)? {
440            return Ok(v);
441        }
442    }
443    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
444}
445
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Errors
///
/// - `FromElementError::Mismatch`, carrying the original element, if
///   `T::from_events` reports a mismatch.
/// - `FromElementError::Invalid` if `T::from_events` rejects the element
///   header as invalid.
/// - Any other error raised while iterating the element or while feeding
///   the builder, converted into a `FromElementError`.
///
/// # Panics
///
/// Panics if the element's item stream yields text or an element foot before
/// the end of the element header, or if the stream ends before the builder
/// has completed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            // Hand the untouched element back to the caller.
            return Err(self::error::FromElementError::Mismatch(from));
        }
        Ok(builder) => builder,
    };

    let mut items = from.as_xml_iter()?;
    // The element header has already been passed to `from_events`, so it is
    // skipped here up to and including its end marker.
    for item in items.by_ref() {
        match item? {
            Item::ElementHeadEnd => break,
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    for event in self::rxml_util::ItemToEvent::new(items) {
        match builder.feed(event?)? {
            Some(value) => return Ok(value),
            None => continue,
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
496
/// Convert the `io::Error` of a result into the crate's error type.
///
/// This is meant for readers backed by an in-memory buffer, where the only
/// possible failure is an [`rxml::Error`] wrapped inside the `io::Error`.
///
/// # Panics
///
/// Panics if the `io::Error` does not wrap an [`rxml::Error`].
#[cfg(feature = "std")]
fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
    r.map_err(|io_err| match io_err.downcast::<rxml::Error>() {
        Ok(xml_err) => xml_err.into(),
        Err(_) => unreachable!("I/O error cannot be caused by &[]"),
    })
}
507
/// Read events from `r` up to and including the first `StartElement` and
/// return its qualified name and attributes.
///
/// An XML 1.0 declaration before the start element is skipped. Any other
/// event is rejected with an `Error::Other`. If the reader ends before a
/// start element is seen, an `InvalidEof` error with the `DocumentBegin`
/// context is returned.
///
/// Reader errors are converted with `map_nonio_error`, which panics on
/// I/O errors that do not wrap an [`rxml::Error`].
#[cfg(feature = "std")]
fn read_start_event<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
    for ev in r {
        match map_nonio_error(ev)? {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                let unexpected = self::error::Error::Other("Unexpected event at start of document");
                return Err(unexpected);
            }
        }
    }
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(self::error::Error::XmlError(eof))
}
527
/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
/// containing XML data.
///
/// Parsing stops as soon as the builder of `T` reports completion.
///
/// # Errors
///
/// - [`Error::TypeMismatch`][`self::error::Error::TypeMismatch`] if
///   `T::from_events` reports a mismatch for the root element.
/// - Any error raised by the XML parser or by the builder of `T`.
/// - An `InvalidEof` XML error if the buffer ends before `T` is complete.
#[cfg(feature = "std")]
pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
    let mut reader = rxml::Reader::new(&mut buf);
    let (name, attrs) = read_start_event(&mut reader)?;

    let mut builder = T::from_events(name, attrs).map_err(|err| match err {
        self::error::FromEventsError::Mismatch { .. } => self::error::Error::TypeMismatch,
        self::error::FromEventsError::Invalid(inner) => inner,
    })?;

    for ev in reader {
        match builder.feed(map_nonio_error(ev)?)? {
            Some(value) => return Ok(value),
            None => continue,
        }
    }

    // The buffer was exhausted before the builder completed.
    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
}
548
/// Read events from `r` up to and including the first `StartElement` and
/// return its qualified name and attributes, reporting failures as
/// `io::Error`.
///
/// An XML 1.0 declaration before the start element is skipped. Errors from
/// the reader are passed through unchanged. Any other event, as well as the
/// reader ending before a start element, is reported as an `io::Error` of
/// kind `InvalidData` wrapping the corresponding crate error.
#[cfg(feature = "std")]
fn read_start_event_io<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> io::Result<(rxml::QName, rxml::AttrMap)> {
    for ev in r {
        match ev? {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                let unexpected = self::error::Error::Other("Unexpected event at start of document");
                return Err(io::Error::new(io::ErrorKind::InvalidData, unexpected));
            }
        }
    }
    let eof = self::error::Error::XmlError(rxml::Error::InvalidEof(Some(
        rxml::error::ErrorContext::DocumentBegin,
    )));
    Err(io::Error::new(io::ErrorKind::InvalidData, eof))
}
572
/// Attempt to parse a type implementing [`FromXml`] from a reader.
///
/// Parsing stops as soon as the builder of `T` reports completion.
///
/// # Errors
///
/// - Errors from the underlying XML reader are passed through unchanged.
/// - A mismatching or invalid root element, as well as any error raised by
///   the builder of `T`, is reported as an `io::Error` of kind
///   `InvalidData` wrapping the crate error.
/// - If the reader ends before `T` is complete, an `io::Error` of kind
///   `UnexpectedEof` wrapping an `InvalidEof` XML error is returned.
#[cfg(feature = "std")]
pub fn from_reader<T: FromXml, R: io::BufRead>(r: R) -> io::Result<T> {
    /// Wrap a crate error into an `io::Error` of kind `InvalidData`.
    fn invalid_data(e: self::error::Error) -> io::Error {
        io::Error::new(io::ErrorKind::InvalidData, e)
    }

    let mut reader = rxml::Reader::new(r);
    let (name, attrs) = read_start_event_io(&mut reader)?;

    let mut builder = T::from_events(name, attrs)
        .map_err(|err| match err {
            self::error::FromEventsError::Mismatch { .. } => self::error::Error::TypeMismatch,
            self::error::FromEventsError::Invalid(inner) => inner,
        })
        .map_err(invalid_data)?;

    for ev in reader {
        match builder.feed(ev?).map_err(invalid_data)? {
            Some(value) => return Ok(value),
            None => continue,
        }
    }

    // The reader was exhausted before the builder completed.
    let eof = self::error::Error::XmlError(rxml::Error::InvalidEof(None));
    Err(io::Error::new(io::ErrorKind::UnexpectedEof, eof))
}
601
602/// Attempt to serialise a type implementing [`AsXml`] to a vector of bytes.
603pub fn to_vec<T: AsXml>(xso: &T) -> Result<Vec<u8>, self::error::Error> {
604    let iter = xso.as_xml_iter()?;
605    let mut writer = rxml::writer::Encoder::new();
606    let mut buf = Vec::new();
607    for item in iter {
608        let item = item?;
609        writer.encode(item.as_rxml_item(), &mut buf)?;
610    }
611    Ok(buf)
612}
613
/// Return true if the string contains exclusively XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return). The check is done on the raw
/// bytes of the input; an empty input yields `true`.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    let bytes: &[u8] = s.as_ref();
    bytes
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
}