lib.rs

  1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
  2#![cfg_attr(docsrs, feature(doc_cfg))]
  3#![forbid(unsafe_code)]
  4#![warn(missing_docs)]
  5/*!
  6# XML Streamed Objects -- serde-like parsing for XML
  7
  8This crate provides the traits for parsing XML data into Rust structs, and
  9vice versa.
 10
 11While it is in 0.0.x versions, many features still need to be developed, but
 12rest assured that there is a solid plan to get it fully usable for even
 13advanced XML scenarios.
 14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams such as those specified in RFC 6120.
 17*/
 18
 19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 20//
 21// This Source Code Form is subject to the terms of the Mozilla Public
 22// License, v. 2.0. If a copy of the MPL was not distributed with this
 23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 24
 25#![no_std]
 26
 27extern crate alloc;
 28#[cfg(feature = "std")]
 29extern crate std;
 30#[cfg(feature = "std")]
 31use std::io;
 32
 33pub mod asxml;
 34pub mod error;
 35pub mod fromxml;
 36#[cfg(feature = "minidom")]
 37pub mod minidom_compat;
 38mod rxml_util;
 39pub mod text;
 40
 41#[doc(hidden)]
 42pub mod exports {
 43    #[cfg(feature = "minidom")]
 44    pub use minidom;
 45    pub use rxml;
 46}
 47
 48use alloc::{
 49    borrow::{Cow, ToOwned},
 50    boxed::Box,
 51    string::String,
 52    vec::Vec,
 53};
 54
 55#[doc(inline)]
 56pub use text::TextCodec;
 57
 58#[doc(inline)]
 59pub use rxml_util::Item;
 60
 61#[doc = include_str!("from_xml_doc.md")]
 62#[doc(inline)]
 63#[cfg(feature = "macros")]
 64pub use xso_proc::FromXml;
 65
 66/// # Make a struct or enum serialisable to XML
 67///
 68/// This derives the [`AsXml`] trait on a struct or enum. It is the
 69/// counterpart to [`macro@FromXml`].
 70///
 71/// The attributes necessary and available for the derivation to work are
 72/// documented on [`macro@FromXml`].
 73#[doc(inline)]
 74#[cfg(feature = "macros")]
 75pub use xso_proc::AsXml;
 76
 77/// Trait allowing to iterate a struct's contents as serialisable
 78/// [`Item`]s.
 79///
 80/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
 81/// type is considered a non-breaking change for any given implementation of
 82/// this trait. Always refer to a type's iterator type using fully-qualified
 83/// notation, for example: `<T as xso::AsXml>::ItemIter`.
 84pub trait AsXml {
 85    /// The iterator type.
 86    ///
 87    /// **Important:** Changing this type is considered a non-breaking change
 88    /// for any given implementation of this trait. Always refer to a type's
 89    /// iterator type using fully-qualified notation, for example:
 90    /// `<T as xso::AsXml>::ItemIter`.
 91    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
 92    where
 93        Self: 'x;
 94
 95    /// Return an iterator which emits the contents of the struct or enum as
 96    /// serialisable [`Item`] items.
 97    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
 98}
 99
100/// Trait for a temporary object allowing to construct a struct from
101/// [`rxml::Event`] items.
102///
103/// Objects of this type are generally constructed through
104/// [`FromXml::from_events`] and are used to build Rust structs or enums from
105/// XML data. The XML data must be fed as `rxml::Event` to the
106/// [`feed`][`Self::feed`] method.
107pub trait FromEventsBuilder {
108    /// The type which will be constructed by this builder.
109    type Output;
110
111    /// Feed another [`rxml::Event`] into the element construction
112    /// process.
113    ///
114    /// Once the construction process completes, `Ok(Some(_))` is returned.
115    /// When valid data has been fed but more events are needed to fully
116    /// construct the resulting struct, `Ok(None)` is returned.
117    ///
118    /// If the construction fails, `Err(_)` is returned. Errors are generally
119    /// fatal and the builder should be assumed to be broken at that point.
120    /// Feeding more events after an error may result in panics, errors or
121    /// inconsistent result data, though it may never result in unsound or
122    /// unsafe behaviour.
123    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
124}
125
126/// Trait allowing to construct a struct from a stream of
127/// [`rxml::Event`] items.
128///
129/// To use this, first call [`FromXml::from_events`] with the qualified
130/// name and the attributes of the corresponding
131/// [`rxml::Event::StartElement`] event. If the call succeeds, the
132/// returned builder object must be fed with the events representing the
133/// contents of the element, and then with the `EndElement` event.
134///
135/// The `StartElement` passed to `from_events` must not be passed to `feed`.
136///
137/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
138/// is considered a non-breaking change for any given implementation of this
139/// trait. Always refer to a type's builder type using fully-qualified
140/// notation, for example: `<T as xso::FromXml>::Builder`.
141pub trait FromXml {
142    /// A builder type used to construct the element.
143    ///
144    /// **Important:** Changing this type is considered a non-breaking change
145    /// for any given implementation of this trait. Always refer to a type's
146    /// builder type using fully-qualified notation, for example:
147    /// `<T as xso::FromXml>::Builder`.
148    type Builder: FromEventsBuilder<Output = Self>;
149
150    /// Attempt to initiate the streamed construction of this struct from XML.
151    ///
152    /// If the passed qualified `name` and `attrs` match the element's type,
153    /// the [`Self::Builder`] is returned and should be fed with XML events
154    /// by the caller.
155    ///
156    /// Otherwise, an appropriate error is returned.
157    fn from_events(
158        name: rxml::QName,
159        attrs: rxml::AttrMap,
160    ) -> Result<Self::Builder, self::error::FromEventsError>;
161}
162
163/// Trait allowing to convert XML text to a value.
164///
165/// This trait is similar to [`core::str::FromStr`], however, due to
166/// restrictions imposed by the orphan rule, a separate trait is needed.
167/// Implementations for many standard library types are available. In
168/// addition, the following feature flags can enable more implementations:
169///
170/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
171/// - `uuid`: `uuid::Uuid`
172///
173/// Because of this unfortunate situation, we are **extremely liberal** with
174/// accepting optional dependencies for this purpose. You are very welcome to
175/// make merge requests against this crate adding support for parsing
176/// third-party crates.
177pub trait FromXmlText: Sized {
178    /// Convert the given XML text to a value.
179    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
180}
181
182impl FromXmlText for String {
183    /// Return the string unchanged.
184    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
185        Ok(data)
186    }
187}
188
189impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
190    /// Return a [`Cow::Owned`] containing the parsed value.
191    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
192        Ok(Cow::Owned(T::from_xml_text(data)?))
193    }
194}
195
196impl<T: FromXmlText> FromXmlText for Option<T> {
197    /// Return a [`Some`] containing the parsed value.
198    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
199        Ok(Some(T::from_xml_text(data)?))
200    }
201}
202
203impl<T: FromXmlText> FromXmlText for Box<T> {
204    /// Return a [`Box`] containing the parsed value.
205    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
206        Ok(Box::new(T::from_xml_text(data)?))
207    }
208}
209
210/// Trait to convert a value to an XML text string.
211///
212/// Implementing this trait for a type allows it to be used both for XML
213/// character data within elements and for XML attributes. For XML attributes,
214/// the behaviour is defined by [`AsXmlText::as_optional_xml_text`], while
215/// XML element text content uses [`AsXmlText::as_xml_text`]. Implementing
216/// [`AsXmlText`] automatically provides an implementation of
217/// [`AsOptionalXmlText`].
218///
219/// If your type should only be used in XML attributes and has no correct
220/// serialisation in XML text, you should *only* implement
221/// [`AsOptionalXmlText`] and omit the [`AsXmlText`] implementation.
222///
223/// This trait is implemented for many standard library types implementing
224/// [`core::fmt::Display`]. In addition, the following feature flags can enable
225/// more implementations:
226///
227/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
228/// - `uuid`: `uuid::Uuid`
229///
230/// Because of the unfortunate situation as described in [`FromXmlText`], we
231/// are **extremely liberal** with accepting optional dependencies for this
232/// purpose. You are very welcome to make merge requests against this crate
233/// adding support for parsing third-party crates.
234pub trait AsXmlText {
235    /// Convert the value to an XML string in a context where an absent value
236    /// cannot be represented.
237    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
238
239    /// Convert the value to an XML string in a context where an absent value
240    /// can be represented.
241    ///
242    /// The provided implementation will always return the result of
243    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
244    /// this method, implementors can customize the behaviour for certain
245    /// values.
246    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
247        Ok(Some(self.as_xml_text()?))
248    }
249}
250
251impl AsXmlText for String {
252    /// Return the borrowed string contents.
253    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
254        Ok(Cow::Borrowed(self.as_str()))
255    }
256}
257
258impl AsXmlText for str {
259    /// Return the borrowed string contents.
260    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
261        Ok(Cow::Borrowed(&*self))
262    }
263}
264
265impl<T: AsXmlText> AsXmlText for Box<T> {
266    /// Return the borrowed [`Box`] contents.
267    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
268        T::as_xml_text(self)
269    }
270}
271
272impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
273    /// Return the borrowed [`Cow`] contents.
274    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
275        B::as_xml_text(self.as_ref())
276    }
277}
278
279impl<T: AsXmlText> AsXmlText for &T {
280    /// Delegate to the `AsXmlText` implementation on `T`.
281    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
282        T::as_xml_text(*self)
283    }
284}
285
286/// Specialized variant of [`AsXmlText`].
287///
288/// Normally, it should not be necessary to implement this trait as it is
289/// automatically implemented for all types implementing [`AsXmlText`].
290/// However, if your type can only be serialised as an XML attribute (for
291/// example because an absent value has a particular meaning), it is correct
292/// to implement [`AsOptionalXmlText`] **instead of** [`AsXmlText`].
293///
294/// If your type can be serialised as both (text and attribute) but needs
295/// special handling in attributes, implement [`AsXmlText`] but provide a
296/// custom implementation of [`AsXmlText::as_optional_xml_text`].
297pub trait AsOptionalXmlText {
298    /// Convert the value to an XML string in a context where an absent value
299    /// can be represented.
300    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
301}
302
303impl<T: AsXmlText> AsOptionalXmlText for T {
304    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
305        <Self as AsXmlText>::as_optional_xml_text(self)
306    }
307}
308
309impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
310    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
311        self.as_ref()
312            .map(T::as_optional_xml_text)
313            .transpose()
314            .map(Option::flatten)
315    }
316}
317
/// Policy for attributes which no member of a compound handles.
///
/// The variants of this enum are referenced in
/// `#[xml(on_unknown_attribute = ..)]`, which can be used on structs and
/// enum variants. During parsing, the selected variant decides what happens
/// to attributes which are not handled by any member of the compound.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownAttributePolicy {
    /// Silently drop all unknown attributes.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// Generate a fatal parsing error on the first unknown attribute which
    /// is encountered.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
341
342impl UnknownAttributePolicy {
343    #[doc(hidden)]
344    /// Implementation of the policy.
345    ///
346    /// This is an internal API and not subject to semver versioning.
347    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
348        match self {
349            Self::Fail => Err(self::error::Error::Other(msg)),
350            Self::Discard => Ok(()),
351        }
352    }
353}
354
/// Policy for child elements which no member of a compound handles.
///
/// The variants of this enum are referenced in
/// `#[xml(on_unknown_child = ..)]`, which can be used on structs and
/// enum variants. During parsing, the selected variant decides what happens
/// to children which are not handled by any member of the compound.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownChildPolicy {
    /// Silently drop all unknown children.
    ///
    /// This is the default policy if the crate is built with the
    /// `non-pedantic` feature.
    #[cfg_attr(feature = "non-pedantic", default)]
    Discard,

    /// Generate a fatal parsing error on the first unknown child which is
    /// encountered.
    ///
    /// This is the default policy if the crate is built **without** the
    /// `non-pedantic` feature.
    #[cfg_attr(not(feature = "non-pedantic"), default)]
    Fail,
}
378
379impl UnknownChildPolicy {
380    #[doc(hidden)]
381    /// Implementation of the policy.
382    ///
383    /// This is an internal API and not subject to semver versioning.
384    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
385        match self {
386            Self::Fail => Err(self::error::Error::Other(msg)),
387            Self::Discard => Ok(()),
388        }
389    }
390}
391
392/// Attempt to transform a type implementing [`AsXml`] into another
393/// type which implements [`FromXml`].
394pub fn transform<T: FromXml, F: AsXml>(from: &F) -> Result<T, self::error::Error> {
395    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
396    let (qname, attrs) = match iter.next() {
397        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
398        Some(Err(e)) => return Err(e),
399        _ => panic!("into_event_iter did not start with StartElement event!"),
400    };
401    let mut sink = match T::from_events(qname, attrs) {
402        Ok(v) => v,
403        Err(self::error::FromEventsError::Mismatch { .. }) => {
404            return Err(self::error::Error::TypeMismatch)
405        }
406        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
407    };
408    for event in iter {
409        let event = event?;
410        if let Some(v) = sink.feed(event)? {
411            return Ok(v);
412        }
413    }
414    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
415}
416
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// In contrast to [`transform`] (which also works on elements), this
/// function hands the element back unharmed, inside
/// [`FromElementError::Mismatch`][`self::error::FromElementError::Mismatch`],
/// if its element header does not match the expectations of `T`.
///
/// # Panics
///
/// Panics if the element emits text or an element foot before the end of
/// its element header, or if its events run out before `T` has been
/// constructed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
    };

    let mut items = from.as_xml_iter()?;
    // The header was already handed to `from_events` above, so skip all
    // items up to and including the end of the element header.
    for item in &mut items {
        match item? {
            Item::ElementHeadEnd => break,
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // Everything which remains is element content plus the element foot.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
467
// Convert the error of an `io::Result` into this crate's error type by
// downcasting it to `rxml::Error`.
//
// Panics (via `unreachable!`) if the `io::Error` does not wrap an
// `rxml::Error`; callers are expected to read from in-memory byte slices
// only, where no other I/O error can occur.
#[cfg(feature = "std")]
fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
    r.map_err(|io_err| -> self::error::Error {
        match io_err.downcast::<rxml::Error>() {
            Ok(xml_err) => xml_err.into(),
            Err(_) => unreachable!("I/O error cannot be caused by &[]"),
        }
    })
}
478
// Read events from `r` until the first `StartElement` and return its
// qualified name and attributes.
//
// XML declaration events are skipped. Any other event yields an
// `Error::Other`; running out of events yields an `InvalidEof` error with
// the `DocumentBegin` context.
#[cfg(feature = "std")]
fn read_start_event<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
    for ev in r {
        match map_nonio_error(ev)? {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                return Err(self::error::Error::Other(
                    "Unexpected event at start of document",
                ))
            }
        }
    }
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(self::error::Error::XmlError(eof))
}
498
/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
/// containing XML data.
///
/// # Errors
///
/// - [`Error::TypeMismatch`][`self::error::Error::TypeMismatch`] if the
///   first element in the buffer does not match `T`.
/// - An [`rxml::Error::InvalidEof`], wrapped into
///   [`Error::XmlError`][`self::error::Error::XmlError`], if the buffer ends
///   before `T` has been constructed.
/// - Any other error raised while reading the XML or building `T`.
#[cfg(feature = "std")]
pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
    let mut reader = rxml::Reader::new(&mut buf);
    let (name, attrs) = read_start_event(&mut reader)?;
    let mut builder = T::from_events(name, attrs).map_err(|err| match err {
        self::error::FromEventsError::Mismatch { .. } => self::error::Error::TypeMismatch,
        self::error::FromEventsError::Invalid(inner) => inner,
    })?;
    for ev in reader {
        let ev = map_nonio_error(ev)?;
        if let Some(value) = builder.feed(ev)? {
            return Ok(value);
        }
    }
    // The buffer ended before the builder reported completion.
    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
}
519
// Read events from `r` until the first `StartElement` and return its
// qualified name and attributes.
//
// Errors of the reader are passed through unchanged. XML declaration events
// are skipped. Any other event, as well as running out of events, yields an
// `io::Error` of kind `InvalidData` wrapping this crate's error type.
#[cfg(feature = "std")]
fn read_start_event_io<I: io::BufRead>(
    r: &mut rxml::Reader<I>,
) -> io::Result<(rxml::QName, rxml::AttrMap)> {
    let invalid_data = |e: self::error::Error| io::Error::new(io::ErrorKind::InvalidData, e);
    for ev in r {
        match ev? {
            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => continue,
            _ => {
                return Err(invalid_data(self::error::Error::Other(
                    "Unexpected event at start of document",
                )))
            }
        }
    }
    let eof = rxml::Error::InvalidEof(Some(rxml::error::ErrorContext::DocumentBegin));
    Err(invalid_data(self::error::Error::XmlError(eof)))
}
543
/// Attempt to parse a type implementing [`FromXml`] from a reader.
///
/// # Errors
///
/// - Errors of the underlying XML reader are passed through unchanged.
/// - If the first element does not match `T`, or if building `T` fails, the
///   crate-level error is wrapped into an [`io::Error`] of kind
///   [`io::ErrorKind::InvalidData`].
/// - If the reader ends before `T` has been constructed, an [`io::Error`]
///   of kind [`io::ErrorKind::UnexpectedEof`] is returned.
#[cfg(feature = "std")]
pub fn from_reader<T: FromXml, R: io::BufRead>(r: R) -> io::Result<T> {
    // Wraps a crate-level error into an `io::Error`.
    let invalid_data = |e: self::error::Error| io::Error::new(io::ErrorKind::InvalidData, e);

    let mut reader = rxml::Reader::new(r);
    let (name, attrs) = read_start_event_io(&mut reader)?;
    let mut builder = match T::from_events(name, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(invalid_data(self::error::Error::TypeMismatch))
        }
        Err(self::error::FromEventsError::Invalid(e)) => return Err(invalid_data(e)),
    };
    for ev in reader {
        let outcome = builder.feed(ev?).map_err(invalid_data)?;
        if let Some(value) = outcome {
            return Ok(value);
        }
    }
    // The reader ended before the builder reported completion.
    let eof = self::error::Error::XmlError(rxml::Error::InvalidEof(None));
    Err(io::Error::new(io::ErrorKind::UnexpectedEof, eof))
}
572
573/// Attempt to serialise a type implementing [`AsXml`] to a vector of bytes.
574pub fn to_vec<T: AsXml>(xso: &T) -> Result<Vec<u8>, self::error::Error> {
575    let iter = xso.as_xml_iter()?;
576    let mut writer = rxml::writer::Encoder::new();
577    let mut buf = Vec::new();
578    for item in iter {
579        let item = item?;
580        writer.encode(item.as_rxml_item(), &mut buf)?;
581    }
582    Ok(buf)
583}
584
/// Check whether the given string consists exclusively of XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return). An empty input contains no
/// non-whitespace bytes and therefore yields `true`.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    s.as_ref()
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
}