lib.rs

  1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
  2#![cfg_attr(docsrs, feature(doc_cfg))]
  3#![forbid(unsafe_code)]
  4#![warn(missing_docs)]
  5/*!
  6# XML Streamed Objects -- serde-like parsing for XML
  7
  8This crate provides the traits for parsing XML data into Rust structs, and
  9vice versa.
 10
 11While it is in 0.0.x versions, many features still need to be developed, but
 12rest assured that there is a solid plan to get it fully usable for even
 13advanced XML scenarios.
 14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
 17*/
 18
 19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 20//
 21// This Source Code Form is subject to the terms of the Mozilla Public
 22// License, v. 2.0. If a copy of the MPL was not distributed with this
 23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 24
 25use std::io;
 26
 27pub mod asxml;
 28pub mod error;
 29pub mod fromxml;
 30#[cfg(feature = "minidom")]
 31pub mod minidom_compat;
 32mod rxml_util;
 33pub mod text;
 34
 35#[doc(hidden)]
 36pub mod exports {
 37    #[cfg(feature = "minidom")]
 38    pub use minidom;
 39    pub use rxml;
 40}
 41
 42use std::borrow::Cow;
 43
 44#[doc(inline)]
 45pub use text::TextCodec;
 46
 47#[doc(inline)]
 48pub use rxml_util::Item;
 49
 50#[doc = include_str!("from_xml_doc.md")]
 51#[doc(inline)]
 52#[cfg(feature = "macros")]
 53pub use xso_proc::FromXml;
 54
 55/// # Make a struct or enum serialisable to XML
 56///
 57/// This derives the [`AsXml`] trait on a struct or enum. It is the
 58/// counterpart to [`macro@FromXml`].
 59///
 60/// The attributes necessary and available for the derivation to work are
 61/// documented on [`macro@FromXml`].
 62#[doc(inline)]
 63#[cfg(feature = "macros")]
 64pub use xso_proc::AsXml;
 65
 66/// Trait allowing to iterate a struct's contents as serialisable
 67/// [`Item`]s.
 68///
 69/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
 70/// type is considered a non-breaking change for any given implementation of
 71/// this trait. Always refer to a type's iterator type using fully-qualified
 72/// notation, for example: `<T as xso::AsXml>::ItemIter`.
 73pub trait AsXml {
 74    /// The iterator type.
 75    ///
 76    /// **Important:** Changing this type is considered a non-breaking change
 77    /// for any given implementation of this trait. Always refer to a type's
 78    /// iterator type using fully-qualified notation, for example:
 79    /// `<T as xso::AsXml>::ItemIter`.
 80    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
 81    where
 82        Self: 'x;
 83
 84    /// Return an iterator which emits the contents of the struct or enum as
 85    /// serialisable [`Item`] items.
 86    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
 87}
 88
 89/// Trait for a temporary object allowing to construct a struct from
 90/// [`rxml::Event`] items.
 91///
 92/// Objects of this type are generally constructed through
 93/// [`FromXml::from_events`] and are used to build Rust structs or enums from
 94/// XML data. The XML data must be fed as `rxml::Event` to the
 95/// [`feed`][`Self::feed`] method.
 96pub trait FromEventsBuilder {
 97    /// The type which will be constructed by this builder.
 98    type Output;
 99
100    /// Feed another [`rxml::Event`] into the element construction
101    /// process.
102    ///
103    /// Once the construction process completes, `Ok(Some(_))` is returned.
104    /// When valid data has been fed but more events are needed to fully
105    /// construct the resulting struct, `Ok(None)` is returned.
106    ///
107    /// If the construction fails, `Err(_)` is returned. Errors are generally
108    /// fatal and the builder should be assumed to be broken at that point.
109    /// Feeding more events after an error may result in panics, errors or
110    /// inconsistent result data, though it may never result in unsound or
111    /// unsafe behaviour.
112    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
113}
114
115/// Trait allowing to construct a struct from a stream of
116/// [`rxml::Event`] items.
117///
118/// To use this, first call [`FromXml::from_events`] with the qualified
119/// name and the attributes of the corresponding
120/// [`rxml::Event::StartElement`] event. If the call succeeds, the
121/// returned builder object must be fed with the events representing the
122/// contents of the element, and then with the `EndElement` event.
123///
124/// The `StartElement` passed to `from_events` must not be passed to `feed`.
125///
126/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
127/// is considered a non-breaking change for any given implementation of this
128/// trait. Always refer to a type's builder type using fully-qualified
129/// notation, for example: `<T as xso::FromXml>::Builder`.
130pub trait FromXml {
131    /// A builder type used to construct the element.
132    ///
133    /// **Important:** Changing this type is considered a non-breaking change
134    /// for any given implementation of this trait. Always refer to a type's
135    /// builder type using fully-qualified notation, for example:
136    /// `<T as xso::FromXml>::Builder`.
137    type Builder: FromEventsBuilder<Output = Self>;
138
139    /// Attempt to initiate the streamed construction of this struct from XML.
140    ///
141    /// If the passed qualified `name` and `attrs` match the element's type,
142    /// the [`Self::Builder`] is returned and should be fed with XML events
143    /// by the caller.
144    ///
145    /// Otherwise, an appropriate error is returned.
146    fn from_events(
147        name: rxml::QName,
148        attrs: rxml::AttrMap,
149    ) -> Result<Self::Builder, self::error::FromEventsError>;
150}
151
152/// Trait allowing to convert XML text to a value.
153///
154/// This trait is similar to [`core::str::FromStr`], however, due to
155/// restrictions imposed by the orphan rule, a separate trait is needed.
156/// Implementations for many standard library types are available. In
157/// addition, the following feature flags can enable more implementations:
158///
159/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
160/// - `uuid`: `uuid::Uuid`
161///
162/// Because of this unfortunate situation, we are **extremely liberal** with
163/// accepting optional dependencies for this purpose. You are very welcome to
164/// make merge requests against this crate adding support for parsing
165/// third-party crates.
166pub trait FromXmlText: Sized {
167    /// Convert the given XML text to a value.
168    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
169}
170
171impl FromXmlText for String {
172    /// Return the string unchanged.
173    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
174        Ok(data)
175    }
176}
177
178impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
179    /// Return a [`Cow::Owned`] containing the parsed value.
180    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
181        Ok(Cow::Owned(T::from_xml_text(data)?))
182    }
183}
184
185impl<T: FromXmlText> FromXmlText for Option<T> {
186    /// Return a [`Some`] containing the parsed value.
187    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
188        Ok(Some(T::from_xml_text(data)?))
189    }
190}
191
192impl<T: FromXmlText> FromXmlText for Box<T> {
193    /// Return a [`Box`] containing the parsed value.
194    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
195        Ok(Box::new(T::from_xml_text(data)?))
196    }
197}
198
199/// Trait to convert a value to an XML text string.
200///
201/// Implementing this trait for a type allows it to be used both for XML
202/// character data within elements and for XML attributes. For XML attributes,
203/// the behaviour is defined by [`AsXmlText::as_optional_xml_text`], while
204/// XML element text content uses [`AsXmlText::as_xml_text`]. Implementing
205/// [`AsXmlText`] automatically provides an implementation of
206/// [`AsOptionalXmlText`].
207///
208/// If your type should only be used in XML attributes and has no correct
209/// serialisation in XML text, you should *only* implement
210/// [`AsOptionalXmlText`] and omit the [`AsXmlText`] implementation.
211///
212/// This trait is implemented for many standard library types implementing
213/// [`core::fmt::Display`]. In addition, the following feature flags can enable
214/// more implementations:
215///
216/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
217/// - `uuid`: `uuid::Uuid`
218///
219/// Because of the unfortunate situation as described in [`FromXmlText`], we
220/// are **extremely liberal** with accepting optional dependencies for this
221/// purpose. You are very welcome to make merge requests against this crate
222/// adding support for parsing third-party crates.
223pub trait AsXmlText {
224    /// Convert the value to an XML string in a context where an absent value
225    /// cannot be represented.
226    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
227
228    /// Convert the value to an XML string in a context where an absent value
229    /// can be represented.
230    ///
231    /// The provided implementation will always return the result of
232    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
233    /// this method, implementors can customize the behaviour for certain
234    /// values.
235    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
236        Ok(Some(self.as_xml_text()?))
237    }
238}
239
240impl AsXmlText for String {
241    /// Return the borrowed string contents.
242    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
243        Ok(Cow::Borrowed(self.as_str()))
244    }
245}
246
247impl AsXmlText for str {
248    /// Return the borrowed string contents.
249    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
250        Ok(Cow::Borrowed(&*self))
251    }
252}
253
254impl<T: AsXmlText> AsXmlText for Box<T> {
255    /// Return the borrowed [`Box`] contents.
256    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
257        T::as_xml_text(self)
258    }
259}
260
261impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
262    /// Return the borrowed [`Cow`] contents.
263    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
264        B::as_xml_text(self.as_ref())
265    }
266}
267
268impl<T: AsXmlText> AsXmlText for &T {
269    /// Delegate to the `AsXmlText` implementation on `T`.
270    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
271        T::as_xml_text(*self)
272    }
273}
274
275/// Specialized variant of [`AsXmlText`].
276///
277/// Normally, it should not be necessary to implement this trait as it is
278/// automatically implemented for all types implementing [`AsXmlText`].
279/// However, if your type can only be serialised as an XML attribute (for
280/// example because an absent value has a particular meaning), it is correct
281/// to implement [`AsOptionalXmlText`] **instead of** [`AsXmlText`].
282///
283/// If your type can be serialised as both (text and attribute) but needs
284/// special handling in attributes, implement [`AsXmlText`] but provide a
285/// custom implementation of [`AsXmlText::as_optional_xml_text`].
286pub trait AsOptionalXmlText {
287    /// Convert the value to an XML string in a context where an absent value
288    /// can be represented.
289    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
290}
291
292impl<T: AsXmlText> AsOptionalXmlText for T {
293    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
294        <Self as AsXmlText>::as_optional_xml_text(self)
295    }
296}
297
298impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
299    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
300        self.as_ref()
301            .map(T::as_optional_xml_text)
302            .transpose()
303            .map(Option::flatten)
304    }
305}
306
307/// Control how unknown attributes are handled.
308///
309/// The variants of this enum are referenced in the
310/// `#[xml(on_unknown_attribute = ..)]` which can be used on structs and
311/// enum variants. The specified variant controls how attributes, which are
312/// not handled by any member of the compound, are handled during parsing.
313#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
314pub enum UnknownAttributePolicy {
315    /// All unknown attributes are discarded.
316    Discard,
317
318    /// The first unknown attribute which is encountered generates a fatal
319    /// parsing error.
320    ///
321    /// This is the default policy.
322    #[default]
323    Fail,
324}
325
326impl UnknownAttributePolicy {
327    #[doc(hidden)]
328    /// Implementation of the policy.
329    ///
330    /// This is an internal API and not subject to semver versioning.
331    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
332        match self {
333            Self::Fail => Err(self::error::Error::Other(msg)),
334            Self::Discard => Ok(()),
335        }
336    }
337}
338
339/// Control how unknown children are handled.
340///
341/// The variants of this enum are referenced in the
342/// `#[xml(on_unknown_child = ..)]` which can be used on structs and
343/// enum variants. The specified variant controls how children, which are not
344/// handled by any member of the compound, are handled during parsing.
345#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
346pub enum UnknownChildPolicy {
347    /// All unknown children are discarded.
348    Discard,
349
350    /// The first unknown child which is encountered generates a fatal
351    /// parsing error.
352    ///
353    /// This is the default policy.
354    #[default]
355    Fail,
356}
357
358impl UnknownChildPolicy {
359    #[doc(hidden)]
360    /// Implementation of the policy.
361    ///
362    /// This is an internal API and not subject to semver versioning.
363    pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
364        match self {
365            Self::Fail => Err(self::error::Error::Other(msg)),
366            Self::Discard => Ok(()),
367        }
368    }
369}
370
371/// Attempt to transform a type implementing [`AsXml`] into another
372/// type which implements [`FromXml`].
373pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
374    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
375    let (qname, attrs) = match iter.next() {
376        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
377        Some(Err(e)) => return Err(e),
378        _ => panic!("into_event_iter did not start with StartElement event!"),
379    };
380    let mut sink = match T::from_events(qname, attrs) {
381        Ok(v) => v,
382        Err(self::error::FromEventsError::Mismatch { .. }) => {
383            return Err(self::error::Error::TypeMismatch)
384        }
385        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
386    };
387    for event in iter {
388        let event = event?;
389        if let Some(v) = sink.feed(event)? {
390            return Ok(v);
391        }
392    }
393    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
394}
395
/// Fallibly convert a [`minidom::Element`] into a type implementing
/// [`FromXml`].
///
/// In contrast to [`transform`] (which also works on elements), this
/// function hands the untouched element back to the caller, inside
/// `FromElementError::Mismatch`, if its element header does not match the
/// expectations of `T`.
///
/// # Panics
///
/// Panics if the element's item stream contains text or an element foot
/// before the end of the element header, or if it ends before `T` has been
/// constructed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            // Give the element back so that the caller can try another type.
            return Err(self::error::FromElementError::Mismatch(from));
        }
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e));
        }
        Ok(builder) => builder,
    };

    let mut items = from.as_xml_iter()?;
    // The header has already been handed to `from_events` above, so the
    // corresponding items are skipped here. `by_ref` keeps `items` usable
    // for the remainder of the element.
    for item in items.by_ref() {
        match item? {
            // end of the element header: the contents follow
            Item::ElementHeadEnd => break,
            // parts of the element header: discard
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
446
447fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
448    match r {
449        Ok(v) => Ok(v),
450        Err(e) => match e.downcast::<rxml::Error>() {
451            Ok(e) => Err(e.into()),
452            Err(_) => unreachable!("I/O error cannot be caused by &[]"),
453        },
454    }
455}
456
457fn read_start_event<I: std::io::BufRead>(
458    r: &mut rxml::Reader<I>,
459) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
460    for ev in r {
461        match map_nonio_error(ev)? {
462            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
463            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
464            _ => {
465                return Err(self::error::Error::Other(
466                    "Unexpected event at start of document",
467                ))
468            }
469        }
470    }
471    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(Some(
472        rxml::error::ErrorContext::DocumentBegin,
473    ))))
474}
475
476/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
477/// containing XML data.
478pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
479    let mut reader = rxml::Reader::new(&mut buf);
480    let (name, attrs) = read_start_event(&mut reader)?;
481    let mut builder = match T::from_events(name, attrs) {
482        Ok(v) => v,
483        Err(self::error::FromEventsError::Mismatch { .. }) => {
484            return Err(self::error::Error::TypeMismatch)
485        }
486        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
487    };
488    for ev in reader {
489        if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
490            return Ok(v);
491        }
492    }
493    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
494}
495
496fn read_start_event_io<I: std::io::BufRead>(
497    r: &mut rxml::Reader<I>,
498) -> io::Result<(rxml::QName, rxml::AttrMap)> {
499    for ev in r {
500        match ev? {
501            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
502            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
503            _ => {
504                return Err(io::Error::new(
505                    io::ErrorKind::InvalidData,
506                    self::error::Error::Other("Unexpected event at start of document"),
507                ))
508            }
509        }
510    }
511    Err(io::Error::new(
512        io::ErrorKind::InvalidData,
513        self::error::Error::XmlError(rxml::Error::InvalidEof(Some(
514            rxml::error::ErrorContext::DocumentBegin,
515        ))),
516    ))
517}
518
519/// Attempt to parse a type implementing [`FromXml`] from a reader.
520pub fn from_reader<T: FromXml, R: io::BufRead>(r: R) -> io::Result<T> {
521    let mut reader = rxml::Reader::new(r);
522    let (name, attrs) = read_start_event_io(&mut reader)?;
523    let mut builder = match T::from_events(name, attrs) {
524        Ok(v) => v,
525        Err(self::error::FromEventsError::Mismatch { .. }) => {
526            return Err(self::error::Error::TypeMismatch)
527                .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
528        }
529        Err(self::error::FromEventsError::Invalid(e)) => {
530            return Err(e).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
531        }
532    };
533    for ev in reader {
534        if let Some(v) = builder
535            .feed(ev?)
536            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?
537        {
538            return Ok(v);
539        }
540    }
541    Err(io::Error::new(
542        io::ErrorKind::UnexpectedEof,
543        self::error::Error::XmlError(rxml::Error::InvalidEof(None)),
544    ))
545}
546
547/// Attempt to serialise a type implementing [`AsXml`] to a vector of bytes.
548pub fn to_vec<T: AsXml>(xso: &T) -> Result<Vec<u8>, self::error::Error> {
549    let iter = xso.as_xml_iter()?;
550    let mut writer = rxml::writer::Encoder::new();
551    let mut buf = Vec::new();
552    for item in iter {
553        let item = item?;
554        writer.encode(item.as_rxml_item(), &mut buf)?;
555    }
556    Ok(buf)
557}
558
/// Check whether the given data consists of nothing but XML whitespace.
///
/// The XML whitespace characters are U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return). Empty input contains no
/// non-whitespace bytes and therefore yields `true`.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    let bytes = s.as_ref();
    bytes
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
}