lib.rs

  1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
  2#![cfg_attr(docsrs, feature(doc_cfg))]
  3#![forbid(unsafe_code)]
  4#![warn(missing_docs)]
  5/*!
  6# XML Streamed Objects -- serde-like parsing for XML
  7
  8This crate provides the traits for parsing XML data into Rust structs, and
  9vice versa.
 10
 11While it is in 0.0.x versions, many features still need to be developed, but
 12rest assured that there is a solid plan to get it fully usable for even
 13advanced XML scenarios.
 14
 15XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
 16use of this library in parsing XML streams like specified in RFC 6120.
 17*/
 18
 19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 20//
 21// This Source Code Form is subject to the terms of the Mozilla Public
 22// License, v. 2.0. If a copy of the MPL was not distributed with this
 23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 24pub mod error;
 25#[cfg(feature = "minidom")]
 26pub mod minidom_compat;
 27mod rxml_util;
 28pub mod text;
 29
// Re-exports of the crates whose types appear in this crate's public
// signatures. The module is hidden from the rendered documentation.
// NOTE(review): presumably consumed by code generated by the derive
// macros -- confirm against `xso_proc`.
#[doc(hidden)]
pub mod exports {
    // Only re-exported when the `minidom` feature is enabled.
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;
}
 36
 37use std::borrow::Cow;
 38
 39#[doc(inline)]
 40pub use text::TextCodec;
 41
 42#[doc(inline)]
 43pub use rxml_util::Item;
 44
 45#[doc = include_str!("from_xml_doc.md")]
 46#[doc(inline)]
 47#[cfg(feature = "macros")]
 48pub use xso_proc::FromXml;
 49
 50/// # Make a struct or enum serialisable to XML
 51///
 52/// This derives the [`AsXml`] trait on a struct or enum. It is the
 53/// counterpart to [`macro@FromXml`].
 54///
 55/// The attributes necessary and available for the derivation to work are
 56/// documented on [`macro@FromXml`].
 57#[doc(inline)]
 58#[cfg(feature = "macros")]
 59pub use xso_proc::AsXml;
 60
/// Trait allowing to iterate a struct's contents as serialisable
/// [`Item`]s.
///
/// Serialisation is fallible in two places: creating the iterator via
/// [`as_xml_iter`][`Self::as_xml_iter`] returns a `Result`, and every
/// element the iterator yields is itself a `Result`.
///
/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::AsXml>::ItemIter`.
pub trait AsXml {
    /// The iterator type.
    ///
    /// The iterator may borrow from the value it serialises for the
    /// lifetime `'x`.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::AsXml>::ItemIter`.
    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
    where
        Self: 'x;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`Item`] items.
    ///
    /// The returned iterator borrows from `self`.
    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
}
 83
 84/// Helper iterator to convert an `Option<T>` to XML.
 85pub struct OptionAsXml<T: Iterator>(Option<T>);
 86
 87impl<T: Iterator> OptionAsXml<T> {
 88    /// Construct a new iterator, wrapping the given iterator.
 89    ///
 90    /// If `inner` is `None`, this iterator terminates immediately. Otherwise,
 91    /// it yields the elements yielded by `inner` until `inner` finishes,
 92    /// after which this iterator completes, too.
 93    pub fn new(inner: Option<T>) -> Self {
 94        Self(inner)
 95    }
 96}
 97
 98impl<'x, T: Iterator<Item = Result<Item<'x>, self::error::Error>>> Iterator for OptionAsXml<T> {
 99    type Item = Result<Item<'x>, self::error::Error>;
100
101    fn next(&mut self) -> Option<Self::Item> {
102        self.0.as_mut()?.next()
103    }
104}
105
106/// Helper iterator to convert an `Box<T>` to XML.
107pub struct BoxAsXml<T: Iterator>(Box<T>);
108
109impl<'x, T: Iterator<Item = Result<Item<'x>, self::error::Error>>> Iterator for BoxAsXml<T> {
110    type Item = Result<Item<'x>, self::error::Error>;
111
112    fn next(&mut self) -> Option<Self::Item> {
113        self.0.next()
114    }
115}
116
117impl<T: AsXml> AsXml for Option<T> {
118    type ItemIter<'x> = OptionAsXml<T::ItemIter<'x>> where T: 'x;
119
120    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error> {
121        match self {
122            Some(ref value) => Ok(OptionAsXml(Some(T::as_xml_iter(value)?))),
123            None => Ok(OptionAsXml(None)),
124        }
125    }
126}
127
128impl<T: AsXml> AsXml for Box<T> {
129    type ItemIter<'x> = BoxAsXml<T::ItemIter<'x>> where T: 'x;
130
131    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error> {
132        Ok(BoxAsXml(Box::new(T::as_xml_iter(&self)?)))
133    }
134}
135
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// The event is taken by value.
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it may never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
161
162/// Helper struct to construct an `Option<T>` from XML events.
163pub struct OptionBuilder<T: FromEventsBuilder>(T);
164
165/// Helper struct to construct an `Box<T>` from XML events.
166pub struct BoxBuilder<T: FromEventsBuilder>(Box<T>);
167
168impl<T: FromEventsBuilder> FromEventsBuilder for OptionBuilder<T> {
169    type Output = Option<T::Output>;
170
171    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error> {
172        self.0.feed(ev).map(|ok| ok.map(|value| Some(value)))
173    }
174}
175
176impl<T: FromEventsBuilder> FromEventsBuilder for BoxBuilder<T> {
177    type Output = Box<T::Output>;
178
179    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error> {
180        self.0.feed(ev).map(|ok| ok.map(|value| Box::new(value)))
181    }
182}
183
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// Otherwise, an appropriate error is returned. The callers in this
    /// crate distinguish two cases of
    /// [`FromEventsError`][`self::error::FromEventsError`]: `Mismatch`,
    /// which they report as a type mismatch, and `Invalid`, whose wrapped
    /// error they forward.
    ///
    /// Both `name` and `attrs` are taken by value.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
220
221impl<T: FromXml> FromXml for Option<T> {
222    type Builder = OptionBuilder<T::Builder>;
223
224    fn from_events(
225        name: rxml::QName,
226        attrs: rxml::AttrMap,
227    ) -> Result<Self::Builder, self::error::FromEventsError> {
228        Ok(OptionBuilder(T::from_events(name, attrs)?))
229    }
230}
231
232impl<T: FromXml> FromXml for Box<T> {
233    type Builder = BoxBuilder<T::Builder>;
234
235    fn from_events(
236        name: rxml::QName,
237        attrs: rxml::AttrMap,
238    ) -> Result<Self::Builder, self::error::FromEventsError> {
239        Ok(BoxBuilder(Box::new(T::from_events(name, attrs)?)))
240    }
241}
242
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`core::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed by value, so an implementation may reuse the
    /// allocation (the `String` implementation returns it unchanged).
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
261
262impl FromXmlText for String {
263    /// Return the string unchanged.
264    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
265        Ok(data)
266    }
267}
268
269impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
270    /// Return a [`Cow::Owned`] containing the parsed value.
271    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
272        Ok(Cow::Owned(T::from_xml_text(data)?))
273    }
274}
275
276impl<T: FromXmlText> FromXmlText for Option<T> {
277    /// Return a [`Some`] containing the parsed value.
278    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
279        Ok(Some(T::from_xml_text(data)?))
280    }
281}
282
283impl<T: FromXmlText> FromXmlText for Box<T> {
284    /// Return a [`Box`] containing the parsed value.
285    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
286        Ok(Box::new(T::from_xml_text(data)?))
287    }
288}
289
290/// Trait to convert a value to an XML text string.
291///
292/// This trait is implemented for many standard library types implementing
293/// [`core::fmt::Display`]. In addition, the following feature flags can enable
294/// more implementations:
295///
296/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
297/// - `uuid`: `uuid::Uuid`
298///
299/// Because of the unfortunate situation as described in [`FromXmlText`], we
300/// are **extremely liberal** with accepting optional dependencies for this
301/// purpose. You are very welcome to make merge requests against this crate
302/// adding support for parsing third-party crates.
303pub trait AsXmlText {
304    /// Convert the value to an XML string in a context where an absent value
305    /// cannot be represented.
306    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
307
308    /// Convert the value to an XML string in a context where an absent value
309    /// can be represented.
310    ///
311    /// The provided implementation will always return the result of
312    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
313    /// this method, implementors can customize the behaviour for certain
314    /// values.
315    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
316        Ok(Some(self.as_xml_text()?))
317    }
318}
319
320impl AsXmlText for String {
321    /// Return the borrowed string contents.
322    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
323        Ok(Cow::Borrowed(self.as_str()))
324    }
325}
326
327impl AsXmlText for str {
328    /// Return the borrowed string contents.
329    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
330        Ok(Cow::Borrowed(&*self))
331    }
332}
333
334impl<T: AsXmlText> AsXmlText for Box<T> {
335    /// Return the borrowed [`Box`] contents.
336    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
337        T::as_xml_text(self)
338    }
339}
340
341impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
342    /// Return the borrowed [`Cow`] contents.
343    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
344        B::as_xml_text(self.as_ref())
345    }
346}
347
348impl<T: AsXmlText> AsXmlText for &T {
349    /// Delegate to the `AsXmlText` implementation on `T`.
350    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
351        T::as_xml_text(*self)
352    }
353}
354
/// Specialized variant of [`AsXmlText`].
///
/// Do **not** implement this unless you cannot implement [`AsXmlText`]:
/// implementing [`AsXmlText`] is more versatile and an
/// [`AsOptionalXmlText`] implementation is automatically provided.
///
/// If you need to customize the behaviour of the [`AsOptionalXmlText`]
/// blanket implementation, implement a custom
/// [`AsXmlText::as_optional_xml_text`] instead.
///
/// Besides the blanket implementation, this trait is implemented for
/// `Option<T>` where `T` implements [`AsXmlText`].
pub trait AsOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` stands for an absent value; this is what the `Option<T>`
    /// implementation returns for `None`.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
}
369
370impl<T: AsXmlText> AsOptionalXmlText for T {
371    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
372        <Self as AsXmlText>::as_optional_xml_text(self)
373    }
374}
375
376impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
377    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
378        self.as_ref()
379            .map(T::as_optional_xml_text)
380            .transpose()
381            .map(Option::flatten)
382    }
383}
384
385/// Attempt to transform a type implementing [`AsXml`] into another
386/// type which implements [`FromXml`].
387pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
388    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
389    let (qname, attrs) = match iter.next() {
390        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
391        Some(Err(e)) => return Err(e),
392        _ => panic!("into_event_iter did not start with StartElement event!"),
393    };
394    let mut sink = match T::from_events(qname, attrs) {
395        Ok(v) => v,
396        Err(self::error::FromEventsError::Mismatch { .. }) => {
397            return Err(self::error::Error::TypeMismatch)
398        }
399        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
400    };
401    for event in iter {
402        let event = event?;
403        if let Some(v) = sink.feed(event)? {
404            return Ok(v);
405        }
406    }
407    Err(self::error::Error::XmlError(
408        rxml::error::XmlError::InvalidEof("during transform"),
409    ))
410}
411
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Errors
///
/// - `FromElementError::Mismatch` carries the unmodified element if `T`
///   rejects the element header as a mismatch.
/// - `FromElementError::Invalid` is returned if `T` rejects the element
///   header as invalid.
/// - Errors raised while serialising the element or while building `T` are
///   converted into the error type of this function.
///
/// # Panics
///
/// Panics if the element's item stream contains text or an element foot
/// before the end of the element header, or if the stream ends before `T`
/// is complete.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        // hand the element back to the caller so that it can be tried
        // against another type
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
    };

    let mut items = from.as_xml_iter()?;
    // The element header has already been passed to `from_events` above, so
    // it is skipped here. `by_ref` keeps `items` usable after the loop.
    for item in items.by_ref() {
        match item? {
            Item::ElementHeadEnd => break,
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // Everything after the header is converted to events and fed into the
    // builder until it reports completion.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }

    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
462
463fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
464    match r {
465        Ok(v) => Ok(v),
466        Err(rxml::Error::IO(_)) => unreachable!(),
467        Err(rxml::Error::Xml(e)) => Err(e.into()),
468        Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
469        Err(rxml::Error::InvalidChar(_)) => {
470            Err(self::error::Error::Other("non-character encountered"))
471        }
472        Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
473    }
474}
475
476fn read_start_event<I: std::io::BufRead>(
477    r: &mut rxml::Reader<I>,
478) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
479    for ev in r {
480        match map_nonio_error(ev)? {
481            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
482            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
483            _ => {
484                return Err(self::error::Error::Other(
485                    "Unexpected event at start of document",
486                ))
487            }
488        }
489    }
490    Err(self::error::Error::XmlError(
491        rxml::error::XmlError::InvalidEof("before start of element"),
492    ))
493}
494
495/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
496/// containing XML data.
497pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
498    let mut reader = rxml::Reader::new(&mut buf);
499    let (name, attrs) = read_start_event(&mut reader)?;
500    let mut builder = match T::from_events(name, attrs) {
501        Ok(v) => v,
502        Err(self::error::FromEventsError::Mismatch { .. }) => {
503            return Err(self::error::Error::TypeMismatch)
504        }
505        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
506    };
507    for ev in reader {
508        if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
509            return Ok(v);
510        }
511    }
512    Err(self::error::Error::XmlError(
513        rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
514    ))
515}