lib.rs

  1#![cfg_attr(docsrs, feature(doc_cfg))]
  2#![forbid(unsafe_code)]
  3#![warn(missing_docs)]
  4/*!
  5# XML Streamed Objects -- serde-like parsing for XML
  6
  7This crate provides the traits for parsing XML data into Rust structs, and
  8vice versa.
  9
 10While it is in 0.0.x versions, many features still need to be developed, but
 11rest assured that there is a solid plan to get it fully usable for even
 12advanced XML scenarios.
 13
 14XSO is an acronym for XML Stream(ed) Objects, referring to this library's main
 15field of use: parsing XML streams as specified in RFC 6120.
 16*/
 17
 18// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 19//
 20// This Source Code Form is subject to the terms of the Mozilla Public
 21// License, v. 2.0. If a copy of the MPL was not distributed with this
 22// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 23pub mod error;
 24#[cfg(feature = "minidom")]
 25#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
 26pub mod minidom_compat;
 27mod rxml_util;
 28pub mod text;
 29
 30#[doc(hidden)]
 31pub mod exports {
 32    #[cfg(feature = "minidom")]
 33    pub use minidom;
 34    pub use rxml;
 35}
 36
 37use std::borrow::Cow;
 38
 39#[doc(inline)]
 40pub use text::TextCodec;
 41
 42#[doc(inline)]
 43pub use rxml_util::Item;
 44
 45#[doc = include_str!("from_xml_doc.md")]
 46#[doc(inline)]
 47#[cfg(feature = "macros")]
 48#[cfg_attr(docsrs, doc(cfg(feature = "macros")))]
 49pub use xso_proc::FromXml;
 50
 51/// # Make a struct or enum serialisable to XML
 52///
 53/// This derives the [`AsXml`] trait on a struct or enum. It is the
 54/// counterpart to [`macro@FromXml`].
 55///
 56/// The attributes necessary and available for the derivation to work are
 57/// documented on [`macro@FromXml`].
 58#[doc(inline)]
 59#[cfg(feature = "macros")]
 60#[cfg_attr(docsrs, doc(cfg(feature = "macros")))]
 61pub use xso_proc::AsXml;
 62
 63/// Trait allowing to iterate a struct's contents as serialisable
 64/// [`Item`]s.
 65///
 66/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
 67/// type is considered a non-breaking change for any given implementation of
 68/// this trait. Always refer to a type's iterator type using fully-qualified
 69/// notation, for example: `<T as xso::AsXml>::ItemIter`.
 70pub trait AsXml {
 71    /// The iterator type.
 72    ///
 73    /// **Important:** Changing this type is considered a non-breaking change
 74    /// for any given implementation of this trait. Always refer to a type's
 75    /// iterator type using fully-qualified notation, for example:
 76    /// `<T as xso::AsXml>::ItemIter`.
 77    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
 78    where
 79        Self: 'x;
 80
 81    /// Return an iterator which emits the contents of the struct or enum as
 82    /// serialisable [`Item`] items.
 83    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
 84}
 85
 86/// Helper iterator to convert an `Option<T>` to XML.
 87pub struct OptionAsXml<T: Iterator>(Option<T>);
 88
 89impl<'x, T: Iterator<Item = Result<Item<'x>, self::error::Error>>> Iterator for OptionAsXml<T> {
 90    type Item = Result<Item<'x>, self::error::Error>;
 91
 92    fn next(&mut self) -> Option<Self::Item> {
 93        self.0.as_mut()?.next()
 94    }
 95}
 96
 97/// Helper iterator to convert an `Box<T>` to XML.
 98pub struct BoxAsXml<T: Iterator>(Box<T>);
 99
100impl<'x, T: Iterator<Item = Result<Item<'x>, self::error::Error>>> Iterator for BoxAsXml<T> {
101    type Item = Result<Item<'x>, self::error::Error>;
102
103    fn next(&mut self) -> Option<Self::Item> {
104        self.0.next()
105    }
106}
107
108impl<T: AsXml> AsXml for Option<T> {
109    type ItemIter<'x> = OptionAsXml<T::ItemIter<'x>> where T: 'x;
110
111    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error> {
112        match self {
113            Some(ref value) => Ok(OptionAsXml(Some(T::as_xml_iter(value)?))),
114            None => Ok(OptionAsXml(None)),
115        }
116    }
117}
118
119impl<T: AsXml> AsXml for Box<T> {
120    type ItemIter<'x> = BoxAsXml<T::ItemIter<'x>> where T: 'x;
121
122    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error> {
123        Ok(BoxAsXml(Box::new(T::as_xml_iter(&self)?)))
124    }
125}
126
127/// Trait for a temporary object allowing to construct a struct from
128/// [`rxml::Event`] items.
129///
130/// Objects of this type are generally constructed through
131/// [`FromXml::from_events`] and are used to build Rust structs or enums from
132/// XML data. The XML data must be fed as `rxml::Event` to the
133/// [`feed`][`Self::feed`] method.
134pub trait FromEventsBuilder {
135    /// The type which will be constructed by this builder.
136    type Output;
137
138    /// Feed another [`rxml::Event`] into the element construction
139    /// process.
140    ///
141    /// Once the construction process completes, `Ok(Some(_))` is returned.
142    /// When valid data has been fed but more events are needed to fully
143    /// construct the resulting struct, `Ok(None)` is returned.
144    ///
145    /// If the construction fails, `Err(_)` is returned. Errors are generally
146    /// fatal and the builder should be assumed to be broken at that point.
147    /// Feeding more events after an error may result in panics, errors or
148    /// inconsistent result data, though it may never result in unsound or
149    /// unsafe behaviour.
150    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
151}
152
153/// Helper struct to construct an `Option<T>` from XML events.
154pub struct OptionBuilder<T: FromEventsBuilder>(T);
155
156/// Helper struct to construct an `Box<T>` from XML events.
157pub struct BoxBuilder<T: FromEventsBuilder>(Box<T>);
158
159impl<T: FromEventsBuilder> FromEventsBuilder for OptionBuilder<T> {
160    type Output = Option<T::Output>;
161
162    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error> {
163        self.0.feed(ev).map(|ok| ok.map(|value| Some(value)))
164    }
165}
166
167impl<T: FromEventsBuilder> FromEventsBuilder for BoxBuilder<T> {
168    type Output = Box<T::Output>;
169
170    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error> {
171        self.0.feed(ev).map(|ok| ok.map(|value| Box::new(value)))
172    }
173}
174
175/// Trait allowing to construct a struct from a stream of
176/// [`rxml::Event`] items.
177///
178/// To use this, first call [`FromXml::from_events`] with the qualified
179/// name and the attributes of the corresponding
180/// [`rxml::Event::StartElement`] event. If the call succeeds, the
181/// returned builder object must be fed with the events representing the
182/// contents of the element, and then with the `EndElement` event.
183///
184/// The `StartElement` passed to `from_events` must not be passed to `feed`.
185///
186/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
187/// is considered a non-breaking change for any given implementation of this
188/// trait. Always refer to a type's builder type using fully-qualified
189/// notation, for example: `<T as xso::FromXml>::Builder`.
190pub trait FromXml {
191    /// A builder type used to construct the element.
192    ///
193    /// **Important:** Changing this type is considered a non-breaking change
194    /// for any given implementation of this trait. Always refer to a type's
195    /// builder type using fully-qualified notation, for example:
196    /// `<T as xso::FromXml>::Builder`.
197    type Builder: FromEventsBuilder<Output = Self>;
198
199    /// Attempt to initiate the streamed construction of this struct from XML.
200    ///
201    /// If the passed qualified `name` and `attrs` match the element's type,
202    /// the [`Self::Builder`] is returned and should be fed with XML events
203    /// by the caller.
204    ///
205    /// Otherwise, an appropriate error is returned.
206    fn from_events(
207        name: rxml::QName,
208        attrs: rxml::AttrMap,
209    ) -> Result<Self::Builder, self::error::FromEventsError>;
210}
211
212impl<T: FromXml> FromXml for Option<T> {
213    type Builder = OptionBuilder<T::Builder>;
214
215    fn from_events(
216        name: rxml::QName,
217        attrs: rxml::AttrMap,
218    ) -> Result<Self::Builder, self::error::FromEventsError> {
219        Ok(OptionBuilder(T::from_events(name, attrs)?))
220    }
221}
222
223impl<T: FromXml> FromXml for Box<T> {
224    type Builder = BoxBuilder<T::Builder>;
225
226    fn from_events(
227        name: rxml::QName,
228        attrs: rxml::AttrMap,
229    ) -> Result<Self::Builder, self::error::FromEventsError> {
230        Ok(BoxBuilder(Box::new(T::from_events(name, attrs)?)))
231    }
232}
233
234/// Trait allowing to convert XML text to a value.
235///
236/// This trait is similar to [`std::str::FromStr`], however, due to
237/// restrictions imposed by the orphan rule, a separate trait is needed.
238/// Implementations for many standard library types are available. In
239/// addition, the following feature flags can enable more implementations:
240///
241/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
242/// - `uuid`: `uuid::Uuid`
243///
244/// Because of this unfortunate situation, we are **extremely liberal** with
245/// accepting optional dependencies for this purpose. You are very welcome to
246/// make merge requests against this crate adding support for parsing
247/// third-party crates.
248pub trait FromXmlText: Sized {
249    /// Convert the given XML text to a value.
250    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
251}
252
253impl FromXmlText for String {
254    /// Return the string unchanged.
255    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
256        Ok(data)
257    }
258}
259
260impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
261    /// Return a [`Cow::Owned`] containing the parsed value.
262    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
263        Ok(Cow::Owned(T::from_xml_text(data)?))
264    }
265}
266
267impl<T: FromXmlText> FromXmlText for Option<T> {
268    /// Return a [`Some`] containing the parsed value.
269    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
270        Ok(Some(T::from_xml_text(data)?))
271    }
272}
273
274impl<T: FromXmlText> FromXmlText for Box<T> {
275    /// Return a [`Box`] containing the parsed value.
276    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
277        Ok(Box::new(T::from_xml_text(data)?))
278    }
279}
280
281/// Trait to convert a value to an XML text string.
282///
283/// This trait is implemented for many standard library types implementing
284/// [`std::fmt::Display`]. In addition, the following feature flags can enable
285/// more implementations:
286///
287/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
288/// - `uuid`: `uuid::Uuid`
289///
290/// Because of the unfortunate situation as described in [`FromXmlText`], we
291/// are **extremely liberal** with accepting optional dependencies for this
292/// purpose. You are very welcome to make merge requests against this crate
293/// adding support for parsing third-party crates.
294pub trait AsXmlText {
295    /// Convert the value to an XML string in a context where an absent value
296    /// cannot be represented.
297    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
298
299    /// Convert the value to an XML string in a context where an absent value
300    /// can be represented.
301    ///
302    /// The provided implementation will always return the result of
303    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
304    /// this method, implementors can customize the behaviour for certain
305    /// values.
306    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
307        Ok(Some(self.as_xml_text()?))
308    }
309}
310
311impl AsXmlText for String {
312    /// Return the borrowed string contents.
313    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
314        Ok(Cow::Borrowed(self.as_str()))
315    }
316}
317
318impl AsXmlText for str {
319    /// Return the borrowed string contents.
320    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
321        Ok(Cow::Borrowed(&*self))
322    }
323}
324
325impl<T: AsXmlText> AsXmlText for Box<T> {
326    /// Return the borrowed [`Box`] contents.
327    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
328        T::as_xml_text(self)
329    }
330}
331
332impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
333    /// Return the borrowed [`Cow`] contents.
334    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
335        B::as_xml_text(self.as_ref())
336    }
337}
338
339impl<T: AsXmlText> AsXmlText for &T {
340    /// Delegate to the `AsXmlText` implementation on `T`.
341    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
342        T::as_xml_text(*self)
343    }
344}
345
346/// Specialized variant of [`AsXmlText`].
347///
348/// Do **not** implement this unless you cannot implement [`AsXmlText`]:
349/// implementing [`AsXmlText`] is more versatile and an
350/// [`AsOptionalXmlText`] implementation is automatically provided.
351///
352/// If you need to customize the behaviour of the [`AsOptionalXmlText`]
353/// blanket implementation, implement a custom
354/// [`AsXmlText::as_optional_xml_text`] instead.
355pub trait AsOptionalXmlText {
356    /// Convert the value to an XML string in a context where an absent value
357    /// can be represented.
358    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
359}
360
361impl<T: AsXmlText> AsOptionalXmlText for T {
362    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
363        <Self as AsXmlText>::as_optional_xml_text(self)
364    }
365}
366
367impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
368    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
369        self.as_ref()
370            .map(T::as_optional_xml_text)
371            .transpose()
372            .map(Option::flatten)
373    }
374}
375
376/// Attempt to transform a type implementing [`AsXml`] into another
377/// type which implements [`FromXml`].
378pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
379    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
380    let (qname, attrs) = match iter.next() {
381        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
382        Some(Err(e)) => return Err(e),
383        _ => panic!("into_event_iter did not start with StartElement event!"),
384    };
385    let mut sink = match T::from_events(qname, attrs) {
386        Ok(v) => v,
387        Err(self::error::FromEventsError::Mismatch { .. }) => {
388            return Err(self::error::Error::TypeMismatch)
389        }
390        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
391    };
392    for event in iter {
393        let event = event?;
394        if let Some(v) = sink.feed(event)? {
395            return Ok(v);
396        }
397    }
398    Err(self::error::Error::XmlError(
399        rxml::error::XmlError::InvalidEof("during transform"),
400    ))
401}
402
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Errors
///
/// - `FromElementError::Mismatch` carries the original element back to the
///   caller if its header does not match `T`.
/// - `FromElementError::Invalid` is returned if the header matches, but is
///   not valid for `T`.
/// - Errors raised while serialising the element or while feeding the
///   builder of `T` are converted into the returned error type.
///
/// # Panics
///
/// Panics if the element's item stream emits text or an element foot before
/// the end of the element header, or if it ends before `T` has been fully
/// constructed.
#[cfg(feature = "minidom")]
#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    use self::error::{FromElementError, FromEventsError};

    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut sink = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        // Hand the untouched element back so the caller can try another type.
        Err(FromEventsError::Mismatch { .. }) => return Err(FromElementError::Mismatch(from)),
        Err(FromEventsError::Invalid(e)) => return Err(FromElementError::Invalid(e)),
    };

    let mut items = from.as_xml_iter()?;
    // The element header has already been handed to `from_events` above, so
    // drop everything up to and including the end of the header.
    for item in &mut items {
        match item? {
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::ElementHeadEnd => break,
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // Everything after the header is converted to events and fed to the
    // builder.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(v) = sink.feed(event?)? {
            return Ok(v);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
454
455fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
456    match r {
457        Ok(v) => Ok(v),
458        Err(rxml::Error::IO(_)) => unreachable!(),
459        Err(rxml::Error::Xml(e)) => Err(e.into()),
460        Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
461        Err(rxml::Error::InvalidChar(_)) => {
462            Err(self::error::Error::Other("non-character encountered"))
463        }
464        Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
465    }
466}
467
468fn read_start_event<I: std::io::BufRead>(
469    r: &mut rxml::Reader<I>,
470) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
471    for ev in r {
472        match map_nonio_error(ev)? {
473            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
474            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
475            _ => {
476                return Err(self::error::Error::Other(
477                    "Unexpected event at start of document",
478                ))
479            }
480        }
481    }
482    Err(self::error::Error::XmlError(
483        rxml::error::XmlError::InvalidEof("before start of element"),
484    ))
485}
486
487/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
488/// containing XML data.
489pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
490    let mut reader = rxml::Reader::new(&mut buf);
491    let (name, attrs) = read_start_event(&mut reader)?;
492    let mut builder = match T::from_events(name, attrs) {
493        Ok(v) => v,
494        Err(self::error::FromEventsError::Mismatch { .. }) => {
495            return Err(self::error::Error::TypeMismatch)
496        }
497        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
498    };
499    for ev in reader {
500        if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
501            return Ok(v);
502        }
503    }
504    Err(self::error::Error::XmlError(
505        rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
506    ))
507}