lib.rs

  1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
  2#![cfg_attr(docsrs, feature(doc_cfg))]
  3#![forbid(unsafe_code)]
  4#![warn(missing_docs)]
  5/*!
  6# XML Streamed Objects -- serde-like parsing for XML
  7
  8This crate provides the traits for parsing XML data into Rust structs, and
  9vice versa.
 10
 11While it is in 0.0.x versions, many features still need to be developed, but
 12rest assured that there is a solid plan to get it fully usable for even
 13advanced XML scenarios.
 14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
 17*/
 18
 19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 20//
 21// This Source Code Form is subject to the terms of the Mozilla Public
 22// License, v. 2.0. If a copy of the MPL was not distributed with this
 23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 24
 25use std::io;
 26
 27pub mod asxml;
 28pub mod error;
 29pub mod fromxml;
 30#[cfg(feature = "minidom")]
 31pub mod minidom_compat;
 32mod rxml_util;
 33pub mod text;
 34
 35#[doc(hidden)]
 36pub mod exports {
 37    #[cfg(feature = "minidom")]
 38    pub use minidom;
 39    pub use rxml;
 40}
 41
 42use std::borrow::Cow;
 43
 44#[doc(inline)]
 45pub use text::TextCodec;
 46
 47#[doc(inline)]
 48pub use rxml_util::Item;
 49
 50#[doc = include_str!("from_xml_doc.md")]
 51#[doc(inline)]
 52#[cfg(feature = "macros")]
 53pub use xso_proc::FromXml;
 54
 55/// # Make a struct or enum serialisable to XML
 56///
 57/// This derives the [`AsXml`] trait on a struct or enum. It is the
 58/// counterpart to [`macro@FromXml`].
 59///
 60/// The attributes necessary and available for the derivation to work are
 61/// documented on [`macro@FromXml`].
 62#[doc(inline)]
 63#[cfg(feature = "macros")]
 64pub use xso_proc::AsXml;
 65
 66/// Trait allowing to iterate a struct's contents as serialisable
 67/// [`Item`]s.
 68///
 69/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
 70/// type is considered a non-breaking change for any given implementation of
 71/// this trait. Always refer to a type's iterator type using fully-qualified
 72/// notation, for example: `<T as xso::AsXml>::ItemIter`.
 73pub trait AsXml {
 74    /// The iterator type.
 75    ///
 76    /// **Important:** Changing this type is considered a non-breaking change
 77    /// for any given implementation of this trait. Always refer to a type's
 78    /// iterator type using fully-qualified notation, for example:
 79    /// `<T as xso::AsXml>::ItemIter`.
 80    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
 81    where
 82        Self: 'x;
 83
 84    /// Return an iterator which emits the contents of the struct or enum as
 85    /// serialisable [`Item`] items.
 86    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
 87}
 88
 89/// Trait for a temporary object allowing to construct a struct from
 90/// [`rxml::Event`] items.
 91///
 92/// Objects of this type are generally constructed through
 93/// [`FromXml::from_events`] and are used to build Rust structs or enums from
 94/// XML data. The XML data must be fed as `rxml::Event` to the
 95/// [`feed`][`Self::feed`] method.
 96pub trait FromEventsBuilder {
 97    /// The type which will be constructed by this builder.
 98    type Output;
 99
100    /// Feed another [`rxml::Event`] into the element construction
101    /// process.
102    ///
103    /// Once the construction process completes, `Ok(Some(_))` is returned.
104    /// When valid data has been fed but more events are needed to fully
105    /// construct the resulting struct, `Ok(None)` is returned.
106    ///
107    /// If the construction fails, `Err(_)` is returned. Errors are generally
108    /// fatal and the builder should be assumed to be broken at that point.
109    /// Feeding more events after an error may result in panics, errors or
110    /// inconsistent result data, though it may never result in unsound or
111    /// unsafe behaviour.
112    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
113}
114
115/// Trait allowing to construct a struct from a stream of
116/// [`rxml::Event`] items.
117///
118/// To use this, first call [`FromXml::from_events`] with the qualified
119/// name and the attributes of the corresponding
120/// [`rxml::Event::StartElement`] event. If the call succeeds, the
121/// returned builder object must be fed with the events representing the
122/// contents of the element, and then with the `EndElement` event.
123///
124/// The `StartElement` passed to `from_events` must not be passed to `feed`.
125///
126/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
127/// is considered a non-breaking change for any given implementation of this
128/// trait. Always refer to a type's builder type using fully-qualified
129/// notation, for example: `<T as xso::FromXml>::Builder`.
130pub trait FromXml {
131    /// A builder type used to construct the element.
132    ///
133    /// **Important:** Changing this type is considered a non-breaking change
134    /// for any given implementation of this trait. Always refer to a type's
135    /// builder type using fully-qualified notation, for example:
136    /// `<T as xso::FromXml>::Builder`.
137    type Builder: FromEventsBuilder<Output = Self>;
138
139    /// Attempt to initiate the streamed construction of this struct from XML.
140    ///
141    /// If the passed qualified `name` and `attrs` match the element's type,
142    /// the [`Self::Builder`] is returned and should be fed with XML events
143    /// by the caller.
144    ///
145    /// Otherwise, an appropriate error is returned.
146    fn from_events(
147        name: rxml::QName,
148        attrs: rxml::AttrMap,
149    ) -> Result<Self::Builder, self::error::FromEventsError>;
150}
151
152/// Trait allowing to convert XML text to a value.
153///
154/// This trait is similar to [`core::str::FromStr`], however, due to
155/// restrictions imposed by the orphan rule, a separate trait is needed.
156/// Implementations for many standard library types are available. In
157/// addition, the following feature flags can enable more implementations:
158///
159/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
160/// - `uuid`: `uuid::Uuid`
161///
162/// Because of this unfortunate situation, we are **extremely liberal** with
163/// accepting optional dependencies for this purpose. You are very welcome to
164/// make merge requests against this crate adding support for parsing
165/// third-party crates.
166pub trait FromXmlText: Sized {
167    /// Convert the given XML text to a value.
168    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
169}
170
171impl FromXmlText for String {
172    /// Return the string unchanged.
173    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
174        Ok(data)
175    }
176}
177
178impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
179    /// Return a [`Cow::Owned`] containing the parsed value.
180    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
181        Ok(Cow::Owned(T::from_xml_text(data)?))
182    }
183}
184
185impl<T: FromXmlText> FromXmlText for Option<T> {
186    /// Return a [`Some`] containing the parsed value.
187    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
188        Ok(Some(T::from_xml_text(data)?))
189    }
190}
191
192impl<T: FromXmlText> FromXmlText for Box<T> {
193    /// Return a [`Box`] containing the parsed value.
194    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
195        Ok(Box::new(T::from_xml_text(data)?))
196    }
197}
198
199/// Trait to convert a value to an XML text string.
200///
201/// This trait is implemented for many standard library types implementing
202/// [`core::fmt::Display`]. In addition, the following feature flags can enable
203/// more implementations:
204///
205/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
206/// - `uuid`: `uuid::Uuid`
207///
208/// Because of the unfortunate situation as described in [`FromXmlText`], we
209/// are **extremely liberal** with accepting optional dependencies for this
210/// purpose. You are very welcome to make merge requests against this crate
211/// adding support for parsing third-party crates.
212pub trait AsXmlText {
213    /// Convert the value to an XML string in a context where an absent value
214    /// cannot be represented.
215    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
216
217    /// Convert the value to an XML string in a context where an absent value
218    /// can be represented.
219    ///
220    /// The provided implementation will always return the result of
221    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
222    /// this method, implementors can customize the behaviour for certain
223    /// values.
224    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
225        Ok(Some(self.as_xml_text()?))
226    }
227}
228
229impl AsXmlText for String {
230    /// Return the borrowed string contents.
231    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
232        Ok(Cow::Borrowed(self.as_str()))
233    }
234}
235
236impl AsXmlText for str {
237    /// Return the borrowed string contents.
238    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
239        Ok(Cow::Borrowed(&*self))
240    }
241}
242
243impl<T: AsXmlText> AsXmlText for Box<T> {
244    /// Return the borrowed [`Box`] contents.
245    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
246        T::as_xml_text(self)
247    }
248}
249
250impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
251    /// Return the borrowed [`Cow`] contents.
252    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
253        B::as_xml_text(self.as_ref())
254    }
255}
256
257impl<T: AsXmlText> AsXmlText for &T {
258    /// Delegate to the `AsXmlText` implementation on `T`.
259    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
260        T::as_xml_text(*self)
261    }
262}
263
264/// Specialized variant of [`AsXmlText`].
265///
266/// Do **not** implement this unless you cannot implement [`AsXmlText`]:
267/// implementing [`AsXmlText`] is more versatile and an
268/// [`AsOptionalXmlText`] implementation is automatically provided.
269///
270/// If you need to customize the behaviour of the [`AsOptionalXmlText`]
271/// blanket implementation, implement a custom
272/// [`AsXmlText::as_optional_xml_text`] instead.
273pub trait AsOptionalXmlText {
274    /// Convert the value to an XML string in a context where an absent value
275    /// can be represented.
276    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
277}
278
279impl<T: AsXmlText> AsOptionalXmlText for T {
280    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
281        <Self as AsXmlText>::as_optional_xml_text(self)
282    }
283}
284
285impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
286    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
287        self.as_ref()
288            .map(T::as_optional_xml_text)
289            .transpose()
290            .map(Option::flatten)
291    }
292}
293
294/// Attempt to transform a type implementing [`AsXml`] into another
295/// type which implements [`FromXml`].
296pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
297    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
298    let (qname, attrs) = match iter.next() {
299        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
300        Some(Err(e)) => return Err(e),
301        _ => panic!("into_event_iter did not start with StartElement event!"),
302    };
303    let mut sink = match T::from_events(qname, attrs) {
304        Ok(v) => v,
305        Err(self::error::FromEventsError::Mismatch { .. }) => {
306            return Err(self::error::Error::TypeMismatch)
307        }
308        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
309    };
310    for event in iter {
311        let event = event?;
312        if let Some(v) = sink.feed(event)? {
313            return Ok(v);
314        }
315    }
316    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
317}
318
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// In contrast to [`transform`] (which also accepts an element), this
/// function hands the element back unharmed, inside
/// [`error::FromElementError::Mismatch`], if its element header does not
/// match the expectations of `T`.
///
/// # Panics
///
/// Panics if the serialisation of the element yields text or an element
/// foot before the end of the element header.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    use self::error::{FromElementError, FromEventsError};

    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        // Not an element `T` is interested in: give it back to the caller.
        Err(FromEventsError::Mismatch { .. }) => return Err(FromElementError::Mismatch(from)),
        Err(FromEventsError::Invalid(err)) => return Err(FromElementError::Invalid(err)),
        Ok(builder) => builder,
    };

    let mut items = from.as_xml_iter()?;
    // The header has already been handed to `from_events` above, so the
    // items which make it up are skipped, up to and including its end
    // marker.
    for item in items.by_ref() {
        match item? {
            Item::ElementHeadEnd => break,
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => {}
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // Everything which remains is the element's content, ready to be fed to
    // the builder as events.
    for event in self::rxml_util::ItemToEvent::new(items) {
        match builder.feed(event?)? {
            Some(value) => return Ok(value),
            None => continue,
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
369
370fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
371    match r {
372        Ok(v) => Ok(v),
373        Err(e) => match e.downcast::<rxml::Error>() {
374            Ok(e) => Err(e.into()),
375            Err(_) => unreachable!("I/O error cannot be caused by &[]"),
376        },
377    }
378}
379
380fn read_start_event<I: std::io::BufRead>(
381    r: &mut rxml::Reader<I>,
382) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
383    for ev in r {
384        match map_nonio_error(ev)? {
385            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
386            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
387            _ => {
388                return Err(self::error::Error::Other(
389                    "Unexpected event at start of document",
390                ))
391            }
392        }
393    }
394    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(Some(
395        rxml::error::ErrorContext::DocumentBegin,
396    ))))
397}
398
399/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
400/// containing XML data.
401pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
402    let mut reader = rxml::Reader::new(&mut buf);
403    let (name, attrs) = read_start_event(&mut reader)?;
404    let mut builder = match T::from_events(name, attrs) {
405        Ok(v) => v,
406        Err(self::error::FromEventsError::Mismatch { .. }) => {
407            return Err(self::error::Error::TypeMismatch)
408        }
409        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
410    };
411    for ev in reader {
412        if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
413            return Ok(v);
414        }
415    }
416    Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
417}
418
/// Check whether the input consists of nothing but XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return). The check operates on the bytes
/// of the input. An empty input contains no non-whitespace bytes and
/// therefore yields `true`.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    for &byte in s.as_ref() {
        if !matches!(byte, b' ' | b'\t' | b'\r' | b'\n') {
            return false;
        }
    }
    true
}