lib.rs

  1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
  2#![cfg_attr(docsrs, feature(doc_cfg))]
  3#![forbid(unsafe_code)]
  4#![warn(missing_docs)]
  5/*!
  6# XML Streamed Objects -- serde-like parsing for XML
  7
  8This crate provides the traits for parsing XML data into Rust structs, and
  9vice versa.
 10
 11While it is in 0.0.x versions, many features still need to be developed, but
 12rest assured that there is a solid plan to get it fully usable for even
 13advanced XML scenarios.
 14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
 17*/
 18
 19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 20//
 21// This Source Code Form is subject to the terms of the Mozilla Public
 22// License, v. 2.0. If a copy of the MPL was not distributed with this
 23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 24pub mod asxml;
 25pub mod error;
 26pub mod fromxml;
 27#[cfg(feature = "minidom")]
 28pub mod minidom_compat;
 29mod rxml_util;
 30pub mod text;
 31
// Re-exports of the crates this library builds on. The module is hidden from
// the generated documentation.
// NOTE(review): presumably this exists so that code generated by the derive
// macros can name `rxml`/`minidom` through `xso` -- confirm against xso_proc.
#[doc(hidden)]
pub mod exports {
    // Only re-exported when the `minidom` feature is enabled.
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;
}
 38
 39use std::borrow::Cow;
 40
 41#[doc(inline)]
 42pub use text::TextCodec;
 43
 44#[doc(inline)]
 45pub use rxml_util::Item;
 46
 47#[doc = include_str!("from_xml_doc.md")]
 48#[doc(inline)]
 49#[cfg(feature = "macros")]
 50pub use xso_proc::FromXml;
 51
 52/// # Make a struct or enum serialisable to XML
 53///
 54/// This derives the [`AsXml`] trait on a struct or enum. It is the
 55/// counterpart to [`macro@FromXml`].
 56///
 57/// The attributes necessary and available for the derivation to work are
 58/// documented on [`macro@FromXml`].
 59#[doc(inline)]
 60#[cfg(feature = "macros")]
 61pub use xso_proc::AsXml;
 62
/// Trait allowing to iterate a struct's contents as serialisable
/// [`Item`]s.
///
/// The serialised form is obtained by calling
/// [`as_xml_iter`][`Self::as_xml_iter`] and draining the returned iterator.
///
/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::AsXml>::ItemIter`.
pub trait AsXml {
    /// The iterator type.
    ///
    /// Every element yielded is either the next [`Item`] or an error. The
    /// lifetime `'x` ties the iterator (and the items it yields) to the
    /// borrow of the value being serialised.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::AsXml>::ItemIter`.
    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
    where
        Self: 'x;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`Item`] items.
    ///
    /// # Errors
    ///
    /// An error may be returned right away, before any item has been
    /// produced; further errors may be reported by the iterator itself.
    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
}
 85
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method, one event per call.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// The return value tells the caller how to proceed:
    ///
    /// - `Ok(Some(_))`: the construction process completed and the finished
    ///   value is returned.
    /// - `Ok(None)`: valid data has been fed, but more events are needed to
    ///   fully construct the resulting struct.
    /// - `Err(_)`: the construction failed.
    ///
    /// Errors are generally fatal and the builder should be assumed to be
    /// broken at that point. Feeding more events after an error may result
    /// in panics, errors or inconsistent result data, though it may never
    /// result in unsound or unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
111
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`:
/// its name and attributes have already been consumed by `from_events`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// The builder's [`FromEventsBuilder::Output`] is the implementing type
    /// itself.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// Otherwise, an appropriate error is returned.
    ///
    /// # Errors
    ///
    /// The error is an `error::FromEventsError`. The helper functions in this
    /// crate treat its `Mismatch` variant as "this element is not a `Self`"
    /// and its `Invalid` variant as a carrier for a regular `error::Error`.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
148
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`core::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed by value, so implementations may reuse the
    /// allocation of `data` instead of copying it.
    ///
    /// # Errors
    ///
    /// Returns an error if `data` cannot be converted to `Self`.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
167
impl FromXmlText for String {
    /// Return the string unchanged.
    ///
    /// This conversion never fails and does not copy the text: the passed
    /// `String` itself is handed back.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
        Ok(data)
    }
}
174
175impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
176    /// Return a [`Cow::Owned`] containing the parsed value.
177    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
178        Ok(Cow::Owned(T::from_xml_text(data)?))
179    }
180}
181
182impl<T: FromXmlText> FromXmlText for Option<T> {
183    /// Return a [`Some`] containing the parsed value.
184    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
185        Ok(Some(T::from_xml_text(data)?))
186    }
187}
188
189impl<T: FromXmlText> FromXmlText for Box<T> {
190    /// Return a [`Box`] containing the parsed value.
191    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
192        Ok(Box::new(T::from_xml_text(data)?))
193    }
194}
195
196/// Trait to convert a value to an XML text string.
197///
198/// This trait is implemented for many standard library types implementing
199/// [`core::fmt::Display`]. In addition, the following feature flags can enable
200/// more implementations:
201///
202/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
203/// - `uuid`: `uuid::Uuid`
204///
205/// Because of the unfortunate situation as described in [`FromXmlText`], we
206/// are **extremely liberal** with accepting optional dependencies for this
207/// purpose. You are very welcome to make merge requests against this crate
208/// adding support for parsing third-party crates.
209pub trait AsXmlText {
210    /// Convert the value to an XML string in a context where an absent value
211    /// cannot be represented.
212    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
213
214    /// Convert the value to an XML string in a context where an absent value
215    /// can be represented.
216    ///
217    /// The provided implementation will always return the result of
218    /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
219    /// this method, implementors can customize the behaviour for certain
220    /// values.
221    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
222        Ok(Some(self.as_xml_text()?))
223    }
224}
225
226impl AsXmlText for String {
227    /// Return the borrowed string contents.
228    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
229        Ok(Cow::Borrowed(self.as_str()))
230    }
231}
232
233impl AsXmlText for str {
234    /// Return the borrowed string contents.
235    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
236        Ok(Cow::Borrowed(&*self))
237    }
238}
239
240impl<T: AsXmlText> AsXmlText for Box<T> {
241    /// Return the borrowed [`Box`] contents.
242    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
243        T::as_xml_text(self)
244    }
245}
246
247impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
248    /// Return the borrowed [`Cow`] contents.
249    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
250        B::as_xml_text(self.as_ref())
251    }
252}
253
254impl<T: AsXmlText> AsXmlText for &T {
255    /// Delegate to the `AsXmlText` implementation on `T`.
256    fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
257        T::as_xml_text(*self)
258    }
259}
260
/// Specialized variant of [`AsXmlText`].
///
/// Do **not** implement this unless you cannot implement [`AsXmlText`]:
/// implementing [`AsXmlText`] is more versatile and an
/// [`AsOptionalXmlText`] implementation is automatically provided.
///
/// If you need to customize the behaviour of the [`AsOptionalXmlText`]
/// blanket implementation, implement a custom
/// [`AsXmlText::as_optional_xml_text`] instead.
pub trait AsOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` stands for an absent value, `Ok(Some(_))` carries the text
    /// to emit.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
}
275
276impl<T: AsXmlText> AsOptionalXmlText for T {
277    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
278        <Self as AsXmlText>::as_optional_xml_text(self)
279    }
280}
281
282impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
283    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
284        self.as_ref()
285            .map(T::as_optional_xml_text)
286            .transpose()
287            .map(Option::flatten)
288    }
289}
290
291/// Attempt to transform a type implementing [`AsXml`] into another
292/// type which implements [`FromXml`].
293pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
294    let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
295    let (qname, attrs) = match iter.next() {
296        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
297        Some(Err(e)) => return Err(e),
298        _ => panic!("into_event_iter did not start with StartElement event!"),
299    };
300    let mut sink = match T::from_events(qname, attrs) {
301        Ok(v) => v,
302        Err(self::error::FromEventsError::Mismatch { .. }) => {
303            return Err(self::error::Error::TypeMismatch)
304        }
305        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
306    };
307    for event in iter {
308        let event = event?;
309        if let Some(v) = sink.feed(event)? {
310            return Ok(v);
311        }
312    }
313    Err(self::error::Error::XmlError(
314        rxml::error::XmlError::InvalidEof("during transform"),
315    ))
316}
317
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function hands the element back unharmed, inside
/// `FromElementError::Mismatch`, if its element header does not match the
/// expectations of `T`.
///
/// # Panics
///
/// Panics if the element's item stream yields text or an element foot
/// before the end of the element header, or if the stream ends before `T`'s
/// builder has completed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            // `from` has not been touched so far and can be returned whole.
            return Err(self::error::FromElementError::Mismatch(from));
        }
    };

    let mut items = from.as_xml_iter()?;
    // The header was already handed to `from_events` above, so skip all items
    // up to and including the end of the element header.
    for item in items.by_ref() {
        match item? {
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::ElementHeadEnd => break,
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // Everything after the header is converted to events and fed to the
    // builder until it reports completion.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
368
369fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
370    match r {
371        Ok(v) => Ok(v),
372        Err(rxml::Error::IO(_)) => unreachable!(),
373        Err(rxml::Error::Xml(e)) => Err(e.into()),
374        Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
375        Err(rxml::Error::InvalidChar(_)) => {
376            Err(self::error::Error::Other("non-character encountered"))
377        }
378        Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
379    }
380}
381
382fn read_start_event<I: std::io::BufRead>(
383    r: &mut rxml::Reader<I>,
384) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
385    for ev in r {
386        match map_nonio_error(ev)? {
387            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
388            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
389            _ => {
390                return Err(self::error::Error::Other(
391                    "Unexpected event at start of document",
392                ))
393            }
394        }
395    }
396    Err(self::error::Error::XmlError(
397        rxml::error::XmlError::InvalidEof("before start of element"),
398    ))
399}
400
401/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
402/// containing XML data.
403pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
404    let mut reader = rxml::Reader::new(&mut buf);
405    let (name, attrs) = read_start_event(&mut reader)?;
406    let mut builder = match T::from_events(name, attrs) {
407        Ok(v) => v,
408        Err(self::error::FromEventsError::Mismatch { .. }) => {
409            return Err(self::error::Error::TypeMismatch)
410        }
411        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
412    };
413    for ev in reader {
414        if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
415            return Ok(v);
416        }
417    }
418    Err(self::error::Error::XmlError(
419        rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
420    ))
421}
422
/// Return true if the string contains exclusively XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return). The check is done on the raw
/// bytes of `s`; an empty input yields `true`.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    let bytes: &[u8] = s.as_ref();
    bytes
        .iter()
        .copied()
        .all(|byte| matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
}