lib.rs

  1#![cfg_attr(docsrs, feature(doc_cfg))]
  2#![forbid(unsafe_code)]
  3#![warn(missing_docs)]
  4/*!
  5# XML Streamed Objects -- serde-like parsing for XML
  6
  7This crate provides the traits for parsing XML data into Rust structs, and
  8vice versa.
  9
 10While it is in 0.0.x versions, many features still need to be developed, but
 11rest assured that there is a solid plan to get it fully usable for even
 12advanced XML scenarios.
 13
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
 16*/
 17
 18// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 19//
 20// This Source Code Form is subject to the terms of the Mozilla Public
 21// License, v. 2.0. If a copy of the MPL was not distributed with this
 22// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 23pub mod error;
 24#[cfg(feature = "minidom")]
 25pub mod minidom_compat;
 26mod text;
 27
// Re-exports of the crates this library is built on. The module is hidden
// from the rendered documentation; presumably it exists so that code
// generated by the derive macros can name these crates without the user
// depending on them directly -- TODO confirm against `xso_proc`.
#[doc(hidden)]
pub mod exports {
    // `minidom` is only re-exported when the `minidom` feature is enabled.
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;
}
 34
 35use std::borrow::Cow;
 36
 37#[doc = include_str!("from_xml_doc.md")]
 38#[doc(inline)]
 39#[cfg(feature = "macros")]
 40pub use xso_proc::FromXml;
 41
 42/// # Make a struct or enum serialisable to XML
 43///
 44/// This derives the [`IntoXml`] trait on a struct or enum. It is the
 45/// counterpart to [`macro@FromXml`].
 46///
 47/// The attributes necessary and available for the derivation to work are
 48/// documented on [`macro@FromXml`].
 49#[doc(inline)]
 50#[cfg(feature = "macros")]
 51pub use xso_proc::IntoXml;
 52
/// Trait allowing to consume a struct and iterate its contents as
/// serialisable [`rxml::Event`] items.
///
/// The conversion takes `self` by value; the events are obtained from the
/// iterator returned by [`into_event_iter`][`Self::into_event_iter`].
/// Consumers such as [`transform`] expect the first event emitted by that
/// iterator to be a [`rxml::Event::StartElement`].
///
/// **Important:** Changing the [`EventIter`][`Self::EventIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::IntoXml>::EventIter`.
pub trait IntoXml {
    /// The iterator type.
    ///
    /// Each item is either the next [`rxml::Event`] of the serialised form
    /// or an error which occurred while producing it.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::IntoXml>::EventIter`.
    type EventIter: Iterator<Item = Result<rxml::Event, self::error::Error>>;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`rxml::Event`] items.
    ///
    /// # Errors
    ///
    /// Returns `Err(_)` if the iterator itself cannot be constructed.
    /// Failures which only surface while iterating are reported through the
    /// items of the returned iterator instead.
    fn into_event_iter(self) -> Result<Self::EventIter, self::error::Error>;
}
 73
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method, one event at a time, until the builder
/// reports completion.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// # Return value
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// # Errors
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it may never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
 99
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// For complete inputs, [`transform`] and [`from_bytes`] drive this protocol
/// on behalf of the caller.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// Its [`FromEventsBuilder::Output`] is always the implementing type
    /// itself.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// # Errors
    ///
    /// Otherwise, an appropriate error is returned. The helpers in this
    /// crate distinguish two cases: a `FromEventsError::Mismatch` is treated
    /// as "this element is not of this type", while a
    /// `FromEventsError::Invalid` carries the underlying error, which is
    /// passed on to the caller as-is.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
136
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`std::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed as an owned [`String`], so implementations may
    /// reuse its allocation instead of copying.
    ///
    /// # Errors
    ///
    /// Returns `Err(_)` if `data` cannot be converted into `Self`.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
155
/// The text is used as-is; this conversion never fails.
impl FromXmlText for String {
    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
        // The input is already an owned string, so it is handed back without copying.
        Ok(data)
    }
}
161
162impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
163    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
164        Ok(Cow::Owned(T::from_xml_text(data)?))
165    }
166}
167
168impl<T: FromXmlText> FromXmlText for Option<T> {
169    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
170        Ok(Some(T::from_xml_text(data)?))
171    }
172}
173
174impl<T: FromXmlText> FromXmlText for Box<T> {
175    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
176        Ok(Box::new(T::from_xml_text(data)?))
177    }
178}
179
180/// Trait to convert a value to an XML text string.
181///
182/// This trait is implemented for many standard library types implementing
183/// [`std::fmt::Display`]. In addition, the following feature flags can enable
184/// more implementations:
185///
186/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
187/// - `uuid`: `uuid::Uuid`
188///
189/// Because of the unfortunate situation as described in [`FromXmlText`], we
190/// are **extremely liberal** with accepting optional dependencies for this
191/// purpose. You are very welcome to make merge requests against this crate
192/// adding support for parsing third-party crates.
193pub trait IntoXmlText: Sized {
194    /// Convert the value to an XML string in a context where an absent value
195    /// cannot be represented.
196    fn into_xml_text(self) -> Result<String, self::error::Error>;
197
198    /// Convert the value to an XML string in a context where an absent value
199    /// can be represented.
200    ///
201    /// The provided implementation will always return the result of
202    /// [`Self::into_xml_text`] wrapped into `Some(.)`. By re-implementing
203    /// this method, implementors can customize the behaviour for certain
204    /// values.
205    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
206        Ok(Some(self.into_xml_text()?))
207    }
208}
209
/// The string is used as-is; this conversion never fails.
impl IntoXmlText for String {
    fn into_xml_text(self) -> Result<String, self::error::Error> {
        // Already an owned string: hand it back without copying.
        Ok(self)
    }
}
215
216impl<T: IntoXmlText> IntoXmlText for Box<T> {
217    fn into_xml_text(self) -> Result<String, self::error::Error> {
218        T::into_xml_text(*self)
219    }
220}
221
222impl<T: IntoXmlText, B: ToOwned<Owned = T>> IntoXmlText for Cow<'_, B> {
223    fn into_xml_text(self) -> Result<String, self::error::Error> {
224        T::into_xml_text(self.into_owned())
225    }
226}
227
/// Specialized variant of [`IntoXmlText`].
///
/// Do **not** implement this unless you cannot implement [`IntoXmlText`]:
/// implementing [`IntoXmlText`] is more versatile and an
/// [`IntoOptionalXmlText`] implementation is automatically provided.
///
/// If you need to customize the behaviour of the [`IntoOptionalXmlText`]
/// blanket implementation, implement a custom
/// [`IntoXmlText::into_optional_xml_text`] instead.
///
/// An implementation for [`Option`] is provided, which maps `None` to an
/// absent value.
pub trait IntoOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` indicates an absent value, `Ok(Some(_))` carries the text.
    ///
    /// # Errors
    ///
    /// Returns `Err(_)` if the value cannot be represented as XML text.
    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error>;
}
242
243impl<T: IntoXmlText> IntoOptionalXmlText for T {
244    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
245        <Self as IntoXmlText>::into_optional_xml_text(self)
246    }
247}
248
249impl<T: IntoOptionalXmlText> IntoOptionalXmlText for Option<T> {
250    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
251        self.map(T::into_optional_xml_text)
252            .transpose()
253            .map(Option::flatten)
254    }
255}
256
257/// Attempt to transform a type implementing [`IntoXml`] into another
258/// type which implements [`FromXml`].
259pub fn transform<T: FromXml, F: IntoXml>(from: F) -> Result<T, self::error::Error> {
260    let mut iter = from.into_event_iter()?;
261    let (qname, attrs) = match iter.next() {
262        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
263        Some(Err(e)) => return Err(e),
264        _ => panic!("into_event_iter did not start with StartElement event!"),
265    };
266    let mut sink = match T::from_events(qname, attrs) {
267        Ok(v) => v,
268        Err(self::error::FromEventsError::Mismatch { .. }) => {
269            return Err(self::error::Error::TypeMismatch)
270        }
271        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
272    };
273    for event in iter {
274        let event = event?;
275        match sink.feed(event)? {
276            Some(v) => return Ok(v),
277            None => (),
278        }
279    }
280    Err(self::error::Error::XmlError(
281        rxml::error::XmlError::InvalidEof("during transform"),
282    ))
283}
284
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Errors
///
/// - `FromElementError::Mismatch` carrying the original element, if the
///   element header does not match `T`.
/// - `FromElementError::Invalid` if the header matches but `T` rejects it.
/// - Errors raised while deriving the header from the element, while
///   serialising the element into events, or by the builder of `T` are
///   converted into the returned error type.
///
/// # Panics
///
/// Panics if the element does not emit any event, or if its events end
/// before `T` has been completely constructed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    // The header is derived from a borrow so that the element is still
    // available to be handed back to the caller on mismatch.
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
        Ok(builder) => builder,
    };

    let mut events = from.into_event_iter()?;
    // Discard the leading header event: it has already been passed to
    // `from_events` above and must not be fed to the builder.
    events.next().expect("first event from minidom::Element")?;
    for event in events {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }

    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
320
321fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
322    match r {
323        Ok(v) => Ok(v),
324        Err(rxml::Error::IO(_)) => unreachable!(),
325        Err(rxml::Error::Xml(e)) => Err(e.into()),
326        Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
327        Err(rxml::Error::InvalidChar(_)) => {
328            Err(self::error::Error::Other("non-character encountered"))
329        }
330        Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
331    }
332}
333
334fn read_start_event<I: std::io::BufRead>(
335    r: &mut rxml::Reader<I>,
336) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
337    for ev in r {
338        match map_nonio_error(ev)? {
339            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
340            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
341            _ => {
342                return Err(self::error::Error::Other(
343                    "Unexpected event at start of document",
344                ))
345            }
346        }
347    }
348    Err(self::error::Error::XmlError(
349        rxml::error::XmlError::InvalidEof("before start of element"),
350    ))
351}
352
353/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
354/// containing XML data.
355pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
356    let mut reader = rxml::Reader::new(&mut buf);
357    let (name, attrs) = read_start_event(&mut reader)?;
358    let mut builder = match T::from_events(name, attrs) {
359        Ok(v) => v,
360        Err(self::error::FromEventsError::Mismatch { .. }) => {
361            return Err(self::error::Error::TypeMismatch)
362        }
363        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
364    };
365    for ev in reader {
366        match builder.feed(map_nonio_error(ev)?)? {
367            Some(v) => return Ok(v),
368            None => (),
369        }
370    }
371    Err(self::error::Error::XmlError(
372        rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
373    ))
374}