lib.rs

  1#![cfg_attr(docsrs, feature(doc_cfg))]
  2#![forbid(unsafe_code)]
  3#![warn(missing_docs)]
  4/*!
  5# XML Streamed Objects -- serde-like parsing for XML
  6
  7This crate provides the traits for parsing XML data into Rust structs, and
  8vice versa.
  9
 10While it is in 0.0.x versions, many features still need to be developed, but
 11rest assured that there is a solid plan to get it fully usable for even
 12advanced XML scenarios.
 13
 14XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library in parsing XML streams as specified in RFC 6120.
 16*/
 17
 18// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 19//
 20// This Source Code Form is subject to the terms of the Mozilla Public
 21// License, v. 2.0. If a copy of the MPL was not distributed with this
 22// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 23pub mod error;
 24#[cfg(feature = "minidom")]
 25#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
 26pub mod minidom_compat;
 27mod text;
 28
// Re-exports of the crates this library builds upon: `rxml` unconditionally,
// and `minidom` only when the `minidom` feature is enabled. The module is
// hidden from the rendered documentation; presumably it exists so that code
// generated by the derive macros can name these crates through `xso` --
// TODO confirm against the macro crate.
#[doc(hidden)]
pub mod exports {
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;
}
 35
 36use std::borrow::Cow;
 37
 38#[doc = include_str!("from_xml_doc.md")]
 39#[doc(inline)]
 40#[cfg(feature = "macros")]
 41pub use xso_proc::FromXml;
 42
 43/// # Make a struct or enum serialisable to XML
 44///
 45/// This derives the [`IntoXml`] trait on a struct or enum. It is the
 46/// counterpart to [`macro@FromXml`].
 47///
 48/// The attributes necessary and available for the derivation to work are
 49/// documented on [`macro@FromXml`].
 50#[doc(inline)]
 51#[cfg(feature = "macros")]
 52pub use xso_proc::IntoXml;
 53
/// Trait for consuming a value and iterating over its contents as
/// serialisable [`rxml::Event`] items.
///
/// **Important:** Changing the [`EventIter`][`Self::EventIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::IntoXml>::EventIter`.
pub trait IntoXml {
    /// The iterator type.
    ///
    /// Each item is either the next [`rxml::Event`] of the serialised value
    /// or an error which occurred while producing it.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::IntoXml>::EventIter`.
    type EventIter: Iterator<Item = Result<rxml::Event, self::error::Error>>;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`rxml::Event`] items.
    ///
    /// The value is consumed by this call.
    ///
    /// Note that [`transform`] expects the first item emitted by the
    /// returned iterator to be an [`rxml::Event::StartElement`] and panics
    /// if it is any other event (or if the iterator is empty).
    ///
    /// # Errors
    ///
    /// Returns an error if the iterator cannot be constructed from the
    /// value. Errors which occur later, while events are being generated,
    /// are reported through the items of the iterator instead.
    fn into_event_iter(self) -> Result<Self::EventIter, self::error::Error>;
}
 74
/// Trait for a temporary object which constructs a struct or enum from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method, one event per call.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    ///
    /// A value of this type is handed out by [`feed`][`Self::feed`] once the
    /// construction has completed.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// # Errors
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it may never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
100
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// The builder's [`FromEventsBuilder::Output`] is always the
    /// implementing type itself.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// # Errors
    ///
    /// Otherwise, an appropriate error is returned. The helpers in this
    /// crate distinguish two cases of
    /// [`FromEventsError`][`self::error::FromEventsError`]: `Mismatch`,
    /// which they treat as "this element is not a `Self`" (for example,
    /// [`transform`] turns it into a type mismatch error), and `Invalid`,
    /// whose inner error is passed on to the caller.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
137
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`std::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed as an owned `String`, which allows
    /// implementations to reuse the allocation; the implementation for
    /// `String` simply returns it.
    ///
    /// # Errors
    ///
    /// Implementations return an error if `data` cannot be converted into
    /// a value of the implementing type.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
156
/// XML text is already a string, so this conversion never fails.
impl FromXmlText for String {
    /// Return the text unchanged, without copying it.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
        Ok(data)
    }
}
162
163impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
164    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
165        Ok(Cow::Owned(T::from_xml_text(data)?))
166    }
167}
168
169impl<T: FromXmlText> FromXmlText for Option<T> {
170    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
171        Ok(Some(T::from_xml_text(data)?))
172    }
173}
174
175impl<T: FromXmlText> FromXmlText for Box<T> {
176    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
177        Ok(Box::new(T::from_xml_text(data)?))
178    }
179}
180
181/// Trait to convert a value to an XML text string.
182///
183/// This trait is implemented for many standard library types implementing
184/// [`std::fmt::Display`]. In addition, the following feature flags can enable
185/// more implementations:
186///
187/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
188/// - `uuid`: `uuid::Uuid`
189///
190/// Because of the unfortunate situation as described in [`FromXmlText`], we
191/// are **extremely liberal** with accepting optional dependencies for this
192/// purpose. You are very welcome to make merge requests against this crate
193/// adding support for parsing third-party crates.
194pub trait IntoXmlText: Sized {
195    /// Convert the value to an XML string in a context where an absent value
196    /// cannot be represented.
197    fn into_xml_text(self) -> Result<String, self::error::Error>;
198
199    /// Convert the value to an XML string in a context where an absent value
200    /// can be represented.
201    ///
202    /// The provided implementation will always return the result of
203    /// [`Self::into_xml_text`] wrapped into `Some(.)`. By re-implementing
204    /// this method, implementors can customize the behaviour for certain
205    /// values.
206    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
207        Ok(Some(self.into_xml_text()?))
208    }
209}
210
/// A string already is XML text, so this conversion never fails.
impl IntoXmlText for String {
    /// Return the string unchanged, without copying it.
    fn into_xml_text(self) -> Result<String, self::error::Error> {
        Ok(self)
    }
}
216
217impl<T: IntoXmlText> IntoXmlText for Box<T> {
218    fn into_xml_text(self) -> Result<String, self::error::Error> {
219        T::into_xml_text(*self)
220    }
221}
222
223impl<T: IntoXmlText, B: ToOwned<Owned = T>> IntoXmlText for Cow<'_, B> {
224    fn into_xml_text(self) -> Result<String, self::error::Error> {
225        T::into_xml_text(self.into_owned())
226    }
227}
228
/// Specialized variant of [`IntoXmlText`].
///
/// Do **not** implement this unless you cannot implement [`IntoXmlText`]:
/// implementing [`IntoXmlText`] is more versatile and an
/// [`IntoOptionalXmlText`] implementation is automatically provided.
///
/// If you need to customize the behaviour of the [`IntoOptionalXmlText`]
/// blanket implementation, implement a custom
/// [`IntoXmlText::into_optional_xml_text`] instead.
///
/// Besides the blanket implementation, this trait is implemented for
/// `Option<T>` of any type `T` implementing it.
pub trait IntoOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` stands for the absent value; the implementation for
    /// `Option<T>` returns it for `None`.
    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error>;
}
243
244impl<T: IntoXmlText> IntoOptionalXmlText for T {
245    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
246        <Self as IntoXmlText>::into_optional_xml_text(self)
247    }
248}
249
250impl<T: IntoOptionalXmlText> IntoOptionalXmlText for Option<T> {
251    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
252        self.map(T::into_optional_xml_text)
253            .transpose()
254            .map(Option::flatten)
255    }
256}
257
258/// Attempt to transform a type implementing [`IntoXml`] into another
259/// type which implements [`FromXml`].
260pub fn transform<T: FromXml, F: IntoXml>(from: F) -> Result<T, self::error::Error> {
261    let mut iter = from.into_event_iter()?;
262    let (qname, attrs) = match iter.next() {
263        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
264        Some(Err(e)) => return Err(e),
265        _ => panic!("into_event_iter did not start with StartElement event!"),
266    };
267    let mut sink = match T::from_events(qname, attrs) {
268        Ok(v) => v,
269        Err(self::error::FromEventsError::Mismatch { .. }) => {
270            return Err(self::error::Error::TypeMismatch)
271        }
272        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
273    };
274    for event in iter {
275        let event = event?;
276        match sink.feed(event)? {
277            Some(v) => return Ok(v),
278            None => (),
279        }
280    }
281    Err(self::error::Error::XmlError(
282        rxml::error::XmlError::InvalidEof("during transform"),
283    ))
284}
285
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Errors
///
/// - `FromElementError::Mismatch` carrying the original element is returned
///   if `T::from_events` reports a mismatch.
/// - `FromElementError::Invalid` is returned if `T::from_events` rejects the
///   element header as invalid.
/// - Errors raised while generating the element's events or while feeding
///   them into the builder are converted into a `FromElementError`.
///
/// # Panics
///
/// Panics if the element's event iterator yields no events at all, or if it
/// is exhausted before the builder of `T` has completed.
#[cfg(feature = "minidom")]
#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    use self::error::{FromElementError, FromEventsError};

    // Build the header from a borrow first, so that the element can still be
    // handed back to the caller on a mismatch.
    let (name, attributes) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(name, attributes) {
        Ok(builder) => builder,
        Err(FromEventsError::Mismatch { .. }) => return Err(FromElementError::Mismatch(from)),
        Err(FromEventsError::Invalid(err)) => return Err(FromElementError::Invalid(err)),
    };

    let mut events = from.into_event_iter()?;
    // The first event is not fed into the builder: the builder was already
    // created from the element header above.
    events.next().expect("first event from minidom::Element")?;

    for event in events {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }

    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
322
323fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
324    match r {
325        Ok(v) => Ok(v),
326        Err(rxml::Error::IO(_)) => unreachable!(),
327        Err(rxml::Error::Xml(e)) => Err(e.into()),
328        Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
329        Err(rxml::Error::InvalidChar(_)) => {
330            Err(self::error::Error::Other("non-character encountered"))
331        }
332        Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
333    }
334}
335
336fn read_start_event<I: std::io::BufRead>(
337    r: &mut rxml::Reader<I>,
338) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
339    for ev in r {
340        match map_nonio_error(ev)? {
341            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
342            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
343            _ => {
344                return Err(self::error::Error::Other(
345                    "Unexpected event at start of document",
346                ))
347            }
348        }
349    }
350    Err(self::error::Error::XmlError(
351        rxml::error::XmlError::InvalidEof("before start of element"),
352    ))
353}
354
355/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
356/// containing XML data.
357pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
358    let mut reader = rxml::Reader::new(&mut buf);
359    let (name, attrs) = read_start_event(&mut reader)?;
360    let mut builder = match T::from_events(name, attrs) {
361        Ok(v) => v,
362        Err(self::error::FromEventsError::Mismatch { .. }) => {
363            return Err(self::error::Error::TypeMismatch)
364        }
365        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
366    };
367    for ev in reader {
368        match builder.feed(map_nonio_error(ev)?)? {
369            Some(v) => return Ok(v),
370            None => (),
371        }
372    }
373    Err(self::error::Error::XmlError(
374        rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
375    ))
376}