lib.rs

  1#![cfg_attr(docsrs, feature(doc_cfg))]
  2#![forbid(unsafe_code)]
  3#![warn(missing_docs)]
  4/*!
  5# XML Streamed Objects -- serde-like parsing for XML
  6
  7This crate provides the traits for parsing XML data into Rust structs, and
  8vice versa.
  9
 10While it is in 0.0.x versions, many features still need to be developed, but
 11rest assured that there is a solid plan to get it fully usable for even
 12advanced XML scenarios.
 13
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library in parsing XML streams as specified in RFC 6120.
 16*/
 17
 18// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 19//
 20// This Source Code Form is subject to the terms of the Mozilla Public
 21// License, v. 2.0. If a copy of the MPL was not distributed with this
 22// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 23pub mod error;
 24#[cfg(feature = "minidom")]
 25#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
 26pub mod minidom_compat;
 27mod rxml_util;
 28pub mod text;
 29
 30#[doc(hidden)]
 31pub mod exports {
 32    #[cfg(feature = "minidom")]
 33    pub use minidom;
 34    pub use rxml;
 35}
 36
 37use std::borrow::Cow;
 38
 39#[doc(inline)]
 40pub use text::TextCodec;
 41
 42#[doc(inline)]
 43pub use rxml_util::Item;
 44
 45#[doc = include_str!("from_xml_doc.md")]
 46#[doc(inline)]
 47#[cfg(feature = "macros")]
 48pub use xso_proc::FromXml;
 49
 50/// # Make a struct or enum serialisable to XML
 51///
 52/// This derives the [`IntoXml`] trait on a struct or enum. It is the
 53/// counterpart to [`macro@FromXml`].
 54///
 55/// The attributes necessary and available for the derivation to work are
 56/// documented on [`macro@FromXml`].
 57#[doc(inline)]
 58#[cfg(feature = "macros")]
 59pub use xso_proc::IntoXml;
 60
 61/// Trait allowing to consume a struct and iterate its contents as
 62/// serialisable [`rxml::Event`] items.
 63///
 64/// **Important:** Changing the [`EventIter`][`Self::EventIter`] associated
 65/// type is considered a non-breaking change for any given implementation of
 66/// this trait. Always refer to a type's iterator type using fully-qualified
 67/// notation, for example: `<T as xso::IntoXml>::EventIter`.
 68pub trait IntoXml {
 69    /// The iterator type.
 70    ///
 71    /// **Important:** Changing this type is considered a non-breaking change
 72    /// for any given implementation of this trait. Always refer to a type's
 73    /// iterator type using fully-qualified notation, for example:
 74    /// `<T as xso::IntoXml>::EventIter`.
 75    type EventIter: Iterator<Item = Result<rxml::Event, self::error::Error>>;
 76
 77    /// Return an iterator which emits the contents of the struct or enum as
 78    /// serialisable [`rxml::Event`] items.
 79    fn into_event_iter(self) -> Result<Self::EventIter, self::error::Error>;
 80}
 81
 82/// Trait allowing to iterate a struct's contents as serialisable
 83/// [`Item`]s.
 84///
 85/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
 86/// type is considered a non-breaking change for any given implementation of
 87/// this trait. Always refer to a type's iterator type using fully-qualified
 88/// notation, for example: `<T as xso::AsXml>::ItemIter`.
 89pub trait AsXml {
 90    /// The iterator type.
 91    ///
 92    /// **Important:** Changing this type is considered a non-breaking change
 93    /// for any given implementation of this trait. Always refer to a type's
 94    /// iterator type using fully-qualified notation, for example:
 95    /// `<T as xso::AsXml>::ItemIter`.
 96    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
 97    where
 98        Self: 'x;
 99
100    /// Return an iterator which emits the contents of the struct or enum as
101    /// serialisable [`Item`] items.
102    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
103}
104
105/// Trait for a temporary object allowing to construct a struct from
106/// [`rxml::Event`] items.
107///
108/// Objects of this type are generally constructed through
109/// [`FromXml::from_events`] and are used to build Rust structs or enums from
110/// XML data. The XML data must be fed as `rxml::Event` to the
111/// [`feed`][`Self::feed`] method.
112pub trait FromEventsBuilder {
113    /// The type which will be constructed by this builder.
114    type Output;
115
116    /// Feed another [`rxml::Event`] into the element construction
117    /// process.
118    ///
119    /// Once the construction process completes, `Ok(Some(_))` is returned.
120    /// When valid data has been fed but more events are needed to fully
121    /// construct the resulting struct, `Ok(None)` is returned.
122    ///
123    /// If the construction fails, `Err(_)` is returned. Errors are generally
124    /// fatal and the builder should be assumed to be broken at that point.
125    /// Feeding more events after an error may result in panics, errors or
126    /// inconsistent result data, though it may never result in unsound or
127    /// unsafe behaviour.
128    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
129}
130
131/// Trait allowing to construct a struct from a stream of
132/// [`rxml::Event`] items.
133///
134/// To use this, first call [`FromXml::from_events`] with the qualified
135/// name and the attributes of the corresponding
136/// [`rxml::Event::StartElement`] event. If the call succeeds, the
137/// returned builder object must be fed with the events representing the
138/// contents of the element, and then with the `EndElement` event.
139///
140/// The `StartElement` passed to `from_events` must not be passed to `feed`.
141///
142/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
143/// is considered a non-breaking change for any given implementation of this
144/// trait. Always refer to a type's builder type using fully-qualified
145/// notation, for example: `<T as xso::FromXml>::Builder`.
146pub trait FromXml {
147    /// A builder type used to construct the element.
148    ///
149    /// **Important:** Changing this type is considered a non-breaking change
150    /// for any given implementation of this trait. Always refer to a type's
151    /// builder type using fully-qualified notation, for example:
152    /// `<T as xso::FromXml>::Builder`.
153    type Builder: FromEventsBuilder<Output = Self>;
154
155    /// Attempt to initiate the streamed construction of this struct from XML.
156    ///
157    /// If the passed qualified `name` and `attrs` match the element's type,
158    /// the [`Self::Builder`] is returned and should be fed with XML events
159    /// by the caller.
160    ///
161    /// Otherwise, an appropriate error is returned.
162    fn from_events(
163        name: rxml::QName,
164        attrs: rxml::AttrMap,
165    ) -> Result<Self::Builder, self::error::FromEventsError>;
166}
167
168/// Trait allowing to convert XML text to a value.
169///
170/// This trait is similar to [`std::str::FromStr`], however, due to
171/// restrictions imposed by the orphan rule, a separate trait is needed.
172/// Implementations for many standard library types are available. In
173/// addition, the following feature flags can enable more implementations:
174///
175/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
176/// - `uuid`: `uuid::Uuid`
177///
178/// Because of this unfortunate situation, we are **extremely liberal** with
179/// accepting optional dependencies for this purpose. You are very welcome to
180/// make merge requests against this crate adding support for parsing
181/// third-party crates.
182pub trait FromXmlText: Sized {
183    /// Convert the given XML text to a value.
184    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
185}
186
187impl FromXmlText for String {
188    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
189        Ok(data)
190    }
191}
192
193impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
194    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
195        Ok(Cow::Owned(T::from_xml_text(data)?))
196    }
197}
198
199impl<T: FromXmlText> FromXmlText for Option<T> {
200    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
201        Ok(Some(T::from_xml_text(data)?))
202    }
203}
204
205impl<T: FromXmlText> FromXmlText for Box<T> {
206    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
207        Ok(Box::new(T::from_xml_text(data)?))
208    }
209}
210
211/// Trait to convert a value to an XML text string.
212///
213/// This trait is implemented for many standard library types implementing
214/// [`std::fmt::Display`]. In addition, the following feature flags can enable
215/// more implementations:
216///
217/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
218/// - `uuid`: `uuid::Uuid`
219///
220/// Because of the unfortunate situation as described in [`FromXmlText`], we
221/// are **extremely liberal** with accepting optional dependencies for this
222/// purpose. You are very welcome to make merge requests against this crate
223/// adding support for parsing third-party crates.
224pub trait IntoXmlText: Sized {
225    /// Convert the value to an XML string in a context where an absent value
226    /// cannot be represented.
227    fn into_xml_text(self) -> Result<String, self::error::Error>;
228
229    /// Convert the value to an XML string in a context where an absent value
230    /// can be represented.
231    ///
232    /// The provided implementation will always return the result of
233    /// [`Self::into_xml_text`] wrapped into `Some(.)`. By re-implementing
234    /// this method, implementors can customize the behaviour for certain
235    /// values.
236    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
237        Ok(Some(self.into_xml_text()?))
238    }
239}
240
241impl IntoXmlText for String {
242    fn into_xml_text(self) -> Result<String, self::error::Error> {
243        Ok(self)
244    }
245}
246
247impl<T: IntoXmlText> IntoXmlText for Box<T> {
248    fn into_xml_text(self) -> Result<String, self::error::Error> {
249        T::into_xml_text(*self)
250    }
251}
252
253impl<T: IntoXmlText, B: ToOwned<Owned = T>> IntoXmlText for Cow<'_, B> {
254    fn into_xml_text(self) -> Result<String, self::error::Error> {
255        T::into_xml_text(self.into_owned())
256    }
257}
258
259/// Specialized variant of [`IntoXmlText`].
260///
261/// Do **not** implement this unless you cannot implement [`IntoXmlText`]:
262/// implementing [`IntoXmlText`] is more versatile and an
263/// [`IntoOptionalXmlText`] implementation is automatically provided.
264///
265/// If you need to customize the behaviour of the [`IntoOptionalXmlText`]
266/// blanket implementation, implement a custom
267/// [`IntoXmlText::into_optional_xml_text`] instead.
268pub trait IntoOptionalXmlText {
269    /// Convert the value to an XML string in a context where an absent value
270    /// can be represented.
271    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error>;
272}
273
274impl<T: IntoXmlText> IntoOptionalXmlText for T {
275    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
276        <Self as IntoXmlText>::into_optional_xml_text(self)
277    }
278}
279
280impl<T: IntoOptionalXmlText> IntoOptionalXmlText for Option<T> {
281    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
282        self.map(T::into_optional_xml_text)
283            .transpose()
284            .map(Option::flatten)
285    }
286}
287
288/// Attempt to transform a type implementing [`IntoXml`] into another
289/// type which implements [`FromXml`].
290pub fn transform<T: FromXml, F: IntoXml>(from: F) -> Result<T, self::error::Error> {
291    let mut iter = from.into_event_iter()?;
292    let (qname, attrs) = match iter.next() {
293        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
294        Some(Err(e)) => return Err(e),
295        _ => panic!("into_event_iter did not start with StartElement event!"),
296    };
297    let mut sink = match T::from_events(qname, attrs) {
298        Ok(v) => v,
299        Err(self::error::FromEventsError::Mismatch { .. }) => {
300            return Err(self::error::Error::TypeMismatch)
301        }
302        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
303    };
304    for event in iter {
305        let event = event?;
306        if let Some(v) = sink.feed(event)? {
307            return Ok(v);
308        }
309    }
310    Err(self::error::Error::XmlError(
311        rxml::error::XmlError::InvalidEof("during transform"),
312    ))
313}
314
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function hands the element back unharmed, inside
/// [`FromElementError::Mismatch`][`self::error::FromElementError::Mismatch`],
/// if its element header does not match the expectations of `T`.
///
/// # Errors
///
/// Besides the mismatch case described above, any error which occurs while
/// serialising the element or while building `T` is returned.
#[cfg(feature = "minidom")]
#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    // The header is derived from a borrow, so that the element is still
    // intact and can be handed back if `T` rejects it.
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
    };

    let mut events = from.into_event_iter()?;
    // Discard the first event: the header has already been handed to
    // `from_events` above and must not be fed to the builder.
    let _header = events.next().expect("first event from minidom::Element")?;
    for event in events {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
350
351fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
352    match r {
353        Ok(v) => Ok(v),
354        Err(rxml::Error::IO(_)) => unreachable!(),
355        Err(rxml::Error::Xml(e)) => Err(e.into()),
356        Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
357        Err(rxml::Error::InvalidChar(_)) => {
358            Err(self::error::Error::Other("non-character encountered"))
359        }
360        Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
361    }
362}
363
364fn read_start_event<I: std::io::BufRead>(
365    r: &mut rxml::Reader<I>,
366) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
367    for ev in r {
368        match map_nonio_error(ev)? {
369            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
370            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
371            _ => {
372                return Err(self::error::Error::Other(
373                    "Unexpected event at start of document",
374                ))
375            }
376        }
377    }
378    Err(self::error::Error::XmlError(
379        rxml::error::XmlError::InvalidEof("before start of element"),
380    ))
381}
382
383/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
384/// containing XML data.
385pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
386    let mut reader = rxml::Reader::new(&mut buf);
387    let (name, attrs) = read_start_event(&mut reader)?;
388    let mut builder = match T::from_events(name, attrs) {
389        Ok(v) => v,
390        Err(self::error::FromEventsError::Mismatch { .. }) => {
391            return Err(self::error::Error::TypeMismatch)
392        }
393        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
394    };
395    for ev in reader {
396        if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
397            return Ok(v);
398        }
399    }
400    Err(self::error::Error::XmlError(
401        rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
402    ))
403}