lib.rs

  1#![cfg_attr(docsrs, feature(doc_cfg))]
  2#![forbid(unsafe_code)]
  3#![warn(missing_docs)]
  4/*!
  5# XML Streamed Objects -- serde-like parsing for XML
  6
  7This crate provides the traits for parsing XML data into Rust structs, and
  8vice versa.
  9
 10While it is in 0.0.x versions, many features still need to be developed, but
 11rest assured that there is a solid plan to get it fully usable for even
 12advanced XML scenarios.
 13
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
 16*/
 17
 18// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 19//
 20// This Source Code Form is subject to the terms of the Mozilla Public
 21// License, v. 2.0. If a copy of the MPL was not distributed with this
 22// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 23pub mod error;
 24#[cfg(feature = "minidom")]
 25#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
 26pub mod minidom_compat;
 27pub mod text;
 28
 29#[doc(hidden)]
 30pub mod exports {
 31    #[cfg(feature = "minidom")]
 32    pub use minidom;
 33    pub use rxml;
 34}
 35
 36use std::borrow::Cow;
 37
 38#[doc(inline)]
 39pub use text::TextCodec;
 40
 41#[doc = include_str!("from_xml_doc.md")]
 42#[doc(inline)]
 43#[cfg(feature = "macros")]
 44pub use xso_proc::FromXml;
 45
 46/// # Make a struct or enum serialisable to XML
 47///
 48/// This derives the [`IntoXml`] trait on a struct or enum. It is the
 49/// counterpart to [`macro@FromXml`].
 50///
 51/// The attributes necessary and available for the derivation to work are
 52/// documented on [`macro@FromXml`].
 53#[doc(inline)]
 54#[cfg(feature = "macros")]
 55pub use xso_proc::IntoXml;
 56
/// Trait allowing to consume a struct and iterate its contents as
/// serialisable [`rxml::Event`] items.
///
/// [`transform`] expects the first item emitted by the iterator to be an
/// [`rxml::Event::StartElement`] and panics otherwise.
///
/// **Important:** Changing the [`EventIter`][`Self::EventIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::IntoXml>::EventIter`.
pub trait IntoXml {
    /// The iterator type.
    ///
    /// Each item is either the next [`rxml::Event`] or an error.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::IntoXml>::EventIter`.
    type EventIter: Iterator<Item = Result<rxml::Event, self::error::Error>>;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`rxml::Event`] items.
    ///
    /// # Errors
    ///
    /// Returns an error if the iterator cannot be created.
    fn into_event_iter(self) -> Result<Self::EventIter, self::error::Error>;
}
 77
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// # Errors
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it must never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
103
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// The builder's [`FromEventsBuilder::Output`] is the implementing type
    /// itself.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// # Errors
    ///
    /// Otherwise, an appropriate error is returned. The helper functions in
    /// this crate treat the `Mismatch` variant of the returned error as
    /// "the element is of a different type" and forward the error carried
    /// by the `Invalid` variant unchanged.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
140
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`std::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// types from third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// # Errors
    ///
    /// Returns an error if `data` cannot be converted into `Self`.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
159
/// Identity conversion: the XML text is handed back as-is.
impl FromXmlText for String {
    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
        // No parsing takes place, so this never returns an error.
        Ok(data)
    }
}
165
166impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
167    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
168        Ok(Cow::Owned(T::from_xml_text(data)?))
169    }
170}
171
172impl<T: FromXmlText> FromXmlText for Option<T> {
173    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
174        Ok(Some(T::from_xml_text(data)?))
175    }
176}
177
178impl<T: FromXmlText> FromXmlText for Box<T> {
179    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
180        Ok(Box::new(T::from_xml_text(data)?))
181    }
182}
183
184/// Trait to convert a value to an XML text string.
185///
186/// This trait is implemented for many standard library types implementing
187/// [`std::fmt::Display`]. In addition, the following feature flags can enable
188/// more implementations:
189///
190/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
191/// - `uuid`: `uuid::Uuid`
192///
193/// Because of the unfortunate situation as described in [`FromXmlText`], we
194/// are **extremely liberal** with accepting optional dependencies for this
195/// purpose. You are very welcome to make merge requests against this crate
196/// adding support for parsing third-party crates.
197pub trait IntoXmlText: Sized {
198    /// Convert the value to an XML string in a context where an absent value
199    /// cannot be represented.
200    fn into_xml_text(self) -> Result<String, self::error::Error>;
201
202    /// Convert the value to an XML string in a context where an absent value
203    /// can be represented.
204    ///
205    /// The provided implementation will always return the result of
206    /// [`Self::into_xml_text`] wrapped into `Some(.)`. By re-implementing
207    /// this method, implementors can customize the behaviour for certain
208    /// values.
209    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
210        Ok(Some(self.into_xml_text()?))
211    }
212}
213
/// Identity conversion: the string itself is used as the XML text.
impl IntoXmlText for String {
    fn into_xml_text(self) -> Result<String, self::error::Error> {
        // No formatting takes place, so this never returns an error.
        Ok(self)
    }
}
219
220impl<T: IntoXmlText> IntoXmlText for Box<T> {
221    fn into_xml_text(self) -> Result<String, self::error::Error> {
222        T::into_xml_text(*self)
223    }
224}
225
226impl<T: IntoXmlText, B: ToOwned<Owned = T>> IntoXmlText for Cow<'_, B> {
227    fn into_xml_text(self) -> Result<String, self::error::Error> {
228        T::into_xml_text(self.into_owned())
229    }
230}
231
/// Specialized variant of [`IntoXmlText`].
///
/// Do **not** implement this unless you cannot implement [`IntoXmlText`]:
/// implementing [`IntoXmlText`] is more versatile and an
/// [`IntoOptionalXmlText`] implementation is automatically provided.
///
/// If you need to customize the behaviour of the [`IntoOptionalXmlText`]
/// blanket implementation, implement a custom
/// [`IntoXmlText::into_optional_xml_text`] instead.
pub trait IntoOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// # Errors
    ///
    /// Returns an error if the value cannot be converted to text.
    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error>;
}
246
247impl<T: IntoXmlText> IntoOptionalXmlText for T {
248    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
249        <Self as IntoXmlText>::into_optional_xml_text(self)
250    }
251}
252
253impl<T: IntoOptionalXmlText> IntoOptionalXmlText for Option<T> {
254    fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
255        self.map(T::into_optional_xml_text)
256            .transpose()
257            .map(Option::flatten)
258    }
259}
260
261/// Attempt to transform a type implementing [`IntoXml`] into another
262/// type which implements [`FromXml`].
263pub fn transform<T: FromXml, F: IntoXml>(from: F) -> Result<T, self::error::Error> {
264    let mut iter = from.into_event_iter()?;
265    let (qname, attrs) = match iter.next() {
266        Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
267        Some(Err(e)) => return Err(e),
268        _ => panic!("into_event_iter did not start with StartElement event!"),
269    };
270    let mut sink = match T::from_events(qname, attrs) {
271        Ok(v) => v,
272        Err(self::error::FromEventsError::Mismatch { .. }) => {
273            return Err(self::error::Error::TypeMismatch)
274        }
275        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
276    };
277    for event in iter {
278        let event = event?;
279        if let Some(v) = sink.feed(event)? {
280            return Ok(v);
281        }
282    }
283    Err(self::error::Error::XmlError(
284        rxml::error::XmlError::InvalidEof("during transform"),
285    ))
286}
287
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Errors
///
/// - `FromElementError::Mismatch` carrying the original element is returned
///   if `T` rejects the element header as a mismatch.
/// - `FromElementError::Invalid` is returned if `T` rejects the element
///   header as invalid.
/// - Errors raised while converting the element into events or while
///   feeding them to the builder are converted into the return error type.
#[cfg(feature = "minidom")]
#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    use self::error::{FromElementError, FromEventsError};

    // The header is derived from a borrow so that `from` can still be handed
    // back to the caller on mismatch.
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(FromEventsError::Invalid(e)) => return Err(FromElementError::Invalid(e)),
        Err(FromEventsError::Mismatch { .. }) => return Err(FromElementError::Mismatch(from)),
    };

    let mut events = from.into_event_iter()?;
    // The first event is discarded: the header was already consumed by
    // `from_events` above.
    events.next().expect("first event from minidom::Element")?;
    for item in events {
        if let Some(value) = builder.feed(item?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
323
324fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
325    match r {
326        Ok(v) => Ok(v),
327        Err(rxml::Error::IO(_)) => unreachable!(),
328        Err(rxml::Error::Xml(e)) => Err(e.into()),
329        Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
330        Err(rxml::Error::InvalidChar(_)) => {
331            Err(self::error::Error::Other("non-character encountered"))
332        }
333        Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
334    }
335}
336
337fn read_start_event<I: std::io::BufRead>(
338    r: &mut rxml::Reader<I>,
339) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
340    for ev in r {
341        match map_nonio_error(ev)? {
342            rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
343            rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
344            _ => {
345                return Err(self::error::Error::Other(
346                    "Unexpected event at start of document",
347                ))
348            }
349        }
350    }
351    Err(self::error::Error::XmlError(
352        rxml::error::XmlError::InvalidEof("before start of element"),
353    ))
354}
355
356/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
357/// containing XML data.
358pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
359    let mut reader = rxml::Reader::new(&mut buf);
360    let (name, attrs) = read_start_event(&mut reader)?;
361    let mut builder = match T::from_events(name, attrs) {
362        Ok(v) => v,
363        Err(self::error::FromEventsError::Mismatch { .. }) => {
364            return Err(self::error::Error::TypeMismatch)
365        }
366        Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
367    };
368    for ev in reader {
369        if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
370            return Ok(v);
371        }
372    }
373    Err(self::error::Error::XmlError(
374        rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
375    ))
376}