1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![forbid(unsafe_code)]
4#![warn(missing_docs)]
5/*!
6# XML Streamed Objects -- serde-like parsing for XML
7
8This crate provides the traits for parsing XML data into Rust structs, and
9vice versa.
10
11While it is in 0.0.x versions, many features still need to be developed, but
12rest assured that there is a solid plan to get it fully usable for even
13advanced XML scenarios.
14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library in parsing XML streams as specified in RFC 6120.
17*/
18
19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
20//
21// This Source Code Form is subject to the terms of the Mozilla Public
22// License, v. 2.0. If a copy of the MPL was not distributed with this
23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
24pub mod error;
25#[cfg(feature = "minidom")]
26pub mod minidom_compat;
27mod rxml_util;
28pub mod text;
29
// Re-exports of the external crates whose types appear in this crate's
// public API (`rxml` always, `minidom` only with the `minidom` feature).
// Hidden from the rendered documentation; presumably meant for use by
// generated (derive macro) code rather than by end users -- TODO confirm.
#[doc(hidden)]
pub mod exports {
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;
}
36
37use std::borrow::Cow;
38
39#[doc(inline)]
40pub use text::TextCodec;
41
42#[doc(inline)]
43pub use rxml_util::Item;
44
45#[doc = include_str!("from_xml_doc.md")]
46#[doc(inline)]
47#[cfg(feature = "macros")]
48pub use xso_proc::FromXml;
49
50/// # Make a struct or enum serialisable to XML
51///
52/// This derives the [`AsXml`] trait on a struct or enum. It is the
53/// counterpart to [`macro@FromXml`].
54///
55/// The attributes necessary and available for the derivation to work are
56/// documented on [`macro@FromXml`].
57#[doc(inline)]
58#[cfg(feature = "macros")]
59pub use xso_proc::AsXml;
60
/// Trait allowing to iterate a struct's contents as serialisable
/// [`Item`]s.
///
/// Implementors expose their XML representation as a fallible iterator of
/// [`Item`]s which borrows from the value being serialised.
///
/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::AsXml>::ItemIter`.
pub trait AsXml {
    /// The iterator type.
    ///
    /// Each element is either the next [`Item`] or an error encountered
    /// while producing it.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::AsXml>::ItemIter`.
    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
    where
        Self: 'x;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`Item`] items.
    ///
    /// Constructing the iterator may itself fail, in which case the error is
    /// returned directly instead of an iterator.
    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
}
83
/// Helper iterator to convert an `Option<T>` to XML.
///
/// Wraps an optional inner iterator: with `Some(inner)` it forwards to
/// `inner`, with `None` it yields nothing at all.
pub struct OptionAsXml<T: Iterator>(Option<T>);
86
87impl<T: Iterator> OptionAsXml<T> {
88 /// Construct a new iterator, wrapping the given iterator.
89 ///
90 /// If `inner` is `None`, this iterator terminates immediately. Otherwise,
91 /// it yields the elements yielded by `inner` until `inner` finishes,
92 /// after which this iterator completes, too.
93 pub fn new(inner: Option<T>) -> Self {
94 Self(inner)
95 }
96}
97
98impl<'x, T: Iterator<Item = Result<Item<'x>, self::error::Error>>> Iterator for OptionAsXml<T> {
99 type Item = Result<Item<'x>, self::error::Error>;
100
101 fn next(&mut self) -> Option<Self::Item> {
102 self.0.as_mut()?.next()
103 }
104}
105
/// Helper iterator to convert a `Box<T>` to XML.
///
/// Holds the inner iterator in a `Box` and forwards all iteration to it.
pub struct BoxAsXml<T: Iterator>(Box<T>);
108
109impl<'x, T: Iterator<Item = Result<Item<'x>, self::error::Error>>> Iterator for BoxAsXml<T> {
110 type Item = Result<Item<'x>, self::error::Error>;
111
112 fn next(&mut self) -> Option<Self::Item> {
113 self.0.next()
114 }
115}
116
117impl<T: AsXml> AsXml for Option<T> {
118 type ItemIter<'x> = OptionAsXml<T::ItemIter<'x>> where T: 'x;
119
120 fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error> {
121 match self {
122 Some(ref value) => Ok(OptionAsXml(Some(T::as_xml_iter(value)?))),
123 None => Ok(OptionAsXml(None)),
124 }
125 }
126}
127
128impl<T: AsXml> AsXml for Box<T> {
129 type ItemIter<'x> = BoxAsXml<T::ItemIter<'x>> where T: 'x;
130
131 fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error> {
132 Ok(BoxAsXml(Box::new(T::as_xml_iter(&self)?)))
133 }
134}
135
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method, one event at a time.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it may never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
161
/// Helper struct to construct an `Option<T>` from XML events.
///
/// Wraps the builder of the inner type.
pub struct OptionBuilder<T: FromEventsBuilder>(T);
164
/// Helper struct to construct a `Box<T>` from XML events.
///
/// Wraps the (boxed) builder of the inner type.
pub struct BoxBuilder<T: FromEventsBuilder>(Box<T>);
167
168impl<T: FromEventsBuilder> FromEventsBuilder for OptionBuilder<T> {
169 type Output = Option<T::Output>;
170
171 fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error> {
172 self.0.feed(ev).map(|ok| ok.map(|value| Some(value)))
173 }
174}
175
176impl<T: FromEventsBuilder> FromEventsBuilder for BoxBuilder<T> {
177 type Output = Box<T::Output>;
178
179 fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error> {
180 self.0.feed(ev).map(|ok| ok.map(|value| Box::new(value)))
181 }
182}
183
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// Otherwise, an appropriate error is returned.
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
220
221impl<T: FromXml> FromXml for Option<T> {
222 type Builder = OptionBuilder<T::Builder>;
223
224 fn from_events(
225 name: rxml::QName,
226 attrs: rxml::AttrMap,
227 ) -> Result<Self::Builder, self::error::FromEventsError> {
228 Ok(OptionBuilder(T::from_events(name, attrs)?))
229 }
230}
231
232impl<T: FromXml> FromXml for Box<T> {
233 type Builder = BoxBuilder<T::Builder>;
234
235 fn from_events(
236 name: rxml::QName,
237 attrs: rxml::AttrMap,
238 ) -> Result<Self::Builder, self::error::FromEventsError> {
239 Ok(BoxBuilder(Box::new(T::from_events(name, attrs)?)))
240 }
241}
242
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`core::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed as an owned `String`, so implementations may
    /// reuse its allocation. An error is returned if the text cannot be
    /// converted.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
261
impl FromXmlText for String {
    /// Return the string unchanged.
    ///
    /// This conversion never fails and does not copy the data.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
        Ok(data)
    }
}
268
269impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
270 /// Return a [`Cow::Owned`] containing the parsed value.
271 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
272 Ok(Cow::Owned(T::from_xml_text(data)?))
273 }
274}
275
276impl<T: FromXmlText> FromXmlText for Option<T> {
277 /// Return a [`Some`] containing the parsed value.
278 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
279 Ok(Some(T::from_xml_text(data)?))
280 }
281}
282
283impl<T: FromXmlText> FromXmlText for Box<T> {
284 /// Return a [`Box`] containing the parsed value.
285 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
286 Ok(Box::new(T::from_xml_text(data)?))
287 }
288}
289
290/// Trait to convert a value to an XML text string.
291///
292/// This trait is implemented for many standard library types implementing
293/// [`core::fmt::Display`]. In addition, the following feature flags can enable
294/// more implementations:
295///
296/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
297/// - `uuid`: `uuid::Uuid`
298///
299/// Because of the unfortunate situation as described in [`FromXmlText`], we
300/// are **extremely liberal** with accepting optional dependencies for this
301/// purpose. You are very welcome to make merge requests against this crate
302/// adding support for parsing third-party crates.
303pub trait AsXmlText {
304 /// Convert the value to an XML string in a context where an absent value
305 /// cannot be represented.
306 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
307
308 /// Convert the value to an XML string in a context where an absent value
309 /// can be represented.
310 ///
311 /// The provided implementation will always return the result of
312 /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
313 /// this method, implementors can customize the behaviour for certain
314 /// values.
315 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
316 Ok(Some(self.as_xml_text()?))
317 }
318}
319
320impl AsXmlText for String {
321 /// Return the borrowed string contents.
322 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
323 Ok(Cow::Borrowed(self.as_str()))
324 }
325}
326
327impl AsXmlText for str {
328 /// Return the borrowed string contents.
329 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
330 Ok(Cow::Borrowed(&*self))
331 }
332}
333
334impl<T: AsXmlText> AsXmlText for Box<T> {
335 /// Return the borrowed [`Box`] contents.
336 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
337 T::as_xml_text(self)
338 }
339}
340
341impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
342 /// Return the borrowed [`Cow`] contents.
343 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
344 B::as_xml_text(self.as_ref())
345 }
346}
347
348impl<T: AsXmlText> AsXmlText for &T {
349 /// Delegate to the `AsXmlText` implementation on `T`.
350 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
351 T::as_xml_text(*self)
352 }
353}
354
/// Specialized variant of [`AsXmlText`].
///
/// Do **not** implement this unless you cannot implement [`AsXmlText`]:
/// implementing [`AsXmlText`] is more versatile and an
/// [`AsOptionalXmlText`] implementation is automatically provided.
///
/// If you need to customize the behaviour of the [`AsOptionalXmlText`]
/// blanket implementation, implement a custom
/// [`AsXmlText::as_optional_xml_text`] instead.
pub trait AsOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` represents an absent value, `Ok(Some(_))` carries the
    /// text of a present value.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
}
369
370impl<T: AsXmlText> AsOptionalXmlText for T {
371 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
372 <Self as AsXmlText>::as_optional_xml_text(self)
373 }
374}
375
376impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
377 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
378 self.as_ref()
379 .map(T::as_optional_xml_text)
380 .transpose()
381 .map(Option::flatten)
382 }
383}
384
385/// Attempt to transform a type implementing [`AsXml`] into another
386/// type which implements [`FromXml`].
387pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
388 let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
389 let (qname, attrs) = match iter.next() {
390 Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
391 Some(Err(e)) => return Err(e),
392 _ => panic!("into_event_iter did not start with StartElement event!"),
393 };
394 let mut sink = match T::from_events(qname, attrs) {
395 Ok(v) => v,
396 Err(self::error::FromEventsError::Mismatch { .. }) => {
397 return Err(self::error::Error::TypeMismatch)
398 }
399 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
400 };
401 for event in iter {
402 let event = event?;
403 if let Some(v) = sink.feed(event)? {
404 return Ok(v);
405 }
406 }
407 Err(self::error::Error::XmlError(
408 rxml::error::XmlError::InvalidEof("during transform"),
409 ))
410}
411
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// In contrast to [`transform`] (which also accepts an element), a mismatch
/// between the element header and the expectations of `T` hands the
/// unmodified element back to the caller inside the returned error.
///
/// # Panics
///
/// Panics if the element's item stream is malformed (text or an element
/// foot before the end of the element header) or if it ends before `T` has
/// been fully constructed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            // hand the untouched element back to the caller
            return Err(self::error::FromElementError::Mismatch(from));
        }
    };

    let mut items = from.as_xml_iter()?;
    // The element header has already been passed to `from_events` above, so
    // it is dropped from the item stream before feeding the builder.
    for item in &mut items {
        match item? {
            Item::ElementHeadEnd => break,
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
462
463fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
464 match r {
465 Ok(v) => Ok(v),
466 Err(rxml::Error::IO(_)) => unreachable!(),
467 Err(rxml::Error::Xml(e)) => Err(e.into()),
468 Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
469 Err(rxml::Error::InvalidChar(_)) => {
470 Err(self::error::Error::Other("non-character encountered"))
471 }
472 Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
473 }
474}
475
476fn read_start_event<I: std::io::BufRead>(
477 r: &mut rxml::Reader<I>,
478) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
479 for ev in r {
480 match map_nonio_error(ev)? {
481 rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
482 rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
483 _ => {
484 return Err(self::error::Error::Other(
485 "Unexpected event at start of document",
486 ))
487 }
488 }
489 }
490 Err(self::error::Error::XmlError(
491 rxml::error::XmlError::InvalidEof("before start of element"),
492 ))
493}
494
495/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
496/// containing XML data.
497pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
498 let mut reader = rxml::Reader::new(&mut buf);
499 let (name, attrs) = read_start_event(&mut reader)?;
500 let mut builder = match T::from_events(name, attrs) {
501 Ok(v) => v,
502 Err(self::error::FromEventsError::Mismatch { .. }) => {
503 return Err(self::error::Error::TypeMismatch)
504 }
505 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
506 };
507 for ev in reader {
508 if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
509 return Ok(v);
510 }
511 }
512 Err(self::error::Error::XmlError(
513 rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
514 ))
515}