1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![forbid(unsafe_code)]
4#![warn(missing_docs)]
5/*!
6# XML Streamed Objects -- serde-like parsing for XML
7
8This crate provides the traits for parsing XML data into Rust structs, and
9vice versa.
10
11While it is in 0.0.x versions, many features still need to be developed, but
12rest assured that there is a solid plan to get it fully usable for even
13advanced XML scenarios.
14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
17*/
18
19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
20//
21// This Source Code Form is subject to the terms of the Mozilla Public
22// License, v. 2.0. If a copy of the MPL was not distributed with this
23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
24
25use std::io;
26
27pub mod asxml;
28pub mod error;
29pub mod fromxml;
30#[cfg(feature = "minidom")]
31pub mod minidom_compat;
32mod rxml_util;
33pub mod text;
34
// Re-exports of the crates this library builds on, reachable as
// `xso::exports::*` and hidden from the rendered documentation.
// NOTE(review): presumably consumed by the code generated by the derive
// macros so that users need no direct dependency -- confirm against
// `xso_proc`.
#[doc(hidden)]
pub mod exports {
    // Only re-exported when the `minidom` feature is enabled.
    #[cfg(feature = "minidom")]
    pub use minidom;
    pub use rxml;
}
41
42use std::borrow::Cow;
43
44#[doc(inline)]
45pub use text::TextCodec;
46
47#[doc(inline)]
48pub use rxml_util::Item;
49
50#[doc = include_str!("from_xml_doc.md")]
51#[doc(inline)]
52#[cfg(feature = "macros")]
53pub use xso_proc::FromXml;
54
55/// # Make a struct or enum serialisable to XML
56///
57/// This derives the [`AsXml`] trait on a struct or enum. It is the
58/// counterpart to [`macro@FromXml`].
59///
60/// The attributes necessary and available for the derivation to work are
61/// documented on [`macro@FromXml`].
62#[doc(inline)]
63#[cfg(feature = "macros")]
64pub use xso_proc::AsXml;
65
/// Trait allowing to iterate a struct's contents as serialisable
/// [`Item`]s.
///
/// Implementations do not write XML themselves. They hand out an iterator
/// of [`Item`]s which a consumer (for example [`to_vec`]) encodes.
///
/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
/// type is considered a non-breaking change for any given implementation of
/// this trait. Always refer to a type's iterator type using fully-qualified
/// notation, for example: `<T as xso::AsXml>::ItemIter`.
pub trait AsXml {
    /// The iterator type.
    ///
    /// The iterator may borrow from the value it serialises (hence the
    /// `'x` lifetime) and yields each [`Item`] wrapped in a `Result`, so
    /// that serialisation errors can be reported item by item.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// iterator type using fully-qualified notation, for example:
    /// `<T as xso::AsXml>::ItemIter`.
    type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
    where
        Self: 'x;

    /// Return an iterator which emits the contents of the struct or enum as
    /// serialisable [`Item`] items.
    ///
    /// # Errors
    ///
    /// Returns an error if the iterator cannot be created. Errors which
    /// occur later are reported through the items of the iterator.
    fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
}
88
/// Trait for a temporary object allowing to construct a struct from
/// [`rxml::Event`] items.
///
/// Objects of this type are generally constructed through
/// [`FromXml::from_events`] and are used to build Rust structs or enums from
/// XML data. The XML data must be fed as `rxml::Event` to the
/// [`feed`][`Self::feed`] method, one event at a time, until it returns
/// `Ok(Some(_))` or an error.
pub trait FromEventsBuilder {
    /// The type which will be constructed by this builder.
    type Output;

    /// Feed another [`rxml::Event`] into the element construction
    /// process.
    ///
    /// Once the construction process completes, `Ok(Some(_))` is returned.
    /// When valid data has been fed but more events are needed to fully
    /// construct the resulting struct, `Ok(None)` is returned.
    ///
    /// # Errors
    ///
    /// If the construction fails, `Err(_)` is returned. Errors are generally
    /// fatal and the builder should be assumed to be broken at that point.
    /// Feeding more events after an error may result in panics, errors or
    /// inconsistent result data, though it may never result in unsound or
    /// unsafe behaviour.
    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
}
114
/// Trait allowing to construct a struct from a stream of
/// [`rxml::Event`] items.
///
/// To use this, first call [`FromXml::from_events`] with the qualified
/// name and the attributes of the corresponding
/// [`rxml::Event::StartElement`] event. If the call succeeds, the
/// returned builder object must be fed with the events representing the
/// contents of the element, and then with the `EndElement` event.
///
/// The `StartElement` passed to `from_events` must not be passed to `feed`.
///
/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
/// is considered a non-breaking change for any given implementation of this
/// trait. Always refer to a type's builder type using fully-qualified
/// notation, for example: `<T as xso::FromXml>::Builder`.
pub trait FromXml {
    /// A builder type used to construct the element.
    ///
    /// The builder's [`FromEventsBuilder::Output`] is the implementing type
    /// itself.
    ///
    /// **Important:** Changing this type is considered a non-breaking change
    /// for any given implementation of this trait. Always refer to a type's
    /// builder type using fully-qualified notation, for example:
    /// `<T as xso::FromXml>::Builder`.
    type Builder: FromEventsBuilder<Output = Self>;

    /// Attempt to initiate the streamed construction of this struct from XML.
    ///
    /// If the passed qualified `name` and `attrs` match the element's type,
    /// the [`Self::Builder`] is returned and should be fed with XML events
    /// by the caller.
    ///
    /// # Errors
    ///
    /// Otherwise, an appropriate
    /// [`FromEventsError`][`self::error::FromEventsError`] is returned. The
    /// helper functions in this module distinguish its `Mismatch` variant
    /// (reported as a type mismatch) from its `Invalid` variant (which
    /// carries an [`Error`][`self::error::Error`]).
    fn from_events(
        name: rxml::QName,
        attrs: rxml::AttrMap,
    ) -> Result<Self::Builder, self::error::FromEventsError>;
}
151
/// Trait allowing to convert XML text to a value.
///
/// This trait is similar to [`core::str::FromStr`], however, due to
/// restrictions imposed by the orphan rule, a separate trait is needed.
/// Implementations for many standard library types are available. In
/// addition, the following feature flags can enable more implementations:
///
/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
/// - `uuid`: `uuid::Uuid`
///
/// Because of this unfortunate situation, we are **extremely liberal** with
/// accepting optional dependencies for this purpose. You are very welcome to
/// make merge requests against this crate adding support for parsing
/// types from third-party crates.
pub trait FromXmlText: Sized {
    /// Convert the given XML text to a value.
    ///
    /// The text is passed as an owned `String`, so implementations may
    /// reuse its allocation.
    ///
    /// # Errors
    ///
    /// Returns an error if `data` cannot be converted to `Self`.
    fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
}
170
171impl FromXmlText for String {
172 /// Return the string unchanged.
173 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
174 Ok(data)
175 }
176}
177
178impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
179 /// Return a [`Cow::Owned`] containing the parsed value.
180 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
181 Ok(Cow::Owned(T::from_xml_text(data)?))
182 }
183}
184
185impl<T: FromXmlText> FromXmlText for Option<T> {
186 /// Return a [`Some`] containing the parsed value.
187 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
188 Ok(Some(T::from_xml_text(data)?))
189 }
190}
191
192impl<T: FromXmlText> FromXmlText for Box<T> {
193 /// Return a [`Box`] containing the parsed value.
194 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
195 Ok(Box::new(T::from_xml_text(data)?))
196 }
197}
198
199/// Trait to convert a value to an XML text string.
200///
201/// Implementing this trait for a type allows it to be used both for XML
202/// character data within elements and for XML attributes. For XML attributes,
203/// the behaviour is defined by [`AsXmlText::as_optional_xml_text`], while
204/// XML element text content uses [`AsXmlText::as_xml_text`]. Implementing
205/// [`AsXmlText`] automatically provides an implementation of
206/// [`AsOptionalXmlText`].
207///
208/// If your type should only be used in XML attributes and has no correct
209/// serialisation in XML text, you should *only* implement
210/// [`AsOptionalXmlText`] and omit the [`AsXmlText`] implementation.
211///
212/// This trait is implemented for many standard library types implementing
213/// [`core::fmt::Display`]. In addition, the following feature flags can enable
214/// more implementations:
215///
216/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
217/// - `uuid`: `uuid::Uuid`
218///
219/// Because of the unfortunate situation as described in [`FromXmlText`], we
220/// are **extremely liberal** with accepting optional dependencies for this
221/// purpose. You are very welcome to make merge requests against this crate
222/// adding support for parsing third-party crates.
223pub trait AsXmlText {
224 /// Convert the value to an XML string in a context where an absent value
225 /// cannot be represented.
226 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
227
228 /// Convert the value to an XML string in a context where an absent value
229 /// can be represented.
230 ///
231 /// The provided implementation will always return the result of
232 /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
233 /// this method, implementors can customize the behaviour for certain
234 /// values.
235 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
236 Ok(Some(self.as_xml_text()?))
237 }
238}
239
240impl AsXmlText for String {
241 /// Return the borrowed string contents.
242 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
243 Ok(Cow::Borrowed(self.as_str()))
244 }
245}
246
247impl AsXmlText for str {
248 /// Return the borrowed string contents.
249 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
250 Ok(Cow::Borrowed(&*self))
251 }
252}
253
254impl<T: AsXmlText> AsXmlText for Box<T> {
255 /// Return the borrowed [`Box`] contents.
256 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
257 T::as_xml_text(self)
258 }
259}
260
261impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
262 /// Return the borrowed [`Cow`] contents.
263 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
264 B::as_xml_text(self.as_ref())
265 }
266}
267
268impl<T: AsXmlText> AsXmlText for &T {
269 /// Delegate to the `AsXmlText` implementation on `T`.
270 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
271 T::as_xml_text(*self)
272 }
273}
274
/// Specialized variant of [`AsXmlText`].
///
/// Normally, it should not be necessary to implement this trait as it is
/// automatically implemented for all types implementing [`AsXmlText`].
/// However, if your type can only be serialised as an XML attribute (for
/// example because an absent value has a particular meaning), it is correct
/// to implement [`AsOptionalXmlText`] **instead of** [`AsXmlText`].
///
/// If your type can be serialised as both (text and attribute) but needs
/// special handling in attributes, implement [`AsXmlText`] but provide a
/// custom implementation of [`AsXmlText::as_optional_xml_text`].
pub trait AsOptionalXmlText {
    /// Convert the value to an XML string in a context where an absent value
    /// can be represented.
    ///
    /// `Ok(None)` stands for the absent value, `Ok(Some(_))` carries the
    /// text to emit.
    ///
    /// # Errors
    ///
    /// Returns an error if the value cannot be converted to text.
    fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
}
291
292impl<T: AsXmlText> AsOptionalXmlText for T {
293 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
294 <Self as AsXmlText>::as_optional_xml_text(self)
295 }
296}
297
298impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
299 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
300 self.as_ref()
301 .map(T::as_optional_xml_text)
302 .transpose()
303 .map(Option::flatten)
304 }
305}
306
/// Control how unknown attributes are handled.
///
/// The variants of this enum are referenced in
/// `#[xml(on_unknown_attribute = ..)]`, which can be used on structs and
/// enum variants. The specified variant controls how attributes which are
/// not handled by any member of the compound are treated during parsing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownAttributePolicy {
    /// All unknown attributes are discarded.
    Discard,

    /// The first unknown attribute which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy.
    #[default]
    Fail,
}
325
326impl UnknownAttributePolicy {
327 #[doc(hidden)]
328 /// Implementation of the policy.
329 ///
330 /// This is an internal API and not subject to semver versioning.
331 pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
332 match self {
333 Self::Fail => Err(self::error::Error::Other(msg)),
334 Self::Discard => Ok(()),
335 }
336 }
337}
338
/// Control how unknown children are handled.
///
/// The variants of this enum are referenced in
/// `#[xml(on_unknown_child = ..)]`, which can be used on structs and
/// enum variants. The specified variant controls how children which are not
/// handled by any member of the compound are treated during parsing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum UnknownChildPolicy {
    /// All unknown children are discarded.
    Discard,

    /// The first unknown child which is encountered generates a fatal
    /// parsing error.
    ///
    /// This is the default policy.
    #[default]
    Fail,
}
357
358impl UnknownChildPolicy {
359 #[doc(hidden)]
360 /// Implementation of the policy.
361 ///
362 /// This is an internal API and not subject to semver versioning.
363 pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
364 match self {
365 Self::Fail => Err(self::error::Error::Other(msg)),
366 Self::Discard => Ok(()),
367 }
368 }
369}
370
371/// Attempt to transform a type implementing [`AsXml`] into another
372/// type which implements [`FromXml`].
373pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
374 let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
375 let (qname, attrs) = match iter.next() {
376 Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
377 Some(Err(e)) => return Err(e),
378 _ => panic!("into_event_iter did not start with StartElement event!"),
379 };
380 let mut sink = match T::from_events(qname, attrs) {
381 Ok(v) => v,
382 Err(self::error::FromEventsError::Mismatch { .. }) => {
383 return Err(self::error::Error::TypeMismatch)
384 }
385 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
386 };
387 for event in iter {
388 let event = event?;
389 if let Some(v) = sink.feed(event)? {
390 return Ok(v);
391 }
392 }
393 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
394}
395
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Panics
///
/// Panics if the items generated from the element violate the expected
/// order (text or an element foot before the end of the element header) or
/// end before `T` has been constructed completely.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        // On a mismatch, ownership of the untouched element goes back to
        // the caller.
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
    };

    let mut items = from.as_xml_iter()?;
    // The element header has already been handed to `from_events` above, so
    // everything up to and including the end of the header is skipped here.
    for item in &mut items {
        match item? {
            Item::ElementHeadEnd => break,
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // The remaining items are the element's contents and its end.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(v) = builder.feed(event?)? {
            return Ok(v);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
446
447fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
448 match r {
449 Ok(v) => Ok(v),
450 Err(e) => match e.downcast::<rxml::Error>() {
451 Ok(e) => Err(e.into()),
452 Err(_) => unreachable!("I/O error cannot be caused by &[]"),
453 },
454 }
455}
456
457fn read_start_event<I: std::io::BufRead>(
458 r: &mut rxml::Reader<I>,
459) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
460 for ev in r {
461 match map_nonio_error(ev)? {
462 rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
463 rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
464 _ => {
465 return Err(self::error::Error::Other(
466 "Unexpected event at start of document",
467 ))
468 }
469 }
470 }
471 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(Some(
472 rxml::error::ErrorContext::DocumentBegin,
473 ))))
474}
475
476/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
477/// containing XML data.
478pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
479 let mut reader = rxml::Reader::new(&mut buf);
480 let (name, attrs) = read_start_event(&mut reader)?;
481 let mut builder = match T::from_events(name, attrs) {
482 Ok(v) => v,
483 Err(self::error::FromEventsError::Mismatch { .. }) => {
484 return Err(self::error::Error::TypeMismatch)
485 }
486 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
487 };
488 for ev in reader {
489 if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
490 return Ok(v);
491 }
492 }
493 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
494}
495
496fn read_start_event_io<I: std::io::BufRead>(
497 r: &mut rxml::Reader<I>,
498) -> io::Result<(rxml::QName, rxml::AttrMap)> {
499 for ev in r {
500 match ev? {
501 rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
502 rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
503 _ => {
504 return Err(io::Error::new(
505 io::ErrorKind::InvalidData,
506 self::error::Error::Other("Unexpected event at start of document"),
507 ))
508 }
509 }
510 }
511 Err(io::Error::new(
512 io::ErrorKind::InvalidData,
513 self::error::Error::XmlError(rxml::Error::InvalidEof(Some(
514 rxml::error::ErrorContext::DocumentBegin,
515 ))),
516 ))
517}
518
519/// Attempt to parse a type implementing [`FromXml`] from a reader.
520pub fn from_reader<T: FromXml, R: io::BufRead>(r: R) -> io::Result<T> {
521 let mut reader = rxml::Reader::new(r);
522 let (name, attrs) = read_start_event_io(&mut reader)?;
523 let mut builder = match T::from_events(name, attrs) {
524 Ok(v) => v,
525 Err(self::error::FromEventsError::Mismatch { .. }) => {
526 return Err(self::error::Error::TypeMismatch)
527 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
528 }
529 Err(self::error::FromEventsError::Invalid(e)) => {
530 return Err(e).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
531 }
532 };
533 for ev in reader {
534 if let Some(v) = builder
535 .feed(ev?)
536 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?
537 {
538 return Ok(v);
539 }
540 }
541 Err(io::Error::new(
542 io::ErrorKind::UnexpectedEof,
543 self::error::Error::XmlError(rxml::Error::InvalidEof(None)),
544 ))
545}
546
547/// Attempt to serialise a type implementing [`AsXml`] to a vector of bytes.
548pub fn to_vec<T: AsXml>(xso: &T) -> Result<Vec<u8>, self::error::Error> {
549 let iter = xso.as_xml_iter()?;
550 let mut writer = rxml::writer::Encoder::new();
551 let mut buf = Vec::new();
552 for item in iter {
553 let item = item?;
554 writer.encode(item.as_rxml_item(), &mut buf)?;
555 }
556 Ok(buf)
557}
558
/// Return true if the string contains exclusively XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return).
///
/// The check is done on bytes, so both strings and byte slices are
/// accepted. Empty input counts as whitespace-only.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    s.as_ref()
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
}