1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![forbid(unsafe_code)]
4#![warn(missing_docs)]
5/*!
6# XML Streamed Objects -- serde-like parsing for XML
7
8This crate provides the traits for parsing XML data into Rust structs, and
9vice versa.
10
11While it is in 0.0.x versions, many features still need to be developed, but
12rest assured that there is a solid plan to get it fully usable for even
13advanced XML scenarios.
14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
17*/
18
19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
20//
21// This Source Code Form is subject to the terms of the Mozilla Public
22// License, v. 2.0. If a copy of the MPL was not distributed with this
23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
24
25use std::io;
26
27pub mod asxml;
28pub mod error;
29pub mod fromxml;
30#[cfg(feature = "minidom")]
31pub mod minidom_compat;
32mod rxml_util;
33pub mod text;
34
35#[doc(hidden)]
36pub mod exports {
37 #[cfg(feature = "minidom")]
38 pub use minidom;
39 pub use rxml;
40}
41
42use std::borrow::Cow;
43
44#[doc(inline)]
45pub use text::TextCodec;
46
47#[doc(inline)]
48pub use rxml_util::Item;
49
50#[doc = include_str!("from_xml_doc.md")]
51#[doc(inline)]
52#[cfg(feature = "macros")]
53pub use xso_proc::FromXml;
54
55/// # Make a struct or enum serialisable to XML
56///
57/// This derives the [`AsXml`] trait on a struct or enum. It is the
58/// counterpart to [`macro@FromXml`].
59///
60/// The attributes necessary and available for the derivation to work are
61/// documented on [`macro@FromXml`].
62#[doc(inline)]
63#[cfg(feature = "macros")]
64pub use xso_proc::AsXml;
65
66/// Trait allowing to iterate a struct's contents as serialisable
67/// [`Item`]s.
68///
69/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
70/// type is considered a non-breaking change for any given implementation of
71/// this trait. Always refer to a type's iterator type using fully-qualified
72/// notation, for example: `<T as xso::AsXml>::ItemIter`.
73pub trait AsXml {
74 /// The iterator type.
75 ///
76 /// **Important:** Changing this type is considered a non-breaking change
77 /// for any given implementation of this trait. Always refer to a type's
78 /// iterator type using fully-qualified notation, for example:
79 /// `<T as xso::AsXml>::ItemIter`.
80 type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
81 where
82 Self: 'x;
83
84 /// Return an iterator which emits the contents of the struct or enum as
85 /// serialisable [`Item`] items.
86 fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
87}
88
89/// Trait for a temporary object allowing to construct a struct from
90/// [`rxml::Event`] items.
91///
92/// Objects of this type are generally constructed through
93/// [`FromXml::from_events`] and are used to build Rust structs or enums from
94/// XML data. The XML data must be fed as `rxml::Event` to the
95/// [`feed`][`Self::feed`] method.
96pub trait FromEventsBuilder {
97 /// The type which will be constructed by this builder.
98 type Output;
99
100 /// Feed another [`rxml::Event`] into the element construction
101 /// process.
102 ///
103 /// Once the construction process completes, `Ok(Some(_))` is returned.
104 /// When valid data has been fed but more events are needed to fully
105 /// construct the resulting struct, `Ok(None)` is returned.
106 ///
107 /// If the construction fails, `Err(_)` is returned. Errors are generally
108 /// fatal and the builder should be assumed to be broken at that point.
109 /// Feeding more events after an error may result in panics, errors or
110 /// inconsistent result data, though it may never result in unsound or
111 /// unsafe behaviour.
112 fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
113}
114
115/// Trait allowing to construct a struct from a stream of
116/// [`rxml::Event`] items.
117///
118/// To use this, first call [`FromXml::from_events`] with the qualified
119/// name and the attributes of the corresponding
120/// [`rxml::Event::StartElement`] event. If the call succeeds, the
121/// returned builder object must be fed with the events representing the
122/// contents of the element, and then with the `EndElement` event.
123///
124/// The `StartElement` passed to `from_events` must not be passed to `feed`.
125///
126/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
127/// is considered a non-breaking change for any given implementation of this
128/// trait. Always refer to a type's builder type using fully-qualified
129/// notation, for example: `<T as xso::FromXml>::Builder`.
130pub trait FromXml {
131 /// A builder type used to construct the element.
132 ///
133 /// **Important:** Changing this type is considered a non-breaking change
134 /// for any given implementation of this trait. Always refer to a type's
135 /// builder type using fully-qualified notation, for example:
136 /// `<T as xso::FromXml>::Builder`.
137 type Builder: FromEventsBuilder<Output = Self>;
138
139 /// Attempt to initiate the streamed construction of this struct from XML.
140 ///
141 /// If the passed qualified `name` and `attrs` match the element's type,
142 /// the [`Self::Builder`] is returned and should be fed with XML events
143 /// by the caller.
144 ///
145 /// Otherwise, an appropriate error is returned.
146 fn from_events(
147 name: rxml::QName,
148 attrs: rxml::AttrMap,
149 ) -> Result<Self::Builder, self::error::FromEventsError>;
150}
151
152/// Trait allowing to convert XML text to a value.
153///
154/// This trait is similar to [`core::str::FromStr`], however, due to
155/// restrictions imposed by the orphan rule, a separate trait is needed.
156/// Implementations for many standard library types are available. In
157/// addition, the following feature flags can enable more implementations:
158///
159/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
160/// - `uuid`: `uuid::Uuid`
161///
162/// Because of this unfortunate situation, we are **extremely liberal** with
163/// accepting optional dependencies for this purpose. You are very welcome to
164/// make merge requests against this crate adding support for parsing
165/// third-party crates.
166pub trait FromXmlText: Sized {
167 /// Convert the given XML text to a value.
168 fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
169}
170
171impl FromXmlText for String {
172 /// Return the string unchanged.
173 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
174 Ok(data)
175 }
176}
177
178impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
179 /// Return a [`Cow::Owned`] containing the parsed value.
180 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
181 Ok(Cow::Owned(T::from_xml_text(data)?))
182 }
183}
184
185impl<T: FromXmlText> FromXmlText for Option<T> {
186 /// Return a [`Some`] containing the parsed value.
187 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
188 Ok(Some(T::from_xml_text(data)?))
189 }
190}
191
192impl<T: FromXmlText> FromXmlText for Box<T> {
193 /// Return a [`Box`] containing the parsed value.
194 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
195 Ok(Box::new(T::from_xml_text(data)?))
196 }
197}
198
199/// Trait to convert a value to an XML text string.
200///
201/// This trait is implemented for many standard library types implementing
202/// [`core::fmt::Display`]. In addition, the following feature flags can enable
203/// more implementations:
204///
205/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
206/// - `uuid`: `uuid::Uuid`
207///
208/// Because of the unfortunate situation as described in [`FromXmlText`], we
209/// are **extremely liberal** with accepting optional dependencies for this
210/// purpose. You are very welcome to make merge requests against this crate
211/// adding support for parsing third-party crates.
212pub trait AsXmlText {
213 /// Convert the value to an XML string in a context where an absent value
214 /// cannot be represented.
215 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
216
217 /// Convert the value to an XML string in a context where an absent value
218 /// can be represented.
219 ///
220 /// The provided implementation will always return the result of
221 /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
222 /// this method, implementors can customize the behaviour for certain
223 /// values.
224 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
225 Ok(Some(self.as_xml_text()?))
226 }
227}
228
229impl AsXmlText for String {
230 /// Return the borrowed string contents.
231 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
232 Ok(Cow::Borrowed(self.as_str()))
233 }
234}
235
236impl AsXmlText for str {
237 /// Return the borrowed string contents.
238 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
239 Ok(Cow::Borrowed(&*self))
240 }
241}
242
243impl<T: AsXmlText> AsXmlText for Box<T> {
244 /// Return the borrowed [`Box`] contents.
245 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
246 T::as_xml_text(self)
247 }
248}
249
250impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
251 /// Return the borrowed [`Cow`] contents.
252 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
253 B::as_xml_text(self.as_ref())
254 }
255}
256
257impl<T: AsXmlText> AsXmlText for &T {
258 /// Delegate to the `AsXmlText` implementation on `T`.
259 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
260 T::as_xml_text(*self)
261 }
262}
263
264/// Specialized variant of [`AsXmlText`].
265///
266/// Do **not** implement this unless you cannot implement [`AsXmlText`]:
267/// implementing [`AsXmlText`] is more versatile and an
268/// [`AsOptionalXmlText`] implementation is automatically provided.
269///
270/// If you need to customize the behaviour of the [`AsOptionalXmlText`]
271/// blanket implementation, implement a custom
272/// [`AsXmlText::as_optional_xml_text`] instead.
273pub trait AsOptionalXmlText {
274 /// Convert the value to an XML string in a context where an absent value
275 /// can be represented.
276 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
277}
278
279impl<T: AsXmlText> AsOptionalXmlText for T {
280 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
281 <Self as AsXmlText>::as_optional_xml_text(self)
282 }
283}
284
285impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
286 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
287 self.as_ref()
288 .map(T::as_optional_xml_text)
289 .transpose()
290 .map(Option::flatten)
291 }
292}
293
294/// Control how unknown attributes are handled.
295///
296/// The variants of this enum are referenced in the
297/// `#[xml(on_unknown_attribute = ..)]` which can be used on structs and
298/// enum variants. The specified variant controls how attributes, which are
299/// not handled by any member of the compound, are handled during parsing.
300#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
301pub enum UnknownAttributePolicy {
302 /// All unknown attributes are discarded.
303 Discard,
304
305 /// The first unknown attribute which is encountered generates a fatal
306 /// parsing error.
307 ///
308 /// This is the default policy.
309 #[default]
310 Fail,
311}
312
313impl UnknownAttributePolicy {
314 #[doc(hidden)]
315 /// Implementation of the policy.
316 ///
317 /// This is an internal API and not subject to semver versioning.
318 pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
319 match self {
320 Self::Fail => Err(self::error::Error::Other(msg)),
321 Self::Discard => Ok(()),
322 }
323 }
324}
325
326/// Control how unknown children are handled.
327///
328/// The variants of this enum are referenced in the
329/// `#[xml(on_unknown_child = ..)]` which can be used on structs and
330/// enum variants. The specified variant controls how children, which are not
331/// handled by any member of the compound, are handled during parsing.
332#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
333pub enum UnknownChildPolicy {
334 /// All unknown children are discarded.
335 Discard,
336
337 /// The first unknown child which is encountered generates a fatal
338 /// parsing error.
339 ///
340 /// This is the default policy.
341 #[default]
342 Fail,
343}
344
345impl UnknownChildPolicy {
346 #[doc(hidden)]
347 /// Implementation of the policy.
348 ///
349 /// This is an internal API and not subject to semver versioning.
350 pub fn apply_policy(&self, msg: &'static str) -> Result<(), self::error::Error> {
351 match self {
352 Self::Fail => Err(self::error::Error::Other(msg)),
353 Self::Discard => Ok(()),
354 }
355 }
356}
357
358/// Attempt to transform a type implementing [`AsXml`] into another
359/// type which implements [`FromXml`].
360pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
361 let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
362 let (qname, attrs) = match iter.next() {
363 Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
364 Some(Err(e)) => return Err(e),
365 _ => panic!("into_event_iter did not start with StartElement event!"),
366 };
367 let mut sink = match T::from_events(qname, attrs) {
368 Ok(v) => v,
369 Err(self::error::FromEventsError::Mismatch { .. }) => {
370 return Err(self::error::Error::TypeMismatch)
371 }
372 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
373 };
374 for event in iter {
375 let event = event?;
376 if let Some(v) = sink.feed(event)? {
377 return Ok(v);
378 }
379 }
380 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
381}
382
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function will return the element unharmed if its element header does not
/// match the expectations of `T`.
///
/// # Panics
///
/// Panics if the item stream of the element yields text or an element foot
/// before its header is complete, or if it ends before the builder of `T`
/// has produced a value.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
        // Hand the untouched element back so the caller can try another type.
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
    };

    let mut items = from.as_xml_iter()?;
    // The header was already passed to `from_events` above, so skip every
    // item up to and including the end of the element header.
    for item in &mut items {
        match item? {
            Item::ElementHeadEnd => break,
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // Everything that is left is element content plus the closing event.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
433
434fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
435 match r {
436 Ok(v) => Ok(v),
437 Err(e) => match e.downcast::<rxml::Error>() {
438 Ok(e) => Err(e.into()),
439 Err(_) => unreachable!("I/O error cannot be caused by &[]"),
440 },
441 }
442}
443
444fn read_start_event<I: std::io::BufRead>(
445 r: &mut rxml::Reader<I>,
446) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
447 for ev in r {
448 match map_nonio_error(ev)? {
449 rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
450 rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
451 _ => {
452 return Err(self::error::Error::Other(
453 "Unexpected event at start of document",
454 ))
455 }
456 }
457 }
458 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(Some(
459 rxml::error::ErrorContext::DocumentBegin,
460 ))))
461}
462
463/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
464/// containing XML data.
465pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
466 let mut reader = rxml::Reader::new(&mut buf);
467 let (name, attrs) = read_start_event(&mut reader)?;
468 let mut builder = match T::from_events(name, attrs) {
469 Ok(v) => v,
470 Err(self::error::FromEventsError::Mismatch { .. }) => {
471 return Err(self::error::Error::TypeMismatch)
472 }
473 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
474 };
475 for ev in reader {
476 if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
477 return Ok(v);
478 }
479 }
480 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
481}
482
483/// Attempt to parse a type implementing [`FromXml`] from a reader.
484pub fn from_reader<T: FromXml, R: io::BufRead>(r: R) -> io::Result<T> {
485 let mut reader = rxml::Reader::new(r);
486 let (name, attrs) =
487 read_start_event(&mut reader).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
488 let mut builder = match T::from_events(name, attrs) {
489 Ok(v) => v,
490 Err(self::error::FromEventsError::Mismatch { .. }) => {
491 return Err(self::error::Error::TypeMismatch)
492 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
493 }
494 Err(self::error::FromEventsError::Invalid(e)) => {
495 return Err(e).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
496 }
497 };
498 for ev in reader {
499 if let Some(v) = builder
500 .feed(ev?)
501 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?
502 {
503 return Ok(v);
504 }
505 }
506 Err(io::Error::new(
507 io::ErrorKind::UnexpectedEof,
508 self::error::Error::XmlError(rxml::Error::InvalidEof(None)),
509 ))
510}
511
512/// Attempt to serialise a type implementing [`AsXml`] to a vector of bytes.
513pub fn to_vec<T: AsXml>(xso: &T) -> Result<Vec<u8>, self::error::Error> {
514 let iter = xso.as_xml_iter()?;
515 let mut writer = rxml::writer::Encoder::new();
516 let mut buf = Vec::new();
517 for item in iter {
518 let item = item?;
519 writer.encode(item.as_rxml_item(), &mut buf)?;
520 }
521 Ok(buf)
522}
523
/// Check whether the input consists solely of XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return). Empty input contains no
/// non-whitespace byte and therefore yields `true`.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    let bytes: &[u8] = s.as_ref();
    bytes
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
}