1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![forbid(unsafe_code)]
4#![warn(missing_docs)]
5/*!
6# XML Streamed Objects -- serde-like parsing for XML
7
8This crate provides the traits for parsing XML data into Rust structs, and
9vice versa.
10
11While it is in 0.0.x versions, many features still need to be developed, but
12rest assured that there is a solid plan to get it fully usable for even
13advanced XML scenarios.
14
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library: parsing XML streams as specified in RFC 6120.
17*/
18
19// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
20//
21// This Source Code Form is subject to the terms of the Mozilla Public
22// License, v. 2.0. If a copy of the MPL was not distributed with this
23// file, You can obtain one at http://mozilla.org/MPL/2.0/.
24
25use std::io;
26
27pub mod asxml;
28pub mod error;
29pub mod fromxml;
30#[cfg(feature = "minidom")]
31pub mod minidom_compat;
32mod rxml_util;
33pub mod text;
34
35#[doc(hidden)]
36pub mod exports {
37 #[cfg(feature = "minidom")]
38 pub use minidom;
39 pub use rxml;
40}
41
42use std::borrow::Cow;
43
44#[doc(inline)]
45pub use text::TextCodec;
46
47#[doc(inline)]
48pub use rxml_util::Item;
49
50#[doc = include_str!("from_xml_doc.md")]
51#[doc(inline)]
52#[cfg(feature = "macros")]
53pub use xso_proc::FromXml;
54
55/// # Make a struct or enum serialisable to XML
56///
57/// This derives the [`AsXml`] trait on a struct or enum. It is the
58/// counterpart to [`macro@FromXml`].
59///
60/// The attributes necessary and available for the derivation to work are
61/// documented on [`macro@FromXml`].
62#[doc(inline)]
63#[cfg(feature = "macros")]
64pub use xso_proc::AsXml;
65
66/// Trait allowing to iterate a struct's contents as serialisable
67/// [`Item`]s.
68///
69/// **Important:** Changing the [`ItemIter`][`Self::ItemIter`] associated
70/// type is considered a non-breaking change for any given implementation of
71/// this trait. Always refer to a type's iterator type using fully-qualified
72/// notation, for example: `<T as xso::AsXml>::ItemIter`.
73pub trait AsXml {
74 /// The iterator type.
75 ///
76 /// **Important:** Changing this type is considered a non-breaking change
77 /// for any given implementation of this trait. Always refer to a type's
78 /// iterator type using fully-qualified notation, for example:
79 /// `<T as xso::AsXml>::ItemIter`.
80 type ItemIter<'x>: Iterator<Item = Result<Item<'x>, self::error::Error>>
81 where
82 Self: 'x;
83
84 /// Return an iterator which emits the contents of the struct or enum as
85 /// serialisable [`Item`] items.
86 fn as_xml_iter(&self) -> Result<Self::ItemIter<'_>, self::error::Error>;
87}
88
89/// Trait for a temporary object allowing to construct a struct from
90/// [`rxml::Event`] items.
91///
92/// Objects of this type are generally constructed through
93/// [`FromXml::from_events`] and are used to build Rust structs or enums from
94/// XML data. The XML data must be fed as `rxml::Event` to the
95/// [`feed`][`Self::feed`] method.
96pub trait FromEventsBuilder {
97 /// The type which will be constructed by this builder.
98 type Output;
99
100 /// Feed another [`rxml::Event`] into the element construction
101 /// process.
102 ///
103 /// Once the construction process completes, `Ok(Some(_))` is returned.
104 /// When valid data has been fed but more events are needed to fully
105 /// construct the resulting struct, `Ok(None)` is returned.
106 ///
107 /// If the construction fails, `Err(_)` is returned. Errors are generally
108 /// fatal and the builder should be assumed to be broken at that point.
109 /// Feeding more events after an error may result in panics, errors or
110 /// inconsistent result data, though it may never result in unsound or
111 /// unsafe behaviour.
112 fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
113}
114
115/// Trait allowing to construct a struct from a stream of
116/// [`rxml::Event`] items.
117///
118/// To use this, first call [`FromXml::from_events`] with the qualified
119/// name and the attributes of the corresponding
120/// [`rxml::Event::StartElement`] event. If the call succeeds, the
121/// returned builder object must be fed with the events representing the
122/// contents of the element, and then with the `EndElement` event.
123///
124/// The `StartElement` passed to `from_events` must not be passed to `feed`.
125///
126/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
127/// is considered a non-breaking change for any given implementation of this
128/// trait. Always refer to a type's builder type using fully-qualified
129/// notation, for example: `<T as xso::FromXml>::Builder`.
130pub trait FromXml {
131 /// A builder type used to construct the element.
132 ///
133 /// **Important:** Changing this type is considered a non-breaking change
134 /// for any given implementation of this trait. Always refer to a type's
135 /// builder type using fully-qualified notation, for example:
136 /// `<T as xso::FromXml>::Builder`.
137 type Builder: FromEventsBuilder<Output = Self>;
138
139 /// Attempt to initiate the streamed construction of this struct from XML.
140 ///
141 /// If the passed qualified `name` and `attrs` match the element's type,
142 /// the [`Self::Builder`] is returned and should be fed with XML events
143 /// by the caller.
144 ///
145 /// Otherwise, an appropriate error is returned.
146 fn from_events(
147 name: rxml::QName,
148 attrs: rxml::AttrMap,
149 ) -> Result<Self::Builder, self::error::FromEventsError>;
150}
151
152/// Trait allowing to convert XML text to a value.
153///
154/// This trait is similar to [`core::str::FromStr`], however, due to
155/// restrictions imposed by the orphan rule, a separate trait is needed.
156/// Implementations for many standard library types are available. In
157/// addition, the following feature flags can enable more implementations:
158///
159/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
160/// - `uuid`: `uuid::Uuid`
161///
162/// Because of this unfortunate situation, we are **extremely liberal** with
163/// accepting optional dependencies for this purpose. You are very welcome to
164/// make merge requests against this crate adding support for parsing
165/// third-party crates.
166pub trait FromXmlText: Sized {
167 /// Convert the given XML text to a value.
168 fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
169}
170
171impl FromXmlText for String {
172 /// Return the string unchanged.
173 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
174 Ok(data)
175 }
176}
177
178impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
179 /// Return a [`Cow::Owned`] containing the parsed value.
180 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
181 Ok(Cow::Owned(T::from_xml_text(data)?))
182 }
183}
184
185impl<T: FromXmlText> FromXmlText for Option<T> {
186 /// Return a [`Some`] containing the parsed value.
187 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
188 Ok(Some(T::from_xml_text(data)?))
189 }
190}
191
192impl<T: FromXmlText> FromXmlText for Box<T> {
193 /// Return a [`Box`] containing the parsed value.
194 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
195 Ok(Box::new(T::from_xml_text(data)?))
196 }
197}
198
199/// Trait to convert a value to an XML text string.
200///
201/// This trait is implemented for many standard library types implementing
202/// [`core::fmt::Display`]. In addition, the following feature flags can enable
203/// more implementations:
204///
205/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
206/// - `uuid`: `uuid::Uuid`
207///
208/// Because of the unfortunate situation as described in [`FromXmlText`], we
209/// are **extremely liberal** with accepting optional dependencies for this
210/// purpose. You are very welcome to make merge requests against this crate
211/// adding support for parsing third-party crates.
212pub trait AsXmlText {
213 /// Convert the value to an XML string in a context where an absent value
214 /// cannot be represented.
215 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error>;
216
217 /// Convert the value to an XML string in a context where an absent value
218 /// can be represented.
219 ///
220 /// The provided implementation will always return the result of
221 /// [`Self::as_xml_text`] wrapped into `Some(.)`. By re-implementing
222 /// this method, implementors can customize the behaviour for certain
223 /// values.
224 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
225 Ok(Some(self.as_xml_text()?))
226 }
227}
228
229impl AsXmlText for String {
230 /// Return the borrowed string contents.
231 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
232 Ok(Cow::Borrowed(self.as_str()))
233 }
234}
235
236impl AsXmlText for str {
237 /// Return the borrowed string contents.
238 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
239 Ok(Cow::Borrowed(&*self))
240 }
241}
242
243impl<T: AsXmlText> AsXmlText for Box<T> {
244 /// Return the borrowed [`Box`] contents.
245 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
246 T::as_xml_text(self)
247 }
248}
249
250impl<B: AsXmlText + ToOwned> AsXmlText for Cow<'_, B> {
251 /// Return the borrowed [`Cow`] contents.
252 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
253 B::as_xml_text(self.as_ref())
254 }
255}
256
257impl<T: AsXmlText> AsXmlText for &T {
258 /// Delegate to the `AsXmlText` implementation on `T`.
259 fn as_xml_text(&self) -> Result<Cow<'_, str>, self::error::Error> {
260 T::as_xml_text(*self)
261 }
262}
263
264/// Specialized variant of [`AsXmlText`].
265///
266/// Do **not** implement this unless you cannot implement [`AsXmlText`]:
267/// implementing [`AsXmlText`] is more versatile and an
268/// [`AsOptionalXmlText`] implementation is automatically provided.
269///
270/// If you need to customize the behaviour of the [`AsOptionalXmlText`]
271/// blanket implementation, implement a custom
272/// [`AsXmlText::as_optional_xml_text`] instead.
273pub trait AsOptionalXmlText {
274 /// Convert the value to an XML string in a context where an absent value
275 /// can be represented.
276 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error>;
277}
278
279impl<T: AsXmlText> AsOptionalXmlText for T {
280 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
281 <Self as AsXmlText>::as_optional_xml_text(self)
282 }
283}
284
285impl<T: AsXmlText> AsOptionalXmlText for Option<T> {
286 fn as_optional_xml_text(&self) -> Result<Option<Cow<'_, str>>, self::error::Error> {
287 self.as_ref()
288 .map(T::as_optional_xml_text)
289 .transpose()
290 .map(Option::flatten)
291 }
292}
293
294/// Attempt to transform a type implementing [`AsXml`] into another
295/// type which implements [`FromXml`].
296pub fn transform<T: FromXml, F: AsXml>(from: F) -> Result<T, self::error::Error> {
297 let mut iter = self::rxml_util::ItemToEvent::new(from.as_xml_iter()?);
298 let (qname, attrs) = match iter.next() {
299 Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
300 Some(Err(e)) => return Err(e),
301 _ => panic!("into_event_iter did not start with StartElement event!"),
302 };
303 let mut sink = match T::from_events(qname, attrs) {
304 Ok(v) => v,
305 Err(self::error::FromEventsError::Mismatch { .. }) => {
306 return Err(self::error::Error::TypeMismatch)
307 }
308 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
309 };
310 for event in iter {
311 let event = event?;
312 if let Some(v) = sink.feed(event)? {
313 return Ok(v);
314 }
315 }
316 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
317}
318
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// In contrast to [`transform`] (which also accepts an element), this
/// function hands the element back unharmed, inside
/// `FromElementError::Mismatch`, if its element header does not match the
/// expectations of `T`. All other failures are converted into a
/// `FromElementError` as well.
///
/// # Panics
///
/// Panics if the item stream of the element is malformed, i.e. if text or
/// an element foot is emitted before the end of the element header, or if
/// the stream ends before `T` has been constructed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    // Start the builder from the element header alone; `from` is only
    // borrowed here so that it can be handed back on a mismatch.
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut sink = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
    };

    let mut items = from.as_xml_iter()?;
    // The builder has already seen the element header, so skip everything
    // up to and including its end.
    for item in &mut items {
        match item? {
            Item::XmlDeclaration(..) | Item::ElementHeadStart(..) | Item::Attribute(..) => (),
            Item::ElementHeadEnd => break,
            Item::Text(..) => panic!("text before end of element header"),
            Item::ElementFoot => panic!("element foot before end of element header"),
        }
    }

    // Everything which remains is element content; feed it to the builder.
    for event in self::rxml_util::ItemToEvent::new(items) {
        if let Some(value) = sink.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
369
370fn map_nonio_error<T>(r: Result<T, io::Error>) -> Result<T, self::error::Error> {
371 match r {
372 Ok(v) => Ok(v),
373 Err(e) => match e.downcast::<rxml::Error>() {
374 Ok(e) => Err(e.into()),
375 Err(_) => unreachable!("I/O error cannot be caused by &[]"),
376 },
377 }
378}
379
380fn read_start_event<I: std::io::BufRead>(
381 r: &mut rxml::Reader<I>,
382) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
383 for ev in r {
384 match map_nonio_error(ev)? {
385 rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
386 rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
387 _ => {
388 return Err(self::error::Error::Other(
389 "Unexpected event at start of document",
390 ))
391 }
392 }
393 }
394 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(Some(
395 rxml::error::ErrorContext::DocumentBegin,
396 ))))
397}
398
399/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
400/// containing XML data.
401pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
402 let mut reader = rxml::Reader::new(&mut buf);
403 let (name, attrs) = read_start_event(&mut reader)?;
404 let mut builder = match T::from_events(name, attrs) {
405 Ok(v) => v,
406 Err(self::error::FromEventsError::Mismatch { .. }) => {
407 return Err(self::error::Error::TypeMismatch)
408 }
409 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
410 };
411 for ev in reader {
412 if let Some(v) = builder.feed(map_nonio_error(ev)?)? {
413 return Ok(v);
414 }
415 }
416 Err(self::error::Error::XmlError(rxml::Error::InvalidEof(None)))
417}
418
/// Return true if the input consists exclusively of XML whitespace.
///
/// XML whitespace is defined as U+0020 (space), U+0009 (tab), U+000a
/// (newline) and U+000d (carriage return). The check is performed on the
/// raw bytes of `s`; consequently, empty input is reported as whitespace.
pub fn is_xml_whitespace<T: AsRef<[u8]>>(s: T) -> bool {
    s.as_ref()
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t' | b'\r' | b'\n'))
}