1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![forbid(unsafe_code)]
3#![warn(missing_docs)]
4/*!
5# XML Streamed Objects -- serde-like parsing for XML
6
7This crate provides the traits for parsing XML data into Rust structs, and
8vice versa.
9
10While it is in 0.0.x versions, many features still need to be developed, but
11rest assured that there is a solid plan to get it fully usable for even
12advanced XML scenarios.
13
XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library in parsing XML streams as specified in RFC 6120.
16*/
17
18// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
19//
20// This Source Code Form is subject to the terms of the Mozilla Public
21// License, v. 2.0. If a copy of the MPL was not distributed with this
22// file, You can obtain one at http://mozilla.org/MPL/2.0/.
23pub mod error;
24#[cfg(feature = "minidom")]
25#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
26pub mod minidom_compat;
27mod text;
28
29#[doc(hidden)]
30pub mod exports {
31 #[cfg(feature = "minidom")]
32 pub use minidom;
33 pub use rxml;
34}
35
36use std::borrow::Cow;
37
38#[doc = include_str!("from_xml_doc.md")]
39#[doc(inline)]
40#[cfg(feature = "macros")]
41pub use xso_proc::FromXml;
42
43/// # Make a struct or enum serialisable to XML
44///
45/// This derives the [`IntoXml`] trait on a struct or enum. It is the
46/// counterpart to [`macro@FromXml`].
47///
48/// The attributes necessary and available for the derivation to work are
49/// documented on [`macro@FromXml`].
50#[doc(inline)]
51#[cfg(feature = "macros")]
52pub use xso_proc::IntoXml;
53
54/// Trait allowing to consume a struct and iterate its contents as
55/// serialisable [`rxml::Event`] items.
56///
57/// **Important:** Changing the [`EventIter`][`Self::EventIter`] associated
58/// type is considered a non-breaking change for any given implementation of
59/// this trait. Always refer to a type's iterator type using fully-qualified
60/// notation, for example: `<T as xso::IntoXml>::EventIter`.
61pub trait IntoXml {
62 /// The iterator type.
63 ///
64 /// **Important:** Changing this type is considered a non-breaking change
65 /// for any given implementation of this trait. Always refer to a type's
66 /// iterator type using fully-qualified notation, for example:
67 /// `<T as xso::IntoXml>::EventIter`.
68 type EventIter: Iterator<Item = Result<rxml::Event, self::error::Error>>;
69
70 /// Return an iterator which emits the contents of the struct or enum as
71 /// serialisable [`rxml::Event`] items.
72 fn into_event_iter(self) -> Result<Self::EventIter, self::error::Error>;
73}
74
75/// Trait for a temporary object allowing to construct a struct from
76/// [`rxml::Event`] items.
77///
78/// Objects of this type are generally constructed through
79/// [`FromXml::from_events`] and are used to build Rust structs or enums from
80/// XML data. The XML data must be fed as `rxml::Event` to the
81/// [`feed`][`Self::feed`] method.
82pub trait FromEventsBuilder {
83 /// The type which will be constructed by this builder.
84 type Output;
85
86 /// Feed another [`rxml::Event`] into the element construction
87 /// process.
88 ///
89 /// Once the construction process completes, `Ok(Some(_))` is returned.
90 /// When valid data has been fed but more events are needed to fully
91 /// construct the resulting struct, `Ok(None)` is returned.
92 ///
93 /// If the construction fails, `Err(_)` is returned. Errors are generally
94 /// fatal and the builder should be assumed to be broken at that point.
95 /// Feeding more events after an error may result in panics, errors or
96 /// inconsistent result data, though it may never result in unsound or
97 /// unsafe behaviour.
98 fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
99}
100
101/// Trait allowing to construct a struct from a stream of
102/// [`rxml::Event`] items.
103///
104/// To use this, first call [`FromXml::from_events`] with the qualified
105/// name and the attributes of the corresponding
106/// [`rxml::Event::StartElement`] event. If the call succeeds, the
107/// returned builder object must be fed with the events representing the
108/// contents of the element, and then with the `EndElement` event.
109///
110/// The `StartElement` passed to `from_events` must not be passed to `feed`.
111///
112/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
113/// is considered a non-breaking change for any given implementation of this
114/// trait. Always refer to a type's builder type using fully-qualified
115/// notation, for example: `<T as xso::FromXml>::Builder`.
116pub trait FromXml {
117 /// A builder type used to construct the element.
118 ///
119 /// **Important:** Changing this type is considered a non-breaking change
120 /// for any given implementation of this trait. Always refer to a type's
121 /// builder type using fully-qualified notation, for example:
122 /// `<T as xso::FromXml>::Builder`.
123 type Builder: FromEventsBuilder<Output = Self>;
124
125 /// Attempt to initiate the streamed construction of this struct from XML.
126 ///
127 /// If the passed qualified `name` and `attrs` match the element's type,
128 /// the [`Self::Builder`] is returned and should be fed with XML events
129 /// by the caller.
130 ///
131 /// Otherwise, an appropriate error is returned.
132 fn from_events(
133 name: rxml::QName,
134 attrs: rxml::AttrMap,
135 ) -> Result<Self::Builder, self::error::FromEventsError>;
136}
137
138/// Trait allowing to convert XML text to a value.
139///
140/// This trait is similar to [`std::str::FromStr`], however, due to
141/// restrictions imposed by the orphan rule, a separate trait is needed.
142/// Implementations for many standard library types are available. In
143/// addition, the following feature flags can enable more implementations:
144///
145/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
146/// - `uuid`: `uuid::Uuid`
147///
148/// Because of this unfortunate situation, we are **extremely liberal** with
149/// accepting optional dependencies for this purpose. You are very welcome to
150/// make merge requests against this crate adding support for parsing
151/// third-party crates.
152pub trait FromXmlText: Sized {
153 /// Convert the given XML text to a value.
154 fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
155}
156
157impl FromXmlText for String {
158 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
159 Ok(data)
160 }
161}
162
163impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
164 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
165 Ok(Cow::Owned(T::from_xml_text(data)?))
166 }
167}
168
169impl<T: FromXmlText> FromXmlText for Option<T> {
170 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
171 Ok(Some(T::from_xml_text(data)?))
172 }
173}
174
175impl<T: FromXmlText> FromXmlText for Box<T> {
176 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
177 Ok(Box::new(T::from_xml_text(data)?))
178 }
179}
180
181/// Trait to convert a value to an XML text string.
182///
183/// This trait is implemented for many standard library types implementing
184/// [`std::fmt::Display`]. In addition, the following feature flags can enable
185/// more implementations:
186///
187/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
188/// - `uuid`: `uuid::Uuid`
189///
190/// Because of the unfortunate situation as described in [`FromXmlText`], we
191/// are **extremely liberal** with accepting optional dependencies for this
192/// purpose. You are very welcome to make merge requests against this crate
193/// adding support for parsing third-party crates.
194pub trait IntoXmlText: Sized {
195 /// Convert the value to an XML string in a context where an absent value
196 /// cannot be represented.
197 fn into_xml_text(self) -> Result<String, self::error::Error>;
198
199 /// Convert the value to an XML string in a context where an absent value
200 /// can be represented.
201 ///
202 /// The provided implementation will always return the result of
203 /// [`Self::into_xml_text`] wrapped into `Some(.)`. By re-implementing
204 /// this method, implementors can customize the behaviour for certain
205 /// values.
206 fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
207 Ok(Some(self.into_xml_text()?))
208 }
209}
210
211impl IntoXmlText for String {
212 fn into_xml_text(self) -> Result<String, self::error::Error> {
213 Ok(self)
214 }
215}
216
217impl<T: IntoXmlText> IntoXmlText for Box<T> {
218 fn into_xml_text(self) -> Result<String, self::error::Error> {
219 T::into_xml_text(*self)
220 }
221}
222
223impl<T: IntoXmlText, B: ToOwned<Owned = T>> IntoXmlText for Cow<'_, B> {
224 fn into_xml_text(self) -> Result<String, self::error::Error> {
225 T::into_xml_text(self.into_owned())
226 }
227}
228
229/// Specialized variant of [`IntoXmlText`].
230///
231/// Do **not** implement this unless you cannot implement [`IntoXmlText`]:
232/// implementing [`IntoXmlText`] is more versatile and an
233/// [`IntoOptionalXmlText`] implementation is automatically provided.
234///
235/// If you need to customize the behaviour of the [`IntoOptionalXmlText`]
236/// blanket implementation, implement a custom
237/// [`IntoXmlText::into_optional_xml_text`] instead.
238pub trait IntoOptionalXmlText {
239 /// Convert the value to an XML string in a context where an absent value
240 /// can be represented.
241 fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error>;
242}
243
244impl<T: IntoXmlText> IntoOptionalXmlText for T {
245 fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
246 <Self as IntoXmlText>::into_optional_xml_text(self)
247 }
248}
249
250impl<T: IntoOptionalXmlText> IntoOptionalXmlText for Option<T> {
251 fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
252 self.map(T::into_optional_xml_text)
253 .transpose()
254 .map(Option::flatten)
255 }
256}
257
258/// Attempt to transform a type implementing [`IntoXml`] into another
259/// type which implements [`FromXml`].
260pub fn transform<T: FromXml, F: IntoXml>(from: F) -> Result<T, self::error::Error> {
261 let mut iter = from.into_event_iter()?;
262 let (qname, attrs) = match iter.next() {
263 Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
264 Some(Err(e)) => return Err(e),
265 _ => panic!("into_event_iter did not start with StartElement event!"),
266 };
267 let mut sink = match T::from_events(qname, attrs) {
268 Ok(v) => v,
269 Err(self::error::FromEventsError::Mismatch { .. }) => {
270 return Err(self::error::Error::TypeMismatch)
271 }
272 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
273 };
274 for event in iter {
275 let event = event?;
276 match sink.feed(event)? {
277 Some(v) => return Ok(v),
278 None => (),
279 }
280 }
281 Err(self::error::Error::XmlError(
282 rxml::error::XmlError::InvalidEof("during transform"),
283 ))
284}
285
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function hands the element back unharmed, inside the `Mismatch` variant
/// of the error, if its element header does not match the expectations of
/// `T`.
///
/// # Panics
///
/// Panics if the element emits no events at all, or if its events end
/// before the builder of `T` has completed.
#[cfg(feature = "minidom")]
#[cfg_attr(docsrs, doc(cfg(feature = "minidom")))]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    // The header is derived from a borrow, so that `from` is still intact
    // and can be returned to the caller on mismatch.
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(self::error::FromEventsError::Mismatch { .. }) => {
            return Err(self::error::FromElementError::Mismatch(from))
        }
        Err(self::error::FromEventsError::Invalid(e)) => {
            return Err(self::error::FromElementError::Invalid(e))
        }
    };

    let mut events = from.into_event_iter()?;
    // Discard the first event: the header parts have already been passed to
    // `from_events` above.
    events.next().expect("first event from minidom::Element")?;
    for event in events {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }
    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
322
323fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
324 match r {
325 Ok(v) => Ok(v),
326 Err(rxml::Error::IO(_)) => unreachable!(),
327 Err(rxml::Error::Xml(e)) => Err(e.into()),
328 Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
329 Err(rxml::Error::InvalidChar(_)) => {
330 Err(self::error::Error::Other("non-character encountered"))
331 }
332 Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
333 }
334}
335
336fn read_start_event<I: std::io::BufRead>(
337 r: &mut rxml::Reader<I>,
338) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
339 for ev in r {
340 match map_nonio_error(ev)? {
341 rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
342 rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
343 _ => {
344 return Err(self::error::Error::Other(
345 "Unexpected event at start of document",
346 ))
347 }
348 }
349 }
350 Err(self::error::Error::XmlError(
351 rxml::error::XmlError::InvalidEof("before start of element"),
352 ))
353}
354
355/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
356/// containing XML data.
357pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
358 let mut reader = rxml::Reader::new(&mut buf);
359 let (name, attrs) = read_start_event(&mut reader)?;
360 let mut builder = match T::from_events(name, attrs) {
361 Ok(v) => v,
362 Err(self::error::FromEventsError::Mismatch { .. }) => {
363 return Err(self::error::Error::TypeMismatch)
364 }
365 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
366 };
367 for ev in reader {
368 match builder.feed(map_nonio_error(ev)?)? {
369 Some(v) => return Ok(v),
370 None => (),
371 }
372 }
373 Err(self::error::Error::XmlError(
374 rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
375 ))
376}