1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![forbid(unsafe_code)]
3#![warn(missing_docs)]
4/*!
5# XML Streamed Objects -- serde-like parsing for XML
6
7This crate provides the traits for parsing XML data into Rust structs, and
8vice versa.
9
10While it is in 0.0.x versions, many features still need to be developed, but
11rest assured that there is a solid plan to get it fully usable for even
12advanced XML scenarios.
13
14XSO is an acronym for XML Stream(ed) Objects, referring to the main field of
use of this library in parsing XML streams as specified in RFC 6120.
16*/
17
18// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
19//
20// This Source Code Form is subject to the terms of the Mozilla Public
21// License, v. 2.0. If a copy of the MPL was not distributed with this
22// file, You can obtain one at http://mozilla.org/MPL/2.0/.
23pub mod error;
24#[cfg(feature = "minidom")]
25pub mod minidom_compat;
26mod text;
27
28#[doc(hidden)]
29pub mod exports {
30 #[cfg(feature = "minidom")]
31 pub use minidom;
32 pub use rxml;
33}
34
35use std::borrow::Cow;
36
37#[doc = include_str!("from_xml_doc.md")]
38#[doc(inline)]
39#[cfg(feature = "macros")]
40pub use xso_proc::FromXml;
41
42/// # Make a struct or enum serialisable to XML
43///
44/// This derives the [`IntoXml`] trait on a struct or enum. It is the
45/// counterpart to [`macro@FromXml`].
46///
47/// The attributes necessary and available for the derivation to work are
48/// documented on [`macro@FromXml`].
49#[doc(inline)]
50#[cfg(feature = "macros")]
51pub use xso_proc::IntoXml;
52
53/// Trait allowing to consume a struct and iterate its contents as
54/// serialisable [`rxml::Event`] items.
55///
56/// **Important:** Changing the [`EventIter`][`Self::EventIter`] associated
57/// type is considered a non-breaking change for any given implementation of
58/// this trait. Always refer to a type's iterator type using fully-qualified
59/// notation, for example: `<T as xso::IntoXml>::EventIter`.
60pub trait IntoXml {
61 /// The iterator type.
62 ///
63 /// **Important:** Changing this type is considered a non-breaking change
64 /// for any given implementation of this trait. Always refer to a type's
65 /// iterator type using fully-qualified notation, for example:
66 /// `<T as xso::IntoXml>::EventIter`.
67 type EventIter: Iterator<Item = Result<rxml::Event, self::error::Error>>;
68
69 /// Return an iterator which emits the contents of the struct or enum as
70 /// serialisable [`rxml::Event`] items.
71 fn into_event_iter(self) -> Result<Self::EventIter, self::error::Error>;
72}
73
74/// Trait for a temporary object allowing to construct a struct from
75/// [`rxml::Event`] items.
76///
77/// Objects of this type are generally constructed through
78/// [`FromXml::from_events`] and are used to build Rust structs or enums from
79/// XML data. The XML data must be fed as `rxml::Event` to the
80/// [`feed`][`Self::feed`] method.
81pub trait FromEventsBuilder {
82 /// The type which will be constructed by this builder.
83 type Output;
84
85 /// Feed another [`rxml::Event`] into the element construction
86 /// process.
87 ///
88 /// Once the construction process completes, `Ok(Some(_))` is returned.
89 /// When valid data has been fed but more events are needed to fully
90 /// construct the resulting struct, `Ok(None)` is returned.
91 ///
92 /// If the construction fails, `Err(_)` is returned. Errors are generally
93 /// fatal and the builder should be assumed to be broken at that point.
94 /// Feeding more events after an error may result in panics, errors or
95 /// inconsistent result data, though it may never result in unsound or
96 /// unsafe behaviour.
97 fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, self::error::Error>;
98}
99
100/// Trait allowing to construct a struct from a stream of
101/// [`rxml::Event`] items.
102///
103/// To use this, first call [`FromXml::from_events`] with the qualified
104/// name and the attributes of the corresponding
105/// [`rxml::Event::StartElement`] event. If the call succeeds, the
106/// returned builder object must be fed with the events representing the
107/// contents of the element, and then with the `EndElement` event.
108///
109/// The `StartElement` passed to `from_events` must not be passed to `feed`.
110///
111/// **Important:** Changing the [`Builder`][`Self::Builder`] associated type
112/// is considered a non-breaking change for any given implementation of this
113/// trait. Always refer to a type's builder type using fully-qualified
114/// notation, for example: `<T as xso::FromXml>::Builder`.
115pub trait FromXml {
116 /// A builder type used to construct the element.
117 ///
118 /// **Important:** Changing this type is considered a non-breaking change
119 /// for any given implementation of this trait. Always refer to a type's
120 /// builder type using fully-qualified notation, for example:
121 /// `<T as xso::FromXml>::Builder`.
122 type Builder: FromEventsBuilder<Output = Self>;
123
124 /// Attempt to initiate the streamed construction of this struct from XML.
125 ///
126 /// If the passed qualified `name` and `attrs` match the element's type,
127 /// the [`Self::Builder`] is returned and should be fed with XML events
128 /// by the caller.
129 ///
130 /// Otherwise, an appropriate error is returned.
131 fn from_events(
132 name: rxml::QName,
133 attrs: rxml::AttrMap,
134 ) -> Result<Self::Builder, self::error::FromEventsError>;
135}
136
137/// Trait allowing to convert XML text to a value.
138///
139/// This trait is similar to [`std::str::FromStr`], however, due to
140/// restrictions imposed by the orphan rule, a separate trait is needed.
141/// Implementations for many standard library types are available. In
142/// addition, the following feature flags can enable more implementations:
143///
144/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
145/// - `uuid`: `uuid::Uuid`
146///
147/// Because of this unfortunate situation, we are **extremely liberal** with
148/// accepting optional dependencies for this purpose. You are very welcome to
149/// make merge requests against this crate adding support for parsing
150/// third-party crates.
151pub trait FromXmlText: Sized {
152 /// Convert the given XML text to a value.
153 fn from_xml_text(data: String) -> Result<Self, self::error::Error>;
154}
155
156impl FromXmlText for String {
157 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
158 Ok(data)
159 }
160}
161
162impl<T: FromXmlText, B: ToOwned<Owned = T>> FromXmlText for Cow<'_, B> {
163 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
164 Ok(Cow::Owned(T::from_xml_text(data)?))
165 }
166}
167
168impl<T: FromXmlText> FromXmlText for Option<T> {
169 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
170 Ok(Some(T::from_xml_text(data)?))
171 }
172}
173
174impl<T: FromXmlText> FromXmlText for Box<T> {
175 fn from_xml_text(data: String) -> Result<Self, self::error::Error> {
176 Ok(Box::new(T::from_xml_text(data)?))
177 }
178}
179
180/// Trait to convert a value to an XML text string.
181///
182/// This trait is implemented for many standard library types implementing
183/// [`std::fmt::Display`]. In addition, the following feature flags can enable
184/// more implementations:
185///
186/// - `jid`: `jid::Jid`, `jid::BareJid`, `jid::FullJid`
187/// - `uuid`: `uuid::Uuid`
188///
189/// Because of the unfortunate situation as described in [`FromXmlText`], we
190/// are **extremely liberal** with accepting optional dependencies for this
191/// purpose. You are very welcome to make merge requests against this crate
192/// adding support for parsing third-party crates.
193pub trait IntoXmlText: Sized {
194 /// Convert the value to an XML string in a context where an absent value
195 /// cannot be represented.
196 fn into_xml_text(self) -> Result<String, self::error::Error>;
197
198 /// Convert the value to an XML string in a context where an absent value
199 /// can be represented.
200 ///
201 /// The provided implementation will always return the result of
202 /// [`Self::into_xml_text`] wrapped into `Some(.)`. By re-implementing
203 /// this method, implementors can customize the behaviour for certain
204 /// values.
205 fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
206 Ok(Some(self.into_xml_text()?))
207 }
208}
209
210impl IntoXmlText for String {
211 fn into_xml_text(self) -> Result<String, self::error::Error> {
212 Ok(self)
213 }
214}
215
216impl<T: IntoXmlText> IntoXmlText for Box<T> {
217 fn into_xml_text(self) -> Result<String, self::error::Error> {
218 T::into_xml_text(*self)
219 }
220}
221
222impl<T: IntoXmlText, B: ToOwned<Owned = T>> IntoXmlText for Cow<'_, B> {
223 fn into_xml_text(self) -> Result<String, self::error::Error> {
224 T::into_xml_text(self.into_owned())
225 }
226}
227
228/// Specialized variant of [`IntoXmlText`].
229///
230/// Do **not** implement this unless you cannot implement [`IntoXmlText`]:
231/// implementing [`IntoXmlText`] is more versatile and an
232/// [`IntoOptionalXmlText`] implementation is automatically provided.
233///
234/// If you need to customize the behaviour of the [`IntoOptionalXmlText`]
235/// blanket implementation, implement a custom
236/// [`IntoXmlText::into_optional_xml_text`] instead.
237pub trait IntoOptionalXmlText {
238 /// Convert the value to an XML string in a context where an absent value
239 /// can be represented.
240 fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error>;
241}
242
243impl<T: IntoXmlText> IntoOptionalXmlText for T {
244 fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
245 <Self as IntoXmlText>::into_optional_xml_text(self)
246 }
247}
248
249impl<T: IntoOptionalXmlText> IntoOptionalXmlText for Option<T> {
250 fn into_optional_xml_text(self) -> Result<Option<String>, self::error::Error> {
251 self.map(T::into_optional_xml_text)
252 .transpose()
253 .map(Option::flatten)
254 }
255}
256
257/// Attempt to transform a type implementing [`IntoXml`] into another
258/// type which implements [`FromXml`].
259pub fn transform<T: FromXml, F: IntoXml>(from: F) -> Result<T, self::error::Error> {
260 let mut iter = from.into_event_iter()?;
261 let (qname, attrs) = match iter.next() {
262 Some(Ok(rxml::Event::StartElement(_, qname, attrs))) => (qname, attrs),
263 Some(Err(e)) => return Err(e),
264 _ => panic!("into_event_iter did not start with StartElement event!"),
265 };
266 let mut sink = match T::from_events(qname, attrs) {
267 Ok(v) => v,
268 Err(self::error::FromEventsError::Mismatch { .. }) => {
269 return Err(self::error::Error::TypeMismatch)
270 }
271 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
272 };
273 for event in iter {
274 let event = event?;
275 match sink.feed(event)? {
276 Some(v) => return Ok(v),
277 None => (),
278 }
279 }
280 Err(self::error::Error::XmlError(
281 rxml::error::XmlError::InvalidEof("during transform"),
282 ))
283}
284
/// Attempt to convert a [`minidom::Element`] into a type implementing
/// [`FromXml`], fallibly.
///
/// Unlike [`transform`] (which can also be used with an element), this
/// function hands the element back unharmed, inside the returned error, if
/// the element header does not match the expectations of `T`.
///
/// # Panics
///
/// Panics if the event iterator of the element is empty, or if it ends
/// before the builder of `T` has completed.
#[cfg(feature = "minidom")]
pub fn try_from_element<T: FromXml>(
    from: minidom::Element,
) -> Result<T, self::error::FromElementError> {
    use self::error::{FromElementError, FromEventsError};

    // Match `T` against the element header first, while `from` is still
    // intact and can be returned to the caller on mismatch.
    let (qname, attrs) = minidom_compat::make_start_ev_parts(&from)?;
    let mut builder = match T::from_events(qname, attrs) {
        Ok(builder) => builder,
        Err(FromEventsError::Invalid(e)) => return Err(FromElementError::Invalid(e)),
        Err(FromEventsError::Mismatch { .. }) => return Err(FromElementError::Mismatch(from)),
    };

    let mut events = from.into_event_iter()?;
    // Discard the first event: the header was already passed to
    // `from_events` above and must not be fed to the builder.
    events.next().expect("first event from minidom::Element")?;
    for event in events {
        if let Some(value) = builder.feed(event?)? {
            return Ok(value);
        }
    }

    // unreachable! instead of error here, because minidom::Element always
    // produces the complete event sequence of a single element, and FromXml
    // implementations must be constructible from that.
    unreachable!("minidom::Element did not produce enough events to complete element")
}
320
321fn map_nonio_error<T>(r: Result<T, rxml::Error>) -> Result<T, self::error::Error> {
322 match r {
323 Ok(v) => Ok(v),
324 Err(rxml::Error::IO(_)) => unreachable!(),
325 Err(rxml::Error::Xml(e)) => Err(e.into()),
326 Err(rxml::Error::InvalidUtf8Byte(_)) => Err(self::error::Error::Other("invalid utf-8")),
327 Err(rxml::Error::InvalidChar(_)) => {
328 Err(self::error::Error::Other("non-character encountered"))
329 }
330 Err(rxml::Error::RestrictedXml(_)) => Err(self::error::Error::Other("restricted xml")),
331 }
332}
333
334fn read_start_event<I: std::io::BufRead>(
335 r: &mut rxml::Reader<I>,
336) -> Result<(rxml::QName, rxml::AttrMap), self::error::Error> {
337 for ev in r {
338 match map_nonio_error(ev)? {
339 rxml::Event::XmlDeclaration(_, rxml::XmlVersion::V1_0) => (),
340 rxml::Event::StartElement(_, name, attrs) => return Ok((name, attrs)),
341 _ => {
342 return Err(self::error::Error::Other(
343 "Unexpected event at start of document",
344 ))
345 }
346 }
347 }
348 Err(self::error::Error::XmlError(
349 rxml::error::XmlError::InvalidEof("before start of element"),
350 ))
351}
352
353/// Attempt to parse a type implementing [`FromXml`] from a byte buffer
354/// containing XML data.
355pub fn from_bytes<T: FromXml>(mut buf: &[u8]) -> Result<T, self::error::Error> {
356 let mut reader = rxml::Reader::new(&mut buf);
357 let (name, attrs) = read_start_event(&mut reader)?;
358 let mut builder = match T::from_events(name, attrs) {
359 Ok(v) => v,
360 Err(self::error::FromEventsError::Mismatch { .. }) => {
361 return Err(self::error::Error::TypeMismatch)
362 }
363 Err(self::error::FromEventsError::Invalid(e)) => return Err(e),
364 };
365 for ev in reader {
366 match builder.feed(map_nonio_error(ev)?)? {
367 Some(v) => return Ok(v),
368 None => (),
369 }
370 }
371 Err(self::error::Error::XmlError(
372 rxml::error::XmlError::InvalidEof("while parsing FromXml impl"),
373 ))
374}