fromxml.rs

  1//! # Generic builder type implementations
  2//!
  3//! This module contains [`FromEventsBuilder`] implementations for types from
  4//! foreign libraries (such as the standard library).
  5//!
  6//! In order to not clutter the `xso` crate's main namespace, they are
  7//! stashed away in a separate module.
  8
  9// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 10//
 11// This Source Code Form is subject to the terms of the Mozilla Public
 12// License, v. 2.0. If a copy of the MPL was not distributed with this
 13// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 14
 15use crate::error::{Error, FromEventsError};
 16use crate::{FromEventsBuilder, FromXml};
 17
 18/// Helper struct to construct an `Option<T>` from XML events.
 19pub struct OptionBuilder<T: FromEventsBuilder>(T);
 20
 21impl<T: FromEventsBuilder> FromEventsBuilder for OptionBuilder<T> {
 22    type Output = Option<T::Output>;
 23
 24    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, Error> {
 25        self.0.feed(ev).map(|ok| ok.map(|value| Some(value)))
 26    }
 27}
 28
 29/// Parsers `T` into `Some(.)`.
 30///
 31/// Note that this never generates `None`: The main use case is to allow
 32/// external (i.e. without calling `from_events`) defaulting to `None` and
 33/// for optional serialisation (the [`AsXml`][`crate::AsXml`] implementation
 34/// on `Option<T>` emits nothing for `None`).
 35impl<T: FromXml> FromXml for Option<T> {
 36    type Builder = OptionBuilder<T::Builder>;
 37
 38    fn from_events(
 39        name: rxml::QName,
 40        attrs: rxml::AttrMap,
 41    ) -> Result<Self::Builder, FromEventsError> {
 42        Ok(OptionBuilder(T::from_events(name, attrs)?))
 43    }
 44}
 45
 46/// Helper struct to construct an `Box<T>` from XML events.
 47pub struct BoxBuilder<T: FromEventsBuilder>(Box<T>);
 48
 49impl<T: FromEventsBuilder> FromEventsBuilder for BoxBuilder<T> {
 50    type Output = Box<T::Output>;
 51
 52    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, Error> {
 53        self.0.feed(ev).map(|ok| ok.map(|value| Box::new(value)))
 54    }
 55}
 56
 57/// Parsers `T` into a `Box`.
 58impl<T: FromXml> FromXml for Box<T> {
 59    type Builder = BoxBuilder<T::Builder>;
 60
 61    fn from_events(
 62        name: rxml::QName,
 63        attrs: rxml::AttrMap,
 64    ) -> Result<Self::Builder, FromEventsError> {
 65        Ok(BoxBuilder(Box::new(T::from_events(name, attrs)?)))
 66    }
 67}
 68
 69#[derive(Debug)]
 70enum FallibleBuilderInner<T: FromEventsBuilder, E> {
 71    Processing { depth: usize, builder: T },
 72    Failed { depth: usize, err: Option<E> },
 73    Done,
 74}
 75
 76/// Build a `Result<T, E>` from XML.
 77///
 78/// This builder, invoked generally via the [`FromXml`] implementation on
 79/// `Result<T, E> where T: FromXml, E: From<Error>`, allows to fallably parse
 80/// an XSO from XML.
 81///
 82/// If an error occurs while parsing the XSO, the remaining events which
 83/// belong to that XSO are discarded. Once all events have been seen, the
 84/// error is returned as `Err(.)` value.
 85///
 86/// If parsing succeeds, the parsed XSO is returned as `Ok(.)` value.
 87#[derive(Debug)]
 88pub struct FallibleBuilder<T: FromEventsBuilder, E>(FallibleBuilderInner<T, E>);
 89
 90impl<T: FromEventsBuilder, E: From<Error>> FromEventsBuilder for FallibleBuilder<T, E> {
 91    type Output = Result<T::Output, E>;
 92
 93    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, Error> {
 94        match self.0 {
 95            FallibleBuilderInner::Processing {
 96                ref mut depth,
 97                ref mut builder,
 98            } => {
 99                let new_depth = match ev {
100                    rxml::Event::StartElement(..) => match depth.checked_add(1) {
101                        // I *think* it is OK to return an err here
102                        // instead of panicking. The reason is that anyone
103                        // who intends to resume processing at the level
104                        // of where we started to parse this thing in case
105                        // of an error either has to:
106                        // - Use this fallible implementation and rely on
107                        //   it capturing the error (which we don't in
108                        //   this case).
109                        // - Or count the depth themselves, which will
110                        //   either fail in the same way, or they use a
111                        //   wider type (in which case it's ok).
112                        None => {
113                            self.0 = FallibleBuilderInner::Done;
114                            return Err(Error::Other("maximum XML nesting depth exceeded"));
115                        }
116                        Some(v) => Some(v),
117                    },
118                    // In case of an element end, underflow means that we
119                    // have reached the end of the XSO we wanted to process.
120                    // We handle that case at the end of the outer match's
121                    // body: Either we have returned a value then (good), or,
122                    // if we reach the end there with a new_depth == None,
123                    // something went horribly wrong (and we panic).
124                    rxml::Event::EndElement(..) => depth.checked_sub(1),
125
126                    // Text and XML declarations have no influence on parsing
127                    // depth.
128                    rxml::Event::XmlDeclaration(..) | rxml::Event::Text(..) => Some(*depth),
129                };
130
131                match builder.feed(ev) {
132                    Ok(Some(v)) => {
133                        self.0 = FallibleBuilderInner::Done;
134                        return Ok(Some(Ok(v)));
135                    }
136                    Ok(None) => {
137                        // continue processing in the next round.
138                    }
139                    Err(e) => {
140                        // We are now officially failed ..
141                        match new_depth {
142                            // .. but we are not done yet, so enter the
143                            // failure backtracking state.
144                            Some(depth) => {
145                                self.0 = FallibleBuilderInner::Failed {
146                                    depth,
147                                    err: Some(e.into()),
148                                };
149                                return Ok(None);
150                            }
151                            // .. and we are done with parsing, so we return
152                            // the error as value.
153                            None => {
154                                self.0 = FallibleBuilderInner::Done;
155                                return Ok(Some(Err(e.into())));
156                            }
157                        }
158                    }
159                };
160
161                *depth = match new_depth {
162                    Some(v) => v,
163                    None => unreachable!("fallible parsing continued beyond end of element"),
164                };
165
166                // Need more events.
167                Ok(None)
168            }
169            FallibleBuilderInner::Failed {
170                ref mut depth,
171                ref mut err,
172            } => {
173                *depth = match ev {
174                    rxml::Event::StartElement(..) => match depth.checked_add(1) {
175                        // See above for error return rationale.
176                        None => {
177                            self.0 = FallibleBuilderInner::Done;
178                            return Err(Error::Other("maximum XML nesting depth exceeded"));
179                        }
180                        Some(v) => v,
181                    },
182                    rxml::Event::EndElement(..) => match depth.checked_sub(1) {
183                        Some(v) => v,
184                        None => {
185                            // We are officially done, return a value, switch
186                            // states, and be done with it.
187                            let err = err.take().expect("fallible parsing somehow lost its error");
188                            self.0 = FallibleBuilderInner::Done;
189                            return Ok(Some(Err(err)));
190                        }
191                    },
192
193                    // Text and XML declarations have no influence on parsing
194                    // depth.
195                    rxml::Event::XmlDeclaration(..) | rxml::Event::Text(..) => *depth,
196                };
197
198                // Need more events
199                Ok(None)
200            }
201            FallibleBuilderInner::Done => {
202                panic!("FromEventsBuilder called after it returned a value")
203            }
204        }
205    }
206}
207
208/// Parsers `T` fallibly. See [`FallibleBuilder`] for details.
209impl<T: FromXml, E: From<Error>> FromXml for Result<T, E> {
210    type Builder = FallibleBuilder<T::Builder, E>;
211
212    fn from_events(
213        name: rxml::QName,
214        attrs: rxml::AttrMap,
215    ) -> Result<Self::Builder, FromEventsError> {
216        match T::from_events(name, attrs) {
217            Ok(builder) => Ok(FallibleBuilder(FallibleBuilderInner::Processing {
218                depth: 0,
219                builder,
220            })),
221            Err(FromEventsError::Mismatch { name, attrs }) => {
222                Err(FromEventsError::Mismatch { name, attrs })
223            }
224            Err(FromEventsError::Invalid(e)) => Ok(FallibleBuilder(FallibleBuilderInner::Failed {
225                depth: 0,
226                err: Some(e.into()),
227            })),
228        }
229    }
230}
231
/// Builder which discards an entire child tree without inspecting the
/// contents.
///
/// The [`Default`] value is the same as the one returned by
/// [`Discard::new`].
#[derive(Debug, Default)]
pub struct Discard {
    /// Element nesting depth below the element being discarded.
    depth: usize,
}

impl Discard {
    /// Create a new discarding builder.
    ///
    /// The builder starts at nesting depth zero.
    pub fn new() -> Self {
        Self { depth: 0 }
    }
}
245
246impl FromEventsBuilder for Discard {
247    type Output = ();
248
249    fn feed(&mut self, ev: rxml::Event) -> Result<Option<Self::Output>, Error> {
250        match ev {
251            rxml::Event::StartElement(..) => {
252                self.depth = match self.depth.checked_add(1) {
253                    Some(v) => v,
254                    None => return Err(Error::Other("maximum XML nesting depth exceeded")),
255                };
256                Ok(None)
257            }
258            rxml::Event::EndElement(..) => match self.depth.checked_sub(1) {
259                None => Ok(Some(())),
260                Some(v) => {
261                    self.depth = v;
262                    Ok(None)
263                }
264            },
265            _ => Ok(None),
266        }
267    }
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    use rxml::{parser::EventMetrics, Event, Namespace, NcName};

    /// Type whose `from_events` always reports a mismatch.
    #[derive(Debug)]
    struct AlwaysMismatch;

    /// Type whose `from_events` always reports an invalid element.
    #[derive(Debug)]
    struct InitialError;

    /// Type whose builder fails on the first event it is fed.
    #[derive(Debug)]
    struct FailOnContent;

    /// Builder for [`FailOnContent`]; every `feed` call is an error.
    #[derive(Debug)]
    struct FailOnContentBuilder;

    // Declares an uninhabited builder type. Such a builder can never be
    // constructed, so its `feed` can never be called.
    macro_rules! null_builder {
        ($name:ident for $output:ident) => {
            #[derive(Debug)]
            enum $name {}

            impl FromEventsBuilder for $name {
                type Output = $output;

                fn feed(&mut self, _: Event) -> Result<Option<Self::Output>, Error> {
                    match *self {}
                }
            }
        };
    }

    null_builder!(AlwaysMismatchBuilder for AlwaysMismatch);
    null_builder!(InitialErrorBuilder for InitialError);

    impl FromXml for AlwaysMismatch {
        type Builder = AlwaysMismatchBuilder;

        fn from_events(
            name: rxml::QName,
            attrs: rxml::AttrMap,
        ) -> Result<Self::Builder, FromEventsError> {
            Err(FromEventsError::Mismatch { name, attrs })
        }
    }

    impl FromXml for InitialError {
        type Builder = InitialErrorBuilder;

        fn from_events(_: rxml::QName, _: rxml::AttrMap) -> Result<Self::Builder, FromEventsError> {
            Err(FromEventsError::Invalid(Error::Other("some error")))
        }
    }

    impl FromEventsBuilder for FailOnContentBuilder {
        type Output = FailOnContent;

        fn feed(&mut self, _: Event) -> Result<Option<Self::Output>, Error> {
            Err(Error::Other("content error"))
        }
    }

    impl FromXml for FailOnContent {
        type Builder = FailOnContentBuilder;

        fn from_events(_: rxml::QName, _: rxml::AttrMap) -> Result<Self::Builder, FromEventsError> {
            Ok(FailOnContentBuilder)
        }
    }

    /// Element name used for all test elements.
    fn qname() -> rxml::QName {
        (Namespace::NONE, NcName::try_from("test").unwrap())
    }

    /// Empty attribute map.
    fn attrs() -> rxml::AttrMap {
        rxml::AttrMap::new()
    }

    /// Start of a nested test element.
    fn start() -> Event {
        Event::StartElement(EventMetrics::zero(), qname(), attrs())
    }

    /// Some text content.
    fn text() -> Event {
        Event::Text(EventMetrics::zero(), "hello world!".to_owned())
    }

    /// End of an element.
    fn end() -> Event {
        Event::EndElement(EventMetrics::zero())
    }

    /// Start fallible parsing of `T`, panicking if no builder is returned.
    #[track_caller]
    fn fallible_builder_for<T>() -> FallibleBuilder<T::Builder, Error>
    where
        T: FromXml,
        T::Builder: core::fmt::Debug,
    {
        match Result::<T, Error>::from_events(qname(), attrs()) {
            Ok(builder) => builder,
            other => panic!("unexpected result: {:?}", other),
        }
    }

    /// Feed `ev` and require that the builder asks for more events.
    #[track_caller]
    fn expect_pending<B>(builder: &mut B, ev: Event)
    where
        B: FromEventsBuilder,
        B::Output: core::fmt::Debug,
    {
        match builder.feed(ev) {
            Ok(None) => (),
            other => panic!("unexpected result: {:?}", other),
        }
    }

    /// Feed `ev` and require that the builder finishes with the captured
    /// error `Error::Other(message)`.
    #[track_caller]
    fn expect_captured<B, V>(builder: &mut B, ev: Event, message: &str)
    where
        B: FromEventsBuilder<Output = Result<V, Error>>,
        V: core::fmt::Debug,
    {
        match builder.feed(ev) {
            Ok(Some(Err(Error::Other(found)))) if found == message => (),
            other => panic!("unexpected result: {:?}", other),
        }
    }

    #[test]
    fn fallible_builder_mismatch_passthrough() {
        let outcome = Result::<AlwaysMismatch, Error>::from_events(qname(), attrs());
        if !matches!(outcome, Err(FromEventsError::Mismatch { .. })) {
            panic!("unexpected result: {:?}", outcome);
        }
    }

    #[test]
    fn fallible_builder_initial_error_capture() {
        let mut builder = fallible_builder_for::<InitialError>();
        expect_pending(&mut builder, text());
        expect_captured(&mut builder, end(), "some error");
    }

    #[test]
    fn fallible_builder_initial_error_capture_allows_nested_stuff() {
        let mut builder = fallible_builder_for::<InitialError>();
        expect_pending(&mut builder, start());
        expect_pending(&mut builder, text());
        expect_pending(&mut builder, end());
        expect_pending(&mut builder, text());
        expect_pending(&mut builder, start());
        expect_pending(&mut builder, start());
        expect_pending(&mut builder, text());
        expect_pending(&mut builder, end());
        expect_pending(&mut builder, end());
        expect_captured(&mut builder, end(), "some error");
    }

    #[test]
    fn fallible_builder_content_error_capture() {
        let mut builder = fallible_builder_for::<FailOnContent>();
        expect_captured(&mut builder, end(), "content error");
    }

    #[test]
    fn fallible_builder_content_error_capture_with_more_content() {
        let mut builder = fallible_builder_for::<FailOnContent>();
        expect_pending(&mut builder, text());
        expect_captured(&mut builder, end(), "content error");
    }

    #[test]
    fn fallible_builder_content_error_capture_with_nested_content() {
        let mut builder = fallible_builder_for::<FailOnContent>();
        expect_pending(&mut builder, start());
        expect_pending(&mut builder, text());
        expect_pending(&mut builder, end());
        expect_pending(&mut builder, text());
        expect_pending(&mut builder, start());
        expect_pending(&mut builder, start());
        expect_pending(&mut builder, text());
        expect_pending(&mut builder, end());
        expect_pending(&mut builder, end());
        expect_captured(&mut builder, end(), "content error");
    }
}
496}