fromxml.rs

  1//! # Generic builder type implementations
  2//!
  3//! This module contains [`FromEventsBuilder`] implementations for types from
  4//! foreign libraries (such as the standard library).
  5//!
  6//! In order to not clutter the `xso` crate's main namespace, they are
  7//! stashed away in a separate module.
  8
  9// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
 10//
 11// This Source Code Form is subject to the terms of the Mozilla Public
 12// License, v. 2.0. If a copy of the MPL was not distributed with this
 13// file, You can obtain one at http://mozilla.org/MPL/2.0/.
 14
 15use alloc::boxed::Box;
 16
 17use crate::error::{Error, FromEventsError};
 18use crate::{FromEventsBuilder, FromXml};
 19
 20/// # Parsing context for [`FromEventsBuilder`]
 21///
 22/// For the most part, [`FromEventsBuilder`] implementations can work with
 23/// only the information inside the [`rxml::Event`] which is delivered to
 24/// them (and any information they may have stored from previous events).
 25///
 26/// However, there is (currently) one special case: the `xml:lang` attribute.
 27/// That attribute is inherited across the entire document tree hierarchy. If
 28/// the parsed element is not the top-level element, there may be an implicit
 29/// value for `xml:lang`.
 30#[derive(Debug)]
 31#[doc(hidden)]
 32pub struct Context<'x> {
 33    language: Option<&'x str>,
 34}
 35
 36impl<'x> Context<'x> {
 37    /// A context suitable for the beginning of the document.
 38    ///
 39    /// `xml:lang` is assumed to be unset.
 40    pub fn empty() -> Self {
 41        Self { language: None }
 42    }
 43
 44    /// Set the effective `xml:lang` value on the context and return it.
 45    pub fn with_language(mut self, language: Option<&'x str>) -> Self {
 46        self.language = language;
 47        self
 48    }
 49
 50    /// Return the `xml:lang` value in effect at the end of the event which
 51    /// is currently being processed.
 52    pub fn language(&self) -> Option<&str> {
 53        self.language.as_deref()
 54    }
 55}
 56
 57/// Helper struct to construct an `Option<T>` from XML events.
 58pub struct OptionBuilder<T: FromEventsBuilder>(T);
 59
 60impl<T: FromEventsBuilder> FromEventsBuilder for OptionBuilder<T> {
 61    type Output = Option<T::Output>;
 62
 63    fn feed(&mut self, ev: rxml::Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
 64        self.0.feed(ev, ctx).map(|ok| ok.map(Some))
 65    }
 66}
 67
 68/// Parsers `T` into `Some(.)`.
 69///
 70/// Note that this never generates `None`: The main use case is to allow
 71/// external (i.e. without calling `from_events`) defaulting to `None` and
 72/// for optional serialisation (the [`AsXml`][`crate::AsXml`] implementation
 73/// on `Option<T>` emits nothing for `None`).
 74impl<T: FromXml> FromXml for Option<T> {
 75    type Builder = OptionBuilder<T::Builder>;
 76
 77    fn from_events(
 78        name: rxml::QName,
 79        attrs: rxml::AttrMap,
 80        ctx: &Context<'_>,
 81    ) -> Result<Self::Builder, FromEventsError> {
 82        Ok(OptionBuilder(T::from_events(name, attrs, ctx)?))
 83    }
 84}
 85
 86/// Helper struct to construct an `Box<T>` from XML events.
 87pub struct BoxBuilder<T: FromEventsBuilder + ?Sized>(Box<T>);
 88
 89impl<T: FromEventsBuilder + ?Sized> FromEventsBuilder for BoxBuilder<T> {
 90    type Output = Box<T::Output>;
 91
 92    fn feed(&mut self, ev: rxml::Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
 93        self.0.feed(ev, ctx).map(|ok| ok.map(Box::new))
 94    }
 95}
 96
 97/// Parses `T` into a `Box`.
 98impl<T: FromXml + ?Sized> FromXml for Box<T> {
 99    type Builder = BoxBuilder<T::Builder>;
100
101    fn from_events(
102        name: rxml::QName,
103        attrs: rxml::AttrMap,
104        ctx: &Context<'_>,
105    ) -> Result<Self::Builder, FromEventsError> {
106        Ok(BoxBuilder(Box::new(T::from_events(name, attrs, ctx)?)))
107    }
108}
109
110impl<T: FromEventsBuilder + ?Sized> FromEventsBuilder for Box<T> {
111    type Output = T::Output;
112
113    fn feed(&mut self, ev: rxml::Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
114        (**self).feed(ev, ctx)
115    }
116}
117
118#[derive(Debug)]
119enum FallibleBuilderInner<T: FromEventsBuilder, E> {
120    Processing { depth: usize, builder: T },
121    Failed { depth: usize, err: Option<E> },
122    Done,
123}
124
125/// Build a `Result<T, E>` from XML.
126///
127/// This builder, invoked generally via the [`FromXml`] implementation on
128/// `Result<T, E> where T: FromXml, E: From<Error>`, allows to fallably parse
129/// an XSO from XML.
130///
131/// If an error occurs while parsing the XSO, the remaining events which
132/// belong to that XSO are discarded. Once all events have been seen, the
133/// error is returned as `Err(.)` value.
134///
135/// If parsing succeeds, the parsed XSO is returned as `Ok(.)` value.
136#[derive(Debug)]
137pub struct FallibleBuilder<T: FromEventsBuilder, E>(FallibleBuilderInner<T, E>);
138
139impl<T: FromEventsBuilder, E: From<Error>> FromEventsBuilder for FallibleBuilder<T, E> {
140    type Output = Result<T::Output, E>;
141
142    fn feed(&mut self, ev: rxml::Event, ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
143        match self.0 {
144            FallibleBuilderInner::Processing {
145                ref mut depth,
146                ref mut builder,
147            } => {
148                let new_depth = match ev {
149                    rxml::Event::StartElement(..) => match depth.checked_add(1) {
150                        // I *think* it is OK to return an err here
151                        // instead of panicking. The reason is that anyone
152                        // who intends to resume processing at the level
153                        // of where we started to parse this thing in case
154                        // of an error either has to:
155                        // - Use this fallible implementation and rely on
156                        //   it capturing the error (which we don't in
157                        //   this case).
158                        // - Or count the depth themselves, which will
159                        //   either fail in the same way, or they use a
160                        //   wider type (in which case it's ok).
161                        None => {
162                            self.0 = FallibleBuilderInner::Done;
163                            return Err(Error::Other("maximum XML nesting depth exceeded"));
164                        }
165                        Some(v) => Some(v),
166                    },
167                    // In case of an element end, underflow means that we
168                    // have reached the end of the XSO we wanted to process.
169                    // We handle that case at the end of the outer match's
170                    // body: Either we have returned a value then (good), or,
171                    // if we reach the end there with a new_depth == None,
172                    // something went horribly wrong (and we panic).
173                    rxml::Event::EndElement(..) => depth.checked_sub(1),
174
175                    // Text and XML declarations have no influence on parsing
176                    // depth.
177                    rxml::Event::XmlDeclaration(..) | rxml::Event::Text(..) => Some(*depth),
178                };
179
180                match builder.feed(ev, ctx) {
181                    Ok(Some(v)) => {
182                        self.0 = FallibleBuilderInner::Done;
183                        return Ok(Some(Ok(v)));
184                    }
185                    Ok(None) => {
186                        // continue processing in the next round.
187                    }
188                    Err(e) => {
189                        // We are now officially failed ..
190                        match new_depth {
191                            // .. but we are not done yet, so enter the
192                            // failure backtracking state.
193                            Some(depth) => {
194                                self.0 = FallibleBuilderInner::Failed {
195                                    depth,
196                                    err: Some(e.into()),
197                                };
198                                return Ok(None);
199                            }
200                            // .. and we are done with parsing, so we return
201                            // the error as value.
202                            None => {
203                                self.0 = FallibleBuilderInner::Done;
204                                return Ok(Some(Err(e.into())));
205                            }
206                        }
207                    }
208                };
209
210                *depth = match new_depth {
211                    Some(v) => v,
212                    None => unreachable!("fallible parsing continued beyond end of element"),
213                };
214
215                // Need more events.
216                Ok(None)
217            }
218            FallibleBuilderInner::Failed {
219                ref mut depth,
220                ref mut err,
221            } => {
222                *depth = match ev {
223                    rxml::Event::StartElement(..) => match depth.checked_add(1) {
224                        // See above for error return rationale.
225                        None => {
226                            self.0 = FallibleBuilderInner::Done;
227                            return Err(Error::Other("maximum XML nesting depth exceeded"));
228                        }
229                        Some(v) => v,
230                    },
231                    rxml::Event::EndElement(..) => match depth.checked_sub(1) {
232                        Some(v) => v,
233                        None => {
234                            // We are officially done, return a value, switch
235                            // states, and be done with it.
236                            let err = err.take().expect("fallible parsing somehow lost its error");
237                            self.0 = FallibleBuilderInner::Done;
238                            return Ok(Some(Err(err)));
239                        }
240                    },
241
242                    // Text and XML declarations have no influence on parsing
243                    // depth.
244                    rxml::Event::XmlDeclaration(..) | rxml::Event::Text(..) => *depth,
245                };
246
247                // Need more events
248                Ok(None)
249            }
250            FallibleBuilderInner::Done => {
251                panic!("FromEventsBuilder called after it returned a value")
252            }
253        }
254    }
255}
256
257/// Parsers `T` fallibly. See [`FallibleBuilder`] for details.
258impl<T: FromXml, E: From<Error>> FromXml for Result<T, E> {
259    type Builder = FallibleBuilder<T::Builder, E>;
260
261    fn from_events(
262        name: rxml::QName,
263        attrs: rxml::AttrMap,
264        ctx: &Context<'_>,
265    ) -> Result<Self::Builder, FromEventsError> {
266        match T::from_events(name, attrs, ctx) {
267            Ok(builder) => Ok(FallibleBuilder(FallibleBuilderInner::Processing {
268                depth: 0,
269                builder,
270            })),
271            Err(FromEventsError::Mismatch { name, attrs }) => {
272                Err(FromEventsError::Mismatch { name, attrs })
273            }
274            Err(FromEventsError::Invalid(e)) => Ok(FallibleBuilder(FallibleBuilderInner::Failed {
275                depth: 0,
276                err: Some(e.into()),
277            })),
278        }
279    }
280}
281
/// Builder which swallows a complete child tree without looking at its
/// contents.
#[derive(Debug, Default)]
pub struct Discard {
    /// Number of currently open elements below the discarded element.
    depth: usize,
}

impl Discard {
    /// Create a new discarding builder, positioned directly inside the
    /// element to discard (depth zero).
    pub fn new() -> Self {
        Self { depth: 0 }
    }
}
295
296impl FromEventsBuilder for Discard {
297    type Output = ();
298
299    fn feed(&mut self, ev: rxml::Event, _ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
300        match ev {
301            rxml::Event::StartElement(..) => {
302                self.depth = match self.depth.checked_add(1) {
303                    Some(v) => v,
304                    None => return Err(Error::Other("maximum XML nesting depth exceeded")),
305                };
306                Ok(None)
307            }
308            rxml::Event::EndElement(..) => match self.depth.checked_sub(1) {
309                None => Ok(Some(())),
310                Some(v) => {
311                    self.depth = v;
312                    Ok(None)
313                }
314            },
315            _ => Ok(None),
316        }
317    }
318}
319
/// Builder for elements which must be empty: it completes on the end of the
/// element and raises an error on any content.
///
/// This builder is only to be used from within the proc macros and is not
/// stable, public API.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub struct EmptyBuilder {
    /// Error message used when a child element is encountered.
    childerr: &'static str,
    /// Error message used when text content is encountered.
    texterr: &'static str,
}

#[cfg(feature = "macros")]
impl FromEventsBuilder for EmptyBuilder {
    type Output = ();

    fn feed(&mut self, ev: rxml::Event, _ctx: &Context<'_>) -> Result<Option<Self::Output>, Error> {
        // Only the end of the element is acceptable; everything else maps
        // to an error message.
        let message = match ev {
            rxml::Event::EndElement(..) => return Ok(Some(())),
            rxml::Event::StartElement(..) => self.childerr,
            rxml::Event::Text(..) => self.texterr,
            _ => "unexpected content in supposed-to-be-empty element",
        };
        Err(Error::Other(message))
    }
}
346
/// Precursor struct for [`EmptyBuilder`].
///
/// This struct is only to be used from within the proc macros and is not
/// stable, public API.
#[doc(hidden)]
#[cfg(feature = "macros")]
pub struct Empty {
    /// Error message used when the element carries any attribute.
    pub attributeerr: &'static str,
    /// Error message handed to the builder for child elements.
    pub childerr: &'static str,
    /// Error message handed to the builder for text content.
    pub texterr: &'static str,
}

#[cfg(feature = "macros")]
impl Empty {
    /// Check the attributes of the element and create the builder for its
    /// contents.
    ///
    /// Fails with `attributeerr` if `attr` is not empty.
    pub fn start(self, attr: rxml::AttrMap) -> Result<EmptyBuilder, Error> {
        let Self {
            attributeerr,
            childerr,
            texterr,
        } = self;
        if attr.is_empty() {
            Ok(EmptyBuilder { childerr, texterr })
        } else {
            Err(Error::Other(attributeerr))
        }
    }
}
371
#[cfg(test)]
mod tests {
    use super::*;

    use alloc::borrow::ToOwned;
    use rxml::{parser::EventMetrics, Event, Namespace, NcName};

    /// Declare an uninhabited builder type: it can never be constructed, so
    /// its `feed` can never be called.
    macro_rules! null_builder {
        ($name:ident for $output:ident) => {
            #[derive(Debug)]
            enum $name {}

            impl FromEventsBuilder for $name {
                type Output = $output;

                fn feed(
                    &mut self,
                    _: Event,
                    _: &Context<'_>,
                ) -> Result<Option<Self::Output>, Error> {
                    unreachable!();
                }
            }
        };
    }

    null_builder!(AlwaysMismatchBuilder for AlwaysMismatch);
    null_builder!(InitialErrorBuilder for InitialError);

    /// Type whose `from_events` always reports a mismatch.
    #[derive(Debug)]
    struct AlwaysMismatch;

    impl FromXml for AlwaysMismatch {
        type Builder = AlwaysMismatchBuilder;

        fn from_events(
            name: rxml::QName,
            attrs: rxml::AttrMap,
            _ctx: &Context<'_>,
        ) -> Result<Self::Builder, FromEventsError> {
            Err(FromEventsError::Mismatch { name, attrs })
        }
    }

    /// Type whose `from_events` always fails with "some error".
    #[derive(Debug)]
    struct InitialError;

    impl FromXml for InitialError {
        type Builder = InitialErrorBuilder;

        fn from_events(
            _: rxml::QName,
            _: rxml::AttrMap,
            _: &Context<'_>,
        ) -> Result<Self::Builder, FromEventsError> {
            Err(FromEventsError::Invalid(Error::Other("some error")))
        }
    }

    /// Builder which fails with "content error" on every event.
    #[derive(Debug)]
    struct FailOnContentBuilder;

    impl FromEventsBuilder for FailOnContentBuilder {
        type Output = FailOnContent;

        fn feed(&mut self, _: Event, _: &Context<'_>) -> Result<Option<Self::Output>, Error> {
            Err(Error::Other("content error"))
        }
    }

    /// Type which accepts any element header but fails on the first event.
    #[derive(Debug)]
    struct FailOnContent;

    impl FromXml for FailOnContent {
        type Builder = FailOnContentBuilder;

        fn from_events(
            _: rxml::QName,
            _: rxml::AttrMap,
            _: &Context<'_>,
        ) -> Result<Self::Builder, FromEventsError> {
            Ok(FailOnContentBuilder)
        }
    }

    fn qname() -> rxml::QName {
        (Namespace::NONE, NcName::try_from("test").unwrap())
    }

    fn attrs() -> rxml::AttrMap {
        rxml::AttrMap::new()
    }

    /// A start-element event for a `test` element without attributes.
    fn start() -> Event {
        Event::StartElement(EventMetrics::zero(), qname(), attrs())
    }

    /// A text event with fixed content.
    fn text() -> Event {
        Event::Text(EventMetrics::zero(), "hello world!".to_owned())
    }

    /// An end-element event.
    fn end() -> Event {
        Event::EndElement(EventMetrics::zero())
    }

    /// Start fallible parsing of `T`, panicking if no builder is returned.
    #[track_caller]
    fn fallible_builder<T>(ctx: &Context<'_>) -> FallibleBuilder<T::Builder, Error>
    where
        T: FromXml,
        T::Builder: core::fmt::Debug,
    {
        match Result::<T, Error>::from_events(qname(), attrs(), ctx) {
            Ok(v) => v,
            other => panic!("unexpected result: {:?}", other),
        }
    }

    /// Feed `ev` and require that the builder asks for more events.
    #[track_caller]
    fn assert_pending<B>(builder: &mut B, ev: Event, ctx: &Context<'_>)
    where
        B: FromEventsBuilder,
        B::Output: core::fmt::Debug,
    {
        match builder.feed(ev, ctx) {
            Ok(None) => (),
            other => panic!("unexpected result: {:?}", other),
        }
    }

    /// Feed `ev` and require that the builder completes with the captured
    /// error `Error::Other(expected)`.
    #[track_caller]
    fn assert_captured<B>(
        builder: &mut FallibleBuilder<B, Error>,
        ev: Event,
        ctx: &Context<'_>,
        expected: &str,
    ) where
        B: FromEventsBuilder,
        B::Output: core::fmt::Debug,
    {
        match builder.feed(ev, ctx) {
            Ok(Some(Err(Error::Other(msg)))) if msg == expected => (),
            other => panic!("unexpected result: {:?}", other),
        }
    }

    /// Feed a balanced mix of nested elements and text, requiring that the
    /// builder asks for more events after each of them.
    #[track_caller]
    fn assert_nested_content_pending<B>(builder: &mut B, ctx: &Context<'_>)
    where
        B: FromEventsBuilder,
        B::Output: core::fmt::Debug,
    {
        let events = [
            start(),
            text(),
            end(),
            text(),
            start(),
            start(),
            text(),
            end(),
            end(),
        ];
        for ev in events {
            assert_pending(builder, ev, ctx);
        }
    }

    #[test]
    fn fallible_builder_mismatch_passthrough() {
        match Result::<AlwaysMismatch, Error>::from_events(qname(), attrs(), &Context::empty()) {
            Err(FromEventsError::Mismatch { .. }) => (),
            other => panic!("unexpected result: {:?}", other),
        }
    }

    #[test]
    fn fallible_builder_initial_error_capture() {
        let ctx = Context::empty();
        let mut builder = fallible_builder::<InitialError>(&ctx);
        assert_pending(&mut builder, text(), &ctx);
        assert_captured(&mut builder, end(), &ctx, "some error");
    }

    #[test]
    fn fallible_builder_initial_error_capture_allows_nested_stuff() {
        let ctx = Context::empty();
        let mut builder = fallible_builder::<InitialError>(&ctx);
        assert_nested_content_pending(&mut builder, &ctx);
        assert_captured(&mut builder, end(), &ctx, "some error");
    }

    #[test]
    fn fallible_builder_content_error_capture() {
        let ctx = Context::empty();
        let mut builder = fallible_builder::<FailOnContent>(&ctx);
        assert_captured(&mut builder, end(), &ctx, "content error");
    }

    #[test]
    fn fallible_builder_content_error_capture_with_more_content() {
        let ctx = Context::empty();
        let mut builder = fallible_builder::<FailOnContent>(&ctx);
        assert_pending(&mut builder, text(), &ctx);
        assert_captured(&mut builder, end(), &ctx, "content error");
    }

    #[test]
    fn fallible_builder_content_error_capture_with_nested_content() {
        let ctx = Context::empty();
        let mut builder = fallible_builder::<FailOnContent>(&ctx);
        assert_nested_content_pending(&mut builder, &ctx);
        assert_captured(&mut builder, end(), &ctx, "content error");
    }
}