minidom_compat.rs

  1//! Implementations of traits from this crate for minidom types
  2
  3// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
  4//
  5// This Source Code Form is subject to the terms of the Mozilla Public
  6// License, v. 2.0. If a copy of the MPL was not distributed with this
  7// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  8use std::marker::PhantomData;
  9use std::vec::IntoIter;
 10
 11use minidom::{Element, Node};
 12
 13use rxml::{
 14    parser::EventMetrics,
 15    writer::{SimpleNamespaces, TrackNamespace},
 16    AttrMap, Event, Name, Namespace, NcName,
 17};
 18
 19use crate::{
 20    error::{Error, FromEventsError},
 21    FromEventsBuilder, FromXml, IntoXml,
 22};
 23
/// State machine for converting a minidom Element into rxml events.
///
/// The states are passed through in declaration order: `Header`, then
/// `Nodes`, then `Fin`.
enum IntoEventsInner {
    /// Element header: the element is still intact and we need to generate
    /// the [`rxml::Event::StartElement`] event from the namespace, name, and
    /// attributes.
    Header(Element),

    /// Content: The contents of the element are streamed as events.
    Nodes {
        /// Remaining child nodes (text and/or children) to emit.
        remaining: IntoIter<Node>,

        /// When emitting a child element, this is a nested [`IntoEvents`]
        /// instance for that child element.
        nested: Option<Box<IntoEvents>>,
    },

    /// End of iteration: in this state, no further events are produced and
    /// only the end of the iterator is reported.
    ///
    /// Note that the [`rxml::Event::EndElement`] event for the element itself
    /// is already generated by the iterator in the `Nodes` state, when
    /// `nested` is absent or exhausted and `remaining` returns `None` from
    /// its `next()` implementation.
    Fin,
}
 49
 50/// Create the parts for a [`rxml::Event::StartElement`] from a
 51/// [`minidom::Element`].
 52///
 53/// Note that this copies the attribute data as well as namespace and name.
 54/// This is due to limitations in the [`minidom::Element`] API.
 55// NOTE to developers: The limitations are not fully trivial to overcome:
 56// the attributes use a BTreeMap internally, which does not offer a `drain`
 57// iterator.
 58pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
 59    let name = NcName::try_from(el.name())?;
 60    let namespace = Namespace::from(el.ns());
 61
 62    let mut attrs = AttrMap::new();
 63    for (name, value) in el.attrs() {
 64        let name = Name::try_from(name)?;
 65        let (prefix, name) = name.split_name()?;
 66        let namespace = if let Some(prefix) = prefix {
 67            if prefix == "xml" {
 68                Namespace::XML
 69            } else {
 70                let ns = match el.prefixes.get(&Some(prefix.into())) {
 71                    Some(v) => v,
 72                    None => {
 73                        panic!("undeclared xml namespace prefix in minidom::Element")
 74                    }
 75                };
 76                Namespace::from(ns.to_owned())
 77            }
 78        } else {
 79            Namespace::NONE
 80        };
 81
 82        attrs.insert(namespace, name, value.to_owned());
 83    }
 84
 85    Ok(((namespace, name), attrs))
 86}
 87
 88impl IntoEventsInner {
 89    fn next(&mut self) -> Result<Option<Event>, Error> {
 90        match self {
 91            IntoEventsInner::Header(ref mut el) => {
 92                let (qname, attrs) = make_start_ev_parts(el)?;
 93                let event = Event::StartElement(EventMetrics::zero(), qname, attrs);
 94
 95                *self = IntoEventsInner::Nodes {
 96                    remaining: el.take_nodes().into_iter(),
 97                    nested: None,
 98                };
 99                return Ok(Some(event));
100            }
101            IntoEventsInner::Nodes {
102                ref mut nested,
103                ref mut remaining,
104            } => {
105                loop {
106                    if let Some(nested) = nested.as_mut() {
107                        if let Some(ev) = nested.next() {
108                            return Some(ev).transpose();
109                        }
110                    }
111                    match remaining.next() {
112                        Some(Node::Text(text)) => {
113                            return Ok(Some(Event::Text(EventMetrics::zero(), text)));
114                        }
115                        Some(Node::Element(el)) => {
116                            *nested = Some(Box::new(el.into_event_iter()?));
117                            // fallthrough to next loop iteration
118                        }
119                        None => {
120                            // end of element, switch state and emit EndElement
121                            *self = IntoEventsInner::Fin;
122                            return Ok(Some(Event::EndElement(EventMetrics::zero())));
123                        }
124                    }
125                }
126            }
127            IntoEventsInner::Fin => Ok(None),
128        }
129    }
130}
131
/// Convert a [`minidom::Element`] into [`rxml::Event`]s.
///
/// This is an iterator which yields `Result<Event, Error>` items.
///
/// This can be constructed from the
/// [`IntoXml::into_event_iter`][`crate::IntoXml::into_event_iter`]
/// implementation on [`minidom::Element`].
pub struct IntoEvents(IntoEventsInner);
138
139impl Iterator for IntoEvents {
140    type Item = Result<Event, Error>;
141
142    fn next(&mut self) -> Option<Self::Item> {
143        self.0.next().transpose()
144    }
145}
146
147impl IntoXml for Element {
148    type EventIter = IntoEvents;
149
150    fn into_event_iter(self) -> Result<Self::EventIter, Error> {
151        Ok(IntoEvents(IntoEventsInner::Header(self)))
152    }
153}
154
/// Construct a [`minidom::Element`] from [`rxml::Event`]s
///
/// This can be constructed from the
/// [`FromXml::from_events`][`crate::FromXml::from_events`]
/// implementation on [`minidom::Element`].
pub struct ElementFromEvents {
    /// The element under construction. This is `None` once the element has
    /// been handed out by `feed()`.
    inner: Option<Element>,

    /// Builder for the child element which is currently being received, if
    /// any.
    nested: Option<Box<ElementFromEvents>>,
}
164
165impl FromEventsBuilder for ElementFromEvents {
166    type Output = minidom::Element;
167
168    fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
169        let inner = self
170            .inner
171            .as_mut()
172            .expect("feed() called after it finished");
173        if let Some(nested) = self.nested.as_mut() {
174            match nested.feed(ev)? {
175                Some(v) => {
176                    inner.append_child(v);
177                    self.nested = None;
178                    return Ok(None);
179                }
180                None => return Ok(None),
181            }
182        }
183        match ev {
184            Event::XmlDeclaration(_, _) => Ok(None),
185            Event::StartElement(_, qname, attrs) => {
186                let nested = match Element::from_events(qname, attrs) {
187                    Ok(v) => v,
188                    Err(FromEventsError::Invalid(e)) => return Err(e),
189                    Err(FromEventsError::Mismatch { .. }) => {
190                        unreachable!("<Element as FromXml>::from_events should accept everything!")
191                    }
192                };
193                self.nested = Some(Box::new(nested));
194                Ok(None)
195            }
196            Event::Text(_, text) => {
197                inner.append_text_node(text);
198                Ok(None)
199            }
200            Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())),
201        }
202    }
203}
204
205impl FromXml for Element {
206    type Builder = ElementFromEvents;
207
208    fn from_events(
209        qname: rxml::QName,
210        attrs: rxml::AttrMap,
211    ) -> Result<Self::Builder, FromEventsError> {
212        let mut prefixes = SimpleNamespaces::new();
213        let mut builder = Element::builder(qname.1, qname.0);
214        for ((namespace, name), value) in attrs.into_iter() {
215            if namespace.is_none() {
216                builder = builder.attr(name, String::from(value));
217            } else {
218                let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
219                let name = prefix.with_suffix(&name);
220                if is_new {
221                    builder = builder
222                        .prefix(
223                            Some(prefix.as_str().to_owned()),
224                            namespace.as_str().to_owned(),
225                        )
226                        .unwrap();
227                }
228                builder = builder.attr(name, String::from(value));
229            }
230        }
231
232        let element = builder.build();
233        Ok(Self::Builder {
234            inner: Some(element),
235            nested: None,
236        })
237    }
238}
239
/// Helper struct to streamingly parse a struct which implements conversion
/// from [`minidom::Element`].
///
/// The events are first collected into a [`minidom::Element`], which is
/// then converted into `T` once it is complete.
pub struct FromEventsViaElement<T> {
    /// Builder which collects the events into a [`minidom::Element`].
    inner: ElementFromEvents,
    // needed here because we need to keep the type `T` around until
    // `FromEventsBuilder` is done and it must always be the same type, so we
    // have to nail it down in the struct's type, and to do that we need to
    // bind it to a field. that's what PhantomData is for.
    _phantom: PhantomData<T>,
}
250
251impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsViaElement<T>
252where
253    Error: From<E>,
254{
255    /// Create a new streaming parser for `T`.
256    pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result<Self, FromEventsError> {
257        Ok(Self {
258            _phantom: PhantomData,
259            inner: Element::from_events(qname, attrs)?,
260        })
261    }
262}
263
264impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsBuilder for FromEventsViaElement<T>
265where
266    Error: From<E>,
267{
268    type Output = T;
269
270    fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
271        match self.inner.feed(ev) {
272            Ok(Some(v)) => Ok(Some(v.try_into()?)),
273            Ok(None) => Ok(None),
274            Err(e) => Err(e),
275        }
276    }
277}
278
/// Helper struct to stream a struct which implements conversion
/// to [`minidom::Element`].
///
/// The value is converted into a [`minidom::Element`] up front; the events
/// are then produced from that element.
pub struct IntoEventsViaElement {
    /// Event iterator over the converted element.
    inner: IntoEvents,
}
284
285impl IntoEventsViaElement {
286    /// Create a new streaming parser for `T`.
287    pub fn new<E, T>(value: T) -> Result<Self, crate::error::Error>
288    where
289        Error: From<E>,
290        minidom::Element: TryFrom<T, Error = E>,
291    {
292        let element: minidom::Element = value.try_into()?;
293        Ok(Self {
294            inner: element.into_event_iter()?,
295        })
296    }
297}
298
299impl Iterator for IntoEventsViaElement {
300    type Item = Result<Event, Error>;
301
302    fn next(&mut self) -> Option<Self::Item> {
303        self.inner.next()
304    }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Passing an element through `crate::transform` must yield an element
    /// which compares equal to the input.
    #[test]
    fn transform_element_is_equivalent() {
        const DOCUMENT: &str = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>";

        let original = DOCUMENT.parse::<Element>().unwrap();
        let roundtripped: Element = crate::transform(original.clone()).unwrap();
        assert_eq!(original, roundtripped);
    }
}