1//! Implementations of traits from this crate for minidom types
2
3// Copyright (c) 2024 Jonas Schäfer <jonas@zombofant.net>
4//
5// This Source Code Form is subject to the terms of the Mozilla Public
6// License, v. 2.0. If a copy of the MPL was not distributed with this
7// file, You can obtain one at http://mozilla.org/MPL/2.0/.
8use std::marker::PhantomData;
9use std::vec::IntoIter;
10
11use minidom::{Element, Node};
12
13use rxml::{
14 parser::EventMetrics,
15 writer::{SimpleNamespaces, TrackNamespace},
16 AttrMap, Event, Name, Namespace, NcName,
17};
18
19use crate::{
20 error::{Error, FromEventsError},
21 FromEventsBuilder, FromXml, IntoXml,
22};
23
/// State machine for converting a minidom Element into rxml events.
///
/// The states are traversed in declaration order: `Header`, then `Nodes`,
/// then `Fin`.
enum IntoEventsInner {
    /// Element header: the element is still intact and we need to generate
    /// the [`rxml::Event::StartElement`] event from the namespace, name, and
    /// attributes.
    Header(Element),

    /// Content: The contents of the element are streamed as events.
    Nodes {
        /// Remaining child nodes (text and/or children) to emit.
        remaining: IntoIter<Node>,

        /// When emitting a child element, this is a nested [`IntoEvents`]
        /// instance for that child element.
        nested: Option<Box<IntoEvents>>,
    },

    /// End of iteration: this state generates an end-of-iterator state.
    ///
    /// Note that the [`rxml::Event::EndElement`] event for the element itself
    /// is generated by the iterator already in the `Nodes` state, once no
    /// nested child iterator has events left and `remaining` returns `None`
    /// from its `next()` implementation.
    Fin,
}
49
50/// Create the parts for a [`rxml::Event::StartElement`] from a
51/// [`minidom::Element`].
52///
53/// Note that this copies the attribute data as well as namespace and name.
54/// This is due to limitations in the [`minidom::Element`] API.
55// NOTE to developers: The limitations are not fully trivial to overcome:
56// the attributes use a BTreeMap internally, which does not offer a `drain`
57// iterator.
58pub fn make_start_ev_parts(el: &Element) -> Result<(rxml::QName, AttrMap), Error> {
59 let name = NcName::try_from(el.name())?;
60 let namespace = Namespace::from(el.ns());
61
62 let mut attrs = AttrMap::new();
63 for (name, value) in el.attrs() {
64 let name = Name::try_from(name)?;
65 let (prefix, name) = name.split_name()?;
66 let namespace = if let Some(prefix) = prefix {
67 if prefix == "xml" {
68 Namespace::XML
69 } else {
70 let ns = match el.prefixes.get(&Some(prefix.into())) {
71 Some(v) => v,
72 None => {
73 panic!("undeclared xml namespace prefix in minidom::Element")
74 }
75 };
76 Namespace::from(ns.to_owned())
77 }
78 } else {
79 Namespace::NONE
80 };
81
82 attrs.insert(namespace, name, value.to_owned());
83 }
84
85 Ok(((namespace, name), attrs))
86}
87
88impl IntoEventsInner {
89 fn next(&mut self) -> Result<Option<Event>, Error> {
90 match self {
91 IntoEventsInner::Header(ref mut el) => {
92 let (qname, attrs) = make_start_ev_parts(el)?;
93 let event = Event::StartElement(EventMetrics::zero(), qname, attrs);
94
95 *self = IntoEventsInner::Nodes {
96 remaining: el.take_nodes().into_iter(),
97 nested: None,
98 };
99 return Ok(Some(event));
100 }
101 IntoEventsInner::Nodes {
102 ref mut nested,
103 ref mut remaining,
104 } => {
105 loop {
106 if let Some(nested) = nested.as_mut() {
107 if let Some(ev) = nested.next() {
108 return Some(ev).transpose();
109 }
110 }
111 match remaining.next() {
112 Some(Node::Text(text)) => {
113 return Ok(Some(Event::Text(EventMetrics::zero(), text)));
114 }
115 Some(Node::Element(el)) => {
116 *nested = Some(Box::new(el.into_event_iter()?));
117 // fallthrough to next loop iteration
118 }
119 None => {
120 // end of element, switch state and emit EndElement
121 *self = IntoEventsInner::Fin;
122 return Ok(Some(Event::EndElement(EventMetrics::zero())));
123 }
124 }
125 }
126 }
127 IntoEventsInner::Fin => Ok(None),
128 }
129 }
130}
131
132/// Convert a [`minidom::Element`] into [`rxml::Event`]s.
133///
134/// This can be constructed from the
135/// [`IntoXml::into_event_iter`][`crate::IntoXml::into_event_iter`]
136/// implementation on [`minidom::Element`].
137pub struct IntoEvents(IntoEventsInner);
138
139impl Iterator for IntoEvents {
140 type Item = Result<Event, Error>;
141
142 fn next(&mut self) -> Option<Self::Item> {
143 self.0.next().transpose()
144 }
145}
146
147impl IntoXml for Element {
148 type EventIter = IntoEvents;
149
150 fn into_event_iter(self) -> Result<Self::EventIter, Error> {
151 Ok(IntoEvents(IntoEventsInner::Header(self)))
152 }
153}
154
155/// Construct a [`minidom::Element`] from [`rxml::Event`]s
156///
157/// This can be constructed from the
158/// [`FromXml::from_events`][`crate::FromXml::from_events`]
159/// implementation on [`minidom::Element`].
160pub struct ElementFromEvents {
161 inner: Option<Element>,
162 nested: Option<Box<ElementFromEvents>>,
163}
164
165impl FromEventsBuilder for ElementFromEvents {
166 type Output = minidom::Element;
167
168 fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
169 let inner = self
170 .inner
171 .as_mut()
172 .expect("feed() called after it finished");
173 if let Some(nested) = self.nested.as_mut() {
174 match nested.feed(ev)? {
175 Some(v) => {
176 inner.append_child(v);
177 self.nested = None;
178 return Ok(None);
179 }
180 None => return Ok(None),
181 }
182 }
183 match ev {
184 Event::XmlDeclaration(_, _) => Ok(None),
185 Event::StartElement(_, qname, attrs) => {
186 let nested = match Element::from_events(qname, attrs) {
187 Ok(v) => v,
188 Err(FromEventsError::Invalid(e)) => return Err(e),
189 Err(FromEventsError::Mismatch { .. }) => {
190 unreachable!("<Element as FromXml>::from_events should accept everything!")
191 }
192 };
193 self.nested = Some(Box::new(nested));
194 Ok(None)
195 }
196 Event::Text(_, text) => {
197 inner.append_text_node(text);
198 Ok(None)
199 }
200 Event::EndElement(_) => Ok(Some(self.inner.take().unwrap())),
201 }
202 }
203}
204
205impl FromXml for Element {
206 type Builder = ElementFromEvents;
207
208 fn from_events(
209 qname: rxml::QName,
210 attrs: rxml::AttrMap,
211 ) -> Result<Self::Builder, FromEventsError> {
212 let mut prefixes = SimpleNamespaces::new();
213 let mut builder = Element::builder(qname.1, qname.0);
214 for ((namespace, name), value) in attrs.into_iter() {
215 if namespace.is_none() {
216 builder = builder.attr(name, String::from(value));
217 } else {
218 let (is_new, prefix) = prefixes.declare_with_auto_prefix(namespace.clone());
219 let name = prefix.with_suffix(&name);
220 if is_new {
221 builder = builder
222 .prefix(
223 Some(prefix.as_str().to_owned()),
224 namespace.as_str().to_owned(),
225 )
226 .unwrap();
227 }
228 builder = builder.attr(name, String::from(value));
229 }
230 }
231
232 let element = builder.build();
233 Ok(Self::Builder {
234 inner: Some(element),
235 nested: None,
236 })
237 }
238}
239
240/// Helper struct to streamingly parse a struct which implements conversion
241/// from [`minidom::Element`].
242pub struct FromEventsViaElement<T> {
243 inner: ElementFromEvents,
244 // needed here because we need to keep the type `T` around until
245 // `FromEventsBuilder` is done and it must always be the same type, so we
246 // have to nail it down in the struct's type, and to do that we need to
247 // bind it to a field. that's what PhantomData is for.
248 _phantom: PhantomData<T>,
249}
250
251impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsViaElement<T>
252where
253 Error: From<E>,
254{
255 /// Create a new streaming parser for `T`.
256 pub fn new(qname: rxml::QName, attrs: rxml::AttrMap) -> Result<Self, FromEventsError> {
257 Ok(Self {
258 _phantom: PhantomData,
259 inner: Element::from_events(qname, attrs)?,
260 })
261 }
262}
263
264impl<E, T: TryFrom<minidom::Element, Error = E>> FromEventsBuilder for FromEventsViaElement<T>
265where
266 Error: From<E>,
267{
268 type Output = T;
269
270 fn feed(&mut self, ev: Event) -> Result<Option<Self::Output>, Error> {
271 match self.inner.feed(ev) {
272 Ok(Some(v)) => Ok(Some(v.try_into()?)),
273 Ok(None) => Ok(None),
274 Err(e) => Err(e),
275 }
276 }
277}
278
279/// Helper struct to stream a struct which implements conversion
280/// to [`minidom::Element`].
281pub struct IntoEventsViaElement {
282 inner: IntoEvents,
283}
284
285impl IntoEventsViaElement {
286 /// Create a new streaming parser for `T`.
287 pub fn new<E, T>(value: T) -> Result<Self, crate::error::Error>
288 where
289 Error: From<E>,
290 minidom::Element: TryFrom<T, Error = E>,
291 {
292 let element: minidom::Element = value.try_into()?;
293 Ok(Self {
294 inner: element.into_event_iter()?,
295 })
296 }
297}
298
299impl Iterator for IntoEventsViaElement {
300 type Item = Result<Event, Error>;
301
302 fn next(&mut self) -> Option<Self::Item> {
303 self.inner.next()
304 }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Passing an element through `crate::transform` must yield an element
    /// equal to the original.
    #[test]
    fn transform_element_is_equivalent() {
        let xml = "<foo xmlns='urn:a' a='b' c='d'><child a='x'/><child a='y'>some text</child><child xmlns='urn:b'><nested-child/></child></foo>";
        let original: Element = xml.parse().unwrap();
        let roundtripped: Element = crate::transform(original.clone()).unwrap();
        assert_eq!(original, roundtripped);
    }
}