Replace xml-rs by quick_xml

Bastien Orivel created

quick_xml is way faster than xml-rs

Here is an example with a quick atom parser:
    With xml-rs:
        test parse_factorio_atom ... bench:   3,295,678 ns/iter (+/- 165,851)
    With quick_xml:
        test parse_factorio_atom ... bench:     203,215 ns/iter (+/- 13,485)

Unfortunately I had to break the API for this change to happen.
* Element::from_reader now takes `R: BufRead` instead of `R: Read`
* Element::write_to now takes `W: io::Write` instead of `EventWriter<W: Write>`

This migration also allow us to have a write_to function which assumes
we're already in a given namespace (see `write_to_in_namespace`).

Change summary

Cargo.toml     |   6 
src/element.rs | 223 ++++++++++++++++++++++++++++-----------------------
src/error.rs   |  22 ++--
src/lib.rs     |   2 
src/tests.rs   |  24 +++--
5 files changed, 153 insertions(+), 124 deletions(-)

Detailed changes

Cargo.toml 🔗

@@ -1,8 +1,8 @@
 [package]
 name = "minidom"
-version = "0.4.3"
+version = "0.5.0"
 authors = ["lumi <lumi@pew.im>", "Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>", "Bastien Orivel <eijebong+minidom@bananium.fr>"]
-description = "A small, simple DOM implementation on top of xml-rs."
+description = "A small, simple DOM implementation on top of quick-xml"
 homepage = "https://gitlab.com/lumi/minidom-rs"
 repository = "https://gitlab.com/lumi/minidom-rs"
 documentation = "https://docs.rs/minidom"
@@ -14,5 +14,5 @@ license = "MIT"
 gitlab = { repository = "lumi/minidom-rs" }
 
 [dependencies]
-xml-rs = "0.4.1"
+quick-xml = "0.7.3"
 error-chain = "0.10.0"

src/element.rs 🔗

@@ -1,18 +1,16 @@
 //! Provides an `Element` type, which represents DOM nodes, and a builder to create them with.
 
-use std::io::prelude::*;
-use std::io::Cursor;
-use std::collections::BTreeMap;
-use std::collections::btree_map;
+use std::io:: Write;
+use std::collections::{btree_map, BTreeMap};
 
-use std::fmt;
+use std::str;
 
 use error::{Error, ErrorKind, Result};
 
-use xml::reader::{XmlEvent as ReaderEvent, EventReader};
-use xml::writer::{XmlEvent as WriterEvent, EventWriter, EmitterConfig};
-use xml::name::Name;
-use xml::namespace::NS_NO_PREFIX;
+use quick_xml::reader::Reader as EventReader;
+use quick_xml::events::{Event, BytesStart};
+
+use std::io::BufRead;
 
 use std::str::FromStr;
 
@@ -69,9 +67,18 @@ impl Node {
             Node::Text(ref s) => Some(s),
         }
     }
+
+    fn write_to_inner<W: Write>(&self, writer: &mut W, last_namespace: &mut Option<String>) -> Result<()>{
+        match *self {
+            Node::Element(ref elmt) => elmt.write_to_inner(writer, last_namespace)?,
+            Node::Text(ref s) => write!(writer, "{}", s)?,
+        }
+
+        Ok(())
+    }
 }
 
-#[derive(Clone, PartialEq, Eq)]
+#[derive(Clone, PartialEq, Eq, Debug)]
 /// A struct representing a DOM Element.
 pub struct Element {
     name: String,
@@ -82,26 +89,18 @@ pub struct Element {
 
 impl<'a> From<&'a Element> for String {
     fn from(elem: &'a Element) -> String {
-        let mut out = Vec::new();
-        let config = EmitterConfig::new()
-                    .write_document_declaration(false);
-        elem.write_to(&mut EventWriter::new_with_config(&mut out, config)).unwrap();
-        String::from_utf8(out).unwrap()
+        let mut writer = Vec::new();
+        elem.write_to(&mut writer).unwrap();
+        String::from_utf8(writer).unwrap()
     }
 }
 
-impl fmt::Debug for Element {
-    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        write!(fmt, "{}", String::from(self))?;
-        Ok(())
-    }
-}
 
 impl FromStr for Element {
     type Err = Error;
 
     fn from_str(s: &str) -> Result<Element> {
-        let mut reader = EventReader::new(Cursor::new(s));
+        let mut reader = EventReader::from_str(s);
         Element::from_reader(&mut reader)
     }
 }
@@ -246,106 +245,105 @@ impl Element {
     }
 
     /// Parse a document from an `EventReader`.
-    pub fn from_reader<R: Read>(reader: &mut EventReader<R>) -> Result<Element> {
+    pub fn from_reader<R: BufRead>(reader: &mut EventReader<R>) -> Result<Element> {
+        let mut buf = Vec::new();
+        let root: Element;
+
         loop {
-            let e = reader.next()?;
+            let e = reader.read_event(&mut buf)?;
             match e {
-                ReaderEvent::StartElement { name, attributes, namespace } => {
-                    let attributes = attributes.into_iter()
-                                               .map(|o| {
-                                                    (match o.name.prefix {
-                                                        Some(prefix) => format!("{}:{}", prefix, o.name.local_name),
-                                                        None => o.name.local_name
-                                                    },
-                                                    o.value)
-                                                })
-                                               .collect();
-                    let ns = if let Some(ref prefix) = name.prefix {
-                        namespace.get(prefix)
-                    }
-                    else {
-                        namespace.get(NS_NO_PREFIX)
-                    }.map(|s| s.to_owned());
-
-                    let mut root = Element::new(name.local_name, ns, attributes, Vec::new());
-                    root.from_reader_inner(reader)?;
-                    return Ok(root);
+                Event::Empty(ref e) | Event::Start(ref e) => {
+                    root = build_element(e)?; // FIXME: could be break build_element(e)? when break value is stable
+                    break;
                 },
-                ReaderEvent::EndDocument => {
+                Event::Eof => {
                     bail!(ErrorKind::EndOfDocument);
                 },
                 _ => () // TODO: may need more errors
             }
-        }
-    }
+        };
+
+        let mut stack = vec![root];
 
-    #[cfg_attr(feature = "cargo-clippy", allow(wrong_self_convention))]
-    fn from_reader_inner<R: Read>(&mut self, reader: &mut EventReader<R>) -> Result<()> {
         loop {
-            let e = reader.next()?;
-            match e {
-                ReaderEvent::StartElement { name, attributes, namespace } => {
-                    let attributes = attributes.into_iter()
-                                               .map(|o| {
-                                                    (match o.name.prefix {
-                                                        Some(prefix) => format!("{}:{}", prefix, o.name.local_name),
-                                                        None => o.name.local_name
-                                                    },
-                                                    o.value)
-                                                })
-                                               .collect();
-                    let ns = if let Some(ref prefix) = name.prefix {
-                        namespace.get(prefix)
-                    }
-                    else {
-                        namespace.get(NS_NO_PREFIX)
-                    }.map(|s| s.to_owned());
-                    let elem = Element::new(name.local_name, ns, attributes, Vec::with_capacity(1));
-                    let elem_ref = self.append_child(elem);
-                    elem_ref.from_reader_inner(reader)?;
+            match reader.read_event(&mut buf)? {
+                Event::Empty(ref e) => {
+                    let elem = build_element(e)?;
+                    // Since there is no Event::End after, directly append it to the current node
+                    stack.last_mut().unwrap().append_child(elem);
                 },
-                ReaderEvent::EndElement { .. } => {
-                    // TODO: may want to check whether we're closing the correct element
-                    return Ok(());
+                Event::Start(ref e) => {
+                    let elem = build_element(e)?;
+                    stack.push(elem);
                 },
-                ReaderEvent::Characters(s) | ReaderEvent::CData(s) => {
-                    self.append_text_node(s);
+                Event::End(ref e) => {
+                    if stack.len() <= 1 {
+                        break;
+                    }
+                    let elem = stack.pop().unwrap();
+                    if let Some(to) = stack.last_mut() {
+                        if elem.name().as_bytes() != e.name() {
+                            bail!(ErrorKind::InvalidElementClosed);
+                        }
+                        to.append_child(elem);
+                    }
                 },
-                ReaderEvent::EndDocument => {
-                    bail!(ErrorKind::EndOfDocument);
+                Event::Text(s) | Event::CData(s) => {
+                    let text = s.unescape_and_decode(reader)?;
+                    if text != "" {
+                        let mut current_elem = stack.last_mut().unwrap();
+                        current_elem.append_text_node(text);
+                    }
+                },
+                Event::Eof => {
+                    break;
                 },
                 _ => (), // TODO: may need to implement more
             }
         }
+        Ok(stack.pop().unwrap())
     }
 
-    /// Output a document to an `EventWriter`.
-    pub fn write_to<W: Write>(&self, writer: &mut EventWriter<W>) -> Result<()> {
-        let name = if let Some(ref ns) = self.namespace {
-            Name::qualified(&self.name, ns, None)
-        }
-        else {
-            Name::local(&self.name)
-        };
-        let mut start = WriterEvent::start_element(name);
+    /// Output a document to a `Writer`.
+    pub fn write_to<W: Write>(&self, writer: &mut W) -> Result<()> {
+        let mut last_namespace = None;
+        write!(writer, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")?;
+        self.write_to_inner(writer, &mut last_namespace)
+    }
+
+    /// Output a document to a `Writer` assuming you're already in the provided namespace
+    pub fn write_to_in_namespace<W: Write>(&self, writer: &mut W, namespace: &str) -> Result<()> {
+        write!(writer, "<?xml version=\"1.0\" encoding=\"utf-8\"?>")?;
+        self.write_to_inner(writer, &mut Some(namespace.to_owned()))
+    }
+
+    fn write_to_inner<W: Write>(&self, writer: &mut W, last_namespace: &mut Option<String>) -> Result<()> {
+        write!(writer, "<")?;
+        write!(writer, "{}", self.name)?;
+
         if let Some(ref ns) = self.namespace {
-            start = start.default_ns(ns.clone());
+            if *last_namespace != self.namespace {
+                write!(writer, " xmlns=\"{}\"", ns)?;
+                *last_namespace = Some(ns.clone());
+            }
+        }
+
+        for (key, value) in &self.attributes {
+            write!(writer, " {}=\"{}\"", key, value)?;
         }
-        for attr in &self.attributes { // TODO: I think this could be done a lot more efficiently
-            start = start.attr(Name::local(attr.0), attr.1);
+
+        if self.children.is_empty() {
+            write!(writer, " />")?;
+            return Ok(())
         }
-        writer.write(start)?;
+
+        write!(writer, ">")?;
+
         for child in &self.children {
-            match *child {
-                Node::Element(ref e) => {
-                    e.write_to(writer)?;
-                },
-                Node::Text(ref s) => {
-                    writer.write(WriterEvent::characters(s))?;
-                },
-            }
+            child.write_to_inner(writer, last_namespace)?;
         }
-        writer.write(WriterEvent::end_element())?;
+
+        write!(writer, "</{}>", self.name)?;
         Ok(())
     }
 
@@ -354,7 +352,7 @@ impl Element {
     /// # Examples
     ///
     /// ```rust
-    /// use minidom::{Element, Node};
+    /// use minidom::Element;
     ///
     /// let elem: Element = "<root>a<c1 />b<c2 />c</root>".parse().unwrap();
     ///
@@ -592,6 +590,31 @@ impl Element {
     }
 }
 
+fn build_element(event: &BytesStart) -> Result<Element> {
+    let mut attributes = event.attributes()
+                               .map(|o| {
+                                    let o = o?;
+                                    let key = str::from_utf8(o.key)?.to_owned();
+                                    let value = str::from_utf8(o.value)?.to_owned();
+                                    Ok((key, value))
+                                   }
+                                )
+                               .collect::<Result<BTreeMap<String, String>>>()?;
+        let mut ns_key = None;
+        for (key, _) in &attributes {
+            if key == "xmlns" || key.starts_with("xmlns:") {
+                ns_key = Some(key.clone());
+            }
+        }
+
+        let ns = match ns_key {
+            None => None,
+            Some(key) => attributes.remove(&key),
+        };
+        let name = str::from_utf8(event.name())?.to_owned();
+        Ok(Element::new(name, ns, attributes, Vec::new()))
+}
+
 /// An iterator over references to child elements of an `Element`.
 pub struct Children<'a> {
     iter: slice::Iter<'a, Node>,

src/error.rs 🔗

@@ -1,30 +1,30 @@
 //! Provides an error type for this crate.
 
-use std::io;
-
 use std::convert::From;
 
-use xml::writer::Error as WriterError;
-use xml::reader::Error as ReaderError;
-
 error_chain! {
     foreign_links {
-        XmlWriterError(WriterError)
-            /// An error with writing an XML event, from xml::writer::EventWriter.
+        XmlError(::quick_xml::errors::Error)
+            /// An error from quick_xml.
         ;
-        XmlReaderError(ReaderError)
-            /// An error with reading an XML event, from xml::reader::EventReader.
+        Utf8Error(::std::str::Utf8Error)
+            /// An UTF-8 conversion error.
         ;
-        IoError(io::Error)
+        IoError(::std::io::Error)
             /// An I/O error, from std::io.
         ;
     }
 
     errors {
-        /// En error which is returned when the end of the document was reached prematurely.
+        /// An error which is returned when the end of the document was reached prematurely.
         EndOfDocument {
             description("the end of the document has been reached prematurely")
             display("the end of the document has been reached prematurely")
         }
+        /// An error which is returned when an element is closed when it shouldn't be
+        InvalidElementClosed {
+            description("The XML is invalid, an element was wrongly closed")
+            display("the XML is invalid, an element was wrongly closed")
+        }
     }
 }

src/lib.rs 🔗

@@ -64,7 +64,7 @@
 //! minidom = "*"
 //! ```
 
-extern crate xml;
+extern crate quick_xml;
 #[macro_use] extern crate error_chain;
 
 pub mod error;

src/tests.rs 🔗

@@ -1,9 +1,6 @@
-use std::io::Cursor;
-
 use std::iter::Iterator;
 
-use xml::reader::EventReader;
-use xml::writer::EventWriter;
+use quick_xml::reader::Reader;
 
 use element::Element;
 
@@ -32,19 +29,18 @@ fn build_test_tree() -> Element {
 
 #[test]
 fn reader_works() {
-    let mut reader = EventReader::new(Cursor::new(TEST_STRING));
+    let mut reader = Reader::from_str(TEST_STRING);
     assert_eq!(Element::from_reader(&mut reader).unwrap(), build_test_tree());
 }
 
 #[test]
 fn writer_works() {
     let root = build_test_tree();
-    let mut out = Vec::new();
+    let mut writer = Vec::new();
     {
-        let mut writer = EventWriter::new(&mut out);
         root.write_to(&mut writer).unwrap();
     }
-    assert_eq!(String::from_utf8(out).unwrap(), TEST_STRING);
+    assert_eq!(String::from_utf8(writer).unwrap(), TEST_STRING);
 }
 
 #[test]
@@ -110,8 +106,18 @@ fn two_elements_with_same_arguments_different_order_are_equal() {
 
 #[test]
 fn namespace_attributes_works() {
-    let mut reader = EventReader::new(Cursor::new(TEST_STRING));
+    let mut reader = Reader::from_str(TEST_STRING);
     let root = Element::from_reader(&mut reader).unwrap();
     assert_eq!("en", root.attr("xml:lang").unwrap());
     assert_eq!("fr", root.get_child("child", "child_ns").unwrap().attr("xml:lang").unwrap());
 }
+
+#[test]
+fn wrongly_closed_elements_error() {
+    let elem1 = "<a></b>".parse::<Element>();
+    assert!(elem1.is_err());
+    let elem1 = "<a></c></a>".parse::<Element>();
+    assert!(elem1.is_err());
+    let elem1 = "<a><c><d/></c></a>".parse::<Element>();
+    assert!(elem1.is_ok());
+}