xhtml.rs

  1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
  2//
  3// This Source Code Form is subject to the terms of the Mozilla Public
  4// License, v. 2.0. If a copy of the MPL was not distributed with this
  5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7use crate::util::error::Error;
  8use crate::message::MessagePayload;
  9use crate::ns;
 10use minidom::{Element, Node};
 11use std::convert::TryFrom;
 12use std::collections::HashMap;
 13
 14// TODO: Use a proper lang type.
 15type Lang = String;
 16
 17/// Container for formatted text.
 18#[derive(Debug, Clone)]
 19pub struct XhtmlIm {
 20    /// Map of language to body element.
 21    bodies: HashMap<Lang, Body>,
 22}
 23
 24impl XhtmlIm {
 25    /// Serialise formatted text to HTML.
 26    pub fn to_html(self) -> String {
 27        let mut html = Vec::new();
 28        // TODO: use the best language instead.
 29        for (lang, body) in self.bodies {
 30            if lang.is_empty() {
 31                assert!(body.xml_lang.is_none());
 32            } else {
 33                assert_eq!(Some(lang), body.xml_lang);
 34            }
 35            for tag in body.children {
 36                html.push(tag.to_html());
 37            }
 38            break;
 39        }
 40        html.concat()
 41    }
 42
 43    /// Removes all unknown elements.
 44    fn flatten(self) -> XhtmlIm {
 45        let mut bodies = HashMap::new();
 46        for (lang, body) in self.bodies {
 47            let children = body.children.into_iter().fold(vec![], |mut acc, child| {
 48                match child {
 49                    Child::Tag(Tag::Unknown(children)) => acc.extend(children),
 50                    any => acc.push(any),
 51                }
 52                acc
 53            });
 54            let body = Body {
 55                children,
 56                ..body
 57            };
 58            bodies.insert(lang, body);
 59        }
 60        XhtmlIm {
 61            bodies,
 62        }
 63    }
 64}
 65
 66impl MessagePayload for XhtmlIm {}
 67
 68impl TryFrom<Element> for XhtmlIm {
 69    type Error = Error;
 70
 71    fn try_from(elem: Element) -> Result<XhtmlIm, Error> {
 72        check_self!(elem, "html", XHTML_IM);
 73        check_no_attributes!(elem, "html");
 74
 75        let mut bodies = HashMap::new();
 76        for child in elem.children() {
 77            if child.is("body", ns::XHTML) {
 78                let child = child.clone();
 79                let lang = match child.attr("xml:lang") {
 80                    Some(lang) => lang,
 81                    None => "",
 82                }.to_string();
 83                let body = Body::try_from(child)?;
 84                match bodies.insert(lang, body) {
 85                    None => (),
 86                    Some(_) => return Err(Error::ParseError("Two identical language bodies found in XHTML-IM."))
 87                }
 88            } else {
 89                return Err(Error::ParseError("Unknown element in XHTML-IM."));
 90            }
 91        }
 92
 93        Ok(XhtmlIm { bodies }.flatten())
 94    }
 95}
 96
 97impl From<XhtmlIm> for Element {
 98    fn from(wrapper: XhtmlIm) -> Element {
 99        Element::builder("html")
100            .ns(ns::XHTML_IM)
101            .append_all(wrapper.bodies.into_iter().map(|(lang, body)| {
102                if lang.is_empty() {
103                    assert!(body.xml_lang.is_none());
104                } else {
105                    assert_eq!(Some(lang), body.xml_lang);
106                }
107                Element::from(body)
108            }))
109            .build()
110    }
111}
112
113#[derive(Debug, Clone)]
114enum Child {
115    Tag(Tag),
116    Text(String),
117}
118
119impl Child {
120    fn to_html(self) -> String {
121        match self {
122            Child::Tag(tag) => tag.to_html(),
123            Child::Text(text) => text,
124        }
125    }
126}
127
/// One CSS declaration of a `style` attribute.
#[derive(Clone, Debug)]
struct Property {
    /// Left-hand side of the declaration; serialised before the colon.
    key: String,
    /// Right-hand side of the declaration; serialised after the colon.
    value: String,
}

/// The declarations of one `style` attribute, in order.
type Css = Vec<Property>;
135
136fn get_style_string(style: Css) -> Option<String> {
137    let mut result = vec![];
138    for Property { key, value } in style {
139        result.push(format!("{}: {}", key, value));
140    }
141    if result.is_empty() {
142        return None;
143    }
144    Some(result.join("; "))
145}
146
147#[derive(Debug, Clone)]
148struct Body {
149    style: Css,
150    xml_lang: Option<String>,
151    children: Vec<Child>,
152}
153
154impl TryFrom<Element> for Body {
155    type Error = Error;
156
157    fn try_from(elem: Element) -> Result<Body, Error> {
158        let mut children = vec![];
159        for child in elem.nodes() {
160            match child {
161                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
162                Node::Text(text) => children.push(Child::Text(text.clone())),
163                Node::Comment(_) => unimplemented!() // XXX: remove!
164            }
165        }
166
167        Ok(Body { style: parse_css(elem.attr("style")), xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()), children })
168    }
169}
170
171impl From<Body> for Element {
172    fn from(body: Body) -> Element {
173        Element::builder("body")
174            .ns(ns::XHTML)
175            .attr("style", get_style_string(body.style))
176            .attr("xml:lang", body.xml_lang)
177            .append_all(children_to_nodes(body.children))
178            .build()
179    }
180}
181
182#[derive(Debug, Clone)]
183enum Tag {
184    A { href: Option<String>, style: Css, type_: Option<String>, children: Vec<Child> },
185    Blockquote { style: Css, children: Vec<Child> },
186    Br,
187    Cite { style: Css, children: Vec<Child> },
188    Em { children: Vec<Child> },
189    Img { src: Option<String>, alt: Option<String> }, // TODO: height, width, style
190    Li { style: Css, children: Vec<Child> },
191    Ol { style: Css, children: Vec<Child> },
192    P { style: Css, children: Vec<Child> },
193    Span { style: Css, children: Vec<Child> },
194    Strong { children: Vec<Child> },
195    Ul { style: Css, children: Vec<Child> },
196    Unknown(Vec<Child>),
197}
198
199impl Tag {
200    fn to_html(self) -> String {
201        match self {
202            Tag::A { href, style, type_, children } => {
203                let href = write_attr(href, "href");
204                let style = write_attr(get_style_string(style), "style");
205                let type_ = write_attr(type_, "type");
206                format!("<a{}{}{}>{}</a>", href, style, type_, children_to_html(children))
207            },
208            Tag::Blockquote { style, children } => {
209                let style = write_attr(get_style_string(style), "style");
210                format!("<blockquote{}>{}</blockquote>", style, children_to_html(children))
211            },
212            Tag::Br => String::from("<br>"),
213            Tag::Cite { style, children } => {
214                let style = write_attr(get_style_string(style), "style");
215                format!("<cite{}>{}</cite>", style, children_to_html(children))
216            },
217            Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
218            Tag::Img { src, alt } => {
219                let src = write_attr(src, "src");
220                let alt = write_attr(alt, "alt");
221                format!("<img{}{}>", src, alt)
222            }
223            Tag::Li { style, children } => {
224                let style = write_attr(get_style_string(style), "style");
225                format!("<li{}>{}</li>", style, children_to_html(children))
226            }
227            Tag::Ol { style, children } => {
228                let style = write_attr(get_style_string(style), "style");
229                format!("<ol{}>{}</ol>", style, children_to_html(children))
230            }
231            Tag::P { style, children } => {
232                let style = write_attr(get_style_string(style), "style");
233                format!("<p{}>{}</p>", style, children_to_html(children))
234            }
235            Tag::Span { style, children } => {
236                let style = write_attr(get_style_string(style), "style");
237                format!("<span{}>{}</span>", style, children_to_html(children))
238            }
239            Tag::Strong { children } => format!("<strong>{}</strong>", children_to_html(children)),
240            Tag::Ul { style, children } => {
241                let style = write_attr(get_style_string(style), "style");
242                format!("<ul{}>{}</ul>", style, children_to_html(children))
243            }
244            Tag::Unknown(_) => panic!("No unknown element should be present in XHTML-IM after parsing."),
245        }
246    }
247}
248
249impl TryFrom<Element> for Tag {
250    type Error = Error;
251
252    fn try_from(elem: Element) -> Result<Tag, Error> {
253        let mut children = vec![];
254        for child in elem.nodes() {
255            match child {
256                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
257                Node::Text(text) => children.push(Child::Text(text.clone())),
258                Node::Comment(_) => unimplemented!() // XXX: remove!
259            }
260        }
261
262        Ok(match elem.name() {
263            "a" => Tag::A { href: elem.attr("href").map(|href| href.to_string()), style: parse_css(elem.attr("style")), type_: elem.attr("type").map(|type_| type_.to_string()), children },
264            "blockquote" => Tag::Blockquote { style: parse_css(elem.attr("style")), children },
265            "br" => Tag::Br,
266            "cite" => Tag::Cite { style: parse_css(elem.attr("style")), children },
267            "em" => Tag::Em { children },
268            "img" => Tag::Img { src: elem.attr("src").map(|src| src.to_string()), alt: elem.attr("alt").map(|alt| alt.to_string()) },
269            "li" => Tag::Li { style: parse_css(elem.attr("style")), children },
270            "ol" => Tag::Ol { style: parse_css(elem.attr("style")), children },
271            "p" => Tag::P { style: parse_css(elem.attr("style")), children },
272            "span" => Tag::Span { style: parse_css(elem.attr("style")), children },
273            "strong" => Tag::Strong { children },
274            "ul" => Tag::Ul { style: parse_css(elem.attr("style")), children },
275            _ => Tag::Unknown(children),
276        })
277    }
278}
279
280impl From<Tag> for Element {
281    fn from(tag: Tag) -> Element {
282        let (name, attrs, children) = match tag {
283            Tag::A { href, style, type_, children } => ("a", {
284                let mut attrs = vec![];
285                if let Some(href) = href {
286                    attrs.push(("href", href));
287                }
288                if let Some(style) = get_style_string(style) {
289                    attrs.push(("style", style));
290                }
291                if let Some(type_) = type_ {
292                    attrs.push(("type", type_));
293                }
294                attrs
295            }, children),
296            Tag::Blockquote { style, children } => ("blockquote", match get_style_string(style) {
297                Some(style) => vec![("style", style)],
298                None => vec![],
299            }, children),
300            Tag::Br => ("br", vec![], vec![]),
301            Tag::Cite { style, children } => ("cite", match get_style_string(style) {
302                Some(style) => vec![("style", style)],
303                None => vec![],
304            }, children),
305            Tag::Em { children } => ("em", vec![], children),
306            Tag::Img { src, alt } => {
307                let mut attrs = vec![];
308                if let Some(src) = src {
309                    attrs.push(("src", src));
310                }
311                if let Some(alt) = alt {
312                    attrs.push(("alt", alt));
313                }
314                ("img", attrs, vec![])
315            },
316            Tag::Li { style, children } => ("li", match get_style_string(style) {
317                Some(style) => vec![("style", style)],
318                None => vec![],
319            }, children),
320            Tag::Ol { style, children } => ("ol", match get_style_string(style) {
321                Some(style) => vec![("style", style)],
322                None => vec![],
323            }, children),
324            Tag::P { style, children } => ("p", match get_style_string(style) {
325                Some(style) => vec![("style", style)],
326                None => vec![],
327            }, children),
328            Tag::Span { style, children } => ("span", match get_style_string(style) {
329                Some(style) => vec![("style", style)],
330                None => vec![],
331            }, children),
332            Tag::Strong { children } => ("strong", vec![], children),
333            Tag::Ul { style, children } => ("ul", match get_style_string(style) {
334                Some(style) => vec![("style", style)],
335                None => vec![],
336            }, children),
337            Tag::Unknown(_) => panic!("No unknown element should be present in XHTML-IM after parsing."),
338        };
339        let mut builder = Element::builder(name)
340            .ns(ns::XHTML)
341            .append_all(children_to_nodes(children));
342        for (key, value) in attrs {
343            builder = builder.attr(key, value);
344        }
345        builder.build()
346    }
347}
348
349fn children_to_nodes(children: Vec<Child>) -> impl IntoIterator<Item = Node> {
350    children.into_iter().map(|child| match child {
351        Child::Tag(tag) => Node::Element(Element::from(tag)),
352        Child::Text(text) => Node::Text(text),
353    })
354}
355
356fn children_to_html(children: Vec<Child>) -> String {
357    children.into_iter().map(|child| child.to_html()).collect::<Vec<_>>().concat()
358}
359
/// Formats an optional attribute as ` name='value'` (with the leading space), or as an empty
/// string when the attribute is absent.
///
/// The value is escaped: it originates from a received message, and an unescaped `'` would
/// close the attribute and let the rest of the value be read as further attributes or markup.
/// `name` is written as-is; every caller in this file passes a literal.
fn write_attr(attr: Option<String>, name: &str) -> String {
    let value = match attr {
        Some(value) => value,
        None => return String::new(),
    };

    let mut html = String::with_capacity(name.len() + value.len() + 4);
    html.push(' ');
    html.push_str(name);
    html.push_str("='");
    for c in value.chars() {
        match c {
            // `&` must be handled like any other character here, in a single pass, so that
            // entities produced for other characters are never escaped a second time.
            '&' => html.push_str("&amp;"),
            '<' => html.push_str("&lt;"),
            '>' => html.push_str("&gt;"),
            '"' => html.push_str("&quot;"),
            '\'' => html.push_str("&#39;"),
            other => html.push(other),
        }
    }
    html.push('\'');
    html
}
366
367fn parse_css(style: Option<&str>) -> Css {
368    let mut properties = vec![];
369    if let Some(style) = style {
370        // TODO: make that parser a bit more resilient to things.
371        for part in style.split(";") {
372            let mut part = part.splitn(2, ":").map(|a| a.to_string()).collect::<Vec<_>>();
373            let key = part.pop().unwrap();
374            let value = part.pop().unwrap();
375            properties.push(Property { key, value });
376        }
377    }
378    properties
379}
380
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an XML literal into an element, panicking on malformed input.
    fn parse(xml: &str) -> Element {
        xml.parse().unwrap()
    }

    // Sizes are not known on 32-bit targets, hence the ignored placeholder values.
    #[cfg(target_pointer_width = "32")]
    #[test]
    #[ignore]
    fn test_size() {
        assert_size!(XhtmlIm, 0);
        assert_size!(Child, 0);
        assert_size!(Tag, 0);
    }

    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 56);
        assert_size!(Child, 112);
        assert_size!(Tag, 104);
    }

    /// Wrappers holding zero, one and two empty bodies parse to as many entries.
    #[test]
    fn test_empty() {
        let cases = [
            ("<html xmlns='http://jabber.org/protocol/xhtml-im'/>", 0),
            ("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>", 1),
            ("<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>", 2),
        ];
        for (xml, expected) in cases.iter() {
            let xhtml = XhtmlIm::try_from(parse(xml)).unwrap();
            assert_eq!(xhtml.bodies.len(), *expected);
        }
    }

    /// Two bodies without `xml:lang` collide on the same key and are rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem = parse("<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>");
        match XhtmlIm::try_from(elem).unwrap_err() {
            Error::ParseError(message) => {
                assert_eq!(message, "Two identical language bodies found in XHTML-IM.")
            }
            _ => panic!(),
        }
    }

    /// A body with a single paragraph parses into `P` holding one text child.
    #[test]
    fn test_tag() {
        let empty = Body::try_from(parse("<body xmlns='http://www.w3.org/1999/xhtml'/>")).unwrap();
        assert_eq!(empty.children.len(), 0);

        let mut body = Body::try_from(parse(
            "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>",
        ))
        .unwrap();
        assert_eq!(body.style.len(), 0);
        assert_eq!(body.xml_lang, None);
        assert_eq!(body.children.len(), 1);

        let mut children = match body.children.pop() {
            Some(Child::Tag(Tag::P { style, children })) => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            }
            _ => panic!(),
        };
        match children.pop() {
            Some(Child::Text(text)) => assert_eq!(text, "Hello world!"),
            _ => panic!(),
        }
    }

    /// An unknown element directly under the body is replaced by its text content.
    #[test]
    fn test_unknown_element() {
        let parsed = XhtmlIm::try_from(parse("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>")).unwrap();

        let html = parsed.clone().to_html();
        assert_eq!(html, "Hello world!");

        let elem = Element::from(parsed);
        assert_eq!(String::from(&elem), "<?xml version=\"1.0\" encoding=\"utf-8\"?><html xmlns=\"http://jabber.org/protocol/xhtml-im\"><body xmlns=\"http://www.w3.org/1999/xhtml\">Hello world!</body></html>");
    }

    /// Known tags are rendered back to the equivalent HTML.
    #[test]
    fn test_generate_html() {
        let cases = [
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>",
                "<p>Hello world!</p>",
            ),
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>",
                "<p>Hello <strong>world</strong>!</p>",
            ),
        ];
        for (xml, expected) in cases.iter() {
            let xhtml_im = XhtmlIm::try_from(parse(xml)).unwrap();
            assert_eq!(xhtml_im.to_html(), *expected);
        }
    }

    /// A tree can be assembled by hand from the private types.
    #[test]
    fn generate_tree() {
        let world = "world".to_string();

        let strong = Tag::Strong {
            children: vec![Child::Text(world)],
        };
        let paragraph = Tag::P {
            style: vec![],
            children: vec![
                Child::Text("Hello ".to_string()),
                Child::Tag(strong),
                Child::Text("!".to_string()),
            ],
        };
        let _body = Body {
            style: vec![],
            xml_lang: Some("en".to_string()),
            children: vec![Child::Tag(paragraph)],
        };
    }
}