xhtml.rs

  1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
  2//
  3// This Source Code Form is subject to the terms of the Mozilla Public
  4// License, v. 2.0. If a copy of the MPL was not distributed with this
  5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7use crate::util::error::Error;
  8use crate::message::MessagePayload;
  9use crate::ns;
 10use minidom::{Element, Node};
 11use std::convert::TryFrom;
 12use std::collections::HashMap;
 13
// TODO: Use a proper lang type.
/// Language tag used as the key of `XhtmlIm::bodies`.
///
/// When parsing, this is the value of a body's `xml:lang` attribute, or the
/// empty string when the attribute is absent.
type Lang = String;
 16
/// Container for formatted text.
///
/// Holds the parsed `<body/>` children of an `<html/>` element, at most one
/// per language.
#[derive(Debug, Clone)]
pub struct XhtmlIm {
    /// Map of language to body element.
    ///
    /// The key is the body's `xml:lang` value, or the empty string when the
    /// body has no such attribute. Every value is expected to be a
    /// `Tag::Body`; the conversion code treats any other variant as
    /// unreachable.
    bodies: HashMap<Lang, Tag>,
}
 23
 24impl XhtmlIm {
 25    /// Serialise formatted text to HTML.
 26    pub fn to_html(self) -> String {
 27        let mut html = Vec::new();
 28        // TODO: use the best language instead.
 29        for (lang, body) in self.bodies {
 30            if let Tag::Body { style: _, xml_lang, children } = body {
 31                if lang.is_empty() {
 32                    assert!(xml_lang.is_none());
 33                } else {
 34                    assert_eq!(Some(lang), xml_lang);
 35                }
 36                for tag in children {
 37                    html.push(tag.to_html());
 38                }
 39                break;
 40            } else {
 41                unreachable!();
 42            }
 43        }
 44        html.concat()
 45    }
 46}
 47
// Opts XhtmlIm into the crate's `MessagePayload` trait; the impl body is
// empty, so nothing has to be provided here.
impl MessagePayload for XhtmlIm {}
 49
 50impl TryFrom<Element> for XhtmlIm {
 51    type Error = Error;
 52
 53    fn try_from(elem: Element) -> Result<XhtmlIm, Error> {
 54        check_self!(elem, "html", XHTML_IM);
 55        check_no_attributes!(elem, "html");
 56
 57        let mut bodies = HashMap::new();
 58        for child in elem.children() {
 59            if child.is("body", ns::XHTML) {
 60                let child = child.clone();
 61                let lang = match child.attr("xml:lang") {
 62                    Some(lang) => lang,
 63                    None => "",
 64                }.to_string();
 65                let body = Tag::try_from(child)?;
 66                match bodies.insert(lang, body) {
 67                    None => (),
 68                    Some(_) => return Err(Error::ParseError("Two identical language bodies found in XHTML-IM."))
 69                }
 70            } else {
 71                return Err(Error::ParseError("Unknown element in XHTML-IM."));
 72            }
 73        }
 74
 75        Ok(XhtmlIm { bodies })
 76    }
 77}
 78
 79impl From<XhtmlIm> for Element {
 80    fn from(wrapper: XhtmlIm) -> Element {
 81        Element::builder("html")
 82            .ns(ns::XHTML_IM)
 83            .append(wrapper.bodies.into_iter().map(|(ref lang, ref body)| {
 84                if let Tag::Body { style, xml_lang, children } = body {
 85                    assert_eq!(Some(lang), xml_lang.as_ref());
 86                    Element::builder("body")
 87                        .ns(ns::XHTML_IM)
 88                        .attr("style", get_style_string(style.clone()))
 89                        .attr("xml:lang", xml_lang.clone())
 90                        .append(children_to_nodes(children.clone()))
 91                } else {
 92                    unreachable!();
 93                }
 94            }).collect::<Vec<_>>())
 95            .build()
 96    }
 97}
 98
/// A single node inside a [`Tag`]: either a nested element or a run of text.
#[derive(Debug, Clone)]
enum Child {
    /// A nested element.
    Tag(Tag),
    /// Character data, stored as taken from the XML text node.
    Text(String),
}
104
105impl Child {
106    fn to_html(self) -> String {
107        match self {
108            Child::Tag(tag) => tag.to_html(),
109            Child::Text(text) => text,
110        }
111    }
112}
113
/// A single CSS declaration from a `style` attribute.
#[derive(Debug, Clone)]
struct Property {
    /// Written before the colon when the style is serialised.
    key: String,
    /// Written after the colon when the style is serialised.
    value: String,
}
119
/// The declarations of one `style` attribute, in order; empty when there is
/// no style.
type Css = Vec<Property>;
121
122fn get_style_string(style: Css) -> Option<String> {
123    let mut result = vec![];
124    for Property { key, value } in style {
125        result.push(format!("{}: {}", key, value));
126    }
127    if result.is_empty() {
128        return None;
129    }
130    Some(result.join("; "))
131}
132
/// An element of the formatted-text tree.
///
/// Each variant corresponds to one element name recognised by the parser;
/// anything else becomes [`Tag::Unknown`]. `style` fields hold the parsed
/// `style` attribute and `children` the element's child nodes in order.
#[derive(Debug, Clone)]
enum Tag {
    /// `<a>`, with its optional `href` and `type` attributes.
    A { href: Option<String>, style: Css, type_: Option<String>, children: Vec<Child> },
    /// `<blockquote>`.
    Blockquote { style: Css, children: Vec<Child> },
    /// `<body>`, with its optional `xml:lang` attribute.
    Body { style: Css, xml_lang: Option<String>, children: Vec<Child> },
    /// `<br>`; carries neither attributes nor children.
    Br,
    /// `<cite>`.
    Cite { style: Css, children: Vec<Child> },
    /// `<em>`.
    Em { children: Vec<Child> },
    /// `<img>`, with its optional `src` and `alt` attributes; no children are kept.
    Img { src: Option<String>, alt: Option<String> }, // TODO: height, width, style
    /// `<li>`.
    Li { style: Css, children: Vec<Child> },
    /// `<ol>`.
    Ol { style: Css, children: Vec<Child> },
    /// `<p>`.
    P { style: Css, children: Vec<Child> },
    /// `<span>`.
    Span { style: Css, children: Vec<Child> },
    /// `<strong>`.
    Strong { children: Vec<Child> },
    /// `<ul>`.
    Ul { style: Css, children: Vec<Child> },
    /// Any element whose name is not recognised; only its children are kept.
    Unknown(Vec<Child>),
}
150
151impl Tag {
152    fn to_html(self) -> String {
153        match self {
154            Tag::A { href, style, type_, children } => {
155                let href = write_attr(href, "href");
156                let style = write_attr(get_style_string(style), "style");
157                let type_ = write_attr(type_, "type");
158                format!("<a{}{}{}>{}</a>", href, style, type_, children_to_html(children))
159            },
160            Tag::Blockquote { style, children } => {
161                let style = write_attr(get_style_string(style), "style");
162                format!("<blockquote{}>{}</blockquote>", style, children_to_html(children))
163            },
164            Tag::Body { style, xml_lang: _, children } => {
165                let style = write_attr(get_style_string(style), "style");
166                format!("<body{}>{}</body>", style, children_to_html(children))
167            },
168            Tag::Br => String::from("<br>"),
169            Tag::Cite { style, children } => {
170                let style = write_attr(get_style_string(style), "style");
171                format!("<cite{}>{}</cite>", style, children_to_html(children))
172            },
173            Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
174            Tag::Img { src, alt } => {
175                let src = write_attr(src, "src");
176                let alt = write_attr(alt, "alt");
177                format!("<img{}{}>", src, alt)
178            }
179            Tag::Li { style, children } => {
180                let style = write_attr(get_style_string(style), "style");
181                format!("<li{}>{}</li>", style, children_to_html(children))
182            }
183            Tag::Ol { style, children } => {
184                let style = write_attr(get_style_string(style), "style");
185                format!("<ol{}>{}</ol>", style, children_to_html(children))
186            }
187            Tag::P { style, children } => {
188                let style = write_attr(get_style_string(style), "style");
189                format!("<p{}>{}</p>", style, children_to_html(children))
190            }
191            Tag::Span { style, children } => {
192                let style = write_attr(get_style_string(style), "style");
193                format!("<span{}>{}</span>", style, children_to_html(children))
194            }
195            Tag::Strong { children } => format!("<strong>{}</strong>", children.into_iter().map(|child| child.to_html()).collect::<Vec<_>>().join("")),
196            Tag::Ul { style, children } => {
197                let style = write_attr(get_style_string(style), "style");
198                format!("<ul{}>{}</ul>", style, children_to_html(children))
199            }
200            Tag::Unknown(children) => children_to_html(children),
201        }
202    }
203}
204
205impl TryFrom<Element> for Tag {
206    type Error = Error;
207
208    fn try_from(elem: Element) -> Result<Tag, Error> {
209        let mut children = vec![];
210        for child in elem.nodes() {
211            match child {
212                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
213                Node::Text(text) => children.push(Child::Text(text.clone())),
214                Node::Comment(_) => unimplemented!() // XXX: remove!
215            }
216        }
217
218        Ok(match elem.name() {
219            "a" => Tag::A { href: elem.attr("href").map(|href| href.to_string()), style: parse_css(elem.attr("style")), type_: elem.attr("type").map(|type_| type_.to_string()), children },
220            "blockquote" => Tag::Blockquote { style: parse_css(elem.attr("style")), children },
221            "body" => Tag::Body { style: parse_css(elem.attr("style")), xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()), children },
222            "br" => Tag::Br,
223            "cite" => Tag::Cite { style: parse_css(elem.attr("style")), children },
224            "em" => Tag::Em { children },
225            "img" => Tag::Img { src: elem.attr("src").map(|src| src.to_string()), alt: elem.attr("alt").map(|alt| alt.to_string()) },
226            "li" => Tag::Li { style: parse_css(elem.attr("style")), children },
227            "ol" => Tag::Ol { style: parse_css(elem.attr("style")), children },
228            "p" => Tag::P { style: parse_css(elem.attr("style")), children },
229            "span" => Tag::Span { style: parse_css(elem.attr("style")), children },
230            "strong" => Tag::Strong { children },
231            "ul" => Tag::Ul { style: parse_css(elem.attr("style")), children },
232            _ => Tag::Unknown(children),
233        })
234    }
235}
236
237impl From<Tag> for Element {
238    fn from(tag: Tag) -> Element {
239        let (name, attrs, children) = match tag {
240            Tag::A { href, style, type_, children } => ("a", {
241                let mut attrs = vec![];
242                if let Some(href) = href {
243                    attrs.push(("href", href));
244                }
245                if let Some(style) = get_style_string(style) {
246                    attrs.push(("style", style));
247                }
248                if let Some(type_) = type_ {
249                    attrs.push(("type", type_));
250                }
251                attrs
252            }, children),
253            Tag::Blockquote { style, children } => ("blockquote", match get_style_string(style) {
254                Some(style) => vec![("style", style)],
255                None => vec![],
256            }, children),
257            Tag::Body { style, xml_lang, children } => ("body", {
258                let mut attrs = vec![];
259                if let Some(style) = get_style_string(style) {
260                    attrs.push(("style", style));
261                }
262                if let Some(xml_lang) = xml_lang {
263                    attrs.push(("xml:lang", xml_lang));
264                }
265                attrs
266            }, children),
267            Tag::Br => ("br", vec![], vec![]),
268            Tag::Cite { style, children } => ("cite", match get_style_string(style) {
269                Some(style) => vec![("style", style)],
270                None => vec![],
271            }, children),
272            Tag::Em { children } => ("em", vec![], children),
273            Tag::Img { src, alt } => {
274                let mut attrs = vec![];
275                if let Some(src) = src {
276                    attrs.push(("src", src));
277                }
278                if let Some(alt) = alt {
279                    attrs.push(("alt", alt));
280                }
281                ("img", attrs, vec![])
282            },
283            Tag::Li { style, children } => ("li", match get_style_string(style) {
284                Some(style) => vec![("style", style)],
285                None => vec![],
286            }, children),
287            Tag::Ol { style, children } => ("ol", match get_style_string(style) {
288                Some(style) => vec![("style", style)],
289                None => vec![],
290            }, children),
291            Tag::P { style, children } => ("p", match get_style_string(style) {
292                Some(style) => vec![("style", style)],
293                None => vec![],
294            }, children),
295            Tag::Span { style, children } => ("span", match get_style_string(style) {
296                Some(style) => vec![("style", style)],
297                None => vec![],
298            }, children),
299            Tag::Strong { children } => ("strong", vec![], children),
300            Tag::Ul { style, children } => ("ul", match get_style_string(style) {
301                Some(style) => vec![("style", style)],
302                None => vec![],
303            }, children),
304            Tag::Unknown(children) => return Element::builder("unknown").ns(ns::XHTML).append(children_to_nodes(children)).build(),
305        };
306        let mut builder = Element::builder(name)
307            .ns(ns::XHTML)
308            .append(children_to_nodes(children));
309        for (key, value) in attrs {
310            builder = builder.attr(key, value);
311        }
312        builder.build()
313    }
314}
315
316fn children_to_nodes(children: Vec<Child>) -> Vec<Node> {
317    children.into_iter().map(|child| match child {
318        Child::Tag(tag) => Node::Element(Element::from(tag)),
319        Child::Text(text) => Node::Text(text),
320    }).collect::<Vec<_>>()
321}
322
323fn children_to_html(children: Vec<Child>) -> String {
324    children.into_iter().map(|child| child.to_html()).collect::<Vec<_>>().concat()
325}
326
/// Formats an optional attribute as ` name='value'` (note the leading
/// space), or as an empty string when the attribute is absent.
///
/// The value is escaped for use inside a quoted HTML attribute: `&`, `<`,
/// `>`, `'` and `"` are replaced by character references, so that a value
/// can never close the quotes and inject further attributes or markup.
/// `name` is written verbatim.
fn write_attr(attr: Option<String>, name: &str) -> String {
    match attr {
        Some(value) => {
            let mut escaped = String::with_capacity(value.len());
            for c in value.chars() {
                match c {
                    '&' => escaped.push_str("&amp;"),
                    '<' => escaped.push_str("&lt;"),
                    '>' => escaped.push_str("&gt;"),
                    '\'' => escaped.push_str("&#39;"),
                    '"' => escaped.push_str("&quot;"),
                    other => escaped.push(other),
                }
            }
            format!(" {}='{}'", name, escaped)
        }
        None => String::new(),
    }
}
333
334fn parse_css(style: Option<&str>) -> Css {
335    let mut properties = vec![];
336    if let Some(style) = style {
337        // TODO: make that parser a bit more resilient to things.
338        for part in style.split(";") {
339            let mut part = part.splitn(2, ":").map(|a| a.to_string()).collect::<Vec<_>>();
340            let key = part.pop().unwrap();
341            let value = part.pop().unwrap();
342            properties.push(Property { key, value });
343        }
344    }
345    properties
346}
347
#[cfg(test)]
mod tests {
    use super::*;

    // Sizes are not tracked on 32-bit targets; the zeros are placeholders
    // and the test is ignored.
    #[cfg(target_pointer_width = "32")]
    #[test]
    #[ignore]
    fn test_size() {
        assert_size!(XhtmlIm, 0);
        assert_size!(Child, 0);
        assert_size!(Tag, 0);
    }

    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 56);
        assert_size!(Child, 112);
        assert_size!(Tag, 104);
    }

    // Zero, one (no xml:lang) and two (distinct xml:lang) bodies all parse.
    #[test]
    fn test_empty() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'/>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 0);

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 1);

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 2);
    }

    // Two bodies sharing a language (here: both without xml:lang) are rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>"
            .parse()
            .unwrap();
        let error = XhtmlIm::try_from(elem).unwrap_err();
        let message = match error {
            Error::ParseError(string) => string,
            _ => panic!(),
        };
        assert_eq!(message, "Two identical language bodies found in XHTML-IM.");
    }

    #[test]
    fn test_tag() {
        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'/>"
            .parse()
            .unwrap();
        let body = Tag::try_from(elem).unwrap();
        match body {
            Tag::Body { style: _, xml_lang: _, children } => assert_eq!(children.len(), 0),
            _ => panic!(),
        }

        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>"
            .parse()
            .unwrap();
        let body = Tag::try_from(elem).unwrap();
        let mut children = match body {
            Tag::Body { style, xml_lang, children } => {
                assert_eq!(style.len(), 0);
                assert_eq!(xml_lang, None);
                assert_eq!(children.len(), 1);
                children
            },
            _ => panic!(),
        };
        let p = match children.pop() {
            Some(Child::Tag(tag)) => tag,
            _ => panic!(),
        };
        let mut children = match p {
            Tag::P { style, children } => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            },
            _ => panic!(),
        };
        let text = match children.pop() {
            Some(Child::Text(text)) => text,
            _ => panic!(),
        };
        assert_eq!(text, "Hello world!");
    }

    // An unrecognised element is dropped from the HTML but its content is kept.
    #[test]
    fn test_unknown_element() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.to_html();
        assert_eq!(html, "Hello world!");
    }

    #[test]
    fn test_generate_html() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.to_html();
        assert_eq!(html, "<p>Hello world!</p>");

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.to_html();
        assert_eq!(html, "<p>Hello <strong>world</strong>!</p>");
    }

    // Builds a tree by hand and checks that it renders as expected.
    #[test]
    fn generate_tree() {
        let world = "world".to_string();

        // `Body` is a variant of `Tag`, so it has to be named through the enum.
        let body = Tag::Body { style: vec![], xml_lang: Some("en".to_string()), children: vec![
            Child::Tag(Tag::P { style: vec![], children: vec![
                Child::Text("Hello ".to_string()),
                Child::Tag(Tag::Strong { children: vec![
                    Child::Text(world),
                ] }),
                Child::Text("!".to_string()),
            ] }),
        ] };

        assert_eq!(body.to_html(), "<body><p>Hello <strong>world</strong>!</p></body>");
    }
}