// xhtml.rs

// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
  6
  7use crate::message::MessagePayload;
  8use crate::ns;
  9use crate::util::error::Error;
 10use minidom::{Element, Node};
 11use std::collections::HashMap;
 12
 13// TODO: Use a proper lang type.
 14type Lang = String;
 15
 16/// Container for formatted text.
 17#[derive(Debug, Clone)]
 18pub struct XhtmlIm {
 19    /// Map of language to body element.
 20    bodies: HashMap<Lang, Body>,
 21}
 22
 23impl XhtmlIm {
 24    /// Serialise formatted text to HTML.
 25    pub fn into_html(self) -> String {
 26        let mut html = Vec::new();
 27        // TODO: use the best language instead.
 28        for (lang, body) in self.bodies {
 29            if lang.is_empty() {
 30                assert!(body.xml_lang.is_none());
 31            } else {
 32                assert_eq!(Some(lang), body.xml_lang);
 33            }
 34            for tag in body.children {
 35                html.push(tag.into_html());
 36            }
 37            break;
 38        }
 39        html.concat()
 40    }
 41
 42    /// Removes all unknown elements.
 43    fn flatten(self) -> XhtmlIm {
 44        let mut bodies = HashMap::new();
 45        for (lang, body) in self.bodies {
 46            let children = body.children.into_iter().fold(vec![], |mut acc, child| {
 47                match child {
 48                    Child::Tag(Tag::Unknown(children)) => acc.extend(children),
 49                    any => acc.push(any),
 50                }
 51                acc
 52            });
 53            let body = Body { children, ..body };
 54            bodies.insert(lang, body);
 55        }
 56        XhtmlIm { bodies }
 57    }
 58}
 59
 60impl MessagePayload for XhtmlIm {}
 61
 62impl TryFrom<Element> for XhtmlIm {
 63    type Error = Error;
 64
 65    fn try_from(elem: Element) -> Result<XhtmlIm, Error> {
 66        check_self!(elem, "html", XHTML_IM);
 67        check_no_attributes!(elem, "html");
 68
 69        let mut bodies = HashMap::new();
 70        for child in elem.children() {
 71            if child.is("body", ns::XHTML) {
 72                let child = child.clone();
 73                let lang = child.attr("xml:lang").unwrap_or("").to_string();
 74                let body = Body::try_from(child)?;
 75                match bodies.insert(lang, body) {
 76                    None => (),
 77                    Some(_) => {
 78                        return Err(Error::ParseError(
 79                            "Two identical language bodies found in XHTML-IM.",
 80                        ))
 81                    }
 82                }
 83            } else {
 84                return Err(Error::ParseError("Unknown element in XHTML-IM."));
 85            }
 86        }
 87
 88        Ok(XhtmlIm { bodies }.flatten())
 89    }
 90}
 91
 92impl From<XhtmlIm> for Element {
 93    fn from(wrapper: XhtmlIm) -> Element {
 94        Element::builder("html", ns::XHTML_IM)
 95            .append_all(wrapper.bodies.into_iter().map(|(lang, body)| {
 96                if lang.is_empty() {
 97                    assert!(body.xml_lang.is_none());
 98                } else {
 99                    assert_eq!(Some(lang), body.xml_lang);
100                }
101                Element::from(body)
102            }))
103            .build()
104    }
105}
106
107#[derive(Debug, Clone)]
108enum Child {
109    Tag(Tag),
110    Text(String),
111}
112
113impl Child {
114    fn into_html(self) -> String {
115        match self {
116            Child::Tag(tag) => tag.into_html(),
117            Child::Text(text) => text,
118        }
119    }
120}
121
/// One CSS declaration taken from a `style` attribute.
#[derive(Clone, Debug)]
struct Property {
    /// Written before the colon when the declaration is serialised.
    key: String,
    /// Written after the colon when the declaration is serialised.
    value: String,
}

/// The declarations of one `style` attribute, in order.
type Css = Vec<Property>;
129
130fn get_style_string(style: Css) -> Option<String> {
131    let mut result = vec![];
132    for Property { key, value } in style {
133        result.push(format!("{}: {}", key, value));
134    }
135    if result.is_empty() {
136        return None;
137    }
138    Some(result.join("; "))
139}
140
141#[derive(Debug, Clone)]
142struct Body {
143    style: Css,
144    xml_lang: Option<String>,
145    children: Vec<Child>,
146}
147
148impl TryFrom<Element> for Body {
149    type Error = Error;
150
151    fn try_from(elem: Element) -> Result<Body, Error> {
152        let mut children = vec![];
153        for child in elem.nodes() {
154            match child {
155                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
156                Node::Text(text) => children.push(Child::Text(text.clone())),
157            }
158        }
159
160        Ok(Body {
161            style: parse_css(elem.attr("style")),
162            xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()),
163            children,
164        })
165    }
166}
167
168impl From<Body> for Element {
169    fn from(body: Body) -> Element {
170        Element::builder("body", ns::XHTML)
171            .attr("style", get_style_string(body.style))
172            .attr("xml:lang", body.xml_lang)
173            .append_all(children_to_nodes(body.children))
174            .build()
175    }
176}
177
178#[derive(Debug, Clone)]
179enum Tag {
180    A {
181        href: Option<String>,
182        style: Css,
183        type_: Option<String>,
184        children: Vec<Child>,
185    },
186    Blockquote {
187        style: Css,
188        children: Vec<Child>,
189    },
190    Br,
191    Cite {
192        style: Css,
193        children: Vec<Child>,
194    },
195    Em {
196        children: Vec<Child>,
197    },
198    Img {
199        src: Option<String>,
200        alt: Option<String>,
201    }, // TODO: height, width, style
202    Li {
203        style: Css,
204        children: Vec<Child>,
205    },
206    Ol {
207        style: Css,
208        children: Vec<Child>,
209    },
210    P {
211        style: Css,
212        children: Vec<Child>,
213    },
214    Span {
215        style: Css,
216        children: Vec<Child>,
217    },
218    Strong {
219        children: Vec<Child>,
220    },
221    Ul {
222        style: Css,
223        children: Vec<Child>,
224    },
225    Unknown(Vec<Child>),
226}
227
228impl Tag {
229    fn into_html(self) -> String {
230        match self {
231            Tag::A {
232                href,
233                style,
234                type_,
235                children,
236            } => {
237                let href = write_attr(href, "href");
238                let style = write_attr(get_style_string(style), "style");
239                let type_ = write_attr(type_, "type");
240                format!(
241                    "<a{}{}{}>{}</a>",
242                    href,
243                    style,
244                    type_,
245                    children_to_html(children)
246                )
247            }
248            Tag::Blockquote { style, children } => {
249                let style = write_attr(get_style_string(style), "style");
250                format!(
251                    "<blockquote{}>{}</blockquote>",
252                    style,
253                    children_to_html(children)
254                )
255            }
256            Tag::Br => String::from("<br>"),
257            Tag::Cite { style, children } => {
258                let style = write_attr(get_style_string(style), "style");
259                format!("<cite{}>{}</cite>", style, children_to_html(children))
260            }
261            Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
262            Tag::Img { src, alt } => {
263                let src = write_attr(src, "src");
264                let alt = write_attr(alt, "alt");
265                format!("<img{}{}>", src, alt)
266            }
267            Tag::Li { style, children } => {
268                let style = write_attr(get_style_string(style), "style");
269                format!("<li{}>{}</li>", style, children_to_html(children))
270            }
271            Tag::Ol { style, children } => {
272                let style = write_attr(get_style_string(style), "style");
273                format!("<ol{}>{}</ol>", style, children_to_html(children))
274            }
275            Tag::P { style, children } => {
276                let style = write_attr(get_style_string(style), "style");
277                format!("<p{}>{}</p>", style, children_to_html(children))
278            }
279            Tag::Span { style, children } => {
280                let style = write_attr(get_style_string(style), "style");
281                format!("<span{}>{}</span>", style, children_to_html(children))
282            }
283            Tag::Strong { children } => format!("<strong>{}</strong>", children_to_html(children)),
284            Tag::Ul { style, children } => {
285                let style = write_attr(get_style_string(style), "style");
286                format!("<ul{}>{}</ul>", style, children_to_html(children))
287            }
288            Tag::Unknown(_) => {
289                panic!("No unknown element should be present in XHTML-IM after parsing.")
290            }
291        }
292    }
293}
294
295impl TryFrom<Element> for Tag {
296    type Error = Error;
297
298    fn try_from(elem: Element) -> Result<Tag, Error> {
299        let mut children = vec![];
300        for child in elem.nodes() {
301            match child {
302                Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
303                Node::Text(text) => children.push(Child::Text(text.clone())),
304            }
305        }
306
307        Ok(match elem.name() {
308            "a" => Tag::A {
309                href: elem.attr("href").map(|href| href.to_string()),
310                style: parse_css(elem.attr("style")),
311                type_: elem.attr("type").map(|type_| type_.to_string()),
312                children,
313            },
314            "blockquote" => Tag::Blockquote {
315                style: parse_css(elem.attr("style")),
316                children,
317            },
318            "br" => Tag::Br,
319            "cite" => Tag::Cite {
320                style: parse_css(elem.attr("style")),
321                children,
322            },
323            "em" => Tag::Em { children },
324            "img" => Tag::Img {
325                src: elem.attr("src").map(|src| src.to_string()),
326                alt: elem.attr("alt").map(|alt| alt.to_string()),
327            },
328            "li" => Tag::Li {
329                style: parse_css(elem.attr("style")),
330                children,
331            },
332            "ol" => Tag::Ol {
333                style: parse_css(elem.attr("style")),
334                children,
335            },
336            "p" => Tag::P {
337                style: parse_css(elem.attr("style")),
338                children,
339            },
340            "span" => Tag::Span {
341                style: parse_css(elem.attr("style")),
342                children,
343            },
344            "strong" => Tag::Strong { children },
345            "ul" => Tag::Ul {
346                style: parse_css(elem.attr("style")),
347                children,
348            },
349            _ => Tag::Unknown(children),
350        })
351    }
352}
353
354impl From<Tag> for Element {
355    fn from(tag: Tag) -> Element {
356        let (name, attrs, children) = match tag {
357            Tag::A {
358                href,
359                style,
360                type_,
361                children,
362            } => (
363                "a",
364                {
365                    let mut attrs = vec![];
366                    if let Some(href) = href {
367                        attrs.push(("href", href));
368                    }
369                    if let Some(style) = get_style_string(style) {
370                        attrs.push(("style", style));
371                    }
372                    if let Some(type_) = type_ {
373                        attrs.push(("type", type_));
374                    }
375                    attrs
376                },
377                children,
378            ),
379            Tag::Blockquote { style, children } => (
380                "blockquote",
381                match get_style_string(style) {
382                    Some(style) => vec![("style", style)],
383                    None => vec![],
384                },
385                children,
386            ),
387            Tag::Br => ("br", vec![], vec![]),
388            Tag::Cite { style, children } => (
389                "cite",
390                match get_style_string(style) {
391                    Some(style) => vec![("style", style)],
392                    None => vec![],
393                },
394                children,
395            ),
396            Tag::Em { children } => ("em", vec![], children),
397            Tag::Img { src, alt } => {
398                let mut attrs = vec![];
399                if let Some(src) = src {
400                    attrs.push(("src", src));
401                }
402                if let Some(alt) = alt {
403                    attrs.push(("alt", alt));
404                }
405                ("img", attrs, vec![])
406            }
407            Tag::Li { style, children } => (
408                "li",
409                match get_style_string(style) {
410                    Some(style) => vec![("style", style)],
411                    None => vec![],
412                },
413                children,
414            ),
415            Tag::Ol { style, children } => (
416                "ol",
417                match get_style_string(style) {
418                    Some(style) => vec![("style", style)],
419                    None => vec![],
420                },
421                children,
422            ),
423            Tag::P { style, children } => (
424                "p",
425                match get_style_string(style) {
426                    Some(style) => vec![("style", style)],
427                    None => vec![],
428                },
429                children,
430            ),
431            Tag::Span { style, children } => (
432                "span",
433                match get_style_string(style) {
434                    Some(style) => vec![("style", style)],
435                    None => vec![],
436                },
437                children,
438            ),
439            Tag::Strong { children } => ("strong", vec![], children),
440            Tag::Ul { style, children } => (
441                "ul",
442                match get_style_string(style) {
443                    Some(style) => vec![("style", style)],
444                    None => vec![],
445                },
446                children,
447            ),
448            Tag::Unknown(_) => {
449                panic!("No unknown element should be present in XHTML-IM after parsing.")
450            }
451        };
452        let mut builder = Element::builder(name, ns::XHTML).append_all(children_to_nodes(children));
453        for (key, value) in attrs {
454            builder = builder.attr(key, value);
455        }
456        builder.build()
457    }
458}
459
460fn children_to_nodes(children: Vec<Child>) -> impl IntoIterator<Item = Node> {
461    children.into_iter().map(|child| match child {
462        Child::Tag(tag) => Node::Element(Element::from(tag)),
463        Child::Text(text) => Node::Text(text),
464    })
465}
466
467fn children_to_html(children: Vec<Child>) -> String {
468    children
469        .into_iter()
470        .map(|child| child.into_html())
471        .collect::<Vec<_>>()
472        .concat()
473}
474
/// Formats an optional attribute as ` name='value'`, with its leading space.
///
/// Returns an empty string when `attr` is `None`. The value is escaped
/// (`&`, `<`, `>`, `'` and `"`), because it comes from a remote peer and must
/// not be able to close the quoted value and inject attributes or markup.
/// `name` is written as is; callers only pass fixed attribute names.
fn write_attr(attr: Option<String>, name: &str) -> String {
    match attr {
        Some(value) => {
            // 4 = leading space, `=` and the two quotes.
            let mut out = String::with_capacity(name.len() + value.len() + 4);
            out.push(' ');
            out.push_str(name);
            out.push_str("='");
            for c in value.chars() {
                match c {
                    '&' => out.push_str("&amp;"),
                    '<' => out.push_str("&lt;"),
                    '>' => out.push_str("&gt;"),
                    '\'' => out.push_str("&#39;"),
                    '"' => out.push_str("&quot;"),
                    other => out.push(other),
                }
            }
            out.push('\'');
            out
        }
        None => String::new(),
    }
}
481
482fn parse_css(style: Option<&str>) -> Css {
483    let mut properties = vec![];
484    if let Some(style) = style {
485        // TODO: make that parser a bit more resilient to things.
486        for part in style.split(';') {
487            let mut part = part
488                .splitn(2, ':')
489                .map(|a| a.to_string())
490                .collect::<Vec<_>>();
491            let key = part.pop().unwrap();
492            let value = part.pop().unwrap();
493            properties.push(Property { key, value });
494        }
495    }
496    properties
497}
498
#[cfg(test)]
mod tests {
    use super::*;

    const EMPTY_HTML: &str = "<html xmlns='http://jabber.org/protocol/xhtml-im'/>";

    /// Parses an XML snippet, panicking when it is not well-formed.
    fn parse(xml: &str) -> Element {
        xml.parse().unwrap()
    }

    // Guards against accidental growth of the types on 32-bit targets.
    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 32);
        assert_size!(Child, 48);
        assert_size!(Tag, 48);
    }

    // Guards against accidental growth of the types on 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 48);
        assert_size!(Child, 96);
        assert_size!(Tag, 96);
    }

    // A wrapper with zero, one or two (distinct language) empty bodies parses.
    #[test]
    fn test_empty() {
        let cases: &[(&str, usize)] = &[
            (EMPTY_HTML, 0),
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>",
                1,
            ),
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>",
                2,
            ),
        ];
        for &(xml, expected) in cases {
            let xhtml = XhtmlIm::try_from(parse(xml)).unwrap();
            assert_eq!(xhtml.bodies.len(), expected);
        }
    }

    // Two bodies without `xml:lang` share the empty language and are rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem = parse(
            "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>",
        );
        match XhtmlIm::try_from(elem).unwrap_err() {
            Error::ParseError(message) => {
                assert_eq!(message, "Two identical language bodies found in XHTML-IM.")
            }
            _ => panic!(),
        }
    }

    // A body holding a single paragraph of text is parsed into the expected tree.
    #[test]
    fn test_tag() {
        let empty = Body::try_from(parse("<body xmlns='http://www.w3.org/1999/xhtml'/>")).unwrap();
        assert_eq!(empty.children.len(), 0);

        let mut body = Body::try_from(parse(
            "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>",
        ))
        .unwrap();
        assert_eq!(body.style.len(), 0);
        assert_eq!(body.xml_lang, None);
        assert_eq!(body.children.len(), 1);

        let mut paragraph = match body.children.pop() {
            Some(Child::Tag(Tag::P { style, children })) => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            }
            _ => panic!(),
        };
        match paragraph.pop() {
            Some(Child::Text(text)) => assert_eq!(text, "Hello world!"),
            _ => panic!(),
        }
    }

    // An unknown element directly inside the body is replaced by its content.
    #[test]
    fn test_unknown_element() {
        let parsed = XhtmlIm::try_from(parse(
            "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>",
        ))
        .unwrap();

        let html = parsed.clone().into_html();
        assert_eq!(html, "Hello world!");

        let elem = Element::from(parsed);
        assert_eq!(String::from(&elem), "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'>Hello world!</body></html>");
    }

    // The HTML output only contains the content of the body.
    #[test]
    fn test_generate_html() {
        let cases: &[(&str, &str)] = &[
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>",
                "<p>Hello world!</p>",
            ),
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>",
                "<p>Hello <strong>world</strong>!</p>",
            ),
        ];
        for &(xml, expected) in cases {
            let xhtml_im = XhtmlIm::try_from(parse(xml)).unwrap();
            assert_eq!(xhtml_im.into_html(), expected);
        }
    }

    // Only checks that a tree can be built by hand; nothing is asserted on it.
    #[test]
    fn generate_tree() {
        let world = String::from("world");
        let strong = Tag::Strong {
            children: vec![Child::Text(world)],
        };
        let paragraph = Tag::P {
            style: vec![],
            children: vec![
                Child::Text("Hello ".to_string()),
                Child::Tag(strong),
                Child::Text("!".to_string()),
            ],
        };

        let _tree = Body {
            style: vec![],
            xml_lang: Some("en".to_string()),
            children: vec![Child::Tag(paragraph)],
        };
    }
}