1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7use crate::util::error::Error;
8use crate::message::MessagePayload;
9use crate::ns;
10use minidom::{Element, Node};
11use std::convert::TryFrom;
12use std::collections::HashMap;
13
// TODO: Use a proper lang type.
/// Language identifier used as the key of the body map in `XhtmlIm`.
///
/// It holds the value of a body's `xml:lang` attribute; the empty string is
/// used for a body which has no such attribute.
type Lang = String;
16
/// Container for formatted text.
///
/// This is the parsed form of an `<html/>` wrapper element, holding at most
/// one `<body/>` per language.
#[derive(Debug, Clone)]
pub struct XhtmlIm {
    /// Map of language to body element.
    ///
    /// The key is the `xml:lang` of the body, or the empty string when the
    /// body has no `xml:lang` attribute.  Parsing only ever stores
    /// `Tag::Body` values in this map.
    bodies: HashMap<Lang, Tag>,
}
23
24impl XhtmlIm {
25 /// Serialise formatted text to HTML.
26 pub fn to_html(self) -> String {
27 let mut html = Vec::new();
28 // TODO: use the best language instead.
29 for (lang, body) in self.bodies {
30 if let Tag::Body { style: _, xml_lang, children } = body {
31 if lang.is_empty() {
32 assert!(xml_lang.is_none());
33 } else {
34 assert_eq!(Some(lang), xml_lang);
35 }
36 for tag in children {
37 html.push(tag.to_html());
38 }
39 break;
40 } else {
41 unreachable!();
42 }
43 }
44 html.concat()
45 }
46}
47
// Empty impl: `XhtmlIm` opts into the `MessagePayload` trait without
// overriding anything.
impl MessagePayload for XhtmlIm {}
49
50impl TryFrom<Element> for XhtmlIm {
51 type Error = Error;
52
53 fn try_from(elem: Element) -> Result<XhtmlIm, Error> {
54 check_self!(elem, "html", XHTML_IM);
55 check_no_attributes!(elem, "html");
56
57 let mut bodies = HashMap::new();
58 for child in elem.children() {
59 if child.is("body", ns::XHTML) {
60 let child = child.clone();
61 let lang = match child.attr("xml:lang") {
62 Some(lang) => lang,
63 None => "",
64 }.to_string();
65 let body = Tag::try_from(child)?;
66 match bodies.insert(lang, body) {
67 None => (),
68 Some(_) => return Err(Error::ParseError("Two identical language bodies found in XHTML-IM."))
69 }
70 } else {
71 return Err(Error::ParseError("Unknown element in XHTML-IM."));
72 }
73 }
74
75 Ok(XhtmlIm { bodies })
76 }
77}
78
79impl From<XhtmlIm> for Element {
80 fn from(wrapper: XhtmlIm) -> Element {
81 Element::builder("html")
82 .ns(ns::XHTML_IM)
83 .append(wrapper.bodies.into_iter().map(|(ref lang, ref body)| {
84 if let Tag::Body { style, xml_lang, children } = body {
85 assert_eq!(Some(lang), xml_lang.as_ref());
86 Element::builder("body")
87 .ns(ns::XHTML_IM)
88 .attr("style", get_style_string(style.clone()))
89 .attr("xml:lang", xml_lang.clone())
90 .append(children_to_nodes(children.clone()))
91 } else {
92 unreachable!();
93 }
94 }).collect::<Vec<_>>())
95 .build()
96 }
97}
98
/// One node found inside a `Tag`: either a nested element or a run of text.
#[derive(Debug, Clone)]
enum Child {
    /// A nested element.
    Tag(Tag),
    /// Character data.
    Text(String),
}
104
105impl Child {
106 fn to_html(self) -> String {
107 match self {
108 Child::Tag(tag) => tag.to_html(),
109 Child::Text(text) => text,
110 }
111 }
112}
113
/// A single CSS declaration taken from a `style` attribute.
#[derive(Debug, Clone)]
struct Property {
    /// Name of the property.
    key: String,
    /// Value assigned to the property.
    value: String,
}

/// The declarations of one `style` attribute, in the order they are stored.
type Css = Vec<Property>;
121
122fn get_style_string(style: Css) -> Option<String> {
123 let mut result = vec![];
124 for Property { key, value } in style {
125 result.push(format!("{}: {}", key, value));
126 }
127 if result.is_empty() {
128 return None;
129 }
130 Some(result.join("; "))
131}
132
/// An element of the formatted text tree.
///
/// Each variant corresponds to one recognised element name and keeps only the
/// attributes this module handles; `children` holds the nested elements and
/// text in document order.
#[derive(Debug, Clone)]
enum Tag {
    /// `<a/>`, with its optional `href` and `type` attributes.
    A { href: Option<String>, style: Css, type_: Option<String>, children: Vec<Child> },
    /// `<blockquote/>`.
    Blockquote { style: Css, children: Vec<Child> },
    /// `<body/>`, with its optional `xml:lang` attribute.
    Body { style: Css, xml_lang: Option<String>, children: Vec<Child> },
    /// `<br/>`; neither attributes nor children are kept.
    Br,
    /// `<cite/>`.
    Cite { style: Css, children: Vec<Child> },
    /// `<em/>`; no attributes are kept.
    Em { children: Vec<Child> },
    /// `<img/>`, with its optional `src` and `alt` attributes; no children are kept.
    Img { src: Option<String>, alt: Option<String> }, // TODO: height, width, style
    /// `<li/>`.
    Li { style: Css, children: Vec<Child> },
    /// `<ol/>`.
    Ol { style: Css, children: Vec<Child> },
    /// `<p/>`.
    P { style: Css, children: Vec<Child> },
    /// `<span/>`.
    Span { style: Css, children: Vec<Child> },
    /// `<strong/>`; no attributes are kept.
    Strong { children: Vec<Child> },
    /// `<ul/>`.
    Ul { style: Css, children: Vec<Child> },
    /// Any element with an unrecognised name; only its children are kept.
    Unknown(Vec<Child>),
}
150
151impl Tag {
152 fn to_html(self) -> String {
153 match self {
154 Tag::A { href, style, type_, children } => {
155 let href = write_attr(href, "href");
156 let style = write_attr(get_style_string(style), "style");
157 let type_ = write_attr(type_, "type");
158 format!("<a{}{}{}>{}</a>", href, style, type_, children_to_html(children))
159 },
160 Tag::Blockquote { style, children } => {
161 let style = write_attr(get_style_string(style), "style");
162 format!("<blockquote{}>{}</blockquote>", style, children_to_html(children))
163 },
164 Tag::Body { style, xml_lang: _, children } => {
165 let style = write_attr(get_style_string(style), "style");
166 format!("<body{}>{}</body>", style, children_to_html(children))
167 },
168 Tag::Br => String::from("<br>"),
169 Tag::Cite { style, children } => {
170 let style = write_attr(get_style_string(style), "style");
171 format!("<cite{}>{}</cite>", style, children_to_html(children))
172 },
173 Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
174 Tag::Img { src, alt } => {
175 let src = write_attr(src, "src");
176 let alt = write_attr(alt, "alt");
177 format!("<img{}{}>", src, alt)
178 }
179 Tag::Li { style, children } => {
180 let style = write_attr(get_style_string(style), "style");
181 format!("<li{}>{}</li>", style, children_to_html(children))
182 }
183 Tag::Ol { style, children } => {
184 let style = write_attr(get_style_string(style), "style");
185 format!("<ol{}>{}</ol>", style, children_to_html(children))
186 }
187 Tag::P { style, children } => {
188 let style = write_attr(get_style_string(style), "style");
189 format!("<p{}>{}</p>", style, children_to_html(children))
190 }
191 Tag::Span { style, children } => {
192 let style = write_attr(get_style_string(style), "style");
193 format!("<span{}>{}</span>", style, children_to_html(children))
194 }
195 Tag::Strong { children } => format!("<strong>{}</strong>", children.into_iter().map(|child| child.to_html()).collect::<Vec<_>>().join("")),
196 Tag::Ul { style, children } => {
197 let style = write_attr(get_style_string(style), "style");
198 format!("<ul{}>{}</ul>", style, children_to_html(children))
199 }
200 Tag::Unknown(children) => children_to_html(children),
201 }
202 }
203}
204
205impl TryFrom<Element> for Tag {
206 type Error = Error;
207
208 fn try_from(elem: Element) -> Result<Tag, Error> {
209 let mut children = vec![];
210 for child in elem.nodes() {
211 match child {
212 Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
213 Node::Text(text) => children.push(Child::Text(text.clone())),
214 Node::Comment(_) => unimplemented!() // XXX: remove!
215 }
216 }
217
218 Ok(match elem.name() {
219 "a" => Tag::A { href: elem.attr("href").map(|href| href.to_string()), style: parse_css(elem.attr("style")), type_: elem.attr("type").map(|type_| type_.to_string()), children },
220 "blockquote" => Tag::Blockquote { style: parse_css(elem.attr("style")), children },
221 "body" => Tag::Body { style: parse_css(elem.attr("style")), xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()), children },
222 "br" => Tag::Br,
223 "cite" => Tag::Cite { style: parse_css(elem.attr("style")), children },
224 "em" => Tag::Em { children },
225 "img" => Tag::Img { src: elem.attr("src").map(|src| src.to_string()), alt: elem.attr("alt").map(|alt| alt.to_string()) },
226 "li" => Tag::Li { style: parse_css(elem.attr("style")), children },
227 "ol" => Tag::Ol { style: parse_css(elem.attr("style")), children },
228 "p" => Tag::P { style: parse_css(elem.attr("style")), children },
229 "span" => Tag::Span { style: parse_css(elem.attr("style")), children },
230 "strong" => Tag::Strong { children },
231 "ul" => Tag::Ul { style: parse_css(elem.attr("style")), children },
232 _ => Tag::Unknown(children),
233 })
234 }
235}
236
237impl From<Tag> for Element {
238 fn from(tag: Tag) -> Element {
239 let (name, attrs, children) = match tag {
240 Tag::A { href, style, type_, children } => ("a", {
241 let mut attrs = vec![];
242 if let Some(href) = href {
243 attrs.push(("href", href));
244 }
245 if let Some(style) = get_style_string(style) {
246 attrs.push(("style", style));
247 }
248 if let Some(type_) = type_ {
249 attrs.push(("type", type_));
250 }
251 attrs
252 }, children),
253 Tag::Blockquote { style, children } => ("blockquote", match get_style_string(style) {
254 Some(style) => vec![("style", style)],
255 None => vec![],
256 }, children),
257 Tag::Body { style, xml_lang, children } => ("body", {
258 let mut attrs = vec![];
259 if let Some(style) = get_style_string(style) {
260 attrs.push(("style", style));
261 }
262 if let Some(xml_lang) = xml_lang {
263 attrs.push(("xml:lang", xml_lang));
264 }
265 attrs
266 }, children),
267 Tag::Br => ("br", vec![], vec![]),
268 Tag::Cite { style, children } => ("cite", match get_style_string(style) {
269 Some(style) => vec![("style", style)],
270 None => vec![],
271 }, children),
272 Tag::Em { children } => ("em", vec![], children),
273 Tag::Img { src, alt } => {
274 let mut attrs = vec![];
275 if let Some(src) = src {
276 attrs.push(("src", src));
277 }
278 if let Some(alt) = alt {
279 attrs.push(("alt", alt));
280 }
281 ("img", attrs, vec![])
282 },
283 Tag::Li { style, children } => ("li", match get_style_string(style) {
284 Some(style) => vec![("style", style)],
285 None => vec![],
286 }, children),
287 Tag::Ol { style, children } => ("ol", match get_style_string(style) {
288 Some(style) => vec![("style", style)],
289 None => vec![],
290 }, children),
291 Tag::P { style, children } => ("p", match get_style_string(style) {
292 Some(style) => vec![("style", style)],
293 None => vec![],
294 }, children),
295 Tag::Span { style, children } => ("span", match get_style_string(style) {
296 Some(style) => vec![("style", style)],
297 None => vec![],
298 }, children),
299 Tag::Strong { children } => ("strong", vec![], children),
300 Tag::Ul { style, children } => ("ul", match get_style_string(style) {
301 Some(style) => vec![("style", style)],
302 None => vec![],
303 }, children),
304 Tag::Unknown(children) => return Element::builder("unknown").ns(ns::XHTML).append(children_to_nodes(children)).build(),
305 };
306 let mut builder = Element::builder(name)
307 .ns(ns::XHTML)
308 .append(children_to_nodes(children));
309 for (key, value) in attrs {
310 builder = builder.attr(key, value);
311 }
312 builder.build()
313 }
314}
315
316fn children_to_nodes(children: Vec<Child>) -> Vec<Node> {
317 children.into_iter().map(|child| match child {
318 Child::Tag(tag) => Node::Element(Element::from(tag)),
319 Child::Text(text) => Node::Text(text),
320 }).collect::<Vec<_>>()
321}
322
323fn children_to_html(children: Vec<Child>) -> String {
324 children.into_iter().map(|child| child.to_html()).collect::<Vec<_>>().concat()
325}
326
/// Format an optional attribute for inclusion in an HTML start tag.
///
/// Returns ` name='value'` (with a leading space) when a value is present,
/// and an empty string otherwise.  The value is escaped so that it cannot
/// terminate the quoted attribute or introduce markup: `&`, `<`, `>`, `"`
/// and `'` are replaced by character references.
///
/// `name` is written as is; callers only pass fixed attribute names.
fn write_attr(attr: Option<String>, name: &str) -> String {
    let value = match attr {
        Some(value) => value,
        None => return String::new(),
    };

    let mut escaped = String::with_capacity(value.len());
    for c in value.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            // The attribute is delimited by single quotes, so this one is essential.
            '\'' => escaped.push_str("&#39;"),
            _ => escaped.push(c),
        }
    }
    format!(" {}='{}'", name, escaped)
}
333
334fn parse_css(style: Option<&str>) -> Css {
335 let mut properties = vec![];
336 if let Some(style) = style {
337 // TODO: make that parser a bit more resilient to things.
338 for part in style.split(";") {
339 let mut part = part.splitn(2, ":").map(|a| a.to_string()).collect::<Vec<_>>();
340 let key = part.pop().unwrap();
341 let value = part.pop().unwrap();
342 properties.push(Property { key, value });
343 }
344 }
345 properties
346}
347
#[cfg(test)]
mod tests {
    use super::*;

    // Sizes for 32-bit targets have not been measured yet, hence the
    // placeholder values and the #[ignore].
    #[cfg(target_pointer_width = "32")]
    #[test]
    #[ignore]
    fn test_size() {
        assert_size!(XhtmlIm, 0);
        assert_size!(Child, 0);
        assert_size!(Tag, 0);
    }

    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 56);
        assert_size!(Child, 112);
        assert_size!(Tag, 104);
    }

    /// Wrappers with zero, one and two (differently tagged) empty bodies.
    #[test]
    fn test_empty() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'/>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 0);

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 1);

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 2);
    }

    /// Two bodies for the same (here: absent) language must be rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>"
            .parse()
            .unwrap();
        let error = XhtmlIm::try_from(elem).unwrap_err();
        let message = match error {
            Error::ParseError(string) => string,
            _ => panic!(),
        };
        assert_eq!(message, "Two identical language bodies found in XHTML-IM.");
    }

    /// Parsing a bare body, then a body containing a paragraph of text.
    #[test]
    fn test_tag() {
        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'/>"
            .parse()
            .unwrap();
        let body = Tag::try_from(elem).unwrap();
        match body {
            Tag::Body { style: _, xml_lang: _, children } => assert_eq!(children.len(), 0),
            _ => panic!(),
        }

        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>"
            .parse()
            .unwrap();
        let body = Tag::try_from(elem).unwrap();
        let mut children = match body {
            Tag::Body { style, xml_lang, children } => {
                assert_eq!(style.len(), 0);
                assert_eq!(xml_lang, None);
                assert_eq!(children.len(), 1);
                children
            },
            _ => panic!(),
        };
        let p = match children.pop() {
            Some(Child::Tag(tag)) => tag,
            _ => panic!(),
        };
        let mut children = match p {
            Tag::P { style, children } => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            },
            _ => panic!(),
        };
        let text = match children.pop() {
            Some(Child::Text(text)) => text,
            _ => panic!(),
        };
        assert_eq!(text, "Hello world!");
    }

    /// An unrecognised element is dropped from the HTML, its content is kept.
    #[test]
    fn test_unknown_element() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.to_html();
        assert_eq!(html, "Hello world!");
    }

    /// The generated HTML contains the children of the body, not the body itself.
    #[test]
    fn test_generate_html() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.to_html();
        assert_eq!(html, "<p>Hello world!</p>");

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.to_html();
        assert_eq!(html, "<p>Hello <strong>world</strong>!</p>");
    }

    /// Building a tree by hand, then checking what it renders to.
    #[test]
    fn generate_tree() {
        let world = "world".to_string();

        // `Body` is a variant of `Tag`, so it has to be named through the enum.
        let body = Tag::Body { style: vec![], xml_lang: Some("en".to_string()), children: vec![
            Child::Tag(Tag::P { style: vec![], children: vec![
                Child::Text("Hello ".to_string()),
                Child::Tag(Tag::Strong { children: vec![
                    Child::Text(world),
                ] }),
                Child::Text("!".to_string()),
            ] }),
        ] };
        assert_eq!(body.to_html(), "<body><p>Hello <strong>world</strong>!</p></body>");
    }
}