1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7use crate::util::error::Error;
8use crate::message::MessagePayload;
9use crate::ns;
10use minidom::{Element, Node};
11use std::convert::TryFrom;
12use std::collections::HashMap;
13
// TODO: Use a proper lang type.
/// Language identifier used as the key of the body map in `XhtmlIm`.
///
/// It is the value of a body's `xml:lang` attribute, or the empty string when
/// that attribute is absent.
type Lang = String;
16
/// Container for formatted text.
///
/// Built from an `<html/>` element in the XHTML-IM namespace; holds at most
/// one `<body/>` per language.
#[derive(Debug, Clone)]
pub struct XhtmlIm {
    /// Map of language to body element.
    ///
    /// The key is the body's `xml:lang` value, or the empty string when the
    /// body carries no such attribute.
    bodies: HashMap<Lang, Body>,
}
23
24impl XhtmlIm {
25 /// Serialise formatted text to HTML.
26 pub fn to_html(self) -> String {
27 let mut html = Vec::new();
28 // TODO: use the best language instead.
29 for (lang, body) in self.bodies {
30 if lang.is_empty() {
31 assert!(body.xml_lang.is_none());
32 } else {
33 assert_eq!(Some(lang), body.xml_lang);
34 }
35 for tag in body.children {
36 html.push(tag.to_html());
37 }
38 break;
39 }
40 html.concat()
41 }
42
43 /// Removes all unknown elements.
44 fn flatten(self) -> XhtmlIm {
45 let mut bodies = HashMap::new();
46 for (lang, body) in self.bodies {
47 let children = body.children.into_iter().fold(vec![], |mut acc, child| {
48 match child {
49 Child::Tag(Tag::Unknown(children)) => acc.extend(children),
50 any => acc.push(any),
51 }
52 acc
53 });
54 let body = Body {
55 children,
56 ..body
57 };
58 bodies.insert(lang, body);
59 }
60 XhtmlIm {
61 bodies,
62 }
63 }
64}
65
// Marker impl: lets `XhtmlIm` be used wherever a `MessagePayload` is expected.
impl MessagePayload for XhtmlIm {}
67
68impl TryFrom<Element> for XhtmlIm {
69 type Error = Error;
70
71 fn try_from(elem: Element) -> Result<XhtmlIm, Error> {
72 check_self!(elem, "html", XHTML_IM);
73 check_no_attributes!(elem, "html");
74
75 let mut bodies = HashMap::new();
76 for child in elem.children() {
77 if child.is("body", ns::XHTML) {
78 let child = child.clone();
79 let lang = match child.attr("xml:lang") {
80 Some(lang) => lang,
81 None => "",
82 }.to_string();
83 let body = Body::try_from(child)?;
84 match bodies.insert(lang, body) {
85 None => (),
86 Some(_) => return Err(Error::ParseError("Two identical language bodies found in XHTML-IM."))
87 }
88 } else {
89 return Err(Error::ParseError("Unknown element in XHTML-IM."));
90 }
91 }
92
93 Ok(XhtmlIm { bodies }.flatten())
94 }
95}
96
97impl From<XhtmlIm> for Element {
98 fn from(wrapper: XhtmlIm) -> Element {
99 Element::builder("html")
100 .ns(ns::XHTML_IM)
101 .append_all(wrapper.bodies.into_iter().map(|(lang, body)| {
102 if lang.is_empty() {
103 assert!(body.xml_lang.is_none());
104 } else {
105 assert_eq!(Some(lang), body.xml_lang);
106 }
107 Element::from(body)
108 }))
109 .build()
110 }
111}
112
/// A single node inside a body or inside another tag.
#[derive(Debug, Clone)]
enum Child {
    /// A nested element.
    Tag(Tag),
    /// A run of character data.
    Text(String),
}
118
119impl Child {
120 fn to_html(self) -> String {
121 match self {
122 Child::Tag(tag) => tag.to_html(),
123 Child::Text(text) => text,
124 }
125 }
126}
127
/// One `key: value` declaration taken from a `style` attribute.
#[derive(Debug, Clone)]
struct Property {
    /// Left-hand side of the declaration.
    key: String,
    /// Right-hand side of the declaration.
    value: String,
}

/// The declarations of a `style` attribute, in order.
type Css = Vec<Property>;
135
136fn get_style_string(style: Css) -> Option<String> {
137 let mut result = vec![];
138 for Property { key, value } in style {
139 result.push(format!("{}: {}", key, value));
140 }
141 if result.is_empty() {
142 return None;
143 }
144 Some(result.join("; "))
145}
146
/// A parsed `<body/>` element.
#[derive(Debug, Clone)]
struct Body {
    /// Declarations parsed from the `style` attribute; empty when absent.
    style: Css,
    /// Value of the `xml:lang` attribute, if any.
    xml_lang: Option<String>,
    /// Child nodes, in document order.
    children: Vec<Child>,
}
153
154impl TryFrom<Element> for Body {
155 type Error = Error;
156
157 fn try_from(elem: Element) -> Result<Body, Error> {
158 let mut children = vec![];
159 for child in elem.nodes() {
160 match child {
161 Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
162 Node::Text(text) => children.push(Child::Text(text.clone())),
163 Node::Comment(_) => unimplemented!() // XXX: remove!
164 }
165 }
166
167 Ok(Body { style: parse_css(elem.attr("style")), xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()), children })
168 }
169}
170
171impl From<Body> for Element {
172 fn from(body: Body) -> Element {
173 Element::builder("body")
174 .ns(ns::XHTML)
175 .attr("style", get_style_string(body.style))
176 .attr("xml:lang", body.xml_lang)
177 .append_all(children_to_nodes(body.children))
178 .build()
179 }
180}
181
/// The elements this module knows how to parse and serialise.
///
/// Any other element name is parsed as `Unknown`.
#[derive(Debug, Clone)]
enum Tag {
    /// `<a/>`, with its optional `href`, `style` and `type` attributes.
    A { href: Option<String>, style: Css, type_: Option<String>, children: Vec<Child> },
    /// `<blockquote/>`.
    Blockquote { style: Css, children: Vec<Child> },
    /// `<br/>`; carries neither attributes nor children.
    Br,
    /// `<cite/>`.
    Cite { style: Css, children: Vec<Child> },
    /// `<em/>`.
    Em { children: Vec<Child> },
    /// `<img/>`, with its optional `src` and `alt` attributes.
    Img { src: Option<String>, alt: Option<String> }, // TODO: height, width, style
    /// `<li/>`.
    Li { style: Css, children: Vec<Child> },
    /// `<ol/>`.
    Ol { style: Css, children: Vec<Child> },
    /// `<p/>`.
    P { style: Css, children: Vec<Child> },
    /// `<span/>`.
    Span { style: Css, children: Vec<Child> },
    /// `<strong/>`.
    Strong { children: Vec<Child> },
    /// `<ul/>`.
    Ul { style: Css, children: Vec<Child> },
    /// An element with an unrecognised name; only its children are kept.
    Unknown(Vec<Child>),
}
198
199impl Tag {
200 fn to_html(self) -> String {
201 match self {
202 Tag::A { href, style, type_, children } => {
203 let href = write_attr(href, "href");
204 let style = write_attr(get_style_string(style), "style");
205 let type_ = write_attr(type_, "type");
206 format!("<a{}{}{}>{}</a>", href, style, type_, children_to_html(children))
207 },
208 Tag::Blockquote { style, children } => {
209 let style = write_attr(get_style_string(style), "style");
210 format!("<blockquote{}>{}</blockquote>", style, children_to_html(children))
211 },
212 Tag::Br => String::from("<br>"),
213 Tag::Cite { style, children } => {
214 let style = write_attr(get_style_string(style), "style");
215 format!("<cite{}>{}</cite>", style, children_to_html(children))
216 },
217 Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
218 Tag::Img { src, alt } => {
219 let src = write_attr(src, "src");
220 let alt = write_attr(alt, "alt");
221 format!("<img{}{}>", src, alt)
222 }
223 Tag::Li { style, children } => {
224 let style = write_attr(get_style_string(style), "style");
225 format!("<li{}>{}</li>", style, children_to_html(children))
226 }
227 Tag::Ol { style, children } => {
228 let style = write_attr(get_style_string(style), "style");
229 format!("<ol{}>{}</ol>", style, children_to_html(children))
230 }
231 Tag::P { style, children } => {
232 let style = write_attr(get_style_string(style), "style");
233 format!("<p{}>{}</p>", style, children_to_html(children))
234 }
235 Tag::Span { style, children } => {
236 let style = write_attr(get_style_string(style), "style");
237 format!("<span{}>{}</span>", style, children_to_html(children))
238 }
239 Tag::Strong { children } => format!("<strong>{}</strong>", children_to_html(children)),
240 Tag::Ul { style, children } => {
241 let style = write_attr(get_style_string(style), "style");
242 format!("<ul{}>{}</ul>", style, children_to_html(children))
243 }
244 Tag::Unknown(_) => panic!("No unknown element should be present in XHTML-IM after parsing."),
245 }
246 }
247}
248
249impl TryFrom<Element> for Tag {
250 type Error = Error;
251
252 fn try_from(elem: Element) -> Result<Tag, Error> {
253 let mut children = vec![];
254 for child in elem.nodes() {
255 match child {
256 Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
257 Node::Text(text) => children.push(Child::Text(text.clone())),
258 Node::Comment(_) => unimplemented!() // XXX: remove!
259 }
260 }
261
262 Ok(match elem.name() {
263 "a" => Tag::A { href: elem.attr("href").map(|href| href.to_string()), style: parse_css(elem.attr("style")), type_: elem.attr("type").map(|type_| type_.to_string()), children },
264 "blockquote" => Tag::Blockquote { style: parse_css(elem.attr("style")), children },
265 "br" => Tag::Br,
266 "cite" => Tag::Cite { style: parse_css(elem.attr("style")), children },
267 "em" => Tag::Em { children },
268 "img" => Tag::Img { src: elem.attr("src").map(|src| src.to_string()), alt: elem.attr("alt").map(|alt| alt.to_string()) },
269 "li" => Tag::Li { style: parse_css(elem.attr("style")), children },
270 "ol" => Tag::Ol { style: parse_css(elem.attr("style")), children },
271 "p" => Tag::P { style: parse_css(elem.attr("style")), children },
272 "span" => Tag::Span { style: parse_css(elem.attr("style")), children },
273 "strong" => Tag::Strong { children },
274 "ul" => Tag::Ul { style: parse_css(elem.attr("style")), children },
275 _ => Tag::Unknown(children),
276 })
277 }
278}
279
280impl From<Tag> for Element {
281 fn from(tag: Tag) -> Element {
282 let (name, attrs, children) = match tag {
283 Tag::A { href, style, type_, children } => ("a", {
284 let mut attrs = vec![];
285 if let Some(href) = href {
286 attrs.push(("href", href));
287 }
288 if let Some(style) = get_style_string(style) {
289 attrs.push(("style", style));
290 }
291 if let Some(type_) = type_ {
292 attrs.push(("type", type_));
293 }
294 attrs
295 }, children),
296 Tag::Blockquote { style, children } => ("blockquote", match get_style_string(style) {
297 Some(style) => vec![("style", style)],
298 None => vec![],
299 }, children),
300 Tag::Br => ("br", vec![], vec![]),
301 Tag::Cite { style, children } => ("cite", match get_style_string(style) {
302 Some(style) => vec![("style", style)],
303 None => vec![],
304 }, children),
305 Tag::Em { children } => ("em", vec![], children),
306 Tag::Img { src, alt } => {
307 let mut attrs = vec![];
308 if let Some(src) = src {
309 attrs.push(("src", src));
310 }
311 if let Some(alt) = alt {
312 attrs.push(("alt", alt));
313 }
314 ("img", attrs, vec![])
315 },
316 Tag::Li { style, children } => ("li", match get_style_string(style) {
317 Some(style) => vec![("style", style)],
318 None => vec![],
319 }, children),
320 Tag::Ol { style, children } => ("ol", match get_style_string(style) {
321 Some(style) => vec![("style", style)],
322 None => vec![],
323 }, children),
324 Tag::P { style, children } => ("p", match get_style_string(style) {
325 Some(style) => vec![("style", style)],
326 None => vec![],
327 }, children),
328 Tag::Span { style, children } => ("span", match get_style_string(style) {
329 Some(style) => vec![("style", style)],
330 None => vec![],
331 }, children),
332 Tag::Strong { children } => ("strong", vec![], children),
333 Tag::Ul { style, children } => ("ul", match get_style_string(style) {
334 Some(style) => vec![("style", style)],
335 None => vec![],
336 }, children),
337 Tag::Unknown(_) => panic!("No unknown element should be present in XHTML-IM after parsing."),
338 };
339 let mut builder = Element::builder(name)
340 .ns(ns::XHTML)
341 .append_all(children_to_nodes(children));
342 for (key, value) in attrs {
343 builder = builder.attr(key, value);
344 }
345 builder.build()
346 }
347}
348
349fn children_to_nodes(children: Vec<Child>) -> impl IntoIterator<Item = Node> {
350 children.into_iter().map(|child| match child {
351 Child::Tag(tag) => Node::Element(Element::from(tag)),
352 Child::Text(text) => Node::Text(text),
353 })
354}
355
356fn children_to_html(children: Vec<Child>) -> String {
357 children.into_iter().map(|child| child.to_html()).collect::<Vec<_>>().concat()
358}
359
/// Formats an optional attribute as ` name='value'` for inclusion in a tag.
///
/// Returns an empty string when `attr` is `None`. The value is escaped so
/// that it can neither close the single-quoted attribute nor introduce
/// markup; `name` is written as-is and is expected to be a literal.
fn write_attr(attr: Option<String>, name: &str) -> String {
    let value = match attr {
        Some(value) => value,
        None => return String::new(),
    };

    let mut out = String::with_capacity(name.len() + value.len() + 4);
    out.push(' ');
    out.push_str(name);
    out.push_str("='");
    for c in value.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '\'' => out.push_str("&#39;"),
            '"' => out.push_str("&quot;"),
            other => out.push(other),
        }
    }
    out.push('\'');
    out
}
366
367fn parse_css(style: Option<&str>) -> Css {
368 let mut properties = vec![];
369 if let Some(style) = style {
370 // TODO: make that parser a bit more resilient to things.
371 for part in style.split(";") {
372 let mut part = part.splitn(2, ":").map(|a| a.to_string()).collect::<Vec<_>>();
373 let key = part.pop().unwrap();
374 let value = part.pop().unwrap();
375 properties.push(Property { key, value });
376 }
377 }
378 properties
379}
380
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses an XML snippet into an `Element`, panicking on malformed input.
    fn parse(xml: &str) -> Element {
        xml.parse().unwrap()
    }

    /// Parses an XML snippet all the way to `XhtmlIm`, panicking on failure.
    fn parse_xhtml(xml: &str) -> XhtmlIm {
        XhtmlIm::try_from(parse(xml)).unwrap()
    }

    #[cfg(target_pointer_width = "32")]
    #[test]
    #[ignore]
    fn test_size() {
        assert_size!(XhtmlIm, 0);
        assert_size!(Child, 0);
        assert_size!(Tag, 0);
    }

    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 56);
        assert_size!(Child, 112);
        assert_size!(Tag, 104);
    }

    /// Zero, one and two bodies are all accepted and counted correctly.
    #[test]
    fn test_empty() {
        let cases = [
            ("<html xmlns='http://jabber.org/protocol/xhtml-im'/>", 0),
            ("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>", 1),
            ("<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>", 2),
        ];
        for &(xml, expected) in cases.iter() {
            let xhtml = parse_xhtml(xml);
            assert_eq!(xhtml.bodies.len(), expected);
        }
    }

    /// Two bodies without `xml:lang` collide on the empty language.
    #[test]
    fn invalid_two_same_langs() {
        let elem = parse("<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>");
        let message = match XhtmlIm::try_from(elem).unwrap_err() {
            Error::ParseError(string) => string,
            _ => panic!(),
        };
        assert_eq!(message, "Two identical language bodies found in XHTML-IM.");
    }

    /// A body parses into the expected tree of tags and text.
    #[test]
    fn test_tag() {
        let empty = Body::try_from(parse("<body xmlns='http://www.w3.org/1999/xhtml'/>")).unwrap();
        assert_eq!(empty.children.len(), 0);

        let mut body = Body::try_from(parse(
            "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>",
        ))
        .unwrap();
        assert_eq!(body.style.len(), 0);
        assert_eq!(body.xml_lang, None);
        assert_eq!(body.children.len(), 1);

        let (style, mut children) = match body.children.pop() {
            Some(Child::Tag(Tag::P { style, children })) => (style, children),
            _ => panic!(),
        };
        assert_eq!(style.len(), 0);
        assert_eq!(children.len(), 1);

        match children.pop() {
            Some(Child::Text(text)) => assert_eq!(text, "Hello world!"),
            _ => panic!(),
        }
    }

    /// An unknown element directly under the body is replaced by its content,
    /// both in the HTML output and when converting back to an element.
    #[test]
    fn test_unknown_element() {
        let parsed = parse_xhtml("<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>");
        let for_element = parsed.clone();

        assert_eq!(parsed.to_html(), "Hello world!");

        let elem = Element::from(for_element);
        assert_eq!(String::from(&elem), "<?xml version=\"1.0\" encoding=\"utf-8\"?><html xmlns=\"http://jabber.org/protocol/xhtml-im\"><body xmlns=\"http://www.w3.org/1999/xhtml\">Hello world!</body></html>");
    }

    /// Known elements are rendered back as the same HTML.
    #[test]
    fn test_generate_html() {
        let cases = [
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>",
                "<p>Hello world!</p>",
            ),
            (
                "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>",
                "<p>Hello <strong>world</strong>!</p>",
            ),
        ];
        for &(xml, expected) in cases.iter() {
            let html = parse_xhtml(xml).to_html();
            assert_eq!(html, expected);
        }
    }

    /// The tree types can be assembled by hand, moving owned strings in.
    #[test]
    fn generate_tree() {
        let world = "world".to_string();

        let strong = Tag::Strong {
            children: vec![Child::Text(world)],
        };
        let paragraph = Tag::P {
            style: vec![],
            children: vec![
                Child::Text("Hello ".to_string()),
                Child::Tag(strong),
                Child::Text("!".to_string()),
            ],
        };
        let _body = Body {
            style: vec![],
            xml_lang: Some("en".to_string()),
            children: vec![Child::Tag(paragraph)],
        };
    }
}