1// Copyright (c) 2019 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7use crate::message::MessagePayload;
8use crate::ns;
9use crate::util::error::Error;
10use minidom::{Element, Node};
11use std::collections::HashMap;
12
// TODO: Use a proper lang type.
/// Language identifier used as the key of the body map in `XhtmlIm`.
///
/// When parsing, this is the raw value of a body's `xml:lang` attribute, or
/// the empty string when that attribute is absent.
type Lang = String;
15
/// Container for formatted text.
///
/// Holds one parsed `<body/>` per language. It is built from an `<html/>`
/// element through `TryFrom<Element>` and can be turned back into an
/// `Element` or rendered as an HTML string.
#[derive(Debug, Clone)]
pub struct XhtmlIm {
    /// Map of language to body element.
    ///
    /// The key is the body's `xml:lang` value, or the empty string for a body
    /// which carries no `xml:lang` attribute.
    bodies: HashMap<Lang, Body>,
}
22
23impl XhtmlIm {
24 /// Serialise formatted text to HTML.
25 pub fn into_html(self) -> String {
26 let mut html = Vec::new();
27 // TODO: use the best language instead.
28 for (lang, body) in self.bodies {
29 if lang.is_empty() {
30 assert!(body.xml_lang.is_none());
31 } else {
32 assert_eq!(Some(lang), body.xml_lang);
33 }
34 for tag in body.children {
35 html.push(tag.into_html());
36 }
37 break;
38 }
39 html.concat()
40 }
41
42 /// Removes all unknown elements.
43 fn flatten(self) -> XhtmlIm {
44 let mut bodies = HashMap::new();
45 for (lang, body) in self.bodies {
46 let children = body.children.into_iter().fold(vec![], |mut acc, child| {
47 match child {
48 Child::Tag(Tag::Unknown(children)) => acc.extend(children),
49 any => acc.push(any),
50 }
51 acc
52 });
53 let body = Body { children, ..body };
54 bodies.insert(lang, body);
55 }
56 XhtmlIm { bodies }
57 }
58}
59
// Marker implementation: the impl body is empty, so nothing beyond the trait's
// own defaults (if any) is provided for `XhtmlIm` here.
impl MessagePayload for XhtmlIm {}
61
62impl TryFrom<Element> for XhtmlIm {
63 type Error = Error;
64
65 fn try_from(elem: Element) -> Result<XhtmlIm, Error> {
66 check_self!(elem, "html", XHTML_IM);
67 check_no_attributes!(elem, "html");
68
69 let mut bodies = HashMap::new();
70 for child in elem.children() {
71 if child.is("body", ns::XHTML) {
72 let child = child.clone();
73 let lang = child.attr("xml:lang").unwrap_or("").to_string();
74 let body = Body::try_from(child)?;
75 match bodies.insert(lang, body) {
76 None => (),
77 Some(_) => {
78 return Err(Error::ParseError(
79 "Two identical language bodies found in XHTML-IM.",
80 ))
81 }
82 }
83 } else {
84 return Err(Error::ParseError("Unknown element in XHTML-IM."));
85 }
86 }
87
88 Ok(XhtmlIm { bodies }.flatten())
89 }
90}
91
92impl From<XhtmlIm> for Element {
93 fn from(wrapper: XhtmlIm) -> Element {
94 Element::builder("html", ns::XHTML_IM)
95 .append_all(wrapper.bodies.into_iter().map(|(lang, body)| {
96 if lang.is_empty() {
97 assert!(body.xml_lang.is_none());
98 } else {
99 assert_eq!(Some(lang), body.xml_lang);
100 }
101 Element::from(body)
102 }))
103 .build()
104 }
105}
106
/// A node found inside a body or inside another tag.
#[derive(Debug, Clone)]
enum Child {
    /// A nested element.
    Tag(Tag),
    /// A run of character data, stored as it comes out of the XML parser.
    Text(String),
}
112
113impl Child {
114 fn into_html(self) -> String {
115 match self {
116 Child::Tag(tag) => tag.into_html(),
117 Child::Text(text) => text,
118 }
119 }
120}
121
/// A single declaration taken from a `style` attribute.
#[derive(Debug, Clone)]
struct Property {
    /// Name part of the declaration; serialised before the `: ` separator.
    key: String,
    /// Value part of the declaration; serialised after the `: ` separator.
    value: String,
}
127
/// Ordered list of the declarations of one `style` attribute; an empty list
/// means no `style` attribute is written on output.
type Css = Vec<Property>;
129
130fn get_style_string(style: Css) -> Option<String> {
131 let mut result = vec![];
132 for Property { key, value } in style {
133 result.push(format!("{}: {}", key, value));
134 }
135 if result.is_empty() {
136 return None;
137 }
138 Some(result.join("; "))
139}
140
/// One parsed `<body/>` element.
#[derive(Debug, Clone)]
struct Body {
    /// Declarations parsed from the `style` attribute, empty when absent.
    style: Css,
    /// Value of the `xml:lang` attribute, `None` when the attribute is absent.
    xml_lang: Option<String>,
    /// Child nodes (elements and text) in document order.
    children: Vec<Child>,
}
147
148impl TryFrom<Element> for Body {
149 type Error = Error;
150
151 fn try_from(elem: Element) -> Result<Body, Error> {
152 let mut children = vec![];
153 for child in elem.nodes() {
154 match child {
155 Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
156 Node::Text(text) => children.push(Child::Text(text.clone())),
157 }
158 }
159
160 Ok(Body {
161 style: parse_css(elem.attr("style")),
162 xml_lang: elem.attr("xml:lang").map(|xml_lang| xml_lang.to_string()),
163 children,
164 })
165 }
166}
167
168impl From<Body> for Element {
169 fn from(body: Body) -> Element {
170 Element::builder("body", ns::XHTML)
171 .attr("style", get_style_string(body.style))
172 .attr("xml:lang", body.xml_lang)
173 .append_all(children_to_nodes(body.children))
174 .build()
175 }
176}
177
/// The elements understood by this module, plus a catch-all for the rest.
///
/// Variants are selected from the element name when parsing; the `style`
/// fields hold the parsed `style` attribute and `children` the nested nodes.
#[derive(Debug, Clone)]
enum Tag {
    /// An `<a/>` element with its optional `href` and `type` attributes.
    A {
        href: Option<String>,
        style: Css,
        type_: Option<String>,
        children: Vec<Child>,
    },
    /// A `<blockquote/>` element.
    Blockquote {
        style: Css,
        children: Vec<Child>,
    },
    /// A `<br/>` element; neither attributes nor children are kept.
    Br,
    /// A `<cite/>` element.
    Cite {
        style: Css,
        children: Vec<Child>,
    },
    /// An `<em/>` element; no attributes are kept.
    Em {
        children: Vec<Child>,
    },
    /// An `<img/>` element with its optional `src` and `alt` attributes;
    /// children are not kept.
    Img {
        src: Option<String>,
        alt: Option<String>,
    }, // TODO: height, width, style
    /// An `<li/>` element.
    Li {
        style: Css,
        children: Vec<Child>,
    },
    /// An `<ol/>` element.
    Ol {
        style: Css,
        children: Vec<Child>,
    },
    /// A `<p/>` element.
    P {
        style: Css,
        children: Vec<Child>,
    },
    /// A `<span/>` element.
    Span {
        style: Css,
        children: Vec<Child>,
    },
    /// A `<strong/>` element; no attributes are kept.
    Strong {
        children: Vec<Child>,
    },
    /// A `<ul/>` element.
    Ul {
        style: Css,
        children: Vec<Child>,
    },
    /// Any element with another name; only its children are retained.  The
    /// serialisers panic when handed this variant.
    Unknown(Vec<Child>),
}
227
228impl Tag {
229 fn into_html(self) -> String {
230 match self {
231 Tag::A {
232 href,
233 style,
234 type_,
235 children,
236 } => {
237 let href = write_attr(href, "href");
238 let style = write_attr(get_style_string(style), "style");
239 let type_ = write_attr(type_, "type");
240 format!(
241 "<a{}{}{}>{}</a>",
242 href,
243 style,
244 type_,
245 children_to_html(children)
246 )
247 }
248 Tag::Blockquote { style, children } => {
249 let style = write_attr(get_style_string(style), "style");
250 format!(
251 "<blockquote{}>{}</blockquote>",
252 style,
253 children_to_html(children)
254 )
255 }
256 Tag::Br => String::from("<br>"),
257 Tag::Cite { style, children } => {
258 let style = write_attr(get_style_string(style), "style");
259 format!("<cite{}>{}</cite>", style, children_to_html(children))
260 }
261 Tag::Em { children } => format!("<em>{}</em>", children_to_html(children)),
262 Tag::Img { src, alt } => {
263 let src = write_attr(src, "src");
264 let alt = write_attr(alt, "alt");
265 format!("<img{}{}>", src, alt)
266 }
267 Tag::Li { style, children } => {
268 let style = write_attr(get_style_string(style), "style");
269 format!("<li{}>{}</li>", style, children_to_html(children))
270 }
271 Tag::Ol { style, children } => {
272 let style = write_attr(get_style_string(style), "style");
273 format!("<ol{}>{}</ol>", style, children_to_html(children))
274 }
275 Tag::P { style, children } => {
276 let style = write_attr(get_style_string(style), "style");
277 format!("<p{}>{}</p>", style, children_to_html(children))
278 }
279 Tag::Span { style, children } => {
280 let style = write_attr(get_style_string(style), "style");
281 format!("<span{}>{}</span>", style, children_to_html(children))
282 }
283 Tag::Strong { children } => format!("<strong>{}</strong>", children_to_html(children)),
284 Tag::Ul { style, children } => {
285 let style = write_attr(get_style_string(style), "style");
286 format!("<ul{}>{}</ul>", style, children_to_html(children))
287 }
288 Tag::Unknown(_) => {
289 panic!("No unknown element should be present in XHTML-IM after parsing.")
290 }
291 }
292 }
293}
294
295impl TryFrom<Element> for Tag {
296 type Error = Error;
297
298 fn try_from(elem: Element) -> Result<Tag, Error> {
299 let mut children = vec![];
300 for child in elem.nodes() {
301 match child {
302 Node::Element(child) => children.push(Child::Tag(Tag::try_from(child.clone())?)),
303 Node::Text(text) => children.push(Child::Text(text.clone())),
304 }
305 }
306
307 Ok(match elem.name() {
308 "a" => Tag::A {
309 href: elem.attr("href").map(|href| href.to_string()),
310 style: parse_css(elem.attr("style")),
311 type_: elem.attr("type").map(|type_| type_.to_string()),
312 children,
313 },
314 "blockquote" => Tag::Blockquote {
315 style: parse_css(elem.attr("style")),
316 children,
317 },
318 "br" => Tag::Br,
319 "cite" => Tag::Cite {
320 style: parse_css(elem.attr("style")),
321 children,
322 },
323 "em" => Tag::Em { children },
324 "img" => Tag::Img {
325 src: elem.attr("src").map(|src| src.to_string()),
326 alt: elem.attr("alt").map(|alt| alt.to_string()),
327 },
328 "li" => Tag::Li {
329 style: parse_css(elem.attr("style")),
330 children,
331 },
332 "ol" => Tag::Ol {
333 style: parse_css(elem.attr("style")),
334 children,
335 },
336 "p" => Tag::P {
337 style: parse_css(elem.attr("style")),
338 children,
339 },
340 "span" => Tag::Span {
341 style: parse_css(elem.attr("style")),
342 children,
343 },
344 "strong" => Tag::Strong { children },
345 "ul" => Tag::Ul {
346 style: parse_css(elem.attr("style")),
347 children,
348 },
349 _ => Tag::Unknown(children),
350 })
351 }
352}
353
354impl From<Tag> for Element {
355 fn from(tag: Tag) -> Element {
356 let (name, attrs, children) = match tag {
357 Tag::A {
358 href,
359 style,
360 type_,
361 children,
362 } => (
363 "a",
364 {
365 let mut attrs = vec![];
366 if let Some(href) = href {
367 attrs.push(("href", href));
368 }
369 if let Some(style) = get_style_string(style) {
370 attrs.push(("style", style));
371 }
372 if let Some(type_) = type_ {
373 attrs.push(("type", type_));
374 }
375 attrs
376 },
377 children,
378 ),
379 Tag::Blockquote { style, children } => (
380 "blockquote",
381 match get_style_string(style) {
382 Some(style) => vec![("style", style)],
383 None => vec![],
384 },
385 children,
386 ),
387 Tag::Br => ("br", vec![], vec![]),
388 Tag::Cite { style, children } => (
389 "cite",
390 match get_style_string(style) {
391 Some(style) => vec![("style", style)],
392 None => vec![],
393 },
394 children,
395 ),
396 Tag::Em { children } => ("em", vec![], children),
397 Tag::Img { src, alt } => {
398 let mut attrs = vec![];
399 if let Some(src) = src {
400 attrs.push(("src", src));
401 }
402 if let Some(alt) = alt {
403 attrs.push(("alt", alt));
404 }
405 ("img", attrs, vec![])
406 }
407 Tag::Li { style, children } => (
408 "li",
409 match get_style_string(style) {
410 Some(style) => vec![("style", style)],
411 None => vec![],
412 },
413 children,
414 ),
415 Tag::Ol { style, children } => (
416 "ol",
417 match get_style_string(style) {
418 Some(style) => vec![("style", style)],
419 None => vec![],
420 },
421 children,
422 ),
423 Tag::P { style, children } => (
424 "p",
425 match get_style_string(style) {
426 Some(style) => vec![("style", style)],
427 None => vec![],
428 },
429 children,
430 ),
431 Tag::Span { style, children } => (
432 "span",
433 match get_style_string(style) {
434 Some(style) => vec![("style", style)],
435 None => vec![],
436 },
437 children,
438 ),
439 Tag::Strong { children } => ("strong", vec![], children),
440 Tag::Ul { style, children } => (
441 "ul",
442 match get_style_string(style) {
443 Some(style) => vec![("style", style)],
444 None => vec![],
445 },
446 children,
447 ),
448 Tag::Unknown(_) => {
449 panic!("No unknown element should be present in XHTML-IM after parsing.")
450 }
451 };
452 let mut builder = Element::builder(name, ns::XHTML).append_all(children_to_nodes(children));
453 for (key, value) in attrs {
454 builder = builder.attr(key, value);
455 }
456 builder.build()
457 }
458}
459
460fn children_to_nodes(children: Vec<Child>) -> impl IntoIterator<Item = Node> {
461 children.into_iter().map(|child| match child {
462 Child::Tag(tag) => Node::Element(Element::from(tag)),
463 Child::Text(text) => Node::Text(text),
464 })
465}
466
467fn children_to_html(children: Vec<Child>) -> String {
468 children
469 .into_iter()
470 .map(|child| child.into_html())
471 .collect::<Vec<_>>()
472 .concat()
473}
474
/// Formats an optional attribute as ` name='value'`, with a leading space, or
/// as the empty string when the attribute is absent.
///
/// The value is escaped for use inside a quoted HTML attribute.  It comes
/// from a remote entity, and an unescaped `'` would close the attribute early
/// and let the rest of the value be read as further attributes or markup.
fn write_attr(attr: Option<String>, name: &str) -> String {
    let value = match attr {
        Some(value) => value,
        None => return String::new(),
    };

    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '\'' => escaped.push_str("&#39;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    format!(" {}='{}'", name, escaped)
}
481
482fn parse_css(style: Option<&str>) -> Css {
483 let mut properties = vec![];
484 if let Some(style) = style {
485 // TODO: make that parser a bit more resilient to things.
486 for part in style.split(';') {
487 let mut part = part
488 .splitn(2, ':')
489 .map(|a| a.to_string())
490 .collect::<Vec<_>>();
491 let key = part.pop().unwrap();
492 let value = part.pop().unwrap();
493 properties.push(Property { key, value });
494 }
495 }
496 properties
497}
498
// Unit tests for parsing, flattening and serialising XHTML-IM payloads.
#[cfg(test)]
mod tests {
    use super::*;

    // Pins the size of the main types on 32-bit targets, through the
    // project's `assert_size!` macro (defined outside of this file).
    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 32);
        assert_size!(Child, 48);
        assert_size!(Tag, 48);
    }

    // Same as above, for 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size() {
        assert_size!(XhtmlIm, 48);
        assert_size!(Child, 96);
        assert_size!(Tag, 96);
    }

    // An <html/> wrapper may hold zero, one, or several bodies as long as
    // their languages differ.
    #[test]
    fn test_empty() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'/>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 0);

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 1);

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body xml:lang='fr'/><html:body xml:lang='en'/></html>"
            .parse()
            .unwrap();
        let xhtml = XhtmlIm::try_from(elem).unwrap();
        assert_eq!(xhtml.bodies.len(), 2);
    }

    // Two bodies without xml:lang share the empty language and are rejected.
    #[test]
    fn invalid_two_same_langs() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im' xmlns:html='http://www.w3.org/1999/xhtml'><html:body/><html:body/></html>"
            .parse()
            .unwrap();
        let error = XhtmlIm::try_from(elem).unwrap_err();
        let message = match error {
            Error::ParseError(string) => string,
            _ => panic!(),
        };
        assert_eq!(message, "Two identical language bodies found in XHTML-IM.");
    }

    // Parses a body directly and walks the resulting tree down to its text.
    #[test]
    fn test_tag() {
        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'/>"
            .parse()
            .unwrap();
        let body = Body::try_from(elem).unwrap();
        assert_eq!(body.children.len(), 0);

        let elem: Element = "<body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body>"
            .parse()
            .unwrap();
        let mut body = Body::try_from(elem).unwrap();
        assert_eq!(body.style.len(), 0);
        assert_eq!(body.xml_lang, None);
        assert_eq!(body.children.len(), 1);
        let p = match body.children.pop() {
            Some(Child::Tag(tag)) => tag,
            _ => panic!(),
        };
        let mut children = match p {
            Tag::P { style, children } => {
                assert_eq!(style.len(), 0);
                assert_eq!(children.len(), 1);
                children
            }
            _ => panic!(),
        };
        let text = match children.pop() {
            Some(Child::Text(text)) => text,
            _ => panic!(),
        };
        assert_eq!(text, "Hello world!");
    }

    // An unknown element directly inside the body is dropped while its text
    // is kept, both in the HTML output and in the re-serialised element.
    #[test]
    fn test_unknown_element() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><coucou>Hello world!</coucou></body></html>"
            .parse()
            .unwrap();
        let parsed = XhtmlIm::try_from(elem).unwrap();
        let parsed2 = parsed.clone();
        let html = parsed.into_html();
        assert_eq!(html, "Hello world!");

        let elem = Element::from(parsed2);
        assert_eq!(String::from(&elem), "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'>Hello world!</body></html>");
    }

    // Known elements, nested or not, are rendered back as the same markup.
    #[test]
    fn test_generate_html() {
        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello world!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.into_html();
        assert_eq!(html, "<p>Hello world!</p>");

        let elem: Element = "<html xmlns='http://jabber.org/protocol/xhtml-im'><body xmlns='http://www.w3.org/1999/xhtml'><p>Hello <strong>world</strong>!</p></body></html>"
            .parse()
            .unwrap();
        let xhtml_im = XhtmlIm::try_from(elem).unwrap();
        let html = xhtml_im.into_html();
        assert_eq!(html, "<p>Hello <strong>world</strong>!</p>");
    }

    // Only checks that a tree can be built by hand; the value is discarded
    // and nothing is asserted about it.
    #[test]
    fn generate_tree() {
        let world = "world".to_string();

        Body {
            style: vec![],
            xml_lang: Some("en".to_string()),
            children: vec![Child::Tag(Tag::P {
                style: vec![],
                children: vec![
                    Child::Text("Hello ".to_string()),
                    Child::Tag(Tag::Strong {
                        children: vec![Child::Text(world)],
                    }),
                    Child::Text("!".to_string()),
                ],
            })],
        };
    }
}