1#include <stdio.h>
2#include <string.h>
3
4#include "md4c-html.h"
5#include "entity.h"
6
/* "inline" only became a keyword with C99 (__STDC_VERSION__ == 199901L).
 * Anything older -- C89/C90, C95 (199409L), or a compiler that does not
 * define __STDC_VERSION__ at all -- gets a compiler-specific spelling, or
 * has the keyword removed altogether when no such spelling is known. */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
    #if defined __GNUC__
        #define inline __inline__
    #elif defined _MSC_VER
        #define inline __inline
    #else
        #define inline
    #endif
#endif
17
/* When building for Windows (_WIN32 defined), every use of snprintf() in
 * this file is redirected to _snprintf().
 * NOTE(review): _snprintf() is documented by Microsoft as not writing a
 * terminating NUL when the output is truncated; the only snprintf() call
 * visible in this file formats one unsigned number into a 64-byte buffer. */
#ifdef _WIN32
 #define snprintf _snprintf
#endif
21
/* State of one HTML rendering run. An instance is created on the stack by
 * md_html() and handed to every parser callback through its userdata
 * pointer.
 *
 * NOTE: md_html() fills this structure with a positional initializer, so
 * the order of the members must not be changed. */
typedef struct MD_HTML_tag MD_HTML;
struct MD_HTML_tag {
 /* Output sink; called by render_verbatim() with (chunk, size, userdata). */
 void (*process_output)(const MD_CHAR*, MD_SIZE, void*);
 /* Opaque pointer passed back unchanged to process_output(). */
 void* userdata;
 /* Renderer flags (tested against the MD_HTML_FLAG_* constants). */
 unsigned flags;
 /* Incremented when an MD_SPAN_IMG is entered and decremented when it is
  * left; while it is positive, span tags are not emitted. */
 int image_nesting_level;
 /* Per-byte bit mask of NEED_HTML_ESC_FLAG / NEED_URL_ESC_FLAG, built once
  * by md_html() and indexed by (unsigned char) byte value. */
 char escape_map[256];
};
30
/* Bits stored per byte in MD_HTML::escape_map[]. */
#define NEED_HTML_ESC_FLAG      0x1     /* Byte must be escaped in HTML output. */
#define NEED_URL_ESC_FLAG       0x2     /* Byte must be escaped in URL output. */

/* Character classification by plain range comparison against character
 * literals (locale-independent). The argument is evaluated more than once,
 * so it must be free of side effects. */
#define ISDIGIT(ch)     ((ch) >= '0' && '9' >= (ch))
#define ISLOWER(ch)     ((ch) >= 'a' && 'z' >= (ch))
#define ISUPPER(ch)     ((ch) >= 'A' && 'Z' >= (ch))
#define ISALNUM(ch)     (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
38
39static inline void
40render_verbatim(MD_HTML* r, const MD_CHAR* text, MD_SIZE size)
41{
42 r->process_output(text, size, r->userdata);
43}
44
/* Convenience wrapper around render_verbatim() for NUL-terminated strings:
 * the length is taken with strlen(). The VERBATIM argument is expanded
 * twice, so it must not have side effects. */
#define RENDER_VERBATIM(r, verbatim) \
 render_verbatim((r), (verbatim), (MD_SIZE) (strlen(verbatim)))
47
48static void
49render_html_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
50{
51 MD_OFFSET beg = 0;
52 MD_OFFSET off = 0;
53
54 #define NEED_HTML_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_HTML_ESC_FLAG)
55
56 while(1) {
57
58 while(off + 3 < size && !NEED_HTML_ESC(data[off+0]) && !NEED_HTML_ESC(data[off+1])
59 && !NEED_HTML_ESC(data[off+2]) && !NEED_HTML_ESC(data[off+3]))
60 off += 4;
61 while(off < size && !NEED_HTML_ESC(data[off]))
62 off++;
63
64 if(off > beg)
65 render_verbatim(r, data + beg, off - beg);
66
67 if(off < size) {
68 switch(data[off]) {
69 case '&': RENDER_VERBATIM(r, "&"); break;
70 case '<': RENDER_VERBATIM(r, "<"); break;
71 case '>': RENDER_VERBATIM(r, ">"); break;
72 case '"': RENDER_VERBATIM(r, """); break;
73 }
74 off++;
75 } else {
76 break;
77 }
78 beg = off;
79 }
80}
81
82static void
83render_url_escaped(MD_HTML* r, const MD_CHAR* data, MD_SIZE size)
84{
85 static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
86 MD_OFFSET beg = 0;
87 MD_OFFSET off = 0;
88
89 #define NEED_URL_ESC(ch) (r->escape_map[(unsigned char)(ch)] & NEED_URL_ESC_FLAG)
90
91 while(1) {
92 while(off < size && !NEED_URL_ESC(data[off]))
93 off++;
94 if(off > beg)
95 render_verbatim(r, data + beg, off - beg);
96
97 if(off < size) {
98 char hex[3];
99
100 switch(data[off]) {
101 case '&': RENDER_VERBATIM(r, "&"); break;
102 default:
103 hex[0] = '%';
104 hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf];
105 hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf];
106 render_verbatim(r, hex, 3);
107 break;
108 }
109 off++;
110 } else {
111 break;
112 }
113
114 beg = off;
115 }
116}
117
/* Translate one hexadecimal digit character into its numeric value.
 *
 * '0'..'9' map to 0..9; any character in 'A'..'Z' is taken relative to 'A'
 * (so 'A'..'F' map to 10..15); everything else is taken relative to 'a'
 * (so 'a'..'f' map to 10..15). No validation is done: the result for a
 * character that is not a hex digit is not meaningful. */
static unsigned
hex_val(char ch)
{
    int origin;     /* Character which corresponds to the value zero. */

    if(ch >= '0'  &&  ch <= '9')
        origin = '0';
    else if(ch >= 'A'  &&  ch <= 'Z')
        origin = 'A' - 10;
    else
        origin = 'a' - 10;

    return ch - origin;
}
128
129static void
130render_utf8_codepoint(MD_HTML* r, unsigned codepoint,
131 void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
132{
133 static const MD_CHAR utf8_replacement_char[] = { (char)0xef, (char)0xbf, (char)0xbd };
134
135 unsigned char utf8[4];
136 size_t n;
137
138 if(codepoint <= 0x7f) {
139 n = 1;
140 utf8[0] = codepoint;
141 } else if(codepoint <= 0x7ff) {
142 n = 2;
143 utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
144 utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
145 } else if(codepoint <= 0xffff) {
146 n = 3;
147 utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
148 utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
149 utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
150 } else {
151 n = 4;
152 utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
153 utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
154 utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
155 utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
156 }
157
158 if(0 < codepoint && codepoint <= 0x10ffff)
159 fn_append(r, (char*)utf8, (MD_SIZE)n);
160 else
161 fn_append(r, utf8_replacement_char, 3);
162}
163
164static void
165render_entity(MD_HTML* r, const MD_CHAR* text, MD_SIZE size,
166 void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
167{
168 if(r->flags & MD_HTML_FLAG_VERBATIM_ENTITIES) {
169 render_verbatim(r, text, size);
170 return;
171 }
172
173 if(size > 3 && text[1] == '#') {
174 unsigned codepoint = 0;
175
176 if(text[2] == 'x' || text[2] == 'X') {
177
178 MD_SIZE i;
179 for(i = 3; i < size-1; i++)
180 codepoint = 16 * codepoint + hex_val(text[i]);
181 } else {
182
183 MD_SIZE i;
184 for(i = 2; i < size-1; i++)
185 codepoint = 10 * codepoint + (text[i] - '0');
186 }
187
188 render_utf8_codepoint(r, codepoint, fn_append);
189 return;
190 } else {
191
192 const ENTITY* ent;
193
194 ent = entity_lookup(text, size);
195 if(ent != NULL) {
196 render_utf8_codepoint(r, ent->codepoints[0], fn_append);
197 if(ent->codepoints[1])
198 render_utf8_codepoint(r, ent->codepoints[1], fn_append);
199 return;
200 }
201 }
202
203 fn_append(r, text, size);
204}
205
206static void
207render_attribute(MD_HTML* r, const MD_ATTRIBUTE* attr,
208 void (*fn_append)(MD_HTML*, const MD_CHAR*, MD_SIZE))
209{
210 int i;
211
212 for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
213 MD_TEXTTYPE type = attr->substr_types[i];
214 MD_OFFSET off = attr->substr_offsets[i];
215 MD_SIZE size = attr->substr_offsets[i+1] - off;
216 const MD_CHAR* text = attr->text + off;
217
218 switch(type) {
219 case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_verbatim); break;
220 case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break;
221 default: fn_append(r, text, size); break;
222 }
223 }
224}
225
226static void
227render_open_ol_block(MD_HTML* r, const MD_BLOCK_OL_DETAIL* det)
228{
229 char buf[64];
230
231 if(det->start == 1) {
232 RENDER_VERBATIM(r, "<ol>\n");
233 return;
234 }
235
236 snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
237 RENDER_VERBATIM(r, buf);
238}
239
240static void
241render_open_li_block(MD_HTML* r, const MD_BLOCK_LI_DETAIL* det)
242{
243 if(det->is_task) {
244 RENDER_VERBATIM(r, "<li class=\"task-list-item\">"
245 "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
246 if(det->task_mark == 'x' || det->task_mark == 'X')
247 RENDER_VERBATIM(r, " checked");
248 RENDER_VERBATIM(r, ">");
249 } else {
250 RENDER_VERBATIM(r, "<li>");
251 }
252}
253
254static void
255render_open_code_block(MD_HTML* r, const MD_BLOCK_CODE_DETAIL* det)
256{
257 RENDER_VERBATIM(r, "<pre><code");
258
259 if(det->lang.text != NULL) {
260 RENDER_VERBATIM(r, " class=\"language-");
261 render_attribute(r, &det->lang, render_html_escaped);
262 RENDER_VERBATIM(r, "\"");
263 }
264
265 RENDER_VERBATIM(r, ">");
266}
267
268static void
269render_open_td_block(MD_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
270{
271 RENDER_VERBATIM(r, "<");
272 RENDER_VERBATIM(r, cell_type);
273
274 switch(det->align) {
275 case MD_ALIGN_LEFT: RENDER_VERBATIM(r, " align=\"left\">"); break;
276 case MD_ALIGN_CENTER: RENDER_VERBATIM(r, " align=\"center\">"); break;
277 case MD_ALIGN_RIGHT: RENDER_VERBATIM(r, " align=\"right\">"); break;
278 default: RENDER_VERBATIM(r, ">"); break;
279 }
280}
281
282static void
283render_open_a_span(MD_HTML* r, const MD_SPAN_A_DETAIL* det)
284{
285 RENDER_VERBATIM(r, "<a href=\"");
286 render_attribute(r, &det->href, render_url_escaped);
287
288 if(det->title.text != NULL) {
289 RENDER_VERBATIM(r, "\" title=\"");
290 render_attribute(r, &det->title, render_html_escaped);
291 }
292
293 RENDER_VERBATIM(r, "\">");
294}
295
296static void
297render_open_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
298{
299 RENDER_VERBATIM(r, "<img src=\"");
300 render_attribute(r, &det->src, render_url_escaped);
301
302 RENDER_VERBATIM(r, "\" alt=\"");
303}
304
305static void
306render_close_img_span(MD_HTML* r, const MD_SPAN_IMG_DETAIL* det)
307{
308 if(det->title.text != NULL) {
309 RENDER_VERBATIM(r, "\" title=\"");
310 render_attribute(r, &det->title, render_html_escaped);
311 }
312
313 RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\" />" : "\">");
314}
315
316static void
317render_open_wikilink_span(MD_HTML* r, const MD_SPAN_WIKILINK_DETAIL* det)
318{
319 RENDER_VERBATIM(r, "<x-wikilink data-target=\"");
320 render_attribute(r, &det->target, render_html_escaped);
321
322 RENDER_VERBATIM(r, "\">");
323}
324
325static int
326enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
327{
328 static const MD_CHAR* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
329 MD_HTML* r = (MD_HTML*) userdata;
330
331 switch(type) {
332 case MD_BLOCK_DOC: break;
333 case MD_BLOCK_QUOTE: RENDER_VERBATIM(r, "<blockquote>\n"); break;
334 case MD_BLOCK_UL: RENDER_VERBATIM(r, "<ul>\n"); break;
335 case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break;
336 case MD_BLOCK_LI: render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break;
337 case MD_BLOCK_HR: RENDER_VERBATIM(r, (r->flags & MD_HTML_FLAG_XHTML) ? "<hr />\n" : "<hr>\n"); break;
338 case MD_BLOCK_H: RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
339 case MD_BLOCK_CODE: render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail); break;
340 case MD_BLOCK_HTML: break;
341 case MD_BLOCK_P: RENDER_VERBATIM(r, "<p>"); break;
342 case MD_BLOCK_TABLE: RENDER_VERBATIM(r, "<table>\n"); break;
343 case MD_BLOCK_THEAD: RENDER_VERBATIM(r, "<thead>\n"); break;
344 case MD_BLOCK_TBODY: RENDER_VERBATIM(r, "<tbody>\n"); break;
345 case MD_BLOCK_TR: RENDER_VERBATIM(r, "<tr>\n"); break;
346 case MD_BLOCK_TH: render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break;
347 case MD_BLOCK_TD: render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break;
348 }
349
350 return 0;
351}
352
353static int
354leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
355{
356 static const MD_CHAR* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
357 MD_HTML* r = (MD_HTML*) userdata;
358
359 switch(type) {
360 case MD_BLOCK_DOC: break;
361 case MD_BLOCK_QUOTE: RENDER_VERBATIM(r, "</blockquote>\n"); break;
362 case MD_BLOCK_UL: RENDER_VERBATIM(r, "</ul>\n"); break;
363 case MD_BLOCK_OL: RENDER_VERBATIM(r, "</ol>\n"); break;
364 case MD_BLOCK_LI: RENDER_VERBATIM(r, "</li>\n"); break;
365 case MD_BLOCK_HR: break;
366 case MD_BLOCK_H: RENDER_VERBATIM(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
367 case MD_BLOCK_CODE: RENDER_VERBATIM(r, "</code></pre>\n"); break;
368 case MD_BLOCK_HTML: break;
369 case MD_BLOCK_P: RENDER_VERBATIM(r, "</p>\n"); break;
370 case MD_BLOCK_TABLE: RENDER_VERBATIM(r, "</table>\n"); break;
371 case MD_BLOCK_THEAD: RENDER_VERBATIM(r, "</thead>\n"); break;
372 case MD_BLOCK_TBODY: RENDER_VERBATIM(r, "</tbody>\n"); break;
373 case MD_BLOCK_TR: RENDER_VERBATIM(r, "</tr>\n"); break;
374 case MD_BLOCK_TH: RENDER_VERBATIM(r, "</th>\n"); break;
375 case MD_BLOCK_TD: RENDER_VERBATIM(r, "</td>\n"); break;
376 }
377
378 return 0;
379}
380
381static int
382enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
383{
384 MD_HTML* r = (MD_HTML*) userdata;
385 int inside_img = (r->image_nesting_level > 0);
386
387 if(type == MD_SPAN_IMG)
388 r->image_nesting_level++;
389 if(inside_img)
390 return 0;
391
392 switch(type) {
393 case MD_SPAN_EM: RENDER_VERBATIM(r, "<em>"); break;
394 case MD_SPAN_STRONG: RENDER_VERBATIM(r, "<strong>"); break;
395 case MD_SPAN_U: RENDER_VERBATIM(r, "<u>"); break;
396 case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
397 case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
398 case MD_SPAN_CODE: RENDER_VERBATIM(r, "<code>"); break;
399 case MD_SPAN_DEL: RENDER_VERBATIM(r, "<del>"); break;
400 case MD_SPAN_LATEXMATH: RENDER_VERBATIM(r, "<x-equation>"); break;
401 case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "<x-equation type=\"display\">"); break;
402 case MD_SPAN_WIKILINK: render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail); break;
403 }
404
405 return 0;
406}
407
408static int
409leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
410{
411 MD_HTML* r = (MD_HTML*) userdata;
412
413 if(type == MD_SPAN_IMG)
414 r->image_nesting_level--;
415 if(r->image_nesting_level > 0)
416 return 0;
417
418 switch(type) {
419 case MD_SPAN_EM: RENDER_VERBATIM(r, "</em>"); break;
420 case MD_SPAN_STRONG: RENDER_VERBATIM(r, "</strong>"); break;
421 case MD_SPAN_U: RENDER_VERBATIM(r, "</u>"); break;
422 case MD_SPAN_A: RENDER_VERBATIM(r, "</a>"); break;
423 case MD_SPAN_IMG: render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
424 case MD_SPAN_CODE: RENDER_VERBATIM(r, "</code>"); break;
425 case MD_SPAN_DEL: RENDER_VERBATIM(r, "</del>"); break;
426 case MD_SPAN_LATEXMATH:
427 case MD_SPAN_LATEXMATH_DISPLAY: RENDER_VERBATIM(r, "</x-equation>"); break;
428 case MD_SPAN_WIKILINK: RENDER_VERBATIM(r, "</x-wikilink>"); break;
429 }
430
431 return 0;
432}
433
434static int
435text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
436{
437 MD_HTML* r = (MD_HTML*) userdata;
438
439 switch(type) {
440 case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_verbatim); break;
441 case MD_TEXT_BR: RENDER_VERBATIM(r, (r->image_nesting_level == 0
442 ? ((r->flags & MD_HTML_FLAG_XHTML) ? "<br />\n" : "<br>\n")
443 : " "));
444 break;
445 case MD_TEXT_SOFTBR: RENDER_VERBATIM(r, (r->image_nesting_level == 0 ? "\n" : " ")); break;
446 case MD_TEXT_HTML: render_verbatim(r, text, size); break;
447 case MD_TEXT_ENTITY: render_entity(r, text, size, render_html_escaped); break;
448 default: render_html_escaped(r, text, size); break;
449 }
450
451 return 0;
452}
453
454static void
455debug_log_callback(const char* msg, void* userdata)
456{
457 MD_HTML* r = (MD_HTML*) userdata;
458 if(r->flags & MD_HTML_FLAG_DEBUG)
459 fprintf(stderr, "MD4C: %s\n", msg);
460}
461
462int
463md_html(const MD_CHAR* input, MD_SIZE input_size,
464 void (*process_output)(const MD_CHAR*, MD_SIZE, void*),
465 void* userdata, unsigned parser_flags, unsigned renderer_flags)
466{
467 MD_HTML render = { process_output, userdata, renderer_flags, 0, { 0 } };
468 int i;
469
470 MD_PARSER parser = {
471 0,
472 parser_flags,
473 enter_block_callback,
474 leave_block_callback,
475 enter_span_callback,
476 leave_span_callback,
477 text_callback,
478 debug_log_callback,
479 NULL
480 };
481
482 for(i = 0; i < 256; i++) {
483 unsigned char ch = (unsigned char) i;
484
485 if(strchr("\"&<>", ch) != NULL)
486 render.escape_map[i] |= NEED_HTML_ESC_FLAG;
487
488 if(!ISALNUM(ch) && strchr("~-_.+!*(),%#@?=;:/,+$", ch) == NULL)
489 render.escape_map[i] |= NEED_URL_ESC_FLAG;
490 }
491
492 if(renderer_flags & MD_HTML_FLAG_SKIP_UTF8_BOM && sizeof(MD_CHAR) == 1) {
493 static const MD_CHAR bom[3] = { (char)0xef, (char)0xbb, (char)0xbf };
494 if(input_size >= sizeof(bom) && memcmp(input, bom, sizeof(bom)) == 0) {
495 input += sizeof(bom);
496 input_size -= sizeof(bom);
497 }
498 }
499
500 return md_parse(input, input_size, &parser, (void*) &render);
501}