1#include "md4c.h"
2
3#include <limits.h>
4#include <stdint.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8
/* The "inline" keyword was introduced by C99. For anything older, map it to
 * a compiler specific spelling or, when none is known, to nothing.
 *
 * The threshold has to be C99 (199901L): C95 compilers already define
 * __STDC_VERSION__ (as 199409L) but still do not know "inline". */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
    #if defined __GNUC__
        #define inline __inline__
    #elif defined _MSC_VER
        #define inline __inline
    #else
        #define inline
    #endif
#endif

/* Use UTF-8 unless the build selected an encoding explicitly. */
#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16
    #define MD4C_USE_UTF8
#endif

/* _T() adapts character and string literals to the selected encoding: wide
 * literals for UTF-16 builds, plain literals otherwise. Any definition
 * inherited from an included header is dropped first. */
#ifdef _T
    #undef _T
#endif
#if defined MD4C_USE_UTF16
    #define _T(x)           L##x
#else
    #define _T(x)           x
#endif

/* Count of elements in a real array (meaningless for pointers). */
#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof((a)[0]))

/* Make a string literal from the argument after it is macro-expanded. */
#define STRINGIZE_(x)       #x
#define STRINGIZE(x)        STRINGIZE_(x)

/* Beware: both macros evaluate their arguments more than once. */
#define MAX(a,b)            ((a) > (b) ? (a) : (b))
#define MIN(a,b)            ((a) < (b) ? (a) : (b))

#ifndef TRUE
    #define TRUE            1
    #define FALSE           0
#endif
45
/* Send a message to the application's debug_log() callback, if one is set.
 * Requires a variable "ctx" (pointer to the parser context) in scope. */
#define MD_LOG(msg)                                                     \
    do {                                                                \
        if(ctx->parser.debug_log != NULL)                               \
            ctx->parser.debug_log((msg), ctx->userdata);                \
    } while(0)

#ifdef DEBUG
    /* Debug builds: report the failed condition via MD_LOG() and exit. */
    #define MD_ASSERT(cond)                                             \
            do {                                                        \
                if(!(cond)) {                                           \
                    MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "        \
                           "Assertion '" STRINGIZE(cond) "' failed.");  \
                    exit(1);                                            \
                }                                                       \
            } while(0)

    #define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
#else
    /* Non-debug builds: the condition becomes an optimizer hint where the
     * compiler offers one, and expands to nothing otherwise. */
    #ifdef __GNUC__
        #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
        #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
    #elif defined _MSC_VER  &&  _MSC_VER > 120
        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
        #define MD_UNREACHABLE()    do { __assume(0); } while(0)
    #else
        #define MD_ASSERT(cond)     do {} while(0)
        #define MD_UNREACHABLE()    do {} while(0)
    #endif
#endif

/* Marks an intentional fall-through between switch cases for compilers which
 * understand the attribute; a no-op expression elsewhere. */
#if defined __clang__ && __clang_major__ >= 12
    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
#elif defined __GNUC__ && __GNUC__ >= 7
    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
#else
    #define MD_FALLTHROUGH()        ((void)0)
#endif

/* Silence "unused" warnings. The argument is parenthesized so that any
 * expression, e.g. MD_UNUSED(a + b), can be passed safely. */
#define MD_UNUSED(x)                ((void)(x))

#define CODESPAN_MARK_MAXLEN    32

#define TABLE_MAXCOLCOUNT       128

/* Short aliases for the character, size and offset types. */
#define CHAR    MD_CHAR
#define SZ      MD_SIZE
#define OFF     MD_OFFSET

/* Largest value of SZ and OFF, chosen by the width of the type. */
#define SZ_MAX      (sizeof(SZ) == 8 ? UINT64_MAX : UINT32_MAX)
#define OFF_MAX     (sizeof(OFF) == 8 ? UINT64_MAX : UINT32_MAX)
/* Types which are only declared here; their definitions are not part of
 * this section of the file. */
typedef struct MD_MARK_tag MD_MARK;
typedef struct MD_BLOCK_tag MD_BLOCK;
typedef struct MD_CONTAINER_tag MD_CONTAINER;
typedef struct MD_REF_DEF_tag MD_REF_DEF;

/* A stack of marks, represented solely by its top element.
 * NOTE(review): "top" is presumably an index into MD_CTX::marks -- confirm
 * against the code which pushes and pops marks. */
typedef struct MD_MARKSTACK_tag {
    int top;
} MD_MARKSTACK;
106
107typedef struct MD_CTX_tag MD_CTX;
108struct MD_CTX_tag {
109
110 const CHAR* text;
111 SZ size;
112 MD_PARSER parser;
113 void* userdata;
114
115 int doc_ends_with_newline;
116
117 CHAR* buffer;
118 unsigned alloc_buffer;
119
120 MD_REF_DEF* ref_defs;
121 int n_ref_defs;
122 int alloc_ref_defs;
123 void** ref_def_hashtable;
124 int ref_def_hashtable_size;
125 SZ max_ref_def_output;
126
127 MD_MARK* marks;
128 int n_marks;
129 int alloc_marks;
130
131#if defined MD4C_USE_UTF16
132 char mark_char_map[128];
133#else
134 char mark_char_map[256];
135#endif
136
137 MD_MARKSTACK opener_stacks[16];
138#define ASTERISK_OPENERS_oo_mod3_0 (ctx->opener_stacks[0])
139#define ASTERISK_OPENERS_oo_mod3_1 (ctx->opener_stacks[1])
140#define ASTERISK_OPENERS_oo_mod3_2 (ctx->opener_stacks[2])
141#define ASTERISK_OPENERS_oc_mod3_0 (ctx->opener_stacks[3])
142#define ASTERISK_OPENERS_oc_mod3_1 (ctx->opener_stacks[4])
143#define ASTERISK_OPENERS_oc_mod3_2 (ctx->opener_stacks[5])
144#define UNDERSCORE_OPENERS_oo_mod3_0 (ctx->opener_stacks[6])
145#define UNDERSCORE_OPENERS_oo_mod3_1 (ctx->opener_stacks[7])
146#define UNDERSCORE_OPENERS_oo_mod3_2 (ctx->opener_stacks[8])
147#define UNDERSCORE_OPENERS_oc_mod3_0 (ctx->opener_stacks[9])
148#define UNDERSCORE_OPENERS_oc_mod3_1 (ctx->opener_stacks[10])
149#define UNDERSCORE_OPENERS_oc_mod3_2 (ctx->opener_stacks[11])
150#define TILDE_OPENERS_1 (ctx->opener_stacks[12])
151#define TILDE_OPENERS_2 (ctx->opener_stacks[13])
152#define BRACKET_OPENERS (ctx->opener_stacks[14])
153#define DOLLAR_OPENERS (ctx->opener_stacks[15])
154
155 MD_MARKSTACK ptr_stack;
156
157 int n_table_cell_boundaries;
158 int table_cell_boundaries_head;
159 int table_cell_boundaries_tail;
160
161 int unresolved_link_head;
162 int unresolved_link_tail;
163
164 OFF html_comment_horizon;
165 OFF html_proc_instr_horizon;
166 OFF html_decl_horizon;
167 OFF html_cdata_horizon;
168
169 void* block_bytes;
170 MD_BLOCK* current_block;
171 int n_block_bytes;
172 int alloc_block_bytes;
173
174 MD_CONTAINER* containers;
175 int n_containers;
176 int alloc_containers;
177
178 unsigned code_indent_offset;
179
180 SZ code_fence_length;
181 int html_block_type;
182 int last_line_has_list_loosening_effect;
183 int last_list_item_starts_with_two_blank_lines;
184};
185
/* Classification of a single line of the input. */
typedef enum MD_LINETYPE_tag {
    MD_LINE_BLANK = 0,
    MD_LINE_HR,
    MD_LINE_ATXHEADER,
    MD_LINE_SETEXTHEADER,
    MD_LINE_SETEXTUNDERLINE,
    MD_LINE_INDENTEDCODE,
    MD_LINE_FENCEDCODE,
    MD_LINE_HTML,
    MD_LINE_TEXT,
    MD_LINE_TABLE,
    MD_LINE_TABLEUNDERLINE
} MD_LINETYPE;
200
201typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
202struct MD_LINE_ANALYSIS_tag {
203 MD_LINETYPE type;
204 unsigned data;
205 int enforce_new_block;
206 OFF beg;
207 OFF end;
208 unsigned indent;
209};
210
211typedef struct MD_LINE_tag MD_LINE;
212struct MD_LINE_tag {
213 OFF beg;
214 OFF end;
215};
216
217typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
218struct MD_VERBATIMLINE_tag {
219 OFF beg;
220 OFF end;
221 OFF indent;
222};
223
/* Character search function matching the width of CHAR. */
#if defined MD4C_USE_UTF16
    #define md_strchr       wcschr
#else
    #define md_strchr       strchr
#endif

/* Access to the input text by offset; both expect "ctx" in scope. */
#define CH(off)                         (ctx->text[(off)])
#define STR(off)                        (ctx->text + (off))

/* Character classification, variants taking a character value.
 * Note the macros may evaluate their argument more than once. */
#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
#define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
#define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))

#define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
#define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))

#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))

#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))

/* The same tests for the character stored at the given offset of the
 * input text. */
#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
#define ISASCII(off)                    ISASCII_(CH(off))
#define ISBLANK(off)                    ISBLANK_(CH(off))
#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
#define ISCNTRL(off)                    ISCNTRL_(CH(off))
#define ISPUNCT(off)                    ISPUNCT_(CH(off))
#define ISUPPER(off)                    ISUPPER_(CH(off))
#define ISLOWER(off)                    ISLOWER_(CH(off))
#define ISALPHA(off)                    ISALPHA_(CH(off))
#define ISDIGIT(off)                    ISDIGIT_(CH(off))
#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
#define ISALNUM(off)                    ISALNUM_(CH(off))
265
266static inline int
267md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
268{
269 OFF i;
270 for(i = 0; i < n; i++) {
271 CHAR ch1 = s1[i];
272 CHAR ch2 = s2[i];
273
274 if(ISLOWER_(ch1))
275 ch1 += ('A'-'a');
276 if(ISLOWER_(ch2))
277 ch2 += ('A'-'a');
278 if(ch1 != ch2)
279 return FALSE;
280 }
281 return TRUE;
282}
283
284static inline int
285md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
286{
287 return memcmp(s1, s2, n * sizeof(CHAR)) == 0;
288}
289
290static int
291md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
292{
293 OFF off = 0;
294 int ret = 0;
295
296 while(1) {
297 while(off < size && str[off] != _T('\0'))
298 off++;
299
300 if(off > 0) {
301 ret = ctx->parser.text(type, str, off, ctx->userdata);
302 if(ret != 0)
303 return ret;
304
305 str += off;
306 size -= off;
307 off = 0;
308 }
309
310 if(off >= size)
311 return 0;
312
313 ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata);
314 if(ret != 0)
315 return ret;
316 off++;
317 }
318}
319
/* The macros below expect the following in the calling function:
 *  -- a variable "ret" of type int (all of them),
 *  -- a label "abort" (all of them),
 *  -- a variable "ctx" pointing to the parser context (all but MD_CHECK).
 *
 * All macro parameters are parenthesized so that any expression can be
 * passed as an argument. */

/* Evaluate the expression; jump to "abort" if its value is negative. */
#define MD_CHECK(func)                                                      \
    do {                                                                    \
        ret = (func);                                                       \
        if(ret < 0)                                                         \
            goto abort;                                                     \
    } while(0)

/* Make sure ctx->buffer has capacity of at least sz, growing it with
 * realloc() when needed. On allocation failure, ret is set to -1 and the
 * control jumps to "abort"; the old buffer stays valid in that case. */
#define MD_TEMP_BUFFER(sz)                                                  \
    do {                                                                    \
        if((sz) > ctx->alloc_buffer) {                                      \
            CHAR* new_buffer;                                               \
            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;                   \
                                                                            \
            new_buffer = realloc(ctx->buffer, new_size);                    \
            if(new_buffer == NULL) {                                        \
                MD_LOG("realloc() failed.");                                \
                ret = -1;                                                   \
                goto abort;                                                 \
            }                                                               \
                                                                            \
            ctx->buffer = new_buffer;                                       \
            ctx->alloc_buffer = new_size;                                   \
        }                                                                   \
    } while(0)

/* Wrappers of the application's callbacks. A non-zero return value of the
 * callback is logged and makes the control jump to "abort". */
#define MD_ENTER_BLOCK(type, arg)                                           \
    do {                                                                    \
        ret = ctx->parser.enter_block((type), (arg), ctx->userdata);        \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from enter_block() callback.");                 \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_LEAVE_BLOCK(type, arg)                                           \
    do {                                                                    \
        ret = ctx->parser.leave_block((type), (arg), ctx->userdata);        \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from leave_block() callback.");                 \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_ENTER_SPAN(type, arg)                                            \
    do {                                                                    \
        ret = ctx->parser.enter_span((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from enter_span() callback.");                  \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

#define MD_LEAVE_SPAN(type, arg)                                            \
    do {                                                                    \
        ret = ctx->parser.leave_span((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                      \
            MD_LOG("Aborted from leave_span() callback.");                  \
            goto abort;                                                     \
        }                                                                   \
    } while(0)

/* Pass non-empty text to the text() callback as it is. */
#define MD_TEXT(type, str, size)                                            \
    do {                                                                    \
        if((size) > 0) {                                                    \
            ret = ctx->parser.text((type), (str), (size), ctx->userdata);   \
            if(ret != 0) {                                                  \
                MD_LOG("Aborted from text() callback.");                    \
                goto abort;                                                 \
            }                                                               \
        }                                                                   \
    } while(0)

/* Like MD_TEXT(), but for text which may contain NUL characters: it goes
 * through md_text_with_null_replacement(). */
#define MD_TEXT_INSECURE(type, str, size)                                   \
    do {                                                                    \
        if((size) > 0) {                                                    \
            ret = md_text_with_null_replacement(ctx, (type), (str), (size)); \
            if(ret != 0) {                                                  \
                MD_LOG("Aborted from text() callback.");                    \
                goto abort;                                                 \
            }                                                               \
        }                                                                   \
    } while(0)
402
403static const MD_LINE*
404md_lookup_line(OFF off, const MD_LINE* lines, MD_SIZE n_lines, MD_SIZE* p_line_index)
405{
406 MD_SIZE lo, hi;
407 MD_SIZE pivot;
408 const MD_LINE* line;
409
410 lo = 0;
411 hi = n_lines - 1;
412 while(lo <= hi) {
413 pivot = (lo + hi) / 2;
414 line = &lines[pivot];
415
416 if(off < line->beg) {
417 if(hi == 0 || lines[hi-1].end < off) {
418 if(p_line_index != NULL)
419 *p_line_index = pivot;
420 return line;
421 }
422 hi = pivot - 1;
423 } else if(off > line->end) {
424 lo = pivot + 1;
425 } else {
426 if(p_line_index != NULL)
427 *p_line_index = pivot;
428 return line;
429 }
430 }
431
432 return NULL;
433}
434
/* Result of case folding of a single codepoint: up to three codepoints,
 * with n_codepoints telling how many of them are used. */
typedef struct MD_UNICODE_FOLD_INFO_tag {
    unsigned codepoints[3];
    unsigned n_codepoints;
} MD_UNICODE_FOLD_INFO;
440
441#if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8
442
/* Binary search over a sorted map of codepoints.
 *
 * A record of the map is either a single codepoint, or a range stored in
 * two consecutive slots: the first codepoint of the range flagged with
 * 0x40000000, followed by the last one flagged with 0x80000000. Only the
 * low 24 bits of a slot hold the codepoint itself.
 *
 * Returns the index of the slot where the matching record begins, or -1 if
 * the codepoint is not covered by the map. */
static int
md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
{
    int lo = 0;
    int hi = (int) map_size-1;

    while(lo <= hi) {
        int mid = (lo + hi) / 2;
        int first = mid;
        int last = mid;

        /* The middle slot may be either end of a range; widen the record
         * so that it covers both slots of the range. */
        if(map[mid] & 0x40000000)
            last = mid + 1;
        if(map[mid] & 0x80000000)
            first = mid - 1;

        if(codepoint < (map[first] & 0x00ffffff)) {
            hi = first - 1;
            continue;
        }
        if(codepoint > (map[last] & 0x00ffffff)) {
            lo = last + 1;
            continue;
        }

        return first;
    }

    return -1;
}
469
/* Tell whether the codepoint is a whitespace: ASCII codepoints are tested
 * with ISWHITESPACE_(), the others are looked up in the table below.
 * Returns non-zero for whitespace. */
static int
md_is_unicode_whitespace__(unsigned codepoint)
{
    /* Table in the format expected by md_unicode_bsearch__(): 0x40000000
     * flags the first and 0x80000000 the last codepoint of a range. */
    static const unsigned WHITESPACE_MAP[] = {
        0x0020,
        0x00a0,
        0x1680,
        ((0x2000) | 0x40000000), ((0x200a) | 0x80000000),
        0x202f,
        0x205f,
        0x3000
    };

    if(codepoint > 0x7f)
        return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);

    /* The simple and frequent case of an ASCII codepoint. */
    return ISWHITESPACE_(codepoint);
}
487
/* Tell whether the codepoint is a punctuation character: ASCII codepoints
 * are tested with ISPUNCT_(), the others are looked up in the table below.
 * Returns non-zero for punctuation. */
static int
md_is_unicode_punct__(unsigned codepoint)
{
/* R() emits the two flagged slots of a range, S() a single codepoint; see
 * md_unicode_bsearch__() for the format. */
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
    static const unsigned PUNCT_MAP[] = {
        R(0x0021,0x002f), R(0x003a,0x0040), R(0x005b,0x0060), R(0x007b,0x007e), R(0x00a1,0x00a9),
        R(0x00ab,0x00ac), R(0x00ae,0x00b1), S(0x00b4), R(0x00b6,0x00b8), S(0x00bb), S(0x00bf), S(0x00d7),
        S(0x00f7), R(0x02c2,0x02c5), R(0x02d2,0x02df), R(0x02e5,0x02eb), S(0x02ed), R(0x02ef,0x02ff), S(0x0375),
        S(0x037e), R(0x0384,0x0385), S(0x0387), S(0x03f6), S(0x0482), R(0x055a,0x055f), R(0x0589,0x058a),
        R(0x058d,0x058f), S(0x05be), S(0x05c0), S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0606,0x060f),
        S(0x061b), R(0x061d,0x061f), R(0x066a,0x066d), S(0x06d4), S(0x06de), S(0x06e9), R(0x06fd,0x06fe),
        R(0x0700,0x070d), R(0x07f6,0x07f9), R(0x07fe,0x07ff), R(0x0830,0x083e), S(0x085e), S(0x0888),
        R(0x0964,0x0965), S(0x0970), R(0x09f2,0x09f3), R(0x09fa,0x09fb), S(0x09fd), S(0x0a76), R(0x0af0,0x0af1),
        S(0x0b70), R(0x0bf3,0x0bfa), S(0x0c77), S(0x0c7f), S(0x0c84), S(0x0d4f), S(0x0d79), S(0x0df4), S(0x0e3f),
        S(0x0e4f), R(0x0e5a,0x0e5b), R(0x0f01,0x0f17), R(0x0f1a,0x0f1f), S(0x0f34), S(0x0f36), S(0x0f38),
        R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fbe,0x0fc5), R(0x0fc7,0x0fcc), R(0x0fce,0x0fda), R(0x104a,0x104f),
        R(0x109e,0x109f), S(0x10fb), R(0x1360,0x1368), R(0x1390,0x1399), S(0x1400), R(0x166d,0x166e),
        R(0x169b,0x169c), R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17db),
        R(0x1800,0x180a), S(0x1940), R(0x1944,0x1945), R(0x19de,0x19ff), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6),
        R(0x1aa8,0x1aad), R(0x1b5a,0x1b6a), R(0x1b74,0x1b7e), R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f),
        R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), S(0x1fbd), R(0x1fbf,0x1fc1), R(0x1fcd,0x1fcf),
        R(0x1fdd,0x1fdf), R(0x1fed,0x1fef), R(0x1ffd,0x1ffe), R(0x2010,0x2027), R(0x2030,0x205e),
        R(0x207a,0x207e), R(0x208a,0x208e), R(0x20a0,0x20c0), R(0x2100,0x2101), R(0x2103,0x2106),
        R(0x2108,0x2109), S(0x2114), R(0x2116,0x2118), R(0x211e,0x2123), S(0x2125), S(0x2127), S(0x2129),
        S(0x212e), R(0x213a,0x213b), R(0x2140,0x2144), R(0x214a,0x214d), S(0x214f), R(0x218a,0x218b),
        R(0x2190,0x2426), R(0x2440,0x244a), R(0x249c,0x24e9), R(0x2500,0x2775), R(0x2794,0x2b73),
        R(0x2b76,0x2b95), R(0x2b97,0x2bff), R(0x2ce5,0x2cea), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
        R(0x2e00,0x2e2e), R(0x2e30,0x2e5d), R(0x2e80,0x2e99), R(0x2e9b,0x2ef3), R(0x2f00,0x2fd5),
        R(0x2ff0,0x2fff), R(0x3001,0x3004), R(0x3008,0x3020), S(0x3030), R(0x3036,0x3037), R(0x303d,0x303f),
        R(0x309b,0x309c), S(0x30a0), S(0x30fb), R(0x3190,0x3191), R(0x3196,0x319f), R(0x31c0,0x31e3), S(0x31ef),
        R(0x3200,0x321e), R(0x322a,0x3247), S(0x3250), R(0x3260,0x327f), R(0x328a,0x32b0), R(0x32c0,0x33ff),
        R(0x4dc0,0x4dff), R(0xa490,0xa4c6), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
        R(0xa6f2,0xa6f7), R(0xa700,0xa716), R(0xa720,0xa721), R(0xa789,0xa78a), R(0xa828,0xa82b),
        R(0xa836,0xa839), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
        S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaa77,0xaa79), R(0xaade,0xaadf),
        R(0xaaf0,0xaaf1), S(0xab5b), R(0xab6a,0xab6b), S(0xabeb), S(0xfb29), R(0xfbb2,0xfbc2), R(0xfd3e,0xfd4f),
        S(0xfdcf), R(0xfdfc,0xfdff), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe66), R(0xfe68,0xfe6b),
        R(0xff01,0xff0f), R(0xff1a,0xff20), R(0xff3b,0xff40), R(0xff5b,0xff65), R(0xffe0,0xffe6),
        R(0xffe8,0xffee), R(0xfffc,0xfffd), R(0x10100,0x10102), R(0x10137,0x1013f), R(0x10179,0x10189),
        R(0x1018c,0x1018e), R(0x10190,0x1019c), S(0x101a0), R(0x101d0,0x101fc), S(0x1039f), S(0x103d0),
        S(0x1056f), S(0x10857), R(0x10877,0x10878), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
        S(0x10ac8), R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
        R(0x10f86,0x10f89), R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143),
        R(0x11174,0x11175), R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d),
        S(0x112a9), R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7),
        R(0x11641,0x11643), R(0x11660,0x1166c), S(0x116b9), R(0x1173c,0x1173f), S(0x1183b), R(0x11944,0x11946),
        S(0x119e2), R(0x11a3f,0x11a46), R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11b00,0x11b09),
        R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), R(0x11f43,0x11f4f), R(0x11fd5,0x11ff1),
        S(0x11fff), R(0x12470,0x12474), R(0x12ff1,0x12ff2), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3f),
        R(0x16b44,0x16b45), R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9c), S(0x1bc9f), R(0x1cf50,0x1cfc3),
        R(0x1d000,0x1d0f5), R(0x1d100,0x1d126), R(0x1d129,0x1d164), R(0x1d16a,0x1d16c), R(0x1d183,0x1d184),
        R(0x1d18c,0x1d1a9), R(0x1d1ae,0x1d1ea), R(0x1d200,0x1d241), S(0x1d245), R(0x1d300,0x1d356), S(0x1d6c1),
        S(0x1d6db), S(0x1d6fb), S(0x1d715), S(0x1d735), S(0x1d74f), S(0x1d76f), S(0x1d789), S(0x1d7a9),
        S(0x1d7c3), R(0x1d800,0x1d9ff), R(0x1da37,0x1da3a), R(0x1da6d,0x1da74), R(0x1da76,0x1da83),
        R(0x1da85,0x1da8b), S(0x1e14f), S(0x1e2ff), R(0x1e95e,0x1e95f), S(0x1ecac), S(0x1ecb0), S(0x1ed2e),
        R(0x1eef0,0x1eef1), R(0x1f000,0x1f02b), R(0x1f030,0x1f093), R(0x1f0a0,0x1f0ae), R(0x1f0b1,0x1f0bf),
        R(0x1f0c1,0x1f0cf), R(0x1f0d1,0x1f0f5), R(0x1f10d,0x1f1ad), R(0x1f1e6,0x1f202), R(0x1f210,0x1f23b),
        R(0x1f240,0x1f248), R(0x1f250,0x1f251), R(0x1f260,0x1f265), R(0x1f300,0x1f6d7), R(0x1f6dc,0x1f6ec),
        R(0x1f6f0,0x1f6fc), R(0x1f700,0x1f776), R(0x1f77b,0x1f7d9), R(0x1f7e0,0x1f7eb), S(0x1f7f0),
        R(0x1f800,0x1f80b), R(0x1f810,0x1f847), R(0x1f850,0x1f859), R(0x1f860,0x1f887), R(0x1f890,0x1f8ad),
        R(0x1f8b0,0x1f8b1), R(0x1f900,0x1fa53), R(0x1fa60,0x1fa6d), R(0x1fa70,0x1fa7c), R(0x1fa80,0x1fa88),
        R(0x1fa90,0x1fabd), R(0x1fabf,0x1fac5), R(0x1face,0x1fadb), R(0x1fae0,0x1fae8), R(0x1faf0,0x1faf8),
        R(0x1fb00,0x1fb92), R(0x1fb94,0x1fbca)
    };
#undef R
#undef S

    if(codepoint > 0x7f)
        return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);

    /* The simple and frequent case of an ASCII codepoint. */
    return ISPUNCT_(codepoint);
}
562
563 static void
564 md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
565 {
566#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
567#define S(cp) (cp)
568
569 static const unsigned FOLD_MAP_1[] = {
570 R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
571 R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
572 S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
573 S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
574 R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
575 S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
576 S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
577 R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
578 S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
579 S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
580 S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
581 S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
582 R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
583 R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
584 S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
585 R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
586 R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
587 R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
588 S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
589 S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
590 R(0x24b6,0x24cf), R(0x2c00,0x2c2f), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
591 S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
592 S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
593 S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
594 R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
595 S(0xa7b3), R(0xa7b4,0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7d0), S(0xa7d6),
596 S(0xa7d8), S(0xa7f5), R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3),
597 R(0x10570,0x1057a), R(0x1057c,0x1058a), R(0x1058c,0x10592), S(0x10594), S(0x10595), R(0x10c80,0x10cb2),
598 R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
599 };
600 static const unsigned FOLD_MAP_1_DATA[] = {
601 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
602 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
603 0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
604 0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
605 0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
606 0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
607 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
608 0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
609 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
610 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
611 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
612 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
613 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
614 0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
615 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
616 0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5f, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
617 0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
618 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
619 0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
620 0xab53, 0xa7b5, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7d1, 0xa7d7, 0xa7d9, 0xa7f6, 0x13a0,
621 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10597, 0x105a1, 0x105a3, 0x105b1, 0x105b3,
622 0x105b9, 0x105bb, 0x105bc, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922, 0x1e943
623 };
624 static const unsigned FOLD_MAP_2[] = {
625 S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
626 S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f),
627 R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2),
628 S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3),
629 S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13),
630 S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17)
631 };
632 static const unsigned FOLD_MAP_2_DATA[] = {
633 0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
634 0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
635 0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
636 0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
637 0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
638 0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
639 0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
640 0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
641 };
642 static const unsigned FOLD_MAP_3[] = {
643 S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3),
644 S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04)
645 };
646 static const unsigned FOLD_MAP_3_DATA[] = {
647 0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
648 0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
649 0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
650 0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
651 };
652#undef R
653#undef S
654 static const struct {
655 const unsigned* map;
656 const unsigned* data;
657 size_t map_size;
658 unsigned n_codepoints;
659 } FOLD_MAP_LIST[] = {
660 { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 },
661 { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 },
662 { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 }
663 };
664
665 int i;
666
667 if(codepoint <= 0x7f) {
668 info->codepoints[0] = codepoint;
669 if(ISUPPER_(codepoint))
670 info->codepoints[0] += 'a' - 'A';
671 info->n_codepoints = 1;
672 return;
673 }
674
675 for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
676 int index;
677
678 index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
679 if(index >= 0) {
680
681 unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
682 const unsigned* map = FOLD_MAP_LIST[i].map;
683 const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
684
685 memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
686 info->n_codepoints = n_codepoints;
687
688 if(FOLD_MAP_LIST[i].map[index] != codepoint) {
689
690 if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
691
692 info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
693 } else {
694
695 info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
696 }
697 }
698
699 return;
700 }
701 }
702
703 info->codepoints[0] = codepoint;
704 info->n_codepoints = 1;
705 }
706#endif
707
708#if defined MD4C_USE_UTF16
709 #define IS_UTF16_SURROGATE_HI(word) (((WORD)(word) & 0xfc00) == 0xd800)
710 #define IS_UTF16_SURROGATE_LO(word) (((WORD)(word) & 0xfc00) == 0xdc00)
711 #define UTF16_DECODE_SURROGATE(hi, lo) (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0)))
712
713 static unsigned
714 md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
715 {
716 if(IS_UTF16_SURROGATE_HI(str[0])) {
717 if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) {
718 if(p_size != NULL)
719 *p_size = 2;
720 return UTF16_DECODE_SURROGATE(str[0], str[1]);
721 }
722 }
723
724 if(p_size != NULL)
725 *p_size = 1;
726 return str[0];
727 }
728
729 static unsigned
730 md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
731 {
732 if(off > 2 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1)))
733 return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1));
734
735 return CH(off);
736 }
737
738 #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
739 #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(CH(off))
740 #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(CH((off)-1))
741
742 #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
743 #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
744
745 static inline int
746 md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
747 {
748 return md_decode_utf16le__(str+off, str_size-off, p_char_size);
749 }
750#elif defined MD4C_USE_UTF8
751 #define IS_UTF8_LEAD1(byte) ((unsigned char)(byte) <= 0x7f)
752 #define IS_UTF8_LEAD2(byte) (((unsigned char)(byte) & 0xe0) == 0xc0)
753 #define IS_UTF8_LEAD3(byte) (((unsigned char)(byte) & 0xf0) == 0xe0)
754 #define IS_UTF8_LEAD4(byte) (((unsigned char)(byte) & 0xf8) == 0xf0)
755 #define IS_UTF8_TAIL(byte) (((unsigned char)(byte) & 0xc0) == 0x80)
756
757 static unsigned
758 md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
759 {
760 if(!IS_UTF8_LEAD1(str[0])) {
761 if(IS_UTF8_LEAD2(str[0])) {
762 if(1 < str_size && IS_UTF8_TAIL(str[1])) {
763 if(p_size != NULL)
764 *p_size = 2;
765
766 return (((unsigned int)str[0] & 0x1f) << 6) |
767 (((unsigned int)str[1] & 0x3f) << 0);
768 }
769 } else if(IS_UTF8_LEAD3(str[0])) {
770 if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
771 if(p_size != NULL)
772 *p_size = 3;
773
774 return (((unsigned int)str[0] & 0x0f) << 12) |
775 (((unsigned int)str[1] & 0x3f) << 6) |
776 (((unsigned int)str[2] & 0x3f) << 0);
777 }
778 } else if(IS_UTF8_LEAD4(str[0])) {
779 if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
780 if(p_size != NULL)
781 *p_size = 4;
782
783 return (((unsigned int)str[0] & 0x07) << 18) |
784 (((unsigned int)str[1] & 0x3f) << 12) |
785 (((unsigned int)str[2] & 0x3f) << 6) |
786 (((unsigned int)str[3] & 0x3f) << 0);
787 }
788 }
789 }
790
791 if(p_size != NULL)
792 *p_size = 1;
793 return (unsigned) str[0];
794 }
795
796 static unsigned
797 md_decode_utf8_before__(MD_CTX* ctx, OFF off)
798 {
799 if(!IS_UTF8_LEAD1(CH(off-1))) {
800 if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
801 return (((unsigned int)CH(off-2) & 0x1f) << 6) |
802 (((unsigned int)CH(off-1) & 0x3f) << 0);
803
804 if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
805 return (((unsigned int)CH(off-3) & 0x0f) << 12) |
806 (((unsigned int)CH(off-2) & 0x3f) << 6) |
807 (((unsigned int)CH(off-1) & 0x3f) << 0);
808
809 if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
810 return (((unsigned int)CH(off-4) & 0x07) << 18) |
811 (((unsigned int)CH(off-3) & 0x3f) << 12) |
812 (((unsigned int)CH(off-2) & 0x3f) << 6) |
813 (((unsigned int)CH(off-1) & 0x3f) << 0);
814 }
815
816 return (unsigned) CH(off-1);
817 }
818
/* Unicode-aware character class tests. All but the first one expect a
 * variable named ctx to be in scope.
 *
 *  -- ISUNICODEWHITESPACE_(codepoint) tests an already decoded codepoint.
 *  -- The (off) forms decode the character starting at document offset off,
 *     with ctx->size - off units available for the decoder.
 *  -- The BEFORE(off) forms decode the character ending right before off.
 */
#define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
#define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))

#define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
#define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
825
826 static inline unsigned
827 md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
828 {
829 return md_decode_utf8__(str+off, str_size-off, p_char_size);
830 }
831#else
/* Fallback definitions for the #else branch of the surrounding conditional:
 * the Unicode-aware tests simply map to the plain ISWHITESPACE/ISPUNCT
 * tests, and the BEFORE(off) forms inspect the single unit at off-1. */
#define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
#define ISUNICODEWHITESPACE(off) ISWHITESPACE(off)
#define ISUNICODEWHITESPACEBEFORE(off) ISWHITESPACE((off)-1)

#define ISUNICODEPUNCT(off) ISPUNCT(off)
#define ISUNICODEPUNCTBEFORE(off) ISPUNCT((off)-1)
838
839 static inline void
840 md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
841 {
842 info->codepoints[0] = codepoint;
843 if(ISUPPER_(codepoint))
844 info->codepoints[0] += 'a' - 'A';
845 info->n_codepoints = 1;
846 }
847
848 static inline unsigned
849 md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
850 {
851 *p_size = 1;
852 return (unsigned) str[off];
853 }
854#endif
855
856static void
857md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, MD_SIZE n_lines,
858 CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
859{
860 CHAR* ptr = buffer;
861 int line_index = 0;
862 OFF off = beg;
863
864 MD_UNUSED(n_lines);
865
866 while(1) {
867 const MD_LINE* line = &lines[line_index];
868 OFF line_end = line->end;
869 if(end < line_end)
870 line_end = end;
871
872 while(off < line_end) {
873 *ptr = CH(off);
874 ptr++;
875 off++;
876 }
877
878 if(off >= end) {
879 *p_size = (MD_SIZE)(ptr - buffer);
880 return;
881 }
882
883 *ptr = line_break_replacement_char;
884 ptr++;
885
886 line_index++;
887 off = lines[line_index].beg;
888 }
889}
890
891static int
892md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, MD_SIZE n_lines,
893 CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
894{
895 CHAR* buffer;
896
897 buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
898 if(buffer == NULL) {
899 MD_LOG("malloc() failed.");
900 return -1;
901 }
902
903 md_merge_lines(ctx, beg, end, lines, n_lines,
904 line_break_replacement_char, buffer, p_size);
905
906 *p_str = buffer;
907 return 0;
908}
909
910static OFF
911md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
912{
913 SZ char_size;
914 unsigned codepoint;
915
916 while(off < size) {
917 codepoint = md_decode_unicode(label, off, size, &char_size);
918 if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off]))
919 break;
920 off += char_size;
921 }
922
923 return off;
924}
925
/* Check whether an HTML tag (opening, closing or self-closing) starts at
 * the offset beg, where the character '<' is expected.
 *
 * The tag may continue over the following lines[], but only when
 * n_lines > 0; with n_lines == 0 the scan is limited to the range
 * [beg, ctx->size) and fails when it reaches a newline or the end of it.
 * The tag has to end before max_end.
 *
 * Returns TRUE and stores the offset just past the closing '>' to *p_end
 * on success; returns FALSE otherwise.
 */
static int
md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    int attr_state;
    OFF off = beg;
    OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size;
    MD_SIZE line_index = 0;

    MD_ASSERT(CH(beg) == _T('<'));

    if(off + 1 >= line_end)
        return FALSE;
    off++;

    /* Attributes are parsed with a small state automaton:
     * State -1: no attributes are allowed (closing tag).
     * State 0: an attribute could follow after some whitespace.
     * State 1: after whitespace (an attribute name may follow).
     * State 2: after an attribute name ('=' may follow).
     * State 3: after '=' (an attribute value has to follow).
     * State 41: inside an unquoted attribute value.
     * State 42: inside a single-quoted attribute value.
     * State 43: inside a double-quoted attribute value.
     */
    attr_state = 0;

    if(CH(off) == _T('/')) {
        /* Closing tag "</...>": no attribute can be recognized in state -1. */
        attr_state = -1;
        off++;
    }

    /* Tag name: a letter followed by letters, digits or '-'. */
    if(off >= line_end || !ISALPHA(off))
        return FALSE;
    off++;
    while(off < line_end && (ISALNUM(off) || CH(off) == _T('-')))
        off++;

    /* Optional attributes, optional '/' and the final '>'. */
    while(1) {
        while(off < line_end && !ISNEWLINE(off)) {
            if(attr_state > 40) {
                /* Inside an attribute value: look for its terminator. */
                if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) {
                    attr_state = 0;
                    off--;      /* Put the char back; it is re-inspected in the new state. */
                } else if(attr_state == 42 && CH(off) == _T('\'')) {
                    attr_state = 0;
                } else if(attr_state == 43 && CH(off) == _T('"')) {
                    attr_state = 0;
                }
                off++;
            } else if(ISWHITESPACE(off)) {
                if(attr_state == 0)
                    attr_state = 1;
                off++;
            } else if(attr_state <= 2 && CH(off) == _T('>')) {
                /* End of the tag. */
                goto done;
            } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) {
                /* End of the tag with the digraph "/>". */
                off++;
                goto done;
            } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) {
                off++;
                /* Rest of the attribute name. */
                while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-"))))
                    off++;
                attr_state = 2;
            } else if(attr_state == 2 && CH(off) == _T('=')) {
                /* Attribute assignment sign. */
                off++;
                attr_state = 3;
            } else if(attr_state == 3) {
                /* Start of an attribute value is expected here. */
                if(CH(off) == _T('"'))
                    attr_state = 43;
                else if(CH(off) == _T('\''))
                    attr_state = 42;
                else if(!ISANYOF(off, _T("\"'=<>`")) && !ISNEWLINE(off))
                    attr_state = 41;
                else
                    return FALSE;
                off++;
            } else {
                /* Anything else is unexpected. */
                return FALSE;
            }
        }

        /* With no lines[] provided, the tag has to fit on a single line. */
        if(n_lines == 0)
            return FALSE;

        line_index++;
        if(line_index >= n_lines)
            return FALSE;

        off = lines[line_index].beg;
        line_end = lines[line_index].end;

        /* The line break acts as whitespace; it also terminates any
         * unquoted attribute value. */
        if(attr_state == 0 || attr_state == 41)
            attr_state = 1;

        if(off >= max_end)
            return FALSE;
    }

done:
    if(off >= max_end)
        return FALSE;

    *p_end = off+1;
    return TRUE;
}
1028
1029static int
1030md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1031 const MD_LINE* lines, MD_SIZE n_lines,
1032 OFF beg, OFF max_end, OFF* p_end,
1033 OFF* p_scan_horizon)
1034{
1035 OFF off = beg;
1036 MD_SIZE line_index = 0;
1037
1038 if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) {
1039
1040 return FALSE;
1041 }
1042
1043 while(TRUE) {
1044 while(off + len <= lines[line_index].end && off + len <= max_end) {
1045 if(md_ascii_eq(STR(off), str, len)) {
1046
1047 *p_end = off + len;
1048 return TRUE;
1049 }
1050 off++;
1051 }
1052
1053 line_index++;
1054 if(off >= max_end || line_index >= n_lines) {
1055
1056 *p_scan_horizon = off;
1057 return FALSE;
1058 }
1059
1060 off = lines[line_index].beg;
1061 }
1062}
1063
1064static int
1065md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1066{
1067 OFF off = beg;
1068
1069 MD_ASSERT(CH(beg) == _T('<'));
1070
1071 if(off + 4 >= lines[0].end)
1072 return FALSE;
1073 if(CH(off+1) != _T('!') || CH(off+2) != _T('-') || CH(off+3) != _T('-'))
1074 return FALSE;
1075
1076 off += 2;
1077
1078 return md_scan_for_html_closer(ctx, _T("-->"), 3,
1079 lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon);
1080}
1081
1082static int
1083md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1084{
1085 OFF off = beg;
1086
1087 if(off + 2 >= lines[0].end)
1088 return FALSE;
1089 if(CH(off+1) != _T('?'))
1090 return FALSE;
1091 off += 2;
1092
1093 return md_scan_for_html_closer(ctx, _T("?>"), 2,
1094 lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon);
1095}
1096
1097static int
1098md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1099{
1100 OFF off = beg;
1101
1102 if(off + 2 >= lines[0].end)
1103 return FALSE;
1104 if(CH(off+1) != _T('!'))
1105 return FALSE;
1106 off += 2;
1107
1108 if(off >= lines[0].end || !ISALPHA(off))
1109 return FALSE;
1110 off++;
1111 while(off < lines[0].end && ISALPHA(off))
1112 off++;
1113
1114 return md_scan_for_html_closer(ctx, _T(">"), 1,
1115 lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
1116}
1117
1118static int
1119md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1120{
1121 static const CHAR open_str[] = _T("<![CDATA[");
1122 static const SZ open_size = SIZEOF_ARRAY(open_str) - 1;
1123
1124 OFF off = beg;
1125
1126 if(off + open_size >= lines[0].end)
1127 return FALSE;
1128 if(memcmp(STR(off), open_str, open_size) != 0)
1129 return FALSE;
1130 off += open_size;
1131
1132 return md_scan_for_html_closer(ctx, _T("]]>"), 3,
1133 lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
1134}
1135
1136static int
1137md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1138{
1139 MD_ASSERT(CH(beg) == _T('<'));
1140 return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) ||
1141 md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) ||
1142 md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) ||
1143 md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) ||
1144 md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1145}
1146
1147static int
1148md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1149{
1150 OFF off = beg;
1151 MD_UNUSED(ctx);
1152
1153 while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8)
1154 off++;
1155
1156 if(1 <= off - beg && off - beg <= 6) {
1157 *p_end = off;
1158 return TRUE;
1159 } else {
1160 return FALSE;
1161 }
1162}
1163
1164static int
1165md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1166{
1167 OFF off = beg;
1168 MD_UNUSED(ctx);
1169
1170 while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8)
1171 off++;
1172
1173 if(1 <= off - beg && off - beg <= 7) {
1174 *p_end = off;
1175 return TRUE;
1176 } else {
1177 return FALSE;
1178 }
1179}
1180
1181static int
1182md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1183{
1184 OFF off = beg;
1185 MD_UNUSED(ctx);
1186
1187 if(off < max_end && ISALPHA_(text[off]))
1188 off++;
1189 else
1190 return FALSE;
1191
1192 while(off < max_end && ISALNUM_(text[off]) && off - beg <= 48)
1193 off++;
1194
1195 if(2 <= off - beg && off - beg <= 48) {
1196 *p_end = off;
1197 return TRUE;
1198 } else {
1199 return FALSE;
1200 }
1201}
1202
1203static int
1204md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1205{
1206 int is_contents;
1207 OFF off = beg;
1208
1209 MD_ASSERT(text[off] == _T('&'));
1210 off++;
1211
1212 if(off+2 < max_end && text[off] == _T('#') && (text[off+1] == _T('x') || text[off+1] == _T('X')))
1213 is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
1214 else if(off+1 < max_end && text[off] == _T('#'))
1215 is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
1216 else
1217 is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
1218
1219 if(is_contents && off < max_end && text[off] == _T(';')) {
1220 *p_end = off+1;
1221 return TRUE;
1222 } else {
1223 return FALSE;
1224 }
1225}
1226
1227static inline int
1228md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
1229{
1230 return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
1231}
1232
/* Working storage used by md_build_attribute() for constructing an
 * MD_ATTRIBUTE, and by md_free_attribute() for releasing it afterwards. */
typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
struct MD_ATTRIBUTE_BUILD_tag {
    /* The attribute text: the caller's raw text itself (or NULL if it is
     * empty) in the trivial case, a malloc()ed buffer otherwise. */
    CHAR* text;
    /* Type of each substring, and offset of each substring in the text; the
     * offsets array holds one extra trailing record with the end offset. */
    MD_TEXTTYPE* substr_types;
    OFF* substr_offsets;
    /* Number of substring records used. */
    int substr_count;
    /* Number of substring records allocated on the heap. Zero means nothing
     * in this struct is heap-allocated (md_free_attribute() then frees
     * nothing). */
    int substr_alloc;
    /* In-place storage for the trivial case of a single normal substring,
     * so that no allocation is needed. */
    MD_TEXTTYPE trivial_types[1];
    OFF trivial_offsets[2];
};

/* Flag for md_build_attribute(): do not translate backslash escapes. */
#define MD_BUILD_ATTR_NO_ESCAPES 0x0001
1245
1246static int
1247md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1248 MD_TEXTTYPE type, OFF off)
1249{
1250 if(build->substr_count >= build->substr_alloc) {
1251 MD_TEXTTYPE* new_substr_types;
1252 OFF* new_substr_offsets;
1253
1254 build->substr_alloc = (build->substr_alloc > 0
1255 ? build->substr_alloc + build->substr_alloc / 2
1256 : 8);
1257 new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
1258 build->substr_alloc * sizeof(MD_TEXTTYPE));
1259 if(new_substr_types == NULL) {
1260 MD_LOG("realloc() failed.");
1261 return -1;
1262 }
1263
1264 new_substr_offsets = (OFF*) realloc(build->substr_offsets,
1265 (build->substr_alloc+1) * sizeof(OFF));
1266 if(new_substr_offsets == NULL) {
1267 MD_LOG("realloc() failed.");
1268 free(new_substr_types);
1269 return -1;
1270 }
1271
1272 build->substr_types = new_substr_types;
1273 build->substr_offsets = new_substr_offsets;
1274 }
1275
1276 build->substr_types[build->substr_count] = type;
1277 build->substr_offsets[build->substr_count] = off;
1278 build->substr_count++;
1279 return 0;
1280}
1281
1282static void
1283md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1284{
1285 MD_UNUSED(ctx);
1286
1287 if(build->substr_alloc > 0) {
1288 free(build->text);
1289 free(build->substr_types);
1290 free(build->substr_offsets);
1291 }
1292}
1293
1294static int
1295md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
1296 unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
1297{
1298 OFF raw_off, off;
1299 int is_trivial;
1300 int ret = 0;
1301
1302 memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
1303
1304 is_trivial = TRUE;
1305 for(raw_off = 0; raw_off < raw_size; raw_off++) {
1306 if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
1307 is_trivial = FALSE;
1308 break;
1309 }
1310 }
1311
1312 if(is_trivial) {
1313 build->text = (CHAR*) (raw_size ? raw_text : NULL);
1314 build->substr_types = build->trivial_types;
1315 build->substr_offsets = build->trivial_offsets;
1316 build->substr_count = 1;
1317 build->substr_alloc = 0;
1318 build->trivial_types[0] = MD_TEXT_NORMAL;
1319 build->trivial_offsets[0] = 0;
1320 build->trivial_offsets[1] = raw_size;
1321 off = raw_size;
1322 } else {
1323 build->text = (CHAR*) malloc(raw_size * sizeof(CHAR));
1324 if(build->text == NULL) {
1325 MD_LOG("malloc() failed.");
1326 goto abort;
1327 }
1328
1329 raw_off = 0;
1330 off = 0;
1331
1332 while(raw_off < raw_size) {
1333 if(raw_text[raw_off] == _T('\0')) {
1334 MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
1335 memcpy(build->text + off, raw_text + raw_off, 1);
1336 off++;
1337 raw_off++;
1338 continue;
1339 }
1340
1341 if(raw_text[raw_off] == _T('&')) {
1342 OFF ent_end;
1343
1344 if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
1345 MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
1346 memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off);
1347 off += ent_end - raw_off;
1348 raw_off = ent_end;
1349 continue;
1350 }
1351 }
1352
1353 if(build->substr_count == 0 || build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL)
1354 MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
1355
1356 if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) &&
1357 raw_text[raw_off] == _T('\\') && raw_off+1 < raw_size &&
1358 (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
1359 raw_off++;
1360
1361 build->text[off++] = raw_text[raw_off++];
1362 }
1363 build->substr_offsets[build->substr_count] = off;
1364 }
1365
1366 attr->text = build->text;
1367 attr->size = off;
1368 attr->substr_offsets = build->substr_offsets;
1369 attr->substr_types = build->substr_types;
1370 return 0;
1371
1372abort:
1373 md_free_attribute(ctx, build);
1374 return -1;
1375}
1376
/* Initial hash value and multiplier used by md_fnv1a(). */
#define MD_FNV1A_BASE 2166136261U
#define MD_FNV1A_PRIME 16777619U

/* Feed n bytes at data into a hash whose current value is base: for each
 * byte, the hash is XOR-ed with the byte and then multiplied by
 * MD_FNV1A_PRIME. Returns the new hash value, so longer inputs can be
 * hashed piecewise by chaining the calls. With n == 0, base is returned
 * and data is not accessed. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* cursor = (const unsigned char*) data;
    unsigned hash = base;

    for(; n > 0; n--) {
        hash = (hash ^ *cursor) * MD_FNV1A_PRIME;
        cursor++;
    }

    return hash;
}
1394
/* One link reference definition. */
struct MD_REF_DEF_tag {
    /* The label and the title, each either a pointer into the document text
     * or a heap buffer produced by md_merge_lines_alloc() (see the
     * *_needs_free flags below). */
    CHAR* label;
    CHAR* title;
    /* Hash of the label as computed by md_link_label_hash(). */
    unsigned hash;
    SZ label_size;
    SZ title_size;
    /* Range of the link destination, as offsets into the document. */
    OFF dest_beg;
    OFF dest_end;
    /* Set when the label or the title, respectively, lives in a heap buffer
     * which has to be free()d. */
    unsigned char label_needs_free : 1;
    unsigned char title_needs_free : 1;
};
1406
1407static unsigned
1408md_link_label_hash(const CHAR* label, SZ size)
1409{
1410 unsigned hash = MD_FNV1A_BASE;
1411 OFF off;
1412 unsigned codepoint;
1413 int is_whitespace = FALSE;
1414
1415 off = md_skip_unicode_whitespace(label, 0, size);
1416 while(off < size) {
1417 SZ char_size;
1418
1419 codepoint = md_decode_unicode(label, off, size, &char_size);
1420 is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);
1421
1422 if(is_whitespace) {
1423 codepoint = ' ';
1424 hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
1425 off = md_skip_unicode_whitespace(label, off, size);
1426 } else {
1427 MD_UNICODE_FOLD_INFO fold_info;
1428
1429 md_get_unicode_fold_info(codepoint, &fold_info);
1430 hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
1431 off += char_size;
1432 }
1433 }
1434
1435 return hash;
1436}
1437
1438static OFF
1439md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1440 MD_UNICODE_FOLD_INFO* fold_info)
1441{
1442 unsigned codepoint;
1443 SZ char_size;
1444
1445 if(off >= size) {
1446
1447 goto whitespace;
1448 }
1449
1450 codepoint = md_decode_unicode(label, off, size, &char_size);
1451 off += char_size;
1452 if(ISUNICODEWHITESPACE_(codepoint)) {
1453
1454 goto whitespace;
1455 }
1456
1457 md_get_unicode_fold_info(codepoint, fold_info);
1458 return off;
1459
1460whitespace:
1461 fold_info->codepoints[0] = _T(' ');
1462 fold_info->n_codepoints = 1;
1463 return md_skip_unicode_whitespace(label, off, size);
1464}
1465
1466static int
1467md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1468{
1469 OFF a_off;
1470 OFF b_off;
1471 MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
1472 MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
1473 OFF a_fi_off = 0;
1474 OFF b_fi_off = 0;
1475 int cmp;
1476
1477 a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
1478 b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
1479 while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
1480 b_off < b_size || b_fi_off < b_fi.n_codepoints)
1481 {
1482
1483 if(a_fi_off >= a_fi.n_codepoints) {
1484 a_fi_off = 0;
1485 a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
1486 }
1487 if(b_fi_off >= b_fi.n_codepoints) {
1488 b_fi_off = 0;
1489 b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
1490 }
1491
1492 cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1493 if(cmp != 0)
1494 return cmp;
1495
1496 a_fi_off++;
1497 b_fi_off++;
1498 }
1499
1500 return 0;
1501}
1502
/* Contents of a hash table bucket which has to hold more than one reference
 * definition. */
typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
struct MD_REF_DEF_LIST_tag {
    int n_ref_defs;             /* Count of the used records in ref_defs[]. */
    int alloc_ref_defs;         /* Count of the allocated records in ref_defs[]. */
    MD_REF_DEF* ref_defs[];     /* Pointers to the definitions (flexible array member). */
};
1509
1510static int
1511md_ref_def_cmp(const void* a, const void* b)
1512{
1513 const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1514 const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1515
1516 if(a_ref->hash < b_ref->hash)
1517 return -1;
1518 else if(a_ref->hash > b_ref->hash)
1519 return +1;
1520 else
1521 return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size);
1522}
1523
1524static int
1525md_ref_def_cmp_for_sort(const void* a, const void* b)
1526{
1527 int cmp;
1528
1529 cmp = md_ref_def_cmp(a, b);
1530
1531 if(cmp == 0) {
1532 const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1533 const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1534
1535 if(a_ref < b_ref)
1536 cmp = -1;
1537 else if(a_ref > b_ref)
1538 cmp = +1;
1539 else
1540 cmp = 0;
1541 }
1542
1543 return cmp;
1544}
1545
1546static int
1547md_build_ref_def_hashtable(MD_CTX* ctx)
1548{
1549 int i, j;
1550
1551 if(ctx->n_ref_defs == 0)
1552 return 0;
1553
1554 ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4;
1555 ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
1556 if(ctx->ref_def_hashtable == NULL) {
1557 MD_LOG("malloc() failed.");
1558 goto abort;
1559 }
1560 memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*));
1561
1562 for(i = 0; i < ctx->n_ref_defs; i++) {
1563 MD_REF_DEF* def = &ctx->ref_defs[i];
1564 void* bucket;
1565 MD_REF_DEF_LIST* list;
1566
1567 def->hash = md_link_label_hash(def->label, def->label_size);
1568 bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
1569
1570 if(bucket == NULL) {
1571
1572 ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
1573 continue;
1574 }
1575
1576 if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1577
1578 MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
1579
1580 if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
1581
1582 continue;
1583 }
1584
1585 list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
1586 if(list == NULL) {
1587 MD_LOG("malloc() failed.");
1588 goto abort;
1589 }
1590 list->ref_defs[0] = old_def;
1591 list->ref_defs[1] = def;
1592 list->n_ref_defs = 2;
1593 list->alloc_ref_defs = 2;
1594 ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1595 continue;
1596 }
1597
1598 list = (MD_REF_DEF_LIST*) bucket;
1599 if(list->n_ref_defs >= list->alloc_ref_defs) {
1600 int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;
1601 MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
1602 sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
1603 if(list_tmp == NULL) {
1604 MD_LOG("realloc() failed.");
1605 goto abort;
1606 }
1607 list = list_tmp;
1608 list->alloc_ref_defs = alloc_ref_defs;
1609 ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1610 }
1611
1612 list->ref_defs[list->n_ref_defs] = def;
1613 list->n_ref_defs++;
1614 }
1615
1616 for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1617 void* bucket = ctx->ref_def_hashtable[i];
1618 MD_REF_DEF_LIST* list;
1619
1620 if(bucket == NULL)
1621 continue;
1622 if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1623 continue;
1624
1625 list = (MD_REF_DEF_LIST*) bucket;
1626 qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
1627
1628 for(j = 1; j < list->n_ref_defs; j++) {
1629 if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
1630 list->ref_defs[j] = list->ref_defs[j-1];
1631 }
1632 }
1633
1634 return 0;
1635
1636abort:
1637 return -1;
1638}
1639
1640static void
1641md_free_ref_def_hashtable(MD_CTX* ctx)
1642{
1643 if(ctx->ref_def_hashtable != NULL) {
1644 int i;
1645
1646 for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1647 void* bucket = ctx->ref_def_hashtable[i];
1648 if(bucket == NULL)
1649 continue;
1650 if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1651 continue;
1652 free(bucket);
1653 }
1654
1655 free(ctx->ref_def_hashtable);
1656 }
1657}
1658
1659static const MD_REF_DEF*
1660md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1661{
1662 unsigned hash;
1663 void* bucket;
1664
1665 if(ctx->ref_def_hashtable_size == 0)
1666 return NULL;
1667
1668 hash = md_link_label_hash(label, label_size);
1669 bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1670
1671 if(bucket == NULL) {
1672 return NULL;
1673 } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1674 const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1675
1676 if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0)
1677 return def;
1678 else
1679 return NULL;
1680 } else {
1681 MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1682 MD_REF_DEF key_buf;
1683 const MD_REF_DEF* key = &key_buf;
1684 const MD_REF_DEF** ret;
1685
1686 key_buf.label = (CHAR*) label;
1687 key_buf.label_size = label_size;
1688 key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size);
1689
1690 ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
1691 list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
1692 if(ret != NULL)
1693 return *ret;
1694 else
1695 return NULL;
1696 }
1697}
1698
/* Attributes of a link: its destination and its title. */
typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
struct MD_LINK_ATTR_tag {
    /* Range of the link destination, as offsets into the document. */
    OFF dest_beg;
    OFF dest_end;

    /* The link title and its length. */
    CHAR* title;
    SZ title_size;
    /* NOTE(review): presumably non-zero when title is a heap buffer which
     * the user of this struct has to free(); confirm against the code which
     * fills it in. */
    int title_needs_free;
};
1708
1709static int
1710md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
1711 OFF* p_end, MD_SIZE* p_beg_line_index, MD_SIZE* p_end_line_index,
1712 OFF* p_contents_beg, OFF* p_contents_end)
1713{
1714 OFF off = beg;
1715 OFF contents_beg = 0;
1716 OFF contents_end = 0;
1717 MD_SIZE line_index = 0;
1718 int len = 0;
1719
1720 *p_beg_line_index = 0;
1721
1722 if(CH(off) != _T('['))
1723 return FALSE;
1724 off++;
1725
1726 while(1) {
1727 OFF line_end = lines[line_index].end;
1728
1729 while(off < line_end) {
1730 if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
1731 if(contents_end == 0) {
1732 contents_beg = off;
1733 *p_beg_line_index = line_index;
1734 }
1735 contents_end = off + 2;
1736 off += 2;
1737 } else if(CH(off) == _T('[')) {
1738 return FALSE;
1739 } else if(CH(off) == _T(']')) {
1740 if(contents_beg < contents_end) {
1741
1742 *p_contents_beg = contents_beg;
1743 *p_contents_end = contents_end;
1744 *p_end = off+1;
1745 *p_end_line_index = line_index;
1746 return TRUE;
1747 } else {
1748
1749 return FALSE;
1750 }
1751 } else {
1752 unsigned codepoint;
1753 SZ char_size;
1754
1755 codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
1756 if(!ISUNICODEWHITESPACE_(codepoint)) {
1757 if(contents_end == 0) {
1758 contents_beg = off;
1759 *p_beg_line_index = line_index;
1760 }
1761 contents_end = off + char_size;
1762 }
1763
1764 off += char_size;
1765 }
1766
1767 len++;
1768 if(len > 999)
1769 return FALSE;
1770 }
1771
1772 line_index++;
1773 len++;
1774 if(line_index < n_lines)
1775 off = lines[line_index].beg;
1776 else
1777 break;
1778 }
1779
1780 return FALSE;
1781}
1782
1783static int
1784md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1785 OFF* p_contents_beg, OFF* p_contents_end)
1786{
1787 OFF off = beg;
1788
1789 if(off >= max_end || CH(off) != _T('<'))
1790 return FALSE;
1791 off++;
1792
1793 while(off < max_end) {
1794 if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) {
1795 off += 2;
1796 continue;
1797 }
1798
1799 if(ISNEWLINE(off) || CH(off) == _T('<'))
1800 return FALSE;
1801
1802 if(CH(off) == _T('>')) {
1803
1804 *p_contents_beg = beg+1;
1805 *p_contents_end = off;
1806 *p_end = off+1;
1807 return TRUE;
1808 }
1809
1810 off++;
1811 }
1812
1813 return FALSE;
1814}
1815
1816static int
1817md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1818 OFF* p_contents_beg, OFF* p_contents_end)
1819{
1820 OFF off = beg;
1821 int parenthesis_level = 0;
1822
1823 while(off < max_end) {
1824 if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) {
1825 off += 2;
1826 continue;
1827 }
1828
1829 if(ISWHITESPACE(off) || ISCNTRL(off))
1830 break;
1831
1832 if(CH(off) == _T('(')) {
1833 parenthesis_level++;
1834 if(parenthesis_level > 32)
1835 return FALSE;
1836 } else if(CH(off) == _T(')')) {
1837 if(parenthesis_level == 0)
1838 break;
1839 parenthesis_level--;
1840 }
1841
1842 off++;
1843 }
1844
1845 if(parenthesis_level != 0 || off == beg)
1846 return FALSE;
1847
1848 *p_contents_beg = beg;
1849 *p_contents_end = off;
1850 *p_end = off;
1851 return TRUE;
1852}
1853
1854static inline int
1855md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1856 OFF* p_contents_beg, OFF* p_contents_end)
1857{
1858 if(CH(beg) == _T('<'))
1859 return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
1860 else
1861 return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
1862}
1863
1864static int
1865md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
1866 OFF* p_end, MD_SIZE* p_beg_line_index, MD_SIZE* p_end_line_index,
1867 OFF* p_contents_beg, OFF* p_contents_end)
1868{
1869 OFF off = beg;
1870 CHAR closer_char;
1871 MD_SIZE line_index = 0;
1872
1873 while(off < lines[line_index].end && ISWHITESPACE(off))
1874 off++;
1875 if(off >= lines[line_index].end) {
1876 line_index++;
1877 if(line_index >= n_lines)
1878 return FALSE;
1879 off = lines[line_index].beg;
1880 }
1881 if(off == beg)
1882 return FALSE;
1883
1884 *p_beg_line_index = line_index;
1885
1886 switch(CH(off)) {
1887 case _T('"'): closer_char = _T('"'); break;
1888 case _T('\''): closer_char = _T('\''); break;
1889 case _T('('): closer_char = _T(')'); break;
1890 default: return FALSE;
1891 }
1892 off++;
1893
1894 *p_contents_beg = off;
1895
1896 while(line_index < n_lines) {
1897 OFF line_end = lines[line_index].end;
1898
1899 while(off < line_end) {
1900 if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
1901 off++;
1902 } else if(CH(off) == closer_char) {
1903
1904 *p_contents_end = off;
1905 *p_end = off+1;
1906 *p_end_line_index = line_index;
1907 return TRUE;
1908 } else if(closer_char == _T(')') && CH(off) == _T('(')) {
1909
1910 return FALSE;
1911 }
1912
1913 off++;
1914 }
1915
1916 line_index++;
1917 }
1918
1919 return FALSE;
1920}
1921
1922static int
1923md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
1924{
1925 OFF label_contents_beg;
1926 OFF label_contents_end;
1927 MD_SIZE label_contents_line_index;
1928 int label_is_multiline = FALSE;
1929 OFF dest_contents_beg;
1930 OFF dest_contents_end;
1931 OFF title_contents_beg;
1932 OFF title_contents_end;
1933 MD_SIZE title_contents_line_index;
1934 int title_is_multiline = FALSE;
1935 OFF off;
1936 MD_SIZE line_index = 0;
1937 MD_SIZE tmp_line_index;
1938 MD_REF_DEF* def = NULL;
1939 int ret = 0;
1940
1941 if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
1942 &off, &label_contents_line_index, &line_index,
1943 &label_contents_beg, &label_contents_end))
1944 return FALSE;
1945 label_is_multiline = (label_contents_line_index != line_index);
1946
1947 if(off >= lines[line_index].end || CH(off) != _T(':'))
1948 return FALSE;
1949 off++;
1950
1951 while(off < lines[line_index].end && ISWHITESPACE(off))
1952 off++;
1953 if(off >= lines[line_index].end) {
1954 line_index++;
1955 if(line_index >= n_lines)
1956 return FALSE;
1957 off = lines[line_index].beg;
1958 }
1959
1960 if(!md_is_link_destination(ctx, off, lines[line_index].end,
1961 &off, &dest_contents_beg, &dest_contents_end))
1962 return FALSE;
1963
1964 if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
1965 &off, &title_contents_line_index, &tmp_line_index,
1966 &title_contents_beg, &title_contents_end)
1967 && off >= lines[line_index + tmp_line_index].end)
1968 {
1969 title_is_multiline = (tmp_line_index != title_contents_line_index);
1970 title_contents_line_index += line_index;
1971 line_index += tmp_line_index;
1972 } else {
1973
1974 title_is_multiline = FALSE;
1975 title_contents_beg = off;
1976 title_contents_end = off;
1977 title_contents_line_index = 0;
1978 }
1979
1980 if(off < lines[line_index].end)
1981 return FALSE;
1982
1983 if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
1984 MD_REF_DEF* new_defs;
1985
1986 ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0
1987 ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
1988 : 16);
1989 new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
1990 if(new_defs == NULL) {
1991 MD_LOG("realloc() failed.");
1992 goto abort;
1993 }
1994
1995 ctx->ref_defs = new_defs;
1996 }
1997 def = &ctx->ref_defs[ctx->n_ref_defs];
1998 memset(def, 0, sizeof(MD_REF_DEF));
1999
2000 if(label_is_multiline) {
2001 MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2002 lines + label_contents_line_index, n_lines - label_contents_line_index,
2003 _T(' '), &def->label, &def->label_size));
2004 def->label_needs_free = TRUE;
2005 } else {
2006 def->label = (CHAR*) STR(label_contents_beg);
2007 def->label_size = label_contents_end - label_contents_beg;
2008 }
2009
2010 if(title_is_multiline) {
2011 MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2012 lines + title_contents_line_index, n_lines - title_contents_line_index,
2013 _T('\n'), &def->title, &def->title_size));
2014 def->title_needs_free = TRUE;
2015 } else {
2016 def->title = (CHAR*) STR(title_contents_beg);
2017 def->title_size = title_contents_end - title_contents_beg;
2018 }
2019
2020 def->dest_beg = dest_contents_beg;
2021 def->dest_end = dest_contents_end;
2022
2023 ctx->n_ref_defs++;
2024 return line_index + 1;
2025
2026abort:
2027
2028 if(def != NULL && def->label_needs_free)
2029 free(def->label);
2030 if(def != NULL && def->title_needs_free)
2031 free(def->title);
2032 return ret;
2033}
2034
2035static int
2036md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
2037 OFF beg, OFF end, MD_LINK_ATTR* attr)
2038{
2039 const MD_REF_DEF* def;
2040 const MD_LINE* beg_line;
2041 int is_multiline;
2042 CHAR* label;
2043 SZ label_size;
2044 int ret = FALSE;
2045
2046 MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
2047 MD_ASSERT(CH(end-1) == _T(']'));
2048
2049 if(ctx->max_ref_def_output == 0)
2050 return FALSE;
2051
2052 beg += (CH(beg) == _T('!') ? 2 : 1);
2053 end--;
2054
2055 beg_line = md_lookup_line(beg, lines, n_lines, NULL);
2056 is_multiline = (end > beg_line->end);
2057
2058 if(is_multiline) {
2059 MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2060 (int)(n_lines - (beg_line - lines)), _T(' '), &label, &label_size));
2061 } else {
2062 label = (CHAR*) STR(beg);
2063 label_size = end - beg;
2064 }
2065
2066 def = md_lookup_ref_def(ctx, label, label_size);
2067 if(def != NULL) {
2068 attr->dest_beg = def->dest_beg;
2069 attr->dest_end = def->dest_end;
2070 attr->title = def->title;
2071 attr->title_size = def->title_size;
2072 attr->title_needs_free = FALSE;
2073 }
2074
2075 if(is_multiline)
2076 free(label);
2077
2078 if(def != NULL) {
2079
2080 MD_SIZE output_size_estimation = def->label_size + def->title_size + def->dest_end - def->dest_beg;
2081 if(output_size_estimation < ctx->max_ref_def_output) {
2082 ctx->max_ref_def_output -= output_size_estimation;
2083 ret = TRUE;
2084 } else {
2085 MD_LOG("Too many link reference definition instantiations.");
2086 ctx->max_ref_def_output = 0;
2087 }
2088 }
2089
2090abort:
2091 return ret;
2092}
2093
2094static int
2095md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
2096 OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2097{
2098 MD_SIZE line_index = 0;
2099 MD_SIZE tmp_line_index;
2100 OFF title_contents_beg;
2101 OFF title_contents_end;
2102 MD_SIZE title_contents_line_index;
2103 int title_is_multiline;
2104 OFF off = beg;
2105 int ret = FALSE;
2106
2107 md_lookup_line(off, lines, n_lines, &line_index);
2108
2109 MD_ASSERT(CH(off) == _T('('));
2110 off++;
2111
2112 while(off < lines[line_index].end && ISWHITESPACE(off))
2113 off++;
2114 if(off >= lines[line_index].end && (off >= ctx->size || ISNEWLINE(off))) {
2115 line_index++;
2116 if(line_index >= n_lines)
2117 return FALSE;
2118 off = lines[line_index].beg;
2119 }
2120
2121 if(off < ctx->size && CH(off) == _T(')')) {
2122 attr->dest_beg = off;
2123 attr->dest_end = off;
2124 attr->title = NULL;
2125 attr->title_size = 0;
2126 attr->title_needs_free = FALSE;
2127 off++;
2128 *p_end = off;
2129 return TRUE;
2130 }
2131
2132 if(!md_is_link_destination(ctx, off, lines[line_index].end,
2133 &off, &attr->dest_beg, &attr->dest_end))
2134 return FALSE;
2135
2136 if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2137 &off, &title_contents_line_index, &tmp_line_index,
2138 &title_contents_beg, &title_contents_end))
2139 {
2140 title_is_multiline = (tmp_line_index != title_contents_line_index);
2141 title_contents_line_index += line_index;
2142 line_index += tmp_line_index;
2143 } else {
2144
2145 title_is_multiline = FALSE;
2146 title_contents_beg = off;
2147 title_contents_end = off;
2148 title_contents_line_index = 0;
2149 }
2150
2151 while(off < lines[line_index].end && ISWHITESPACE(off))
2152 off++;
2153 if(off >= lines[line_index].end) {
2154 line_index++;
2155 if(line_index >= n_lines)
2156 return FALSE;
2157 off = lines[line_index].beg;
2158 }
2159 if(CH(off) != _T(')'))
2160 goto abort;
2161 off++;
2162
2163 if(title_contents_beg >= title_contents_end) {
2164 attr->title = NULL;
2165 attr->title_size = 0;
2166 attr->title_needs_free = FALSE;
2167 } else if(!title_is_multiline) {
2168 attr->title = (CHAR*) STR(title_contents_beg);
2169 attr->title_size = title_contents_end - title_contents_beg;
2170 attr->title_needs_free = FALSE;
2171 } else {
2172 MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2173 lines + title_contents_line_index, n_lines - title_contents_line_index,
2174 _T('\n'), &attr->title, &attr->title_size));
2175 attr->title_needs_free = TRUE;
2176 }
2177
2178 *p_end = off;
2179 ret = TRUE;
2180
2181abort:
2182 return ret;
2183}
2184
2185static void
2186md_free_ref_defs(MD_CTX* ctx)
2187{
2188 int i;
2189
2190 for(i = 0; i < ctx->n_ref_defs; i++) {
2191 MD_REF_DEF* def = &ctx->ref_defs[i];
2192
2193 if(def->label_needs_free)
2194 free(def->label);
2195 if(def->title_needs_free)
2196 free(def->title);
2197 }
2198
2199 free(ctx->ref_defs);
2200}
2201
/* A "mark": a position in the document which is (or may become) meaningful
 * for inline processing, e.g. an emphasis delimiter, a bracket or an entity.
 */
struct MD_MARK_tag {
    /* Range of the mark in the document, as offsets [beg, end). */
    OFF beg;
    OFF end;

    /* Index links to other marks; -1 when unused.
     *
     * While a mark sits on an opener stack, 'next' chains it to the mark
     * below it (see md_mark_stack_push()). Once a pair is resolved by
     * md_resolve_range(), the opener's 'next' holds the closer's index and
     * the closer's 'prev' holds the opener's index.
     */
    int prev;
    int next;
    /* Kind of the mark, usually the character it stands for. 'D' denotes a
     * dummy mark reserved as spare room for later use. */
    CHAR ch;
    /* Combination of the MD_MARK_xxxx bits below. */
    unsigned char flags;
};

/* Mark flags applicable to any mark. */
#define MD_MARK_POTENTIAL_OPENER            0x01
#define MD_MARK_POTENTIAL_CLOSER            0x02
#define MD_MARK_OPENER                      0x04
#define MD_MARK_CLOSER                      0x08
#define MD_MARK_RESOLVED                    0x10

/* Mark flags whose values overlap (0x20, 0x40, 0x80 are reused), so their
 * meaning depends on what kind of mark carries them:
 *  - MD_MARK_EMPH_xxxx are set on '*' and '_' marks by md_collect_marks();
 *    the MOD3 bits record the original mark length modulo three.
 *  - MD_MARK_AUTOLINK[_MISSING_MAILTO] are set on '<' and '>' marks of
 *    autolinks by md_collect_marks().
 *  - MD_MARK_HASNESTEDBRACKETS is set on bracket openers by
 *    md_analyze_bracket().
 *  - MD_MARK_VALIDPERMISSIVEAUTOLINK: presumably for permissive autolink
 *    marks; its use is not visible in this part of the file.
 */
#define MD_MARK_EMPH_OC                     0x20
#define MD_MARK_EMPH_MOD3_0                 0x40
#define MD_MARK_EMPH_MOD3_1                 0x80
#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)
#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)
#define MD_MARK_AUTOLINK                    0x20
#define MD_MARK_AUTOLINK_MISSING_MAILTO     0x40
#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20
#define MD_MARK_HASNESTEDBRACKETS           0x20
2227
2228static MD_MARKSTACK*
2229md_emph_stack(MD_CTX* ctx, MD_CHAR ch, unsigned flags)
2230{
2231 MD_MARKSTACK* stack;
2232
2233 switch(ch) {
2234 case '*': stack = &ASTERISK_OPENERS_oo_mod3_0; break;
2235 case '_': stack = &UNDERSCORE_OPENERS_oo_mod3_0; break;
2236 default: MD_UNREACHABLE();
2237 }
2238
2239 if(flags & MD_MARK_EMPH_OC)
2240 stack += 3;
2241
2242 switch(flags & MD_MARK_EMPH_MOD3_MASK) {
2243 case MD_MARK_EMPH_MOD3_0: stack += 0; break;
2244 case MD_MARK_EMPH_MOD3_1: stack += 1; break;
2245 case MD_MARK_EMPH_MOD3_2: stack += 2; break;
2246 default: MD_UNREACHABLE();
2247 }
2248
2249 return stack;
2250}
2251
2252static MD_MARKSTACK*
2253md_opener_stack(MD_CTX* ctx, int mark_index)
2254{
2255 MD_MARK* mark = &ctx->marks[mark_index];
2256
2257 switch(mark->ch) {
2258 case _T('*'):
2259 case _T('_'): return md_emph_stack(ctx, mark->ch, mark->flags);
2260
2261 case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2262
2263 case _T('!'):
2264 case _T('['): return &BRACKET_OPENERS;
2265
2266 default: MD_UNREACHABLE();
2267 }
2268}
2269
2270static MD_MARK*
2271md_add_mark(MD_CTX* ctx)
2272{
2273 if(ctx->n_marks >= ctx->alloc_marks) {
2274 MD_MARK* new_marks;
2275
2276 ctx->alloc_marks = (ctx->alloc_marks > 0
2277 ? ctx->alloc_marks + ctx->alloc_marks / 2
2278 : 64);
2279 new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
2280 if(new_marks == NULL) {
2281 MD_LOG("realloc() failed.");
2282 return NULL;
2283 }
2284
2285 ctx->marks = new_marks;
2286 }
2287
2288 return &ctx->marks[ctx->n_marks++];
2289}
2290
/* Append a new mark via md_add_mark() and store its pointer into 'mark'.
 *
 * The macro relies on the calling function providing the variables 'ctx',
 * 'mark' and 'ret' and a label 'abort': on allocation failure it sets
 * ret to -1 and jumps to that label.
 */
#define ADD_MARK_()                                                     \
    do {                                                                \
        mark = md_add_mark(ctx);                                        \
        if(mark == NULL) {                                              \
            ret = -1;                                                   \
            goto abort;                                                 \
        }                                                               \
    } while(0)

/* Append a new mark (see ADD_MARK_() for the requirements on the caller) and
 * initialize it: the given character, range and flags, with both links
 * (prev, next) reset to -1. Note the character is narrowed to char.
 */
#define ADD_MARK(ch_, beg_, end_, flags_)                               \
    do {                                                                \
        ADD_MARK_();                                                    \
        mark->beg = (beg_);                                             \
        mark->end = (end_);                                             \
        mark->prev = -1;                                                \
        mark->next = -1;                                                \
        mark->ch = (char)(ch_);                                         \
        mark->flags = (flags_);                                         \
    } while(0)
2310
2311static inline void
2312md_mark_stack_push(MD_CTX* ctx, MD_MARKSTACK* stack, int mark_index)
2313{
2314 ctx->marks[mark_index].next = stack->top;
2315 stack->top = mark_index;
2316}
2317
2318static inline int
2319md_mark_stack_pop(MD_CTX* ctx, MD_MARKSTACK* stack)
2320{
2321 int top = stack->top;
2322 if(top >= 0)
2323 stack->top = ctx->marks[top].next;
2324 return top;
2325}
2326
2327static inline void
2328md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2329{
2330 MD_MARK* mark = &ctx->marks[mark_index];
2331 MD_ASSERT(mark->ch == 'D');
2332
2333 MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF));
2334 memcpy(mark, &ptr, sizeof(void*));
2335}
2336
2337static inline void*
2338md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2339{
2340 void* ptr;
2341 MD_MARK* mark = &ctx->marks[mark_index];
2342 MD_ASSERT(mark->ch == 'D');
2343 memcpy(&ptr, mark, sizeof(void*));
2344 return ptr;
2345}
2346
2347static inline void
2348md_resolve_range(MD_CTX* ctx, int opener_index, int closer_index)
2349{
2350 MD_MARK* opener = &ctx->marks[opener_index];
2351 MD_MARK* closer = &ctx->marks[closer_index];
2352
2353 opener->next = closer_index;
2354 closer->prev = opener_index;
2355
2356 opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
2357 closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
2358}
2359
2360#define MD_ROLLBACK_CROSSING 0
2361#define MD_ROLLBACK_ALL 1
2362
2363static void
2364md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
2365{
2366 int i;
2367
2368 for(i = 0; i < (int) SIZEOF_ARRAY(ctx->opener_stacks); i++) {
2369 MD_MARKSTACK* stack = &ctx->opener_stacks[i];
2370 while(stack->top >= opener_index)
2371 md_mark_stack_pop(ctx, stack);
2372 }
2373
2374 if(how == MD_ROLLBACK_ALL) {
2375 for(i = opener_index + 1; i < closer_index; i++) {
2376 ctx->marks[i].ch = 'D';
2377 ctx->marks[i].flags = 0;
2378 }
2379 }
2380}
2381
2382static void
2383md_build_mark_char_map(MD_CTX* ctx)
2384{
2385 memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
2386
2387 ctx->mark_char_map['\\'] = 1;
2388 ctx->mark_char_map['*'] = 1;
2389 ctx->mark_char_map['_'] = 1;
2390 ctx->mark_char_map['`'] = 1;
2391 ctx->mark_char_map['&'] = 1;
2392 ctx->mark_char_map[';'] = 1;
2393 ctx->mark_char_map['<'] = 1;
2394 ctx->mark_char_map['>'] = 1;
2395 ctx->mark_char_map['['] = 1;
2396 ctx->mark_char_map['!'] = 1;
2397 ctx->mark_char_map[']'] = 1;
2398 ctx->mark_char_map['\0'] = 1;
2399
2400 if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
2401 ctx->mark_char_map['~'] = 1;
2402
2403 if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
2404 ctx->mark_char_map['$'] = 1;
2405
2406 if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
2407 ctx->mark_char_map['@'] = 1;
2408
2409 if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
2410 ctx->mark_char_map[':'] = 1;
2411
2412 if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
2413 ctx->mark_char_map['.'] = 1;
2414
2415 if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS))
2416 ctx->mark_char_map['|'] = 1;
2417
2418 if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
2419 int i;
2420
2421 for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) {
2422 if(ISWHITESPACE_(i))
2423 ctx->mark_char_map[i] = 1;
2424 }
2425 }
2426}
2427
/* Check whether a run of backticks starting at beg opens a code span and,
 * if so, find the matching closer.
 *
 * lines/n_lines describe the rest of the paragraph, with lines[0] being the
 * line containing beg. On success (TRUE) both *opener and *closer are filled
 * in (character, range and POTENTIAL_OPENER/CLOSER flag). On failure (FALSE)
 * only opener->end is set, to the end of the backtick run.
 *
 * last_potential_closers[] and *p_reached_paragraph_end are a cache shared
 * between calls: for every backtick run length (index = length - 1) the
 * array holds the offset of the furthest run of that length seen so far;
 * the flag tells that some earlier scan has already reached the paragraph
 * end. Together they allow to refuse hopeless openers without re-scanning.
 *
 * NOTE(review): the code assumes CH(beg) is a backtick located before
 * lines[0].end, so that mark_len is at least 1 -- confirm against callers.
 */
static int
md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
                MD_MARK* opener, MD_MARK* closer,
                OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
                int* p_reached_paragraph_end)
{
    OFF opener_beg = beg;
    OFF opener_end;
    OFF closer_beg;
    OFF closer_end;
    SZ mark_len;
    OFF line_end;
    int has_space_after_opener = FALSE;
    int has_eol_after_opener = FALSE;
    int has_space_before_closer = FALSE;
    int has_eol_before_closer = FALSE;
    int has_only_space = TRUE;
    MD_SIZE line_index = 0;

    /* Measure the opening run of backticks. */
    line_end = lines[0].end;
    opener_end = opener_beg;
    while(opener_end < line_end  &&  CH(opener_end) == _T('`'))
        opener_end++;
    has_space_after_opener = (opener_end < line_end  &&  CH(opener_end) == _T(' '));
    has_eol_after_opener = (opener_end == line_end);

    /* The end of the opening run is reported even if we fail below. */
    opener->end = opener_end;

    mark_len = opener_end - opener_beg;
    if(mark_len > CODESPAN_MARK_MAXLEN)
        return FALSE;

    /* Consult the cache: give up if the furthest known run of this length
     * lies at or beyond the paragraph end, or if the paragraph has already
     * been scanned to its end and no run of this length follows the opener. */
    if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end  ||
       (*p_reached_paragraph_end  &&  last_potential_closers[mark_len-1] < opener_end))
        return FALSE;

    closer_beg = opener_end;
    closer_end = opener_end;

    /* Search for a backtick run of the same length. */
    while(TRUE) {
        /* Advance to the next backtick on the current line, noting whether
         * anything other than a space has been seen. */
        while(closer_beg < line_end  &&  CH(closer_beg) != _T('`')) {
            if(CH(closer_beg) != _T(' '))
                has_only_space = FALSE;
            closer_beg++;
        }
        closer_end = closer_beg;
        while(closer_end < line_end  &&  CH(closer_end) == _T('`'))
            closer_end++;

        if(closer_end - closer_beg == mark_len) {
            /* Found the closer. */
            has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' '));
            has_eol_before_closer = (closer_beg == lines[line_index].beg);
            break;
        }

        if(closer_end - closer_beg > 0) {
            /* A backtick run of a different length: it is part of the
             * contents. */
            has_only_space = FALSE;

            /* Remember it in the cache as the furthest run of its length. */
            if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
                if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
                    last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
            }
        }

        if(closer_end >= line_end) {
            line_index++;
            if(line_index >= n_lines) {
                /* Paragraph end reached without finding any closer. */
                *p_reached_paragraph_end = TRUE;
                return FALSE;
            }
            /* Continue on the next line. */
            line_end = lines[line_index].end;
            closer_beg = lines[line_index].beg;
        } else {
            closer_beg = closer_end;
        }
    }

    /* If the contents is not made of spaces only and there is a space or a
     * line break both right after the opener and right before the closer,
     * one such space/line break on each side is absorbed into the marks. */
    if(!has_only_space  &&
       (has_space_after_opener || has_eol_after_opener)  &&
       (has_space_before_closer || has_eol_before_closer))
    {
        if(has_space_after_opener)
            opener_end++;
        else
            opener_end = lines[1].beg;

        if(has_space_before_closer)
            closer_beg--;
        else {
            /* Start the closer at the end of the previous line... */
            closer_beg = lines[line_index-1].end;
            /* ...but skip over blanks found there. */
            while(closer_beg < ctx->size  &&  ISBLANK(closer_beg))
                closer_beg++;
        }
    }

    opener->ch = _T('`');
    opener->beg = opener_beg;
    opener->end = opener_end;
    opener->flags = MD_MARK_POTENTIAL_OPENER;
    closer->ch = _T('`');
    closer->beg = closer_beg;
    closer->end = closer_end;
    closer->flags = MD_MARK_POTENTIAL_CLOSER;
    return TRUE;
}
2539
2540static int
2541md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2542{
2543 OFF off = beg+1;
2544
2545 MD_ASSERT(CH(beg) == _T('<'));
2546
2547 if(off >= max_end || !ISASCII(off))
2548 return FALSE;
2549 off++;
2550 while(1) {
2551 if(off >= max_end)
2552 return FALSE;
2553 if(off - beg > 32)
2554 return FALSE;
2555 if(CH(off) == _T(':') && off - beg >= 3)
2556 break;
2557 if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
2558 return FALSE;
2559 off++;
2560 }
2561
2562 while(off < max_end && CH(off) != _T('>')) {
2563 if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<'))
2564 return FALSE;
2565 off++;
2566 }
2567
2568 if(off >= max_end)
2569 return FALSE;
2570
2571 MD_ASSERT(CH(off) == _T('>'));
2572 *p_end = off+1;
2573 return TRUE;
2574}
2575
2576static int
2577md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2578{
2579 OFF off = beg + 1;
2580 int label_len;
2581
2582 MD_ASSERT(CH(beg) == _T('<'));
2583
2584 while(off < max_end && (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-"))))
2585 off++;
2586 if(off <= beg+1)
2587 return FALSE;
2588
2589 if(off >= max_end || CH(off) != _T('@'))
2590 return FALSE;
2591 off++;
2592
2593 label_len = 0;
2594 while(off < max_end) {
2595 if(ISALNUM(off))
2596 label_len++;
2597 else if(CH(off) == _T('-') && label_len > 0)
2598 label_len++;
2599 else if(CH(off) == _T('.') && label_len > 0 && CH(off-1) != _T('-'))
2600 label_len = 0;
2601 else
2602 break;
2603
2604 if(label_len > 63)
2605 return FALSE;
2606
2607 off++;
2608 }
2609
2610 if(label_len <= 0 || off >= max_end || CH(off) != _T('>') || CH(off-1) == _T('-'))
2611 return FALSE;
2612
2613 *p_end = off+1;
2614 return TRUE;
2615}
2616
2617static int
2618md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2619{
2620 if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2621 *p_missing_mailto = FALSE;
2622 return TRUE;
2623 }
2624
2625 if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2626 *p_missing_mailto = TRUE;
2627 return TRUE;
2628 }
2629
2630 return FALSE;
2631}
2632
/* Scan the given lines and append a mark to ctx->marks for every place
 * which might be meaningful for inline processing. Some constructs (code
 * spans, raw HTML, autolinks, escapes) are recognized and resolved right
 * here; for the others only potential openers/closers are recorded.
 *
 * table_mode enables collecting of '|' marks (they are also collected when
 * MD_FLAG_WIKILINKS is set).
 *
 * A terminating mark (character 127, located at ctx->size) is appended at
 * the end. Returns 0 on success, -1 if a mark could not be allocated.
 */
static int
md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table_mode)
{
    MD_SIZE line_index;
    int ret = 0;
    MD_MARK* mark;
    /* Cache shared by all md_is_code_span() calls made for this paragraph. */
    OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 };
    int codespan_scanned_till_paragraph_end = FALSE;

    for(line_index = 0; line_index < n_lines; line_index++) {
        const MD_LINE* line = &lines[line_index];
        OFF off = line->beg;

        while(TRUE) {
            CHAR ch;

#ifdef MD4C_USE_UTF16
    /* Characters beyond the size of the map cannot be mark characters. */
    #define IS_MARK_CHAR(off)   ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map))  &&  \
                                (ctx->mark_char_map[(unsigned char) CH(off)]))
#else
    /* Look the character up in the map built by md_build_mark_char_map(). */
    #define IS_MARK_CHAR(off)   (ctx->mark_char_map[(unsigned char) CH(off)])
#endif

            /* Skip ordinary characters quickly: four at a time first, then
             * one by one. */
            while(off + 3 < line->end  &&  !IS_MARK_CHAR(off+0)  &&  !IS_MARK_CHAR(off+1)
                                       &&  !IS_MARK_CHAR(off+2)  &&  !IS_MARK_CHAR(off+3))
                off += 4;
            while(off < line->end  &&  !IS_MARK_CHAR(off+0))
                off++;

            if(off >= line->end)
                break;

            ch = CH(off);

            /* Backslash escape: a backslash followed by punctuation or by
             * a new line. */
            if(ch == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
                /* An escaped new line is recorded only if some line follows
                 * within the given lines. */
                if(!ISNEWLINE(off+1) || line_index+1 < n_lines)
                    ADD_MARK(ch, off, off+2, MD_MARK_RESOLVED);
                off += 2;
                continue;
            }

            /* Potential emphasis delimiter run. */
            if(ch == _T('*')  ||  ch == _T('_')) {
                OFF tmp = off+1;
                int left_level;     /* What precedes: 0 = whitespace (or line start); 1 = punctuation; 2 = other char. */
                int right_level;    /* What follows: 0 = whitespace (or line end); 1 = punctuation; 2 = other char. */

                /* Find the end of the run of the same character. */
                while(tmp < line->end  &&  CH(tmp) == ch)
                    tmp++;

                if(off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off))
                    left_level = 0;
                else if(ISUNICODEPUNCTBEFORE(off))
                    left_level = 1;
                else
                    left_level = 2;

                if(tmp == line->end  ||  ISUNICODEWHITESPACE(tmp))
                    right_level = 0;
                else if(ISUNICODEPUNCT(tmp))
                    right_level = 1;
                else
                    right_level = 2;

                /* Underscores surrounded by other characters on both sides
                 * are treated as if surrounded by whitespace, i.e. no mark
                 * is created for them. */
                if(ch == _T('_')  &&  left_level == 2  &&  right_level == 2) {
                    left_level = 0;
                    right_level = 0;
                }

                if(left_level != 0  ||  right_level != 0) {
                    unsigned flags = 0;

                    if(left_level > 0  &&  left_level >= right_level)
                        flags |= MD_MARK_POTENTIAL_CLOSER;
                    if(right_level > 0  &&  right_level >= left_level)
                        flags |= MD_MARK_POTENTIAL_OPENER;
                    if(flags == (MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER))
                        flags |= MD_MARK_EMPH_OC;

                    /* Record the length of the run modulo three. */
                    switch((tmp - off) % 3) {
                        case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
                        case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
                        case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
                    }

                    ADD_MARK(ch, off, tmp, flags);

                    /* Follow the mark with one empty dummy mark per every
                     * additional character of the run; md_split_emph_mark()
                     * needs them when it splits the mark. */
                    off++;
                    while(off < tmp) {
                        ADD_MARK('D', off, off, 0);
                        off++;
                    }
                    continue;
                }

                off = tmp;
                continue;
            }

            /* Potential code span. */
            if(ch == _T('`')) {
                MD_MARK opener;
                MD_MARK closer;
                int is_code_span;

                is_code_span = md_is_code_span(ctx, line, n_lines - line_index, off,
                                    &opener, &closer, codespan_last_potential_closers,
                                    &codespan_scanned_till_paragraph_end);
                if(is_code_span) {
                    ADD_MARK(opener.ch, opener.beg, opener.end, opener.flags);
                    ADD_MARK(closer.ch, closer.beg, closer.end, closer.flags);
                    md_resolve_range(ctx, ctx->n_marks-2, ctx->n_marks-1);
                    off = closer.end;

                    /* The code span may end on a later line; update the
                     * current line accordingly. */
                    if(off > line->end)
                        line = md_lookup_line(off, lines, n_lines, &line_index);
                    continue;
                }

                /* Not a code span: skip the whole run of backticks. */
                off = opener.end;
                continue;
            }

            /* Potential start of an entity. */
            if(ch == _T('&')) {
                ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                off++;
                continue;
            }

            /* Potential end of an entity. */
            if(ch == _T(';')) {
                /* Only of interest if the previous mark is '&'. */
                if(ctx->n_marks > 0  &&  ctx->marks[ctx->n_marks-1].ch == _T('&'))
                    ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);

                off++;
                continue;
            }

            /* Potential raw HTML or autolink. */
            if(ch == _T('<')) {
                int is_autolink;
                OFF autolink_end;
                int missing_mailto;

                if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
                    int is_html;
                    OFF html_end;

                    is_html = md_is_html_any(ctx, line, n_lines - line_index, off,
                                    lines[n_lines-1].end, &html_end);
                    if(is_html) {
                        /* Raw HTML is delimited with a pair of empty marks,
                         * resolved and interconnected right away. */
                        ADD_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        ADD_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
                        off = html_end;

                        /* The HTML may end on a later line; update the
                         * current line accordingly. */
                        if(off > line->end)
                            line = md_lookup_line(off, lines, n_lines, &line_index);
                        continue;
                    }
                }

                is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
                                    &autolink_end, &missing_mailto);
                if(is_autolink) {
                    unsigned flags = MD_MARK_RESOLVED | MD_MARK_AUTOLINK;
                    if(missing_mailto)
                        flags |= MD_MARK_AUTOLINK_MISSING_MAILTO;

                    /* The marks cover the '<' and the '>' of the autolink. */
                    ADD_MARK(_T('<'), off, off+1, MD_MARK_OPENER | flags);
                    ADD_MARK(_T('>'), autolink_end-1, autolink_end, MD_MARK_CLOSER | flags);
                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
                    off = autolink_end;
                    continue;
                }

                off++;
                continue;
            }

            /* Potential link or image opener ("[" or "!["). */
            if(ch == _T('[')  ||  (ch == _T('!') && off+1 < line->end && CH(off+1) == _T('['))) {
                OFF tmp = (ch == _T('[') ? off+1 : off+2);
                ADD_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
                off = tmp;
                /* Two dummy marks; md_resolve_links() stores the link
                 * destination and title into them. */
                ADD_MARK('D', off, off, 0);
                ADD_MARK('D', off, off, 0);
                continue;
            }
            /* Potential link or image closer. */
            if(ch == _T(']')) {
                ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
                off++;
                continue;
            }

            /* Potential permissive e-mail autolink. */
            if(ch == _T('@')) {
                if(line->beg + 1 <= off  &&  ISALNUM(off-1)  &&
                    off + 3 < line->end  &&  ISALNUM(off+1))
                {
                    ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                    /* A dummy mark following the opener (presumably a
                     * reserve for its closer); it spans the whole line. */
                    ADD_MARK('D', line->beg, line->end, 0);
                }

                off++;
                continue;
            }

            /* Potential permissive URL autolink. */
            if(ch == _T(':')) {
                static struct {
                    const CHAR* scheme;
                    SZ scheme_size;
                    const CHAR* suffix;
                    SZ suffix_size;
                } scheme_map[] = {
                    /* In the order: scheme, its size, the text expected
                     * right after the colon, its size. */
                    { _T("http"), 4,    _T("//"), 2 },
                    { _T("https"), 5,   _T("//"), 2 },
                    { _T("ftp"), 3,     _T("//"), 2 }
                };
                int scheme_index;

                for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
                    const CHAR* scheme = scheme_map[scheme_index].scheme;
                    const SZ scheme_size = scheme_map[scheme_index].scheme_size;
                    const CHAR* suffix = scheme_map[scheme_index].suffix;
                    const SZ suffix_size = scheme_map[scheme_index].suffix_size;

                    /* The scheme must precede the colon and the suffix must
                     * follow it, all within the current line. */
                    if(line->beg + scheme_size <= off  &&  md_ascii_eq(STR(off-scheme_size), scheme, scheme_size)  &&
                        off + 1 + suffix_size < line->end  &&  md_ascii_eq(STR(off+1), suffix, suffix_size))
                    {
                        ADD_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
                        /* A dummy mark following the opener (presumably a
                         * reserve for its closer); it spans the whole line. */
                        ADD_MARK('D', line->beg, line->end, 0);
                        off += 1 + suffix_size;
                        break;
                    }
                }

                off++;
                continue;
            }

            /* Potential permissive WWW autolink ("www."). */
            if(ch == _T('.')) {
                if(line->beg + 3 <= off  &&  md_ascii_eq(STR(off-3), _T("www"), 3)  &&
                    (off-3 == line->beg || ISUNICODEWHITESPACEBEFORE(off-3) || ISUNICODEPUNCTBEFORE(off-3)))
                {
                    ADD_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
                    /* A dummy mark following the opener (presumably a
                     * reserve for its closer); it spans the whole line. */
                    ADD_MARK('D', line->beg, line->end, 0);
                    off++;
                    continue;
                }

                off++;
                continue;
            }

            /* Pipe: a table cell boundary or a wiki link delimiter. */
            if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS)  &&  ch == _T('|')) {
                ADD_MARK(ch, off, off+1, 0);
                off++;
                continue;
            }

            /* Potential LaTeX math ('$') or strikethrough ('~') delimiter. */
            if(ch == _T('$') || ch == _T('~')) {
                OFF tmp = off+1;

                while(tmp < line->end && CH(tmp) == ch)
                    tmp++;

                /* Runs longer than two characters produce no mark. */
                if(tmp - off <= 2) {
                    unsigned flags = MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER;

                    /* Not an opener if preceded by an ordinary character;
                     * not a closer if followed by one. */
                    if(off > line->beg  &&  !ISUNICODEWHITESPACEBEFORE(off)  &&  !ISUNICODEPUNCTBEFORE(off))
                        flags &= ~MD_MARK_POTENTIAL_OPENER;
                    if(tmp < line->end  &&  !ISUNICODEWHITESPACE(tmp)  &&  !ISUNICODEPUNCT(tmp))
                        flags &= ~MD_MARK_POTENTIAL_CLOSER;
                    if(flags != 0)
                        ADD_MARK(ch, off, tmp, flags);
                }

                off = tmp;
                continue;
            }

            /* Whitespace (it is a mark character only with
             * MD_FLAG_COLLAPSEWHITESPACE, see md_build_mark_char_map()). */
            if(ISWHITESPACE_(ch)) {
                OFF tmp = off+1;

                while(tmp < line->end  &&  ISWHITESPACE(tmp))
                    tmp++;

                /* A lone ordinary space needs no mark. */
                if(tmp - off > 1  ||  ch != _T(' '))
                    ADD_MARK(ch, off, tmp, MD_MARK_RESOLVED);

                off = tmp;
                continue;
            }

            /* NUL character. */
            if(ch == _T('\0')) {
                ADD_MARK(ch, off, off+1, MD_MARK_RESOLVED);
                off++;
                continue;
            }

            /* A mark character with no meaning in this context. */
            off++;
        }
    }

    /* Terminating mark located beyond everything else. */
    ADD_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED);

abort:
    return ret;
}
2948
2949static void
2950md_analyze_bracket(MD_CTX* ctx, int mark_index)
2951{
2952
2953 MD_MARK* mark = &ctx->marks[mark_index];
2954
2955 if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
2956 if(BRACKET_OPENERS.top >= 0)
2957 ctx->marks[BRACKET_OPENERS.top].flags |= MD_MARK_HASNESTEDBRACKETS;
2958
2959 md_mark_stack_push(ctx, &BRACKET_OPENERS, mark_index);
2960 return;
2961 }
2962
2963 if(BRACKET_OPENERS.top >= 0) {
2964 int opener_index = md_mark_stack_pop(ctx, &BRACKET_OPENERS);
2965 MD_MARK* opener = &ctx->marks[opener_index];
2966
2967 opener->next = mark_index;
2968 mark->prev = opener_index;
2969
2970 if(ctx->unresolved_link_tail >= 0)
2971 ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
2972 else
2973 ctx->unresolved_link_head = opener_index;
2974 ctx->unresolved_link_tail = opener_index;
2975 opener->prev = -1;
2976 }
2977}
2978
/* Forward declaration; md_resolve_links() below calls this function. */
static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
                                     int mark_beg, int mark_end);
2981
2982static int
2983md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
2984{
2985 int opener_index = ctx->unresolved_link_head;
2986 OFF last_link_beg = 0;
2987 OFF last_link_end = 0;
2988 OFF last_img_beg = 0;
2989 OFF last_img_end = 0;
2990
2991 while(opener_index >= 0) {
2992 MD_MARK* opener = &ctx->marks[opener_index];
2993 int closer_index = opener->next;
2994 MD_MARK* closer = &ctx->marks[closer_index];
2995 int next_index = opener->prev;
2996 MD_MARK* next_opener;
2997 MD_MARK* next_closer;
2998 MD_LINK_ATTR attr;
2999 int is_link = FALSE;
3000
3001 if(next_index >= 0) {
3002 next_opener = &ctx->marks[next_index];
3003 next_closer = &ctx->marks[next_opener->next];
3004 } else {
3005 next_opener = NULL;
3006 next_closer = NULL;
3007 }
3008
3009 if((opener->beg < last_link_beg && closer->end < last_link_end) ||
3010 (opener->beg < last_img_beg && closer->end < last_img_end) ||
3011 (opener->beg < last_link_end && opener->ch == '['))
3012 {
3013 opener_index = next_index;
3014 continue;
3015 }
3016
3017 if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3018 (opener->end - opener->beg == 1) &&
3019 next_opener != NULL &&
3020 next_opener->ch == '[' &&
3021 (next_opener->beg == opener->beg - 1) &&
3022 (next_opener->end - next_opener->beg == 1) &&
3023 next_closer != NULL &&
3024 next_closer->ch == ']' &&
3025 (next_closer->beg == closer->beg + 1) &&
3026 (next_closer->end - next_closer->beg == 1))
3027 {
3028 MD_MARK* delim = NULL;
3029 int delim_index;
3030 OFF dest_beg, dest_end;
3031
3032 is_link = TRUE;
3033
3034 delim_index = opener_index + 1;
3035 while(delim_index < closer_index) {
3036 MD_MARK* m = &ctx->marks[delim_index];
3037 if(m->ch == '|') {
3038 delim = m;
3039 break;
3040 }
3041 if(m->ch != 'D') {
3042 if(m->beg - opener->end > 100)
3043 break;
3044 if(m->ch != 'D' && (m->flags & MD_MARK_OPENER))
3045 delim_index = m->next;
3046 }
3047 delim_index++;
3048 }
3049
3050 dest_beg = opener->end;
3051 dest_end = (delim != NULL) ? delim->beg : closer->beg;
3052 if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
3053 is_link = FALSE;
3054
3055 if(is_link) {
3056 OFF off;
3057 for(off = dest_beg; off < dest_end; off++) {
3058 if(ISNEWLINE(off)) {
3059 is_link = FALSE;
3060 break;
3061 }
3062 }
3063 }
3064
3065 if(is_link) {
3066 if(delim != NULL) {
3067 if(delim->end < closer->beg) {
3068 md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
3069 md_rollback(ctx, delim_index, closer_index, MD_ROLLBACK_CROSSING);
3070 delim->flags |= MD_MARK_RESOLVED;
3071 opener->end = delim->beg;
3072 } else {
3073
3074 md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3075 closer->beg = delim->beg;
3076 delim = NULL;
3077 }
3078 }
3079
3080 opener->beg = next_opener->beg;
3081 opener->next = closer_index;
3082 opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
3083
3084 closer->end = next_closer->end;
3085 closer->prev = opener_index;
3086 closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
3087
3088 last_link_beg = opener->beg;
3089 last_link_end = closer->end;
3090
3091 if(delim != NULL)
3092 md_analyze_link_contents(ctx, lines, n_lines, delim_index+1, closer_index);
3093
3094 opener_index = next_opener->prev;
3095 continue;
3096 }
3097 }
3098
3099 if(next_opener != NULL && next_opener->beg == closer->end) {
3100 if(next_closer->beg > closer->end + 1) {
3101
3102 if(!(next_opener->flags & MD_MARK_HASNESTEDBRACKETS))
3103 is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
3104 } else {
3105
3106 if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
3107 is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3108 }
3109
3110 if(is_link < 0)
3111 return -1;
3112
3113 if(is_link) {
3114
3115 closer->end = next_closer->end;
3116
3117 next_index = ctx->marks[next_index].prev;
3118 }
3119 } else {
3120 if(closer->end < ctx->size && CH(closer->end) == _T('(')) {
3121
3122 OFF inline_link_end = UINT_MAX;
3123
3124 is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr);
3125 if(is_link < 0)
3126 return -1;
3127
3128 if(is_link) {
3129 int i = closer_index + 1;
3130
3131 while(i < ctx->n_marks) {
3132 MD_MARK* mark = &ctx->marks[i];
3133
3134 if(mark->beg >= inline_link_end)
3135 break;
3136 if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) {
3137 if(ctx->marks[mark->next].beg >= inline_link_end) {
3138
3139 if(attr.title_needs_free)
3140 free(attr.title);
3141 is_link = FALSE;
3142 break;
3143 }
3144
3145 i = mark->next + 1;
3146 } else {
3147 i++;
3148 }
3149 }
3150 }
3151
3152 if(is_link) {
3153
3154 closer->end = inline_link_end;
3155 }
3156 }
3157
3158 if(!is_link) {
3159
3160 if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
3161 is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3162 if(is_link < 0)
3163 return -1;
3164 }
3165 }
3166
3167 if(is_link) {
3168
3169 opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
3170 closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
3171
3172 MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
3173 ctx->marks[opener_index+1].beg = attr.dest_beg;
3174 ctx->marks[opener_index+1].end = attr.dest_end;
3175
3176 MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
3177 md_mark_store_ptr(ctx, opener_index+2, attr.title);
3178
3179 if(attr.title_needs_free)
3180 md_mark_stack_push(ctx, &ctx->ptr_stack, opener_index+2);
3181 ctx->marks[opener_index+2].prev = attr.title_size;
3182
3183 if(opener->ch == '[') {
3184 last_link_beg = opener->beg;
3185 last_link_end = closer->end;
3186 } else {
3187 last_img_beg = opener->beg;
3188 last_img_end = closer->end;
3189 }
3190
3191 md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
3192
3193 if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) {
3194 MD_MARK* first_nested;
3195 MD_MARK* last_nested;
3196
3197 first_nested = opener + 1;
3198 while(first_nested->ch == _T('D') && first_nested < closer)
3199 first_nested++;
3200
3201 last_nested = closer - 1;
3202 while(first_nested->ch == _T('D') && last_nested > opener)
3203 last_nested--;
3204
3205 if((first_nested->flags & MD_MARK_RESOLVED) &&
3206 first_nested->beg == opener->end &&
3207 ISANYOF_(first_nested->ch, _T("@:.")) &&
3208 first_nested->next == (last_nested - ctx->marks) &&
3209 last_nested->end == closer->beg)
3210 {
3211 first_nested->ch = _T('D');
3212 first_nested->flags &= ~MD_MARK_RESOLVED;
3213 last_nested->ch = _T('D');
3214 last_nested->flags &= ~MD_MARK_RESOLVED;
3215 }
3216 }
3217 }
3218
3219 opener_index = next_index;
3220 }
3221
3222 return 0;
3223}
3224
3225static void
3226md_analyze_entity(MD_CTX* ctx, int mark_index)
3227{
3228 MD_MARK* opener = &ctx->marks[mark_index];
3229 MD_MARK* closer;
3230 OFF off;
3231
3232 if(mark_index + 1 >= ctx->n_marks)
3233 return;
3234 closer = &ctx->marks[mark_index+1];
3235 if(closer->ch != ';')
3236 return;
3237
3238 if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
3239 MD_ASSERT(off == closer->end);
3240
3241 md_resolve_range(ctx, mark_index, mark_index+1);
3242 opener->end = closer->end;
3243 }
3244}
3245
3246static void
3247md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
3248{
3249 MD_MARK* mark = &ctx->marks[mark_index];
3250 mark->flags |= MD_MARK_RESOLVED;
3251 mark->next = -1;
3252
3253 if(ctx->table_cell_boundaries_head < 0)
3254 ctx->table_cell_boundaries_head = mark_index;
3255 else
3256 ctx->marks[ctx->table_cell_boundaries_tail].next = mark_index;
3257 ctx->table_cell_boundaries_tail = mark_index;
3258 ctx->n_table_cell_boundaries++;
3259}
3260
3261static int
3262md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
3263{
3264 MD_MARK* mark = &ctx->marks[mark_index];
3265 int new_mark_index = mark_index + (mark->end - mark->beg - n);
3266 MD_MARK* dummy = &ctx->marks[new_mark_index];
3267
3268 MD_ASSERT(mark->end - mark->beg > n);
3269 MD_ASSERT(dummy->ch == 'D');
3270
3271 memcpy(dummy, mark, sizeof(MD_MARK));
3272 mark->end -= n;
3273 dummy->beg = mark->end;
3274
3275 return new_mark_index;
3276}
3277
/* Analyze the emphasis mark ('*' or '_', as dispatched by md_analyze_marks())
 * at mark_index.
 *
 * If the mark can close emphasis, the most recent compatible opener is looked
 * up on the opener stacks and the pair gets resolved (splitting the longer of
 * the two marks if their lengths differ). Otherwise, if the mark can open
 * emphasis, it is pushed on its opener stack to wait for a later closer. */
static void
md_analyze_emph(MD_CTX* ctx, int mark_index)
{
    MD_MARK* mark = &ctx->marks[mark_index];

    /* First try to act as a closer. */
    if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
        MD_MARK* opener = NULL;
        int opener_index = 0;
        MD_MARKSTACK* opener_stacks[6];
        int i, n_opener_stacks;
        unsigned flags = mark->flags;

        n_opener_stacks = 0;

        /* Collect the opener stacks this closer is allowed to pair with.
         * Stacks are selected by mark character, by the MD_MARK_EMPH_MOD3_x
         * class and by MD_MARK_EMPH_OC; which classes are eligible depends on
         * the closer's own MOD3 class and OC flag.
         * NOTE(review): this looks like CommonMark's "multiple of 3" rule for
         * delimiter runs -- confirm against md_emph_stack(). */
        opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_0 | MD_MARK_EMPH_OC);
        if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
            opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_1 | MD_MARK_EMPH_OC);
        if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
            opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_2 | MD_MARK_EMPH_OC);
        opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_0);
        if(!(flags & MD_MARK_EMPH_OC) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
            opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_1);
        if(!(flags & MD_MARK_EMPH_OC) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
            opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_2);

        /* Among the tops of all eligible stacks pick the mark which ends
         * furthest to the right, i.e. the opener closest to this closer. */
        for(i = 0; i < n_opener_stacks; i++) {
            if(opener_stacks[i]->top >= 0) {
                int m_index = opener_stacks[i]->top;
                MD_MARK* m = &ctx->marks[m_index];

                if(opener == NULL || m->end > opener->end) {
                    opener_index = m_index;
                    opener = m;
                }
            }
        }

        /* Resolve the pair if an opener has been found. */
        if(opener != NULL) {
            SZ opener_size = opener->end - opener->beg;
            SZ closer_size = mark->end - mark->beg;
            MD_MARKSTACK* stack = md_opener_stack(ctx, opener_index);

            if(opener_size > closer_size) {
                /* Opener is longer: split off its last closer_size characters
                 * into a new mark and pair that one with the closer. The new
                 * mark is pushed so that the pop below removes it again and
                 * (presumably, given LIFO stack semantics) leaves the
                 * shortened original opener on the stack. */
                opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
                md_mark_stack_push(ctx, stack, opener_index);
            } else if(opener_size < closer_size) {
                /* Closer is longer: keep only its first opener_size characters
                 * in this mark; the remainder moves into a separate mark. */
                md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
            }

            /* Until now the stack top has only been inspected; remove it. */
            md_mark_stack_pop(ctx, stack);

            md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
            md_resolve_range(ctx, opener_index, mark_index);
            return;
        }
    }

    /* Not resolved as a closer; remember the mark as a possible opener. */
    if(mark->flags & MD_MARK_POTENTIAL_OPENER)
        md_mark_stack_push(ctx, md_emph_stack(ctx, mark->ch, mark->flags), mark_index);
}
3338
3339static void
3340md_analyze_tilde(MD_CTX* ctx, int mark_index)
3341{
3342 MD_MARK* mark = &ctx->marks[mark_index];
3343 MD_MARKSTACK* stack = md_opener_stack(ctx, mark_index);
3344
3345 if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && stack->top >= 0) {
3346 int opener_index = stack->top;
3347
3348 md_mark_stack_pop(ctx, stack);
3349 md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3350 md_resolve_range(ctx, opener_index, mark_index);
3351 return;
3352 }
3353
3354 if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3355 md_mark_stack_push(ctx, stack, mark_index);
3356}
3357
3358static void
3359md_analyze_dollar(MD_CTX* ctx, int mark_index)
3360{
3361 MD_MARK* mark = &ctx->marks[mark_index];
3362
3363 if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && DOLLAR_OPENERS.top >= 0) {
3364
3365 MD_MARK* opener = &ctx->marks[DOLLAR_OPENERS.top];
3366 int opener_index = DOLLAR_OPENERS.top;
3367 MD_MARK* closer = mark;
3368 int closer_index = mark_index;
3369
3370 if(opener->end - opener->beg == closer->end - closer->beg) {
3371
3372 md_mark_stack_pop(ctx, &DOLLAR_OPENERS);
3373 md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3374 md_resolve_range(ctx, opener_index, closer_index);
3375
3376 DOLLAR_OPENERS.top = -1;
3377 return;
3378 }
3379 }
3380
3381 if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3382 md_mark_stack_push(ctx, &DOLLAR_OPENERS, mark_index);
3383}
3384
3385static MD_MARK*
3386md_scan_left_for_resolved_mark(MD_CTX* ctx, MD_MARK* mark_from, OFF off, MD_MARK** p_cursor)
3387{
3388 MD_MARK* mark;
3389
3390 for(mark = mark_from; mark >= ctx->marks; mark--) {
3391 if(mark->ch == 'D' || mark->beg > off)
3392 continue;
3393 if(mark->beg <= off && off < mark->end && (mark->flags & MD_MARK_RESOLVED)) {
3394 if(p_cursor != NULL)
3395 *p_cursor = mark;
3396 return mark;
3397 }
3398 if(mark->end <= off)
3399 break;
3400 }
3401
3402 if(p_cursor != NULL)
3403 *p_cursor = mark;
3404 return NULL;
3405}
3406
3407static MD_MARK*
3408md_scan_right_for_resolved_mark(MD_CTX* ctx, MD_MARK* mark_from, OFF off, MD_MARK** p_cursor)
3409{
3410 MD_MARK* mark;
3411
3412 for(mark = mark_from; mark < ctx->marks + ctx->n_marks; mark++) {
3413 if(mark->ch == 'D' || mark->end <= off)
3414 continue;
3415 if(mark->beg <= off && off < mark->end && (mark->flags & MD_MARK_RESOLVED)) {
3416 if(p_cursor != NULL)
3417 *p_cursor = mark;
3418 return mark;
3419 }
3420 if(mark->beg > off)
3421 break;
3422 }
3423
3424 if(p_cursor != NULL)
3425 *p_cursor = mark;
3426 return NULL;
3427}
3428
/* Analyze a permissive autolink candidate ('@', ':' or '.', as dispatched by
 * md_analyze_marks()) at mark_index.
 *
 * Starting from the mark, the function grows the candidate range to the left
 * (only for '@') and to the right, checks that the characters just outside
 * the range form acceptable boundaries, and on success turns the mark and the
 * dummy mark following it into a resolved zero-width opener/closer pair
 * placed at the beginning and at the end of the autolink text. On any failed
 * check it returns without changing the marks. */
static void
md_analyze_permissive_autolink(MD_CTX* ctx, int mark_index)
{
    /* Description of the URL parts scanned, in order, after the opener.
     *   start_char:             character introducing the part ('\0' if the
     *                           part starts right away).
     *   delim_char:             character whose occurrence counts one more
     *                           component of the part.
     *   allowed_nonalnum_chars: non-alphanumeric characters accepted inside
     *                           the part.
     *   min_components:         minimal count of components required.
     *   optional_end_char:      character accepted once right after the part
     *                           ('\0' if none).
     * Judging by the start characters the rows correspond to host, path
     * ('/'), query ('?') and fragment ('#'). */
    static const struct {
        const MD_CHAR start_char;
        const MD_CHAR delim_char;
        const MD_CHAR* allowed_nonalnum_chars;
        int min_components;
        const MD_CHAR optional_end_char;
    } URL_MAP[] = {
        { _T('\0'), _T('.'), _T(".-_"), 2, _T('\0') },
        { _T('/'), _T('/'), _T("/.-_"), 0, _T('/') },
        { _T('?'), _T('&'), _T("&.-+_=()"), 1, _T('\0') },
        { _T('#'), _T('\0'), _T(".-+_") , 1, _T('\0') }
    };

    MD_MARK* opener = &ctx->marks[mark_index];
    /* The dummy mark right after the opener; it becomes the closer. */
    MD_MARK* closer = &ctx->marks[mark_index + 1];
    /* The dummy's range is used as the bounds of the line being scanned
     * (presumably stored there when the marks were collected -- confirm). */
    OFF line_beg = closer->beg;
    OFF line_end = closer->end;
    OFF beg = opener->beg;
    OFF end = opener->end;
    MD_MARK* left_cursor = opener;
    int left_boundary_ok = FALSE;
    MD_MARK* right_cursor = opener;
    int right_boundary_ok = FALSE;
    unsigned i;

    MD_ASSERT(closer->ch == 'D');

    if(opener->ch == '@') {
        MD_ASSERT(CH(opener->beg) == _T('@'));

        /* Extend the range to the left over the text preceding '@':
         * alphanumeric characters, and single '.', '-', '_' or '+' characters
         * which have alphanumeric neighbours on both sides and are not
         * covered by any resolved mark. */
        while(beg > line_beg) {
            if(ISALNUM(beg-1))
                beg--;
            else if(beg >= line_beg+2 && ISALNUM(beg-2) &&
                    ISANYOF(beg-1, _T(".-_+")) &&
                    md_scan_left_for_resolved_mark(ctx, left_cursor, beg-1, &left_cursor) == NULL &&
                    ISALNUM(beg))
                beg--;
            else
                break;
        }
        /* Nothing in front of the '@': not an autolink. */
        if(beg == opener->beg)
            return;
    }

    /* Check the left boundary: start of line, whitespace, one of "({[", or
     * one of "*_~" belonging to a resolved opener mark. */
    if(beg == line_beg || ISUNICODEWHITESPACEBEFORE(beg) || ISANYOF(beg-1, _T("({["))) {
        left_boundary_ok = TRUE;
    } else if(ISANYOF(beg-1, _T("*_~"))) {
        MD_MARK* left_mark;

        left_mark = md_scan_left_for_resolved_mark(ctx, left_cursor, beg-1, &left_cursor);
        if(left_mark != NULL && (left_mark->flags & MD_MARK_OPENER))
            left_boundary_ok = TRUE;
    }
    if(!left_boundary_ok)
        return;

    /* Extend the range to the right, one URL part after another. */
    for(i = 0; i < SIZEOF_ARRAY(URL_MAP); i++) {
        int n_components = 0;
        int n_open_brackets = 0;

        /* A part with a start character is skipped when that character does
         * not follow; a part requiring components is also skipped unless an
         * alphanumeric character follows the start character. */
        if(URL_MAP[i].start_char != _T('\0')) {
            if(end >= line_end || CH(end) != URL_MAP[i].start_char)
                continue;
            if(URL_MAP[i].min_components > 0 && (end+1 >= line_end || !ISALNUM(end+1)))
                continue;
            end++;
        }

        while(end < line_end) {
            if(ISALNUM(end)) {
                if(n_components == 0)
                    n_components++;
                end++;
            } else if(end < line_end &&
                    ISANYOF(end, URL_MAP[i].allowed_nonalnum_chars) &&
                    md_scan_right_for_resolved_mark(ctx, right_cursor, end, &right_cursor) == NULL &&
                    ((end > line_beg && (ISALNUM(end-1) || CH(end-1) == _T(')'))) || CH(end) == _T('(')) &&
                    ((end+1 < line_end && (ISALNUM(end+1) || CH(end+1) == _T('('))) || CH(end) == _T(')')))
            {
                /* An allowed punctuation character which is not covered by
                 * a resolved mark and which has acceptable neighbours. */
                if(CH(end) == URL_MAP[i].delim_char)
                    n_components++;

                /* Track parentheses; an unmatched ')' ends the part. */
                if(CH(end) == _T('(')) {
                    n_open_brackets++;
                } else if(CH(end) == _T(')')) {
                    if(n_open_brackets <= 0)
                        break;
                    n_open_brackets--;
                }

                end++;
            } else {
                break;
            }
        }

        if(end < line_end && URL_MAP[i].optional_end_char != _T('\0') &&
                CH(end) == URL_MAP[i].optional_end_char)
            end++;

        /* Too few components or unbalanced parentheses: not an autolink. */
        if(n_components < URL_MAP[i].min_components || n_open_brackets != 0)
            return;

        /* For '@' only the first URL_MAP row is scanned. */
        if(opener->ch == '@')
            break;
    }

    /* Check the right boundary: end of line, whitespace, one of ")}].!?,;",
     * or a character covered by a resolved closer mark. */
    if(end == line_end || ISUNICODEWHITESPACE(end) || ISANYOF(end, _T(")}].!?,;"))) {
        right_boundary_ok = TRUE;
    } else {
        MD_MARK* right_mark;

        right_mark = md_scan_right_for_resolved_mark(ctx, right_cursor, end, &right_cursor);
        if(right_mark != NULL && (right_mark->flags & MD_MARK_CLOSER))
            right_boundary_ok = TRUE;
    }
    if(!right_boundary_ok)
        return;

    /* All checks passed. Make the opener and the dummy zero-width marks at
     * the detected range boundaries and resolve them as a pair. */
    opener->beg = beg;
    opener->end = beg;
    closer->beg = end;
    closer->end = end;
    closer->ch = opener->ch;
    md_resolve_range(ctx, mark_index, mark_index + 1);
}
3559
/* Flag for md_analyze_marks(): do not jump over the contents of already
 * resolved '*', '_' and '~' spans; the marks inside them are still visited. */
#define MD_ANALYZE_NOSKIP_EMPH 0x01
3561
3562static inline void
3563md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
3564 int mark_beg, int mark_end, const CHAR* mark_chars, unsigned flags)
3565{
3566 int i = mark_beg;
3567 OFF last_end = lines[0].beg;
3568
3569 MD_UNUSED(lines);
3570 MD_UNUSED(n_lines);
3571
3572 while(i < mark_end) {
3573 MD_MARK* mark = &ctx->marks[i];
3574
3575 if(mark->flags & MD_MARK_RESOLVED) {
3576 if((mark->flags & MD_MARK_OPENER) &&
3577 !((flags & MD_ANALYZE_NOSKIP_EMPH) && ISANYOF_(mark->ch, "*_~")))
3578 {
3579 MD_ASSERT(i < mark->next);
3580 i = mark->next + 1;
3581 } else {
3582 i++;
3583 }
3584 continue;
3585 }
3586
3587 if(!ISANYOF_(mark->ch, mark_chars)) {
3588 i++;
3589 continue;
3590 }
3591
3592 if(mark->beg < last_end) {
3593 i++;
3594 continue;
3595 }
3596
3597 switch(mark->ch) {
3598 case '[':
3599 case '!':
3600 case ']': md_analyze_bracket(ctx, i); break;
3601 case '&': md_analyze_entity(ctx, i); break;
3602 case '|': md_analyze_table_cell_boundary(ctx, i); break;
3603 case '_':
3604 case '*': md_analyze_emph(ctx, i); break;
3605 case '~': md_analyze_tilde(ctx, i); break;
3606 case '$': md_analyze_dollar(ctx, i); break;
3607 case '.':
3608 case ':':
3609 case '@': md_analyze_permissive_autolink(ctx, i); break;
3610 }
3611
3612 if(mark->flags & MD_MARK_RESOLVED) {
3613 if(mark->flags & MD_MARK_OPENER)
3614 last_end = ctx->marks[mark->next].end;
3615 else
3616 last_end = mark->end;
3617 }
3618
3619 i++;
3620 }
3621}
3622
3623static int
3624md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table_mode)
3625{
3626 int ret;
3627
3628 ctx->n_marks = 0;
3629
3630 MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
3631
3632 md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!"), 0);
3633 MD_CHECK(md_resolve_links(ctx, lines, n_lines));
3634 BRACKET_OPENERS.top = -1;
3635 ctx->unresolved_link_head = -1;
3636 ctx->unresolved_link_tail = -1;
3637
3638 if(table_mode) {
3639
3640 MD_ASSERT(n_lines == 1);
3641 ctx->n_table_cell_boundaries = 0;
3642 md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|"), 0);
3643 return ret;
3644 }
3645
3646 md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks);
3647
3648abort:
3649 return ret;
3650}
3651
3652static void
3653md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
3654 int mark_beg, int mark_end)
3655{
3656 int i;
3657
3658 md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("&"), 0);
3659 md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$"), 0);
3660
3661 if((ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) != 0) {
3662
3663 md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("@:."), MD_ANALYZE_NOSKIP_EMPH);
3664 }
3665
3666 for(i = 0; i < (int) SIZEOF_ARRAY(ctx->opener_stacks); i++)
3667 ctx->opener_stacks[i].top = -1;
3668}
3669
3670static int
3671md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
3672 const CHAR* dest, SZ dest_size, int is_autolink,
3673 const CHAR* title, SZ title_size)
3674{
3675 MD_ATTRIBUTE_BUILD href_build = { 0 };
3676 MD_ATTRIBUTE_BUILD title_build = { 0 };
3677 MD_SPAN_A_DETAIL det;
3678 int ret = 0;
3679
3680 memset(&det, 0, sizeof(MD_SPAN_A_DETAIL));
3681 MD_CHECK(md_build_attribute(ctx, dest, dest_size,
3682 (is_autolink ? MD_BUILD_ATTR_NO_ESCAPES : 0),
3683 &det.href, &href_build));
3684 MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build));
3685 det.is_autolink = is_autolink;
3686 if(enter)
3687 MD_ENTER_SPAN(type, &det);
3688 else
3689 MD_LEAVE_SPAN(type, &det);
3690
3691abort:
3692 md_free_attribute(ctx, &href_build);
3693 md_free_attribute(ctx, &title_build);
3694 return ret;
3695}
3696
3697static int
3698md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
3699{
3700 MD_ATTRIBUTE_BUILD target_build = { 0 };
3701 MD_SPAN_WIKILINK_DETAIL det;
3702 int ret = 0;
3703
3704 memset(&det, 0, sizeof(MD_SPAN_WIKILINK_DETAIL));
3705 MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &det.target, &target_build));
3706
3707 if (enter)
3708 MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det);
3709 else
3710 MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det);
3711
3712abort:
3713 md_free_attribute(ctx, &target_build);
3714 return ret;
3715}
3716
/* Emit the text and span callbacks for the given lines, driven by the
 * resolved marks in ctx->marks[] (as prepared by md_analyze_inlines()).
 *
 * The function walks the text from lines[0].beg to lines[n_lines-1].end. Text
 * between marks is emitted with the current text type; every resolved mark is
 * translated into the corresponding callback(s); at each end of line an
 * appropriate line break (or its code span/raw HTML equivalent) is emitted.
 *
 * Returns zero on success, non-zero if any callback or helper fails. */
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
{
    MD_TEXTTYPE text_type;
    const MD_LINE* line = lines;
    MD_MARK* prev_mark = NULL;
    MD_MARK* mark;
    OFF off = lines[0].beg;
    OFF end = lines[n_lines-1].end;
    OFF tmp;
    int enforce_hardbreak = 0;
    int ret = 0;

    /* Find the first resolved mark. The loop has no upper bound, so it relies
     * on at least one resolved mark being present (presumably a terminating
     * sentinel mark added when the marks are collected -- confirm). */
    mark = ctx->marks;
    while(!(mark->flags & MD_MARK_RESOLVED))
        mark++;

    text_type = MD_TEXT_NORMAL;

    while(1) {

        /* Emit the plain text up to the next mark or the end of the line,
         * whichever comes first. */
        tmp = (line->end < mark->beg ? line->end : mark->beg);
        if(tmp > off) {
            MD_TEXT(text_type, STR(off), tmp - off);
            off = tmp;
        }

        /* If the mark has been reached, process it and advance to the next
         * resolved one. */
        if(off >= mark->beg) {
            switch(mark->ch) {
                case '\\':
                    /* Backslash: before a new line it requests a hard break,
                     * otherwise the character after it is emitted as text. */
                    if(ISNEWLINE(mark->beg+1))
                        enforce_hardbreak = 1;
                    else
                        MD_TEXT(text_type, STR(mark->beg+1), 1);
                    break;

                case ' ':
                    /* The marked range is replaced with a single space. */
                    MD_TEXT(text_type, _T(" "), 1);
                    break;

                case '`':
                    /* Code span; text inside is emitted as MD_TEXT_CODE. */
                    if(mark->flags & MD_MARK_OPENER) {
                        MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
                        text_type = MD_TEXT_CODE;
                    } else {
                        MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '_':
                    /* With MD_FLAG_UNDERLINE every '_' character of the mark
                     * enters/leaves one underline span; without the flag the
                     * mark is handled as emphasis below. */
                    if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
                        if(mark->flags & MD_MARK_OPENER) {
                            while(off < mark->end) {
                                MD_ENTER_SPAN(MD_SPAN_U, NULL);
                                off++;
                            }
                        } else {
                            while(off < mark->end) {
                                MD_LEAVE_SPAN(MD_SPAN_U, NULL);
                                off++;
                            }
                        }
                        break;
                    }
                    MD_FALLTHROUGH();

                case '*':
                    /* Emphasis: every pair of mark characters maps to one
                     * STRONG span, an odd remaining character to one EM span.
                     * An opener enters EM first, a closer leaves it last, so
                     * that the spans nest properly. */
                    if(mark->flags & MD_MARK_OPENER) {
                        if((mark->end - off) % 2) {
                            MD_ENTER_SPAN(MD_SPAN_EM, NULL);
                            off++;
                        }
                        while(off + 1 < mark->end) {
                            MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
                            off += 2;
                        }
                    } else {
                        while(off + 1 < mark->end) {
                            MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
                            off += 2;
                        }
                        if((mark->end - off) % 2) {
                            MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
                            off++;
                        }
                    }
                    break;

                case '~':
                    if(mark->flags & MD_MARK_OPENER)
                        MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
                    else
                        MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
                    break;

                case '$':
                    /* LaTeX math: an odd mark length gives MD_SPAN_LATEXMATH,
                     * an even one MD_SPAN_LATEXMATH_DISPLAY. */
                    if(mark->flags & MD_MARK_OPENER) {
                        MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
                        text_type = MD_TEXT_LATEXMATH;
                    } else {
                        MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '[':
                case '!':
                case ']':
                {
                    /* Link, image or wiki link. Look up both ends of the
                     * span regardless of which one is being processed. */
                    const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
                    const MD_MARK* closer = &ctx->marks[opener->next];
                    const MD_MARK* dest_mark;
                    const MD_MARK* title_mark;

                    /* A '[' ... ']' pair where both marks are at least two
                     * characters long is a wiki link. If the opener is longer
                     * than two characters, the target is the part of the
                     * opener after its first two characters; otherwise it is
                     * the text between the opener and the closer. */
                    if ((opener->ch == '[' && closer->ch == ']') &&
                        opener->end - opener->beg >= 2 &&
                        closer->end - closer->beg >= 2)
                    {
                        int has_label = (opener->end - opener->beg > 2);
                        SZ target_sz;

                        if(has_label)
                            target_sz = opener->end - (opener->beg+2);
                        else
                            target_sz = closer->beg - opener->end;

                        MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'),
                                 has_label ? STR(opener->beg+2) : STR(opener->end),
                                 target_sz));

                        break;
                    }

                    /* Ordinary link or image. The two dummy marks following
                     * the opener carry the destination range and the title
                     * (pointer via md_mark_get_ptr(), size in 'prev'). */
                    dest_mark = opener+1;
                    MD_ASSERT(dest_mark->ch == 'D');
                    title_mark = opener+2;
                    MD_ASSERT(title_mark->ch == 'D');

                    MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
                                (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
                                STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
                                md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)),
                                title_mark->prev));

                    /* The closer may extend over several lines; move to the
                     * line on which it ends. */
                    if(mark->ch == ']') {
                        while(mark->end > line->end)
                            line++;
                    }

                    break;
                }

                case '<':
                case '>':
                    /* Without MD_MARK_AUTOLINK this delimits raw HTML, which
                     * only switches the text type. With the flag it is an
                     * autolink, handled by the code below. */
                    if(!(mark->flags & MD_MARK_AUTOLINK)) {

                        if(mark->flags & MD_MARK_OPENER)
                            text_type = MD_TEXT_HTML;
                        else
                            text_type = MD_TEXT_NORMAL;
                        break;
                    }

                    MD_FALLTHROUGH();

                case '@':
                case ':':
                case '.':
                {
                    /* Autolink ('<' ... '>' or permissive). The destination
                     * is the text between the opener and the closer. */
                    MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
                    MD_MARK* closer = &ctx->marks[opener->next];
                    const CHAR* dest = STR(opener->end);
                    SZ dest_size = closer->beg - opener->end;

                    /* Processing the opener validates the closer; the span
                     * callback below is issued only for a validated pair, so
                     * a closer met without its opener having been processed
                     * is silently ignored. */
                    if(mark->flags & MD_MARK_OPENER)
                        closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;

                    /* '@' and '.' autolinks, and '<' autolinks flagged with
                     * MD_MARK_AUTOLINK_MISSING_MAILTO, get a 7 character
                     * scheme prefix ("http://" for '.', "mailto:" otherwise)
                     * prepended in the temporary buffer. */
                    if(opener->ch == '@' || opener->ch == '.' ||
                        (opener->ch == '<' && (opener->flags & MD_MARK_AUTOLINK_MISSING_MAILTO)))
                    {
                        dest_size += 7;
                        MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
                        memcpy(ctx->buffer,
                                (opener->ch == '.' ? _T("http://") : _T("mailto:")),
                                7 * sizeof(CHAR));
                        memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
                        dest = ctx->buffer;
                    }

                    if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
                        MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
                                    MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
                    break;
                }

                case '&':
                    /* Entity: the whole marked range is passed on verbatim. */
                    MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
                    break;

                case '\0':
                    /* Null character: reported as one character of
                     * MD_TEXT_NULLCHAR (the terminator of the empty literal). */
                    MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1);
                    break;

                case 127:
                    /* Mark with character code 127 ends the processing
                     * (presumably the terminating sentinel mark -- confirm). */
                    goto abort;
            }

            off = mark->end;

            /* Advance to the next resolved mark which does not begin before
             * the current offset. */
            prev_mark = mark;
            mark++;
            while(!(mark->flags & MD_MARK_RESOLVED) || mark->beg < off)
                mark++;
        }

        /* If the end of the line has been reached, emit the line break and
         * move to the next line. */
        if(off >= line->end) {

            /* End of the last line: all done. */
            if(off >= end)
                break;

            if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
                MD_ASSERT(prev_mark != NULL);
                MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$') && (prev_mark->flags & MD_MARK_OPENER));
                MD_ASSERT(ISANYOF2_(mark->ch, '`', '$') && (mark->flags & MD_MARK_CLOSER));

                /* Inside a code or math span: blanks following the line end
                 * are emitted as they are... */
                tmp = off;
                while(off < ctx->size && ISBLANK(off))
                    off++;
                if(off > tmp)
                    MD_TEXT(text_type, STR(tmp), off-tmp);

                /* ...and if there were none, a single space is emitted in
                 * place of the line break. */
                if(off == line->end)
                    MD_TEXT(text_type, _T(" "), 1);
            } else if(text_type == MD_TEXT_HTML) {

                /* Inside raw HTML: emit the blanks following the line end
                 * and then the new line itself. */
                tmp = off;
                while(tmp < end && ISBLANK(tmp))
                    tmp++;
                if(tmp > off)
                    MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
                MD_TEXT(MD_TEXT_HTML, _T("\n"), 1);
            } else {

                /* Elsewhere emit a soft or a hard break. In normal text the
                 * break is hard if a backslash requested it, if
                 * MD_FLAG_HARD_SOFT_BREAKS is set, or if the line end is
                 * followed by blanks whose last two characters are spaces
                 * directly preceding the new line. */
                MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;

                if(text_type == MD_TEXT_NORMAL) {
                    if(enforce_hardbreak || (ctx->parser.flags & MD_FLAG_HARD_SOFT_BREAKS)) {
                        break_type = MD_TEXT_BR;
                    } else {
                        while(off < ctx->size && ISBLANK(off))
                            off++;
                        if(off >= line->end + 2 && CH(off-2) == _T(' ') && CH(off-1) == _T(' ') && ISNEWLINE(off))
                            break_type = MD_TEXT_BR;
                    }
                }

                MD_TEXT(break_type, _T("\n"), 1);
            }

            /* Continue at the beginning of the next line. */
            line++;
            off = line->beg;

            enforce_hardbreak = 0;
        }
    }

abort:
    return ret;
}
3987
3988static void
3989md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
3990{
3991 static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
3992 OFF off = beg;
3993
3994 while(n_align > 0) {
3995 int index = 0;
3996
3997 while(CH(off) != _T('-'))
3998 off++;
3999 if(off > beg && CH(off-1) == _T(':'))
4000 index |= 1;
4001 while(off < end && CH(off) == _T('-'))
4002 off++;
4003 if(off < end && CH(off) == _T(':'))
4004 index |= 2;
4005
4006 *align = align_map[index];
4007 align++;
4008 n_align--;
4009 }
4010
4011}
4012
/* Forward declaration: md_process_table_cell() below calls this function
 * before its definition appears in the file. */
static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines);
4014
4015static int
4016md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
4017{
4018 MD_LINE line;
4019 MD_BLOCK_TD_DETAIL det;
4020 int ret = 0;
4021
4022 while(beg < end && ISWHITESPACE(beg))
4023 beg++;
4024 while(end > beg && ISWHITESPACE(end-1))
4025 end--;
4026
4027 det.align = align;
4028 line.beg = beg;
4029 line.end = end;
4030
4031 MD_ENTER_BLOCK(cell_type, &det);
4032 MD_CHECK(md_process_normal_block_contents(ctx, &line, 1));
4033 MD_LEAVE_BLOCK(cell_type, &det);
4034
4035abort:
4036 return ret;
4037}
4038
4039static int
4040md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
4041 const MD_ALIGN* align, int col_count)
4042{
4043 MD_LINE line;
4044 OFF* pipe_offs = NULL;
4045 int i, j, k, n;
4046 int ret = 0;
4047
4048 line.beg = beg;
4049 line.end = end;
4050
4051 MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE));
4052
4053 n = ctx->n_table_cell_boundaries + 2;
4054 pipe_offs = (OFF*) malloc(n * sizeof(OFF));
4055 if(pipe_offs == NULL) {
4056 MD_LOG("malloc() failed.");
4057 ret = -1;
4058 goto abort;
4059 }
4060 j = 0;
4061 pipe_offs[j++] = beg;
4062 for(i = ctx->table_cell_boundaries_head; i >= 0; i = ctx->marks[i].next) {
4063 MD_MARK* mark = &ctx->marks[i];
4064 pipe_offs[j++] = mark->end;
4065 }
4066 pipe_offs[j++] = end+1;
4067
4068 MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
4069 k = 0;
4070 for(i = 0; i < j-1 && k < col_count; i++) {
4071 if(pipe_offs[i] < pipe_offs[i+1]-1)
4072 MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1));
4073 }
4074
4075 while(k < col_count)
4076 MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0));
4077 MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
4078
4079abort:
4080 free(pipe_offs);
4081
4082 ctx->table_cell_boundaries_head = -1;
4083 ctx->table_cell_boundaries_tail = -1;
4084
4085 return ret;
4086}
4087
4088static int
4089md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, MD_SIZE n_lines)
4090{
4091 MD_ALIGN* align;
4092 MD_SIZE line_index;
4093 int ret = 0;
4094
4095 MD_ASSERT(n_lines >= 2);
4096
4097 align = malloc(col_count * sizeof(MD_ALIGN));
4098 if(align == NULL) {
4099 MD_LOG("malloc() failed.");
4100 ret = -1;
4101 goto abort;
4102 }
4103
4104 md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align, col_count);
4105
4106 MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
4107 MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
4108 lines[0].beg, lines[0].end, align, col_count));
4109 MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
4110
4111 if(n_lines > 2) {
4112 MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
4113 for(line_index = 2; line_index < n_lines; line_index++) {
4114 MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
4115 lines[line_index].beg, lines[line_index].end, align, col_count));
4116 }
4117 MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
4118 }
4119
4120abort:
4121 free(align);
4122 return ret;
4123}
4124
/* Bits stored in MD_BLOCK::flags. */

/* The block record opens a container block (the enter-block callback is
 * called for it). */
#define MD_BLOCK_CONTAINER_OPENER 0x01
/* The block record closes a container block (the leave-block callback is
 * called for it). */
#define MD_BLOCK_CONTAINER_CLOSER 0x02
/* Mask for testing whether the record is a container record of any kind;
 * records with neither bit set are processed as leaf blocks. */
#define MD_BLOCK_CONTAINER (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)
/* The list (MD_BLOCK_UL or MD_BLOCK_OL) is loose, i.e. not tight. */
#define MD_BLOCK_LOOSE_LIST 0x04
/* NOTE(review): presumably marks a header created by a setext underline;
 * the flag is not used in this part of the file -- confirm. */
#define MD_BLOCK_SETEXT_HEADER 0x08
4130
/* Record describing one block, stored in ctx->block_bytes.
 *
 * A leaf block record is directly followed by n_lines line records:
 * MD_VERBATIMLINE for MD_BLOCK_CODE and MD_BLOCK_HTML, MD_LINE for the other
 * leaf block types. */
struct MD_BLOCK_tag {
    MD_BLOCKTYPE type : 8;
    /* Combination of the MD_BLOCK_CONTAINER_xxx, MD_BLOCK_LOOSE_LIST and
     * MD_BLOCK_SETEXT_HEADER bits. */
    unsigned flags : 8;

    /* Block type specific data:
     *   MD_BLOCK_H:     Header level.
     *   MD_BLOCK_CODE:  Non-zero for a fenced code block, zero otherwise.
     *   MD_BLOCK_TABLE: Column count.
     *   MD_BLOCK_UL:    List mark character.
     *   MD_BLOCK_OL:    List mark delimiter character.
     *   MD_BLOCK_LI:    Task mark character (zero if not a task list item).
     */
    unsigned data : 16;

    /* Leaf blocks:  Count of the line records following this record.
     * MD_BLOCK_OL:  Start number of the list.
     * MD_BLOCK_LI:  Offset of the task mark.
     */
    MD_SIZE n_lines;
};
4139
/* State of one container, kept in ctx->containers[].
 *
 * While the blocks are being processed, md_process_all_blocks() reuses the
 * array as a stack of the currently open lists and block quotes and only
 * touches is_loose. The other members are not used by the code in this part
 * of the file; the notes on them are inferred from their names and need
 * confirming against the block analysis code. */
struct MD_CONTAINER_tag {
    CHAR ch;                    /* presumably the container mark character -- TODO confirm */
    unsigned is_loose : 8;      /* Non-zero if paragraphs directly inside are reported as blocks. */
    unsigned is_task : 8;       /* presumably set for task list items -- TODO confirm */
    unsigned start;             /* presumably the start number of an ordered list -- TODO confirm */
    unsigned mark_indent;       /* presumably the indentation of the mark -- TODO confirm */
    unsigned contents_indent;   /* presumably the indentation of the contents -- TODO confirm */
    OFF block_byte_off;         /* presumably offset of the block record in ctx->block_bytes -- TODO confirm */
    OFF task_mark_off;          /* presumably offset of the task mark in the text -- TODO confirm */
};
4150
4151static int
4152md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
4153{
4154 int i;
4155 int ret;
4156
4157 MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
4158 MD_CHECK(md_process_inlines(ctx, lines, n_lines));
4159
4160abort:
4161
4162 for(i = ctx->ptr_stack.top; i >= 0; i = ctx->marks[i].next)
4163 free(md_mark_get_ptr(ctx, i));
4164 ctx->ptr_stack.top = -1;
4165
4166 return ret;
4167}
4168
4169static int
4170md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, MD_SIZE n_lines)
4171{
4172 static const CHAR indent_chunk_str[] = _T(" ");
4173 static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1;
4174
4175 MD_SIZE line_index;
4176 int ret = 0;
4177
4178 for(line_index = 0; line_index < n_lines; line_index++) {
4179 const MD_VERBATIMLINE* line = &lines[line_index];
4180 int indent = line->indent;
4181
4182 MD_ASSERT(indent >= 0);
4183
4184 while(indent > (int) indent_chunk_size) {
4185 MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
4186 indent -= indent_chunk_size;
4187 }
4188 if(indent > 0)
4189 MD_TEXT(text_type, indent_chunk_str, indent);
4190
4191 MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
4192
4193 MD_TEXT(text_type, _T("\n"), 1);
4194 }
4195
4196abort:
4197 return ret;
4198}
4199
4200static int
4201md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, MD_SIZE n_lines)
4202{
4203 if(is_fenced) {
4204
4205 lines++;
4206 n_lines--;
4207 } else {
4208
4209 while(n_lines > 0 && lines[0].beg == lines[0].end) {
4210 lines++;
4211 n_lines--;
4212 }
4213 while(n_lines > 0 && lines[n_lines-1].beg == lines[n_lines-1].end) {
4214 n_lines--;
4215 }
4216 }
4217
4218 if(n_lines == 0)
4219 return 0;
4220
4221 return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
4222}
4223
4224static int
4225md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
4226 MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
4227{
4228 const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
4229 OFF beg = fence_line->beg;
4230 OFF end = fence_line->end;
4231 OFF lang_end;
4232 CHAR fence_ch = CH(fence_line->beg);
4233 int ret = 0;
4234
4235 while(beg < ctx->size && CH(beg) == fence_ch)
4236 beg++;
4237
4238 while(beg < ctx->size && CH(beg) == _T(' '))
4239 beg++;
4240
4241 while(end > beg && CH(end-1) == _T(' '))
4242 end--;
4243
4244 MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info, info_build));
4245
4246 lang_end = beg;
4247 while(lang_end < end && !ISWHITESPACE(lang_end))
4248 lang_end++;
4249 MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang, lang_build));
4250
4251 det->fence_char = fence_ch;
4252
4253abort:
4254 return ret;
4255}
4256
4257static int
4258md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
4259{
4260 union {
4261 MD_BLOCK_H_DETAIL header;
4262 MD_BLOCK_CODE_DETAIL code;
4263 MD_BLOCK_TABLE_DETAIL table;
4264 } det;
4265 MD_ATTRIBUTE_BUILD info_build;
4266 MD_ATTRIBUTE_BUILD lang_build;
4267 int is_in_tight_list;
4268 int clean_fence_code_detail = FALSE;
4269 int ret = 0;
4270
4271 memset(&det, 0, sizeof(det));
4272
4273 if(ctx->n_containers == 0)
4274 is_in_tight_list = FALSE;
4275 else
4276 is_in_tight_list = !ctx->containers[ctx->n_containers-1].is_loose;
4277
4278 switch(block->type) {
4279 case MD_BLOCK_H:
4280 det.header.level = block->data;
4281 break;
4282
4283 case MD_BLOCK_CODE:
4284
4285 if(block->data != 0) {
4286 memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
4287 clean_fence_code_detail = TRUE;
4288 MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
4289 }
4290 break;
4291
4292 case MD_BLOCK_TABLE:
4293 det.table.col_count = block->data;
4294 det.table.head_row_count = 1;
4295 det.table.body_row_count = block->n_lines - 2;
4296 break;
4297
4298 default:
4299
4300 break;
4301 }
4302
4303 if(!is_in_tight_list || block->type != MD_BLOCK_P)
4304 MD_ENTER_BLOCK(block->type, (void*) &det);
4305
4306 switch(block->type) {
4307 case MD_BLOCK_HR:
4308
4309 break;
4310
4311 case MD_BLOCK_CODE:
4312 MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
4313 (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4314 break;
4315
4316 case MD_BLOCK_HTML:
4317 MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
4318 (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4319 break;
4320
4321 case MD_BLOCK_TABLE:
4322 MD_CHECK(md_process_table_block_contents(ctx, block->data,
4323 (const MD_LINE*)(block + 1), block->n_lines));
4324 break;
4325
4326 default:
4327 MD_CHECK(md_process_normal_block_contents(ctx,
4328 (const MD_LINE*)(block + 1), block->n_lines));
4329 break;
4330 }
4331
4332 if(!is_in_tight_list || block->type != MD_BLOCK_P)
4333 MD_LEAVE_BLOCK(block->type, (void*) &det);
4334
4335abort:
4336 if(clean_fence_code_detail) {
4337 md_free_attribute(ctx, &info_build);
4338 md_free_attribute(ctx, &lang_build);
4339 }
4340 return ret;
4341}
4342
4343static int
4344md_process_all_blocks(MD_CTX* ctx)
4345{
4346 int byte_off = 0;
4347 int ret = 0;
4348
4349 ctx->n_containers = 0;
4350
4351 while(byte_off < ctx->n_block_bytes) {
4352 MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off);
4353 union {
4354 MD_BLOCK_UL_DETAIL ul;
4355 MD_BLOCK_OL_DETAIL ol;
4356 MD_BLOCK_LI_DETAIL li;
4357 } det;
4358
4359 switch(block->type) {
4360 case MD_BLOCK_UL:
4361 det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4362 det.ul.mark = (CHAR) block->data;
4363 break;
4364
4365 case MD_BLOCK_OL:
4366 det.ol.start = block->n_lines;
4367 det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4368 det.ol.mark_delimiter = (CHAR) block->data;
4369 break;
4370
4371 case MD_BLOCK_LI:
4372 det.li.is_task = (block->data != 0);
4373 det.li.task_mark = (CHAR) block->data;
4374 det.li.task_mark_offset = (OFF) block->n_lines;
4375 break;
4376
4377 default:
4378
4379 break;
4380 }
4381
4382 if(block->flags & MD_BLOCK_CONTAINER) {
4383 if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
4384 MD_LEAVE_BLOCK(block->type, &det);
4385
4386 if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE)
4387 ctx->n_containers--;
4388 }
4389
4390 if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
4391 MD_ENTER_BLOCK(block->type, &det);
4392
4393 if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) {
4394 ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
4395 ctx->n_containers++;
4396 } else if(block->type == MD_BLOCK_QUOTE) {
4397
4398 ctx->containers[ctx->n_containers].is_loose = TRUE;
4399 ctx->n_containers++;
4400 }
4401 }
4402 } else {
4403 MD_CHECK(md_process_leaf_block(ctx, block));
4404
4405 if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML)
4406 byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
4407 else
4408 byte_off += block->n_lines * sizeof(MD_LINE);
4409 }
4410
4411 byte_off += sizeof(MD_BLOCK);
4412 }
4413
4414 ctx->n_block_bytes = 0;
4415
4416abort:
4417 return ret;
4418}
4419
4420static void*
4421md_push_block_bytes(MD_CTX* ctx, int n_bytes)
4422{
4423 void* ptr;
4424
4425 if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
4426 void* new_block_bytes;
4427
4428 ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0
4429 ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2
4430 : 512);
4431 new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes);
4432 if(new_block_bytes == NULL) {
4433 MD_LOG("realloc() failed.");
4434 return NULL;
4435 }
4436
4437 if(ctx->current_block != NULL) {
4438 OFF off_current_block = (OFF) ((char*) ctx->current_block - (char*) ctx->block_bytes);
4439 ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block);
4440 }
4441
4442 ctx->block_bytes = new_block_bytes;
4443 }
4444
4445 ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
4446 ctx->n_block_bytes += n_bytes;
4447 return ptr;
4448}
4449
4450static int
4451md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
4452{
4453 MD_BLOCK* block;
4454
4455 MD_ASSERT(ctx->current_block == NULL);
4456
4457 block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
4458 if(block == NULL)
4459 return -1;
4460
4461 switch(line->type) {
4462 case MD_LINE_HR:
4463 block->type = MD_BLOCK_HR;
4464 break;
4465
4466 case MD_LINE_ATXHEADER:
4467 case MD_LINE_SETEXTHEADER:
4468 block->type = MD_BLOCK_H;
4469 break;
4470
4471 case MD_LINE_FENCEDCODE:
4472 case MD_LINE_INDENTEDCODE:
4473 block->type = MD_BLOCK_CODE;
4474 break;
4475
4476 case MD_LINE_TEXT:
4477 block->type = MD_BLOCK_P;
4478 break;
4479
4480 case MD_LINE_HTML:
4481 block->type = MD_BLOCK_HTML;
4482 break;
4483
4484 case MD_LINE_BLANK:
4485 case MD_LINE_SETEXTUNDERLINE:
4486 case MD_LINE_TABLEUNDERLINE:
4487 default:
4488 MD_UNREACHABLE();
4489 break;
4490 }
4491
4492 block->flags = 0;
4493 block->data = line->data;
4494 block->n_lines = 0;
4495
4496 ctx->current_block = block;
4497 return 0;
4498}
4499
4500static int
4501md_consume_link_reference_definitions(MD_CTX* ctx)
4502{
4503 MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
4504 MD_SIZE n_lines = ctx->current_block->n_lines;
4505 MD_SIZE n = 0;
4506
4507 while(n < n_lines) {
4508 int n_link_ref_lines;
4509
4510 n_link_ref_lines = md_is_link_reference_definition(ctx,
4511 lines + n, n_lines - n);
4512
4513 if(n_link_ref_lines == 0)
4514 break;
4515
4516 if(n_link_ref_lines < 0)
4517 return -1;
4518
4519 n += n_link_ref_lines;
4520 }
4521
4522 if(n > 0) {
4523 if(n == n_lines) {
4524
4525 ctx->n_block_bytes -= n * sizeof(MD_LINE);
4526 ctx->n_block_bytes -= sizeof(MD_BLOCK);
4527 ctx->current_block = NULL;
4528 } else {
4529
4530 memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE));
4531 ctx->current_block->n_lines -= n;
4532 ctx->n_block_bytes -= n * sizeof(MD_LINE);
4533 }
4534 }
4535
4536 return 0;
4537}
4538
4539static int
4540md_end_current_block(MD_CTX* ctx)
4541{
4542 int ret = 0;
4543
4544 if(ctx->current_block == NULL)
4545 return ret;
4546
4547 if(ctx->current_block->type == MD_BLOCK_P ||
4548 (ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)))
4549 {
4550 MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
4551 if(lines[0].beg < ctx->size && CH(lines[0].beg) == _T('[')) {
4552 MD_CHECK(md_consume_link_reference_definitions(ctx));
4553 if(ctx->current_block == NULL)
4554 return ret;
4555 }
4556 }
4557
4558 if(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) {
4559 MD_SIZE n_lines = ctx->current_block->n_lines;
4560
4561 if(n_lines > 1) {
4562
4563 ctx->current_block->n_lines--;
4564 ctx->n_block_bytes -= sizeof(MD_LINE);
4565 } else {
4566
4567 ctx->current_block->type = MD_BLOCK_P;
4568 return 0;
4569 }
4570 }
4571
4572 ctx->current_block = NULL;
4573
4574abort:
4575 return ret;
4576}
4577
4578static int
4579md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
4580{
4581 MD_ASSERT(ctx->current_block != NULL);
4582
4583 if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) {
4584 MD_VERBATIMLINE* line;
4585
4586 line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE));
4587 if(line == NULL)
4588 return -1;
4589
4590 line->indent = analysis->indent;
4591 line->beg = analysis->beg;
4592 line->end = analysis->end;
4593 } else {
4594 MD_LINE* line;
4595
4596 line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE));
4597 if(line == NULL)
4598 return -1;
4599
4600 line->beg = analysis->beg;
4601 line->end = analysis->end;
4602 }
4603 ctx->current_block->n_lines++;
4604
4605 return 0;
4606}
4607
4608static int
4609md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
4610 unsigned data, unsigned flags)
4611{
4612 MD_BLOCK* block;
4613 int ret = 0;
4614
4615 MD_CHECK(md_end_current_block(ctx));
4616
4617 block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
4618 if(block == NULL)
4619 return -1;
4620
4621 block->type = type;
4622 block->flags = flags;
4623 block->data = data;
4624 block->n_lines = start;
4625
4626abort:
4627 return ret;
4628}
4629
4630static int
4631md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
4632{
4633 OFF off = beg + 1;
4634 int n = 1;
4635
4636 while(off < ctx->size && (CH(off) == CH(beg) || CH(off) == _T(' ') || CH(off) == _T('\t'))) {
4637 if(CH(off) == CH(beg))
4638 n++;
4639 off++;
4640 }
4641
4642 if(n < 3) {
4643 *p_killer = off;
4644 return FALSE;
4645 }
4646
4647 if(off < ctx->size && !ISNEWLINE(off)) {
4648 *p_killer = off;
4649 return FALSE;
4650 }
4651
4652 *p_end = off;
4653 return TRUE;
4654}
4655
4656static int
4657md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
4658{
4659 int n;
4660 OFF off = beg + 1;
4661
4662 while(off < ctx->size && CH(off) == _T('#') && off - beg < 7)
4663 off++;
4664 n = off - beg;
4665
4666 if(n > 6)
4667 return FALSE;
4668 *p_level = n;
4669
4670 if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size &&
4671 !ISBLANK(off) && !ISNEWLINE(off))
4672 return FALSE;
4673
4674 while(off < ctx->size && ISBLANK(off))
4675 off++;
4676 *p_beg = off;
4677 *p_end = off;
4678 return TRUE;
4679}
4680
4681static int
4682md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
4683{
4684 OFF off = beg + 1;
4685
4686 while(off < ctx->size && CH(off) == CH(beg))
4687 off++;
4688
4689 while(off < ctx->size && ISBLANK(off))
4690 off++;
4691
4692 if(off < ctx->size && !ISNEWLINE(off))
4693 return FALSE;
4694
4695 *p_level = (CH(beg) == _T('=') ? 1 : 2);
4696 *p_end = off;
4697 return TRUE;
4698}
4699
4700static int
4701md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
4702{
4703 OFF off = beg;
4704 int found_pipe = FALSE;
4705 unsigned col_count = 0;
4706
4707 if(off < ctx->size && CH(off) == _T('|')) {
4708 found_pipe = TRUE;
4709 off++;
4710 while(off < ctx->size && ISWHITESPACE(off))
4711 off++;
4712 }
4713
4714 while(1) {
4715 int delimited = FALSE;
4716
4717 if(off < ctx->size && CH(off) == _T(':'))
4718 off++;
4719 if(off >= ctx->size || CH(off) != _T('-'))
4720 return FALSE;
4721 while(off < ctx->size && CH(off) == _T('-'))
4722 off++;
4723 if(off < ctx->size && CH(off) == _T(':'))
4724 off++;
4725
4726 col_count++;
4727 if(col_count > TABLE_MAXCOLCOUNT) {
4728 MD_LOG("Suppressing table (column_count >" STRINGIZE(TABLE_MAXCOLCOUNT) ")");
4729 return FALSE;
4730 }
4731
4732 while(off < ctx->size && ISWHITESPACE(off))
4733 off++;
4734 if(off < ctx->size && CH(off) == _T('|')) {
4735 delimited = TRUE;
4736 found_pipe = TRUE;
4737 off++;
4738 while(off < ctx->size && ISWHITESPACE(off))
4739 off++;
4740 }
4741
4742 if(off >= ctx->size || ISNEWLINE(off))
4743 break;
4744
4745 if(!delimited)
4746 return FALSE;
4747 }
4748
4749 if(!found_pipe)
4750 return FALSE;
4751
4752 *p_end = off;
4753 *p_col_count = col_count;
4754 return TRUE;
4755}
4756
4757static int
4758md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
4759{
4760 OFF off = beg;
4761
4762 while(off < ctx->size && CH(off) == CH(beg))
4763 off++;
4764
4765 if(off - beg < 3)
4766 return FALSE;
4767
4768 ctx->code_fence_length = off - beg;
4769
4770 while(off < ctx->size && CH(off) == _T(' '))
4771 off++;
4772
4773 while(off < ctx->size && !ISNEWLINE(off)) {
4774
4775 if(CH(beg) == _T('`') && CH(off) == _T('`'))
4776 return FALSE;
4777 off++;
4778 }
4779
4780 *p_end = off;
4781 return TRUE;
4782}
4783
4784static int
4785md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
4786{
4787 OFF off = beg;
4788 int ret = FALSE;
4789
4790 while(off < ctx->size && CH(off) == ch)
4791 off++;
4792 if(off - beg < ctx->code_fence_length)
4793 goto out;
4794
4795 while(off < ctx->size && CH(off) == _T(' '))
4796 off++;
4797
4798 if(off < ctx->size && !ISNEWLINE(off))
4799 goto out;
4800
4801 ret = TRUE;
4802
4803out:
4804
4805 *p_end = off;
4806 return ret;
4807}
4808
4809typedef struct TAG_tag TAG;
4810struct TAG_tag {
4811 const CHAR* name;
4812 unsigned len : 8;
4813};
4814
4815#ifdef X
4816 #undef X
4817#endif
4818#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
4819#define Xend { NULL, 0 }
4820
4821static const TAG t1[] = { X("pre"), X("script"), X("style"), X("textarea"), Xend };
4822
4823static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
4824static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
4825static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
4826static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
4827 X("div"), X("dl"), X("dt"), Xend };
4828static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
4829 X("form"), X("frame"), X("frameset"), Xend };
4830static const TAG h6[] = { X("h1"), X("h2"), X("h3"), X("h4"), X("h5"), X("h6"),
4831 X("head"), X("header"), X("hr"), X("html"), Xend };
4832static const TAG i6[] = { X("iframe"), Xend };
4833static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
4834static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
4835static const TAG n6[] = { X("nav"), X("noframes"), Xend };
4836static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
4837static const TAG p6[] = { X("p"), X("param"), Xend };
4838static const TAG s6[] = { X("search"), X("section"), X("summary"), Xend };
4839static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
4840 X("thead"), X("title"), X("tr"), X("track"), Xend };
4841static const TAG u6[] = { X("ul"), Xend };
4842static const TAG xx[] = { Xend };
4843
4844#undef X
4845#undef Xend
4846
4847static int
4848md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
4849{
4850
4851 static const TAG* map6[26] = {
4852 a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
4853 n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
4854 };
4855 OFF off = beg + 1;
4856 int i;
4857
4858 for(i = 0; t1[i].name != NULL; i++) {
4859 if(off + t1[i].len <= ctx->size) {
4860 if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len))
4861 return 1;
4862 }
4863 }
4864
4865 if(off + 3 < ctx->size && CH(off) == _T('!') && CH(off+1) == _T('-') && CH(off+2) == _T('-'))
4866 return 2;
4867
4868 if(off < ctx->size && CH(off) == _T('?'))
4869 return 3;
4870
4871 if(off < ctx->size && CH(off) == _T('!')) {
4872
4873 if(off + 1 < ctx->size && ISASCII(off+1))
4874 return 4;
4875
4876 if(off + 8 < ctx->size) {
4877 if(md_ascii_eq(STR(off), _T("![CDATA["), 8))
4878 return 5;
4879 }
4880 }
4881
4882 if(off + 1 < ctx->size && (ISALPHA(off) || (CH(off) == _T('/') && ISALPHA(off+1)))) {
4883 int slot;
4884 const TAG* tags;
4885
4886 if(CH(off) == _T('/'))
4887 off++;
4888
4889 slot = (ISUPPER(off) ? CH(off) - 'A' : CH(off) - 'a');
4890 tags = map6[slot];
4891
4892 for(i = 0; tags[i].name != NULL; i++) {
4893 if(off + tags[i].len <= ctx->size) {
4894 if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) {
4895 OFF tmp = off + tags[i].len;
4896 if(tmp >= ctx->size)
4897 return 6;
4898 if(ISBLANK(tmp) || ISNEWLINE(tmp) || CH(tmp) == _T('>'))
4899 return 6;
4900 if(tmp+1 < ctx->size && CH(tmp) == _T('/') && CH(tmp+1) == _T('>'))
4901 return 6;
4902 break;
4903 }
4904 }
4905 }
4906 }
4907
4908 if(off + 1 < ctx->size) {
4909 OFF end;
4910
4911 if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end)) {
4912
4913 while(end < ctx->size && ISWHITESPACE(end))
4914 end++;
4915 if(end >= ctx->size || ISNEWLINE(end))
4916 return 7;
4917 }
4918 }
4919
4920 return FALSE;
4921}
4922
4923static int
4924md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
4925{
4926 OFF i;
4927 for(i = beg; i + what_len < ctx->size; i++) {
4928 if(ISNEWLINE(i))
4929 break;
4930 if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == 0) {
4931 *p_end = i + what_len;
4932 return TRUE;
4933 }
4934 }
4935
4936 *p_end = i;
4937 return FALSE;
4938}
4939
4940static int
4941md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
4942{
4943 switch(ctx->html_block_type) {
4944 case 1:
4945 {
4946 OFF off = beg;
4947 int i;
4948
4949 while(off+1 < ctx->size && !ISNEWLINE(off)) {
4950 if(CH(off) == _T('<') && CH(off+1) == _T('/')) {
4951 for(i = 0; t1[i].name != NULL; i++) {
4952 if(off + 2 + t1[i].len < ctx->size) {
4953 if(md_ascii_case_eq(STR(off+2), t1[i].name, t1[i].len) &&
4954 CH(off+2+t1[i].len) == _T('>'))
4955 {
4956 *p_end = off+2+t1[i].len+1;
4957 return TRUE;
4958 }
4959 }
4960 }
4961 }
4962 off++;
4963 }
4964 *p_end = off;
4965 return FALSE;
4966 }
4967
4968 case 2:
4969 return (md_line_contains(ctx, beg, _T("-->"), 3, p_end) ? 2 : FALSE);
4970
4971 case 3:
4972 return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE);
4973
4974 case 4:
4975 return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE);
4976
4977 case 5:
4978 return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE);
4979
4980 case 6:
4981 case 7:
4982 if(beg >= ctx->size || ISNEWLINE(beg)) {
4983
4984 *p_end = beg;
4985 return ctx->html_block_type;
4986 }
4987 return FALSE;
4988
4989 default:
4990 MD_UNREACHABLE();
4991 }
4992 return FALSE;
4993}
4994
4995static int
4996md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
4997{
4998
4999 if(container->ch == _T('>'))
5000 return FALSE;
5001
5002 if(container->ch != pivot->ch)
5003 return FALSE;
5004 if(container->mark_indent > pivot->contents_indent)
5005 return FALSE;
5006
5007 return TRUE;
5008}
5009
5010static int
5011md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
5012{
5013 if(ctx->n_containers >= ctx->alloc_containers) {
5014 MD_CONTAINER* new_containers;
5015
5016 ctx->alloc_containers = (ctx->alloc_containers > 0
5017 ? ctx->alloc_containers + ctx->alloc_containers / 2
5018 : 16);
5019 new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
5020 if(new_containers == NULL) {
5021 MD_LOG("realloc() failed.");
5022 return -1;
5023 }
5024
5025 ctx->containers = new_containers;
5026 }
5027
5028 memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER));
5029 return 0;
5030}
5031
5032static int
5033md_enter_child_containers(MD_CTX* ctx, int n_children)
5034{
5035 int i;
5036 int ret = 0;
5037
5038 for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
5039 MD_CONTAINER* c = &ctx->containers[i];
5040 int is_ordered_list = FALSE;
5041
5042 switch(c->ch) {
5043 case _T(')'):
5044 case _T('.'):
5045 is_ordered_list = TRUE;
5046 MD_FALLTHROUGH();
5047
5048 case _T('-'):
5049 case _T('+'):
5050 case _T('*'):
5051
5052 md_end_current_block(ctx);
5053 c->block_byte_off = ctx->n_block_bytes;
5054
5055 MD_CHECK(md_push_container_bytes(ctx,
5056 (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
5057 c->start, c->ch, MD_BLOCK_CONTAINER_OPENER));
5058 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5059 c->task_mark_off,
5060 (c->is_task ? CH(c->task_mark_off) : 0),
5061 MD_BLOCK_CONTAINER_OPENER));
5062 break;
5063
5064 case _T('>'):
5065 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
5066 break;
5067
5068 default:
5069 MD_UNREACHABLE();
5070 break;
5071 }
5072 }
5073
5074abort:
5075 return ret;
5076}
5077
5078static int
5079md_leave_child_containers(MD_CTX* ctx, int n_keep)
5080{
5081 int ret = 0;
5082
5083 while(ctx->n_containers > n_keep) {
5084 MD_CONTAINER* c = &ctx->containers[ctx->n_containers-1];
5085 int is_ordered_list = FALSE;
5086
5087 switch(c->ch) {
5088 case _T(')'):
5089 case _T('.'):
5090 is_ordered_list = TRUE;
5091 MD_FALLTHROUGH();
5092
5093 case _T('-'):
5094 case _T('+'):
5095 case _T('*'):
5096 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5097 c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : 0),
5098 MD_BLOCK_CONTAINER_CLOSER));
5099 MD_CHECK(md_push_container_bytes(ctx,
5100 (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
5101 c->ch, MD_BLOCK_CONTAINER_CLOSER));
5102 break;
5103
5104 case _T('>'):
5105 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
5106 0, MD_BLOCK_CONTAINER_CLOSER));
5107 break;
5108
5109 default:
5110 MD_UNREACHABLE();
5111 break;
5112 }
5113
5114 ctx->n_containers--;
5115 }
5116
5117abort:
5118 return ret;
5119}
5120
5121static int
5122md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
5123{
5124 OFF off = beg;
5125 OFF max_end;
5126
5127 if(off >= ctx->size || indent >= ctx->code_indent_offset)
5128 return FALSE;
5129
5130 if(CH(off) == _T('>')) {
5131 off++;
5132 p_container->ch = _T('>');
5133 p_container->is_loose = FALSE;
5134 p_container->is_task = FALSE;
5135 p_container->mark_indent = indent;
5136 p_container->contents_indent = indent + 1;
5137 *p_end = off;
5138 return TRUE;
5139 }
5140
5141 if(ISANYOF(off, _T("-+*")) && (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) {
5142 p_container->ch = CH(off);
5143 p_container->is_loose = FALSE;
5144 p_container->is_task = FALSE;
5145 p_container->mark_indent = indent;
5146 p_container->contents_indent = indent + 1;
5147 *p_end = off+1;
5148 return TRUE;
5149 }
5150
5151 max_end = off + 9;
5152 if(max_end > ctx->size)
5153 max_end = ctx->size;
5154 p_container->start = 0;
5155 while(off < max_end && ISDIGIT(off)) {
5156 p_container->start = p_container->start * 10 + CH(off) - _T('0');
5157 off++;
5158 }
5159 if(off > beg &&
5160 off < ctx->size &&
5161 (CH(off) == _T('.') || CH(off) == _T(')')) &&
5162 (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1)))
5163 {
5164 p_container->ch = CH(off);
5165 p_container->is_loose = FALSE;
5166 p_container->is_task = FALSE;
5167 p_container->mark_indent = indent;
5168 p_container->contents_indent = indent + off - beg + 1;
5169 *p_end = off+1;
5170 return TRUE;
5171 }
5172
5173 return FALSE;
5174}
5175
5176static unsigned
5177md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
5178{
5179 OFF off = beg;
5180 unsigned indent = total_indent;
5181
5182 while(off < ctx->size && ISBLANK(off)) {
5183 if(CH(off) == _T('\t'))
5184 indent = (indent + 4) & ~3;
5185 else
5186 indent++;
5187 off++;
5188 }
5189
5190 *p_end = off;
5191 return indent - total_indent;
5192}
5193
5194static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0, 0 };
5195
5196static int
5197md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
5198 const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
5199{
5200 unsigned total_indent = 0;
5201 int n_parents = 0;
5202 int n_brothers = 0;
5203 int n_children = 0;
5204 MD_CONTAINER container = { 0 };
5205 int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
5206 OFF off = beg;
5207 OFF hr_killer = 0;
5208 int ret = 0;
5209
5210 line->indent = md_line_indentation(ctx, total_indent, off, &off);
5211 total_indent += line->indent;
5212 line->beg = off;
5213 line->enforce_new_block = FALSE;
5214
5215 while(n_parents < ctx->n_containers) {
5216 MD_CONTAINER* c = &ctx->containers[n_parents];
5217
5218 if(c->ch == _T('>') && line->indent < ctx->code_indent_offset &&
5219 off < ctx->size && CH(off) == _T('>'))
5220 {
5221
5222 off++;
5223 total_indent++;
5224 line->indent = md_line_indentation(ctx, total_indent, off, &off);
5225 total_indent += line->indent;
5226
5227 if(line->indent > 0)
5228 line->indent--;
5229
5230 line->beg = off;
5231
5232 } else if(c->ch != _T('>') && line->indent >= c->contents_indent) {
5233
5234 line->indent -= c->contents_indent;
5235 } else {
5236 break;
5237 }
5238
5239 n_parents++;
5240 }
5241
5242 if(off >= ctx->size || ISNEWLINE(off)) {
5243
5244 if(n_brothers + n_children == 0) {
5245 while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T('>'))
5246 n_parents++;
5247 }
5248 }
5249
5250 while(TRUE) {
5251
5252 if(pivot_line->type == MD_LINE_FENCEDCODE) {
5253 line->beg = off;
5254
5255 if(line->indent < ctx->code_indent_offset) {
5256 if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) {
5257 line->type = MD_LINE_BLANK;
5258 ctx->last_line_has_list_loosening_effect = FALSE;
5259 break;
5260 }
5261 }
5262
5263 if(n_parents == ctx->n_containers) {
5264 if(line->indent > pivot_line->indent)
5265 line->indent -= pivot_line->indent;
5266 else
5267 line->indent = 0;
5268
5269 line->type = MD_LINE_FENCEDCODE;
5270 break;
5271 }
5272 }
5273
5274 if(pivot_line->type == MD_LINE_HTML && ctx->html_block_type > 0) {
5275 if(n_parents < ctx->n_containers) {
5276
5277 ctx->html_block_type = 0;
5278 } else {
5279 int html_block_type;
5280
5281 html_block_type = md_is_html_block_end_condition(ctx, off, &off);
5282 if(html_block_type > 0) {
5283 MD_ASSERT(html_block_type == ctx->html_block_type);
5284
5285 ctx->html_block_type = 0;
5286
5287 if(html_block_type == 6 || html_block_type == 7) {
5288 line->type = MD_LINE_BLANK;
5289 line->indent = 0;
5290 break;
5291 }
5292 }
5293
5294 line->type = MD_LINE_HTML;
5295 n_parents = ctx->n_containers;
5296 break;
5297 }
5298 }
5299
5300 if(off >= ctx->size || ISNEWLINE(off)) {
5301 if(pivot_line->type == MD_LINE_INDENTEDCODE && n_parents == ctx->n_containers) {
5302 line->type = MD_LINE_INDENTEDCODE;
5303 if(line->indent > ctx->code_indent_offset)
5304 line->indent -= ctx->code_indent_offset;
5305 else
5306 line->indent = 0;
5307 ctx->last_line_has_list_loosening_effect = FALSE;
5308 } else {
5309 line->type = MD_LINE_BLANK;
5310 ctx->last_line_has_list_loosening_effect = (n_parents > 0 &&
5311 n_brothers + n_children == 0 &&
5312 ctx->containers[n_parents-1].ch != _T('>'));
5313
5314 #if 1
5315
5316 if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') &&
5317 n_brothers + n_children == 0 && ctx->current_block == NULL &&
5318 ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5319 {
5320 MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5321 if(top_block->type == MD_BLOCK_LI)
5322 ctx->last_list_item_starts_with_two_blank_lines = TRUE;
5323 }
5324 #endif
5325 }
5326 break;
5327 } else {
5328 #if 1
5329
5330 if(ctx->last_list_item_starts_with_two_blank_lines) {
5331 if(n_parents > 0 && n_parents == ctx->n_containers &&
5332 ctx->containers[n_parents-1].ch != _T('>') &&
5333 n_brothers + n_children == 0 && ctx->current_block == NULL &&
5334 ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5335 {
5336 MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5337 if(top_block->type == MD_BLOCK_LI) {
5338 n_parents--;
5339
5340 line->indent = total_indent;
5341 if(n_parents > 0)
5342 line->indent -= MIN(line->indent, ctx->containers[n_parents-1].contents_indent);
5343 }
5344 }
5345
5346 ctx->last_list_item_starts_with_two_blank_lines = FALSE;
5347 }
5348 #endif
5349 ctx->last_line_has_list_loosening_effect = FALSE;
5350 }
5351
5352 if(line->indent < ctx->code_indent_offset && pivot_line->type == MD_LINE_TEXT
5353 && off < ctx->size && ISANYOF2(off, _T('='), _T('-'))
5354 && (n_parents == ctx->n_containers))
5355 {
5356 unsigned level;
5357
5358 if(md_is_setext_underline(ctx, off, &off, &level)) {
5359 line->type = MD_LINE_SETEXTUNDERLINE;
5360 line->data = level;
5361 break;
5362 }
5363 }
5364
5365 if(line->indent < ctx->code_indent_offset
5366 && off < ctx->size && off >= hr_killer
5367 && ISANYOF(off, _T("-_*")))
5368 {
5369 if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
5370 line->type = MD_LINE_HR;
5371 break;
5372 }
5373 }
5374
5375 if(n_parents < ctx->n_containers && n_brothers + n_children == 0) {
5376 OFF tmp;
5377
5378 if(md_is_container_mark(ctx, line->indent, off, &tmp, &container) &&
5379 md_is_container_compatible(&ctx->containers[n_parents], &container))
5380 {
5381 pivot_line = &md_dummy_blank_line;
5382
5383 off = tmp;
5384
5385 total_indent += container.contents_indent - container.mark_indent;
5386 line->indent = md_line_indentation(ctx, total_indent, off, &off);
5387 total_indent += line->indent;
5388 line->beg = off;
5389
5390 if(off >= ctx->size || ISNEWLINE(off)) {
5391 container.contents_indent++;
5392 } else if(line->indent <= ctx->code_indent_offset) {
5393 container.contents_indent += line->indent;
5394 line->indent = 0;
5395 } else {
5396 container.contents_indent += 1;
5397 line->indent--;
5398 }
5399
5400 ctx->containers[n_parents].mark_indent = container.mark_indent;
5401 ctx->containers[n_parents].contents_indent = container.contents_indent;
5402
5403 n_brothers++;
5404 continue;
5405 }
5406 }
5407
5408 if(line->indent >= ctx->code_indent_offset && (pivot_line->type != MD_LINE_TEXT)) {
5409 line->type = MD_LINE_INDENTEDCODE;
5410 line->indent -= ctx->code_indent_offset;
5411 line->data = 0;
5412 break;
5413 }
5414
5415 if(line->indent < ctx->code_indent_offset &&
5416 md_is_container_mark(ctx, line->indent, off, &off, &container))
5417 {
5418 if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5419 (off >= ctx->size || ISNEWLINE(off)) && container.ch != _T('>'))
5420 {
5421
5422 } else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5423 ISANYOF2_(container.ch, _T('.'), _T(')')) && container.start != 1)
5424 {
5425
5426 } else {
5427 total_indent += container.contents_indent - container.mark_indent;
5428 line->indent = md_line_indentation(ctx, total_indent, off, &off);
5429 total_indent += line->indent;
5430
5431 line->beg = off;
5432 line->data = container.ch;
5433
5434 if(off >= ctx->size || ISNEWLINE(off)) {
5435 container.contents_indent++;
5436 } else if(line->indent <= ctx->code_indent_offset) {
5437 container.contents_indent += line->indent;
5438 line->indent = 0;
5439 } else {
5440 container.contents_indent += 1;
5441 line->indent--;
5442 }
5443
5444 if(n_brothers + n_children == 0)
5445 pivot_line = &md_dummy_blank_line;
5446
5447 if(n_children == 0)
5448 MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
5449
5450 n_children++;
5451 MD_CHECK(md_push_container(ctx, &container));
5452 continue;
5453 }
5454 }
5455
5456 if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) {
5457 line->type = MD_LINE_TABLE;
5458 break;
5459 }
5460
5461 if(line->indent < ctx->code_indent_offset &&
5462 off < ctx->size && CH(off) == _T('#'))
5463 {
5464 unsigned level;
5465
5466 if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) {
5467 line->type = MD_LINE_ATXHEADER;
5468 line->data = level;
5469 break;
5470 }
5471 }
5472
5473 if(line->indent < ctx->code_indent_offset &&
5474 off < ctx->size && ISANYOF2(off, _T('`'), _T('~')))
5475 {
5476 if(md_is_opening_code_fence(ctx, off, &off)) {
5477 line->type = MD_LINE_FENCEDCODE;
5478 line->data = 1;
5479 line->enforce_new_block = TRUE;
5480 break;
5481 }
5482 }
5483
5484 if(off < ctx->size && CH(off) == _T('<')
5485 && !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
5486 {
5487 ctx->html_block_type = md_is_html_block_start_condition(ctx, off);
5488
5489 if(ctx->html_block_type == 7 && pivot_line->type == MD_LINE_TEXT)
5490 ctx->html_block_type = 0;
5491
5492 if(ctx->html_block_type > 0) {
5493
5494 if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) {
5495
5496 ctx->html_block_type = 0;
5497 }
5498
5499 line->enforce_new_block = TRUE;
5500 line->type = MD_LINE_HTML;
5501 break;
5502 }
5503 }
5504
5505 if((ctx->parser.flags & MD_FLAG_TABLES) && pivot_line->type == MD_LINE_TEXT
5506 && off < ctx->size && ISANYOF3(off, _T('|'), _T('-'), _T(':'))
5507 && n_parents == ctx->n_containers)
5508 {
5509 unsigned col_count;
5510
5511 if(ctx->current_block != NULL && ctx->current_block->n_lines == 1 &&
5512 md_is_table_underline(ctx, off, &off, &col_count))
5513 {
5514 line->data = col_count;
5515 line->type = MD_LINE_TABLEUNDERLINE;
5516 break;
5517 }
5518 }
5519
5520 line->type = MD_LINE_TEXT;
5521 if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == 0) {
5522
5523 n_parents = ctx->n_containers;
5524 }
5525
5526 if((ctx->parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 &&
5527 ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)")))
5528 {
5529 OFF tmp = off;
5530
5531 while(tmp < ctx->size && tmp < off + 3 && ISBLANK(tmp))
5532 tmp++;
5533 if(tmp + 2 < ctx->size && CH(tmp) == _T('[') &&
5534 ISANYOF(tmp+1, _T("xX ")) && CH(tmp+2) == _T(']') &&
5535 (tmp + 3 == ctx->size || ISBLANK(tmp+3) || ISNEWLINE(tmp+3)))
5536 {
5537 MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container);
5538 task_container->is_task = TRUE;
5539 task_container->task_mark_off = tmp + 1;
5540 off = tmp + 3;
5541 while(off < ctx->size && ISWHITESPACE(off))
5542 off++;
5543 line->beg = off;
5544 }
5545 }
5546
5547 break;
5548 }
5549
5550#if defined __linux__ && !defined MD4C_USE_UTF16
5551
5552 if(ctx->doc_ends_with_newline && off < ctx->size) {
5553 while(TRUE) {
5554 off += (OFF) strcspn(STR(off), "\r\n");
5555
5556 if(CH(off) == _T('\0'))
5557 off++;
5558 else
5559 break;
5560 }
5561 } else
5562#endif
5563 {
5564
5565 while(off + 3 < ctx->size && !ISNEWLINE(off+0) && !ISNEWLINE(off+1)
5566 && !ISNEWLINE(off+2) && !ISNEWLINE(off+3))
5567 off += 4;
5568 while(off < ctx->size && !ISNEWLINE(off))
5569 off++;
5570 }
5571
5572 line->end = off;
5573
5574 if(line->type == MD_LINE_ATXHEADER) {
5575 OFF tmp = line->end;
5576 while(tmp > line->beg && ISBLANK(tmp-1))
5577 tmp--;
5578 while(tmp > line->beg && CH(tmp-1) == _T('#'))
5579 tmp--;
5580 if(tmp == line->beg || ISBLANK(tmp-1) || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
5581 line->end = tmp;
5582 }
5583
5584 if(line->type != MD_LINE_INDENTEDCODE && line->type != MD_LINE_FENCEDCODE && line->type != MD_LINE_HTML) {
5585 while(line->end > line->beg && ISBLANK(line->end-1))
5586 line->end--;
5587 }
5588
5589 if(off < ctx->size && CH(off) == _T('\r'))
5590 off++;
5591 if(off < ctx->size && CH(off) == _T('\n'))
5592 off++;
5593
5594 *p_end = off;
5595
5596 if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > 0) {
5597 MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1];
5598 if(c->ch != _T('>')) {
5599 MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off);
5600 block->flags |= MD_BLOCK_LOOSE_LIST;
5601 }
5602 }
5603
5604 if(n_children == 0 && n_parents + n_brothers < ctx->n_containers)
5605 MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
5606
5607 if(n_brothers > 0) {
5608 MD_ASSERT(n_brothers == 1);
5609 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5610 ctx->containers[n_parents].task_mark_off,
5611 (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0),
5612 MD_BLOCK_CONTAINER_CLOSER));
5613 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5614 container.task_mark_off,
5615 (container.is_task ? CH(container.task_mark_off) : 0),
5616 MD_BLOCK_CONTAINER_OPENER));
5617 ctx->containers[n_parents].is_task = container.is_task;
5618 ctx->containers[n_parents].task_mark_off = container.task_mark_off;
5619 }
5620
5621 if(n_children > 0)
5622 MD_CHECK(md_enter_child_containers(ctx, n_children));
5623
5624abort:
5625 return ret;
5626}
5627
5628static int
5629md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
5630{
5631 const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
5632 int ret = 0;
5633
5634 if(line->type == MD_LINE_BLANK) {
5635 MD_CHECK(md_end_current_block(ctx));
5636 *p_pivot_line = &md_dummy_blank_line;
5637 return 0;
5638 }
5639
5640 if(line->enforce_new_block)
5641 MD_CHECK(md_end_current_block(ctx));
5642
5643 if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) {
5644 MD_CHECK(md_end_current_block(ctx));
5645
5646 MD_CHECK(md_start_new_block(ctx, line));
5647 MD_CHECK(md_add_line_into_current_block(ctx, line));
5648 MD_CHECK(md_end_current_block(ctx));
5649 *p_pivot_line = &md_dummy_blank_line;
5650 return 0;
5651 }
5652
5653 if(line->type == MD_LINE_SETEXTUNDERLINE) {
5654 MD_ASSERT(ctx->current_block != NULL);
5655 ctx->current_block->type = MD_BLOCK_H;
5656 ctx->current_block->data = line->data;
5657 ctx->current_block->flags |= MD_BLOCK_SETEXT_HEADER;
5658 MD_CHECK(md_add_line_into_current_block(ctx, line));
5659 MD_CHECK(md_end_current_block(ctx));
5660 if(ctx->current_block == NULL) {
5661 *p_pivot_line = &md_dummy_blank_line;
5662 } else {
5663
5664 line->type = MD_LINE_TEXT;
5665 *p_pivot_line = line;
5666 }
5667 return 0;
5668 }
5669
5670 if(line->type == MD_LINE_TABLEUNDERLINE) {
5671 MD_ASSERT(ctx->current_block != NULL);
5672 MD_ASSERT(ctx->current_block->n_lines == 1);
5673 ctx->current_block->type = MD_BLOCK_TABLE;
5674 ctx->current_block->data = line->data;
5675 MD_ASSERT(pivot_line != &md_dummy_blank_line);
5676 ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
5677 MD_CHECK(md_add_line_into_current_block(ctx, line));
5678 return 0;
5679 }
5680
5681 if(line->type != pivot_line->type)
5682 MD_CHECK(md_end_current_block(ctx));
5683
5684 if(ctx->current_block == NULL) {
5685 MD_CHECK(md_start_new_block(ctx, line));
5686 *p_pivot_line = line;
5687 }
5688
5689 MD_CHECK(md_add_line_into_current_block(ctx, line));
5690
5691abort:
5692 return ret;
5693}
5694
5695static int
5696md_process_doc(MD_CTX *ctx)
5697{
5698 const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
5699 MD_LINE_ANALYSIS line_buf[2];
5700 MD_LINE_ANALYSIS* line = &line_buf[0];
5701 OFF off = 0;
5702 int ret = 0;
5703
5704 MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
5705
5706 while(off < ctx->size) {
5707 if(line == pivot_line)
5708 line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);
5709
5710 MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
5711 MD_CHECK(md_process_line(ctx, &pivot_line, line));
5712 }
5713
5714 md_end_current_block(ctx);
5715
5716 MD_CHECK(md_build_ref_def_hashtable(ctx));
5717
5718 MD_CHECK(md_leave_child_containers(ctx, 0));
5719 MD_CHECK(md_process_all_blocks(ctx));
5720
5721 MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
5722
5723abort:
5724
5725#if 0
5726
5727 {
5728 char buffer[256];
5729 sprintf(buffer, "Alloced %u bytes for block buffer.",
5730 (unsigned)(ctx->alloc_block_bytes));
5731 MD_LOG(buffer);
5732
5733 sprintf(buffer, "Alloced %u bytes for containers buffer.",
5734 (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
5735 MD_LOG(buffer);
5736
5737 sprintf(buffer, "Alloced %u bytes for marks buffer.",
5738 (unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
5739 MD_LOG(buffer);
5740
5741 sprintf(buffer, "Alloced %u bytes for aux. buffer.",
5742 (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
5743 MD_LOG(buffer);
5744 }
5745#endif
5746
5747 return ret;
5748}
5749
5750int
5751md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
5752{
5753 MD_CTX ctx;
5754 int i;
5755 int ret;
5756
5757 if(parser->abi_version != 0) {
5758 if(parser->debug_log != NULL)
5759 parser->debug_log("Unsupported abi_version.", userdata);
5760 return -1;
5761 }
5762
5763 memset(&ctx, 0, sizeof(MD_CTX));
5764 ctx.text = text;
5765 ctx.size = size;
5766 memcpy(&ctx.parser, parser, sizeof(MD_PARSER));
5767 ctx.userdata = userdata;
5768 ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
5769 md_build_mark_char_map(&ctx);
5770 ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));
5771 ctx.max_ref_def_output = MIN(MIN(16 * (uint64_t)size, (uint64_t)(1024 * 1024)), (uint64_t)SZ_MAX);
5772
5773 for(i = 0; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++)
5774 ctx.opener_stacks[i].top = -1;
5775 ctx.ptr_stack.top = -1;
5776 ctx.unresolved_link_head = -1;
5777 ctx.unresolved_link_tail = -1;
5778 ctx.table_cell_boundaries_head = -1;
5779 ctx.table_cell_boundaries_tail = -1;
5780
5781 ret = md_process_doc(&ctx);
5782
5783 md_free_ref_defs(&ctx);
5784 md_free_ref_def_hashtable(&ctx);
5785 free(ctx.buffer);
5786 free(ctx.marks);
5787 free(ctx.block_bytes);
5788 free(ctx.containers);
5789
5790 return ret;
5791}