md4c.c

   1#include "md4c.h"
   2
   3#include <limits.h>
   4#include <stdint.h>
   5#include <stdio.h>
   6#include <stdlib.h>
   7#include <string.h>
   8
   9#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L
  10
  11    #if defined __GNUC__
  12        #define inline __inline__
  13    #elif defined _MSC_VER
  14        #define inline __inline
  15    #else
  16        #define inline
  17    #endif
  18#endif
  19
  20#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16
  21    #define MD4C_USE_UTF8
  22#endif
  23
  24#ifdef _T
  25    #undef _T
  26#endif
  27#if defined MD4C_USE_UTF16
  28    #define _T(x)           L##x
  29#else
  30    #define _T(x)           x
  31#endif
  32
  33#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof(a[0]))
  34
  35#define STRINGIZE_(x)       #x
  36#define STRINGIZE(x)        STRINGIZE_(x)
  37
  38#define MAX(a,b)            ((a) > (b) ? (a) : (b))
  39#define MIN(a,b)            ((a) < (b) ? (a) : (b))
  40
  41#ifndef TRUE
  42    #define TRUE            1
  43    #define FALSE           0
  44#endif
  45
  46#define MD_LOG(msg)                                                     \
  47    do {                                                                \
  48        if(ctx->parser.debug_log != NULL)                               \
  49            ctx->parser.debug_log((msg), ctx->userdata);                \
  50    } while(0)
  51
  52#ifdef DEBUG
  53    #define MD_ASSERT(cond)                                             \
  54            do {                                                        \
  55                if(!(cond)) {                                           \
  56                    MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "        \
  57                           "Assertion '" STRINGIZE(cond) "' failed.");  \
  58                    exit(1);                                            \
  59                }                                                       \
  60            } while(0)
  61
  62    #define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
  63#else
  64    #ifdef __GNUC__
  65        #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
  66        #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
  67    #elif defined _MSC_VER  &&  _MSC_VER > 120
  68        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
  69        #define MD_UNREACHABLE()    do { __assume(0); } while(0)
  70    #else
  71        #define MD_ASSERT(cond)     do {} while(0)
  72        #define MD_UNREACHABLE()    do {} while(0)
  73    #endif
  74#endif
  75
  76#if defined __clang__ && __clang_major__ >= 12
  77    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
  78#elif defined __GNUC__ && __GNUC__ >= 7
  79    #define MD_FALLTHROUGH()        __attribute__((fallthrough))
  80#else
  81    #define MD_FALLTHROUGH()        ((void)0)
  82#endif
  83
  84#define MD_UNUSED(x)                ((void)x)
  85
  86#define CODESPAN_MARK_MAXLEN    32
  87
  88#define TABLE_MAXCOLCOUNT       128
  89
  90#define CHAR    MD_CHAR
  91#define SZ      MD_SIZE
  92#define OFF     MD_OFFSET
  93
  94#define SZ_MAX      (sizeof(SZ) == 8 ? UINT64_MAX : UINT32_MAX)
  95#define OFF_MAX     (sizeof(OFF) == 8 ? UINT64_MAX : UINT32_MAX)
  96
  97typedef struct MD_MARK_tag MD_MARK;
  98typedef struct MD_BLOCK_tag MD_BLOCK;
  99typedef struct MD_CONTAINER_tag MD_CONTAINER;
 100typedef struct MD_REF_DEF_tag MD_REF_DEF;
 101
 102typedef struct MD_MARKSTACK_tag MD_MARKSTACK;
 103struct MD_MARKSTACK_tag {
 104    int top;
 105};
 106
 107typedef struct MD_CTX_tag MD_CTX;
 108struct MD_CTX_tag {
 109
 110    const CHAR* text;
 111    SZ size;
 112    MD_PARSER parser;
 113    void* userdata;
 114
 115    int doc_ends_with_newline;
 116
 117    CHAR* buffer;
 118    unsigned alloc_buffer;
 119
 120    MD_REF_DEF* ref_defs;
 121    int n_ref_defs;
 122    int alloc_ref_defs;
 123    void** ref_def_hashtable;
 124    int ref_def_hashtable_size;
 125    SZ max_ref_def_output;
 126
 127    MD_MARK* marks;
 128    int n_marks;
 129    int alloc_marks;
 130
 131#if defined MD4C_USE_UTF16
 132    char mark_char_map[128];
 133#else
 134    char mark_char_map[256];
 135#endif
 136
 137    MD_MARKSTACK opener_stacks[16];
 138#define ASTERISK_OPENERS_oo_mod3_0      (ctx->opener_stacks[0])
 139#define ASTERISK_OPENERS_oo_mod3_1      (ctx->opener_stacks[1])
 140#define ASTERISK_OPENERS_oo_mod3_2      (ctx->opener_stacks[2])
 141#define ASTERISK_OPENERS_oc_mod3_0      (ctx->opener_stacks[3])
 142#define ASTERISK_OPENERS_oc_mod3_1      (ctx->opener_stacks[4])
 143#define ASTERISK_OPENERS_oc_mod3_2      (ctx->opener_stacks[5])
 144#define UNDERSCORE_OPENERS_oo_mod3_0    (ctx->opener_stacks[6])
 145#define UNDERSCORE_OPENERS_oo_mod3_1    (ctx->opener_stacks[7])
 146#define UNDERSCORE_OPENERS_oo_mod3_2    (ctx->opener_stacks[8])
 147#define UNDERSCORE_OPENERS_oc_mod3_0    (ctx->opener_stacks[9])
 148#define UNDERSCORE_OPENERS_oc_mod3_1    (ctx->opener_stacks[10])
 149#define UNDERSCORE_OPENERS_oc_mod3_2    (ctx->opener_stacks[11])
 150#define TILDE_OPENERS_1                 (ctx->opener_stacks[12])
 151#define TILDE_OPENERS_2                 (ctx->opener_stacks[13])
 152#define BRACKET_OPENERS                 (ctx->opener_stacks[14])
 153#define DOLLAR_OPENERS                  (ctx->opener_stacks[15])
 154
 155    MD_MARKSTACK ptr_stack;
 156
 157    int n_table_cell_boundaries;
 158    int table_cell_boundaries_head;
 159    int table_cell_boundaries_tail;
 160
 161    int unresolved_link_head;
 162    int unresolved_link_tail;
 163
 164    OFF html_comment_horizon;
 165    OFF html_proc_instr_horizon;
 166    OFF html_decl_horizon;
 167    OFF html_cdata_horizon;
 168
 169    void* block_bytes;
 170    MD_BLOCK* current_block;
 171    int n_block_bytes;
 172    int alloc_block_bytes;
 173
 174    MD_CONTAINER* containers;
 175    int n_containers;
 176    int alloc_containers;
 177
 178    unsigned code_indent_offset;
 179
 180    SZ code_fence_length;
 181    int html_block_type;
 182    int last_line_has_list_loosening_effect;
 183    int last_list_item_starts_with_two_blank_lines;
 184};
 185
 186enum MD_LINETYPE_tag {
 187    MD_LINE_BLANK,
 188    MD_LINE_HR,
 189    MD_LINE_ATXHEADER,
 190    MD_LINE_SETEXTHEADER,
 191    MD_LINE_SETEXTUNDERLINE,
 192    MD_LINE_INDENTEDCODE,
 193    MD_LINE_FENCEDCODE,
 194    MD_LINE_HTML,
 195    MD_LINE_TEXT,
 196    MD_LINE_TABLE,
 197    MD_LINE_TABLEUNDERLINE
 198};
 199typedef enum MD_LINETYPE_tag MD_LINETYPE;
 200
 201typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
 202struct MD_LINE_ANALYSIS_tag {
 203    MD_LINETYPE type;
 204    unsigned data;
 205    int enforce_new_block;
 206    OFF beg;
 207    OFF end;
 208    unsigned indent;
 209};
 210
 211typedef struct MD_LINE_tag MD_LINE;
 212struct MD_LINE_tag {
 213    OFF beg;
 214    OFF end;
 215};
 216
 217typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
 218struct MD_VERBATIMLINE_tag {
 219    OFF beg;
 220    OFF end;
 221    OFF indent;
 222};
 223
 224#define CH(off)                 (ctx->text[(off)])
 225#define STR(off)                (ctx->text + (off))
 226
 227#define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
 228#define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
 229#define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
 230#define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
 231#define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
 232#define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
 233#define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
 234#define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
 235#define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
 236#define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
 237#define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
 238#define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
 239#define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
 240#define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
 241#define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
 242#define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))
 243
 244#define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
 245#define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
 246#define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
 247#define ISASCII(off)                    ISASCII_(CH(off))
 248#define ISBLANK(off)                    ISBLANK_(CH(off))
 249#define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
 250#define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
 251#define ISCNTRL(off)                    ISCNTRL_(CH(off))
 252#define ISPUNCT(off)                    ISPUNCT_(CH(off))
 253#define ISUPPER(off)                    ISUPPER_(CH(off))
 254#define ISLOWER(off)                    ISLOWER_(CH(off))
 255#define ISALPHA(off)                    ISALPHA_(CH(off))
 256#define ISDIGIT(off)                    ISDIGIT_(CH(off))
 257#define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
 258#define ISALNUM(off)                    ISALNUM_(CH(off))
 259
 260#if defined MD4C_USE_UTF16
 261    #define md_strchr wcschr
 262#else
 263    #define md_strchr strchr
 264#endif
 265
 266static inline int
 267md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
 268{
 269    OFF i;
 270    for(i = 0; i < n; i++) {
 271        CHAR ch1 = s1[i];
 272        CHAR ch2 = s2[i];
 273
 274        if(ISLOWER_(ch1))
 275            ch1 += ('A'-'a');
 276        if(ISLOWER_(ch2))
 277            ch2 += ('A'-'a');
 278        if(ch1 != ch2)
 279            return FALSE;
 280    }
 281    return TRUE;
 282}
 283
 284static inline int
 285md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
 286{
 287    return memcmp(s1, s2, n * sizeof(CHAR)) == 0;
 288}
 289
 290static int
 291md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
 292{
 293    OFF off = 0;
 294    int ret = 0;
 295
 296    while(1) {
 297        while(off < size  &&  str[off] != _T('\0'))
 298            off++;
 299
 300        if(off > 0) {
 301            ret = ctx->parser.text(type, str, off, ctx->userdata);
 302            if(ret != 0)
 303                return ret;
 304
 305            str += off;
 306            size -= off;
 307            off = 0;
 308        }
 309
 310        if(off >= size)
 311            return 0;
 312
 313        ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata);
 314        if(ret != 0)
 315            return ret;
 316        off++;
 317    }
 318}
 319
 320#define MD_CHECK(func)                                                      \
 321    do {                                                                    \
 322        ret = (func);                                                       \
 323        if(ret < 0)                                                         \
 324            goto abort;                                                     \
 325    } while(0)
 326
 327#define MD_TEMP_BUFFER(sz)                                                  \
 328    do {                                                                    \
 329        if(sz > ctx->alloc_buffer) {                                        \
 330            CHAR* new_buffer;                                               \
 331            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;                   \
 332                                                                            \
 333            new_buffer = realloc(ctx->buffer, new_size);                    \
 334            if(new_buffer == NULL) {                                        \
 335                MD_LOG("realloc() failed.");                                \
 336                ret = -1;                                                   \
 337                goto abort;                                                 \
 338            }                                                               \
 339                                                                            \
 340            ctx->buffer = new_buffer;                                       \
 341            ctx->alloc_buffer = new_size;                                   \
 342        }                                                                   \
 343    } while(0)
 344
 345#define MD_ENTER_BLOCK(type, arg)                                           \
 346    do {                                                                    \
 347        ret = ctx->parser.enter_block((type), (arg), ctx->userdata);        \
 348        if(ret != 0) {                                                      \
 349            MD_LOG("Aborted from enter_block() callback.");                 \
 350            goto abort;                                                     \
 351        }                                                                   \
 352    } while(0)
 353
 354#define MD_LEAVE_BLOCK(type, arg)                                           \
 355    do {                                                                    \
 356        ret = ctx->parser.leave_block((type), (arg), ctx->userdata);        \
 357        if(ret != 0) {                                                      \
 358            MD_LOG("Aborted from leave_block() callback.");                 \
 359            goto abort;                                                     \
 360        }                                                                   \
 361    } while(0)
 362
 363#define MD_ENTER_SPAN(type, arg)                                            \
 364    do {                                                                    \
 365        ret = ctx->parser.enter_span((type), (arg), ctx->userdata);         \
 366        if(ret != 0) {                                                      \
 367            MD_LOG("Aborted from enter_span() callback.");                  \
 368            goto abort;                                                     \
 369        }                                                                   \
 370    } while(0)
 371
 372#define MD_LEAVE_SPAN(type, arg)                                            \
 373    do {                                                                    \
 374        ret = ctx->parser.leave_span((type), (arg), ctx->userdata);         \
 375        if(ret != 0) {                                                      \
 376            MD_LOG("Aborted from leave_span() callback.");                  \
 377            goto abort;                                                     \
 378        }                                                                   \
 379    } while(0)
 380
 381#define MD_TEXT(type, str, size)                                            \
 382    do {                                                                    \
 383        if(size > 0) {                                                      \
 384            ret = ctx->parser.text((type), (str), (size), ctx->userdata);   \
 385            if(ret != 0) {                                                  \
 386                MD_LOG("Aborted from text() callback.");                    \
 387                goto abort;                                                 \
 388            }                                                               \
 389        }                                                                   \
 390    } while(0)
 391
 392#define MD_TEXT_INSECURE(type, str, size)                                   \
 393    do {                                                                    \
 394        if(size > 0) {                                                      \
 395            ret = md_text_with_null_replacement(ctx, type, str, size);      \
 396            if(ret != 0) {                                                  \
 397                MD_LOG("Aborted from text() callback.");                    \
 398                goto abort;                                                 \
 399            }                                                               \
 400        }                                                                   \
 401    } while(0)
 402
 403static const MD_LINE*
 404md_lookup_line(OFF off, const MD_LINE* lines, MD_SIZE n_lines, MD_SIZE* p_line_index)
 405{
 406    MD_SIZE lo, hi;
 407    MD_SIZE pivot;
 408    const MD_LINE* line;
 409
 410    lo = 0;
 411    hi = n_lines - 1;
 412    while(lo <= hi) {
 413        pivot = (lo + hi) / 2;
 414        line = &lines[pivot];
 415
 416        if(off < line->beg) {
 417            if(hi == 0  ||  lines[hi-1].end < off) {
 418                if(p_line_index != NULL)
 419                    *p_line_index = pivot;
 420                return line;
 421            }
 422            hi = pivot - 1;
 423        } else if(off > line->end) {
 424            lo = pivot + 1;
 425        } else {
 426            if(p_line_index != NULL)
 427                *p_line_index = pivot;
 428            return line;
 429        }
 430    }
 431
 432    return NULL;
 433}
 434
 435typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO;
 436struct MD_UNICODE_FOLD_INFO_tag {
 437    unsigned codepoints[3];
 438    unsigned n_codepoints;
 439};
 440
 441#if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8
 442
 443    static int
 444    md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
 445    {
 446        int beg, end;
 447        int pivot_beg, pivot_end;
 448
 449        beg = 0;
 450        end = (int) map_size-1;
 451        while(beg <= end) {
 452
 453            pivot_beg = pivot_end = (beg + end) / 2;
 454            if(map[pivot_end] & 0x40000000)
 455                pivot_end++;
 456            if(map[pivot_beg] & 0x80000000)
 457                pivot_beg--;
 458
 459            if(codepoint < (map[pivot_beg] & 0x00ffffff))
 460                end = pivot_beg - 1;
 461            else if(codepoint > (map[pivot_end] & 0x00ffffff))
 462                beg = pivot_end + 1;
 463            else
 464                return pivot_beg;
 465        }
 466
 467        return -1;
 468    }
 469
 470    static int
 471    md_is_unicode_whitespace__(unsigned codepoint)
 472    {
 473#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
 474#define S(cp)               (cp)
 475
 476        static const unsigned WHITESPACE_MAP[] = {
 477            S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
 478        };
 479#undef R
 480#undef S
 481
 482        if(codepoint <= 0x7f)
 483            return ISWHITESPACE_(codepoint);
 484
 485        return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);
 486    }
 487
    /* Tell whether a codepoint is listed in PUNCT_MAP below. ASCII codepoints
     * (<= 0x7f) are decided by ISPUNCT_() without consulting the table;
     * everything else is looked up with md_unicode_bsearch__(). Returns
     * non-zero when the codepoint is found, zero otherwise.
     *
     * NOTE(review): the table is presumably generated from the Unicode
     * punctuation/symbol categories -- confirm its origin before editing it
     * by hand. It has to stay sorted for the binary search to work. */
 488    static int
 489    md_is_unicode_punct__(unsigned codepoint)
 490    {
        /* R() emits the two flagged entries of a range (0x40000000 marks the
         * first codepoint, 0x80000000 the last), S() a single codepoint. */
 491#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
 492#define S(cp)               (cp)
 493
 494        static const unsigned PUNCT_MAP[] = {
 495            R(0x0021,0x002f), R(0x003a,0x0040), R(0x005b,0x0060), R(0x007b,0x007e), R(0x00a1,0x00a9),
 496            R(0x00ab,0x00ac), R(0x00ae,0x00b1), S(0x00b4), R(0x00b6,0x00b8), S(0x00bb), S(0x00bf), S(0x00d7),
 497            S(0x00f7), R(0x02c2,0x02c5), R(0x02d2,0x02df), R(0x02e5,0x02eb), S(0x02ed), R(0x02ef,0x02ff), S(0x0375),
 498            S(0x037e), R(0x0384,0x0385), S(0x0387), S(0x03f6), S(0x0482), R(0x055a,0x055f), R(0x0589,0x058a),
 499            R(0x058d,0x058f), S(0x05be), S(0x05c0), S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0606,0x060f),
 500            S(0x061b), R(0x061d,0x061f), R(0x066a,0x066d), S(0x06d4), S(0x06de), S(0x06e9), R(0x06fd,0x06fe),
 501            R(0x0700,0x070d), R(0x07f6,0x07f9), R(0x07fe,0x07ff), R(0x0830,0x083e), S(0x085e), S(0x0888),
 502            R(0x0964,0x0965), S(0x0970), R(0x09f2,0x09f3), R(0x09fa,0x09fb), S(0x09fd), S(0x0a76), R(0x0af0,0x0af1),
 503            S(0x0b70), R(0x0bf3,0x0bfa), S(0x0c77), S(0x0c7f), S(0x0c84), S(0x0d4f), S(0x0d79), S(0x0df4), S(0x0e3f),
 504            S(0x0e4f), R(0x0e5a,0x0e5b), R(0x0f01,0x0f17), R(0x0f1a,0x0f1f), S(0x0f34), S(0x0f36), S(0x0f38),
 505            R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fbe,0x0fc5), R(0x0fc7,0x0fcc), R(0x0fce,0x0fda), R(0x104a,0x104f),
 506            R(0x109e,0x109f), S(0x10fb), R(0x1360,0x1368), R(0x1390,0x1399), S(0x1400), R(0x166d,0x166e),
 507            R(0x169b,0x169c), R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17db),
 508            R(0x1800,0x180a), S(0x1940), R(0x1944,0x1945), R(0x19de,0x19ff), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6),
 509            R(0x1aa8,0x1aad), R(0x1b5a,0x1b6a), R(0x1b74,0x1b7e), R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f),
 510            R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), S(0x1fbd), R(0x1fbf,0x1fc1), R(0x1fcd,0x1fcf),
 511            R(0x1fdd,0x1fdf), R(0x1fed,0x1fef), R(0x1ffd,0x1ffe), R(0x2010,0x2027), R(0x2030,0x205e),
 512            R(0x207a,0x207e), R(0x208a,0x208e), R(0x20a0,0x20c0), R(0x2100,0x2101), R(0x2103,0x2106),
 513            R(0x2108,0x2109), S(0x2114), R(0x2116,0x2118), R(0x211e,0x2123), S(0x2125), S(0x2127), S(0x2129),
 514            S(0x212e), R(0x213a,0x213b), R(0x2140,0x2144), R(0x214a,0x214d), S(0x214f), R(0x218a,0x218b),
 515            R(0x2190,0x2426), R(0x2440,0x244a), R(0x249c,0x24e9), R(0x2500,0x2775), R(0x2794,0x2b73),
 516            R(0x2b76,0x2b95), R(0x2b97,0x2bff), R(0x2ce5,0x2cea), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
 517            R(0x2e00,0x2e2e), R(0x2e30,0x2e5d), R(0x2e80,0x2e99), R(0x2e9b,0x2ef3), R(0x2f00,0x2fd5),
 518            R(0x2ff0,0x2fff), R(0x3001,0x3004), R(0x3008,0x3020), S(0x3030), R(0x3036,0x3037), R(0x303d,0x303f),
 519            R(0x309b,0x309c), S(0x30a0), S(0x30fb), R(0x3190,0x3191), R(0x3196,0x319f), R(0x31c0,0x31e3), S(0x31ef),
 520            R(0x3200,0x321e), R(0x322a,0x3247), S(0x3250), R(0x3260,0x327f), R(0x328a,0x32b0), R(0x32c0,0x33ff),
 521            R(0x4dc0,0x4dff), R(0xa490,0xa4c6), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
 522            R(0xa6f2,0xa6f7), R(0xa700,0xa716), R(0xa720,0xa721), R(0xa789,0xa78a), R(0xa828,0xa82b),
 523            R(0xa836,0xa839), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
 524            S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaa77,0xaa79), R(0xaade,0xaadf),
 525            R(0xaaf0,0xaaf1), S(0xab5b), R(0xab6a,0xab6b), S(0xabeb), S(0xfb29), R(0xfbb2,0xfbc2), R(0xfd3e,0xfd4f),
 526            S(0xfdcf), R(0xfdfc,0xfdff), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe66), R(0xfe68,0xfe6b),
 527            R(0xff01,0xff0f), R(0xff1a,0xff20), R(0xff3b,0xff40), R(0xff5b,0xff65), R(0xffe0,0xffe6),
 528            R(0xffe8,0xffee), R(0xfffc,0xfffd), R(0x10100,0x10102), R(0x10137,0x1013f), R(0x10179,0x10189),
 529            R(0x1018c,0x1018e), R(0x10190,0x1019c), S(0x101a0), R(0x101d0,0x101fc), S(0x1039f), S(0x103d0),
 530            S(0x1056f), S(0x10857), R(0x10877,0x10878), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
 531            S(0x10ac8), R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
 532            R(0x10f86,0x10f89), R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143),
 533            R(0x11174,0x11175), R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d),
 534            S(0x112a9), R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7),
 535            R(0x11641,0x11643), R(0x11660,0x1166c), S(0x116b9), R(0x1173c,0x1173f), S(0x1183b), R(0x11944,0x11946),
 536            S(0x119e2), R(0x11a3f,0x11a46), R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11b00,0x11b09),
 537            R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), R(0x11f43,0x11f4f), R(0x11fd5,0x11ff1),
 538            S(0x11fff), R(0x12470,0x12474), R(0x12ff1,0x12ff2), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3f),
 539            R(0x16b44,0x16b45), R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9c), S(0x1bc9f), R(0x1cf50,0x1cfc3),
 540            R(0x1d000,0x1d0f5), R(0x1d100,0x1d126), R(0x1d129,0x1d164), R(0x1d16a,0x1d16c), R(0x1d183,0x1d184),
 541            R(0x1d18c,0x1d1a9), R(0x1d1ae,0x1d1ea), R(0x1d200,0x1d241), S(0x1d245), R(0x1d300,0x1d356), S(0x1d6c1),
 542            S(0x1d6db), S(0x1d6fb), S(0x1d715), S(0x1d735), S(0x1d74f), S(0x1d76f), S(0x1d789), S(0x1d7a9),
 543            S(0x1d7c3), R(0x1d800,0x1d9ff), R(0x1da37,0x1da3a), R(0x1da6d,0x1da74), R(0x1da76,0x1da83),
 544            R(0x1da85,0x1da8b), S(0x1e14f), S(0x1e2ff), R(0x1e95e,0x1e95f), S(0x1ecac), S(0x1ecb0), S(0x1ed2e),
 545            R(0x1eef0,0x1eef1), R(0x1f000,0x1f02b), R(0x1f030,0x1f093), R(0x1f0a0,0x1f0ae), R(0x1f0b1,0x1f0bf),
 546            R(0x1f0c1,0x1f0cf), R(0x1f0d1,0x1f0f5), R(0x1f10d,0x1f1ad), R(0x1f1e6,0x1f202), R(0x1f210,0x1f23b),
 547            R(0x1f240,0x1f248), R(0x1f250,0x1f251), R(0x1f260,0x1f265), R(0x1f300,0x1f6d7), R(0x1f6dc,0x1f6ec),
 548            R(0x1f6f0,0x1f6fc), R(0x1f700,0x1f776), R(0x1f77b,0x1f7d9), R(0x1f7e0,0x1f7eb), S(0x1f7f0),
 549            R(0x1f800,0x1f80b), R(0x1f810,0x1f847), R(0x1f850,0x1f859), R(0x1f860,0x1f887), R(0x1f890,0x1f8ad),
 550            R(0x1f8b0,0x1f8b1), R(0x1f900,0x1fa53), R(0x1fa60,0x1fa6d), R(0x1fa70,0x1fa7c), R(0x1fa80,0x1fa88),
 551            R(0x1fa90,0x1fabd), R(0x1fabf,0x1fac5), R(0x1face,0x1fadb), R(0x1fae0,0x1fae8), R(0x1faf0,0x1faf8),
 552            R(0x1fb00,0x1fb92), R(0x1fb94,0x1fbca)
 553        };
 554#undef R
 555#undef S
 556
        /* ASCII is classified directly, without searching the table. */
 557        if(codepoint <= 0x7f)
 558            return ISPUNCT_(codepoint);
 559
 560        return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);
 561    }
 562
 563    static void
 564    md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
 565    {
 566#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
 567#define S(cp)               (cp)
 568
 569        static const unsigned FOLD_MAP_1[] = {
 570            R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
 571            R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
 572            S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
 573            S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
 574            R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
 575            S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
 576            S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
 577            R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
 578            S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
 579            S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
 580            S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
 581            S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
 582            R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
 583            R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
 584            S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
 585            R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
 586            R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
 587            R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
 588            S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
 589            S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
 590            R(0x24b6,0x24cf), R(0x2c00,0x2c2f), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
 591            S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
 592            S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
 593            S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
 594            R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
 595            S(0xa7b3), R(0xa7b4,0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7d0), S(0xa7d6),
 596            S(0xa7d8), S(0xa7f5), R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3),
 597            R(0x10570,0x1057a), R(0x1057c,0x1058a), R(0x1058c,0x10592), S(0x10594), S(0x10595), R(0x10c80,0x10cb2),
 598            R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
 599        };
 600        static const unsigned FOLD_MAP_1_DATA[] = {
 601            0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
 602            0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
 603            0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
 604            0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
 605            0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
 606            0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
 607            0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
 608            0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
 609            0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
 610            0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
 611            0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
 612            0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
 613            0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
 614            0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
 615            0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
 616            0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5f, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
 617            0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
 618            0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
 619            0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
 620            0xab53, 0xa7b5, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7d1, 0xa7d7, 0xa7d9, 0xa7f6, 0x13a0,
 621            0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10597, 0x105a1, 0x105a3, 0x105b1, 0x105b3,
 622            0x105b9, 0x105bb, 0x105bc, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922, 0x1e943
 623        };
 624        static const unsigned FOLD_MAP_2[] = {
 625            S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
 626            S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f),
 627            R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2),
 628            S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3),
 629            S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13),
 630            S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17)
 631        };
 632        static const unsigned FOLD_MAP_2_DATA[] = {
 633            0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
 634            0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
 635            0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
 636            0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
 637            0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
 638            0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
 639            0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
 640            0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
 641        };
 642        static const unsigned FOLD_MAP_3[] = {
 643            S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3),
 644            S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04)
 645        };
 646        static const unsigned FOLD_MAP_3_DATA[] = {
 647            0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
 648            0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
 649            0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
 650            0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
 651        };
 652#undef R
 653#undef S
 654        static const struct {
 655            const unsigned* map;
 656            const unsigned* data;
 657            size_t map_size;
 658            unsigned n_codepoints;
 659        } FOLD_MAP_LIST[] = {
 660            { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 },
 661            { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 },
 662            { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 }
 663        };
 664
 665        int i;
 666
 667        if(codepoint <= 0x7f) {
 668            info->codepoints[0] = codepoint;
 669            if(ISUPPER_(codepoint))
 670                info->codepoints[0] += 'a' - 'A';
 671            info->n_codepoints = 1;
 672            return;
 673        }
 674
 675        for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
 676            int index;
 677
 678            index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
 679            if(index >= 0) {
 680
 681                unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
 682                const unsigned* map = FOLD_MAP_LIST[i].map;
 683                const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
 684
 685                memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
 686                info->n_codepoints = n_codepoints;
 687
 688                if(FOLD_MAP_LIST[i].map[index] != codepoint) {
 689
 690                    if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
 691
 692                        info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
 693                    } else {
 694
 695                        info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
 696                    }
 697                }
 698
 699                return;
 700            }
 701        }
 702
 703        info->codepoints[0] = codepoint;
 704        info->n_codepoints = 1;
 705    }
 706#endif
 707
 708#if defined MD4C_USE_UTF16
 709    #define IS_UTF16_SURROGATE_HI(word)     (((WORD)(word) & 0xfc00) == 0xd800)
 710    #define IS_UTF16_SURROGATE_LO(word)     (((WORD)(word) & 0xfc00) == 0xdc00)
 711    #define UTF16_DECODE_SURROGATE(hi, lo)  (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0)))
 712
 713    static unsigned
 714    md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
 715    {
 716        if(IS_UTF16_SURROGATE_HI(str[0])) {
 717            if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) {
 718                if(p_size != NULL)
 719                    *p_size = 2;
 720                return UTF16_DECODE_SURROGATE(str[0], str[1]);
 721            }
 722        }
 723
 724        if(p_size != NULL)
 725            *p_size = 1;
 726        return str[0];
 727    }
 728
 729    static unsigned
 730    md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
 731    {
 732        if(off > 2 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1)))
 733            return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1));
 734
 735        return CH(off);
 736    }
 737
 738    #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
 739    #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(CH(off))
 740    #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(CH((off)-1))
 741
 742    #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
 743    #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
 744
 745    static inline int
 746    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
 747    {
 748        return md_decode_utf16le__(str+off, str_size-off, p_char_size);
 749    }
 750#elif defined MD4C_USE_UTF8
 751    #define IS_UTF8_LEAD1(byte)     ((unsigned char)(byte) <= 0x7f)
 752    #define IS_UTF8_LEAD2(byte)     (((unsigned char)(byte) & 0xe0) == 0xc0)
 753    #define IS_UTF8_LEAD3(byte)     (((unsigned char)(byte) & 0xf0) == 0xe0)
 754    #define IS_UTF8_LEAD4(byte)     (((unsigned char)(byte) & 0xf8) == 0xf0)
 755    #define IS_UTF8_TAIL(byte)      (((unsigned char)(byte) & 0xc0) == 0x80)
 756
 757    static unsigned
 758    md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
 759    {
 760        if(!IS_UTF8_LEAD1(str[0])) {
 761            if(IS_UTF8_LEAD2(str[0])) {
 762                if(1 < str_size && IS_UTF8_TAIL(str[1])) {
 763                    if(p_size != NULL)
 764                        *p_size = 2;
 765
 766                    return (((unsigned int)str[0] & 0x1f) << 6) |
 767                           (((unsigned int)str[1] & 0x3f) << 0);
 768                }
 769            } else if(IS_UTF8_LEAD3(str[0])) {
 770                if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
 771                    if(p_size != NULL)
 772                        *p_size = 3;
 773
 774                    return (((unsigned int)str[0] & 0x0f) << 12) |
 775                           (((unsigned int)str[1] & 0x3f) << 6) |
 776                           (((unsigned int)str[2] & 0x3f) << 0);
 777                }
 778            } else if(IS_UTF8_LEAD4(str[0])) {
 779                if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
 780                    if(p_size != NULL)
 781                        *p_size = 4;
 782
 783                    return (((unsigned int)str[0] & 0x07) << 18) |
 784                           (((unsigned int)str[1] & 0x3f) << 12) |
 785                           (((unsigned int)str[2] & 0x3f) << 6) |
 786                           (((unsigned int)str[3] & 0x3f) << 0);
 787                }
 788            }
 789        }
 790
 791        if(p_size != NULL)
 792            *p_size = 1;
 793        return (unsigned) str[0];
 794    }
 795
 796    static unsigned
 797    md_decode_utf8_before__(MD_CTX* ctx, OFF off)
 798    {
 799        if(!IS_UTF8_LEAD1(CH(off-1))) {
 800            if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
 801                return (((unsigned int)CH(off-2) & 0x1f) << 6) |
 802                       (((unsigned int)CH(off-1) & 0x3f) << 0);
 803
 804            if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
 805                return (((unsigned int)CH(off-3) & 0x0f) << 12) |
 806                       (((unsigned int)CH(off-2) & 0x3f) << 6) |
 807                       (((unsigned int)CH(off-1) & 0x3f) << 0);
 808
 809            if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
 810                return (((unsigned int)CH(off-4) & 0x07) << 18) |
 811                       (((unsigned int)CH(off-3) & 0x3f) << 12) |
 812                       (((unsigned int)CH(off-2) & 0x3f) << 6) |
 813                       (((unsigned int)CH(off-1) & 0x3f) << 0);
 814        }
 815
 816        return (unsigned) CH(off-1);
 817    }
 818
 819    #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
 820    #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
 821    #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))
 822
 823    #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
 824    #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
 825
 826    static inline unsigned
 827    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
 828    {
 829        return md_decode_utf8__(str+off, str_size-off, p_char_size);
 830    }
 831#else
 832    #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
 833    #define ISUNICODEWHITESPACE(off)        ISWHITESPACE(off)
 834    #define ISUNICODEWHITESPACEBEFORE(off)  ISWHITESPACE((off)-1)
 835
 836    #define ISUNICODEPUNCT(off)             ISPUNCT(off)
 837    #define ISUNICODEPUNCTBEFORE(off)       ISPUNCT((off)-1)
 838
 839    static inline void
 840    md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
 841    {
 842        info->codepoints[0] = codepoint;
 843        if(ISUPPER_(codepoint))
 844            info->codepoints[0] += 'a' - 'A';
 845        info->n_codepoints = 1;
 846    }
 847
 848    static inline unsigned
 849    md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
 850    {
 851        *p_size = 1;
 852        return (unsigned) str[off];
 853    }
 854#endif
 855
 856static void
 857md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, MD_SIZE n_lines,
 858               CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
 859{
 860    CHAR* ptr = buffer;
 861    int line_index = 0;
 862    OFF off = beg;
 863
 864    MD_UNUSED(n_lines);
 865
 866    while(1) {
 867        const MD_LINE* line = &lines[line_index];
 868        OFF line_end = line->end;
 869        if(end < line_end)
 870            line_end = end;
 871
 872        while(off < line_end) {
 873            *ptr = CH(off);
 874            ptr++;
 875            off++;
 876        }
 877
 878        if(off >= end) {
 879            *p_size = (MD_SIZE)(ptr - buffer);
 880            return;
 881        }
 882
 883        *ptr = line_break_replacement_char;
 884        ptr++;
 885
 886        line_index++;
 887        off = lines[line_index].beg;
 888    }
 889}
 890
 891static int
 892md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, MD_SIZE n_lines,
 893                    CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
 894{
 895    CHAR* buffer;
 896
 897    buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
 898    if(buffer == NULL) {
 899        MD_LOG("malloc() failed.");
 900        return -1;
 901    }
 902
 903    md_merge_lines(ctx, beg, end, lines, n_lines,
 904                line_break_replacement_char, buffer, p_size);
 905
 906    *p_str = buffer;
 907    return 0;
 908}
 909
 910static OFF
 911md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
 912{
 913    SZ char_size;
 914    unsigned codepoint;
 915
 916    while(off < size) {
 917        codepoint = md_decode_unicode(label, off, size, &char_size);
 918        if(!ISUNICODEWHITESPACE_(codepoint)  &&  !ISNEWLINE_(label[off]))
 919            break;
 920        off += char_size;
 921    }
 922
 923    return off;
 924}
 925
 926static int
 927md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
 928{
 929    int attr_state;
 930    OFF off = beg;
 931    OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size;
 932    MD_SIZE line_index = 0;
 933
 934    MD_ASSERT(CH(beg) == _T('<'));
 935
 936    if(off + 1 >= line_end)
 937        return FALSE;
 938    off++;
 939
 940    attr_state = 0;
 941
 942    if(CH(off) == _T('/')) {
 943
 944        attr_state = -1;
 945        off++;
 946    }
 947
 948    if(off >= line_end  ||  !ISALPHA(off))
 949        return FALSE;
 950    off++;
 951    while(off < line_end  &&  (ISALNUM(off)  ||  CH(off) == _T('-')))
 952        off++;
 953
 954    while(1) {
 955        while(off < line_end  &&  !ISNEWLINE(off)) {
 956            if(attr_state > 40) {
 957                if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) {
 958                    attr_state = 0;
 959                    off--;
 960                } else if(attr_state == 42 && CH(off) == _T('\'')) {
 961                    attr_state = 0;
 962                } else if(attr_state == 43 && CH(off) == _T('"')) {
 963                    attr_state = 0;
 964                }
 965                off++;
 966            } else if(ISWHITESPACE(off)) {
 967                if(attr_state == 0)
 968                    attr_state = 1;
 969                off++;
 970            } else if(attr_state <= 2 && CH(off) == _T('>')) {
 971
 972                goto done;
 973            } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) {
 974
 975                off++;
 976                goto done;
 977            } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) {
 978                off++;
 979
 980                while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-"))))
 981                    off++;
 982                attr_state = 2;
 983            } else if(attr_state == 2 && CH(off) == _T('=')) {
 984
 985                off++;
 986                attr_state = 3;
 987            } else if(attr_state == 3) {
 988
 989                if(CH(off) == _T('"'))
 990                    attr_state = 43;
 991                else if(CH(off) == _T('\''))
 992                    attr_state = 42;
 993                else if(!ISANYOF(off, _T("\"'=<>`"))  &&  !ISNEWLINE(off))
 994                    attr_state = 41;
 995                else
 996                    return FALSE;
 997                off++;
 998            } else {
 999
1000                return FALSE;
1001            }
1002        }
1003
1004        if(n_lines == 0)
1005            return FALSE;
1006
1007        line_index++;
1008        if(line_index >= n_lines)
1009            return FALSE;
1010
1011        off = lines[line_index].beg;
1012        line_end = lines[line_index].end;
1013
1014        if(attr_state == 0  ||  attr_state == 41)
1015            attr_state = 1;
1016
1017        if(off >= max_end)
1018            return FALSE;
1019    }
1020
1021done:
1022    if(off >= max_end)
1023        return FALSE;
1024
1025    *p_end = off+1;
1026    return TRUE;
1027}
1028
1029static int
1030md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1031                        const MD_LINE* lines, MD_SIZE n_lines,
1032                        OFF beg, OFF max_end, OFF* p_end,
1033                        OFF* p_scan_horizon)
1034{
1035    OFF off = beg;
1036    MD_SIZE line_index = 0;
1037
1038    if(off < *p_scan_horizon  &&  *p_scan_horizon >= max_end - len) {
1039
1040        return FALSE;
1041    }
1042
1043    while(TRUE) {
1044        while(off + len <= lines[line_index].end  &&  off + len <= max_end) {
1045            if(md_ascii_eq(STR(off), str, len)) {
1046
1047                *p_end = off + len;
1048                return TRUE;
1049            }
1050            off++;
1051        }
1052
1053        line_index++;
1054        if(off >= max_end  ||  line_index >= n_lines) {
1055
1056            *p_scan_horizon = off;
1057            return FALSE;
1058        }
1059
1060        off = lines[line_index].beg;
1061    }
1062}
1063
1064static int
1065md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1066{
1067    OFF off = beg;
1068
1069    MD_ASSERT(CH(beg) == _T('<'));
1070
1071    if(off + 4 >= lines[0].end)
1072        return FALSE;
1073    if(CH(off+1) != _T('!')  ||  CH(off+2) != _T('-')  ||  CH(off+3) != _T('-'))
1074        return FALSE;
1075
1076    off += 2;
1077
1078    return md_scan_for_html_closer(ctx, _T("-->"), 3,
1079                lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon);
1080}
1081
1082static int
1083md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1084{
1085    OFF off = beg;
1086
1087    if(off + 2 >= lines[0].end)
1088        return FALSE;
1089    if(CH(off+1) != _T('?'))
1090        return FALSE;
1091    off += 2;
1092
1093    return md_scan_for_html_closer(ctx, _T("?>"), 2,
1094                lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon);
1095}
1096
1097static int
1098md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1099{
1100    OFF off = beg;
1101
1102    if(off + 2 >= lines[0].end)
1103        return FALSE;
1104    if(CH(off+1) != _T('!'))
1105        return FALSE;
1106    off += 2;
1107
1108    if(off >= lines[0].end  ||  !ISALPHA(off))
1109        return FALSE;
1110    off++;
1111    while(off < lines[0].end  &&  ISALPHA(off))
1112        off++;
1113
1114    return md_scan_for_html_closer(ctx, _T(">"), 1,
1115                lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
1116}
1117
1118static int
1119md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1120{
1121    static const CHAR open_str[] = _T("<![CDATA[");
1122    static const SZ open_size = SIZEOF_ARRAY(open_str) - 1;
1123
1124    OFF off = beg;
1125
1126    if(off + open_size >= lines[0].end)
1127        return FALSE;
1128    if(memcmp(STR(off), open_str, open_size) != 0)
1129        return FALSE;
1130    off += open_size;
1131
1132    return md_scan_for_html_closer(ctx, _T("]]>"), 3,
1133                lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
1134}
1135
1136static int
1137md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg, OFF max_end, OFF* p_end)
1138{
1139    MD_ASSERT(CH(beg) == _T('<'));
1140    return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end)  ||
1141            md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end)  ||
1142            md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end)  ||
1143            md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end)  ||
1144            md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1145}
1146
1147static int
1148md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1149{
1150    OFF off = beg;
1151    MD_UNUSED(ctx);
1152
1153    while(off < max_end  &&  ISXDIGIT_(text[off])  &&  off - beg <= 8)
1154        off++;
1155
1156    if(1 <= off - beg  &&  off - beg <= 6) {
1157        *p_end = off;
1158        return TRUE;
1159    } else {
1160        return FALSE;
1161    }
1162}
1163
1164static int
1165md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1166{
1167    OFF off = beg;
1168    MD_UNUSED(ctx);
1169
1170    while(off < max_end  &&  ISDIGIT_(text[off])  &&  off - beg <= 8)
1171        off++;
1172
1173    if(1 <= off - beg  &&  off - beg <= 7) {
1174        *p_end = off;
1175        return TRUE;
1176    } else {
1177        return FALSE;
1178    }
1179}
1180
1181static int
1182md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1183{
1184    OFF off = beg;
1185    MD_UNUSED(ctx);
1186
1187    if(off < max_end  &&  ISALPHA_(text[off]))
1188        off++;
1189    else
1190        return FALSE;
1191
1192    while(off < max_end  &&  ISALNUM_(text[off])  &&  off - beg <= 48)
1193        off++;
1194
1195    if(2 <= off - beg  &&  off - beg <= 48) {
1196        *p_end = off;
1197        return TRUE;
1198    } else {
1199        return FALSE;
1200    }
1201}
1202
1203static int
1204md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1205{
1206    int is_contents;
1207    OFF off = beg;
1208
1209    MD_ASSERT(text[off] == _T('&'));
1210    off++;
1211
1212    if(off+2 < max_end  &&  text[off] == _T('#')  &&  (text[off+1] == _T('x') || text[off+1] == _T('X')))
1213        is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
1214    else if(off+1 < max_end  &&  text[off] == _T('#'))
1215        is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
1216    else
1217        is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
1218
1219    if(is_contents  &&  off < max_end  &&  text[off] == _T(';')) {
1220        *p_end = off+1;
1221        return TRUE;
1222    } else {
1223        return FALSE;
1224    }
1225}
1226
1227static inline int
1228md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
1229{
1230    return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
1231}
1232
1233typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
1234struct MD_ATTRIBUTE_BUILD_tag {
1235    CHAR* text;
1236    MD_TEXTTYPE* substr_types;
1237    OFF* substr_offsets;
1238    int substr_count;
1239    int substr_alloc;
1240    MD_TEXTTYPE trivial_types[1];
1241    OFF trivial_offsets[2];
1242};
1243
1244#define MD_BUILD_ATTR_NO_ESCAPES    0x0001
1245
1246static int
1247md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1248                            MD_TEXTTYPE type, OFF off)
1249{
1250    if(build->substr_count >= build->substr_alloc) {
1251        MD_TEXTTYPE* new_substr_types;
1252        OFF* new_substr_offsets;
1253
1254        build->substr_alloc = (build->substr_alloc > 0
1255                ? build->substr_alloc + build->substr_alloc / 2
1256                : 8);
1257        new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
1258                                    build->substr_alloc * sizeof(MD_TEXTTYPE));
1259        if(new_substr_types == NULL) {
1260            MD_LOG("realloc() failed.");
1261            return -1;
1262        }
1263
1264        new_substr_offsets = (OFF*) realloc(build->substr_offsets,
1265                                    (build->substr_alloc+1) * sizeof(OFF));
1266        if(new_substr_offsets == NULL) {
1267            MD_LOG("realloc() failed.");
1268            free(new_substr_types);
1269            return -1;
1270        }
1271
1272        build->substr_types = new_substr_types;
1273        build->substr_offsets = new_substr_offsets;
1274    }
1275
1276    build->substr_types[build->substr_count] = type;
1277    build->substr_offsets[build->substr_count] = off;
1278    build->substr_count++;
1279    return 0;
1280}
1281
1282static void
1283md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1284{
1285    MD_UNUSED(ctx);
1286
1287    if(build->substr_alloc > 0) {
1288        free(build->text);
1289        free(build->substr_types);
1290        free(build->substr_offsets);
1291    }
1292}
1293
1294static int
1295md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
1296                   unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
1297{
1298    OFF raw_off, off;
1299    int is_trivial;
1300    int ret = 0;
1301
1302    memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
1303
1304    is_trivial = TRUE;
1305    for(raw_off = 0; raw_off < raw_size; raw_off++) {
1306        if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
1307            is_trivial = FALSE;
1308            break;
1309        }
1310    }
1311
1312    if(is_trivial) {
1313        build->text = (CHAR*) (raw_size ? raw_text : NULL);
1314        build->substr_types = build->trivial_types;
1315        build->substr_offsets = build->trivial_offsets;
1316        build->substr_count = 1;
1317        build->substr_alloc = 0;
1318        build->trivial_types[0] = MD_TEXT_NORMAL;
1319        build->trivial_offsets[0] = 0;
1320        build->trivial_offsets[1] = raw_size;
1321        off = raw_size;
1322    } else {
1323        build->text = (CHAR*) malloc(raw_size * sizeof(CHAR));
1324        if(build->text == NULL) {
1325            MD_LOG("malloc() failed.");
1326            goto abort;
1327        }
1328
1329        raw_off = 0;
1330        off = 0;
1331
1332        while(raw_off < raw_size) {
1333            if(raw_text[raw_off] == _T('\0')) {
1334                MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
1335                memcpy(build->text + off, raw_text + raw_off, 1);
1336                off++;
1337                raw_off++;
1338                continue;
1339            }
1340
1341            if(raw_text[raw_off] == _T('&')) {
1342                OFF ent_end;
1343
1344                if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
1345                    MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
1346                    memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off);
1347                    off += ent_end - raw_off;
1348                    raw_off = ent_end;
1349                    continue;
1350                }
1351            }
1352
1353            if(build->substr_count == 0  ||  build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL)
1354                MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
1355
1356            if(!(flags & MD_BUILD_ATTR_NO_ESCAPES)  &&
1357               raw_text[raw_off] == _T('\\')  &&  raw_off+1 < raw_size  &&
1358               (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
1359                raw_off++;
1360
1361            build->text[off++] = raw_text[raw_off++];
1362        }
1363        build->substr_offsets[build->substr_count] = off;
1364    }
1365
1366    attr->text = build->text;
1367    attr->size = off;
1368    attr->substr_offsets = build->substr_offsets;
1369    attr->substr_types = build->substr_types;
1370    return 0;
1371
1372abort:
1373    md_free_attribute(ctx, build);
1374    return -1;
1375}
1376
/* Offset basis and prime of the 32-bit FNV-1a hash. */
#define MD_FNV1A_BASE       2166136261U
#define MD_FNV1A_PRIME      16777619U

/* Fold n bytes at data into an FNV-1a hash, starting from base. Pass
 * MD_FNV1A_BASE to start a new hash, or a previous result to continue one. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* cursor = (const unsigned char*) data;
    unsigned hash = base;

    while(n-- > 0) {
        /* FNV-1a order: xor the byte in first, then multiply. */
        hash = (hash ^ *cursor) * MD_FNV1A_PRIME;
        cursor++;
    }

    return hash;
}
1394
/* One link reference definition. Definitions are hashed and compared by
 * their label (see md_link_label_hash() and md_ref_def_cmp()). */
1395struct MD_REF_DEF_tag {
1396    CHAR* label;                        /* Label text, label_size characters long. */
1397    CHAR* title;                        /* Presumably the optional link title (title_size characters) -- confirm against the code that fills it. */
1398    unsigned hash;                      /* md_link_label_hash() of the label; compared first when ordering definitions. */
1399    SZ label_size;
1400    SZ title_size;
1401    OFF dest_beg;                       /* Presumably offsets delimiting the link destination -- confirm. */
1402    OFF dest_end;
1403    unsigned char label_needs_free : 1; /* NOTE(review): looks like an ownership flag for label (heap copy vs. borrowed) -- confirm. */
1404    unsigned char title_needs_free : 1; /* NOTE(review): same, for title. */
1405};
1406
1407static unsigned
1408md_link_label_hash(const CHAR* label, SZ size)
1409{
1410    unsigned hash = MD_FNV1A_BASE;
1411    OFF off;
1412    unsigned codepoint;
1413    int is_whitespace = FALSE;
1414
1415    off = md_skip_unicode_whitespace(label, 0, size);
1416    while(off < size) {
1417        SZ char_size;
1418
1419        codepoint = md_decode_unicode(label, off, size, &char_size);
1420        is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);
1421
1422        if(is_whitespace) {
1423            codepoint = ' ';
1424            hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
1425            off = md_skip_unicode_whitespace(label, off, size);
1426        } else {
1427            MD_UNICODE_FOLD_INFO fold_info;
1428
1429            md_get_unicode_fold_info(codepoint, &fold_info);
1430            hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
1431            off += char_size;
1432        }
1433    }
1434
1435    return hash;
1436}
1437
1438static OFF
1439md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1440                                 MD_UNICODE_FOLD_INFO* fold_info)
1441{
1442    unsigned codepoint;
1443    SZ char_size;
1444
1445    if(off >= size) {
1446
1447        goto whitespace;
1448    }
1449
1450    codepoint = md_decode_unicode(label, off, size, &char_size);
1451    off += char_size;
1452    if(ISUNICODEWHITESPACE_(codepoint)) {
1453
1454        goto whitespace;
1455    }
1456
1457    md_get_unicode_fold_info(codepoint, fold_info);
1458    return off;
1459
1460whitespace:
1461    fold_info->codepoints[0] = _T(' ');
1462    fold_info->n_codepoints = 1;
1463    return md_skip_unicode_whitespace(label, off, size);
1464}
1465
1466static int
1467md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1468{
1469    OFF a_off;
1470    OFF b_off;
1471    MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
1472    MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
1473    OFF a_fi_off = 0;
1474    OFF b_fi_off = 0;
1475    int cmp;
1476
1477    a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
1478    b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
1479    while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
1480          b_off < b_size || b_fi_off < b_fi.n_codepoints)
1481    {
1482
1483        if(a_fi_off >= a_fi.n_codepoints) {
1484            a_fi_off = 0;
1485            a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
1486        }
1487        if(b_fi_off >= b_fi.n_codepoints) {
1488            b_fi_off = 0;
1489            b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
1490        }
1491
1492        cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1493        if(cmp != 0)
1494            return cmp;
1495
1496        a_fi_off++;
1497        b_fi_off++;
1498    }
1499
1500    return 0;
1501}
1502
1503typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
1504struct MD_REF_DEF_LIST_tag {
1505    int n_ref_defs;
1506    int alloc_ref_defs;
1507    MD_REF_DEF* ref_defs[];
1508};
1509
1510static int
1511md_ref_def_cmp(const void* a, const void* b)
1512{
1513    const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1514    const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1515
1516    if(a_ref->hash < b_ref->hash)
1517        return -1;
1518    else if(a_ref->hash > b_ref->hash)
1519        return +1;
1520    else
1521        return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size);
1522}
1523
1524static int
1525md_ref_def_cmp_for_sort(const void* a, const void* b)
1526{
1527    int cmp;
1528
1529    cmp = md_ref_def_cmp(a, b);
1530
1531    if(cmp == 0) {
1532        const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1533        const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1534
1535        if(a_ref < b_ref)
1536            cmp = -1;
1537        else if(a_ref > b_ref)
1538            cmp = +1;
1539        else
1540            cmp = 0;
1541    }
1542
1543    return cmp;
1544}
1545
1546static int
1547md_build_ref_def_hashtable(MD_CTX* ctx)
1548{
1549    int i, j;
1550
1551    if(ctx->n_ref_defs == 0)
1552        return 0;
1553
1554    ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4;
1555    ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
1556    if(ctx->ref_def_hashtable == NULL) {
1557        MD_LOG("malloc() failed.");
1558        goto abort;
1559    }
1560    memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*));
1561
1562    for(i = 0; i < ctx->n_ref_defs; i++) {
1563        MD_REF_DEF* def = &ctx->ref_defs[i];
1564        void* bucket;
1565        MD_REF_DEF_LIST* list;
1566
1567        def->hash = md_link_label_hash(def->label, def->label_size);
1568        bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
1569
1570        if(bucket == NULL) {
1571
1572            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
1573            continue;
1574        }
1575
1576        if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1577
1578            MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
1579
1580            if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
1581
1582                continue;
1583            }
1584
1585            list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
1586            if(list == NULL) {
1587                MD_LOG("malloc() failed.");
1588                goto abort;
1589            }
1590            list->ref_defs[0] = old_def;
1591            list->ref_defs[1] = def;
1592            list->n_ref_defs = 2;
1593            list->alloc_ref_defs = 2;
1594            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1595            continue;
1596        }
1597
1598        list = (MD_REF_DEF_LIST*) bucket;
1599        if(list->n_ref_defs >= list->alloc_ref_defs) {
1600            int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;
1601            MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
1602                        sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
1603            if(list_tmp == NULL) {
1604                MD_LOG("realloc() failed.");
1605                goto abort;
1606            }
1607            list = list_tmp;
1608            list->alloc_ref_defs = alloc_ref_defs;
1609            ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1610        }
1611
1612        list->ref_defs[list->n_ref_defs] = def;
1613        list->n_ref_defs++;
1614    }
1615
1616    for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1617        void* bucket = ctx->ref_def_hashtable[i];
1618        MD_REF_DEF_LIST* list;
1619
1620        if(bucket == NULL)
1621            continue;
1622        if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1623            continue;
1624
1625        list = (MD_REF_DEF_LIST*) bucket;
1626        qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
1627
1628        for(j = 1; j < list->n_ref_defs; j++) {
1629            if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
1630                list->ref_defs[j] = list->ref_defs[j-1];
1631        }
1632    }
1633
1634    return 0;
1635
1636abort:
1637    return -1;
1638}
1639
1640static void
1641md_free_ref_def_hashtable(MD_CTX* ctx)
1642{
1643    if(ctx->ref_def_hashtable != NULL) {
1644        int i;
1645
1646        for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1647            void* bucket = ctx->ref_def_hashtable[i];
1648            if(bucket == NULL)
1649                continue;
1650            if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1651                continue;
1652            free(bucket);
1653        }
1654
1655        free(ctx->ref_def_hashtable);
1656    }
1657}
1658
1659static const MD_REF_DEF*
1660md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1661{
1662    unsigned hash;
1663    void* bucket;
1664
1665    if(ctx->ref_def_hashtable_size == 0)
1666        return NULL;
1667
1668    hash = md_link_label_hash(label, label_size);
1669    bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1670
1671    if(bucket == NULL) {
1672        return NULL;
1673    } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1674        const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1675
1676        if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0)
1677            return def;
1678        else
1679            return NULL;
1680    } else {
1681        MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1682        MD_REF_DEF key_buf;
1683        const MD_REF_DEF* key = &key_buf;
1684        const MD_REF_DEF** ret;
1685
1686        key_buf.label = (CHAR*) label;
1687        key_buf.label_size = label_size;
1688        key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size);
1689
1690        ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
1691                    list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
1692        if(ret != NULL)
1693            return *ret;
1694        else
1695            return NULL;
1696    }
1697}
1698
1699typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
1700struct MD_LINK_ATTR_tag {
1701    OFF dest_beg;
1702    OFF dest_end;
1703
1704    CHAR* title;
1705    SZ title_size;
1706    int title_needs_free;
1707};
1708
1709static int
1710md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
1711                 OFF* p_end, MD_SIZE* p_beg_line_index, MD_SIZE* p_end_line_index,
1712                 OFF* p_contents_beg, OFF* p_contents_end)
1713{
1714    OFF off = beg;
1715    OFF contents_beg = 0;
1716    OFF contents_end = 0;
1717    MD_SIZE line_index = 0;
1718    int len = 0;
1719
1720    *p_beg_line_index = 0;
1721
1722    if(CH(off) != _T('['))
1723        return FALSE;
1724    off++;
1725
1726    while(1) {
1727        OFF line_end = lines[line_index].end;
1728
1729        while(off < line_end) {
1730            if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
1731                if(contents_end == 0) {
1732                    contents_beg = off;
1733                    *p_beg_line_index = line_index;
1734                }
1735                contents_end = off + 2;
1736                off += 2;
1737            } else if(CH(off) == _T('[')) {
1738                return FALSE;
1739            } else if(CH(off) == _T(']')) {
1740                if(contents_beg < contents_end) {
1741
1742                    *p_contents_beg = contents_beg;
1743                    *p_contents_end = contents_end;
1744                    *p_end = off+1;
1745                    *p_end_line_index = line_index;
1746                    return TRUE;
1747                } else {
1748
1749                    return FALSE;
1750                }
1751            } else {
1752                unsigned codepoint;
1753                SZ char_size;
1754
1755                codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
1756                if(!ISUNICODEWHITESPACE_(codepoint)) {
1757                    if(contents_end == 0) {
1758                        contents_beg = off;
1759                        *p_beg_line_index = line_index;
1760                    }
1761                    contents_end = off + char_size;
1762                }
1763
1764                off += char_size;
1765            }
1766
1767            len++;
1768            if(len > 999)
1769                return FALSE;
1770        }
1771
1772        line_index++;
1773        len++;
1774        if(line_index < n_lines)
1775            off = lines[line_index].beg;
1776        else
1777            break;
1778    }
1779
1780    return FALSE;
1781}
1782
1783static int
1784md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1785                         OFF* p_contents_beg, OFF* p_contents_end)
1786{
1787    OFF off = beg;
1788
1789    if(off >= max_end  ||  CH(off) != _T('<'))
1790        return FALSE;
1791    off++;
1792
1793    while(off < max_end) {
1794        if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
1795            off += 2;
1796            continue;
1797        }
1798
1799        if(ISNEWLINE(off)  ||  CH(off) == _T('<'))
1800            return FALSE;
1801
1802        if(CH(off) == _T('>')) {
1803
1804            *p_contents_beg = beg+1;
1805            *p_contents_end = off;
1806            *p_end = off+1;
1807            return TRUE;
1808        }
1809
1810        off++;
1811    }
1812
1813    return FALSE;
1814}
1815
1816static int
1817md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1818                         OFF* p_contents_beg, OFF* p_contents_end)
1819{
1820    OFF off = beg;
1821    int parenthesis_level = 0;
1822
1823    while(off < max_end) {
1824        if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
1825            off += 2;
1826            continue;
1827        }
1828
1829        if(ISWHITESPACE(off) || ISCNTRL(off))
1830            break;
1831
1832        if(CH(off) == _T('(')) {
1833            parenthesis_level++;
1834            if(parenthesis_level > 32)
1835                return FALSE;
1836        } else if(CH(off) == _T(')')) {
1837            if(parenthesis_level == 0)
1838                break;
1839            parenthesis_level--;
1840        }
1841
1842        off++;
1843    }
1844
1845    if(parenthesis_level != 0  ||  off == beg)
1846        return FALSE;
1847
1848    *p_contents_beg = beg;
1849    *p_contents_end = off;
1850    *p_end = off;
1851    return TRUE;
1852}
1853
1854static inline int
1855md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1856                       OFF* p_contents_beg, OFF* p_contents_end)
1857{
1858    if(CH(beg) == _T('<'))
1859        return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
1860    else
1861        return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
1862}
1863
1864static int
1865md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
1866                 OFF* p_end, MD_SIZE* p_beg_line_index, MD_SIZE* p_end_line_index,
1867                 OFF* p_contents_beg, OFF* p_contents_end)
1868{
1869    OFF off = beg;
1870    CHAR closer_char;
1871    MD_SIZE line_index = 0;
1872
1873    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
1874        off++;
1875    if(off >= lines[line_index].end) {
1876        line_index++;
1877        if(line_index >= n_lines)
1878            return FALSE;
1879        off = lines[line_index].beg;
1880    }
1881    if(off == beg)
1882        return FALSE;
1883
1884    *p_beg_line_index = line_index;
1885
1886    switch(CH(off)) {
1887        case _T('"'):   closer_char = _T('"'); break;
1888        case _T('\''):  closer_char = _T('\''); break;
1889        case _T('('):   closer_char = _T(')'); break;
1890        default:        return FALSE;
1891    }
1892    off++;
1893
1894    *p_contents_beg = off;
1895
1896    while(line_index < n_lines) {
1897        OFF line_end = lines[line_index].end;
1898
1899        while(off < line_end) {
1900            if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
1901                off++;
1902            } else if(CH(off) == closer_char) {
1903
1904                *p_contents_end = off;
1905                *p_end = off+1;
1906                *p_end_line_index = line_index;
1907                return TRUE;
1908            } else if(closer_char == _T(')')  &&  CH(off) == _T('(')) {
1909
1910                return FALSE;
1911            }
1912
1913            off++;
1914        }
1915
1916        line_index++;
1917    }
1918
1919    return FALSE;
1920}
1921
1922static int
1923md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
1924{
1925    OFF label_contents_beg;
1926    OFF label_contents_end;
1927    MD_SIZE label_contents_line_index;
1928    int label_is_multiline = FALSE;
1929    OFF dest_contents_beg;
1930    OFF dest_contents_end;
1931    OFF title_contents_beg;
1932    OFF title_contents_end;
1933    MD_SIZE title_contents_line_index;
1934    int title_is_multiline = FALSE;
1935    OFF off;
1936    MD_SIZE line_index = 0;
1937    MD_SIZE tmp_line_index;
1938    MD_REF_DEF* def = NULL;
1939    int ret = 0;
1940
1941    if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
1942                &off, &label_contents_line_index, &line_index,
1943                &label_contents_beg, &label_contents_end))
1944        return FALSE;
1945    label_is_multiline = (label_contents_line_index != line_index);
1946
1947    if(off >= lines[line_index].end  ||  CH(off) != _T(':'))
1948        return FALSE;
1949    off++;
1950
1951    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
1952        off++;
1953    if(off >= lines[line_index].end) {
1954        line_index++;
1955        if(line_index >= n_lines)
1956            return FALSE;
1957        off = lines[line_index].beg;
1958    }
1959
1960    if(!md_is_link_destination(ctx, off, lines[line_index].end,
1961                &off, &dest_contents_beg, &dest_contents_end))
1962        return FALSE;
1963
1964    if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
1965                &off, &title_contents_line_index, &tmp_line_index,
1966                &title_contents_beg, &title_contents_end)
1967        &&  off >= lines[line_index + tmp_line_index].end)
1968    {
1969        title_is_multiline = (tmp_line_index != title_contents_line_index);
1970        title_contents_line_index += line_index;
1971        line_index += tmp_line_index;
1972    } else {
1973
1974        title_is_multiline = FALSE;
1975        title_contents_beg = off;
1976        title_contents_end = off;
1977        title_contents_line_index = 0;
1978    }
1979
1980    if(off < lines[line_index].end)
1981        return FALSE;
1982
1983    if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
1984        MD_REF_DEF* new_defs;
1985
1986        ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0
1987                ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
1988                : 16);
1989        new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
1990        if(new_defs == NULL) {
1991            MD_LOG("realloc() failed.");
1992            goto abort;
1993        }
1994
1995        ctx->ref_defs = new_defs;
1996    }
1997    def = &ctx->ref_defs[ctx->n_ref_defs];
1998    memset(def, 0, sizeof(MD_REF_DEF));
1999
2000    if(label_is_multiline) {
2001        MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2002                    lines + label_contents_line_index, n_lines - label_contents_line_index,
2003                    _T(' '), &def->label, &def->label_size));
2004        def->label_needs_free = TRUE;
2005    } else {
2006        def->label = (CHAR*) STR(label_contents_beg);
2007        def->label_size = label_contents_end - label_contents_beg;
2008    }
2009
2010    if(title_is_multiline) {
2011        MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2012                    lines + title_contents_line_index, n_lines - title_contents_line_index,
2013                    _T('\n'), &def->title, &def->title_size));
2014        def->title_needs_free = TRUE;
2015    } else {
2016        def->title = (CHAR*) STR(title_contents_beg);
2017        def->title_size = title_contents_end - title_contents_beg;
2018    }
2019
2020    def->dest_beg = dest_contents_beg;
2021    def->dest_end = dest_contents_end;
2022
2023    ctx->n_ref_defs++;
2024    return line_index + 1;
2025
2026abort:
2027
2028    if(def != NULL  &&  def->label_needs_free)
2029        free(def->label);
2030    if(def != NULL  &&  def->title_needs_free)
2031        free(def->title);
2032    return ret;
2033}
2034
2035static int
2036md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
2037                     OFF beg, OFF end, MD_LINK_ATTR* attr)
2038{
2039    const MD_REF_DEF* def;
2040    const MD_LINE* beg_line;
2041    int is_multiline;
2042    CHAR* label;
2043    SZ label_size;
2044    int ret = FALSE;
2045
2046    MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
2047    MD_ASSERT(CH(end-1) == _T(']'));
2048
2049    if(ctx->max_ref_def_output == 0)
2050        return FALSE;
2051
2052    beg += (CH(beg) == _T('!') ? 2 : 1);
2053    end--;
2054
2055    beg_line = md_lookup_line(beg, lines, n_lines, NULL);
2056    is_multiline = (end > beg_line->end);
2057
2058    if(is_multiline) {
2059        MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2060                 (int)(n_lines - (beg_line - lines)), _T(' '), &label, &label_size));
2061    } else {
2062        label = (CHAR*) STR(beg);
2063        label_size = end - beg;
2064    }
2065
2066    def = md_lookup_ref_def(ctx, label, label_size);
2067    if(def != NULL) {
2068        attr->dest_beg = def->dest_beg;
2069        attr->dest_end = def->dest_end;
2070        attr->title = def->title;
2071        attr->title_size = def->title_size;
2072        attr->title_needs_free = FALSE;
2073    }
2074
2075    if(is_multiline)
2076        free(label);
2077
2078    if(def != NULL) {
2079
2080        MD_SIZE output_size_estimation = def->label_size + def->title_size + def->dest_end - def->dest_beg;
2081        if(output_size_estimation < ctx->max_ref_def_output) {
2082            ctx->max_ref_def_output -= output_size_estimation;
2083            ret = TRUE;
2084        } else {
2085            MD_LOG("Too many link reference definition instantiations.");
2086            ctx->max_ref_def_output = 0;
2087        }
2088    }
2089
2090abort:
2091    return ret;
2092}
2093
2094static int
2095md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
2096                       OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2097{
2098    MD_SIZE line_index = 0;
2099    MD_SIZE tmp_line_index;
2100    OFF title_contents_beg;
2101    OFF title_contents_end;
2102    MD_SIZE title_contents_line_index;
2103    int title_is_multiline;
2104    OFF off = beg;
2105    int ret = FALSE;
2106
2107    md_lookup_line(off, lines, n_lines, &line_index);
2108
2109    MD_ASSERT(CH(off) == _T('('));
2110    off++;
2111
2112    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2113        off++;
2114    if(off >= lines[line_index].end  &&  (off >= ctx->size  ||  ISNEWLINE(off))) {
2115        line_index++;
2116        if(line_index >= n_lines)
2117            return FALSE;
2118        off = lines[line_index].beg;
2119    }
2120
2121    if(off < ctx->size  &&  CH(off) == _T(')')) {
2122        attr->dest_beg = off;
2123        attr->dest_end = off;
2124        attr->title = NULL;
2125        attr->title_size = 0;
2126        attr->title_needs_free = FALSE;
2127        off++;
2128        *p_end = off;
2129        return TRUE;
2130    }
2131
2132    if(!md_is_link_destination(ctx, off, lines[line_index].end,
2133                        &off, &attr->dest_beg, &attr->dest_end))
2134        return FALSE;
2135
2136    if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2137                &off, &title_contents_line_index, &tmp_line_index,
2138                &title_contents_beg, &title_contents_end))
2139    {
2140        title_is_multiline = (tmp_line_index != title_contents_line_index);
2141        title_contents_line_index += line_index;
2142        line_index += tmp_line_index;
2143    } else {
2144
2145        title_is_multiline = FALSE;
2146        title_contents_beg = off;
2147        title_contents_end = off;
2148        title_contents_line_index = 0;
2149    }
2150
2151    while(off < lines[line_index].end  &&  ISWHITESPACE(off))
2152        off++;
2153    if(off >= lines[line_index].end) {
2154        line_index++;
2155        if(line_index >= n_lines)
2156            return FALSE;
2157        off = lines[line_index].beg;
2158    }
2159    if(CH(off) != _T(')'))
2160        goto abort;
2161    off++;
2162
2163    if(title_contents_beg >= title_contents_end) {
2164        attr->title = NULL;
2165        attr->title_size = 0;
2166        attr->title_needs_free = FALSE;
2167    } else if(!title_is_multiline) {
2168        attr->title = (CHAR*) STR(title_contents_beg);
2169        attr->title_size = title_contents_end - title_contents_beg;
2170        attr->title_needs_free = FALSE;
2171    } else {
2172        MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2173                    lines + title_contents_line_index, n_lines - title_contents_line_index,
2174                    _T('\n'), &attr->title, &attr->title_size));
2175        attr->title_needs_free = TRUE;
2176    }
2177
2178    *p_end = off;
2179    ret = TRUE;
2180
2181abort:
2182    return ret;
2183}
2184
2185static void
2186md_free_ref_defs(MD_CTX* ctx)
2187{
2188    int i;
2189
2190    for(i = 0; i < ctx->n_ref_defs; i++) {
2191        MD_REF_DEF* def = &ctx->ref_defs[i];
2192
2193        if(def->label_needs_free)
2194            free(def->label);
2195        if(def->title_needs_free)
2196            free(def->title);
2197    }
2198
2199    free(ctx->ref_defs);
2200}
2201
2202struct MD_MARK_tag {
2203    OFF beg;
2204    OFF end;
2205
2206    int prev;
2207    int next;
2208    CHAR ch;
2209    unsigned char flags;
2210};
2211
2212#define MD_MARK_POTENTIAL_OPENER            0x01
2213#define MD_MARK_POTENTIAL_CLOSER            0x02
2214#define MD_MARK_OPENER                      0x04
2215#define MD_MARK_CLOSER                      0x08
2216#define MD_MARK_RESOLVED                    0x10
2217
2218#define MD_MARK_EMPH_OC                     0x20
2219#define MD_MARK_EMPH_MOD3_0                 0x40
2220#define MD_MARK_EMPH_MOD3_1                 0x80
2221#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)
2222#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)
2223#define MD_MARK_AUTOLINK                    0x20
2224#define MD_MARK_AUTOLINK_MISSING_MAILTO     0x40
2225#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20
2226#define MD_MARK_HASNESTEDBRACKETS           0x20
2227
2228static MD_MARKSTACK*
2229md_emph_stack(MD_CTX* ctx, MD_CHAR ch, unsigned flags)
2230{
2231    MD_MARKSTACK* stack;
2232
2233    switch(ch) {
2234        case '*':   stack = &ASTERISK_OPENERS_oo_mod3_0; break;
2235        case '_':   stack = &UNDERSCORE_OPENERS_oo_mod3_0; break;
2236        default:    MD_UNREACHABLE();
2237    }
2238
2239    if(flags & MD_MARK_EMPH_OC)
2240        stack += 3;
2241
2242    switch(flags & MD_MARK_EMPH_MOD3_MASK) {
2243        case MD_MARK_EMPH_MOD3_0:   stack += 0; break;
2244        case MD_MARK_EMPH_MOD3_1:   stack += 1; break;
2245        case MD_MARK_EMPH_MOD3_2:   stack += 2; break;
2246        default:                    MD_UNREACHABLE();
2247    }
2248
2249    return stack;
2250}
2251
2252static MD_MARKSTACK*
2253md_opener_stack(MD_CTX* ctx, int mark_index)
2254{
2255    MD_MARK* mark = &ctx->marks[mark_index];
2256
2257    switch(mark->ch) {
2258        case _T('*'):
2259        case _T('_'):   return md_emph_stack(ctx, mark->ch, mark->flags);
2260
2261        case _T('~'):   return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2262
2263        case _T('!'):
2264        case _T('['):   return &BRACKET_OPENERS;
2265
2266        default:        MD_UNREACHABLE();
2267    }
2268}
2269
2270static MD_MARK*
2271md_add_mark(MD_CTX* ctx)
2272{
2273    if(ctx->n_marks >= ctx->alloc_marks) {
2274        MD_MARK* new_marks;
2275
2276        ctx->alloc_marks = (ctx->alloc_marks > 0
2277                ? ctx->alloc_marks + ctx->alloc_marks / 2
2278                : 64);
2279        new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
2280        if(new_marks == NULL) {
2281            MD_LOG("realloc() failed.");
2282            return NULL;
2283        }
2284
2285        ctx->marks = new_marks;
2286    }
2287
2288    return &ctx->marks[ctx->n_marks++];
2289}
2290
/* Append a new mark and store it in the variable `mark`. If that fails,
 * set `ret` to -1 and jump to the label `abort`. The code using the macro
 * has to provide `ctx`, `mark`, `ret` and `abort`.
 */
#define ADD_MARK_()                                                     \
        do {                                                            \
            if((mark = md_add_mark(ctx)) == NULL) {                     \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
        } while(0)

/* The same as ADD_MARK_(), with the new mark initialized: it gets the
 * given character, range and flags, and it is not linked to any other
 * mark (prev and next are -1).
 */
#define ADD_MARK(ch_, beg_, end_, flags_)                               \
        do {                                                            \
            ADD_MARK_();                                                \
            mark->beg = (beg_);                                         \
            mark->end = (end_);                                         \
            mark->prev = mark->next = -1;                               \
            mark->ch = (char)(ch_);                                     \
            mark->flags = (flags_);                                     \
        } while(0)
2310
2311static inline void
2312md_mark_stack_push(MD_CTX* ctx, MD_MARKSTACK* stack, int mark_index)
2313{
2314    ctx->marks[mark_index].next = stack->top;
2315    stack->top = mark_index;
2316}
2317
2318static inline int
2319md_mark_stack_pop(MD_CTX* ctx, MD_MARKSTACK* stack)
2320{
2321    int top = stack->top;
2322    if(top >= 0)
2323        stack->top = ctx->marks[top].next;
2324    return top;
2325}
2326
2327static inline void
2328md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2329{
2330    MD_MARK* mark = &ctx->marks[mark_index];
2331    MD_ASSERT(mark->ch == 'D');
2332
2333    MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF));
2334    memcpy(mark, &ptr, sizeof(void*));
2335}
2336
2337static inline void*
2338md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2339{
2340    void* ptr;
2341    MD_MARK* mark = &ctx->marks[mark_index];
2342    MD_ASSERT(mark->ch == 'D');
2343    memcpy(&ptr, mark, sizeof(void*));
2344    return ptr;
2345}
2346
2347static inline void
2348md_resolve_range(MD_CTX* ctx, int opener_index, int closer_index)
2349{
2350    MD_MARK* opener = &ctx->marks[opener_index];
2351    MD_MARK* closer = &ctx->marks[closer_index];
2352
2353    opener->next = closer_index;
2354    closer->prev = opener_index;
2355
2356    opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
2357    closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
2358}
2359
2360#define MD_ROLLBACK_CROSSING    0
2361#define MD_ROLLBACK_ALL         1
2362
2363static void
2364md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
2365{
2366    int i;
2367
2368    for(i = 0; i < (int) SIZEOF_ARRAY(ctx->opener_stacks); i++) {
2369        MD_MARKSTACK* stack = &ctx->opener_stacks[i];
2370        while(stack->top >= opener_index)
2371            md_mark_stack_pop(ctx, stack);
2372    }
2373
2374    if(how == MD_ROLLBACK_ALL) {
2375        for(i = opener_index + 1; i < closer_index; i++) {
2376            ctx->marks[i].ch = 'D';
2377            ctx->marks[i].flags = 0;
2378        }
2379    }
2380}
2381
2382static void
2383md_build_mark_char_map(MD_CTX* ctx)
2384{
2385    memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
2386
2387    ctx->mark_char_map['\\'] = 1;
2388    ctx->mark_char_map['*'] = 1;
2389    ctx->mark_char_map['_'] = 1;
2390    ctx->mark_char_map['`'] = 1;
2391    ctx->mark_char_map['&'] = 1;
2392    ctx->mark_char_map[';'] = 1;
2393    ctx->mark_char_map['<'] = 1;
2394    ctx->mark_char_map['>'] = 1;
2395    ctx->mark_char_map['['] = 1;
2396    ctx->mark_char_map['!'] = 1;
2397    ctx->mark_char_map[']'] = 1;
2398    ctx->mark_char_map['\0'] = 1;
2399
2400    if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
2401        ctx->mark_char_map['~'] = 1;
2402
2403    if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
2404        ctx->mark_char_map['$'] = 1;
2405
2406    if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
2407        ctx->mark_char_map['@'] = 1;
2408
2409    if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
2410        ctx->mark_char_map[':'] = 1;
2411
2412    if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
2413        ctx->mark_char_map['.'] = 1;
2414
2415    if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS))
2416        ctx->mark_char_map['|'] = 1;
2417
2418    if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
2419        int i;
2420
2421        for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) {
2422            if(ISWHITESPACE_(i))
2423                ctx->mark_char_map[i] = 1;
2424        }
2425    }
2426}
2427
2428static int
2429md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, OFF beg,
2430                MD_MARK* opener, MD_MARK* closer,
2431                OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
2432                int* p_reached_paragraph_end)
2433{
2434    OFF opener_beg = beg;
2435    OFF opener_end;
2436    OFF closer_beg;
2437    OFF closer_end;
2438    SZ mark_len;
2439    OFF line_end;
2440    int has_space_after_opener = FALSE;
2441    int has_eol_after_opener = FALSE;
2442    int has_space_before_closer = FALSE;
2443    int has_eol_before_closer = FALSE;
2444    int has_only_space = TRUE;
2445    MD_SIZE line_index = 0;
2446
2447    line_end = lines[0].end;
2448    opener_end = opener_beg;
2449    while(opener_end < line_end  &&  CH(opener_end) == _T('`'))
2450        opener_end++;
2451    has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' '));
2452    has_eol_after_opener = (opener_end == line_end);
2453
2454    opener->end = opener_end;
2455
2456    mark_len = opener_end - opener_beg;
2457    if(mark_len > CODESPAN_MARK_MAXLEN)
2458        return FALSE;
2459
2460    if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end  ||
2461       (*p_reached_paragraph_end  &&  last_potential_closers[mark_len-1] < opener_end))
2462        return FALSE;
2463
2464    closer_beg = opener_end;
2465    closer_end = opener_end;
2466
2467    while(TRUE) {
2468        while(closer_beg < line_end  &&  CH(closer_beg) != _T('`')) {
2469            if(CH(closer_beg) != _T(' '))
2470                has_only_space = FALSE;
2471            closer_beg++;
2472        }
2473        closer_end = closer_beg;
2474        while(closer_end < line_end  &&  CH(closer_end) == _T('`'))
2475            closer_end++;
2476
2477        if(closer_end - closer_beg == mark_len) {
2478
2479            has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' '));
2480            has_eol_before_closer = (closer_beg == lines[line_index].beg);
2481            break;
2482        }
2483
2484        if(closer_end - closer_beg > 0) {
2485
2486            has_only_space = FALSE;
2487
2488            if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
2489                if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
2490                    last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
2491            }
2492        }
2493
2494        if(closer_end >= line_end) {
2495            line_index++;
2496            if(line_index >= n_lines) {
2497
2498                *p_reached_paragraph_end = TRUE;
2499                return FALSE;
2500            }
2501
2502            line_end = lines[line_index].end;
2503            closer_beg = lines[line_index].beg;
2504        } else {
2505            closer_beg = closer_end;
2506        }
2507    }
2508
2509    if(!has_only_space  &&
2510       (has_space_after_opener || has_eol_after_opener)  &&
2511       (has_space_before_closer || has_eol_before_closer))
2512    {
2513        if(has_space_after_opener)
2514            opener_end++;
2515        else
2516            opener_end = lines[1].beg;
2517
2518        if(has_space_before_closer)
2519            closer_beg--;
2520        else {
2521
2522            closer_beg = lines[line_index-1].end;
2523
2524            while(closer_beg < ctx->size  &&  ISBLANK(closer_beg))
2525                closer_beg++;
2526        }
2527    }
2528
2529    opener->ch = _T('`');
2530    opener->beg = opener_beg;
2531    opener->end = opener_end;
2532    opener->flags = MD_MARK_POTENTIAL_OPENER;
2533    closer->ch = _T('`');
2534    closer->beg = closer_beg;
2535    closer->end = closer_end;
2536    closer->flags = MD_MARK_POTENTIAL_CLOSER;
2537    return TRUE;
2538}
2539
2540static int
2541md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2542{
2543    OFF off = beg+1;
2544
2545    MD_ASSERT(CH(beg) == _T('<'));
2546
2547    if(off >= max_end  ||  !ISASCII(off))
2548        return FALSE;
2549    off++;
2550    while(1) {
2551        if(off >= max_end)
2552            return FALSE;
2553        if(off - beg > 32)
2554            return FALSE;
2555        if(CH(off) == _T(':')  &&  off - beg >= 3)
2556            break;
2557        if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
2558            return FALSE;
2559        off++;
2560    }
2561
2562    while(off < max_end  &&  CH(off) != _T('>')) {
2563        if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<'))
2564            return FALSE;
2565        off++;
2566    }
2567
2568    if(off >= max_end)
2569        return FALSE;
2570
2571    MD_ASSERT(CH(off) == _T('>'));
2572    *p_end = off+1;
2573    return TRUE;
2574}
2575
2576static int
2577md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2578{
2579    OFF off = beg + 1;
2580    int label_len;
2581
2582    MD_ASSERT(CH(beg) == _T('<'));
2583
2584    while(off < max_end  &&  (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-"))))
2585        off++;
2586    if(off <= beg+1)
2587        return FALSE;
2588
2589    if(off >= max_end  ||  CH(off) != _T('@'))
2590        return FALSE;
2591    off++;
2592
2593    label_len = 0;
2594    while(off < max_end) {
2595        if(ISALNUM(off))
2596            label_len++;
2597        else if(CH(off) == _T('-')  &&  label_len > 0)
2598            label_len++;
2599        else if(CH(off) == _T('.')  &&  label_len > 0  &&  CH(off-1) != _T('-'))
2600            label_len = 0;
2601        else
2602            break;
2603
2604        if(label_len > 63)
2605            return FALSE;
2606
2607        off++;
2608    }
2609
2610    if(label_len <= 0  || off >= max_end  ||  CH(off) != _T('>') ||  CH(off-1) == _T('-'))
2611        return FALSE;
2612
2613    *p_end = off+1;
2614    return TRUE;
2615}
2616
2617static int
2618md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2619{
2620    if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2621        *p_missing_mailto = FALSE;
2622        return TRUE;
2623    }
2624
2625    if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2626        *p_missing_mailto = TRUE;
2627        return TRUE;
2628    }
2629
2630    return FALSE;
2631}
2632
/* Scan the given lines and record all characters (or character runs) which
 * may have a special inline meaning as marks, via ADD_MARK().
 *
 * Code spans, raw HTML spans and "<...>" autolinks are recognized and
 * resolved right here; most other marks are only recorded as potential
 * openers and/or closers. '|' is recorded only if `table_mode` is non-zero
 * or MD_FLAG_WIKILINKS is enabled.
 *
 * Returns `ret`, which is initialized to 0.
 * NOTE(review): ADD_MARK is a macro defined outside of this chunk; the
 * otherwise unused `mark`, `ret` and the `abort:` label suggest it uses them
 * for reporting a failure -- confirm against the macro definition.
 */
static int
md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table_mode)
{
    MD_SIZE line_index;
    int ret = 0;
    MD_MARK* mark;
    /* State shared by all md_is_code_span() calls made for these lines. */
    OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 };
    int codespan_scanned_till_paragraph_end = FALSE;

    for(line_index = 0; line_index < n_lines; line_index++) {
        const MD_LINE* line = &lines[line_index];
        OFF off = line->beg;

        while(TRUE) {
            CHAR ch;

#ifdef MD4C_USE_UTF16
    /* A UTF-16 code unit may exceed the size of mark_char_map[]; such
     * characters are never mark characters. */
    #define IS_MARK_CHAR(off)   ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map))  &&  \
                                (ctx->mark_char_map[(unsigned char) CH(off)]))
#else
    /* Plain lookup indexed by the character converted to unsigned char. */
    #define IS_MARK_CHAR(off)   (ctx->mark_char_map[(unsigned char) CH(off)])
#endif

            /* Skip characters which are not mark characters; four at a time
             * while possible, then one by one. */
            while(off + 3 < line->end  &&  !IS_MARK_CHAR(off+0)  &&  !IS_MARK_CHAR(off+1)
                                       &&  !IS_MARK_CHAR(off+2)  &&  !IS_MARK_CHAR(off+3))
                off += 4;
            while(off < line->end  &&  !IS_MARK_CHAR(off+0))
                off++;

            if(off >= line->end)
                break;

            ch = CH(off);

            /* A backslash followed by punctuation or by a new line. Note the
             * bound is ctx->size, not line->end, so the escaped character
             * may lie beyond the end of the line. */
            if(ch == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
                /* An escaped new line is recorded only if another line
                 * follows. */
                if(!ISNEWLINE(off+1)  ||  line_index+1 < n_lines)
                    ADD_MARK(ch, off, off+2, MD_MARK_RESOLVED);
                off += 2;
                continue;
            }

            /* A run of '*' or '_' (potential emphasis delimiter). */
            if(ch == _T('*')  ||  ch == _T('_')) {
                OFF tmp = off+1;
                int left_level;     /* What precedes the run: 0 = line start or whitespace; 1 = punctuation; 2 = anything else. */
                int right_level;    /* What follows the run: 0 = line end or whitespace; 1 = punctuation; 2 = anything else. */

                while(tmp < line->end  &&  CH(tmp) == ch)
                    tmp++;

                if(off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off))
                    left_level = 0;
                else if(ISUNICODEPUNCTBEFORE(off))
                    left_level = 1;
                else
                    left_level = 2;

                if(tmp == line->end  ||  ISUNICODEWHITESPACE(tmp))
                    right_level = 0;
                else if(ISUNICODEPUNCT(tmp))
                    right_level = 1;
                else
                    right_level = 2;

                /* Underscores surrounded by "other" characters on both sides
                 * are treated as if surrounded by whitespace, i.e. they
                 * produce no mark. */
                if(ch == _T('_')  &&  left_level == 2  &&  right_level == 2) {
                    left_level = 0;
                    right_level = 0;
                }

                if(left_level != 0  ||  right_level != 0) {
                    unsigned flags = 0;

                    if(left_level > 0  &&  left_level >= right_level)
                        flags |= MD_MARK_POTENTIAL_CLOSER;
                    if(right_level > 0  &&  right_level >= left_level)
                        flags |= MD_MARK_POTENTIAL_OPENER;
                    if(flags == (MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER))
                        flags |= MD_MARK_EMPH_OC;

                    /* Remember the length of the whole run modulo 3. */
                    switch((tmp - off) % 3) {
                        case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
                        case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
                        case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
                    }

                    ADD_MARK(ch, off, tmp, flags);

                    /* One empty dummy mark ('D') per each additional
                     * character of the run. md_split_emph_mark() expects to
                     * find them right behind the mark when splitting it. */
                    off++;
                    while(off < tmp) {
                        ADD_MARK('D', off, off, 0);
                        off++;
                    }
                    continue;
                }

                off = tmp;
                continue;
            }

            /* A run of back-ticks (potential code span). */
            if(ch == _T('`')) {
                MD_MARK opener;
                MD_MARK closer;
                int is_code_span;

                is_code_span = md_is_code_span(ctx, line, n_lines - line_index, off,
                            &opener, &closer, codespan_last_potential_closers,
                            &codespan_scanned_till_paragraph_end);
                if(is_code_span) {
                    ADD_MARK(opener.ch, opener.beg, opener.end, opener.flags);
                    ADD_MARK(closer.ch, closer.beg, closer.end, closer.flags);
                    md_resolve_range(ctx, ctx->n_marks-2, ctx->n_marks-1);
                    off = closer.end;

                    /* The code span may end on some later line. */
                    if(off > line->end)
                        line = md_lookup_line(off, lines, n_lines, &line_index);
                    continue;
                }

                /* opener.end is set by md_is_code_span() even on failure:
                 * skip the whole back-tick run. */
                off = opener.end;
                continue;
            }

            /* '&' (potential entity start). */
            if(ch == _T('&')) {
                ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                off++;
                continue;
            }

            /* ';' (potential entity end). */
            if(ch == _T(';')) {
                /* Recorded only if the most recently added mark is '&'. */
                if(ctx->n_marks > 0  &&  ctx->marks[ctx->n_marks-1].ch == _T('&'))
                    ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);

                off++;
                continue;
            }

            /* '<' (potential raw HTML span or autolink). */
            if(ch == _T('<')) {
                int is_autolink;
                OFF autolink_end;
                int missing_mailto;

                if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
                    int is_html;
                    OFF html_end;

                    is_html = md_is_html_any(ctx, line, n_lines - line_index, off,
                                    lines[n_lines-1].end, &html_end);
                    if(is_html) {
                        /* Zero-width, already resolved marks at both ends of
                         * the raw HTML, linked to each other. */
                        ADD_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        ADD_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
                        off = html_end;

                        /* The raw HTML may end on some later line. */
                        if(off > line->end)
                            line = md_lookup_line(off, lines, n_lines, &line_index);
                        continue;
                    }
                }

                is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
                                    &autolink_end, &missing_mailto);
                if(is_autolink) {
                    unsigned flags = MD_MARK_RESOLVED | MD_MARK_AUTOLINK;
                    if(missing_mailto)
                        flags |= MD_MARK_AUTOLINK_MISSING_MAILTO;

                    /* Already resolved marks covering the '<' and the '>',
                     * linked to each other. */
                    ADD_MARK(_T('<'), off, off+1, MD_MARK_OPENER | flags);
                    ADD_MARK(_T('>'), autolink_end-1, autolink_end, MD_MARK_CLOSER | flags);
                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
                    off = autolink_end;
                    continue;
                }

                off++;
                continue;
            }

            /* '[' or '![' (potential link or image opener). */
            if(ch == _T('[')  ||  (ch == _T('!') && off+1 < line->end && CH(off+1) == _T('['))) {
                OFF tmp = (ch == _T('[') ? off+1 : off+2);
                ADD_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
                off = tmp;

                /* Two dummy marks; md_resolve_links() stores the link
                 * destination and title into them. */
                ADD_MARK('D', off, off, 0);
                ADD_MARK('D', off, off, 0);
                continue;
            }
            /* ']' (potential link or image closer). */
            if(ch == _T(']')) {
                ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
                off++;
                continue;
            }

            /* '@' with an alphanumeric character right before and right
             * after it, and at least three more characters on the line. */
            if(ch == _T('@')) {
                if(line->beg + 1 <= off  &&  ISALNUM(off-1)  &&
                    off + 3 < line->end  &&  ISALNUM(off+1))
                {
                    ADD_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);

                    /* A dummy mark spanning the whole line.
                     * NOTE(review): presumably a reserve for the closer of
                     * a permissive autolink; its consumer is outside of
                     * this chunk -- confirm. */
                    ADD_MARK('D', line->beg, line->end, 0);
                }

                off++;
                continue;
            }

            /* ':' preceded by one of the schemes listed below and followed
             * by its suffix. */
            if(ch == _T(':')) {
                static struct {
                    const CHAR* scheme;
                    SZ scheme_size;
                    const CHAR* suffix;
                    SZ suffix_size;
                } scheme_map[] = {

                    { _T("http"), 4,    _T("//"), 2 },
                    { _T("https"), 5,   _T("//"), 2 },
                    { _T("ftp"), 3,     _T("//"), 2 }
                };
                int scheme_index;

                for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
                    const CHAR* scheme = scheme_map[scheme_index].scheme;
                    const SZ scheme_size = scheme_map[scheme_index].scheme_size;
                    const CHAR* suffix = scheme_map[scheme_index].suffix;
                    const SZ suffix_size = scheme_map[scheme_index].suffix_size;

                    /* The scheme has to fit between the line beginning and
                     * the ':', and something has to follow the suffix on
                     * the same line. */
                    if(line->beg + scheme_size <= off  &&  md_ascii_eq(STR(off-scheme_size), scheme, scheme_size)  &&
                        off + 1 + suffix_size < line->end  &&  md_ascii_eq(STR(off+1), suffix, suffix_size))
                    {
                        /* The mark covers the scheme, the ':' and the suffix. */
                        ADD_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);

                        /* A dummy mark spanning the whole line (see the
                         * NOTE(review) at the '@' handling above). */
                        ADD_MARK('D', line->beg, line->end, 0);
                        off += 1 + suffix_size;
                        break;
                    }
                }

                /* Executed after a match too, so one more character behind
                 * the suffix gets skipped in that case. */
                off++;
                continue;
            }

            /* '.' preceded by "www" which in turn starts the line or
             * follows whitespace or punctuation. */
            if(ch == _T('.')) {
                if(line->beg + 3 <= off  &&  md_ascii_eq(STR(off-3), _T("www"), 3)  &&
                   (off-3 == line->beg || ISUNICODEWHITESPACEBEFORE(off-3) || ISUNICODEPUNCTBEFORE(off-3)))
                {
                    /* The mark covers "www.". */
                    ADD_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);

                    /* A dummy mark spanning the whole line (see the
                     * NOTE(review) at the '@' handling above). */
                    ADD_MARK('D', line->beg, line->end, 0);
                    off++;
                    continue;
                }

                off++;
                continue;
            }

            /* '|' (recorded in the table mode or with wiki links enabled). */
            if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) {
                ADD_MARK(ch, off, off+1, 0);
                off++;
                continue;
            }

            /* A run of '$' or '~'. */
            if(ch == _T('$') || ch == _T('~')) {
                OFF tmp = off+1;

                while(tmp < line->end && CH(tmp) == ch)
                    tmp++;

                /* Only runs of one or two characters produce a mark. */
                if(tmp - off <= 2) {
                    unsigned flags = MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER;

                    /* Cannot open if preceded by anything but whitespace or
                     * punctuation; cannot close if followed by anything but
                     * whitespace or punctuation. */
                    if(off > line->beg  &&  !ISUNICODEWHITESPACEBEFORE(off)  &&  !ISUNICODEPUNCTBEFORE(off))
                        flags &= ~MD_MARK_POTENTIAL_OPENER;
                    if(tmp < line->end  &&  !ISUNICODEWHITESPACE(tmp)  &&  !ISUNICODEPUNCT(tmp))
                        flags &= ~MD_MARK_POTENTIAL_CLOSER;
                    if(flags != 0)
                        ADD_MARK(ch, off, tmp, flags);
                }

                off = tmp;
                continue;
            }

            /* A run of whitespace. A mark is recorded for it unless it is
             * just a single space character. */
            if(ISWHITESPACE_(ch)) {
                OFF tmp = off+1;

                while(tmp < line->end  &&  ISWHITESPACE(tmp))
                    tmp++;

                if(tmp - off > 1  ||  ch != _T(' '))
                    ADD_MARK(ch, off, tmp, MD_MARK_RESOLVED);

                off = tmp;
                continue;
            }

            /* The null character. */
            if(ch == _T('\0')) {
                ADD_MARK(ch, off, off+1, MD_MARK_RESOLVED);
                off++;
                continue;
            }

            /* A mark character with no special meaning in this context. */
            off++;
        }
    }

    /* A terminating, already resolved mark (ch == 127) at the very end of
     * the document. */
    ADD_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED);

abort:
    return ret;
}
2948
2949static void
2950md_analyze_bracket(MD_CTX* ctx, int mark_index)
2951{
2952
2953    MD_MARK* mark = &ctx->marks[mark_index];
2954
2955    if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
2956        if(BRACKET_OPENERS.top >= 0)
2957            ctx->marks[BRACKET_OPENERS.top].flags |= MD_MARK_HASNESTEDBRACKETS;
2958
2959        md_mark_stack_push(ctx, &BRACKET_OPENERS, mark_index);
2960        return;
2961    }
2962
2963    if(BRACKET_OPENERS.top >= 0) {
2964        int opener_index = md_mark_stack_pop(ctx, &BRACKET_OPENERS);
2965        MD_MARK* opener = &ctx->marks[opener_index];
2966
2967        opener->next = mark_index;
2968        mark->prev = opener_index;
2969
2970        if(ctx->unresolved_link_tail >= 0)
2971            ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
2972        else
2973            ctx->unresolved_link_head = opener_index;
2974        ctx->unresolved_link_tail = opener_index;
2975        opener->prev = -1;
2976    }
2977}
2978
2979static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
2980                                     int mark_beg, int mark_end);
2981
2982static int
2983md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
2984{
2985    int opener_index = ctx->unresolved_link_head;
2986    OFF last_link_beg = 0;
2987    OFF last_link_end = 0;
2988    OFF last_img_beg = 0;
2989    OFF last_img_end = 0;
2990
2991    while(opener_index >= 0) {
2992        MD_MARK* opener = &ctx->marks[opener_index];
2993        int closer_index = opener->next;
2994        MD_MARK* closer = &ctx->marks[closer_index];
2995        int next_index = opener->prev;
2996        MD_MARK* next_opener;
2997        MD_MARK* next_closer;
2998        MD_LINK_ATTR attr;
2999        int is_link = FALSE;
3000
3001        if(next_index >= 0) {
3002            next_opener = &ctx->marks[next_index];
3003            next_closer = &ctx->marks[next_opener->next];
3004        } else {
3005            next_opener = NULL;
3006            next_closer = NULL;
3007        }
3008
3009        if((opener->beg < last_link_beg  &&  closer->end < last_link_end)  ||
3010           (opener->beg < last_img_beg  &&  closer->end < last_img_end)  ||
3011           (opener->beg < last_link_end  &&  opener->ch == '['))
3012        {
3013            opener_index = next_index;
3014            continue;
3015        }
3016
3017        if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3018            (opener->end - opener->beg == 1) &&
3019            next_opener != NULL &&
3020            next_opener->ch == '[' &&
3021            (next_opener->beg == opener->beg - 1) &&
3022            (next_opener->end - next_opener->beg == 1) &&
3023            next_closer != NULL &&
3024            next_closer->ch == ']' &&
3025            (next_closer->beg == closer->beg + 1) &&
3026            (next_closer->end - next_closer->beg == 1))
3027        {
3028            MD_MARK* delim = NULL;
3029            int delim_index;
3030            OFF dest_beg, dest_end;
3031
3032            is_link = TRUE;
3033
3034            delim_index = opener_index + 1;
3035            while(delim_index < closer_index) {
3036                MD_MARK* m = &ctx->marks[delim_index];
3037                if(m->ch == '|') {
3038                    delim = m;
3039                    break;
3040                }
3041                if(m->ch != 'D') {
3042                    if(m->beg - opener->end > 100)
3043                        break;
3044                    if(m->ch != 'D'  &&  (m->flags & MD_MARK_OPENER))
3045                        delim_index = m->next;
3046                }
3047                delim_index++;
3048            }
3049
3050            dest_beg = opener->end;
3051            dest_end = (delim != NULL) ? delim->beg : closer->beg;
3052            if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
3053                is_link = FALSE;
3054
3055            if(is_link) {
3056                OFF off;
3057                for(off = dest_beg; off < dest_end; off++) {
3058                    if(ISNEWLINE(off)) {
3059                        is_link = FALSE;
3060                        break;
3061                    }
3062                }
3063            }
3064
3065            if(is_link) {
3066                if(delim != NULL) {
3067                    if(delim->end < closer->beg) {
3068                        md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
3069                        md_rollback(ctx, delim_index, closer_index, MD_ROLLBACK_CROSSING);
3070                        delim->flags |= MD_MARK_RESOLVED;
3071                        opener->end = delim->beg;
3072                    } else {
3073
3074                        md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3075                        closer->beg = delim->beg;
3076                        delim = NULL;
3077                    }
3078                }
3079
3080                opener->beg = next_opener->beg;
3081                opener->next = closer_index;
3082                opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
3083
3084                closer->end = next_closer->end;
3085                closer->prev = opener_index;
3086                closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
3087
3088                last_link_beg = opener->beg;
3089                last_link_end = closer->end;
3090
3091                if(delim != NULL)
3092                    md_analyze_link_contents(ctx, lines, n_lines, delim_index+1, closer_index);
3093
3094                opener_index = next_opener->prev;
3095                continue;
3096            }
3097        }
3098
3099        if(next_opener != NULL  &&  next_opener->beg == closer->end) {
3100            if(next_closer->beg > closer->end + 1) {
3101
3102                if(!(next_opener->flags & MD_MARK_HASNESTEDBRACKETS))
3103                    is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
3104            } else {
3105
3106                if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
3107                    is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3108            }
3109
3110            if(is_link < 0)
3111                return -1;
3112
3113            if(is_link) {
3114
3115                closer->end = next_closer->end;
3116
3117                next_index = ctx->marks[next_index].prev;
3118            }
3119        } else {
3120            if(closer->end < ctx->size  &&  CH(closer->end) == _T('(')) {
3121
3122                OFF inline_link_end = UINT_MAX;
3123
3124                is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr);
3125                if(is_link < 0)
3126                    return -1;
3127
3128                if(is_link) {
3129                    int i = closer_index + 1;
3130
3131                    while(i < ctx->n_marks) {
3132                        MD_MARK* mark = &ctx->marks[i];
3133
3134                        if(mark->beg >= inline_link_end)
3135                            break;
3136                        if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) {
3137                            if(ctx->marks[mark->next].beg >= inline_link_end) {
3138
3139                                if(attr.title_needs_free)
3140                                    free(attr.title);
3141                                is_link = FALSE;
3142                                break;
3143                            }
3144
3145                            i = mark->next + 1;
3146                        } else {
3147                            i++;
3148                        }
3149                    }
3150                }
3151
3152                if(is_link) {
3153
3154                    closer->end = inline_link_end;
3155                }
3156            }
3157
3158            if(!is_link) {
3159
3160                if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
3161                    is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3162                if(is_link < 0)
3163                    return -1;
3164            }
3165        }
3166
3167        if(is_link) {
3168
3169            opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
3170            closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
3171
3172            MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
3173            ctx->marks[opener_index+1].beg = attr.dest_beg;
3174            ctx->marks[opener_index+1].end = attr.dest_end;
3175
3176            MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
3177            md_mark_store_ptr(ctx, opener_index+2, attr.title);
3178
3179            if(attr.title_needs_free)
3180                md_mark_stack_push(ctx, &ctx->ptr_stack, opener_index+2);
3181            ctx->marks[opener_index+2].prev = attr.title_size;
3182
3183            if(opener->ch == '[') {
3184                last_link_beg = opener->beg;
3185                last_link_end = closer->end;
3186            } else {
3187                last_img_beg = opener->beg;
3188                last_img_end = closer->end;
3189            }
3190
3191            md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
3192
3193            if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) {
3194                MD_MARK* first_nested;
3195                MD_MARK* last_nested;
3196
3197                first_nested = opener + 1;
3198                while(first_nested->ch == _T('D')  &&  first_nested < closer)
3199                    first_nested++;
3200
3201                last_nested = closer - 1;
3202                while(first_nested->ch == _T('D')  &&  last_nested > opener)
3203                    last_nested--;
3204
3205                if((first_nested->flags & MD_MARK_RESOLVED)  &&
3206                   first_nested->beg == opener->end  &&
3207                   ISANYOF_(first_nested->ch, _T("@:."))  &&
3208                   first_nested->next == (last_nested - ctx->marks)  &&
3209                   last_nested->end == closer->beg)
3210                {
3211                    first_nested->ch = _T('D');
3212                    first_nested->flags &= ~MD_MARK_RESOLVED;
3213                    last_nested->ch = _T('D');
3214                    last_nested->flags &= ~MD_MARK_RESOLVED;
3215                }
3216            }
3217        }
3218
3219        opener_index = next_index;
3220    }
3221
3222    return 0;
3223}
3224
3225static void
3226md_analyze_entity(MD_CTX* ctx, int mark_index)
3227{
3228    MD_MARK* opener = &ctx->marks[mark_index];
3229    MD_MARK* closer;
3230    OFF off;
3231
3232    if(mark_index + 1 >= ctx->n_marks)
3233        return;
3234    closer = &ctx->marks[mark_index+1];
3235    if(closer->ch != ';')
3236        return;
3237
3238    if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
3239        MD_ASSERT(off == closer->end);
3240
3241        md_resolve_range(ctx, mark_index, mark_index+1);
3242        opener->end = closer->end;
3243    }
3244}
3245
3246static void
3247md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
3248{
3249    MD_MARK* mark = &ctx->marks[mark_index];
3250    mark->flags |= MD_MARK_RESOLVED;
3251    mark->next = -1;
3252
3253    if(ctx->table_cell_boundaries_head < 0)
3254        ctx->table_cell_boundaries_head = mark_index;
3255    else
3256        ctx->marks[ctx->table_cell_boundaries_tail].next = mark_index;
3257    ctx->table_cell_boundaries_tail = mark_index;
3258    ctx->n_table_cell_boundaries++;
3259}
3260
3261static int
3262md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
3263{
3264    MD_MARK* mark = &ctx->marks[mark_index];
3265    int new_mark_index = mark_index + (mark->end - mark->beg - n);
3266    MD_MARK* dummy = &ctx->marks[new_mark_index];
3267
3268    MD_ASSERT(mark->end - mark->beg > n);
3269    MD_ASSERT(dummy->ch == 'D');
3270
3271    memcpy(dummy, mark, sizeof(MD_MARK));
3272    mark->end -= n;
3273    dummy->beg = mark->end;
3274
3275    return new_mark_index;
3276}
3277
3278static void
3279md_analyze_emph(MD_CTX* ctx, int mark_index)
3280{
3281    MD_MARK* mark = &ctx->marks[mark_index];
3282
3283    if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
3284        MD_MARK* opener = NULL;
3285        int opener_index = 0;
3286        MD_MARKSTACK* opener_stacks[6];
3287        int i, n_opener_stacks;
3288        unsigned flags = mark->flags;
3289
3290        n_opener_stacks = 0;
3291
3292        opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_0 | MD_MARK_EMPH_OC);
3293        if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3294            opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_1 | MD_MARK_EMPH_OC);
3295        if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3296            opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_2 | MD_MARK_EMPH_OC);
3297        opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_0);
3298        if(!(flags & MD_MARK_EMPH_OC)  ||  (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
3299            opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_1);
3300        if(!(flags & MD_MARK_EMPH_OC)  ||  (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
3301            opener_stacks[n_opener_stacks++] = md_emph_stack(ctx, mark->ch, MD_MARK_EMPH_MOD3_2);
3302
3303        for(i = 0; i < n_opener_stacks; i++) {
3304            if(opener_stacks[i]->top >= 0) {
3305                int m_index = opener_stacks[i]->top;
3306                MD_MARK* m = &ctx->marks[m_index];
3307
3308                if(opener == NULL  ||  m->end > opener->end) {
3309                    opener_index = m_index;
3310                    opener = m;
3311                }
3312            }
3313        }
3314
3315        if(opener != NULL) {
3316            SZ opener_size = opener->end - opener->beg;
3317            SZ closer_size = mark->end - mark->beg;
3318            MD_MARKSTACK* stack = md_opener_stack(ctx, opener_index);
3319
3320            if(opener_size > closer_size) {
3321                opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
3322                md_mark_stack_push(ctx, stack, opener_index);
3323            } else if(opener_size < closer_size) {
3324                md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
3325            }
3326
3327            md_mark_stack_pop(ctx, stack);
3328
3329            md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3330            md_resolve_range(ctx, opener_index, mark_index);
3331            return;
3332        }
3333    }
3334
3335    if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3336        md_mark_stack_push(ctx, md_emph_stack(ctx, mark->ch, mark->flags), mark_index);
3337}
3338
3339static void
3340md_analyze_tilde(MD_CTX* ctx, int mark_index)
3341{
3342    MD_MARK* mark = &ctx->marks[mark_index];
3343    MD_MARKSTACK* stack = md_opener_stack(ctx, mark_index);
3344
3345    if((mark->flags & MD_MARK_POTENTIAL_CLOSER)  &&  stack->top >= 0) {
3346        int opener_index = stack->top;
3347
3348        md_mark_stack_pop(ctx, stack);
3349        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3350        md_resolve_range(ctx, opener_index, mark_index);
3351        return;
3352    }
3353
3354    if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3355        md_mark_stack_push(ctx, stack, mark_index);
3356}
3357
3358static void
3359md_analyze_dollar(MD_CTX* ctx, int mark_index)
3360{
3361    MD_MARK* mark = &ctx->marks[mark_index];
3362
3363    if((mark->flags & MD_MARK_POTENTIAL_CLOSER)  &&  DOLLAR_OPENERS.top >= 0) {
3364
3365        MD_MARK* opener = &ctx->marks[DOLLAR_OPENERS.top];
3366        int opener_index = DOLLAR_OPENERS.top;
3367        MD_MARK* closer = mark;
3368        int closer_index = mark_index;
3369
3370        if(opener->end - opener->beg == closer->end - closer->beg) {
3371
3372            md_mark_stack_pop(ctx, &DOLLAR_OPENERS);
3373            md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3374            md_resolve_range(ctx, opener_index, closer_index);
3375
3376            DOLLAR_OPENERS.top = -1;
3377            return;
3378        }
3379    }
3380
3381    if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3382        md_mark_stack_push(ctx, &DOLLAR_OPENERS, mark_index);
3383}
3384
3385static MD_MARK*
3386md_scan_left_for_resolved_mark(MD_CTX* ctx, MD_MARK* mark_from, OFF off, MD_MARK** p_cursor)
3387{
3388    MD_MARK* mark;
3389
3390    for(mark = mark_from; mark >= ctx->marks; mark--) {
3391        if(mark->ch == 'D'  ||  mark->beg > off)
3392            continue;
3393        if(mark->beg <= off  &&  off < mark->end  &&  (mark->flags & MD_MARK_RESOLVED)) {
3394            if(p_cursor != NULL)
3395                *p_cursor = mark;
3396            return mark;
3397        }
3398        if(mark->end <= off)
3399            break;
3400    }
3401
3402    if(p_cursor != NULL)
3403        *p_cursor = mark;
3404    return NULL;
3405}
3406
3407static MD_MARK*
3408md_scan_right_for_resolved_mark(MD_CTX* ctx, MD_MARK* mark_from, OFF off, MD_MARK** p_cursor)
3409{
3410    MD_MARK* mark;
3411
3412    for(mark = mark_from; mark < ctx->marks + ctx->n_marks; mark++) {
3413        if(mark->ch == 'D'  ||  mark->end <= off)
3414            continue;
3415        if(mark->beg <= off  &&  off < mark->end  &&  (mark->flags & MD_MARK_RESOLVED)) {
3416            if(p_cursor != NULL)
3417                *p_cursor = mark;
3418            return mark;
3419        }
3420        if(mark->beg > off)
3421            break;
3422    }
3423
3424    if(p_cursor != NULL)
3425        *p_cursor = mark;
3426    return NULL;
3427}
3428
/* Decide whether the mark at mark_index (an '@', ':' or '.' mark, per the
 * dispatch in md_analyze_marks()) starts a permissive autolink and, if so,
 * resolve it together with the mark directly following it.
 *
 * The function grows the candidate range [beg, end) to the left (only for
 * '@') and to the right, checks what precedes and follows the range, and on
 * success rewrites both marks to zero-width marks at the range boundaries
 * before resolving them. On any failed check it returns without touching
 * the marks.
 */
static void
md_analyze_permissive_autolink(MD_CTX* ctx, int mark_index)
{
    /* Description of the URL parts accepted after the initial mark. Each part
     * optionally begins with start_char, consists of alphanumeric runs joined
     * by allowed_nonalnum_chars, needs at least min_components components
     * (delim_char separates them) and may be followed by optional_end_char.
     * NOTE(review): the rows presumably stand for host, path, query and
     * fragment, in this order -- confirm. */
    static const struct {
        const MD_CHAR start_char;
        const MD_CHAR delim_char;
        const MD_CHAR* allowed_nonalnum_chars;
        int min_components;
        const MD_CHAR optional_end_char;
    } URL_MAP[] = {
        { _T('\0'), _T('.'),  _T(".-_"),      2, _T('\0') },
        { _T('/'),  _T('/'),  _T("/.-_"),     0, _T('/') },
        { _T('?'),  _T('&'),  _T("&.-+_=()"), 1, _T('\0') },
        { _T('#'),  _T('\0'), _T(".-+_") ,    1, _T('\0') }
    };

    MD_MARK* opener = &ctx->marks[mark_index];
    /* The following mark is asserted below to be a 'D' mark. Its beg/end are
     * read as the boundaries of the line being scanned (presumably stored
     * there when the marks were collected -- TODO confirm). */
    MD_MARK* closer = &ctx->marks[mark_index + 1];
    OFF line_beg = closer->beg;
    OFF line_end = closer->end;
    OFF beg = opener->beg;
    OFF end = opener->end;
    /* Cursors let repeated scans for resolved marks resume where the previous
     * scan in the same direction stopped. */
    MD_MARK* left_cursor = opener;
    int left_boundary_ok = FALSE;
    MD_MARK* right_cursor = opener;
    int right_boundary_ok = FALSE;
    unsigned i;

    MD_ASSERT(closer->ch == 'D');

    if(opener->ch == '@') {
        MD_ASSERT(CH(opener->beg) == _T('@'));

        /* Extend the range leftwards over the part preceding '@':
         * alphanumeric characters, or a single one of ".-_+" which sits
         * between two alphanumeric characters and is not covered by any
         * resolved mark. */
        while(beg > line_beg) {
            if(ISALNUM(beg-1))
                beg--;
            else if(beg >= line_beg+2  &&  ISALNUM(beg-2)  &&
                        ISANYOF(beg-1, _T(".-_+"))  &&
                        md_scan_left_for_resolved_mark(ctx, left_cursor, beg-1, &left_cursor) == NULL  &&
                        ISALNUM(beg))
                beg--;
            else
                break;
        }
        /* Nothing in front of the '@': not an autolink. */
        if(beg == opener->beg)
            return;
    }

    /* Left boundary: the range has to start at the line beginning, after
     * Unicode whitespace, after one of "({[", or right after one of "*_~"
     * which belongs to a resolved opener mark. */
    if(beg == line_beg  ||  ISUNICODEWHITESPACEBEFORE(beg)  ||  ISANYOF(beg-1, _T("({["))) {
        left_boundary_ok = TRUE;
    } else if(ISANYOF(beg-1, _T("*_~"))) {
        MD_MARK* left_mark;

        left_mark = md_scan_left_for_resolved_mark(ctx, left_cursor, beg-1, &left_cursor);
        if(left_mark != NULL  &&  (left_mark->flags & MD_MARK_OPENER))
            left_boundary_ok = TRUE;
    }
    if(!left_boundary_ok)
        return;

    /* Extend the range rightwards, one URL_MAP[] part after another. */
    for(i = 0; i < SIZEOF_ARRAY(URL_MAP); i++) {
        int n_components = 0;
        int n_open_brackets = 0;

        if(URL_MAP[i].start_char != _T('\0')) {
            /* A part with a start character is skipped when that character
             * is not next, or when the part needs a component but no
             * alphanumeric character follows the start character. */
            if(end >= line_end  ||  CH(end) != URL_MAP[i].start_char)
                continue;
            if(URL_MAP[i].min_components > 0  &&  (end+1 >= line_end  ||  !ISALNUM(end+1)))
                continue;
            end++;
        }

        while(end < line_end) {
            if(ISALNUM(end)) {
                /* The first alphanumeric character starts the first
                 * component. */
                if(n_components == 0)
                    n_components++;
                end++;
            } else if(end < line_end  &&
                        ISANYOF(end, URL_MAP[i].allowed_nonalnum_chars)  &&
                        md_scan_right_for_resolved_mark(ctx, right_cursor, end, &right_cursor) == NULL  &&
                        ((end > line_beg && (ISALNUM(end-1) || CH(end-1) == _T(')')))  ||  CH(end) == _T('('))  &&
                        ((end+1 < line_end && (ISALNUM(end+1) || CH(end+1) == _T('(')))  ||  CH(end) == _T(')')))
            {
                /* An allowed non-alphanumeric character which is not part of
                 * any resolved mark. It has to be preceded by an alphanumeric
                 * character or ')' (unless it is '(') and followed by an
                 * alphanumeric character or '(' (unless it is ')'). */
                if(CH(end) == URL_MAP[i].delim_char)
                    n_components++;

                /* Track bracket nesting; an unmatched ')' ends the part. */
                if(CH(end) == _T('(')) {
                    n_open_brackets++;
                } else if(CH(end) == _T(')')) {
                    if(n_open_brackets <= 0)
                        break;
                    n_open_brackets--;
                }

                end++;
            } else {
                break;
            }
        }

        if(end < line_end  &&  URL_MAP[i].optional_end_char != _T('\0')  &&
                CH(end) == URL_MAP[i].optional_end_char)
            end++;

        /* Too few components or unbalanced brackets: not an autolink. */
        if(n_components < URL_MAP[i].min_components  ||  n_open_brackets != 0)
            return;

        /* For '@' only the first URL_MAP[] part is consumed. */
        if(opener->ch == '@')
            break;
    }

    /* Right boundary: the range has to end at the line end, before Unicode
     * whitespace, before one of ")}].!?,;", or at a position covered by a
     * resolved closer mark. */
    if(end == line_end  ||  ISUNICODEWHITESPACE(end)  ||  ISANYOF(end, _T(")}].!?,;"))) {
        right_boundary_ok = TRUE;
    } else {
        MD_MARK* right_mark;

        right_mark = md_scan_right_for_resolved_mark(ctx, right_cursor, end, &right_cursor);
        if(right_mark != NULL  &&  (right_mark->flags & MD_MARK_CLOSER))
            right_boundary_ok = TRUE;
    }
    if(!right_boundary_ok)
        return;

    /* All checks passed. Turn both marks into zero-width marks placed at the
     * boundaries of the link, give the second mark the opener's character
     * and resolve the pair. */
    opener->beg = beg;
    opener->end = beg;
    closer->beg = end;
    closer->end = end;
    closer->ch = opener->ch;
    md_resolve_range(ctx, mark_index, mark_index + 1);
}
3559
3560#define MD_ANALYZE_NOSKIP_EMPH  0x01
3561
3562static inline void
3563md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
3564                 int mark_beg, int mark_end, const CHAR* mark_chars, unsigned flags)
3565{
3566    int i = mark_beg;
3567    OFF last_end = lines[0].beg;
3568
3569    MD_UNUSED(lines);
3570    MD_UNUSED(n_lines);
3571
3572    while(i < mark_end) {
3573        MD_MARK* mark = &ctx->marks[i];
3574
3575        if(mark->flags & MD_MARK_RESOLVED) {
3576            if((mark->flags & MD_MARK_OPENER)  &&
3577               !((flags & MD_ANALYZE_NOSKIP_EMPH) && ISANYOF_(mark->ch, "*_~")))
3578            {
3579                MD_ASSERT(i < mark->next);
3580                i = mark->next + 1;
3581            } else {
3582                i++;
3583            }
3584            continue;
3585        }
3586
3587        if(!ISANYOF_(mark->ch, mark_chars)) {
3588            i++;
3589            continue;
3590        }
3591
3592        if(mark->beg < last_end) {
3593            i++;
3594            continue;
3595        }
3596
3597        switch(mark->ch) {
3598            case '[':
3599            case '!':
3600            case ']':   md_analyze_bracket(ctx, i); break;
3601            case '&':   md_analyze_entity(ctx, i); break;
3602            case '|':   md_analyze_table_cell_boundary(ctx, i); break;
3603            case '_':
3604            case '*':   md_analyze_emph(ctx, i); break;
3605            case '~':   md_analyze_tilde(ctx, i); break;
3606            case '$':   md_analyze_dollar(ctx, i); break;
3607            case '.':
3608            case ':':
3609            case '@':   md_analyze_permissive_autolink(ctx, i); break;
3610        }
3611
3612        if(mark->flags & MD_MARK_RESOLVED) {
3613            if(mark->flags & MD_MARK_OPENER)
3614                last_end = ctx->marks[mark->next].end;
3615            else
3616                last_end = mark->end;
3617        }
3618
3619        i++;
3620    }
3621}
3622
3623static int
3624md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int table_mode)
3625{
3626    int ret;
3627
3628    ctx->n_marks = 0;
3629
3630    MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
3631
3632    md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!"), 0);
3633    MD_CHECK(md_resolve_links(ctx, lines, n_lines));
3634    BRACKET_OPENERS.top = -1;
3635    ctx->unresolved_link_head = -1;
3636    ctx->unresolved_link_tail = -1;
3637
3638    if(table_mode) {
3639
3640        MD_ASSERT(n_lines == 1);
3641        ctx->n_table_cell_boundaries = 0;
3642        md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|"), 0);
3643        return ret;
3644    }
3645
3646    md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks);
3647
3648abort:
3649    return ret;
3650}
3651
3652static void
3653md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines,
3654                         int mark_beg, int mark_end)
3655{
3656    int i;
3657
3658    md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("&"), 0);
3659    md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$"), 0);
3660
3661    if((ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) != 0) {
3662
3663        md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("@:."), MD_ANALYZE_NOSKIP_EMPH);
3664    }
3665
3666    for(i = 0; i < (int) SIZEOF_ARRAY(ctx->opener_stacks); i++)
3667        ctx->opener_stacks[i].top = -1;
3668}
3669
3670static int
3671md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
3672                      const CHAR* dest, SZ dest_size, int is_autolink,
3673                      const CHAR* title, SZ title_size)
3674{
3675    MD_ATTRIBUTE_BUILD href_build = { 0 };
3676    MD_ATTRIBUTE_BUILD title_build = { 0 };
3677    MD_SPAN_A_DETAIL det;
3678    int ret = 0;
3679
3680    memset(&det, 0, sizeof(MD_SPAN_A_DETAIL));
3681    MD_CHECK(md_build_attribute(ctx, dest, dest_size,
3682                    (is_autolink ? MD_BUILD_ATTR_NO_ESCAPES : 0),
3683                    &det.href, &href_build));
3684    MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build));
3685    det.is_autolink = is_autolink;
3686    if(enter)
3687        MD_ENTER_SPAN(type, &det);
3688    else
3689        MD_LEAVE_SPAN(type, &det);
3690
3691abort:
3692    md_free_attribute(ctx, &href_build);
3693    md_free_attribute(ctx, &title_build);
3694    return ret;
3695}
3696
3697static int
3698md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
3699{
3700    MD_ATTRIBUTE_BUILD target_build = { 0 };
3701    MD_SPAN_WIKILINK_DETAIL det;
3702    int ret = 0;
3703
3704    memset(&det, 0, sizeof(MD_SPAN_WIKILINK_DETAIL));
3705    MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &det.target, &target_build));
3706
3707    if (enter)
3708        MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det);
3709    else
3710        MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det);
3711
3712abort:
3713    md_free_attribute(ctx, &target_build);
3714    return ret;
3715}
3716
/* Emit the inline contents of the given lines: plain text between marks is
 * sent out through MD_TEXT() and every resolved mark in ctx->marks[] is
 * translated into the corresponding text or enter/leave span call.
 *
 * lines/n_lines describe the lines to process; the marks are expected to be
 * already analyzed in ctx->marks[].
 *
 * Returns ret, which starts as 0 and is otherwise only set by the MD_TEXT /
 * MD_ENTER_SPAN / MD_LEAVE_SPAN / MD_CHECK / MD_TEMP_BUFFER macros (defined
 * elsewhere; presumably they jump to the abort label on failure -- confirm).
 */
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
{
    MD_TEXTTYPE text_type;
    const MD_LINE* line = lines;
    MD_MARK* prev_mark = NULL;
    MD_MARK* mark;
    OFF off = lines[0].beg;
    OFF end = lines[n_lines-1].end;
    OFF tmp;
    int enforce_hardbreak = 0;
    int ret = 0;

    /* Find the first resolved mark. The loop has no upper bound, so it
     * relies on a resolved mark always being present (presumably a sentinel
     * mark at the end of the array -- TODO confirm). */
    mark = ctx->marks;
    while(!(mark->flags & MD_MARK_RESOLVED))
        mark++;

    text_type = MD_TEXT_NORMAL;

    while(1) {

        /* Emit the text up to the next mark or the end of the current line,
         * whichever comes first. */
        tmp = (line->end < mark->beg ? line->end : mark->beg);
        if(tmp > off) {
            MD_TEXT(text_type, STR(off), tmp - off);
            off = tmp;
        }

        /* The mark has been reached: handle it and advance to the next
         * resolved mark. */
        if(off >= mark->beg) {
            switch(mark->ch) {
                case '\\':
                    /* A backslash before a newline requests a hard break at
                     * the end of this line; otherwise the character after
                     * the backslash is emitted as text. */
                    if(ISNEWLINE(mark->beg+1))
                        enforce_hardbreak = 1;
                    else
                        MD_TEXT(text_type, STR(mark->beg+1), 1);
                    break;

                case ' ':
                    /* The mark is output as a single space. */
                    MD_TEXT(text_type, _T(" "), 1);
                    break;

                case '`':
                    /* Code span; text inside is emitted as MD_TEXT_CODE. */
                    if(mark->flags & MD_MARK_OPENER) {
                        MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
                        text_type = MD_TEXT_CODE;
                    } else {
                        MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '_':
                    /* With MD_FLAG_UNDERLINE every character of the mark
                     * enters/leaves one MD_SPAN_U. Without the flag, '_' is
                     * handled exactly like '*' below. */
                    if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
                        if(mark->flags & MD_MARK_OPENER) {
                            while(off < mark->end) {
                                MD_ENTER_SPAN(MD_SPAN_U, NULL);
                                off++;
                            }
                        } else {
                            while(off < mark->end) {
                                MD_LEAVE_SPAN(MD_SPAN_U, NULL);
                                off++;
                            }
                        }
                        break;
                    }
                    MD_FALLTHROUGH();

                case '*':
                    /* A mark of odd length contributes one MD_SPAN_EM; every
                     * remaining pair of characters is one MD_SPAN_STRONG.
                     * Openers enter EM first and then STRONG; closers leave
                     * in the mirrored order. */
                    if(mark->flags & MD_MARK_OPENER) {
                        if((mark->end - off) % 2) {
                            MD_ENTER_SPAN(MD_SPAN_EM, NULL);
                            off++;
                        }
                        while(off + 1 < mark->end) {
                            MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
                            off += 2;
                        }
                    } else {
                        while(off + 1 < mark->end) {
                            MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
                            off += 2;
                        }
                        if((mark->end - off) % 2) {
                            MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
                            off++;
                        }
                    }
                    break;

                case '~':
                    if(mark->flags & MD_MARK_OPENER)
                        MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
                    else
                        MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
                    break;

                case '$':
                    /* An odd mark length selects MD_SPAN_LATEXMATH, an even
                     * one MD_SPAN_LATEXMATH_DISPLAY. Text inside is emitted
                     * as MD_TEXT_LATEXMATH. */
                    if(mark->flags & MD_MARK_OPENER) {
                        MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
                        text_type = MD_TEXT_LATEXMATH;
                    } else {
                        MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '[':
                case '!':
                case ']':
                {
                    /* Link, image or wiki link. For ']' the opener is found
                     * through mark->prev. */
                    const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
                    const MD_MARK* closer = &ctx->marks[opener->next];
                    const MD_MARK* dest_mark;
                    const MD_MARK* title_mark;

                    /* A '[' ... ']' pair whose marks are both at least two
                     * characters long is handled as a wiki link. */
                    if ((opener->ch == '[' && closer->ch == ']') &&
                        opener->end - opener->beg >= 2 &&
                        closer->end - closer->beg >= 2)
                    {
                        /* An opener mark longer than two characters also
                         * holds the target; otherwise the target is the text
                         * between the opener and the closer. */
                        int has_label = (opener->end - opener->beg > 2);
                        SZ target_sz;

                        if(has_label)
                            target_sz = opener->end - (opener->beg+2);
                        else
                            target_sz = closer->beg - opener->end;

                        MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'),
                                 has_label ? STR(opener->beg+2) : STR(opener->end),
                                 target_sz));

                        break;
                    }

                    /* Ordinary link or image: the two marks right after the
                     * opener are asserted to be 'D' marks; the first one
                     * spans the destination, the second one provides the
                     * title pointer (via md_mark_get_ptr()) and, in its prev
                     * member, the title size. */
                    dest_mark = opener+1;
                    MD_ASSERT(dest_mark->ch == 'D');
                    title_mark = opener+2;
                    MD_ASSERT(title_mark->ch == 'D');

                    MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
                                (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
                                STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
                                md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)),
								title_mark->prev));

                    /* The closer mark may extend over following lines; move
                     * the current line forward to the one where it ends. */
                    if(mark->ch == ']') {
                        while(mark->end > line->end)
                            line++;
                    }

                    break;
                }

                case '<':
                case '>':
                    /* Without MD_MARK_AUTOLINK this delimits raw HTML and
                     * only switches the text type. With it, the autolink
                     * code below is used. */
                    if(!(mark->flags & MD_MARK_AUTOLINK)) {

                        if(mark->flags & MD_MARK_OPENER)
                            text_type = MD_TEXT_HTML;
                        else
                            text_type = MD_TEXT_NORMAL;
                        break;
                    }

                    MD_FALLTHROUGH();

                case '@':
                case ':':
                case '.':
                {
                    /* Autolink; the destination is the text between the
                     * opener and its closer. */
                    MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
                    MD_MARK* closer = &ctx->marks[opener->next];
                    const CHAR* dest = STR(opener->end);
                    SZ dest_size = closer->beg - opener->end;

                    /* Flag the closer once its opener has been processed;
                     * the span callback below is only made while the flag is
                     * set, so a closer met before its opener emits nothing. */
                    if(mark->flags & MD_MARK_OPENER)
                        closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;

                    /* '@' and '.' autolinks, and '<' autolinks flagged with
                     * MD_MARK_AUTOLINK_MISSING_MAILTO, get a 7 character
                     * scheme prefix ("http://" for '.', "mailto:" otherwise)
                     * prepended in ctx->buffer. */
                    if(opener->ch == '@' || opener->ch == '.' ||
                        (opener->ch == '<' && (opener->flags & MD_MARK_AUTOLINK_MISSING_MAILTO)))
                    {
                        dest_size += 7;
                        MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
                        memcpy(ctx->buffer,
                                (opener->ch == '.' ? _T("http://") : _T("mailto:")),
                                7 * sizeof(CHAR));
                        memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
                        dest = ctx->buffer;
                    }

                    if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
                        MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
                                    MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
                    break;
                }

                case '&':
                    /* The whole mark is emitted as MD_TEXT_ENTITY. */
                    MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
                    break;

                case '\0':
                    MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1);
                    break;

                case 127:
                    /* A mark with this character ends the processing right
                     * away (presumably the sentinel mark -- TODO confirm). */
                    goto abort;
            }

            off = mark->end;

            /* Advance to the next resolved mark which does not begin before
             * the current offset. */
            prev_mark = mark;
            mark++;
            while(!(mark->flags & MD_MARK_RESOLVED)  ||  mark->beg < off)
                mark++;
        }

        /* End of the current line reached. */
        if(off >= line->end) {

            /* The end of the last line finishes the whole job. */
            if(off >= end)
                break;

            if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
                MD_ASSERT(prev_mark != NULL);
                MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$')  &&  (prev_mark->flags & MD_MARK_OPENER));
                MD_ASSERT(ISANYOF2_(mark->ch, '`', '$')  &&  (mark->flags & MD_MARK_CLOSER));

                /* Inside a code or math span the blank characters following
                 * the line end are emitted as they are ... */
                tmp = off;
                while(off < ctx->size  &&  ISBLANK(off))
                    off++;
                if(off > tmp)
                    MD_TEXT(text_type, STR(tmp), off-tmp);

                /* ... and if there were none, a single space is emitted
                 * in place of the line break. */
                if(off == line->end)
                    MD_TEXT(text_type, _T(" "), 1);
            } else if(text_type == MD_TEXT_HTML) {

                /* Inside raw HTML the blank characters following the line
                 * end are emitted as they are, followed by a newline. */
                tmp = off;
                while(tmp < end  &&  ISBLANK(tmp))
                    tmp++;
                if(tmp > off)
                    MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
                MD_TEXT(MD_TEXT_HTML, _T("\n"), 1);
            } else {

                /* Anywhere else a line break is emitted: a soft one unless
                 * one of the conditions below makes it a hard one. */
                MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;

                if(text_type == MD_TEXT_NORMAL) {
                    if(enforce_hardbreak  ||  (ctx->parser.flags & MD_FLAG_HARD_SOFT_BREAKS)) {
                        break_type = MD_TEXT_BR;
                    } else {
                        /* Two or more blank characters after the line end,
                         * the last two being spaces and followed by a
                         * newline, also make the break a hard one. */
                        while(off < ctx->size  &&  ISBLANK(off))
                            off++;
                        if(off >= line->end + 2  &&  CH(off-2) == _T(' ')  &&  CH(off-1) == _T(' ')  &&  ISNEWLINE(off))
                            break_type = MD_TEXT_BR;
                    }
                }

                MD_TEXT(break_type, _T("\n"), 1);
            }

            /* Continue at the beginning of the next line. */
            line++;
            off = line->beg;

            enforce_hardbreak = 0;
        }
    }

abort:
    return ret;
}
3987
3988static void
3989md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
3990{
3991    static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
3992    OFF off = beg;
3993
3994    while(n_align > 0) {
3995        int index = 0;
3996
3997        while(CH(off) != _T('-'))
3998            off++;
3999        if(off > beg  &&  CH(off-1) == _T(':'))
4000            index |= 1;
4001        while(off < end  &&  CH(off) == _T('-'))
4002            off++;
4003        if(off < end  &&  CH(off) == _T(':'))
4004            index |= 2;
4005
4006        *align = align_map[index];
4007        align++;
4008        n_align--;
4009    }
4010
4011}
4012
4013static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines);
4014
4015static int
4016md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
4017{
4018    MD_LINE line;
4019    MD_BLOCK_TD_DETAIL det;
4020    int ret = 0;
4021
4022    while(beg < end  &&  ISWHITESPACE(beg))
4023        beg++;
4024    while(end > beg  &&  ISWHITESPACE(end-1))
4025        end--;
4026
4027    det.align = align;
4028    line.beg = beg;
4029    line.end = end;
4030
4031    MD_ENTER_BLOCK(cell_type, &det);
4032    MD_CHECK(md_process_normal_block_contents(ctx, &line, 1));
4033    MD_LEAVE_BLOCK(cell_type, &det);
4034
4035abort:
4036    return ret;
4037}
4038
4039static int
4040md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
4041                     const MD_ALIGN* align, int col_count)
4042{
4043    MD_LINE line;
4044    OFF* pipe_offs = NULL;
4045    int i, j, k, n;
4046    int ret = 0;
4047
4048    line.beg = beg;
4049    line.end = end;
4050
4051    MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE));
4052
4053    n = ctx->n_table_cell_boundaries + 2;
4054    pipe_offs = (OFF*) malloc(n * sizeof(OFF));
4055    if(pipe_offs == NULL) {
4056        MD_LOG("malloc() failed.");
4057        ret = -1;
4058        goto abort;
4059    }
4060    j = 0;
4061    pipe_offs[j++] = beg;
4062    for(i = ctx->table_cell_boundaries_head; i >= 0; i = ctx->marks[i].next) {
4063        MD_MARK* mark = &ctx->marks[i];
4064        pipe_offs[j++] = mark->end;
4065    }
4066    pipe_offs[j++] = end+1;
4067
4068    MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
4069    k = 0;
4070    for(i = 0; i < j-1  &&  k < col_count; i++) {
4071        if(pipe_offs[i] < pipe_offs[i+1]-1)
4072            MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1));
4073    }
4074
4075    while(k < col_count)
4076        MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0));
4077    MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
4078
4079abort:
4080    free(pipe_offs);
4081
4082    ctx->table_cell_boundaries_head = -1;
4083    ctx->table_cell_boundaries_tail = -1;
4084
4085    return ret;
4086}
4087
4088static int
4089md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, MD_SIZE n_lines)
4090{
4091    MD_ALIGN* align;
4092    MD_SIZE line_index;
4093    int ret = 0;
4094
4095    MD_ASSERT(n_lines >= 2);
4096
4097    align = malloc(col_count * sizeof(MD_ALIGN));
4098    if(align == NULL) {
4099        MD_LOG("malloc() failed.");
4100        ret = -1;
4101        goto abort;
4102    }
4103
4104    md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align, col_count);
4105
4106    MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
4107    MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
4108                        lines[0].beg, lines[0].end, align, col_count));
4109    MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
4110
4111    if(n_lines > 2) {
4112        MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
4113        for(line_index = 2; line_index < n_lines; line_index++) {
4114            MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
4115                     lines[line_index].beg, lines[line_index].end, align, col_count));
4116        }
4117        MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
4118    }
4119
4120abort:
4121    free(align);
4122    return ret;
4123}
4124
/* Block flag bits (presumably stored in the flags member of the structure
 * below -- TODO confirm against the code which sets them). */
#define MD_BLOCK_CONTAINER_OPENER   0x01
#define MD_BLOCK_CONTAINER_CLOSER   0x02
/* Mask covering both container bits. */
#define MD_BLOCK_CONTAINER          (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)
#define MD_BLOCK_LOOSE_LIST         0x04
#define MD_BLOCK_SETEXT_HEADER      0x08

/* Description of one block. For leaf blocks the lines of the block are
 * stored directly after this structure: the processing code reads them
 * from (block + 1). */
struct MD_BLOCK_tag {
    MD_BLOCKTYPE type  :  8;
    unsigned flags     :  8;

    /* Type-specific value, as read by md_process_leaf_block():
     *   MD_BLOCK_H:      header level
     *   MD_BLOCK_CODE:   non-zero for a fenced code block
     *   MD_BLOCK_TABLE:  column count
     * Other block types may use it too (not visible here).
     */
    unsigned data      : 16;

    /* For leaf blocks, the number of lines belonging to the block. */
    MD_SIZE n_lines;
};
4139
/* Description of one container block kept in ctx->containers[].
 * NOTE(review): most members are not used in this part of the file; the
 * notes below marked "presumably" are guesses from the member names and
 * should be confirmed against the code maintaining the containers. */
struct MD_CONTAINER_tag {
    CHAR ch;                    /* Presumably the character which opened the container. */
    unsigned is_loose    : 8;   /* Non-zero for a loose list; zero makes md_process_leaf_block() treat it as tight. */
    unsigned is_task     : 8;   /* Presumably non-zero for a task list item. */
    unsigned start;             /* Presumably the start number of an ordered list. */
    unsigned mark_indent;
    unsigned contents_indent;
    OFF block_byte_off;
    OFF task_mark_off;          /* Presumably the offset of the task mark in the document. */
};
4150
4151static int
4152md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines)
4153{
4154    int i;
4155    int ret;
4156
4157    MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
4158    MD_CHECK(md_process_inlines(ctx, lines, n_lines));
4159
4160abort:
4161
4162    for(i = ctx->ptr_stack.top; i >= 0; i = ctx->marks[i].next)
4163        free(md_mark_get_ptr(ctx, i));
4164    ctx->ptr_stack.top = -1;
4165
4166    return ret;
4167}
4168
4169static int
4170md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, MD_SIZE n_lines)
4171{
4172    static const CHAR indent_chunk_str[] = _T("                ");
4173    static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1;
4174
4175    MD_SIZE line_index;
4176    int ret = 0;
4177
4178    for(line_index = 0; line_index < n_lines; line_index++) {
4179        const MD_VERBATIMLINE* line = &lines[line_index];
4180        int indent = line->indent;
4181
4182        MD_ASSERT(indent >= 0);
4183
4184        while(indent > (int) indent_chunk_size) {
4185            MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
4186            indent -= indent_chunk_size;
4187        }
4188        if(indent > 0)
4189            MD_TEXT(text_type, indent_chunk_str, indent);
4190
4191        MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
4192
4193        MD_TEXT(text_type, _T("\n"), 1);
4194    }
4195
4196abort:
4197    return ret;
4198}
4199
4200static int
4201md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, MD_SIZE n_lines)
4202{
4203    if(is_fenced) {
4204
4205        lines++;
4206        n_lines--;
4207    } else {
4208
4209        while(n_lines > 0  &&  lines[0].beg == lines[0].end) {
4210            lines++;
4211            n_lines--;
4212        }
4213        while(n_lines > 0  &&  lines[n_lines-1].beg == lines[n_lines-1].end) {
4214            n_lines--;
4215        }
4216    }
4217
4218    if(n_lines == 0)
4219        return 0;
4220
4221    return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
4222}
4223
4224static int
4225md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
4226                            MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
4227{
4228    const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
4229    OFF beg = fence_line->beg;
4230    OFF end = fence_line->end;
4231    OFF lang_end;
4232    CHAR fence_ch = CH(fence_line->beg);
4233    int ret = 0;
4234
4235    while(beg < ctx->size  &&  CH(beg) == fence_ch)
4236        beg++;
4237
4238    while(beg < ctx->size  &&  CH(beg) == _T(' '))
4239        beg++;
4240
4241    while(end > beg  &&  CH(end-1) == _T(' '))
4242        end--;
4243
4244    MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info, info_build));
4245
4246    lang_end = beg;
4247    while(lang_end < end  &&  !ISWHITESPACE(lang_end))
4248        lang_end++;
4249    MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang, lang_build));
4250
4251    det->fence_char = fence_ch;
4252
4253abort:
4254    return ret;
4255}
4256
4257static int
4258md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
4259{
4260    union {
4261        MD_BLOCK_H_DETAIL header;
4262        MD_BLOCK_CODE_DETAIL code;
4263        MD_BLOCK_TABLE_DETAIL table;
4264    } det;
4265    MD_ATTRIBUTE_BUILD info_build;
4266    MD_ATTRIBUTE_BUILD lang_build;
4267    int is_in_tight_list;
4268    int clean_fence_code_detail = FALSE;
4269    int ret = 0;
4270
4271    memset(&det, 0, sizeof(det));
4272
4273    if(ctx->n_containers == 0)
4274        is_in_tight_list = FALSE;
4275    else
4276        is_in_tight_list = !ctx->containers[ctx->n_containers-1].is_loose;
4277
4278    switch(block->type) {
4279        case MD_BLOCK_H:
4280            det.header.level = block->data;
4281            break;
4282
4283        case MD_BLOCK_CODE:
4284
4285            if(block->data != 0) {
4286                memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
4287                clean_fence_code_detail = TRUE;
4288                MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
4289            }
4290            break;
4291
4292        case MD_BLOCK_TABLE:
4293            det.table.col_count = block->data;
4294            det.table.head_row_count = 1;
4295            det.table.body_row_count = block->n_lines - 2;
4296            break;
4297
4298        default:
4299
4300            break;
4301    }
4302
4303    if(!is_in_tight_list  ||  block->type != MD_BLOCK_P)
4304        MD_ENTER_BLOCK(block->type, (void*) &det);
4305
4306    switch(block->type) {
4307        case MD_BLOCK_HR:
4308
4309            break;
4310
4311        case MD_BLOCK_CODE:
4312            MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
4313                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4314            break;
4315
4316        case MD_BLOCK_HTML:
4317            MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
4318                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4319            break;
4320
4321        case MD_BLOCK_TABLE:
4322            MD_CHECK(md_process_table_block_contents(ctx, block->data,
4323                            (const MD_LINE*)(block + 1), block->n_lines));
4324            break;
4325
4326        default:
4327            MD_CHECK(md_process_normal_block_contents(ctx,
4328                            (const MD_LINE*)(block + 1), block->n_lines));
4329            break;
4330    }
4331
4332    if(!is_in_tight_list  ||  block->type != MD_BLOCK_P)
4333        MD_LEAVE_BLOCK(block->type, (void*) &det);
4334
4335abort:
4336    if(clean_fence_code_detail) {
4337        md_free_attribute(ctx, &info_build);
4338        md_free_attribute(ctx, &lang_build);
4339    }
4340    return ret;
4341}
4342
4343static int
4344md_process_all_blocks(MD_CTX* ctx)
4345{
4346    int byte_off = 0;
4347    int ret = 0;
4348
4349    ctx->n_containers = 0;
4350
4351    while(byte_off < ctx->n_block_bytes) {
4352        MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off);
4353        union {
4354            MD_BLOCK_UL_DETAIL ul;
4355            MD_BLOCK_OL_DETAIL ol;
4356            MD_BLOCK_LI_DETAIL li;
4357        } det;
4358
4359        switch(block->type) {
4360            case MD_BLOCK_UL:
4361                det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4362                det.ul.mark = (CHAR) block->data;
4363                break;
4364
4365            case MD_BLOCK_OL:
4366                det.ol.start = block->n_lines;
4367                det.ol.is_tight =  (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4368                det.ol.mark_delimiter = (CHAR) block->data;
4369                break;
4370
4371            case MD_BLOCK_LI:
4372                det.li.is_task = (block->data != 0);
4373                det.li.task_mark = (CHAR) block->data;
4374                det.li.task_mark_offset = (OFF) block->n_lines;
4375                break;
4376
4377            default:
4378
4379                break;
4380        }
4381
4382        if(block->flags & MD_BLOCK_CONTAINER) {
4383            if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
4384                MD_LEAVE_BLOCK(block->type, &det);
4385
4386                if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE)
4387                    ctx->n_containers--;
4388            }
4389
4390            if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
4391                MD_ENTER_BLOCK(block->type, &det);
4392
4393                if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) {
4394                    ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
4395                    ctx->n_containers++;
4396                } else if(block->type == MD_BLOCK_QUOTE) {
4397
4398                    ctx->containers[ctx->n_containers].is_loose = TRUE;
4399                    ctx->n_containers++;
4400                }
4401            }
4402        } else {
4403            MD_CHECK(md_process_leaf_block(ctx, block));
4404
4405            if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML)
4406                byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
4407            else
4408                byte_off += block->n_lines * sizeof(MD_LINE);
4409        }
4410
4411        byte_off += sizeof(MD_BLOCK);
4412    }
4413
4414    ctx->n_block_bytes = 0;
4415
4416abort:
4417    return ret;
4418}
4419
4420static void*
4421md_push_block_bytes(MD_CTX* ctx, int n_bytes)
4422{
4423    void* ptr;
4424
4425    if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
4426        void* new_block_bytes;
4427
4428        ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0
4429                ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2
4430                : 512);
4431        new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes);
4432        if(new_block_bytes == NULL) {
4433            MD_LOG("realloc() failed.");
4434            return NULL;
4435        }
4436
4437        if(ctx->current_block != NULL) {
4438            OFF off_current_block = (OFF) ((char*) ctx->current_block - (char*) ctx->block_bytes);
4439            ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block);
4440        }
4441
4442        ctx->block_bytes = new_block_bytes;
4443    }
4444
4445    ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
4446    ctx->n_block_bytes += n_bytes;
4447    return ptr;
4448}
4449
4450static int
4451md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
4452{
4453    MD_BLOCK* block;
4454
4455    MD_ASSERT(ctx->current_block == NULL);
4456
4457    block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
4458    if(block == NULL)
4459        return -1;
4460
4461    switch(line->type) {
4462        case MD_LINE_HR:
4463            block->type = MD_BLOCK_HR;
4464            break;
4465
4466        case MD_LINE_ATXHEADER:
4467        case MD_LINE_SETEXTHEADER:
4468            block->type = MD_BLOCK_H;
4469            break;
4470
4471        case MD_LINE_FENCEDCODE:
4472        case MD_LINE_INDENTEDCODE:
4473            block->type = MD_BLOCK_CODE;
4474            break;
4475
4476        case MD_LINE_TEXT:
4477            block->type = MD_BLOCK_P;
4478            break;
4479
4480        case MD_LINE_HTML:
4481            block->type = MD_BLOCK_HTML;
4482            break;
4483
4484        case MD_LINE_BLANK:
4485        case MD_LINE_SETEXTUNDERLINE:
4486        case MD_LINE_TABLEUNDERLINE:
4487        default:
4488            MD_UNREACHABLE();
4489            break;
4490    }
4491
4492    block->flags = 0;
4493    block->data = line->data;
4494    block->n_lines = 0;
4495
4496    ctx->current_block = block;
4497    return 0;
4498}
4499
4500static int
4501md_consume_link_reference_definitions(MD_CTX* ctx)
4502{
4503    MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
4504    MD_SIZE n_lines = ctx->current_block->n_lines;
4505    MD_SIZE n = 0;
4506
4507    while(n < n_lines) {
4508        int n_link_ref_lines;
4509
4510        n_link_ref_lines = md_is_link_reference_definition(ctx,
4511                                    lines + n, n_lines - n);
4512
4513        if(n_link_ref_lines == 0)
4514            break;
4515
4516        if(n_link_ref_lines < 0)
4517            return -1;
4518
4519        n += n_link_ref_lines;
4520    }
4521
4522    if(n > 0) {
4523        if(n == n_lines) {
4524
4525            ctx->n_block_bytes -= n * sizeof(MD_LINE);
4526            ctx->n_block_bytes -= sizeof(MD_BLOCK);
4527            ctx->current_block = NULL;
4528        } else {
4529
4530            memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE));
4531            ctx->current_block->n_lines -= n;
4532            ctx->n_block_bytes -= n * sizeof(MD_LINE);
4533        }
4534    }
4535
4536    return 0;
4537}
4538
4539static int
4540md_end_current_block(MD_CTX* ctx)
4541{
4542    int ret = 0;
4543
4544    if(ctx->current_block == NULL)
4545        return ret;
4546
4547    if(ctx->current_block->type == MD_BLOCK_P  ||
4548       (ctx->current_block->type == MD_BLOCK_H  &&  (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)))
4549    {
4550        MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
4551        if(lines[0].beg < ctx->size  &&  CH(lines[0].beg) == _T('[')) {
4552            MD_CHECK(md_consume_link_reference_definitions(ctx));
4553            if(ctx->current_block == NULL)
4554                return ret;
4555        }
4556    }
4557
4558    if(ctx->current_block->type == MD_BLOCK_H  &&  (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) {
4559        MD_SIZE n_lines = ctx->current_block->n_lines;
4560
4561        if(n_lines > 1) {
4562
4563            ctx->current_block->n_lines--;
4564            ctx->n_block_bytes -= sizeof(MD_LINE);
4565        } else {
4566
4567            ctx->current_block->type = MD_BLOCK_P;
4568            return 0;
4569        }
4570    }
4571
4572    ctx->current_block = NULL;
4573
4574abort:
4575    return ret;
4576}
4577
4578static int
4579md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
4580{
4581    MD_ASSERT(ctx->current_block != NULL);
4582
4583    if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) {
4584        MD_VERBATIMLINE* line;
4585
4586        line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE));
4587        if(line == NULL)
4588            return -1;
4589
4590        line->indent = analysis->indent;
4591        line->beg = analysis->beg;
4592        line->end = analysis->end;
4593    } else {
4594        MD_LINE* line;
4595
4596        line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE));
4597        if(line == NULL)
4598            return -1;
4599
4600        line->beg = analysis->beg;
4601        line->end = analysis->end;
4602    }
4603    ctx->current_block->n_lines++;
4604
4605    return 0;
4606}
4607
4608static int
4609md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
4610                        unsigned data, unsigned flags)
4611{
4612    MD_BLOCK* block;
4613    int ret = 0;
4614
4615    MD_CHECK(md_end_current_block(ctx));
4616
4617    block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
4618    if(block == NULL)
4619        return -1;
4620
4621    block->type = type;
4622    block->flags = flags;
4623    block->data = data;
4624    block->n_lines = start;
4625
4626abort:
4627    return ret;
4628}
4629
4630static int
4631md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
4632{
4633    OFF off = beg + 1;
4634    int n = 1;
4635
4636    while(off < ctx->size  &&  (CH(off) == CH(beg) || CH(off) == _T(' ') || CH(off) == _T('\t'))) {
4637        if(CH(off) == CH(beg))
4638            n++;
4639        off++;
4640    }
4641
4642    if(n < 3) {
4643        *p_killer = off;
4644        return FALSE;
4645    }
4646
4647    if(off < ctx->size  &&  !ISNEWLINE(off)) {
4648        *p_killer = off;
4649        return FALSE;
4650    }
4651
4652    *p_end = off;
4653    return TRUE;
4654}
4655
4656static int
4657md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
4658{
4659    int n;
4660    OFF off = beg + 1;
4661
4662    while(off < ctx->size  &&  CH(off) == _T('#')  &&  off - beg < 7)
4663        off++;
4664    n = off - beg;
4665
4666    if(n > 6)
4667        return FALSE;
4668    *p_level = n;
4669
4670    if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)  &&  off < ctx->size  &&
4671       !ISBLANK(off)  &&  !ISNEWLINE(off))
4672        return FALSE;
4673
4674    while(off < ctx->size  &&  ISBLANK(off))
4675        off++;
4676    *p_beg = off;
4677    *p_end = off;
4678    return TRUE;
4679}
4680
4681static int
4682md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
4683{
4684    OFF off = beg + 1;
4685
4686    while(off < ctx->size  &&  CH(off) == CH(beg))
4687        off++;
4688
4689    while(off < ctx->size  &&  ISBLANK(off))
4690        off++;
4691
4692    if(off < ctx->size  &&  !ISNEWLINE(off))
4693        return FALSE;
4694
4695    *p_level = (CH(beg) == _T('=') ? 1 : 2);
4696    *p_end = off;
4697    return TRUE;
4698}
4699
4700static int
4701md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
4702{
4703    OFF off = beg;
4704    int found_pipe = FALSE;
4705    unsigned col_count = 0;
4706
4707    if(off < ctx->size  &&  CH(off) == _T('|')) {
4708        found_pipe = TRUE;
4709        off++;
4710        while(off < ctx->size  &&  ISWHITESPACE(off))
4711            off++;
4712    }
4713
4714    while(1) {
4715        int delimited = FALSE;
4716
4717        if(off < ctx->size  &&  CH(off) == _T(':'))
4718            off++;
4719        if(off >= ctx->size  ||  CH(off) != _T('-'))
4720            return FALSE;
4721        while(off < ctx->size  &&  CH(off) == _T('-'))
4722            off++;
4723        if(off < ctx->size  &&  CH(off) == _T(':'))
4724            off++;
4725
4726        col_count++;
4727        if(col_count > TABLE_MAXCOLCOUNT) {
4728            MD_LOG("Suppressing table (column_count >" STRINGIZE(TABLE_MAXCOLCOUNT) ")");
4729            return FALSE;
4730        }
4731
4732        while(off < ctx->size  &&  ISWHITESPACE(off))
4733            off++;
4734        if(off < ctx->size  &&  CH(off) == _T('|')) {
4735            delimited = TRUE;
4736            found_pipe =  TRUE;
4737            off++;
4738            while(off < ctx->size  &&  ISWHITESPACE(off))
4739                off++;
4740        }
4741
4742        if(off >= ctx->size  ||  ISNEWLINE(off))
4743            break;
4744
4745        if(!delimited)
4746            return FALSE;
4747    }
4748
4749    if(!found_pipe)
4750        return FALSE;
4751
4752    *p_end = off;
4753    *p_col_count = col_count;
4754    return TRUE;
4755}
4756
4757static int
4758md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
4759{
4760    OFF off = beg;
4761
4762    while(off < ctx->size && CH(off) == CH(beg))
4763        off++;
4764
4765    if(off - beg < 3)
4766        return FALSE;
4767
4768    ctx->code_fence_length = off - beg;
4769
4770    while(off < ctx->size  &&  CH(off) == _T(' '))
4771        off++;
4772
4773    while(off < ctx->size  &&  !ISNEWLINE(off)) {
4774
4775        if(CH(beg) == _T('`')  &&  CH(off) == _T('`'))
4776            return FALSE;
4777        off++;
4778    }
4779
4780    *p_end = off;
4781    return TRUE;
4782}
4783
4784static int
4785md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
4786{
4787    OFF off = beg;
4788    int ret = FALSE;
4789
4790    while(off < ctx->size  &&  CH(off) == ch)
4791        off++;
4792    if(off - beg < ctx->code_fence_length)
4793        goto out;
4794
4795    while(off < ctx->size  &&  CH(off) == _T(' '))
4796        off++;
4797
4798    if(off < ctx->size  &&  !ISNEWLINE(off))
4799        goto out;
4800
4801    ret = TRUE;
4802
4803out:
4804
4805    *p_end = off;
4806    return ret;
4807}
4808
4809typedef struct TAG_tag TAG;
4810struct TAG_tag {
4811    const CHAR* name;
4812    unsigned len    : 8;
4813};
4814
4815#ifdef X
4816    #undef X
4817#endif
4818#define X(name)     { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
4819#define Xend        { NULL, 0 }
4820
4821static const TAG t1[] = { X("pre"), X("script"), X("style"), X("textarea"), Xend };
4822
4823static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
4824static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
4825static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
4826static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
4827                          X("div"), X("dl"), X("dt"), Xend };
4828static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
4829                          X("form"), X("frame"), X("frameset"), Xend };
4830static const TAG h6[] = { X("h1"), X("h2"), X("h3"), X("h4"), X("h5"), X("h6"),
4831                          X("head"), X("header"), X("hr"), X("html"), Xend };
4832static const TAG i6[] = { X("iframe"), Xend };
4833static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
4834static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
4835static const TAG n6[] = { X("nav"), X("noframes"), Xend };
4836static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
4837static const TAG p6[] = { X("p"), X("param"), Xend };
4838static const TAG s6[] = { X("search"), X("section"), X("summary"), Xend };
4839static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
4840                          X("thead"), X("title"), X("tr"), X("track"), Xend };
4841static const TAG u6[] = { X("ul"), Xend };
4842static const TAG xx[] = { Xend };
4843
4844#undef X
4845#undef Xend
4846
4847static int
4848md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
4849{
4850
4851    static const TAG* map6[26] = {
4852        a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
4853        n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
4854    };
4855    OFF off = beg + 1;
4856    int i;
4857
4858    for(i = 0; t1[i].name != NULL; i++) {
4859        if(off + t1[i].len <= ctx->size) {
4860            if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len))
4861                return 1;
4862        }
4863    }
4864
4865    if(off + 3 < ctx->size  &&  CH(off) == _T('!')  &&  CH(off+1) == _T('-')  &&  CH(off+2) == _T('-'))
4866        return 2;
4867
4868    if(off < ctx->size  &&  CH(off) == _T('?'))
4869        return 3;
4870
4871    if(off < ctx->size  &&  CH(off) == _T('!')) {
4872
4873        if(off + 1 < ctx->size  &&  ISASCII(off+1))
4874            return 4;
4875
4876        if(off + 8 < ctx->size) {
4877            if(md_ascii_eq(STR(off), _T("![CDATA["), 8))
4878                return 5;
4879        }
4880    }
4881
4882    if(off + 1 < ctx->size  &&  (ISALPHA(off) || (CH(off) == _T('/') && ISALPHA(off+1)))) {
4883        int slot;
4884        const TAG* tags;
4885
4886        if(CH(off) == _T('/'))
4887            off++;
4888
4889        slot = (ISUPPER(off) ? CH(off) - 'A' : CH(off) - 'a');
4890        tags = map6[slot];
4891
4892        for(i = 0; tags[i].name != NULL; i++) {
4893            if(off + tags[i].len <= ctx->size) {
4894                if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) {
4895                    OFF tmp = off + tags[i].len;
4896                    if(tmp >= ctx->size)
4897                        return 6;
4898                    if(ISBLANK(tmp) || ISNEWLINE(tmp) || CH(tmp) == _T('>'))
4899                        return 6;
4900                    if(tmp+1 < ctx->size && CH(tmp) == _T('/') && CH(tmp+1) == _T('>'))
4901                        return 6;
4902                    break;
4903                }
4904            }
4905        }
4906    }
4907
4908    if(off + 1 < ctx->size) {
4909        OFF end;
4910
4911        if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end)) {
4912
4913            while(end < ctx->size  &&  ISWHITESPACE(end))
4914                end++;
4915            if(end >= ctx->size  ||  ISNEWLINE(end))
4916                return 7;
4917        }
4918    }
4919
4920    return FALSE;
4921}
4922
4923static int
4924md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
4925{
4926    OFF i;
4927    for(i = beg; i + what_len < ctx->size; i++) {
4928        if(ISNEWLINE(i))
4929            break;
4930        if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == 0) {
4931            *p_end = i + what_len;
4932            return TRUE;
4933        }
4934    }
4935
4936    *p_end = i;
4937    return FALSE;
4938}
4939
4940static int
4941md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
4942{
4943    switch(ctx->html_block_type) {
4944        case 1:
4945        {
4946            OFF off = beg;
4947            int i;
4948
4949            while(off+1 < ctx->size  &&  !ISNEWLINE(off)) {
4950                if(CH(off) == _T('<')  &&  CH(off+1) == _T('/')) {
4951                    for(i = 0; t1[i].name != NULL; i++) {
4952                        if(off + 2 + t1[i].len < ctx->size) {
4953                            if(md_ascii_case_eq(STR(off+2), t1[i].name, t1[i].len)  &&
4954                               CH(off+2+t1[i].len) == _T('>'))
4955                            {
4956                                *p_end = off+2+t1[i].len+1;
4957                                return TRUE;
4958                            }
4959                        }
4960                    }
4961                }
4962                off++;
4963            }
4964            *p_end = off;
4965            return FALSE;
4966        }
4967
4968        case 2:
4969            return (md_line_contains(ctx, beg, _T("-->"), 3, p_end) ? 2 : FALSE);
4970
4971        case 3:
4972            return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE);
4973
4974        case 4:
4975            return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE);
4976
4977        case 5:
4978            return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE);
4979
4980        case 6:
4981        case 7:
4982            if(beg >= ctx->size  ||  ISNEWLINE(beg)) {
4983
4984                *p_end = beg;
4985                return ctx->html_block_type;
4986            }
4987            return FALSE;
4988
4989        default:
4990            MD_UNREACHABLE();
4991    }
4992    return FALSE;
4993}
4994
4995static int
4996md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
4997{
4998
4999    if(container->ch == _T('>'))
5000        return FALSE;
5001
5002    if(container->ch != pivot->ch)
5003        return FALSE;
5004    if(container->mark_indent > pivot->contents_indent)
5005        return FALSE;
5006
5007    return TRUE;
5008}
5009
5010static int
5011md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
5012{
5013    if(ctx->n_containers >= ctx->alloc_containers) {
5014        MD_CONTAINER* new_containers;
5015
5016        ctx->alloc_containers = (ctx->alloc_containers > 0
5017                ? ctx->alloc_containers + ctx->alloc_containers / 2
5018                : 16);
5019        new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
5020        if(new_containers == NULL) {
5021            MD_LOG("realloc() failed.");
5022            return -1;
5023        }
5024
5025        ctx->containers = new_containers;
5026    }
5027
5028    memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER));
5029    return 0;
5030}
5031
5032static int
5033md_enter_child_containers(MD_CTX* ctx, int n_children)
5034{
5035    int i;
5036    int ret = 0;
5037
5038    for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
5039        MD_CONTAINER* c = &ctx->containers[i];
5040        int is_ordered_list = FALSE;
5041
5042        switch(c->ch) {
5043            case _T(')'):
5044            case _T('.'):
5045                is_ordered_list = TRUE;
5046                MD_FALLTHROUGH();
5047
5048            case _T('-'):
5049            case _T('+'):
5050            case _T('*'):
5051
5052                md_end_current_block(ctx);
5053                c->block_byte_off = ctx->n_block_bytes;
5054
5055                MD_CHECK(md_push_container_bytes(ctx,
5056                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
5057                                c->start, c->ch, MD_BLOCK_CONTAINER_OPENER));
5058                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5059                                c->task_mark_off,
5060                                (c->is_task ? CH(c->task_mark_off) : 0),
5061                                MD_BLOCK_CONTAINER_OPENER));
5062                break;
5063
5064            case _T('>'):
5065                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
5066                break;
5067
5068            default:
5069                MD_UNREACHABLE();
5070                break;
5071        }
5072    }
5073
5074abort:
5075    return ret;
5076}
5077
5078static int
5079md_leave_child_containers(MD_CTX* ctx, int n_keep)
5080{
5081    int ret = 0;
5082
5083    while(ctx->n_containers > n_keep) {
5084        MD_CONTAINER* c = &ctx->containers[ctx->n_containers-1];
5085        int is_ordered_list = FALSE;
5086
5087        switch(c->ch) {
5088            case _T(')'):
5089            case _T('.'):
5090                is_ordered_list = TRUE;
5091                MD_FALLTHROUGH();
5092
5093            case _T('-'):
5094            case _T('+'):
5095            case _T('*'):
5096                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5097                                c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : 0),
5098                                MD_BLOCK_CONTAINER_CLOSER));
5099                MD_CHECK(md_push_container_bytes(ctx,
5100                                (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
5101                                c->ch, MD_BLOCK_CONTAINER_CLOSER));
5102                break;
5103
5104            case _T('>'):
5105                MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
5106                                0, MD_BLOCK_CONTAINER_CLOSER));
5107                break;
5108
5109            default:
5110                MD_UNREACHABLE();
5111                break;
5112        }
5113
5114        ctx->n_containers--;
5115    }
5116
5117abort:
5118    return ret;
5119}
5120
5121static int
5122md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
5123{
5124    OFF off = beg;
5125    OFF max_end;
5126
5127    if(off >= ctx->size  ||  indent >= ctx->code_indent_offset)
5128        return FALSE;
5129
5130    if(CH(off) == _T('>')) {
5131        off++;
5132        p_container->ch = _T('>');
5133        p_container->is_loose = FALSE;
5134        p_container->is_task = FALSE;
5135        p_container->mark_indent = indent;
5136        p_container->contents_indent = indent + 1;
5137        *p_end = off;
5138        return TRUE;
5139    }
5140
5141    if(ISANYOF(off, _T("-+*"))  &&  (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) {
5142        p_container->ch = CH(off);
5143        p_container->is_loose = FALSE;
5144        p_container->is_task = FALSE;
5145        p_container->mark_indent = indent;
5146        p_container->contents_indent = indent + 1;
5147        *p_end = off+1;
5148        return TRUE;
5149    }
5150
5151    max_end = off + 9;
5152    if(max_end > ctx->size)
5153        max_end = ctx->size;
5154    p_container->start = 0;
5155    while(off < max_end  &&  ISDIGIT(off)) {
5156        p_container->start = p_container->start * 10 + CH(off) - _T('0');
5157        off++;
5158    }
5159    if(off > beg  &&
5160       off < ctx->size  &&
5161       (CH(off) == _T('.') || CH(off) == _T(')'))  &&
5162       (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1)))
5163    {
5164        p_container->ch = CH(off);
5165        p_container->is_loose = FALSE;
5166        p_container->is_task = FALSE;
5167        p_container->mark_indent = indent;
5168        p_container->contents_indent = indent + off - beg + 1;
5169        *p_end = off+1;
5170        return TRUE;
5171    }
5172
5173    return FALSE;
5174}
5175
5176static unsigned
5177md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
5178{
5179    OFF off = beg;
5180    unsigned indent = total_indent;
5181
5182    while(off < ctx->size  &&  ISBLANK(off)) {
5183        if(CH(off) == _T('\t'))
5184            indent = (indent + 4) & ~3;
5185        else
5186            indent++;
5187        off++;
5188    }
5189
5190    *p_end = off;
5191    return indent - total_indent;
5192}
5193
/* Constant line analysis standing for a blank line: MD_LINE_BLANK as the
 * first initializer (presumably the line type), all remaining members zero. */
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0, 0 };
5195
5196static int
5197md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
5198                const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
5199{
5200    unsigned total_indent = 0;
5201    int n_parents = 0;
5202    int n_brothers = 0;
5203    int n_children = 0;
5204    MD_CONTAINER container = { 0 };
5205    int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
5206    OFF off = beg;
5207    OFF hr_killer = 0;
5208    int ret = 0;
5209
5210    line->indent = md_line_indentation(ctx, total_indent, off, &off);
5211    total_indent += line->indent;
5212    line->beg = off;
5213    line->enforce_new_block = FALSE;
5214
5215    while(n_parents < ctx->n_containers) {
5216        MD_CONTAINER* c = &ctx->containers[n_parents];
5217
5218        if(c->ch == _T('>')  &&  line->indent < ctx->code_indent_offset  &&
5219            off < ctx->size  &&  CH(off) == _T('>'))
5220        {
5221
5222            off++;
5223            total_indent++;
5224            line->indent = md_line_indentation(ctx, total_indent, off, &off);
5225            total_indent += line->indent;
5226
5227            if(line->indent > 0)
5228                line->indent--;
5229
5230            line->beg = off;
5231
5232        } else if(c->ch != _T('>')  &&  line->indent >= c->contents_indent) {
5233
5234            line->indent -= c->contents_indent;
5235        } else {
5236            break;
5237        }
5238
5239        n_parents++;
5240    }
5241
5242    if(off >= ctx->size  ||  ISNEWLINE(off)) {
5243
5244        if(n_brothers + n_children == 0) {
5245            while(n_parents < ctx->n_containers  &&  ctx->containers[n_parents].ch != _T('>'))
5246                n_parents++;
5247        }
5248    }
5249
5250    while(TRUE) {
5251
5252        if(pivot_line->type == MD_LINE_FENCEDCODE) {
5253            line->beg = off;
5254
5255            if(line->indent < ctx->code_indent_offset) {
5256                if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) {
5257                    line->type = MD_LINE_BLANK;
5258                    ctx->last_line_has_list_loosening_effect = FALSE;
5259                    break;
5260                }
5261            }
5262
5263            if(n_parents == ctx->n_containers) {
5264                if(line->indent > pivot_line->indent)
5265                    line->indent -= pivot_line->indent;
5266                else
5267                    line->indent = 0;
5268
5269                line->type = MD_LINE_FENCEDCODE;
5270                break;
5271            }
5272        }
5273
5274        if(pivot_line->type == MD_LINE_HTML  &&  ctx->html_block_type > 0) {
5275            if(n_parents < ctx->n_containers) {
5276
5277                ctx->html_block_type = 0;
5278            } else {
5279                int html_block_type;
5280
5281                html_block_type = md_is_html_block_end_condition(ctx, off, &off);
5282                if(html_block_type > 0) {
5283                    MD_ASSERT(html_block_type == ctx->html_block_type);
5284
5285                    ctx->html_block_type = 0;
5286
5287                    if(html_block_type == 6 || html_block_type == 7) {
5288                        line->type = MD_LINE_BLANK;
5289                        line->indent = 0;
5290                        break;
5291                    }
5292                }
5293
5294                line->type = MD_LINE_HTML;
5295                n_parents = ctx->n_containers;
5296                break;
5297            }
5298        }
5299
5300        if(off >= ctx->size  ||  ISNEWLINE(off)) {
5301            if(pivot_line->type == MD_LINE_INDENTEDCODE  &&  n_parents == ctx->n_containers) {
5302                line->type = MD_LINE_INDENTEDCODE;
5303                if(line->indent > ctx->code_indent_offset)
5304                    line->indent -= ctx->code_indent_offset;
5305                else
5306                    line->indent = 0;
5307                ctx->last_line_has_list_loosening_effect = FALSE;
5308            } else {
5309                line->type = MD_LINE_BLANK;
5310                ctx->last_line_has_list_loosening_effect = (n_parents > 0  &&
5311                        n_brothers + n_children == 0  &&
5312                        ctx->containers[n_parents-1].ch != _T('>'));
5313
5314    #if 1
5315
5316                if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
5317                   n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
5318                   ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5319                {
5320                    MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5321                    if(top_block->type == MD_BLOCK_LI)
5322                        ctx->last_list_item_starts_with_two_blank_lines = TRUE;
5323                }
5324    #endif
5325            }
5326            break;
5327        } else {
5328    #if 1
5329
5330            if(ctx->last_list_item_starts_with_two_blank_lines) {
5331                if(n_parents > 0  &&  n_parents == ctx->n_containers  &&
5332                   ctx->containers[n_parents-1].ch != _T('>')  &&
5333                   n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
5334                   ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5335                {
5336                    MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5337                    if(top_block->type == MD_BLOCK_LI) {
5338                        n_parents--;
5339
5340                        line->indent = total_indent;
5341                        if(n_parents > 0)
5342                            line->indent -= MIN(line->indent, ctx->containers[n_parents-1].contents_indent);
5343                    }
5344                }
5345
5346                ctx->last_list_item_starts_with_two_blank_lines = FALSE;
5347            }
5348    #endif
5349            ctx->last_line_has_list_loosening_effect = FALSE;
5350        }
5351
5352        if(line->indent < ctx->code_indent_offset  &&  pivot_line->type == MD_LINE_TEXT
5353            &&  off < ctx->size  &&  ISANYOF2(off, _T('='), _T('-'))
5354            &&  (n_parents == ctx->n_containers))
5355        {
5356            unsigned level;
5357
5358            if(md_is_setext_underline(ctx, off, &off, &level)) {
5359                line->type = MD_LINE_SETEXTUNDERLINE;
5360                line->data = level;
5361                break;
5362            }
5363        }
5364
5365        if(line->indent < ctx->code_indent_offset
5366            &&  off < ctx->size  &&  off >= hr_killer
5367            &&  ISANYOF(off, _T("-_*")))
5368        {
5369            if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
5370                line->type = MD_LINE_HR;
5371                break;
5372            }
5373        }
5374
5375        if(n_parents < ctx->n_containers  &&  n_brothers + n_children == 0) {
5376            OFF tmp;
5377
5378            if(md_is_container_mark(ctx, line->indent, off, &tmp, &container)  &&
5379               md_is_container_compatible(&ctx->containers[n_parents], &container))
5380            {
5381                pivot_line = &md_dummy_blank_line;
5382
5383                off = tmp;
5384
5385                total_indent += container.contents_indent - container.mark_indent;
5386                line->indent = md_line_indentation(ctx, total_indent, off, &off);
5387                total_indent += line->indent;
5388                line->beg = off;
5389
5390                if(off >= ctx->size || ISNEWLINE(off)) {
5391                    container.contents_indent++;
5392                } else if(line->indent <= ctx->code_indent_offset) {
5393                    container.contents_indent += line->indent;
5394                    line->indent = 0;
5395                } else {
5396                    container.contents_indent += 1;
5397                    line->indent--;
5398                }
5399
5400                ctx->containers[n_parents].mark_indent = container.mark_indent;
5401                ctx->containers[n_parents].contents_indent = container.contents_indent;
5402
5403                n_brothers++;
5404                continue;
5405            }
5406        }
5407
5408        if(line->indent >= ctx->code_indent_offset  &&  (pivot_line->type != MD_LINE_TEXT)) {
5409            line->type = MD_LINE_INDENTEDCODE;
5410            line->indent -= ctx->code_indent_offset;
5411            line->data = 0;
5412            break;
5413        }
5414
5415        if(line->indent < ctx->code_indent_offset  &&
5416           md_is_container_mark(ctx, line->indent, off, &off, &container))
5417        {
5418            if(pivot_line->type == MD_LINE_TEXT  &&  n_parents == ctx->n_containers  &&
5419                        (off >= ctx->size || ISNEWLINE(off))  &&  container.ch != _T('>'))
5420            {
5421
5422            } else if(pivot_line->type == MD_LINE_TEXT  &&  n_parents == ctx->n_containers  &&
5423                        ISANYOF2_(container.ch, _T('.'), _T(')'))  &&  container.start != 1)
5424            {
5425
5426            } else {
5427                total_indent += container.contents_indent - container.mark_indent;
5428                line->indent = md_line_indentation(ctx, total_indent, off, &off);
5429                total_indent += line->indent;
5430
5431                line->beg = off;
5432                line->data = container.ch;
5433
5434                if(off >= ctx->size || ISNEWLINE(off)) {
5435                    container.contents_indent++;
5436                } else if(line->indent <= ctx->code_indent_offset) {
5437                    container.contents_indent += line->indent;
5438                    line->indent = 0;
5439                } else {
5440                    container.contents_indent += 1;
5441                    line->indent--;
5442                }
5443
5444                if(n_brothers + n_children == 0)
5445                    pivot_line = &md_dummy_blank_line;
5446
5447                if(n_children == 0)
5448                    MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
5449
5450                n_children++;
5451                MD_CHECK(md_push_container(ctx, &container));
5452                continue;
5453            }
5454        }
5455
5456        if(pivot_line->type == MD_LINE_TABLE  &&  n_parents == ctx->n_containers) {
5457            line->type = MD_LINE_TABLE;
5458            break;
5459        }
5460
5461        if(line->indent < ctx->code_indent_offset  &&
5462                off < ctx->size  &&  CH(off) == _T('#'))
5463        {
5464            unsigned level;
5465
5466            if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) {
5467                line->type = MD_LINE_ATXHEADER;
5468                line->data = level;
5469                break;
5470            }
5471        }
5472
5473        if(line->indent < ctx->code_indent_offset  &&
5474                off < ctx->size  &&  ISANYOF2(off, _T('`'), _T('~')))
5475        {
5476            if(md_is_opening_code_fence(ctx, off, &off)) {
5477                line->type = MD_LINE_FENCEDCODE;
5478                line->data = 1;
5479                line->enforce_new_block = TRUE;
5480                break;
5481            }
5482        }
5483
5484        if(off < ctx->size  &&  CH(off) == _T('<')
5485            &&  !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
5486        {
5487            ctx->html_block_type = md_is_html_block_start_condition(ctx, off);
5488
5489            if(ctx->html_block_type == 7  &&  pivot_line->type == MD_LINE_TEXT)
5490                ctx->html_block_type = 0;
5491
5492            if(ctx->html_block_type > 0) {
5493
5494                if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) {
5495
5496                    ctx->html_block_type = 0;
5497                }
5498
5499                line->enforce_new_block = TRUE;
5500                line->type = MD_LINE_HTML;
5501                break;
5502            }
5503        }
5504
5505        if((ctx->parser.flags & MD_FLAG_TABLES)  &&  pivot_line->type == MD_LINE_TEXT
5506            &&  off < ctx->size  &&  ISANYOF3(off, _T('|'), _T('-'), _T(':'))
5507            &&  n_parents == ctx->n_containers)
5508        {
5509            unsigned col_count;
5510
5511            if(ctx->current_block != NULL  &&  ctx->current_block->n_lines == 1  &&
5512                md_is_table_underline(ctx, off, &off, &col_count))
5513            {
5514                line->data = col_count;
5515                line->type = MD_LINE_TABLEUNDERLINE;
5516                break;
5517            }
5518        }
5519
5520        line->type = MD_LINE_TEXT;
5521        if(pivot_line->type == MD_LINE_TEXT  &&  n_brothers + n_children == 0) {
5522
5523            n_parents = ctx->n_containers;
5524        }
5525
5526        if((ctx->parser.flags & MD_FLAG_TASKLISTS)  &&  n_brothers + n_children > 0  &&
5527           ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)")))
5528        {
5529            OFF tmp = off;
5530
5531            while(tmp < ctx->size  &&  tmp < off + 3  &&  ISBLANK(tmp))
5532                tmp++;
5533            if(tmp + 2 < ctx->size  &&  CH(tmp) == _T('[')  &&
5534               ISANYOF(tmp+1, _T("xX "))  &&  CH(tmp+2) == _T(']')  &&
5535               (tmp + 3 == ctx->size  ||  ISBLANK(tmp+3)  ||  ISNEWLINE(tmp+3)))
5536            {
5537                MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container);
5538                task_container->is_task = TRUE;
5539                task_container->task_mark_off = tmp + 1;
5540                off = tmp + 3;
5541                while(off < ctx->size  &&  ISWHITESPACE(off))
5542                    off++;
5543                line->beg = off;
5544            }
5545        }
5546
5547        break;
5548    }
5549
5550#if defined __linux__ && !defined MD4C_USE_UTF16
5551
5552    if(ctx->doc_ends_with_newline  &&  off < ctx->size) {
5553        while(TRUE) {
5554            off += (OFF) strcspn(STR(off), "\r\n");
5555
5556            if(CH(off) == _T('\0'))
5557                off++;
5558            else
5559                break;
5560        }
5561    } else
5562#endif
5563    {
5564
5565        while(off + 3 < ctx->size  &&  !ISNEWLINE(off+0)  &&  !ISNEWLINE(off+1)
5566                                   &&  !ISNEWLINE(off+2)  &&  !ISNEWLINE(off+3))
5567            off += 4;
5568        while(off < ctx->size  &&  !ISNEWLINE(off))
5569            off++;
5570    }
5571
5572    line->end = off;
5573
5574    if(line->type == MD_LINE_ATXHEADER) {
5575        OFF tmp = line->end;
5576        while(tmp > line->beg && ISBLANK(tmp-1))
5577            tmp--;
5578        while(tmp > line->beg && CH(tmp-1) == _T('#'))
5579            tmp--;
5580        if(tmp == line->beg || ISBLANK(tmp-1) || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
5581            line->end = tmp;
5582    }
5583
5584    if(line->type != MD_LINE_INDENTEDCODE  &&  line->type != MD_LINE_FENCEDCODE  && line->type != MD_LINE_HTML) {
5585        while(line->end > line->beg && ISBLANK(line->end-1))
5586            line->end--;
5587    }
5588
5589    if(off < ctx->size && CH(off) == _T('\r'))
5590        off++;
5591    if(off < ctx->size && CH(off) == _T('\n'))
5592        off++;
5593
5594    *p_end = off;
5595
5596    if(prev_line_has_list_loosening_effect  &&  line->type != MD_LINE_BLANK  &&  n_parents + n_brothers > 0) {
5597        MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1];
5598        if(c->ch != _T('>')) {
5599            MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off);
5600            block->flags |= MD_BLOCK_LOOSE_LIST;
5601        }
5602    }
5603
5604    if(n_children == 0  &&  n_parents + n_brothers < ctx->n_containers)
5605        MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
5606
5607    if(n_brothers > 0) {
5608        MD_ASSERT(n_brothers == 1);
5609        MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5610                    ctx->containers[n_parents].task_mark_off,
5611                    (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0),
5612                    MD_BLOCK_CONTAINER_CLOSER));
5613        MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5614                    container.task_mark_off,
5615                    (container.is_task ? CH(container.task_mark_off) : 0),
5616                    MD_BLOCK_CONTAINER_OPENER));
5617        ctx->containers[n_parents].is_task = container.is_task;
5618        ctx->containers[n_parents].task_mark_off = container.task_mark_off;
5619    }
5620
5621    if(n_children > 0)
5622        MD_CHECK(md_enter_child_containers(ctx, n_children));
5623
5624abort:
5625    return ret;
5626}
5627
5628static int
5629md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
5630{
5631    const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
5632    int ret = 0;
5633
5634    if(line->type == MD_LINE_BLANK) {
5635        MD_CHECK(md_end_current_block(ctx));
5636        *p_pivot_line = &md_dummy_blank_line;
5637        return 0;
5638    }
5639
5640    if(line->enforce_new_block)
5641        MD_CHECK(md_end_current_block(ctx));
5642
5643    if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) {
5644        MD_CHECK(md_end_current_block(ctx));
5645
5646        MD_CHECK(md_start_new_block(ctx, line));
5647        MD_CHECK(md_add_line_into_current_block(ctx, line));
5648        MD_CHECK(md_end_current_block(ctx));
5649        *p_pivot_line = &md_dummy_blank_line;
5650        return 0;
5651    }
5652
5653    if(line->type == MD_LINE_SETEXTUNDERLINE) {
5654        MD_ASSERT(ctx->current_block != NULL);
5655        ctx->current_block->type = MD_BLOCK_H;
5656        ctx->current_block->data = line->data;
5657        ctx->current_block->flags |= MD_BLOCK_SETEXT_HEADER;
5658        MD_CHECK(md_add_line_into_current_block(ctx, line));
5659        MD_CHECK(md_end_current_block(ctx));
5660        if(ctx->current_block == NULL) {
5661            *p_pivot_line = &md_dummy_blank_line;
5662        } else {
5663
5664            line->type = MD_LINE_TEXT;
5665            *p_pivot_line = line;
5666        }
5667        return 0;
5668    }
5669
5670    if(line->type == MD_LINE_TABLEUNDERLINE) {
5671        MD_ASSERT(ctx->current_block != NULL);
5672        MD_ASSERT(ctx->current_block->n_lines == 1);
5673        ctx->current_block->type = MD_BLOCK_TABLE;
5674        ctx->current_block->data = line->data;
5675        MD_ASSERT(pivot_line != &md_dummy_blank_line);
5676        ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
5677        MD_CHECK(md_add_line_into_current_block(ctx, line));
5678        return 0;
5679    }
5680
5681    if(line->type != pivot_line->type)
5682        MD_CHECK(md_end_current_block(ctx));
5683
5684    if(ctx->current_block == NULL) {
5685        MD_CHECK(md_start_new_block(ctx, line));
5686        *p_pivot_line = line;
5687    }
5688
5689    MD_CHECK(md_add_line_into_current_block(ctx, line));
5690
5691abort:
5692    return ret;
5693}
5694
5695static int
5696md_process_doc(MD_CTX *ctx)
5697{
5698    const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
5699    MD_LINE_ANALYSIS line_buf[2];
5700    MD_LINE_ANALYSIS* line = &line_buf[0];
5701    OFF off = 0;
5702    int ret = 0;
5703
5704    MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
5705
5706    while(off < ctx->size) {
5707        if(line == pivot_line)
5708            line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);
5709
5710        MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
5711        MD_CHECK(md_process_line(ctx, &pivot_line, line));
5712    }
5713
5714    md_end_current_block(ctx);
5715
5716    MD_CHECK(md_build_ref_def_hashtable(ctx));
5717
5718    MD_CHECK(md_leave_child_containers(ctx, 0));
5719    MD_CHECK(md_process_all_blocks(ctx));
5720
5721    MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
5722
5723abort:
5724
5725#if 0
5726
5727    {
5728        char buffer[256];
5729        sprintf(buffer, "Alloced %u bytes for block buffer.",
5730                    (unsigned)(ctx->alloc_block_bytes));
5731        MD_LOG(buffer);
5732
5733        sprintf(buffer, "Alloced %u bytes for containers buffer.",
5734                    (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
5735        MD_LOG(buffer);
5736
5737        sprintf(buffer, "Alloced %u bytes for marks buffer.",
5738                    (unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
5739        MD_LOG(buffer);
5740
5741        sprintf(buffer, "Alloced %u bytes for aux. buffer.",
5742                    (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
5743        MD_LOG(buffer);
5744    }
5745#endif
5746
5747    return ret;
5748}
5749
5750int
5751md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
5752{
5753    MD_CTX ctx;
5754    int i;
5755    int ret;
5756
5757    if(parser->abi_version != 0) {
5758        if(parser->debug_log != NULL)
5759            parser->debug_log("Unsupported abi_version.", userdata);
5760        return -1;
5761    }
5762
5763    memset(&ctx, 0, sizeof(MD_CTX));
5764    ctx.text = text;
5765    ctx.size = size;
5766    memcpy(&ctx.parser, parser, sizeof(MD_PARSER));
5767    ctx.userdata = userdata;
5768    ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
5769    md_build_mark_char_map(&ctx);
5770    ctx.doc_ends_with_newline = (size > 0  &&  ISNEWLINE_(text[size-1]));
5771    ctx.max_ref_def_output = MIN(MIN(16 * (uint64_t)size, (uint64_t)(1024 * 1024)), (uint64_t)SZ_MAX);
5772
5773    for(i = 0; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++)
5774        ctx.opener_stacks[i].top = -1;
5775    ctx.ptr_stack.top = -1;
5776    ctx.unresolved_link_head = -1;
5777    ctx.unresolved_link_tail = -1;
5778    ctx.table_cell_boundaries_head = -1;
5779    ctx.table_cell_boundaries_tail = -1;
5780
5781    ret = md_process_doc(&ctx);
5782
5783    md_free_ref_defs(&ctx);
5784    md_free_ref_def_hashtable(&ctx);
5785    free(ctx.buffer);
5786    free(ctx.marks);
5787    free(ctx.block_bytes);
5788    free(ctx.containers);
5789
5790    return ret;
5791}