1package text
2
3import (
4 "bytes"
5 "io"
6 "regexp"
7 "unicode/utf8"
8
9 "github.com/yuin/goldmark/util"
10)
11
// invalidValue marks an unset offset. blockReader.SetPosition treats a
// Segment whose Start equals this value as a request to use the segment of
// the given line.
const invalidValue = -1

// EOF is the byte returned by Peek when there is nothing left to read.
const EOF = byte(0xff)
16
// A Reader interface provides abstracted methods for reading text.
type Reader interface {
	io.RuneReader

	// Source returns the source of the reader.
	Source() []byte

	// ResetPosition resets positions.
	ResetPosition()

	// Peek returns the byte at the current position without advancing the
	// internal pointer. It returns EOF if there is nothing left to read.
	Peek() byte

	// PeekLine returns the current line without advancing the internal pointer.
	// The returned bytes are nil if there is nothing left to read.
	PeekLine() ([]byte, Segment)

	// PrecendingCharacter returns the character just before the current
	// internal pointer.
	PrecendingCharacter() rune

	// Value returns the value of the given segment.
	Value(Segment) []byte

	// LineOffset returns the distance from the line head to the current
	// position.
	LineOffset() int

	// Position returns the current line number and position.
	Position() (int, Segment)

	// SetPosition sets the current line number and position.
	SetPosition(int, Segment)

	// SetPadding sets padding to the reader.
	SetPadding(int)

	// Advance advances the internal pointer.
	Advance(int)

	// AdvanceAndSetPadding advances the internal pointer and adds padding to
	// the reader.
	AdvanceAndSetPadding(int, int)

	// AdvanceLine advances the internal pointer to the next line head.
	AdvanceLine()

	// SkipSpaces skips space characters and returns a segment, the number of
	// skipped characters and true when a non-space character is found.
	// If it reaches EOF, it returns false.
	SkipSpaces() (Segment, int, bool)

	// SkipBlankLines skips blank lines and returns the segment of the first
	// non-blank line, the number of skipped lines and true.
	// If it reaches EOF, it returns false.
	SkipBlankLines() (Segment, int, bool)

	// Match performs regular expression matching from the current position
	// and reports whether the expression matched.
	Match(reg *regexp.Regexp) bool

	// FindSubMatch performs regular expression searching from the current
	// position and returns the matched text and submatches, or nil if the
	// expression did not match.
	FindSubMatch(reg *regexp.Regexp) [][]byte

	// FindClosure finds the closer corresponding to an opener.
	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
}
78
// FindClosureOptions holds the options for Reader.FindClosure.
type FindClosureOptions struct {
	// CodeSpan is a flag for FindClosure. If this is set to true,
	// FindClosure ignores closers inside code spans.
	CodeSpan bool

	// Nesting is a flag for FindClosure. If this is set to true,
	// FindClosure allows nested opener/closer pairs.
	Nesting bool

	// Newline is a flag for FindClosure. If this is set to true,
	// FindClosure searches for a closer over multiple lines.
	Newline bool

	// Advance is a flag for FindClosure. If this is set to true,
	// FindClosure advances the reader when a closer is found. If it is
	// false, the reader position is restored before FindClosure returns.
	Advance bool
}
97
// reader is the Reader implementation that walks a whole source buffer line
// by line.
type reader struct {
	source       []byte  // the buffer being read
	sourceLength int     // len(source), cached by NewReader
	line         int     // current line number; -1 before the first AdvanceLine
	peekedLine   []byte  // cached result of PeekLine; nil when not computed
	pos          Segment // Start is the read pointer, Stop the end of the current line
	head         int     // offset of the head of the current line
	lineOffset   int     // cached result of LineOffset; negative when not computed
}
107
108// NewReader return a new Reader that can read UTF-8 bytes .
109func NewReader(source []byte) Reader {
110 r := &reader{
111 source: source,
112 sourceLength: len(source),
113 }
114 r.ResetPosition()
115 return r
116}
117
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader.
func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
121
122func (r *reader) ResetPosition() {
123 r.line = -1
124 r.head = 0
125 r.lineOffset = -1
126 r.AdvanceLine()
127}
128
// Source returns the buffer this reader was created with.
func (r *reader) Source() []byte {
	return r.source
}
132
// Value returns the value of seg evaluated against the reader's source.
func (r *reader) Value(seg Segment) []byte {
	return seg.Value(r.source)
}
136
137func (r *reader) Peek() byte {
138 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
139 if r.pos.Padding != 0 {
140 return space[0]
141 }
142 return r.source[r.pos.Start]
143 }
144 return EOF
145}
146
147func (r *reader) PeekLine() ([]byte, Segment) {
148 if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
149 if r.peekedLine == nil {
150 r.peekedLine = r.pos.Value(r.Source())
151 }
152 return r.peekedLine, r.pos
153 }
154 return nil, r.pos
155}
156
// ReadRune implements io.RuneReader by delegating to readRuneReader.
func (r *reader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
161
162func (r *reader) LineOffset() int {
163 if r.lineOffset < 0 {
164 v := 0
165 for i := r.head; i < r.pos.Start; i++ {
166 if r.source[i] == '\t' {
167 v += util.TabWidth(v)
168 } else {
169 v++
170 }
171 }
172 r.lineOffset = v - r.pos.Padding
173 }
174 return r.lineOffset
175}
176
177func (r *reader) PrecendingCharacter() rune {
178 if r.pos.Start <= 0 {
179 if r.pos.Padding != 0 {
180 return rune(' ')
181 }
182 return rune('\n')
183 }
184 i := r.pos.Start - 1
185 for ; i >= 0; i-- {
186 if utf8.RuneStart(r.source[i]) {
187 break
188 }
189 }
190 rn, _ := utf8.DecodeRune(r.source[i:])
191 return rn
192}
193
194func (r *reader) Advance(n int) {
195 r.lineOffset = -1
196 if n < len(r.peekedLine) && r.pos.Padding == 0 {
197 r.pos.Start += n
198 r.peekedLine = nil
199 return
200 }
201 r.peekedLine = nil
202 l := r.sourceLength
203 for ; n > 0 && r.pos.Start < l; n-- {
204 if r.pos.Padding != 0 {
205 r.pos.Padding--
206 continue
207 }
208 if r.source[r.pos.Start] == '\n' {
209 r.AdvanceLine()
210 continue
211 }
212 r.pos.Start++
213 }
214}
215
216func (r *reader) AdvanceAndSetPadding(n, padding int) {
217 r.Advance(n)
218 if padding > r.pos.Padding {
219 r.SetPadding(padding)
220 }
221}
222
223func (r *reader) AdvanceLine() {
224 r.lineOffset = -1
225 r.peekedLine = nil
226 r.pos.Start = r.pos.Stop
227 r.head = r.pos.Start
228 if r.pos.Start < 0 {
229 return
230 }
231 r.pos.Stop = r.sourceLength
232 for i := r.pos.Start; i < r.sourceLength; i++ {
233 c := r.source[i]
234 if c == '\n' {
235 r.pos.Stop = i + 1
236 break
237 }
238 }
239 r.line++
240 r.pos.Padding = 0
241}
242
// Position returns the current line number and position.
func (r *reader) Position() (int, Segment) {
	return r.line, r.pos
}
246
247func (r *reader) SetPosition(line int, pos Segment) {
248 r.lineOffset = -1
249 r.line = line
250 r.pos = pos
251}
252
253func (r *reader) SetPadding(v int) {
254 r.pos.Padding = v
255}
256
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *reader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
260
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *reader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
264
// Match implements Reader.Match by delegating to matchReader.
func (r *reader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
268
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
272
// A BlockReader interface is a reader that is optimized for Blocks.
type BlockReader interface {
	Reader
	// Reset resets the current state and sets new segments to the reader.
	Reset(segment *Segments)
}
279
// blockReader is the BlockReader implementation. It reads the parts of
// source that are described by a list of segments, one segment per line.
type blockReader struct {
	source         []byte    // the buffer the segments point into
	segments       *Segments // the segments being read
	segmentsLength int       // segments.Len(), cached by Reset
	line           int       // index of the current segment; -1 before the first AdvanceLine
	pos            Segment   // current position within the current segment
	head           int       // start offset of the current segment
	last           int       // stop offset of the last segment; 0 when there are no segments
	lineOffset     int       // cached result of LineOffset; negative when not computed
}
290
291// NewBlockReader returns a new BlockReader.
292func NewBlockReader(source []byte, segments *Segments) BlockReader {
293 r := &blockReader{
294 source: source,
295 }
296 if segments != nil {
297 r.Reset(segments)
298 }
299 return r
300}
301
// FindClosure implements Reader.FindClosure by delegating to
// findClosureReader.
func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
	return findClosureReader(r, opener, closer, options)
}
305
306func (r *blockReader) ResetPosition() {
307 r.line = -1
308 r.head = 0
309 r.last = 0
310 r.lineOffset = -1
311 r.pos.Start = -1
312 r.pos.Stop = -1
313 r.pos.Padding = 0
314 if r.segmentsLength > 0 {
315 last := r.segments.At(r.segmentsLength - 1)
316 r.last = last.Stop
317 }
318 r.AdvanceLine()
319}
320
// Reset replaces the segments being read, caches their count and rewinds the
// reader to the first of them. It calls segments.Len(), so segments is
// expected to be non-nil.
func (r *blockReader) Reset(segments *Segments) {
	r.segments = segments
	r.segmentsLength = segments.Len()
	r.ResetPosition()
}
326
// Source returns the buffer this reader was created with.
func (r *blockReader) Source() []byte {
	return r.source
}
330
331func (r *blockReader) Value(seg Segment) []byte {
332 line := r.segmentsLength - 1
333 ret := make([]byte, 0, seg.Stop-seg.Start+1)
334 for ; line >= 0; line-- {
335 if seg.Start >= r.segments.At(line).Start {
336 break
337 }
338 }
339 i := seg.Start
340 for ; line < r.segmentsLength; line++ {
341 s := r.segments.At(line)
342 if i < 0 {
343 i = s.Start
344 }
345 ret = s.ConcatPadding(ret)
346 for ; i < seg.Stop && i < s.Stop; i++ {
347 ret = append(ret, r.source[i])
348 }
349 i = -1
350 if s.Stop > seg.Stop {
351 break
352 }
353 }
354 return ret
355}
356
// ReadRune implements io.RuneReader by delegating to readRuneReader.
func (r *blockReader) ReadRune() (rune, int, error) {
	return readRuneReader(r)
}
361
362func (r *blockReader) PrecendingCharacter() rune {
363 if r.pos.Padding != 0 {
364 return rune(' ')
365 }
366 if r.segments.Len() < 1 {
367 return rune('\n')
368 }
369 firstSegment := r.segments.At(0)
370 if r.line == 0 && r.pos.Start <= firstSegment.Start {
371 return rune('\n')
372 }
373 l := len(r.source)
374 i := r.pos.Start - 1
375 for ; i < l && i >= 0; i-- {
376 if utf8.RuneStart(r.source[i]) {
377 break
378 }
379 }
380 if i < 0 || i >= l {
381 return rune('\n')
382 }
383 rn, _ := utf8.DecodeRune(r.source[i:])
384 return rn
385}
386
387func (r *blockReader) LineOffset() int {
388 if r.lineOffset < 0 {
389 v := 0
390 for i := r.head; i < r.pos.Start; i++ {
391 if r.source[i] == '\t' {
392 v += util.TabWidth(v)
393 } else {
394 v++
395 }
396 }
397 r.lineOffset = v - r.pos.Padding
398 }
399 return r.lineOffset
400}
401
402func (r *blockReader) Peek() byte {
403 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
404 if r.pos.Padding != 0 {
405 return space[0]
406 }
407 return r.source[r.pos.Start]
408 }
409 return EOF
410}
411
412func (r *blockReader) PeekLine() ([]byte, Segment) {
413 if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
414 return r.pos.Value(r.source), r.pos
415 }
416 return nil, r.pos
417}
418
419func (r *blockReader) Advance(n int) {
420 r.lineOffset = -1
421
422 if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
423 r.pos.Start += n
424 return
425 }
426
427 for ; n > 0; n-- {
428 if r.pos.Padding != 0 {
429 r.pos.Padding--
430 continue
431 }
432 if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
433 r.AdvanceLine()
434 continue
435 }
436 r.pos.Start++
437 }
438}
439
440func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
441 r.Advance(n)
442 if padding > r.pos.Padding {
443 r.SetPadding(padding)
444 }
445}
446
// AdvanceLine moves the reader to the head of the next segment. It passes a
// segment built from invalidValue so that SetPosition selects the segment of
// the new line; when there is no such segment the position is left as it is
// and only the line number changes.
func (r *blockReader) AdvanceLine() {
	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
	r.head = r.pos.Start
}
451
// Position returns the current line number and position.
func (r *blockReader) Position() (int, Segment) {
	return r.line, r.pos
}
455
456func (r *blockReader) SetPosition(line int, pos Segment) {
457 r.lineOffset = -1
458 r.line = line
459 if pos.Start == invalidValue {
460 if r.line < r.segmentsLength {
461 s := r.segments.At(line)
462 r.head = s.Start
463 r.pos = s
464 }
465 } else {
466 r.pos = pos
467 if r.line < r.segmentsLength {
468 s := r.segments.At(line)
469 r.head = s.Start
470 }
471 }
472}
473
// SetPadding sets the padding of the current position. The cached line
// offset is invalidated because LineOffset subtracts the padding.
func (r *blockReader) SetPadding(v int) {
	r.lineOffset = -1
	r.pos.Padding = v
}
478
// SkipSpaces implements Reader.SkipSpaces by delegating to skipSpacesReader.
func (r *blockReader) SkipSpaces() (Segment, int, bool) {
	return skipSpacesReader(r)
}
482
// SkipBlankLines implements Reader.SkipBlankLines by delegating to
// skipBlankLinesReader.
func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
	return skipBlankLinesReader(r)
}
486
// Match implements Reader.Match by delegating to matchReader.
func (r *blockReader) Match(reg *regexp.Regexp) bool {
	return matchReader(r, reg)
}
490
// FindSubMatch implements Reader.FindSubMatch by delegating to
// findSubMatchReader.
func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
	return findSubMatchReader(r, reg)
}
494
495func skipBlankLinesReader(r Reader) (Segment, int, bool) {
496 lines := 0
497 for {
498 line, seg := r.PeekLine()
499 if line == nil {
500 return seg, lines, false
501 }
502 if util.IsBlank(line) {
503 lines++
504 r.AdvanceLine()
505 } else {
506 return seg, lines, true
507 }
508 }
509}
510
511func skipSpacesReader(r Reader) (Segment, int, bool) {
512 chars := 0
513 for {
514 line, segment := r.PeekLine()
515 if line == nil {
516 return segment, chars, false
517 }
518 for i, c := range line {
519 if util.IsSpace(c) {
520 chars++
521 r.Advance(1)
522 continue
523 }
524 return segment.WithStart(segment.Start + i + 1), chars, true
525 }
526 }
527}
528
529func matchReader(r Reader, reg *regexp.Regexp) bool {
530 oldline, oldseg := r.Position()
531 match := reg.FindReaderSubmatchIndex(r)
532 r.SetPosition(oldline, oldseg)
533 if match == nil {
534 return false
535 }
536 r.Advance(match[1] - match[0])
537 return true
538}
539
540func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
541 oldLine, oldSeg := r.Position()
542 match := reg.FindReaderSubmatchIndex(r)
543 r.SetPosition(oldLine, oldSeg)
544 if match == nil {
545 return nil
546 }
547 var bb bytes.Buffer
548 bb.Grow(match[1] - match[0])
549 for i := 0; i < match[1]; {
550 r, size, _ := readRuneReader(r)
551 i += size
552 bb.WriteRune(r)
553 }
554 bs := bb.Bytes()
555 var result [][]byte
556 for i := 0; i < len(match); i += 2 {
557 if match[i] < 0 {
558 result = append(result, []byte{})
559 continue
560 }
561 result = append(result, bs[match[i]:match[i+1]])
562 }
563
564 r.SetPosition(oldLine, oldSeg)
565 r.Advance(match[1] - match[0])
566 return result
567}
568
569func readRuneReader(r Reader) (rune, int, error) {
570 line, _ := r.PeekLine()
571 if line == nil {
572 return 0, 0, io.EOF
573 }
574 rn, size := utf8.DecodeRune(line)
575 if rn == utf8.RuneError {
576 return 0, 0, io.EOF
577 }
578 r.Advance(size)
579 return rn, size, nil
580}
581
// findClosureReader scans forward from the current position of r for the
// closer byte that balances an opener the caller has already consumed.
//
// The scan starts with a nesting depth of one. Outside code spans, a
// backslash followed by a punctuation byte is skipped as a pair. When
// opts.CodeSpan is set, runs of backticks open and close code spans and any
// opener or closer inside a code span is ignored. With opts.Nesting every
// further opener increases the depth; without it, meeting an opener aborts
// the search. With opts.Newline the scan continues onto following lines,
// otherwise it stops at the end of the current line.
//
// On success it returns the segments lying between the starting position and
// the closer (one per line that was scanned) and true. Otherwise it returns
// nil and false. Unless opts.Advance is set, the reader position is restored
// before returning.
func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
	opened := 1         // current nesting depth
	codeSpanOpener := 0 // length of the backtick run that opened the current code span; 0 outside code spans
	closed := false
	orgline, orgpos := r.Position()
	var ret *Segments

	for {
		bs, seg := r.PeekLine()
		if bs == nil {
			goto end
		}
		i := 0
		for i < len(bs) {
			c := bs[i]
			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
				// Inside a code span: count this backtick run. The span is
				// closed only by a run of the same length as the opening one.
				codeSpanCloser := 0
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanCloser++
					} else {
						// Step back so the i++ below lands on this byte.
						i--
						break
					}
				}
				if codeSpanCloser == codeSpanOpener {
					codeSpanOpener = 0
				}
			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
				// Escaped punctuation: skip the backslash and the escaped byte.
				i += 2
				continue
			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
				// Start of a code span: remember the length of the backtick run.
				for ; i < len(bs); i++ {
					if bs[i] == '`' {
						codeSpanOpener++
					} else {
						// Step back so the i++ below lands on this byte.
						i--
						break
					}
				}
			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
				// Openers and closers only count outside code spans.
				if c == closer {
					opened--
					if opened == 0 {
						if ret == nil {
							ret = NewSegments()
						}
						// Record the part of this line before the closer and
						// move the reader just past the closer.
						ret.Append(seg.WithStop(seg.Start + i))
						r.Advance(i + 1)
						closed = true
						goto end
					}
				} else if c == opener {
					if !opts.Nesting {
						goto end
					}
					opened++
				}
			}
			i++
		}
		if !opts.Newline {
			goto end
		}
		// The closer is not on this line: keep the whole line and continue
		// with the next one.
		r.AdvanceLine()
		if ret == nil {
			ret = NewSegments()
		}
		ret.Append(seg)
	}
end:
	if !opts.Advance {
		r.SetPosition(orgline, orgpos)
	}
	if closed {
		return ret, true
	}
	return nil, false
}