reader.go

  1package text
  2
  3import (
  4	"bytes"
  5	"io"
  6	"regexp"
  7	"unicode/utf8"
  8
  9	"github.com/yuin/goldmark/util"
 10)
 11
 12const invalidValue = -1
 13
 14// EOF indicates the end of file.
 15const EOF = byte(0xff)
 16
 17// A Reader interface provides abstracted method for reading text.
 18type Reader interface {
 19	io.RuneReader
 20
 21	// Source returns a source of the reader.
 22	Source() []byte
 23
 24	// ResetPosition resets positions.
 25	ResetPosition()
 26
 27	// Peek returns a byte at current position without advancing the internal pointer.
 28	Peek() byte
 29
 30	// PeekLine returns the current line without advancing the internal pointer.
 31	PeekLine() ([]byte, Segment)
 32
 33	// PrecendingCharacter returns a character just before current internal pointer.
 34	PrecendingCharacter() rune
 35
 36	// Value returns a value of the given segment.
 37	Value(Segment) []byte
 38
 39	// LineOffset returns a distance from the line head to current position.
 40	LineOffset() int
 41
 42	// Position returns current line number and position.
 43	Position() (int, Segment)
 44
 45	// SetPosition sets current line number and position.
 46	SetPosition(int, Segment)
 47
 48	// SetPadding sets padding to the reader.
 49	SetPadding(int)
 50
 51	// Advance advances the internal pointer.
 52	Advance(int)
 53
 54	// AdvanceAndSetPadding advances the internal pointer and add padding to the
 55	// reader.
 56	AdvanceAndSetPadding(int, int)
 57
 58	// AdvanceLine advances the internal pointer to the next line head.
 59	AdvanceLine()
 60
 61	// SkipSpaces skips space characters and returns a non-blank line.
 62	// If it reaches EOF, returns false.
 63	SkipSpaces() (Segment, int, bool)
 64
 65	// SkipSpaces skips blank lines and returns a non-blank line.
 66	// If it reaches EOF, returns false.
 67	SkipBlankLines() (Segment, int, bool)
 68
 69	// Match performs regular expression matching to current line.
 70	Match(reg *regexp.Regexp) bool
 71
 72	// Match performs regular expression searching to current line.
 73	FindSubMatch(reg *regexp.Regexp) [][]byte
 74
 75	// FindClosure finds corresponding closure.
 76	FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool)
 77}
 78
 79// FindClosureOptions is options for Reader.FindClosure.
 80type FindClosureOptions struct {
 81	// CodeSpan is a flag for the FindClosure. If this is set to true,
 82	// FindClosure ignores closers in codespans.
 83	CodeSpan bool
 84
 85	// Nesting is a flag for the FindClosure. If this is set to true,
 86	// FindClosure allows nesting.
 87	Nesting bool
 88
 89	// Newline is a flag for the FindClosure. If this is set to true,
 90	// FindClosure searches for a closer over multiple lines.
 91	Newline bool
 92
 93	// Advance is a flag for the FindClosure. If this is set to true,
 94	// FindClosure advances pointers when closer is found.
 95	Advance bool
 96}
 97
 98type reader struct {
 99	source       []byte
100	sourceLength int
101	line         int
102	peekedLine   []byte
103	pos          Segment
104	head         int
105	lineOffset   int
106}
107
108// NewReader return a new Reader that can read UTF-8 bytes .
109func NewReader(source []byte) Reader {
110	r := &reader{
111		source:       source,
112		sourceLength: len(source),
113	}
114	r.ResetPosition()
115	return r
116}
117
118func (r *reader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
119	return findClosureReader(r, opener, closer, options)
120}
121
122func (r *reader) ResetPosition() {
123	r.line = -1
124	r.head = 0
125	r.lineOffset = -1
126	r.AdvanceLine()
127}
128
129func (r *reader) Source() []byte {
130	return r.source
131}
132
133func (r *reader) Value(seg Segment) []byte {
134	return seg.Value(r.source)
135}
136
137func (r *reader) Peek() byte {
138	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
139		if r.pos.Padding != 0 {
140			return space[0]
141		}
142		return r.source[r.pos.Start]
143	}
144	return EOF
145}
146
147func (r *reader) PeekLine() ([]byte, Segment) {
148	if r.pos.Start >= 0 && r.pos.Start < r.sourceLength {
149		if r.peekedLine == nil {
150			r.peekedLine = r.pos.Value(r.Source())
151		}
152		return r.peekedLine, r.pos
153	}
154	return nil, r.pos
155}
156
157// io.RuneReader interface.
158func (r *reader) ReadRune() (rune, int, error) {
159	return readRuneReader(r)
160}
161
162func (r *reader) LineOffset() int {
163	if r.lineOffset < 0 {
164		v := 0
165		for i := r.head; i < r.pos.Start; i++ {
166			if r.source[i] == '\t' {
167				v += util.TabWidth(v)
168			} else {
169				v++
170			}
171		}
172		r.lineOffset = v - r.pos.Padding
173	}
174	return r.lineOffset
175}
176
177func (r *reader) PrecendingCharacter() rune {
178	if r.pos.Start <= 0 {
179		if r.pos.Padding != 0 {
180			return rune(' ')
181		}
182		return rune('\n')
183	}
184	i := r.pos.Start - 1
185	for ; i >= 0; i-- {
186		if utf8.RuneStart(r.source[i]) {
187			break
188		}
189	}
190	rn, _ := utf8.DecodeRune(r.source[i:])
191	return rn
192}
193
194func (r *reader) Advance(n int) {
195	r.lineOffset = -1
196	if n < len(r.peekedLine) && r.pos.Padding == 0 {
197		r.pos.Start += n
198		r.peekedLine = nil
199		return
200	}
201	r.peekedLine = nil
202	l := r.sourceLength
203	for ; n > 0 && r.pos.Start < l; n-- {
204		if r.pos.Padding != 0 {
205			r.pos.Padding--
206			continue
207		}
208		if r.source[r.pos.Start] == '\n' {
209			r.AdvanceLine()
210			continue
211		}
212		r.pos.Start++
213	}
214}
215
216func (r *reader) AdvanceAndSetPadding(n, padding int) {
217	r.Advance(n)
218	if padding > r.pos.Padding {
219		r.SetPadding(padding)
220	}
221}
222
223func (r *reader) AdvanceLine() {
224	r.lineOffset = -1
225	r.peekedLine = nil
226	r.pos.Start = r.pos.Stop
227	r.head = r.pos.Start
228	if r.pos.Start < 0 {
229		return
230	}
231	r.pos.Stop = r.sourceLength
232	for i := r.pos.Start; i < r.sourceLength; i++ {
233		c := r.source[i]
234		if c == '\n' {
235			r.pos.Stop = i + 1
236			break
237		}
238	}
239	r.line++
240	r.pos.Padding = 0
241}
242
243func (r *reader) Position() (int, Segment) {
244	return r.line, r.pos
245}
246
247func (r *reader) SetPosition(line int, pos Segment) {
248	r.lineOffset = -1
249	r.line = line
250	r.pos = pos
251}
252
253func (r *reader) SetPadding(v int) {
254	r.pos.Padding = v
255}
256
257func (r *reader) SkipSpaces() (Segment, int, bool) {
258	return skipSpacesReader(r)
259}
260
261func (r *reader) SkipBlankLines() (Segment, int, bool) {
262	return skipBlankLinesReader(r)
263}
264
265func (r *reader) Match(reg *regexp.Regexp) bool {
266	return matchReader(r, reg)
267}
268
269func (r *reader) FindSubMatch(reg *regexp.Regexp) [][]byte {
270	return findSubMatchReader(r, reg)
271}
272
273// A BlockReader interface is a reader that is optimized for Blocks.
274type BlockReader interface {
275	Reader
276	// Reset resets current state and sets new segments to the reader.
277	Reset(segment *Segments)
278}
279
280type blockReader struct {
281	source         []byte
282	segments       *Segments
283	segmentsLength int
284	line           int
285	pos            Segment
286	head           int
287	last           int
288	lineOffset     int
289}
290
291// NewBlockReader returns a new BlockReader.
292func NewBlockReader(source []byte, segments *Segments) BlockReader {
293	r := &blockReader{
294		source: source,
295	}
296	if segments != nil {
297		r.Reset(segments)
298	}
299	return r
300}
301
302func (r *blockReader) FindClosure(opener, closer byte, options FindClosureOptions) (*Segments, bool) {
303	return findClosureReader(r, opener, closer, options)
304}
305
306func (r *blockReader) ResetPosition() {
307	r.line = -1
308	r.head = 0
309	r.last = 0
310	r.lineOffset = -1
311	r.pos.Start = -1
312	r.pos.Stop = -1
313	r.pos.Padding = 0
314	if r.segmentsLength > 0 {
315		last := r.segments.At(r.segmentsLength - 1)
316		r.last = last.Stop
317	}
318	r.AdvanceLine()
319}
320
321func (r *blockReader) Reset(segments *Segments) {
322	r.segments = segments
323	r.segmentsLength = segments.Len()
324	r.ResetPosition()
325}
326
327func (r *blockReader) Source() []byte {
328	return r.source
329}
330
331func (r *blockReader) Value(seg Segment) []byte {
332	line := r.segmentsLength - 1
333	ret := make([]byte, 0, seg.Stop-seg.Start+1)
334	for ; line >= 0; line-- {
335		if seg.Start >= r.segments.At(line).Start {
336			break
337		}
338	}
339	i := seg.Start
340	for ; line < r.segmentsLength; line++ {
341		s := r.segments.At(line)
342		if i < 0 {
343			i = s.Start
344		}
345		ret = s.ConcatPadding(ret)
346		for ; i < seg.Stop && i < s.Stop; i++ {
347			ret = append(ret, r.source[i])
348		}
349		i = -1
350		if s.Stop > seg.Stop {
351			break
352		}
353	}
354	return ret
355}
356
357// io.RuneReader interface.
358func (r *blockReader) ReadRune() (rune, int, error) {
359	return readRuneReader(r)
360}
361
362func (r *blockReader) PrecendingCharacter() rune {
363	if r.pos.Padding != 0 {
364		return rune(' ')
365	}
366	if r.segments.Len() < 1 {
367		return rune('\n')
368	}
369	firstSegment := r.segments.At(0)
370	if r.line == 0 && r.pos.Start <= firstSegment.Start {
371		return rune('\n')
372	}
373	l := len(r.source)
374	i := r.pos.Start - 1
375	for ; i < l && i >= 0; i-- {
376		if utf8.RuneStart(r.source[i]) {
377			break
378		}
379	}
380	if i < 0 || i >= l {
381		return rune('\n')
382	}
383	rn, _ := utf8.DecodeRune(r.source[i:])
384	return rn
385}
386
387func (r *blockReader) LineOffset() int {
388	if r.lineOffset < 0 {
389		v := 0
390		for i := r.head; i < r.pos.Start; i++ {
391			if r.source[i] == '\t' {
392				v += util.TabWidth(v)
393			} else {
394				v++
395			}
396		}
397		r.lineOffset = v - r.pos.Padding
398	}
399	return r.lineOffset
400}
401
402func (r *blockReader) Peek() byte {
403	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
404		if r.pos.Padding != 0 {
405			return space[0]
406		}
407		return r.source[r.pos.Start]
408	}
409	return EOF
410}
411
412func (r *blockReader) PeekLine() ([]byte, Segment) {
413	if r.line < r.segmentsLength && r.pos.Start >= 0 && r.pos.Start < r.last {
414		return r.pos.Value(r.source), r.pos
415	}
416	return nil, r.pos
417}
418
419func (r *blockReader) Advance(n int) {
420	r.lineOffset = -1
421
422	if n < r.pos.Stop-r.pos.Start && r.pos.Padding == 0 {
423		r.pos.Start += n
424		return
425	}
426
427	for ; n > 0; n-- {
428		if r.pos.Padding != 0 {
429			r.pos.Padding--
430			continue
431		}
432		if r.pos.Start >= r.pos.Stop-1 && r.pos.Stop < r.last {
433			r.AdvanceLine()
434			continue
435		}
436		r.pos.Start++
437	}
438}
439
440func (r *blockReader) AdvanceAndSetPadding(n, padding int) {
441	r.Advance(n)
442	if padding > r.pos.Padding {
443		r.SetPadding(padding)
444	}
445}
446
447func (r *blockReader) AdvanceLine() {
448	r.SetPosition(r.line+1, NewSegment(invalidValue, invalidValue))
449	r.head = r.pos.Start
450}
451
452func (r *blockReader) Position() (int, Segment) {
453	return r.line, r.pos
454}
455
456func (r *blockReader) SetPosition(line int, pos Segment) {
457	r.lineOffset = -1
458	r.line = line
459	if pos.Start == invalidValue {
460		if r.line < r.segmentsLength {
461			s := r.segments.At(line)
462			r.head = s.Start
463			r.pos = s
464		}
465	} else {
466		r.pos = pos
467		if r.line < r.segmentsLength {
468			s := r.segments.At(line)
469			r.head = s.Start
470		}
471	}
472}
473
474func (r *blockReader) SetPadding(v int) {
475	r.lineOffset = -1
476	r.pos.Padding = v
477}
478
479func (r *blockReader) SkipSpaces() (Segment, int, bool) {
480	return skipSpacesReader(r)
481}
482
483func (r *blockReader) SkipBlankLines() (Segment, int, bool) {
484	return skipBlankLinesReader(r)
485}
486
487func (r *blockReader) Match(reg *regexp.Regexp) bool {
488	return matchReader(r, reg)
489}
490
491func (r *blockReader) FindSubMatch(reg *regexp.Regexp) [][]byte {
492	return findSubMatchReader(r, reg)
493}
494
495func skipBlankLinesReader(r Reader) (Segment, int, bool) {
496	lines := 0
497	for {
498		line, seg := r.PeekLine()
499		if line == nil {
500			return seg, lines, false
501		}
502		if util.IsBlank(line) {
503			lines++
504			r.AdvanceLine()
505		} else {
506			return seg, lines, true
507		}
508	}
509}
510
511func skipSpacesReader(r Reader) (Segment, int, bool) {
512	chars := 0
513	for {
514		line, segment := r.PeekLine()
515		if line == nil {
516			return segment, chars, false
517		}
518		for i, c := range line {
519			if util.IsSpace(c) {
520				chars++
521				r.Advance(1)
522				continue
523			}
524			return segment.WithStart(segment.Start + i + 1), chars, true
525		}
526	}
527}
528
529func matchReader(r Reader, reg *regexp.Regexp) bool {
530	oldline, oldseg := r.Position()
531	match := reg.FindReaderSubmatchIndex(r)
532	r.SetPosition(oldline, oldseg)
533	if match == nil {
534		return false
535	}
536	r.Advance(match[1] - match[0])
537	return true
538}
539
540func findSubMatchReader(r Reader, reg *regexp.Regexp) [][]byte {
541	oldLine, oldSeg := r.Position()
542	match := reg.FindReaderSubmatchIndex(r)
543	r.SetPosition(oldLine, oldSeg)
544	if match == nil {
545		return nil
546	}
547	var bb bytes.Buffer
548	bb.Grow(match[1] - match[0])
549	for i := 0; i < match[1]; {
550		r, size, _ := readRuneReader(r)
551		i += size
552		bb.WriteRune(r)
553	}
554	bs := bb.Bytes()
555	var result [][]byte
556	for i := 0; i < len(match); i += 2 {
557		if match[i] < 0 {
558			result = append(result, []byte{})
559			continue
560		}
561		result = append(result, bs[match[i]:match[i+1]])
562	}
563
564	r.SetPosition(oldLine, oldSeg)
565	r.Advance(match[1] - match[0])
566	return result
567}
568
569func readRuneReader(r Reader) (rune, int, error) {
570	line, _ := r.PeekLine()
571	if line == nil {
572		return 0, 0, io.EOF
573	}
574	rn, size := utf8.DecodeRune(line)
575	if rn == utf8.RuneError {
576		return 0, 0, io.EOF
577	}
578	r.Advance(size)
579	return rn, size, nil
580}
581
582func findClosureReader(r Reader, opener, closer byte, opts FindClosureOptions) (*Segments, bool) {
583	opened := 1
584	codeSpanOpener := 0
585	closed := false
586	orgline, orgpos := r.Position()
587	var ret *Segments
588
589	for {
590		bs, seg := r.PeekLine()
591		if bs == nil {
592			goto end
593		}
594		i := 0
595		for i < len(bs) {
596			c := bs[i]
597			if opts.CodeSpan && codeSpanOpener != 0 && c == '`' {
598				codeSpanCloser := 0
599				for ; i < len(bs); i++ {
600					if bs[i] == '`' {
601						codeSpanCloser++
602					} else {
603						i--
604						break
605					}
606				}
607				if codeSpanCloser == codeSpanOpener {
608					codeSpanOpener = 0
609				}
610			} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && util.IsPunct(bs[i+1]) {
611				i += 2
612				continue
613			} else if opts.CodeSpan && codeSpanOpener == 0 && c == '`' {
614				for ; i < len(bs); i++ {
615					if bs[i] == '`' {
616						codeSpanOpener++
617					} else {
618						i--
619						break
620					}
621				}
622			} else if (opts.CodeSpan && codeSpanOpener == 0) || !opts.CodeSpan {
623				if c == closer {
624					opened--
625					if opened == 0 {
626						if ret == nil {
627							ret = NewSegments()
628						}
629						ret.Append(seg.WithStop(seg.Start + i))
630						r.Advance(i + 1)
631						closed = true
632						goto end
633					}
634				} else if c == opener {
635					if !opts.Nesting {
636						goto end
637					}
638					opened++
639				}
640			}
641			i++
642		}
643		if !opts.Newline {
644			goto end
645		}
646		r.AdvanceLine()
647		if ret == nil {
648			ret = NewSegments()
649		}
650		ret.Append(seg)
651	}
652end:
653	if !opts.Advance {
654		r.SetPosition(orgline, orgpos)
655	}
656	if closed {
657		return ret, true
658	}
659	return nil, false
660}