delegate.go

package chroma

import (
	"bytes"
)

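// delegatingLexer tokenises with the language lexer first, then fills the
// unrecognised ("Other") gaps with tokens from the root lexer.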
type delegatingLexer struct {
	root     Lexer
	language Lexer
}

// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexers as arguments: a root lexer and a language lexer. First, everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
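//
// A minimal usage sketch, where htmlLexer and phpLexer stand in for real
// root and embedded-language lexers:
//
//	delegated := DelegatingLexer(htmlLexer, phpLexer)
//	iterator, err := delegated.Tokenise(nil, source)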
func DelegatingLexer(root Lexer, language Lexer) Lexer {
	return &delegatingLexer{
		root:     root,
		language: language,
	}
}

func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}

// An insertion is the character range where language tokens should be inserted.
type insertion struct {
	start, end int
	tokens     []Token
}

func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
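	// For example, with PHP embedded in HTML, the language (PHP) pass might
	// produce Other("<h1>"), some PHP tokens, then Other("</h1>"): the PHP
	// tokens become one insertion recording their character range, while the
	// buffer accumulates "<h1></h1>" for the root (HTML) lexer.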
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
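			// A run of language tokens has just ended; record where
			// the current insertion stops.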
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
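			// A language token follows Other text (or starts the
			// stream); open a new insertion at the current offset.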
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the other tokens.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens.
	var out []Token
	offset = 0 // Offset into text.
	tokenIndex := 0
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	for t != EOF || i != nil {
		// fmt.Printf("%d->%d:%q   %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
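		// If the next insertion begins inside the current root token,
		// split that token around the insertion; otherwise emit the
		// root token unchanged.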
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			offset += i.end - i.start
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}

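// splitToken splits t at the given byte offset, returning the left and right
// halves. Either half may be EOF when the split falls at a token boundary.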
func splitToken(t Token, offset int) (l Token, r Token) {
	if t == EOF {
		return EOF, EOF
	}
	if offset == 0 {
		return EOF, t
	}
	if offset == len(t.Value) {
		return t, EOF
	}
	l = t.Clone()
	r = t.Clone()
	l.Value = l.Value[:offset]
	r.Value = r.Value[offset:]
	return
}
137}