package chroma

import (
	"bytes"
)

type delegatingLexer struct {
	root     Lexer
	language Lexer
}

// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexers as arguments: a root lexer and a language lexer. First everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
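//
// A sketch of typical usage, building a PHP-in-HTML lexer (the lexer lookups
// are illustrative and assume the lexers subpackage is imported):
//
//	php := lexers.Get("php")   // language lexer; emits Other for non-PHP text
//	html := lexers.Get("html") // root lexer; handles the Other runs
//	lexer := chroma.DelegatingLexer(html, php)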
func DelegatingLexer(root Lexer, language Lexer) Lexer {
	return &delegatingLexer{
		root:     root,
		language: language,
	}
}

// AnalyseText delegates to the root lexer, which sees the document as a whole.
func (d *delegatingLexer) AnalyseText(text string) float32 {
	return d.root.AnalyseText(text)
}

// SetAnalyser replaces the analyser on the root lexer.
func (d *delegatingLexer) SetAnalyser(analyser func(text string) float32) Lexer {
	d.root.SetAnalyser(analyser)
	return d
}

// SetRegistry propagates the registry to both underlying lexers.
func (d *delegatingLexer) SetRegistry(r *LexerRegistry) Lexer {
	d.root.SetRegistry(r)
	d.language.SetRegistry(r)
	return d
}

// Config returns the language lexer's configuration.
func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}
// An insertion is the character range in the original text where language
// tokens should be spliced back into the stream of root-lexer tokens.
type insertion struct {
	start, end int
	tokens     []Token
}

func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
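	// offset tracks the position in the original text. An insertion opens at
	// the first non-Other token after an Other token (or at the start of the
	// stream) and closes when the next Other token is seen.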
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

	// No embedded language was found; tokenise with the root lexer alone.
	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the gathered "Other" tokens with the root lexer.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens.
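	// The merge walks the root tokens in order, splitting any root token that
	// straddles an insertion point, and splices each insertion's language
	// tokens in at its recorded start offset.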
	var out []Token
	offset = 0 // Offset into text.
	tokenIndex := 0
	// nextToken returns successive root tokens, or EOF once exhausted.
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	// nextInsertion returns successive insertions, or nil once exhausted.
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	for t != EOF || i != nil {
		// fmt.Printf("%d->%d:%q   %d->%d:%q\n", offset, offset+len(t.Value), t.Value, i.start, i.end, Stringify(i.tokens...))
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			// The pending insertion starts inside the current root token (or
			// the root tokens are exhausted): emit the left fragment, then the
			// insertion's language tokens.
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			offset += i.end - i.start
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}

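// splitToken splits a token in two at the given byte offset within its value.
// An empty side is returned as EOF rather than as a zero-length token.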
func splitToken(t Token, offset int) (l Token, r Token) {
	if t == EOF {
		return EOF, EOF
	}
	if offset == 0 {
		return EOF, t
	}
	if offset == len(t.Value) {
		return t, EOF
	}
	l = t.Clone()
	r = t.Clone()
	l.Value = l.Value[:offset]
	r.Value = r.Value[offset:]
	return
}