package chroma

import (
	"bytes"
)

type delegatingLexer struct {
	root     Lexer
	language Lexer
}

// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexers as arguments: a root lexer and a language lexer. First, everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers from the template lexer package use this base lexer.
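//
// A minimal usage sketch (the lexer variables here are assumptions for
// illustration; real code would typically obtain them from the lexers
// subpackage, e.g. via lexers.Get):
//
//	combined := DelegatingLexer(htmlLexer, phpLexer)
//	it, err := combined.Tokenise(nil, source)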
func DelegatingLexer(root Lexer, language Lexer) Lexer {
	return &delegatingLexer{
		root:     root,
		language: language,
	}
}

// AnalyseText delegates to the root lexer.
func (d *delegatingLexer) AnalyseText(text string) float32 {
	return d.root.AnalyseText(text)
}

// SetAnalyser sets the analyser on the root lexer.
func (d *delegatingLexer) SetAnalyser(analyser func(text string) float32) Lexer {
	d.root.SetAnalyser(analyser)
	return d
}

// SetRegistry sets the lexer registry on both the root and language lexers.
func (d *delegatingLexer) SetRegistry(r *LexerRegistry) Lexer {
	d.root.SetRegistry(r)
	d.language.SetRegistry(r)
	return d
}

// Config returns the configuration of the language lexer.
func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}

// An insertion is the character range within the input text where language
// tokens should be inserted.
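//
// For example (illustrative only), lexing "<h1><?php echo 1; ?></h1>" with a
// PHP language lexer produces Other tokens for "<h1>" and "</h1>" and one
// insertion covering the "<?php ... ?>" span, holding the PHP tokens that are
// later spliced in between the root lexer's HTML tokens.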
type insertion struct {
	start, end int
	tokens     []Token
}

// Tokenise tokenises text with the language lexer first, then fills in the
// remaining "Other" regions with the root lexer and merges the two streams.
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
			// A transition from language tokens back to "Other" text closes
			// the current insertion.
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			// A transition from "Other" text (or the start of input) to
			// language tokens opens a new insertion.
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

	// No embedded language was found; tokenise everything with the root lexer.
	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the gathered "Other" text with the root lexer.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}
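	// At this point rootTokens covers only the concatenated "Other" text,
	// while insertions records where the language tokens belong in the
	// original text. Schematically (illustrative):
	//
	//	text:       <h1> <?php ... ?> </h1>
	//	rootTokens: <h1>              </h1>
	//	insertions:      [PHP tokens]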

	// Interleave the two sets of tokens.
	var out []Token
	offset = 0 // Offset into the original text.
	tokenIndex := 0
	// nextToken returns the next root token, or EOF when exhausted.
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	// nextInsertion returns the next insertion, or nil when exhausted.
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	for t != EOF || i != nil {
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			// Either the root tokens are exhausted, or the next insertion
			// starts before the current root token ends: emit any prefix of
			// the token, then splice in the language tokens.
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			offset += i.end - i.start // Advance past the inserted range.
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
			// The current root token ends before the next insertion starts.
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}

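// splitToken splits t into two tokens of the same type at the given byte
// offset. Either half may be EOF when the offset falls at the start or end of
// the token: for example, splitting a 5-byte token at offset 2 yields a
// 2-byte left half and a 3-byte right half.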
func splitToken(t Token, offset int) (l Token, r Token) {
	if t == EOF {
		return EOF, EOF
	}
	if offset == 0 {
		return EOF, t
	}
	if offset == len(t.Value) {
		return t, EOF
	}
	l = t.Clone()
	r = t.Clone()
	l.Value = l.Value[:offset]
	r.Value = r.Value[offset:]
	return
}