package chroma

import (
	"bytes"
)

type delegatingLexer struct {
	root     Lexer
	language Lexer
}

// DelegatingLexer combines two lexers to handle the common case of a language embedded inside another, such as PHP
// inside HTML or PHP inside plain text.
//
// It takes two lexers as arguments: a root lexer and a language lexer. First, everything is scanned using the language
// lexer, which must return "Other" for unrecognised tokens. Then all "Other" tokens are lexed using the root lexer.
// Finally, these two sets of tokens are merged.
//
// The lexers in the template lexer package are built on this lexer.
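//
// A minimal usage sketch, assuming two existing lexers are available (how
// they are obtained is up to the caller and not shown here):
//
//	var htmlLexer, phpLexer Lexer // e.g. HTML as root, PHP as the embedded language
//	lexer := DelegatingLexer(htmlLexer, phpLexer)
//	it, err := lexer.Tokenise(nil, source) // source holds the mixed-language input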
func DelegatingLexer(root Lexer, language Lexer) Lexer {
	return &delegatingLexer{
		root:     root,
		language: language,
	}
}

func (d *delegatingLexer) Config() *Config {
	return d.language.Config()
}

// An insertion is the character range where language tokens should be inserted.
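// For example, with PHP embedded in HTML, each contiguous run of PHP tokens
// produces one insertion spanning that run's byte range in the original text.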
type insertion struct {
	start, end int
	tokens     []Token
}

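// Tokenise lexes text with the language lexer, gathers the "Other" text and
// the insertion ranges, lexes the gathered text with the root lexer, and
// interleaves the two token streams back together.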
func (d *delegatingLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint: gocognit
	tokens, err := Tokenise(Coalesce(d.language), options, text)
	if err != nil {
		return nil, err
	}
	// Compute insertions and gather "Other" tokens.
	others := &bytes.Buffer{}
	insertions := []*insertion{}
	var insert *insertion
	offset := 0
	var last Token
	for _, t := range tokens {
		if t.Type == Other {
			// A transition from language tokens back to Other text closes the
			// current insertion at the current offset.
			if last != EOF && insert != nil && last.Type != Other {
				insert.end = offset
			}
			others.WriteString(t.Value)
		} else {
			// A transition from Other text to language tokens opens a new
			// insertion at the current offset.
			if last == EOF || last.Type == Other {
				insert = &insertion{start: offset}
				insertions = append(insertions, insert)
			}
			insert.tokens = append(insert.tokens, t)
		}
		last = t
		offset += len(t.Value)
	}

	// The language lexer found nothing to delegate, so tokenise the whole
	// input with the root lexer alone.
	if len(insertions) == 0 {
		return d.root.Tokenise(options, text)
	}

	// Lex the other tokens.
	rootTokens, err := Tokenise(Coalesce(d.root), options, others.String())
	if err != nil {
		return nil, err
	}

	// Interleave the two sets of tokens. Root tokens are positioned within the
	// concatenated "Other" text, while insertions are positioned within the
	// original text; advancing offset past each insertion maps the root tokens
	// back onto the original text.
	var out []Token
	offset = 0 // Offset into text.
	tokenIndex := 0
	nextToken := func() Token {
		if tokenIndex >= len(rootTokens) {
			return EOF
		}
		t := rootTokens[tokenIndex]
		tokenIndex++
		return t
	}
	insertionIndex := 0
	nextInsertion := func() *insertion {
		if insertionIndex >= len(insertions) {
			return nil
		}
		i := insertions[insertionIndex]
		insertionIndex++
		return i
	}
	t := nextToken()
	i := nextInsertion()
	for t != EOF || i != nil {
		// The next insertion starts within the current root token (or the
		// root tokens are exhausted): emit the part of the token before the
		// insertion, then the insertion's language tokens, then continue with
		// the remainder of the token.
		if t == EOF || (i != nil && i.start < offset+len(t.Value)) {
			var l Token
			l, t = splitToken(t, i.start-offset)
			if l != EOF {
				out = append(out, l)
				offset += len(l.Value)
			}
			out = append(out, i.tokens...)
			offset += i.end - i.start
			if t == EOF {
				t = nextToken()
			}
			i = nextInsertion()
		} else {
			out = append(out, t)
			offset += len(t.Value)
			t = nextToken()
		}
	}
	return Literator(out...), nil
}

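// splitToken splits t at offset into a left token holding t.Value[:offset]
// and a right token holding t.Value[offset:]. An empty side is returned as
// EOF; for example, splitting a token whose value is "abcd" at offset 1
// yields values "a" and "bcd".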
func splitToken(t Token, offset int) (l Token, r Token) {
	if t == EOF {
		return EOF, EOF
	}
	if offset == 0 {
		return EOF, t
	}
	if offset == len(t.Value) {
		return t, EOF
	}
	l = t.Clone()
	r = t.Clone()
	l.Value = l.Value[:offset]
	r.Value = r.Value[offset:]
	return
}