package chroma

import (
	"fmt"
)

// An Emitter takes group matches and returns tokens.
type Emitter interface {
	// Emit tokens for the given regex groups.
	Emit(groups []string, state *LexerState) Iterator
}

// SerialisableEmitter is an Emitter that can be serialised and deserialised to/from XML.
type SerialisableEmitter interface {
	Emitter
	EmitterKind() string
}

// EmitterFunc is a function that is an Emitter.
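//
// For example (an illustrative sketch only, not part of the package), a
// one-off emitter can be written inline as a closure:
//
//	var wholeMatch = EmitterFunc(func(groups []string, _ *LexerState) Iterator {
//		return Literator(Token{Type: Keyword, Value: groups[0]})
//	})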
type EmitterFunc func(groups []string, state *LexerState) Iterator

// Emit tokens for groups.
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
	return e(groups, state)
}

// Emitters is a list of Emitters.
type Emitters []Emitter

type byGroupsEmitter struct {
	Emitters
}

// ByGroups emits a token for each matching group in the rule's regex.
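//
// For example, a hypothetical rule (the pattern and token types here are
// illustrative assumptions, not taken from a real lexer) might look like:
//
//	{`(\w+)(\s+)(\w+)`, ByGroups(Keyword, Text, Name), nil},
//
// Each capture group is passed to the emitter at the same position; a nil
// emitter skips its group, and a mismatch between the number of groups and
// the number of emitters produces an Error token for the whole match.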
func ByGroups(emitters ...Emitter) Emitter {
	return &byGroupsEmitter{Emitters: emitters}
}

func (b *byGroupsEmitter) EmitterKind() string { return "bygroups" }

func (b *byGroupsEmitter) Emit(groups []string, state *LexerState) Iterator {
	iterators := make([]Iterator, 0, len(groups)-1)
	if len(b.Emitters) != len(groups)-1 {
		iterators = append(iterators, Error.Emit(groups, state))
		// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
	} else {
		for i, group := range groups[1:] {
			if b.Emitters[i] != nil {
				iterators = append(iterators, b.Emitters[i].Emit([]string{group}, state))
			}
		}
	}
	return Concaterator(iterators...)
}

// ByGroupNames emits a token for each named matching group in the rule's regex.
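//
// For example, a hypothetical rule (the pattern, group names, and token types
// are illustrative assumptions) might map each named group to an emitter:
//
//	{`(?P<key>\w+)(?P<op>=)(?P<value>\w+)`, ByGroupNames(map[string]Emitter{
//		"key":   NameAttribute,
//		"op":    Operator,
//		"value": String,
//	}), nil},
//
// Named groups with no corresponding entry in the map are emitted as Error
// tokens.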
func ByGroupNames(emitters map[string]Emitter) Emitter {
	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
		if len(state.NamedGroups)-1 == 0 {
			if emitter, ok := emitters[`0`]; ok {
				iterators = append(iterators, emitter.Emit(groups, state))
			} else {
				iterators = append(iterators, Error.Emit(groups, state))
			}
		} else {
			ruleRegex := state.Rules[state.State][state.Rule].Regexp
			for i := 1; i < len(state.NamedGroups); i++ {
				groupName := ruleRegex.GroupNameFromNumber(i)
				group := state.NamedGroups[groupName]
				if emitter, ok := emitters[groupName]; ok {
					if emitter != nil {
						iterators = append(iterators, emitter.Emit([]string{group}, state))
					}
				} else {
					iterators = append(iterators, Error.Emit([]string{group}, state))
				}
			}
		}
		return Concaterator(iterators...)
	})
}

// UsingByGroup emits tokens for the matched groups in the regex using a
// sublexer. Used when lexing code blocks where the name of a sublexer is
// contained within the block, for example on a Markdown text block or SQL
// language block.
//
// An attempt to load the sublexer will be made using the captured value from
// the text of the matched sublexerNameGroup. If a sublexer matching the
// sublexerNameGroup is available, then tokens for the matched codeGroup will
// be emitted using the sublexer. Otherwise, if no sublexer is available, then
// tokens will be emitted from the passed emitter.
//
// Example:
//
//	var Markdown = internal.Register(MustNewLexer(
//		&Config{
//			Name:      "markdown",
//			Aliases:   []string{"md", "mkd"},
//			Filenames: []string{"*.md", "*.mkd", "*.markdown"},
//			MimeTypes: []string{"text/x-markdown"},
//		},
//		Rules{
//			"root": {
//				{"^(```)(\\w+)(\\n)([\\w\\W]*?)(^```$)",
//					UsingByGroup(
//						2, 4,
//						String, String, String, Text, String,
//					),
//					nil,
//				},
//			},
//		},
//	))
//
// See lexers/markdown.go for the complete example.
//
// Note: panics if the number of emitters does not equal the number of matched
// groups in the regex.
func UsingByGroup(sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
	return &usingByGroup{
		SublexerNameGroup: sublexerNameGroup,
		CodeGroup:         codeGroup,
		Emitters:          emitters,
	}
}

type usingByGroup struct {
	SublexerNameGroup int      `xml:"sublexer_name_group"`
	CodeGroup         int      `xml:"code_group"`
	Emitters          Emitters `xml:"emitters"`
}

func (u *usingByGroup) EmitterKind() string { return "usingbygroup" }
func (u *usingByGroup) Emit(groups []string, state *LexerState) Iterator {
	// bounds check
	if len(u.Emitters) != len(groups)-1 {
		panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
	}

	// grab sublexer
	sublexer := state.Registry.Get(groups[u.SublexerNameGroup])

	// build iterators
	iterators := make([]Iterator, len(groups)-1)
	for i, group := range groups[1:] {
		if i == u.CodeGroup-1 && sublexer != nil {
			var err error
			iterators[i], err = sublexer.Tokenise(nil, groups[u.CodeGroup])
			if err != nil {
				panic(err)
			}
		} else if u.Emitters[i] != nil {
			iterators[i] = u.Emitters[i].Emit([]string{group}, state)
		}
	}
	return Concaterator(iterators...)
}

// UsingLexer returns an Emitter that uses a given Lexer for parsing and emitting.
//
// This Emitter is not serialisable.
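//
// A hypothetical rule (the pattern and lexer variable are illustrative
// assumptions) that hands the entire match to another Lexer value:
//
//	{`(?s)^---\n.*?\n---\n`, UsingLexer(yamlLexer), nil},
//
// where yamlLexer is a Lexer available when the rules are constructed.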
func UsingLexer(lexer Lexer) Emitter {
	return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
		it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
		if err != nil {
			panic(err)
		}
		return it
	})
}

type usingEmitter struct {
	Lexer string `xml:"lexer,attr"`
}

func (u *usingEmitter) EmitterKind() string { return "using" }

func (u *usingEmitter) Emit(groups []string, state *LexerState) Iterator {
	if state.Registry == nil {
		panic(fmt.Sprintf("no LexerRegistry available for Using(%q)", u.Lexer))
	}
	lexer := state.Registry.Get(u.Lexer)
	if lexer == nil {
		panic(fmt.Sprintf("no such lexer %q", u.Lexer))
	}
	it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
	if err != nil {
		panic(err)
	}
	return it
}

// Using returns an Emitter that uses a given Lexer reference for parsing and emitting.
//
// The referenced lexer must be stored in the same LexerRegistry.
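//
// For example, a hypothetical rule (the pattern and lexer name are
// illustrative assumptions) might delegate fenced Go code to a lexer
// registered under the name "Go":
//
//	{"(?s)(```go\\n)(.*?)(```)", ByGroups(String, Using("Go"), String), nil},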
func Using(lexer string) Emitter {
	return &usingEmitter{Lexer: lexer}
}

type usingSelfEmitter struct {
	State string `xml:"state,attr"`
}

func (u *usingSelfEmitter) EmitterKind() string { return "usingself" }

func (u *usingSelfEmitter) Emit(groups []string, state *LexerState) Iterator {
	it, err := state.Lexer.Tokenise(&TokeniseOptions{State: u.State, Nested: true}, groups[0])
	if err != nil {
		panic(err)
	}
	return it
}

// UsingSelf is like Using, but uses the current Lexer.
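//
// For example, a hypothetical rule (the pattern and state name are
// illustrative assumptions) might tokenise an interpolated expression using
// the lexer's own "root" state:
//
//	{`(\$\{)([^}]*)(\})`, ByGroups(Punctuation, UsingSelf("root"), Punctuation), nil},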
func UsingSelf(stateName string) Emitter {
	return &usingSelfEmitter{stateName}
}