package chroma

import (
	"fmt"
	"strings"
)

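// defaultOptions are the fallback tokenisation options, intended for use
// when Tokenise is called with nil options (an assumption based on the
// defaults documented on TokeniseOptions below).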
var (
	defaultOptions = &TokeniseOptions{
		State:    "root",
		EnsureLF: true,
	}
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string `xml:"name,omitempty"`

	// Shortcuts for the lexer.
	Aliases []string `xml:"alias,omitempty"`

	// File name globs.
	Filenames []string `xml:"filename,omitempty"`

	// Secondary file name globs.
	AliasFilenames []string `xml:"alias_filename,omitempty"`

	// MIME types.
	MimeTypes []string `xml:"mime_type,omitempty"`

	// Regex matching is case-insensitive.
	CaseInsensitive bool `xml:"case_insensitive,omitempty"`

	// Regex matches all characters.
	DotAll bool `xml:"dot_all,omitempty"`

	// Regex does not match across lines ($ matches EOL).
	//
	// Defaults to multiline.
	NotMultiline bool `xml:"not_multiline,omitempty"`

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	// StripAll bool

	// Make sure that the input ends with a newline. This
	// is required for some lexers that consume input linewise.
	EnsureNL bool `xml:"ensure_nl,omitempty"`

	// If given and greater than 0, expand tabs in the input.
	// TabSize int

	// Priority of lexer.
	//
	// If this is 0 it will be treated as a default of 1.
	Priority float32 `xml:"priority,omitempty"`

	// Analyse is a list of regexes to match against the input.
	//
	// If the first attribute is set to true, the score of the first matching
	// pattern is returned; otherwise the sum of the scores of all matching
	// patterns is used as the final score.
	Analyse *AnalyseConfig `xml:"analyse,omitempty"`
}
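
// A minimal sketch of a hand-built Config; the field values below are
// illustrative only and do not correspond to any real lexer:
//
//	cfg := &Config{
//		Name:            "Example",
//		Aliases:         []string{"example", "ex"},
//		Filenames:       []string{"*.ex"},
//		MimeTypes:       []string{"text/x-example"},
//		CaseInsensitive: true,
//	}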

// AnalyseConfig defines the list of regex analysers for a lexer.
type AnalyseConfig struct {
	Regexes []RegexConfig `xml:"regex,omitempty"`
	// If true, the score of the first matching regex is returned; otherwise
	// the scores of all matching regexes are summed.
	First bool `xml:"first,attr"`
}

// RegexConfig defines a single regex pattern and its score in case of match.
type RegexConfig struct {
	Pattern string  `xml:"pattern,attr"`
	Score   float32 `xml:"score,attr"`
}
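
// An illustrative XML fragment matching the tags above (a sketch, not taken
// from a real lexer definition):
//
//	<analyse first="true">
//	  <regex pattern="^#!/usr/bin/env bash" score="1.0"/>
//	  <regex pattern="\bfi\b" score="0.1"/>
//	</analyse>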

// Token output to formatter.
type Token struct {
	Type  TokenType `json:"type"`
	Value string    `json:"value"`
}

func (t *Token) String() string   { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }

// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
	return *t
}
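
// A hedged sketch of the two representations (this assumes Keyword, one of
// the package's TokenTypes, stringifies as "Keyword"):
//
//	t := Token{Type: Keyword, Value: "func"}
//	fmt.Println(t.String())   // func
//	fmt.Println(t.GoString()) // &Token{Keyword, "func"}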

// EOF is returned by lexers at the end of input.
var EOF Token

// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
	// Nested tokenisation.
	Nested bool

	// If true, all end-of-line sequences are normalised to LF
	// by replacing CRLF and bare CR.
	EnsureLF bool
}

// A Lexer for tokenising source code.
type Lexer interface {
	// Config describing the features of the Lexer.
	Config() *Config
	// Tokenise returns an Iterator over tokens in text.
	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
	// SetRegistry sets the registry this Lexer is associated with.
	//
	// The registry should be used by the Lexer if it needs to look up other
	// lexers.
	SetRegistry(registry *LexerRegistry) Lexer
	// SetAnalyser sets a function the Lexer should use for scoring how
	// likely a fragment of text is to match this lexer, between 0.0 and 1.0.
	// A value of 1 indicates high confidence.
	//
	// Lexers may ignore this if they implement their own analysers.
	SetAnalyser(analyser func(text string) float32) Lexer
	// AnalyseText scores how likely a fragment of text is to match
	// this lexer, between 0.0 and 1.0. A value of 1 indicates high confidence.
	AnalyseText(text string) float32
}
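
// A hedged sketch of driving a Lexer, assuming the lexer value comes from
// elsewhere (e.g. a registry) and that an Iterator yields EOF once exhausted:
//
//	it, err := lexer.Tokenise(nil, source) // nil options use the defaults
//	if err != nil {
//		return err
//	}
//	for tok := it(); tok != EOF; tok = it() {
//		fmt.Printf("%s %q\n", tok.Type, tok.Value)
//	}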

// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer

func (l Lexers) Len() int      { return len(l) }
func (l Lexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool {
	return strings.ToLower(l[i].Config().Name) < strings.ToLower(l[j].Config().Name)
}
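
// Lexers satisfies sort.Interface, so a slice can be ordered
// case-insensitively by name; for example:
//
//	sort.Sort(all) // all is a Lexers value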

// PrioritisedLexers is a slice of lexers sortable by priority.
type PrioritisedLexers []Lexer

func (l PrioritisedLexers) Len() int      { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
	ip := l[i].Config().Priority
	if ip == 0 {
		ip = 1
	}
	jp := l[j].Config().Priority
	if jp == 0 {
		jp = 1
	}
	return ip > jp
}
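
// PrioritisedLexers also satisfies sort.Interface. Because Less compares
// with >, sort.Sort places the highest-priority lexer first, with an unset
// priority of 0 treated as the default of 1:
//
//	sort.Sort(candidates) // candidates is a PrioritisedLexers value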

// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}