package chroma

import (
	"fmt"
)

var (
	defaultOptions = &TokeniseOptions{
		State: "root",
	}
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string

	// Shortcuts for the lexer.
	Aliases []string

	// File name globs.
	Filenames []string

	// Secondary file name globs.
	AliasFilenames []string

	// MIME types.
	MimeTypes []string

	// Regex matching is case-insensitive.
	CaseInsensitive bool

	// Regex matches all characters.
	DotAll bool

	// Regex does not match across lines ($ matches EOL).
	//
	// Defaults to multiline.
	NotMultiline bool

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	// StripAll bool

	// Make sure that the input ends with a newline. This
	// is required for some lexers that consume input linewise.
	EnsureNL bool

	// If given and greater than 0, expand tabs in the input.
	// TabSize int

	// Priority of lexer.
	//
	// If this is 0 it will be treated as a default of 1.
	Priority float32
}
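
// A minimal sketch of filling in a Config, assuming a hypothetical INI lexer;
// the names and globs below are illustrative, not taken from a real lexer:
//
//	var iniConfig = &Config{
//		Name:      "INI",
//		Aliases:   []string{"ini", "cfg"},
//		Filenames: []string{"*.ini", "*.cfg"},
//		MimeTypes: []string{"text/x-ini"},
//	}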

// Token output to formatter.
type Token struct {
	Type  TokenType `json:"type"`
	Value string    `json:"value"`
}

func (t *Token) String() string   { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }

// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
	return *t
}
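
// Sketch of constructing and printing a Token; "Keyword" stands in for a
// TokenType value assumed to be defined elsewhere in the package:
//
//	tok := Token{Type: Keyword, Value: "func"}
//	fmt.Println(tok.GoString()) // &Token{Keyword, "func"}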

// EOF is returned by lexers at the end of input.
var EOF Token

// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
	// Nested tokenisation.
	Nested bool
}
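
// For example, a caller could start tokenisation in a lexer-defined state
// other than the default (assuming the lexer actually declares that state):
//
//	opts := &TokeniseOptions{State: "string", Nested: false}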

// A Lexer for tokenising source code.
type Lexer interface {
	// Config describing the features of the Lexer.
	Config() *Config
	// Tokenise returns an Iterator over tokens in text.
	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
}
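
// A sketch of driving a Lexer, assuming "lexer" is any value satisfying the
// interface, that nil options fall back to defaultOptions, and that Iterator
// is a func() Token which yields EOF once the input is exhausted:
//
//	it, err := lexer.Tokenise(nil, "source text")
//	if err != nil {
//		// handle the error
//	}
//	for tok := it(); tok != EOF; tok = it() {
//		fmt.Println(tok.GoString())
//	}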

// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer

func (l Lexers) Len() int           { return len(l) }
func (l Lexers) Swap(i, j int)      { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool { return l[i].Config().Name < l[j].Config().Name }
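
// Len, Swap and Less satisfy sort.Interface, so a name-ordered listing is:
//
//	sort.Sort(lexers) // lexers is a Lexers value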

// PrioritisedLexers is a slice of lexers sortable by priority, highest first.
type PrioritisedLexers []Lexer

func (l PrioritisedLexers) Len() int      { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
	ip := l[i].Config().Priority
	if ip == 0 {
		ip = 1
	}
	jp := l[j].Config().Priority
	if jp == 0 {
		jp = 1
	}
	return ip > jp
}
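
// Because Less compares with >, sort.Sort orders highest priority first:
//
//	sort.Sort(candidates) // candidates is a PrioritisedLexers value
//	best := candidates[0] // highest-priority lexer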

// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}
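
// A sketch of scoring a candidate; not every Lexer implements Analyser, so a
// type assertion is needed (the 0.5 threshold is an illustrative choice):
//
//	if a, ok := lexer.(Analyser); ok && a.AnalyseText(text) > 0.5 {
//		// lexer looks like a strong match for text
//	}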