package chroma

import (
	"fmt"
	"strings"
)

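// defaultOptions are the fallback tokenisation options, intended for use
// when Tokenise is called with nil options (an assumption based on the
// defaults documented on TokeniseOptions below).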
var (
	defaultOptions = &TokeniseOptions{
		State:    "root",
		EnsureLF: true,
	}
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string `xml:"name,omitempty"`

	// Shortcuts for the lexer.
	Aliases []string `xml:"alias,omitempty"`

	// File name globs.
	Filenames []string `xml:"filename,omitempty"`

	// Secondary file name globs.
	AliasFilenames []string `xml:"alias_filename,omitempty"`

	// MIME types.
	MimeTypes []string `xml:"mime_type,omitempty"`

	// Regex matching is case-insensitive.
	CaseInsensitive bool `xml:"case_insensitive,omitempty"`

	// Regex matches all characters.
	DotAll bool `xml:"dot_all,omitempty"`

	// Regex does not match across lines ($ matches EOL).
	//
	// Defaults to multiline.
	NotMultiline bool `xml:"not_multiline,omitempty"`

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	// StripAll bool

	// Make sure that the input ends with a newline. This
	// is required for some lexers that consume input linewise.
	EnsureNL bool `xml:"ensure_nl,omitempty"`

	// If given and greater than 0, expand tabs in the input.
	// TabSize int

	// Priority of lexer.
	//
	// If this is 0 it will be treated as a default of 1.
	Priority float32 `xml:"priority,omitempty"`

	// Analyse is a list of regexes to match against the input.
	//
	// If the first attribute is set to true, the score of the first matching
	// pattern is returned; otherwise the sum of the scores of all matching
	// patterns is used as the final score.
	Analyse *AnalyseConfig `xml:"analyse,omitempty"`
}
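
// A minimal sketch of a hand-built Config; the field values below are
// illustrative only and do not correspond to any real lexer:
//
//	cfg := &Config{
//		Name:            "Example",
//		Aliases:         []string{"example", "ex"},
//		Filenames:       []string{"*.ex"},
//		MimeTypes:       []string{"text/x-example"},
//		CaseInsensitive: true,
//	}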

// AnalyseConfig defines the list of regex analysers for a lexer.
type AnalyseConfig struct {
	Regexes []RegexConfig `xml:"regex,omitempty"`
	// If true, the score of the first matching regex is returned; otherwise
	// the scores of all matching regexes are summed.
	First bool `xml:"first,attr"`
}

// RegexConfig defines a single regex pattern and its score in case of match.
type RegexConfig struct {
	Pattern string  `xml:"pattern,attr"`
	Score   float32 `xml:"score,attr"`
}
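
// An illustrative XML fragment matching the tags above (a sketch, not taken
// from a real lexer definition):
//
//	<analyse first="true">
//	  <regex pattern="^#!/usr/bin/env bash" score="1.0"/>
//	  <regex pattern="\bfi\b" score="0.1"/>
//	</analyse>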

// Token output to formatter.
type Token struct {
	Type  TokenType `json:"type"`
	Value string    `json:"value"`
}

func (t *Token) String() string   { return t.Value }
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }

// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
	return *t
}
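
// A hedged sketch of the two representations (this assumes Keyword, one of
// the package's TokenTypes, stringifies as "Keyword"):
//
//	t := Token{Type: Keyword, Value: "func"}
//	fmt.Println(t.String())   // func
//	fmt.Println(t.GoString()) // &Token{Keyword, "func"}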

// EOF is returned by lexers at the end of input.
var EOF Token

// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
	// Nested tokenisation.
	Nested bool

	// If true, all end-of-line sequences are normalised to LF
	// by replacing CRLF and bare CR.
	EnsureLF bool
}

// A Lexer for tokenising source code.
type Lexer interface {
	// Config describing the features of the Lexer.
	Config() *Config
	// Tokenise returns an Iterator over tokens in text.
	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
	// SetRegistry sets the registry this Lexer is associated with.
	//
	// The registry should be used by the Lexer if it needs to look up other
	// lexers.
	SetRegistry(registry *LexerRegistry) Lexer
	// SetAnalyser sets a function the Lexer should use for scoring how
	// likely a fragment of text is to match this lexer, between 0.0 and 1.0.
	// A value of 1 indicates high confidence.
	//
	// Lexers may ignore this if they implement their own analysers.
	SetAnalyser(analyser func(text string) float32) Lexer
	// AnalyseText scores how likely a fragment of text is to match
	// this lexer, between 0.0 and 1.0. A value of 1 indicates high confidence.
	AnalyseText(text string) float32
}
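
// A hedged sketch of driving a Lexer, assuming the lexer value comes from
// elsewhere (e.g. a registry) and that an Iterator yields EOF once exhausted:
//
//	it, err := lexer.Tokenise(nil, source) // nil options use the defaults
//	if err != nil {
//		return err
//	}
//	for tok := it(); tok != EOF; tok = it() {
//		fmt.Printf("%s %q\n", tok.Type, tok.Value)
//	}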

// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer

func (l Lexers) Len() int      { return len(l) }
func (l Lexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool {
	return strings.ToLower(l[i].Config().Name) < strings.ToLower(l[j].Config().Name)
}
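
// Lexers satisfies sort.Interface, so a slice can be ordered
// case-insensitively by name; for example:
//
//	sort.Sort(all) // all is a Lexers value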

// PrioritisedLexers is a slice of lexers sortable by priority.
type PrioritisedLexers []Lexer

func (l PrioritisedLexers) Len() int      { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
	ip := l[i].Config().Priority
	if ip == 0 {
		ip = 1
	}
	jp := l[j].Config().Priority
	if jp == 0 {
		jp = 1
	}
	return ip > jp
}
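
// PrioritisedLexers also satisfies sort.Interface. Because Less compares
// with >, sort.Sort places the highest-priority lexer first, with an unset
// priority of 0 treated as the default of 1:
//
//	sort.Sort(candidates) // candidates is a PrioritisedLexers value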

// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}