lexer.go

package chroma

import (
	"fmt"
)

var (
	defaultOptions = &TokeniseOptions{
		State: "root",
	}
)

// Config for a lexer.
type Config struct {
	// Name of the lexer.
	Name string

	// Shortcuts for the lexer.
	Aliases []string

	// File name globs.
	Filenames []string

	// Secondary file name globs.
	AliasFilenames []string

	// MIME types.
	MimeTypes []string

	// Regex matching is case-insensitive.
	CaseInsensitive bool

	// Regex matches all characters.
	DotAll bool

	// Regex does not match across lines ($ matches EOL).
	//
	// Defaults to multiline.
	NotMultiline bool

	// Don't strip leading and trailing newlines from the input.
	// DontStripNL bool

	// Strip all leading and trailing whitespace from the input.
	// StripAll bool

	// Make sure that the input ends with a newline. This
	// is required for some lexers that consume input linewise.
	EnsureNL bool

	// If given and greater than 0, expand tabs in the input.
	// TabSize int

	// Priority of the lexer.
	//
	// If this is 0, it will be treated as a default of 1.
	Priority float32
}
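
// A hypothetical Config literal, shown only to illustrate the fields above;
// the values are made up and not taken from any real lexer definition:
//
//	cfg := &Config{
//		Name:      "Example",
//		Aliases:   []string{"example", "ex"},
//		Filenames: []string{"*.ex"},
//		MimeTypes: []string{"text/x-example"},
//	}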

// Token output to formatter.
type Token struct {
	Type  TokenType `json:"type"`
	Value string    `json:"value"`
}

// String returns the token's Value.
func (t *Token) String() string   { return t.Value }

// GoString returns a Go-syntax representation of the Token.
func (t *Token) GoString() string { return fmt.Sprintf("&Token{%s, %q}", t.Type, t.Value) }

// Clone returns a clone of the Token.
func (t *Token) Clone() Token {
	return *t
}

// EOF is returned by lexers at the end of input.
var EOF Token

// TokeniseOptions contains options for tokenisers.
type TokeniseOptions struct {
	// State to start tokenisation in. Defaults to "root".
	State string
	// Nested tokenisation.
	Nested bool
}
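
// A minimal sketch of building options explicitly. The defaultOptions variable
// above suggests that the "root" state is used when no options are supplied;
// that fallback is an assumption here, since it is implemented elsewhere:
//
//	opts := &TokeniseOptions{State: "root"}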

// A Lexer for tokenising source code.
type Lexer interface {
	// Config describing the features of the Lexer.
	Config() *Config
	// Tokenise returns an Iterator over tokens in text.
	Tokenise(options *TokeniseOptions, text string) (Iterator, error)
}
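
// A minimal usage sketch, with lexer and source as placeholders. The Iterator
// type is defined elsewhere in the package; this assumes it yields Token
// values and returns EOF at the end of input, as documented above:
//
//	it, err := lexer.Tokenise(nil, source)
//	if err != nil {
//		// handle error
//	}
//	for token := it(); token != EOF; token = it() {
//		fmt.Print(token.Value)
//	}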

// Lexers is a slice of lexers sortable by name.
type Lexers []Lexer

func (l Lexers) Len() int           { return len(l) }
func (l Lexers) Swap(i, j int)      { l[i], l[j] = l[j], l[i] }
func (l Lexers) Less(i, j int) bool { return l[i].Config().Name < l[j].Config().Name }
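
// A minimal sketch of sorting by name using the standard library, assuming a
// Lexers value named all:
//
//	sort.Sort(all)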

// PrioritisedLexers is a slice of lexers sortable by priority.
type PrioritisedLexers []Lexer

func (l PrioritisedLexers) Len() int      { return len(l) }
func (l PrioritisedLexers) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l PrioritisedLexers) Less(i, j int) bool {
	// A zero Priority is treated as the default of 1 (see Config.Priority).
	ip := l[i].Config().Priority
	if ip == 0 {
		ip = 1
	}
	jp := l[j].Config().Priority
	if jp == 0 {
		jp = 1
	}
	// Sort in descending order so that higher-priority lexers come first.
	return ip > jp
}
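
// A minimal sketch of ordering candidate lexers, assuming a PrioritisedLexers
// value named candidates; after sorting, the highest-priority lexer is first:
//
//	sort.Sort(candidates)
//	best := candidates[0]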

// Analyser determines how appropriate this lexer is for the given text.
type Analyser interface {
	AnalyseText(text string) float32
}
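
// A minimal sketch of an Analyser implementation on a hypothetical lexer type.
// It assumes that higher return values indicate a stronger match; the exact
// scoring convention is not defined in this file:
//
//	func (l *exampleLexer) AnalyseText(text string) float32 {
//		if strings.HasPrefix(text, "#!/usr/bin/env example") {
//			return 1.0
//		}
//		return 0.0
//	}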