1package query
2
3import (
4 "fmt"
5 "strings"
6 "unicode"
7)
8
// tokenKind discriminates the different forms a token can take.
type tokenKind int

const (
	_ tokenKind = iota
	// tokenKindKV is a "qualifier:value" pair.
	tokenKindKV
	// tokenKindKVV is a "qualifier:sub-qualifier:value" triplet.
	tokenKindKVV
	// tokenKindSearch is a free-form full-text search term.
	tokenKindSearch
)
17
// token is a single lexical element extracted from a query string.
// Depending on kind, only a subset of the fields is populated.
type token struct {
	kind tokenKind

	// KV and KVV
	qualifier string
	value     string

	// KVV only
	subQualifier string

	// Search
	term string
}
31
32func newTokenKV(qualifier, value string) token {
33 return token{
34 kind: tokenKindKV,
35 qualifier: qualifier,
36 value: value,
37 }
38}
39
40func newTokenKVV(qualifier, subQualifier, value string) token {
41 return token{
42 kind: tokenKindKVV,
43 qualifier: qualifier,
44 subQualifier: subQualifier,
45 value: value,
46 }
47}
48
49func newTokenSearch(term string) token {
50 return token{
51 kind: tokenKindSearch,
52 term: term,
53 }
54}
55
56// tokenize parse and break a input into tokens ready to be
57// interpreted later by a parser to get the semantic.
58func tokenize(query string) ([]token, error) {
59 fields, err := splitFunc(query, unicode.IsSpace)
60 if err != nil {
61 return nil, err
62 }
63
64 var tokens []token
65 for _, field := range fields {
66 chunks, err := splitFunc(field, func(r rune) bool { return r == ':' })
67 if err != nil {
68 return nil, err
69 }
70
71 if strings.HasPrefix(field, ":") || strings.HasSuffix(field, ":") {
72 return nil, fmt.Errorf("empty qualifier or value")
73 }
74
75 // pre-process chunks
76 for i, chunk := range chunks {
77 if len(chunk) == 0 {
78 return nil, fmt.Errorf("empty qualifier or value")
79 }
80 chunks[i] = removeQuote(chunk)
81 }
82
83 switch len(chunks) {
84 case 1: // full text search
85 tokens = append(tokens, newTokenSearch(chunks[0]))
86
87 case 2: // KV
88 tokens = append(tokens, newTokenKV(chunks[0], chunks[1]))
89
90 case 3: // KVV
91 tokens = append(tokens, newTokenKVV(chunks[0], chunks[1], chunks[2]))
92
93 default:
94 return nil, fmt.Errorf("can't tokenize \"%s\": too many separators", field)
95 }
96 }
97 return tokens, nil
98}
99
100func removeQuote(field string) string {
101 runes := []rune(field)
102 if len(runes) >= 2 {
103 r1 := runes[0]
104 r2 := runes[len(runes)-1]
105
106 if r1 == r2 && isQuote(r1) {
107 return string(runes[1 : len(runes)-1])
108 }
109 }
110 return field
111}
112
113// split the input into chunks by splitting according to separatorFunc but respecting
114// quotes
115func splitFunc(input string, separatorFunc func(r rune) bool) ([]string, error) {
116 lastQuote := rune(0)
117 inQuote := false
118
119 // return true if it's part of a chunk, or false if it's a rune that delimit one, as determined by the separatorFunc.
120 isChunk := func(r rune) bool {
121 switch {
122 case !inQuote && isQuote(r):
123 lastQuote = r
124 inQuote = true
125 return true
126 case inQuote && r == lastQuote:
127 lastQuote = rune(0)
128 inQuote = false
129 return true
130 case inQuote:
131 return true
132 default:
133 return !separatorFunc(r)
134 }
135 }
136
137 var result []string
138 var chunk strings.Builder
139 for _, r := range input {
140 if isChunk(r) {
141 chunk.WriteRune(r)
142 } else {
143 if chunk.Len() > 0 {
144 result = append(result, chunk.String())
145 chunk.Reset()
146 }
147 }
148 }
149
150 if inQuote {
151 return nil, fmt.Errorf("unmatched quote")
152 }
153
154 if chunk.Len() > 0 {
155 result = append(result, chunk.String())
156 }
157
158 return result, nil
159}
160
// isQuote reports whether r is one of the recognized quote characters.
func isQuote(r rune) bool {
	switch r {
	case '"', '\'':
		return true
	default:
		return false
	}
}