1package query
2
3import (
4 "fmt"
5 "strings"
6 "unicode"
7)
8
// tokenKind identifies the different kinds of tokens produced by tokenize.
type tokenKind int

const (
	// the zero value is deliberately skipped so a zero-valued token is invalid
	_ tokenKind = iota
	// tokenKindKV is a "qualifier:value" pair
	tokenKindKV
	// tokenKindKVV is a "qualifier:sub-qualifier:value" triple
	tokenKindKVV
	// tokenKindSearch is a free-form full text search term
	tokenKindSearch
)
17
// token is one lexical unit of a query: either a key/value pair (KV),
// a key/sub-key/value triple (KVV), or a full text search term.
// Only the fields relevant to kind are populated; the others stay zero.
type token struct {
	kind tokenKind

	// qualifier and value are set for KV and KVV tokens
	qualifier string
	value     string

	// subQualifier is set for KVV tokens only
	subQualifier string

	// term is set for Search tokens only
	term string
}
31
32func newTokenKV(qualifier, value string) token {
33 return token{
34 kind: tokenKindKV,
35 qualifier: qualifier,
36 value: value,
37 }
38}
39
40func newTokenKVV(qualifier, subQualifier, value string) token {
41 return token{
42 kind: tokenKindKVV,
43 qualifier: qualifier,
44 subQualifier: subQualifier,
45 value: value,
46 }
47}
48
49func newTokenSearch(term string) token {
50 return token{
51 kind: tokenKindSearch,
52 term: term,
53 }
54}
55
56// tokenize parse and break a input into tokens ready to be
57// interpreted later by a parser to get the semantic.
58func tokenize(query string) ([]token, error) {
59 fields, err := splitQuery(query)
60 if err != nil {
61 return nil, err
62 }
63
64 var tokens []token
65 for _, field := range fields {
66 // Split using ':' as separator, but separators inside '"' don't count.
67 quoted := false
68 split := strings.FieldsFunc(field, func(r rune) bool {
69 if r == '"' {
70 quoted = !quoted
71 }
72 return !quoted && r == ':'
73 })
74 if (strings.HasPrefix(field, ":")) {
75 split = append([]string{""}, split...)
76 }
77 if (strings.HasSuffix(field, ":")) {
78 split = append(split, "")
79 }
80 if (quoted) {
81 return nil, fmt.Errorf("can't tokenize \"%s\": unmatched quote", field)
82 }
83
84 // full text search
85 if len(split) == 1 {
86 tokens = append(tokens, newTokenSearch(removeQuote(field)))
87 continue
88 }
89
90 if len(split) > 3 {
91 return nil, fmt.Errorf("can't tokenize \"%s\": too many separators", field)
92 }
93
94 if len(split[0]) == 0 {
95 return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
96 }
97
98 if len(split) == 2 {
99 if len(split[1]) == 0 {
100 return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
101 }
102
103 tokens = append(tokens, newTokenKV(split[0], removeQuote(split[1])))
104 } else {
105 if len(split[1]) == 0 {
106 return nil, fmt.Errorf("empty sub-qualifier for qualifier \"%s\"", split[0])
107 }
108
109 if len(split[2]) == 0 {
110 return nil, fmt.Errorf("empty value for qualifier \"%s:%s\"", split[0], split[1])
111 }
112
113 tokens = append(tokens, newTokenKVV(split[0], removeQuote(split[1]), removeQuote(split[2])))
114 }
115 }
116 return tokens, nil
117}
118
// splitQuery breaks the query into whitespace-separated chunks while
// treating quoted sections (single or double quotes) as atomic, so
// whitespace inside quotes does not end a chunk. Quote characters are
// kept in the output. An unterminated quote yields an error.
func splitQuery(query string) ([]string, error) {
	var (
		chunks  []string
		current strings.Builder
		quote   rune // active quote character, 0 when outside quotes
	)

	// flush ends the current chunk, if any, and appends it to the result.
	flush := func() {
		if current.Len() > 0 {
			chunks = append(chunks, current.String())
			current.Reset()
		}
	}

	for _, r := range query {
		switch {
		case quote != 0:
			// Inside a quoted section: every rune belongs to the chunk,
			// and the matching quote closes the section.
			if r == quote {
				quote = 0
			}
			current.WriteRune(r)
		case r == '"' || r == '\'':
			quote = r
			current.WriteRune(r)
		case unicode.IsSpace(r):
			flush()
		default:
			current.WriteRune(r)
		}
	}

	if quote != 0 {
		return nil, fmt.Errorf("unmatched quote")
	}

	flush()
	return chunks, nil
}
165
// isQuote reports whether r is one of the recognized quote characters
// (double or single quote).
func isQuote(r rune) bool {
	switch r {
	case '"', '\'':
		return true
	default:
		return false
	}
}
169
// removeQuote strips one pair of matching surrounding quotes (single or
// double) from field, if present; otherwise field is returned unchanged.
func removeQuote(field string) string {
	runes := []rune(field)
	if len(runes) < 2 {
		return field
	}

	first := runes[0]
	last := runes[len(runes)-1]
	if first != last {
		return field
	}
	if first != '"' && first != '\'' {
		return field
	}

	return string(runes[1 : len(runes)-1])
}