1package query
2
3import (
4 "fmt"
5 "strings"
6 "unicode"
7)
8
// tokenKind identifies which flavor of token the lexer produced.
type tokenKind int

// Valid kinds start at 1, so the zero value of tokenKind matches none of them.
const (
	tokenKindKV tokenKind = iota + 1
	tokenKindSearch
)
16
17type token struct {
18 kind tokenKind
19
20 // KV
21 qualifier string
22 value string
23
24 // Search
25 term string
26}
27
28func newTokenKV(qualifier, value string) token {
29 return token{
30 kind: tokenKindKV,
31 qualifier: qualifier,
32 value: value,
33 }
34}
35
36func newTokenSearch(term string) token {
37 return token{
38 kind: tokenKindSearch,
39 term: term,
40 }
41}
42
43// tokenize parse and break a input into tokens ready to be
44// interpreted later by a parser to get the semantic.
45func tokenize(query string) ([]token, error) {
46 fields, err := splitQuery(query)
47 if err != nil {
48 return nil, err
49 }
50
51 var tokens []token
52 for _, field := range fields {
53 split := strings.Split(field, ":")
54
55 // full text search
56 if len(split) == 1 {
57 tokens = append(tokens, newTokenSearch(removeQuote(field)))
58 continue
59 }
60
61 if len(split) != 2 {
62 return nil, fmt.Errorf("can't tokenize \"%s\"", field)
63 }
64
65 if len(split[0]) == 0 {
66 return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
67 }
68 if len(split[1]) == 0 {
69 return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
70 }
71
72 tokens = append(tokens, newTokenKV(split[0], removeQuote(split[1])))
73 }
74 return tokens, nil
75}
76
77// split the query into chunks by splitting on whitespaces but respecting
78// quotes
79func splitQuery(query string) ([]string, error) {
80 lastQuote := rune(0)
81 inQuote := false
82
83 isToken := func(r rune) bool {
84 switch {
85 case !inQuote && isQuote(r):
86 lastQuote = r
87 inQuote = true
88 return true
89 case inQuote && r == lastQuote:
90 lastQuote = rune(0)
91 inQuote = false
92 return true
93 case inQuote:
94 return true
95 default:
96 return !unicode.IsSpace(r)
97 }
98 }
99
100 var result []string
101 var token strings.Builder
102 for _, r := range query {
103 if isToken(r) {
104 token.WriteRune(r)
105 } else {
106 if token.Len() > 0 {
107 result = append(result, token.String())
108 token.Reset()
109 }
110 }
111 }
112
113 if inQuote {
114 return nil, fmt.Errorf("unmatched quote")
115 }
116
117 if token.Len() > 0 {
118 result = append(result, token.String())
119 }
120
121 return result, nil
122}
123
// isQuote reports whether r is a double or a single quote character.
func isQuote(r rune) bool {
	switch r {
	case '"', '\'':
		return true
	default:
		return false
	}
}
127
128func removeQuote(field string) string {
129 runes := []rune(field)
130 if len(runes) >= 2 {
131 r1 := runes[0]
132 r2 := runes[len(runes)-1]
133
134 if r1 == r2 && isQuote(r1) {
135 return string(runes[1 : len(runes)-1])
136 }
137 }
138 return field
139}