package query

import (
	"fmt"
	"strings"
	"unicode"
)

type token struct {
	qualifier string
	value     string
}
// TODO: this lexer implementation behaves badly with unmatched quotes.
// A hand-written lexer would be better than relying on strings.FieldsFunc().

// tokenize parses and breaks an input into tokens, ready to be
// interpreted later by a parser to extract the semantics.
func tokenize(query string) ([]token, error) {
	fields := splitQuery(query)

	var tokens []token
	for _, field := range fields {
		// Each field is expected to have the form qualifier:value.
		split := strings.Split(field, ":")
		if len(split) != 2 {
			return nil, fmt.Errorf("can't tokenize \"%s\"", field)
		}

		if len(split[0]) == 0 {
			return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
		}
		if len(split[1]) == 0 {
			return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
		}

		tokens = append(tokens, token{
			qualifier: split[0],
			value:     removeQuote(split[1]),
		})
	}
	return tokens, nil
}
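
// A minimal usage sketch (illustration only, not part of the original
// package): tokenize turns a query in the qualifier:value form handled
// above into qualifier/value pairs. The function name is hypothetical.
func exampleTokenizeUsage() {
	tokens, err := tokenize(`status:open author:"René Descartes"`)
	if err != nil {
		fmt.Println(err)
		return
	}
	for _, t := range tokens {
		// Expected output:
		// status=open
		// author=René Descartes
		fmt.Printf("%s=%s\n", t.qualifier, t.value)
	}
}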
43
// splitQuery splits the query on whitespace, but keeps quoted
// sections together as a single field.
func splitQuery(query string) []string {
	lastQuote := rune(0)
	f := func(c rune) bool {
		switch {
		case c == lastQuote:
			// Closing quote: leave the quoted section.
			lastQuote = rune(0)
			return false
		case lastQuote != rune(0):
			// Inside a quoted section: never split.
			return false
		case unicode.In(c, unicode.Quotation_Mark):
			// Opening quote: enter a quoted section.
			lastQuote = c
			return false
		default:
			// Outside quotes, split on whitespace.
			return unicode.IsSpace(c)
		}
	}

	return strings.FieldsFunc(query, f)
}
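
// A small sketch (hypothetical, for illustration) of how splitQuery
// treats quoted spans as single fields rather than splitting on the
// whitespace inside them.
func exampleSplitQueryUsage() {
	fields := splitQuery(`label:"needs review" status:open`)
	// fields is []string{`label:"needs review"`, `status:open`}
	fmt.Println(strings.Join(fields, " | "))
}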
63
// removeQuote strips a matching pair of double quotes surrounding a
// field, if present. Note that only ASCII double quotes are stripped,
// while splitQuery accepts any unicode.Quotation_Mark.
func removeQuote(field string) string {
	if len(field) >= 2 {
		if field[0] == '"' && field[len(field)-1] == '"' {
			return field[1 : len(field)-1]
		}
	}
	return field
}