1package query
  2
  3import (
  4	"fmt"
  5	"strings"
  6	"unicode"
  7)
  8
  9type tokenKind int
 10
 11const (
 12	_ tokenKind = iota
 13	tokenKindKV
 14	tokenKindSearch
 15)
 16
 17type token struct {
 18	kind tokenKind
 19
 20	// KV
 21	qualifier string
 22	value     string
 23
 24	// Search
 25	term string
 26}
 27
 28func newTokenKV(qualifier, value string) token {
 29	return token{
 30		kind:      tokenKindKV,
 31		qualifier: qualifier,
 32		value:     value,
 33	}
 34}
 35
 36func newTokenSearch(term string) token {
 37	return token{
 38		kind: tokenKindSearch,
 39		term: term,
 40	}
 41}
 42
 43// tokenize parse and break a input into tokens ready to be
 44// interpreted later by a parser to get the semantic.
 45func tokenize(query string) ([]token, error) {
 46	fields, err := splitQuery(query)
 47	if err != nil {
 48		return nil, err
 49	}
 50
 51	var tokens []token
 52	for _, field := range fields {
 53		split := strings.Split(field, ":")
 54
 55		// full text search
 56		if len(split) == 1 {
 57			tokens = append(tokens, newTokenSearch(removeQuote(field)))
 58			continue
 59		}
 60
 61		if len(split) != 2 {
 62			return nil, fmt.Errorf("can't tokenize \"%s\"", field)
 63		}
 64
 65		if len(split[0]) == 0 {
 66			return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
 67		}
 68		if len(split[1]) == 0 {
 69			return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
 70		}
 71
 72		tokens = append(tokens, newTokenKV(split[0], removeQuote(split[1])))
 73	}
 74	return tokens, nil
 75}
 76
 77// split the query into chunks by splitting on whitespaces but respecting
 78// quotes
 79func splitQuery(query string) ([]string, error) {
 80	lastQuote := rune(0)
 81	inQuote := false
 82
 83	isToken := func(r rune) bool {
 84		switch {
 85		case !inQuote && isQuote(r):
 86			lastQuote = r
 87			inQuote = true
 88			return true
 89		case inQuote && r == lastQuote:
 90			lastQuote = rune(0)
 91			inQuote = false
 92			return true
 93		case inQuote:
 94			return true
 95		default:
 96			return !unicode.IsSpace(r)
 97		}
 98	}
 99
100	var result []string
101	var token strings.Builder
102	for _, r := range query {
103		if isToken(r) {
104			token.WriteRune(r)
105		} else {
106			if token.Len() > 0 {
107				result = append(result, token.String())
108				token.Reset()
109			}
110		}
111	}
112
113	if inQuote {
114		return nil, fmt.Errorf("unmatched quote")
115	}
116
117	if token.Len() > 0 {
118		result = append(result, token.String())
119	}
120
121	return result, nil
122}
123
// isQuote reports whether r is a double quote (") or a single quote (').
func isQuote(r rune) bool {
	switch r {
	case '"', '\'':
		return true
	default:
		return false
	}
}
127
128func removeQuote(field string) string {
129	runes := []rune(field)
130	if len(runes) >= 2 {
131		r1 := runes[0]
132		r2 := runes[len(runes)-1]
133
134		if r1 == r2 && isQuote(r1) {
135			return string(runes[1 : len(runes)-1])
136		}
137	}
138	return field
139}