lexer.go

package query

import (
	"fmt"
	"strings"
	"unicode"
)

// token is a qualifier:value pair extracted from a search query.
type token struct {
	qualifier string
	value     string
}

// TODO: this lexer implementation behaves badly with unmatched quotes.
// A hand-written lexer would be better than relying on strings.FieldsFunc().
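// For instance, with an unmatched quote as in `status:"open`, the opening
// quote is silently carried into the token value instead of producing an error.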

// tokenize parses a query string and breaks it into tokens, ready to be
// interpreted later by a parser to extract the semantics.
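//
// For example, the query `status:open author:"Jane Doe"` yields the
// tokens {status, open} and {author, Jane Doe}.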
func tokenize(query string) ([]token, error) {
	fields := splitQuery(query)

	var tokens []token
	for _, field := range fields {
		// each field is expected to take the form qualifier:value
		split := strings.Split(field, ":")
		if len(split) != 2 {
			return nil, fmt.Errorf("can't tokenize \"%s\"", field)
		}

		if len(split[0]) == 0 {
			return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
		}
		if len(split[1]) == 0 {
			return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
		}

		tokens = append(tokens, token{
			qualifier: split[0],
			value:     removeQuote(split[1]),
		})
	}
	return tokens, nil
}

// splitQuery splits the query on whitespace, keeping quoted sections
// together as a single field.
func splitQuery(query string) []string {
	lastQuote := rune(0)
	f := func(c rune) bool {
		switch {
		case c == lastQuote:
			// closing quote: leave the quoted section
			lastQuote = rune(0)
			return false
		case lastQuote != rune(0):
			// inside a quoted section, never split
			return false
		case unicode.In(c, unicode.Quotation_Mark):
			// opening quote: enter a quoted section
			lastQuote = c
			return false
		default:
			// outside quotes, split on whitespace
			return unicode.IsSpace(c)
		}
	}

	return strings.FieldsFunc(query, f)
}

// removeQuote strips a matching pair of double quotes surrounding the field,
// if any. Note that splitQuery recognizes any unicode quotation mark, while
// only ASCII double quotes are stripped here.
func removeQuote(field string) string {
	if len(field) >= 2 {
		if field[0] == '"' && field[len(field)-1] == '"' {
			return field[1 : len(field)-1]
		}
	}
	return field
}
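
// exampleUsage is a hypothetical helper, not part of the original file: a
// minimal sketch showing the lexer end to end, assuming the behavior
// documented above.
func exampleUsage() {
	tokens, err := tokenize(`status:open author:"Jane Doe"`)
	if err != nil {
		fmt.Println(err)
		return
	}
	for _, t := range tokens {
		// prints "status => open" then "author => Jane Doe"
		fmt.Printf("%s => %s\n", t.qualifier, t.value)
	}
}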