lexer.go

  1package query
  2
  3import (
  4	"fmt"
  5	"strings"
  6	"unicode"
  7)
  8
  9type token struct {
 10	qualifier string
 11	value     string
 12}
 13
 14// tokenize parse and break a input into tokens ready to be
 15// interpreted later by a parser to get the semantic.
 16func tokenize(query string) ([]token, error) {
 17	fields, err := splitQuery(query)
 18	if err != nil {
 19		return nil, err
 20	}
 21
 22	var tokens []token
 23	for _, field := range fields {
 24		split := strings.Split(field, ":")
 25		if len(split) != 2 {
 26			return nil, fmt.Errorf("can't tokenize \"%s\"", field)
 27		}
 28
 29		if len(split[0]) == 0 {
 30			return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
 31		}
 32		if len(split[1]) == 0 {
 33			return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
 34		}
 35
 36		tokens = append(tokens, token{
 37			qualifier: split[0],
 38			value:     removeQuote(split[1]),
 39		})
 40	}
 41	return tokens, nil
 42}
 43
 44func splitQuery(query string) ([]string, error) {
 45	lastQuote := rune(0)
 46	inQuote := false
 47
 48	isToken := func(r rune) bool {
 49		switch {
 50		case !inQuote && isQuote(r):
 51			lastQuote = r
 52			inQuote = true
 53			return true
 54		case inQuote && r == lastQuote:
 55			lastQuote = rune(0)
 56			inQuote = false
 57			return true
 58		case inQuote:
 59			return true
 60		default:
 61			return !unicode.IsSpace(r)
 62		}
 63	}
 64
 65	var result []string
 66	var token strings.Builder
 67	for _, r := range query {
 68		if isToken(r) {
 69			token.WriteRune(r)
 70		} else {
 71			if token.Len() > 0 {
 72				result = append(result, token.String())
 73				token.Reset()
 74			}
 75		}
 76	}
 77
 78	if inQuote {
 79		return nil, fmt.Errorf("unmatched quote")
 80	}
 81
 82	if token.Len() > 0 {
 83		result = append(result, token.String())
 84	}
 85
 86	return result, nil
 87}
 88
 89func isQuote(r rune) bool {
 90	return r == '"' || r == '\''
 91}
 92
 93func removeQuote(field string) string {
 94	runes := []rune(field)
 95	if len(runes) >= 2 {
 96		r1 := runes[0]
 97		r2 := runes[len(runes)-1]
 98
 99		if r1 == r2 && isQuote(r1) {
100			return string(runes[1 : len(runes)-1])
101		}
102	}
103	return field
104}