1package repository
  2
  3import (
  4	"fmt"
  5	"os"
  6	"strings"
  7	"sync"
  8	"unicode/utf8"
  9
 10	"github.com/blevesearch/bleve"
 11)
 12
 13var _ Index = &bleveIndex{}
 14
// bleveIndex is an Index implementation backed by a bleve index stored on
// disk in the directory at path.
type bleveIndex struct {
	// path is the directory holding the index; makeIndex creates it and Clear
	// removes and recreates it.
	path string

	// mu guards index: IndexOne, Remove, Clear and Close take the write lock,
	// Search takes the read lock.
	mu    sync.RWMutex
	index bleve.Index
}
 21
 22func openBleveIndex(path string) (*bleveIndex, error) {
 23	index, err := bleve.Open(path)
 24	if err == nil {
 25		return &bleveIndex{path: path, index: index}, nil
 26	}
 27
 28	b := &bleveIndex{path: path}
 29	err = b.makeIndex()
 30	if err != nil {
 31		return nil, err
 32	}
 33
 34	return b, nil
 35}
 36
 37func (b *bleveIndex) makeIndex() error {
 38	err := os.MkdirAll(b.path, os.ModePerm)
 39	if err != nil {
 40		return err
 41	}
 42
 43	// TODO: follow https://github.com/blevesearch/bleve/issues/1576 recommendations
 44
 45	mapping := bleve.NewIndexMapping()
 46	mapping.DefaultAnalyzer = "en"
 47
 48	index, err := bleve.New(b.path, mapping)
 49	if err != nil {
 50		return err
 51	}
 52	b.index = index
 53	return nil
 54}
 55
 56func (b *bleveIndex) IndexOne(id string, texts []string) error {
 57	b.mu.Lock()
 58	defer b.mu.Unlock()
 59	return b._index(b.index.Index, id, texts)
 60}
 61
 62func (b *bleveIndex) IndexBatch() (indexer func(id string, texts []string) error, closer func() error) {
 63	b.mu.Lock()
 64	defer b.mu.Unlock()
 65
 66	batch := b.index.NewBatch()
 67
 68	indexer = func(id string, texts []string) error {
 69		return b._index(batch.Index, id, texts)
 70	}
 71
 72	closer = func() error {
 73		return b.index.Batch(batch)
 74	}
 75
 76	return indexer, closer
 77}
 78
 79func (b *bleveIndex) _index(indexer func(string, interface{}) error, id string, texts []string) error {
 80	searchable := struct{ Text []string }{Text: texts}
 81
 82	// See https://github.com/blevesearch/bleve/issues/1576
 83	var sb strings.Builder
 84	normalize := func(text string) string {
 85		sb.Reset()
 86		for _, field := range strings.Fields(text) {
 87			if utf8.RuneCountInString(field) < 100 {
 88				sb.WriteString(field)
 89				sb.WriteRune(' ')
 90			}
 91		}
 92		return sb.String()
 93	}
 94
 95	for i, s := range searchable.Text {
 96		searchable.Text[i] = normalize(s)
 97	}
 98
 99	return indexer(id, searchable)
100}
101
102func (b *bleveIndex) Search(terms []string) ([]string, error) {
103	b.mu.RLock()
104	defer b.mu.RUnlock()
105
106	for i, term := range terms {
107		if strings.Contains(term, " ") {
108			terms[i] = fmt.Sprintf("\"%s\"", term)
109		}
110	}
111
112	query := bleve.NewQueryStringQuery(strings.Join(terms, " "))
113	search := bleve.NewSearchRequest(query)
114
115	res, err := b.index.Search(search)
116	if err != nil {
117		return nil, err
118	}
119
120	ids := make([]string, len(res.Hits))
121	for i, hit := range res.Hits {
122		ids[i] = hit.ID
123	}
124
125	return ids, nil
126}
127
128func (b *bleveIndex) DocCount() (uint64, error) {
129	return b.index.DocCount()
130}
131
132func (b *bleveIndex) Remove(id string) error {
133	b.mu.Lock()
134	defer b.mu.Unlock()
135
136	return b.index.Delete(id)
137}
138
139func (b *bleveIndex) Clear() error {
140	b.mu.Lock()
141	defer b.mu.Unlock()
142
143	err := b.index.Close()
144	if err != nil {
145		return err
146	}
147
148	err = os.RemoveAll(b.path)
149	if err != nil {
150		return err
151	}
152
153	return b.makeIndex()
154}
155
156func (b *bleveIndex) Close() error {
157	b.mu.Lock()
158	defer b.mu.Unlock()
159
160	return b.index.Close()
161}