1package repository
2
3import (
4 "fmt"
5 "os"
6 "strings"
7 "sync"
8 "unicode/utf8"
9
10 "github.com/blevesearch/bleve"
11)
12
13var _ Index = &bleveIndex{}
14
15type bleveIndex struct {
16 path string
17
18 mu sync.RWMutex
19 index bleve.Index
20}
21
22func openBleveIndex(path string) (*bleveIndex, error) {
23 index, err := bleve.Open(path)
24 if err == nil {
25 return &bleveIndex{path: path, index: index}, nil
26 }
27
28 b := &bleveIndex{path: path}
29 err = b.makeIndex()
30 if err != nil {
31 return nil, err
32 }
33
34 return b, nil
35}
36
37func (b *bleveIndex) makeIndex() error {
38 err := os.MkdirAll(b.path, os.ModePerm)
39 if err != nil {
40 return err
41 }
42
43 // TODO: follow https://github.com/blevesearch/bleve/issues/1576 recommendations
44
45 mapping := bleve.NewIndexMapping()
46 mapping.DefaultAnalyzer = "en"
47
48 index, err := bleve.New(b.path, mapping)
49 if err != nil {
50 return err
51 }
52 b.index = index
53 return nil
54}
55
56func (b *bleveIndex) IndexOne(id string, texts []string) error {
57 b.mu.Lock()
58 defer b.mu.Unlock()
59 return b._index(b.index.Index, id, texts)
60}
61
62func (b *bleveIndex) IndexBatch() (indexer func(id string, texts []string) error, closer func() error) {
63 b.mu.Lock()
64 defer b.mu.Unlock()
65
66 batch := b.index.NewBatch()
67
68 indexer = func(id string, texts []string) error {
69 return b._index(batch.Index, id, texts)
70 }
71
72 closer = func() error {
73 return b.index.Batch(batch)
74 }
75
76 return indexer, closer
77}
78
79func (b *bleveIndex) _index(indexer func(string, interface{}) error, id string, texts []string) error {
80 searchable := struct{ Text []string }{Text: texts}
81
82 // See https://github.com/blevesearch/bleve/issues/1576
83 var sb strings.Builder
84 normalize := func(text string) string {
85 sb.Reset()
86 for _, field := range strings.Fields(text) {
87 if utf8.RuneCountInString(field) < 100 {
88 sb.WriteString(field)
89 sb.WriteRune(' ')
90 }
91 }
92 return sb.String()
93 }
94
95 for i, s := range searchable.Text {
96 searchable.Text[i] = normalize(s)
97 }
98
99 return indexer(id, searchable)
100}
101
102func (b *bleveIndex) Search(terms []string) ([]string, error) {
103 b.mu.RLock()
104 defer b.mu.RUnlock()
105
106 for i, term := range terms {
107 if strings.Contains(term, " ") {
108 terms[i] = fmt.Sprintf("\"%s\"", term)
109 }
110 }
111
112 query := bleve.NewQueryStringQuery(strings.Join(terms, " "))
113 search := bleve.NewSearchRequest(query)
114
115 res, err := b.index.Search(search)
116 if err != nil {
117 return nil, err
118 }
119
120 ids := make([]string, len(res.Hits))
121 for i, hit := range res.Hits {
122 ids[i] = hit.ID
123 }
124
125 return ids, nil
126}
127
128func (b *bleveIndex) DocCount() (uint64, error) {
129 return b.index.DocCount()
130}
131
132func (b *bleveIndex) Remove(id string) error {
133 b.mu.Lock()
134 defer b.mu.Unlock()
135
136 return b.index.Delete(id)
137}
138
139func (b *bleveIndex) Clear() error {
140 b.mu.Lock()
141 defer b.mu.Unlock()
142
143 err := b.index.Close()
144 if err != nil {
145 return err
146 }
147
148 err = os.RemoveAll(b.path)
149 if err != nil {
150 return err
151 }
152
153 return b.makeIndex()
154}
155
156func (b *bleveIndex) Close() error {
157 b.mu.Lock()
158 defer b.mu.Unlock()
159
160 return b.index.Close()
161}