1package repository
2
3import (
4 "fmt"
5 "os"
6 "strings"
7 "sync"
8 "unicode/utf8"
9
10 "github.com/blevesearch/bleve/v2"
11 "github.com/blevesearch/bleve/v2/index/upsidedown"
12)
13
14var _ Index = &bleveIndex{}
15
16type bleveIndex struct {
17 path string
18
19 mu sync.RWMutex
20 index bleve.Index
21}
22
23func openBleveIndex(path string) (*bleveIndex, error) {
24 index, err := bleve.Open(path)
25 if err != nil {
26 // likely we have no index yet, we make one.
27 b := &bleveIndex{path: path}
28 return b, b.makeIndex()
29 }
30
31 adv, err := index.Advanced()
32 if err != nil {
33 _ = index.Close()
34 return nil, fmt.Errorf("bleve: couldn't get the advanced index to assert index type: %v", err)
35 }
36
37 // if we detect the v1 format (upside-down), we force a rebuild to the v2 format (scorch)
38 // which is much smaller.
39 if _, ok := adv.(*upsidedown.UpsideDownCouch); ok {
40 _ = index.Close()
41 err = os.RemoveAll(path)
42 if err != nil {
43 return nil, err
44 }
45 b := &bleveIndex{path: path}
46 return b, b.makeIndex()
47 }
48
49 return &bleveIndex{path: path, index: index}, nil
50}
51
52func (b *bleveIndex) makeIndex() error {
53 err := os.MkdirAll(b.path, os.ModePerm)
54 if err != nil {
55 return err
56 }
57
58 // TODO: follow https://github.com/blevesearch/bleve/issues/1576 recommendations
59
60 mapping := bleve.NewIndexMapping()
61 mapping.DefaultAnalyzer = "en"
62
63 index, err := bleve.New(b.path, mapping)
64 if err != nil {
65 return err
66 }
67 b.index = index
68 return nil
69}
70
71func (b *bleveIndex) IndexOne(id string, texts []string) error {
72 b.mu.Lock()
73 defer b.mu.Unlock()
74 return b._index(b.index.Index, id, texts)
75}
76
77func (b *bleveIndex) IndexBatch() (indexer func(id string, texts []string) error, closer func() error) {
78 b.mu.Lock()
79 defer b.mu.Unlock()
80
81 batch := b.index.NewBatch()
82
83 indexer = func(id string, texts []string) error {
84 return b._index(batch.Index, id, texts)
85 }
86
87 closer = func() error {
88 return b.index.Batch(batch)
89 }
90
91 return indexer, closer
92}
93
94func (b *bleveIndex) _index(indexer func(string, interface{}) error, id string, texts []string) error {
95 searchable := struct{ Text []string }{Text: texts}
96
97 // See https://github.com/blevesearch/bleve/issues/1576
98 var sb strings.Builder
99 normalize := func(text string) string {
100 sb.Reset()
101 for _, field := range strings.Fields(text) {
102 if utf8.RuneCountInString(field) < 100 {
103 sb.WriteString(field)
104 sb.WriteRune(' ')
105 }
106 }
107 return sb.String()
108 }
109
110 for i, s := range searchable.Text {
111 searchable.Text[i] = normalize(s)
112 }
113
114 return indexer(id, searchable)
115}
116
117func (b *bleveIndex) Search(terms []string) ([]string, error) {
118 b.mu.RLock()
119 defer b.mu.RUnlock()
120
121 for i, term := range terms {
122 if strings.Contains(term, " ") {
123 terms[i] = fmt.Sprintf("\"%s\"", term)
124 }
125 }
126
127 query := bleve.NewQueryStringQuery(strings.Join(terms, " "))
128 search := bleve.NewSearchRequest(query)
129
130 res, err := b.index.Search(search)
131 if err != nil {
132 return nil, err
133 }
134
135 ids := make([]string, len(res.Hits))
136 for i, hit := range res.Hits {
137 ids[i] = hit.ID
138 }
139
140 return ids, nil
141}
142
143func (b *bleveIndex) DocCount() (uint64, error) {
144 return b.index.DocCount()
145}
146
147func (b *bleveIndex) Remove(id string) error {
148 b.mu.Lock()
149 defer b.mu.Unlock()
150
151 return b.index.Delete(id)
152}
153
154func (b *bleveIndex) Clear() error {
155 b.mu.Lock()
156 defer b.mu.Unlock()
157
158 err := b.index.Close()
159 if err != nil {
160 return err
161 }
162
163 err = os.RemoveAll(b.path)
164 if err != nil {
165 return err
166 }
167
168 return b.makeIndex()
169}
170
171func (b *bleveIndex) Close() error {
172 b.mu.Lock()
173 defer b.mu.Unlock()
174
175 return b.index.Close()
176}