1package repository
  2
  3import (
  4	"fmt"
  5	"os"
  6	"strings"
  7	"sync"
  8	"unicode/utf8"
  9
 10	"github.com/blevesearch/bleve/v2"
 11	"github.com/blevesearch/bleve/v2/index/upsidedown"
 12)
 13
// Compile-time assertion that *bleveIndex satisfies the Index interface.
var _ Index = &bleveIndex{}
 15
// bleveIndex is an Index implementation backed by a bleve index stored on disk.
type bleveIndex struct {
	// path is the directory holding the bleve index files.
	path string

	// mu guards index, which Clear replaces with a freshly created one while
	// holding the write lock.
	mu    sync.RWMutex
	// index is the open bleve index; it is set by openBleveIndex and makeIndex.
	index bleve.Index
}
 22
 23func openBleveIndex(path string) (*bleveIndex, error) {
 24	index, err := bleve.Open(path)
 25	if err != nil {
 26		// likely we have no index yet, we make one.
 27		b := &bleveIndex{path: path}
 28		return b, b.makeIndex()
 29	}
 30
 31	adv, err := index.Advanced()
 32	if err != nil {
 33		_ = index.Close()
 34		return nil, fmt.Errorf("bleve: couldn't get the advanced index to assert index type: %v", err)
 35	}
 36
 37	// if we detect the v1 format (upside-down), we force a rebuild to the v2 format (scorch)
 38	// which is much smaller.
 39	if _, ok := adv.(*upsidedown.UpsideDownCouch); ok {
 40		_ = index.Close()
 41		err = os.RemoveAll(path)
 42		if err != nil {
 43			return nil, err
 44		}
 45		b := &bleveIndex{path: path}
 46		return b, b.makeIndex()
 47	}
 48
 49	return &bleveIndex{path: path, index: index}, nil
 50}
 51
 52func (b *bleveIndex) makeIndex() error {
 53	err := os.MkdirAll(b.path, os.ModePerm)
 54	if err != nil {
 55		return err
 56	}
 57
 58	// TODO: follow https://github.com/blevesearch/bleve/issues/1576 recommendations
 59
 60	mapping := bleve.NewIndexMapping()
 61	mapping.DefaultAnalyzer = "en"
 62
 63	index, err := bleve.New(b.path, mapping)
 64	if err != nil {
 65		return err
 66	}
 67	b.index = index
 68	return nil
 69}
 70
// IndexOne indexes texts under id directly in the underlying bleve index.
// It holds the write lock for the duration of the call and delegates the
// actual work to _index.
func (b *bleveIndex) IndexOne(id string, texts []string) error {
	b.mu.Lock()
	defer b.mu.Unlock()
	return b._index(b.index.Index, id, texts)
}
 76
 77func (b *bleveIndex) IndexBatch() (indexer func(id string, texts []string) error, closer func() error) {
 78	b.mu.Lock()
 79	defer b.mu.Unlock()
 80
 81	batch := b.index.NewBatch()
 82
 83	indexer = func(id string, texts []string) error {
 84		return b._index(batch.Index, id, texts)
 85	}
 86
 87	closer = func() error {
 88		return b.index.Batch(batch)
 89	}
 90
 91	return indexer, closer
 92}
 93
 94func (b *bleveIndex) _index(indexer func(string, interface{}) error, id string, texts []string) error {
 95	searchable := struct{ Text []string }{Text: texts}
 96
 97	// See https://github.com/blevesearch/bleve/issues/1576
 98	var sb strings.Builder
 99	normalize := func(text string) string {
100		sb.Reset()
101		for _, field := range strings.Fields(text) {
102			if utf8.RuneCountInString(field) < 100 {
103				sb.WriteString(field)
104				sb.WriteRune(' ')
105			}
106		}
107		return sb.String()
108	}
109
110	for i, s := range searchable.Text {
111		searchable.Text[i] = normalize(s)
112	}
113
114	return indexer(id, searchable)
115}
116
117func (b *bleveIndex) Search(terms []string) ([]string, error) {
118	b.mu.RLock()
119	defer b.mu.RUnlock()
120
121	for i, term := range terms {
122		if strings.Contains(term, " ") {
123			terms[i] = fmt.Sprintf("\"%s\"", term)
124		}
125	}
126
127	query := bleve.NewQueryStringQuery(strings.Join(terms, " "))
128	search := bleve.NewSearchRequest(query)
129
130	res, err := b.index.Search(search)
131	if err != nil {
132		return nil, err
133	}
134
135	ids := make([]string, len(res.Hits))
136	for i, hit := range res.Hits {
137		ids[i] = hit.ID
138	}
139
140	return ids, nil
141}
142
143func (b *bleveIndex) DocCount() (uint64, error) {
144	return b.index.DocCount()
145}
146
// Remove deletes the document stored under id from the underlying bleve index
// while holding the write lock, and returns the error from that delete call.
func (b *bleveIndex) Remove(id string) error {
	b.mu.Lock()
	defer b.mu.Unlock()

	return b.index.Delete(id)
}
153
154func (b *bleveIndex) Clear() error {
155	b.mu.Lock()
156	defer b.mu.Unlock()
157
158	err := b.index.Close()
159	if err != nil {
160		return err
161	}
162
163	err = os.RemoveAll(b.path)
164	if err != nil {
165		return err
166	}
167
168	return b.makeIndex()
169}
170
// Close closes the underlying bleve index while holding the write lock and
// returns the error from that close call.
func (b *bleveIndex) Close() error {
	b.mu.Lock()
	defer b.mu.Unlock()

	return b.index.Close()
}