repo_cache.go

  1package cache
  2
  3import (
  4	"fmt"
  5	"io"
  6	"os"
  7	"strconv"
  8	"strings"
  9	"sync"
 10
 11	"github.com/git-bug/git-bug/entities/bug"
 12	"github.com/git-bug/git-bug/entities/identity"
 13	"github.com/git-bug/git-bug/entity"
 14	"github.com/git-bug/git-bug/repository"
 15	"github.com/git-bug/git-bug/util/multierr"
 16	"github.com/git-bug/git-bug/util/process"
 17)
 18
 19// 1: original format
 20// 2: added cache for identities with a reference in the bug cache
 21// 3: no more legacy identity
 22// 4: entities make their IDs from data, not git commit
 23const formatVersion = 4
 24
 25// The maximum number of bugs loaded in memory. After that, eviction will be done.
 26const defaultMaxLoadedBugs = 1000
 27
 28var _ repository.RepoCommon = &RepoCache{}
 29var _ repository.RepoConfig = &RepoCache{}
 30var _ repository.RepoKeyring = &RepoCache{}
 31
 32// cacheMgmt is the expected interface for a sub-cache.
 33type cacheMgmt interface {
 34	Typename() string
 35	Load() error
 36	Build() <-chan BuildEvent
 37	SetCacheSize(size int)
 38	RemoveAll() error
 39	MergeAll(remote string) <-chan entity.MergeResult
 40	GetNamespace() string
 41	RegisterObserver(repoName string, observer Observer)
 42	UnregisterObserver(observer Observer)
 43	Close() error
 44	SyncLocalRef(id entity.Id) error
 45}
 46
// RepoCache is a cache for a Repository. This cache has multiple functions:
//
//  1. After being loaded, a Bug is kept in memory in the cache, allowing for fast
//     access later.
//  2. The cache maintains in memory and on disk a pre-digested excerpt for each bug,
//     allowing for fast querying of the whole set of bugs without having to load
//     them individually.
//  3. The cache guarantees that a single instance of a Bug is loaded at once, avoiding
//     the loss of data that we could have with multiple copies in the same process.
//  4. In the same way, the cache maintains in memory a single copy of the loaded identities.
//
// The cache also protects the on-disk data by locking the git repository for its
// own usage, by writing a lock file. Of course, normal git operations are not
// affected, only the git-bug related ones.
type RepoCache struct {
	// the underlying repo
	repo repository.ClockedRepo

	// the name of the repository, as defined in the MultiRepoCache
	name string

	// resolvers for all known entities and excerpts
	resolvers entity.Resolvers

	// the typed sub-caches, exposed through Bugs() and Identities()
	bugs       *RepoCacheBug
	identities *RepoCacheIdentity

	// every sub-cache, in registration order; used for the operations that are
	// applied to all of them (load, build, close, observers, ...)
	subcaches []cacheMgmt

	// the user identity's id, if known
	// NOTE(review): muUserIdentity presumably guards userIdentityId; the code
	// using them is not in this part of the file.
	muUserIdentity sync.RWMutex
	userIdentityId entity.Id
}
 80
// NewRepoCache creates or opens a cache on top of a raw repository, registered
// under the default repository name (defaultRepoName).
// The caller is expected to read all returned events before the cache is considered
// ready to use.
func NewRepoCache(r repository.ClockedRepo) (*RepoCache, chan BuildEvent) {
	return NewNamedRepoCache(r, defaultRepoName)
}
 87
 88// NewNamedRepoCache create or open a named cache on top of a raw repository.
 89// The caller is expected to read all returned events before the cache is considered
 90// ready to use.
 91func NewNamedRepoCache(r repository.ClockedRepo, name string) (*RepoCache, chan BuildEvent) {
 92	c := &RepoCache{
 93		repo: r,
 94		name: name,
 95	}
 96
 97	c.identities = NewRepoCacheIdentity(r, c.getResolvers, c.GetUserIdentity)
 98	c.subcaches = append(c.subcaches, c.identities)
 99
100	c.bugs = NewRepoCacheBug(r, c.getResolvers, c.GetUserIdentity)
101	c.subcaches = append(c.subcaches, c.bugs)
102
103	c.resolvers = entity.Resolvers{
104		&IdentityCache{}:   entity.ResolverFunc[*IdentityCache](c.identities.Resolve),
105		&IdentityExcerpt{}: entity.ResolverFunc[*IdentityExcerpt](c.identities.ResolveExcerpt),
106		&BugCache{}:        entity.ResolverFunc[*BugCache](c.bugs.Resolve),
107		&BugExcerpt{}:      entity.ResolverFunc[*BugExcerpt](c.bugs.ResolveExcerpt),
108	}
109
110	// small buffer so that the functions below can emit an event without blocking
111	events := make(chan BuildEvent)
112
113	go func() {
114		defer close(events)
115
116		err := c.lock(events)
117		if err != nil {
118			events <- BuildEvent{Err: err}
119			return
120		}
121
122		err = c.load()
123		if err == nil {
124			return
125		}
126
127		// Cache is either missing, broken or outdated. Rebuilding.
128		c.buildCache(events)
129	}()
130
131	return c, events
132}
133
134func NewRepoCacheNoEvents(r repository.ClockedRepo) (*RepoCache, error) {
135	cache, events := NewRepoCache(r)
136	for event := range events {
137		if event.Err != nil {
138			for range events {
139			}
140			return nil, event.Err
141		}
142	}
143	return cache, nil
144}
145
// Bugs gives access to the sub-cache holding the Bug entities.
func (c *RepoCache) Bugs() *RepoCacheBug {
	return c.bugs
}
150
// Identities gives access to the sub-cache holding the Identity entities.
func (c *RepoCache) Identities() *RepoCacheIdentity {
	return c.identities
}
155
// getResolvers returns the resolvers for all known entities and excerpts.
// It is handed to the sub-caches when they are created, so that they can
// obtain the resolvers later on.
func (c *RepoCache) getResolvers() entity.Resolvers {
	return c.resolvers
}
159
160// setCacheSize change the maximum number of loaded bugs
161func (c *RepoCache) setCacheSize(size int) {
162	for _, subcache := range c.subcaches {
163		subcache.SetCacheSize(size)
164	}
165}
166
167// load will try to read from the disk all the cache files
168func (c *RepoCache) load() error {
169	var errWait multierr.ErrWaitGroup
170	for _, mgmt := range c.subcaches {
171		errWait.Go(mgmt.Load)
172	}
173	return errWait.Wait()
174}
175
176func (c *RepoCache) lock(events chan BuildEvent) error {
177	err := repoIsAvailable(c.repo, events)
178	if err != nil {
179		return err
180	}
181
182	f, err := c.repo.LocalStorage().Create(lockfile)
183	if err != nil {
184		return err
185	}
186
187	pid := fmt.Sprintf("%d", os.Getpid())
188	_, err = f.Write([]byte(pid))
189	if err != nil {
190		_ = f.Close()
191		return err
192	}
193
194	return f.Close()
195}
196
197func (c *RepoCache) Close() error {
198	var errWait multierr.ErrWaitGroup
199	for _, mgmt := range c.subcaches {
200		errWait.Go(mgmt.Close)
201	}
202	err := errWait.Wait()
203	if err != nil {
204		return err
205	}
206
207	err = c.repo.Close()
208	if err != nil {
209		return err
210	}
211
212	return c.repo.LocalStorage().Remove(lockfile)
213}
214
215func (c *RepoCache) buildCache(events chan BuildEvent) {
216	events <- BuildEvent{Event: BuildEventCacheIsBuilt}
217
218	var wg sync.WaitGroup
219	for _, subcache := range c.subcaches {
220		wg.Add(1)
221		go func(subcache cacheMgmt) {
222			defer wg.Done()
223
224			buildEvents := subcache.Build()
225			for buildEvent := range buildEvents {
226				events <- buildEvent
227				if buildEvent.Err != nil {
228					return
229				}
230			}
231		}(subcache)
232	}
233	wg.Wait()
234}
235
236func (c *RepoCache) registerObserver(repoName string, typename string, observer Observer) error {
237	switch typename {
238	case bug.Typename:
239		c.bugs.RegisterObserver(repoName, observer)
240	case identity.Typename:
241		c.identities.RegisterObserver(repoName, observer)
242	default:
243		var allTypenames []string
244		for _, subcache := range c.subcaches {
245			allTypenames = append(allTypenames, subcache.Typename())
246		}
247		return fmt.Errorf("unknown typename `%s`, available types are [%s]", typename, strings.Join(allTypenames, ", "))
248	}
249	return nil
250}
251
252func (c *RepoCache) registerAllObservers(repoName string, observer Observer) {
253	for _, subcache := range c.subcaches {
254		subcache.RegisterObserver(repoName, observer)
255	}
256}
257
258func (c *RepoCache) unregisterAllObservers(observer Observer) {
259	for _, subcache := range c.subcaches {
260		subcache.UnregisterObserver(observer)
261	}
262}
263
264// repoIsAvailable check is the given repository is locked by a Cache.
265// Note: this is a smart function that will clean the lock file if the
266// corresponding process is not there anymore.
267// If no error is returned, the repo is free to edit.
268func repoIsAvailable(repo repository.RepoStorage, events chan BuildEvent) error {
269	// Todo: this leave way for a racey access to the repo between the test
270	// if the file exist and the actual write. It's probably not a problem in
271	// practice because using a repository will be done from user interaction
272	// or in a context where a single instance of git-bug is already guaranteed
273	// (say, a server with the web UI running). But still, that might be nice to
274	// have a mutex or something to guard that.
275
276	// Todo: this will fail if somehow the filesystem is shared with another
277	// computer. Should add a configuration that prevent the cleaning of the
278	// lock file
279
280	f, err := repo.LocalStorage().Open(lockfile)
281	if err != nil && !os.IsNotExist(err) {
282		return err
283	}
284
285	if err == nil {
286		// lock file already exist
287		buf, err := io.ReadAll(io.LimitReader(f, 10))
288		if err != nil {
289			_ = f.Close()
290			return err
291		}
292
293		err = f.Close()
294		if err != nil {
295			return err
296		}
297
298		if len(buf) >= 10 {
299			return fmt.Errorf("the lock file should be < 10 bytes")
300		}
301
302		pid, err := strconv.Atoi(string(buf))
303		if err != nil {
304			return err
305		}
306
307		if process.IsRunning(pid) {
308			return fmt.Errorf("the repository you want to access is already locked by the process pid %d", pid)
309		}
310
311		// The lock file is just laying there after a crash, clean it
312
313		events <- BuildEvent{Event: BuildEventRemoveLock}
314
315		err = repo.LocalStorage().Remove(lockfile)
316		if err != nil {
317			return err
318		}
319	}
320
321	return nil
322}