repo_cache.go

  1package cache
  2
  3import (
  4	"fmt"
  5	"io"
  6	"os"
  7	"strconv"
  8	"sync"
  9
 10	"github.com/MichaelMure/git-bug/entity"
 11	"github.com/MichaelMure/git-bug/repository"
 12	"github.com/MichaelMure/git-bug/util/multierr"
 13	"github.com/MichaelMure/git-bug/util/process"
 14)
 15
// formatVersion is the current version number of the cache format.
// History of the versions:
//
// 1: original format
// 2: added cache for identities with a reference in the bug cache
// 3: no more legacy identity
// 4: entities make their IDs from data, not git commit
const formatVersion = 4
 21
// defaultMaxLoadedBugs is the maximum number of bugs loaded in memory.
// Past that number, eviction will be done.
const defaultMaxLoadedBugs = 1000
 24
 25var _ repository.RepoCommon = &RepoCache{}
 26var _ repository.RepoConfig = &RepoCache{}
 27var _ repository.RepoKeyring = &RepoCache{}
 28
// cacheMgmt is the expected interface for a sub-cache. RepoCache keeps its
// sub-caches in a slice of cacheMgmt and drives all of them the same way.
type cacheMgmt interface {
	// Typename returns a name for the sub-cache.
	// NOTE(review): presumably the entity type name also carried by BuildEvent.Typename — confirm.
	Typename() string
	// Load is called by RepoCache.load. When it returns an error for any
	// sub-cache, NewNamedRepoCache falls back to rebuilding the cache.
	Load() error
	// Build returns a channel of BuildEvent. RepoCache.buildCache forwards the
	// received events until the channel is closed or an event carries an error.
	Build() <-chan BuildEvent
	// SetCacheSize is called by RepoCache.setCacheSize with the requested size.
	SetCacheSize(size int)
	RemoveAll() error
	// MergeAll takes the name of a remote and returns a channel of entity.MergeResult.
	MergeAll(remote string) <-chan entity.MergeResult
	GetNamespace() string
	// Close is called by RepoCache.Close.
	Close() error
}
 40
// RepoCache is a cache for a Repository. This cache has multiple functions:
//
//  1. After being loaded, a Bug is kept in memory in the cache, allowing for fast
//     access later.
//  2. The cache maintains in memory and on disk a pre-digested excerpt for each bug,
//     allowing for fast querying of the whole set of bugs without having to load
//     them individually.
//  3. The cache guarantees that a single instance of a Bug is loaded at once, avoiding
//     the loss of data that we could have with multiple copies in the same process.
//  4. In the same way, the cache maintains in memory a single copy of the loaded identities.
//
// The cache also protects the on-disk data by locking the git repository for its
// own usage, by writing a lock file. Of course, normal git operations are not
// affected, only the git-bug related ones.
type RepoCache struct {
	// the underlying repo
	repo repository.ClockedRepo

	// the name of the repository, as defined in the MultiRepoCache
	name string

	// resolvers for all known entities and excerpts
	resolvers entity.Resolvers

	// the sub-caches, one per kind of entity
	bugs       *RepoCacheBug
	identities *RepoCacheIdentity

	// every sub-cache above, behind the common management interface, so that
	// they can all be loaded, built, resized and closed in a loop
	subcaches []cacheMgmt

	// the user identity's id, if known
	// NOTE(review): muUserIdentity presumably guards userIdentityId — confirm in the accessors.
	muUserIdentity sync.RWMutex
	userIdentityId entity.Id
}
 74
// NewRepoCache creates or opens a cache on top of a raw repository. It is
// NewNamedRepoCache called with defaultRepoName as the name.
// The caller is expected to read all returned events before the cache is considered
// ready to use.
func NewRepoCache(r repository.ClockedRepo) (*RepoCache, chan BuildEvent) {
	return NewNamedRepoCache(r, defaultRepoName)
}
 81
 82// NewNamedRepoCache create or open a named cache on top of a raw repository.
 83// The caller is expected to read all returned events before the cache is considered
 84// ready to use.
 85func NewNamedRepoCache(r repository.ClockedRepo, name string) (*RepoCache, chan BuildEvent) {
 86	c := &RepoCache{
 87		repo: r,
 88		name: name,
 89	}
 90
 91	c.identities = NewRepoCacheIdentity(r, c.getResolvers, c.GetUserIdentity)
 92	c.subcaches = append(c.subcaches, c.identities)
 93
 94	c.bugs = NewRepoCacheBug(r, c.getResolvers, c.GetUserIdentity)
 95	c.subcaches = append(c.subcaches, c.bugs)
 96
 97	c.resolvers = entity.Resolvers{
 98		&IdentityCache{}:   entity.ResolverFunc[*IdentityCache](c.identities.Resolve),
 99		&IdentityExcerpt{}: entity.ResolverFunc[*IdentityExcerpt](c.identities.ResolveExcerpt),
100		&BugCache{}:        entity.ResolverFunc[*BugCache](c.bugs.Resolve),
101		&BugExcerpt{}:      entity.ResolverFunc[*BugExcerpt](c.bugs.ResolveExcerpt),
102	}
103
104	// small buffer so that below functions can emit an event without blocking
105	events := make(chan BuildEvent)
106
107	go func() {
108		defer close(events)
109
110		err := c.lock(events)
111		if err != nil {
112			events <- BuildEvent{Err: err}
113			return
114		}
115
116		err = c.load()
117		if err == nil {
118			return
119		}
120
121		// Cache is either missing, broken or outdated. Rebuilding.
122		c.buildCache(events)
123	}()
124
125	return c, events
126}
127
128func NewRepoCacheNoEvents(r repository.ClockedRepo) (*RepoCache, error) {
129	cache, events := NewRepoCache(r)
130	for event := range events {
131		if event.Err != nil {
132			for range events {
133			}
134			return nil, event.Err
135		}
136	}
137	return cache, nil
138}
139
// Bugs gives access to the Bug entities, through the RepoCacheBug sub-cache.
func (c *RepoCache) Bugs() *RepoCacheBug {
	return c.bugs
}
144
// Identities gives access to the Identity entities, through the
// RepoCacheIdentity sub-cache.
func (c *RepoCache) Identities() *RepoCacheIdentity {
	return c.identities
}
149
// getResolvers returns the resolvers for all known entities and excerpts.
// It is passed as a function to the sub-caches when they are created in
// NewNamedRepoCache, before c.resolvers itself is assigned.
func (c *RepoCache) getResolvers() entity.Resolvers {
	return c.resolvers
}
153
154// setCacheSize change the maximum number of loaded bugs
155func (c *RepoCache) setCacheSize(size int) {
156	for _, subcache := range c.subcaches {
157		subcache.SetCacheSize(size)
158	}
159}
160
161// load will try to read from the disk all the cache files
162func (c *RepoCache) load() error {
163	var errWait multierr.ErrWaitGroup
164	for _, mgmt := range c.subcaches {
165		errWait.Go(mgmt.Load)
166	}
167	return errWait.Wait()
168}
169
170func (c *RepoCache) lock(events chan BuildEvent) error {
171	err := repoIsAvailable(c.repo, events)
172	if err != nil {
173		return err
174	}
175
176	f, err := c.repo.LocalStorage().Create(lockfile)
177	if err != nil {
178		return err
179	}
180
181	pid := fmt.Sprintf("%d", os.Getpid())
182	_, err = f.Write([]byte(pid))
183	if err != nil {
184		_ = f.Close()
185		return err
186	}
187
188	return f.Close()
189}
190
191func (c *RepoCache) Close() error {
192	var errWait multierr.ErrWaitGroup
193	for _, mgmt := range c.subcaches {
194		errWait.Go(mgmt.Close)
195	}
196	err := errWait.Wait()
197	if err != nil {
198		return err
199	}
200
201	err = c.repo.Close()
202	if err != nil {
203		return err
204	}
205
206	return c.repo.LocalStorage().Remove(lockfile)
207}
208
// BuildEventType is the type of a BuildEvent.
type BuildEventType int

const (
	// The zero value is skipped, so it matches none of the named event types.
	_ BuildEventType = iota
	// BuildEventCacheIsBuilt is sent by buildCache before the sub-caches are built.
	BuildEventCacheIsBuilt
	// BuildEventRemoveLock is sent by repoIsAvailable before it removes a lock
	// file left by a process that is not running anymore.
	BuildEventRemoveLock
	// BuildEventStarted is the event for which BuildEvent.Total is set.
	BuildEventStarted
	// BuildEventProgress is the event for which BuildEvent.Progress is set.
	BuildEventProgress
	// BuildEventFinished is not emitted in this file.
	// NOTE(review): presumably marks the end of a sub-cache build — confirm in the sub-cache code.
	BuildEventFinished
)
219
// BuildEvent carries an event happening during the cache build process.
type BuildEvent struct {
	// Err carries an error if the build process failed. If set, no other field matters.
	Err error
	// Typename is the name of the entity to which the event relates. Can be empty if no particular entity is involved.
	Typename string
	// Event is the type of the event.
	Event BuildEventType
	// Total is the total number of elements being built. Set if Event is BuildEventStarted.
	Total int64
	// Progress is the current count of processed elements. Set if Event is BuildEventProgress.
	Progress int64
}
233
234func (c *RepoCache) buildCache(events chan BuildEvent) {
235	events <- BuildEvent{Event: BuildEventCacheIsBuilt}
236
237	var wg sync.WaitGroup
238	for _, subcache := range c.subcaches {
239		wg.Add(1)
240		go func(subcache cacheMgmt) {
241			defer wg.Done()
242
243			buildEvents := subcache.Build()
244			for buildEvent := range buildEvents {
245				events <- buildEvent
246				if buildEvent.Err != nil {
247					return
248				}
249			}
250		}(subcache)
251	}
252	wg.Wait()
253}
254
255// repoIsAvailable check is the given repository is locked by a Cache.
256// Note: this is a smart function that will clean the lock file if the
257// corresponding process is not there anymore.
258// If no error is returned, the repo is free to edit.
259func repoIsAvailable(repo repository.RepoStorage, events chan BuildEvent) error {
260	// Todo: this leave way for a racey access to the repo between the test
261	// if the file exist and the actual write. It's probably not a problem in
262	// practice because using a repository will be done from user interaction
263	// or in a context where a single instance of git-bug is already guaranteed
264	// (say, a server with the web UI running). But still, that might be nice to
265	// have a mutex or something to guard that.
266
267	// Todo: this will fail if somehow the filesystem is shared with another
268	// computer. Should add a configuration that prevent the cleaning of the
269	// lock file
270
271	f, err := repo.LocalStorage().Open(lockfile)
272	if err != nil && !os.IsNotExist(err) {
273		return err
274	}
275
276	if err == nil {
277		// lock file already exist
278		buf, err := io.ReadAll(io.LimitReader(f, 10))
279		if err != nil {
280			_ = f.Close()
281			return err
282		}
283
284		err = f.Close()
285		if err != nil {
286			return err
287		}
288
289		if len(buf) >= 10 {
290			return fmt.Errorf("the lock file should be < 10 bytes")
291		}
292
293		pid, err := strconv.Atoi(string(buf))
294		if err != nil {
295			return err
296		}
297
298		if process.IsRunning(pid) {
299			return fmt.Errorf("the repository you want to access is already locked by the process pid %d", pid)
300		}
301
302		// The lock file is just laying there after a crash, clean it
303
304		events <- BuildEvent{Event: BuildEventRemoveLock}
305
306		err = repo.LocalStorage().Remove(lockfile)
307		if err != nil {
308			return err
309		}
310	}
311
312	return nil
313}