cache: simplify cache eviction

Michael MurΓ© created

Change summary

cache/lru_id_cache.go    |  15 +---
cache/repo_cache.go      |  30 ++++++----
cache/repo_cache_bug.go  | 115 ++++++++++++++++++++---------------------
cache/repo_cache_test.go |  27 +++------
4 files changed, 89 insertions(+), 98 deletions(-)

Detailed changes

cache/lru_id_cache.go πŸ”—

@@ -1,6 +1,8 @@
 package cache
 
 import (
+	"math"
+
 	lru "github.com/hashicorp/golang-lru"
 
 	"github.com/MichaelMure/git-bug/entity"
@@ -8,16 +10,14 @@ import (
 
 type LRUIdCache struct {
 	parentCache *lru.Cache
-	maxSize     int
 }
 
-func NewLRUIdCache(size int) *LRUIdCache {
+func NewLRUIdCache() *LRUIdCache {
 	// Ignore error here
-	cache, _ := lru.New(size)
+	cache, _ := lru.New(math.MaxInt32)
 
 	return &LRUIdCache{
 		cache,
-		size,
 	}
 }
 
@@ -39,7 +39,7 @@ func (c *LRUIdCache) GetOldest() (entity.Id, bool) {
 	return id.(entity.Id), present
 }
 
-func (c *LRUIdCache) GetAll() (ids []entity.Id) {
+func (c *LRUIdCache) GetOldestToNewest() (ids []entity.Id) {
 	interfaceKeys := c.parentCache.Keys()
 	for _, id := range interfaceKeys {
 		ids = append(ids, id.(entity.Id))
@@ -54,8 +54,3 @@ func (c *LRUIdCache) Len() int {
 func (c *LRUIdCache) Remove(id entity.Id) bool {
 	return c.parentCache.Remove(id)
 }
-
-func (c *LRUIdCache) Resize(size int) int {
-	c.maxSize = size
-	return c.parentCache.Resize(size)
-}

cache/repo_cache.go πŸ”—

@@ -21,7 +21,8 @@ import (
 // 2: added cache for identities with a reference in the bug cache
 const formatVersion = 2
 
-const lruCacheSize = 100
+// The maximum number of bugs loaded in memory. After that, eviction will be done.
+const defaultMaxLoadedBugs = 100
 
 var _ repository.RepoCommon = &RepoCache{}
 
@@ -46,14 +47,16 @@ type RepoCache struct {
 	// the name of the repository, as defined in the MultiRepoCache
 	name string
 
+	// maximum number of loaded bugs
+	maxLoadedBugs int
+
 	muBug sync.RWMutex
 	// excerpt of bugs data for all bugs
 	bugExcerpts map[entity.Id]*BugExcerpt
 	// bug loaded in memory
 	bugs map[entity.Id]*BugCache
-
-	// presentBugs is an LRU cache that records which bugs the cache has loaded in
-	presentBugs *LRUIdCache
+	// loadedBugs is an LRU cache that records which bugs the cache has loaded in
+	loadedBugs *LRUIdCache
 
 	muIdentity sync.RWMutex
 	// excerpt of identities data for all identities
@@ -71,10 +74,12 @@ func NewRepoCache(r repository.ClockedRepo) (*RepoCache, error) {
 
 func NewNamedRepoCache(r repository.ClockedRepo, name string) (*RepoCache, error) {
 	c := &RepoCache{
-		repo:       r,
-		name:       name,
-		bugs:       make(map[entity.Id]*BugCache),
-		identities: make(map[entity.Id]*IdentityCache),
+		repo:          r,
+		name:          name,
+		maxLoadedBugs: defaultMaxLoadedBugs,
+		bugs:          make(map[entity.Id]*BugCache),
+		loadedBugs:    NewLRUIdCache(),
+		identities:    make(map[entity.Id]*IdentityCache),
 	}
 
 	err := c.lock()
@@ -82,9 +87,6 @@ func NewNamedRepoCache(r repository.ClockedRepo, name string) (*RepoCache, error
 		return &RepoCache{}, err
 	}
 
-	presentBugs := NewLRUIdCache(lruCacheSize)
-	c.presentBugs = presentBugs
-
 	err = c.load()
 	if err == nil {
 		return c, nil
@@ -99,6 +101,12 @@ func NewNamedRepoCache(r repository.ClockedRepo, name string) (*RepoCache, error
 	return c, c.write()
 }
 
+// setCacheSize change the maximum number of loaded bugs
+func (c *RepoCache) setCacheSize(size int) {
+	c.maxLoadedBugs = size
+	c.evictIfNeeded()
+}
+
 // load will try to read from the disk all the cache files
 func (c *RepoCache) load() error {
 	err := c.loadBugCache()

cache/repo_cache_bug.go πŸ”—

@@ -19,7 +19,6 @@ import (
 const bugCacheFile = "bug-cache"
 
 var errBugNotInCache = errors.New("bug missing from cache")
-var errCantEvictBug = errors.New("unable to evict a bug from the cache")
 
 func bugCacheFilePath(repo repository.Repo) string {
 	return path.Join(repo.GetPath(), "git-bug", bugCacheFile)
@@ -32,9 +31,15 @@ func (c *RepoCache) bugUpdated(id entity.Id) error {
 	b, ok := c.bugs[id]
 	if !ok {
 		c.muBug.Unlock()
+
+		// if the bug is not loaded at this point, it means it was loaded before
+		// but got evicted. Which means we potentially have multiple copies in
+		// memory and thus concurrent write.
+		// Failing immediately here is the simple and safe solution to avoid
+		// complicated data loss.
 		return errBugNotInCache
 	}
-	c.presentBugs.Get(id)
+	c.loadedBugs.Get(id)
 	c.bugExcerpts[id] = NewBugExcerpt(b.bug, b.Snapshot())
 	c.muBug.Unlock()
 
@@ -111,72 +116,66 @@ func (c *RepoCache) writeBugCache() error {
 func (c *RepoCache) ResolveBugExcerpt(id entity.Id) (*BugExcerpt, error) {
 	c.muBug.RLock()
 	defer c.muBug.RUnlock()
+
 	excerpt, ok := c.bugExcerpts[id]
 	if !ok {
-		return nil, errBugNotInCache
+		panic("missing bug in the cache")
 	}
-	c.presentBugs.Get(id)
+
 	return excerpt, nil
 }
 
 // ResolveBug retrieve a bug matching the exact given id
 func (c *RepoCache) ResolveBug(id entity.Id) (*BugCache, error) {
-	err := c.ensureBugLoaded(id)
-	if err != nil {
-		return nil, err
-	}
-
 	c.muBug.RLock()
-	defer c.muBug.RUnlock()
-	return c.bugs[id], nil
-}
-
-// ensureBugLoaded ensures a bug (if it exists) is loaded into the cache
-// it will automatically evict a bug if needed
-func (c *RepoCache) ensureBugLoaded(id entity.Id) error {
-	if c.presentBugs.Get(id) {
-		return nil
+	cached, ok := c.bugs[id]
+	if ok {
+		c.loadedBugs.Get(id)
+		c.muBug.RUnlock()
+		return cached, nil
 	}
+	c.muBug.RUnlock()
 
 	b, err := bug.ReadLocalBug(c.repo, id)
 	if err != nil {
-		return err
+		return nil, err
 	}
 
-	bugCache := NewBugCache(c, b)
-
-	err = c.evictIfNeeded()
-	if err != nil {
-		return err
-	}
+	cached = NewBugCache(c, b)
 
 	c.muBug.Lock()
-	defer c.muBug.Unlock()
-	c.presentBugs.Add(id)
-	c.bugs[id] = bugCache
-	return nil
+	c.bugs[id] = cached
+	c.loadedBugs.Add(id)
+	c.muBug.Unlock()
+
+	c.evictIfNeeded()
+
+	return cached, nil
 }
 
 // evictIfNeeded will evict a bug from the cache if needed
-// it also removes references of the bug from the bugs and bugExcerpts
-func (c *RepoCache) evictIfNeeded() error {
+// it also removes references of the bug from the bugs
+func (c *RepoCache) evictIfNeeded() {
 	c.muBug.Lock()
 	defer c.muBug.Unlock()
-	if c.presentBugs.Len() < c.presentBugs.maxSize {
-		return nil
+	if c.loadedBugs.Len() <= c.maxLoadedBugs {
+		return
 	}
 
-	for _, id := range c.presentBugs.GetAll() {
-		if b := c.bugs[id]; !b.NeedCommit() {
-			b.mu.Lock()
-			c.presentBugs.Remove(id)
-			delete(c.bugExcerpts, id)
-			delete(c.bugs, id)
-			return nil
+	for _, id := range c.loadedBugs.GetOldestToNewest() {
+		b := c.bugs[id]
+		if b.NeedCommit() {
+			continue
 		}
-	}
 
-	return errCantEvictBug
+		b.mu.Lock()
+		c.loadedBugs.Remove(id)
+		delete(c.bugs, id)
+
+		if c.loadedBugs.Len() <= c.maxLoadedBugs {
+			return
+		}
+	}
 }
 
 // ResolveBugExcerptPrefix retrieve a BugExcerpt matching an id prefix. It fails if multiple
@@ -380,18 +379,19 @@ func (c *RepoCache) NewBugRaw(author *IdentityCache, unixTime int64, title strin
 		return nil, nil, err
 	}
 
-	if other, err := c.ResolveBug(b.Id()); other != nil {
-		if err != nil {
-			return nil, nil, err
-		}
+	c.muBug.Lock()
+	if _, has := c.bugs[b.Id()]; has {
+		c.muBug.Unlock()
+		return nil, nil, fmt.Errorf("bug %s already exist in the cache", b.Id())
 	}
 
 	cached := NewBugCache(c, b)
-
-	c.muBug.Lock()
 	c.bugs[b.Id()] = cached
+	c.loadedBugs.Add(b.Id())
 	c.muBug.Unlock()
 
+	c.evictIfNeeded()
+
 	// force the write of the excerpt
 	err = c.bugUpdated(b.Id())
 	if err != nil {
@@ -403,28 +403,23 @@ func (c *RepoCache) NewBugRaw(author *IdentityCache, unixTime int64, title strin
 
 // RemoveBug removes a bug from the cache and repo given a bug id prefix
 func (c *RepoCache) RemoveBug(prefix string) error {
-	b, err := c.ResolveBugPrefix(prefix)
-	if err != nil {
-		return err
-	}
+	c.muBug.RLock()
 
-	err = c.ensureBugLoaded(b.Id())
+	b, err := c.ResolveBugPrefix(prefix)
 	if err != nil {
+		c.muBug.RUnlock()
 		return err
 	}
+	c.muBug.RUnlock()
 
 	c.muBug.Lock()
-	b.mu.Lock()
 	err = bug.RemoveBug(c.repo, b.Id())
-	if err != nil {
-		c.muBug.Unlock()
-		b.mu.Unlock()
-		return err
-	}
-	c.presentBugs.Remove(b.Id())
+
 	delete(c.bugs, b.Id())
 	delete(c.bugExcerpts, b.Id())
+	c.loadedBugs.Remove(b.Id())
 
 	c.muBug.Unlock()
+
 	return c.writeBugCache()
 }

cache/repo_cache_test.go πŸ”—

@@ -214,12 +214,11 @@ func TestCacheEviction(t *testing.T) {
 	repo := repository.CreateTestRepo(false)
 	repoCache, err := NewRepoCache(repo)
 	require.NoError(t, err)
-	repoCache.presentBugs.Resize(2)
+	repoCache.setCacheSize(2)
 
-	require.Equal(t, 2, repoCache.presentBugs.maxSize)
-	require.Equal(t, 0, repoCache.presentBugs.Len())
+	require.Equal(t, 2, repoCache.maxLoadedBugs)
+	require.Equal(t, 0, repoCache.loadedBugs.Len())
 	require.Equal(t, 0, len(repoCache.bugs))
-	require.Equal(t, 0, len(repoCache.bugExcerpts))
 
 	// Generating some bugs
 	rene, err := repoCache.NewIdentity("RenΓ© Descartes", "rene@descartes.fr")
@@ -231,51 +230,45 @@ func TestCacheEviction(t *testing.T) {
 	require.NoError(t, err)
 
 	checkBugPresence(t, repoCache, bug1, true)
-	require.Equal(t, 1, repoCache.presentBugs.Len())
+	require.Equal(t, 1, repoCache.loadedBugs.Len())
 	require.Equal(t, 1, len(repoCache.bugs))
-	require.Equal(t, 1, len(repoCache.bugExcerpts))
 
 	bug2, _, err := repoCache.NewBug("title", "message")
 	require.NoError(t, err)
 
 	checkBugPresence(t, repoCache, bug1, true)
 	checkBugPresence(t, repoCache, bug2, true)
-	require.Equal(t, 2, repoCache.presentBugs.Len())
+	require.Equal(t, 2, repoCache.loadedBugs.Len())
 	require.Equal(t, 2, len(repoCache.bugs))
-	require.Equal(t, 2, len(repoCache.bugExcerpts))
 
 	// Number of bugs should not exceed max size of lruCache, oldest one should be evicted
 	bug3, _, err := repoCache.NewBug("title", "message")
 	require.NoError(t, err)
 
-	require.Equal(t, 2, repoCache.presentBugs.Len())
+	require.Equal(t, 2, repoCache.loadedBugs.Len())
 	require.Equal(t, 2, len(repoCache.bugs))
-	require.Equal(t, 2, len(repoCache.bugExcerpts))
 	checkBugPresence(t, repoCache, bug1, false)
 	checkBugPresence(t, repoCache, bug2, true)
 	checkBugPresence(t, repoCache, bug3, true)
 
 	// Accessing bug should update position in lruCache and therefore it should not be evicted
-	repoCache.presentBugs.Get(bug2.Id())
-	oldestId, _ := repoCache.presentBugs.GetOldest()
+	repoCache.loadedBugs.Get(bug2.Id())
+	oldestId, _ := repoCache.loadedBugs.GetOldest()
 	require.Equal(t, bug3.Id(), oldestId)
 
 	checkBugPresence(t, repoCache, bug1, false)
 	checkBugPresence(t, repoCache, bug2, true)
 	checkBugPresence(t, repoCache, bug3, true)
-	require.Equal(t, 2, repoCache.presentBugs.Len())
+	require.Equal(t, 2, repoCache.loadedBugs.Len())
 	require.Equal(t, 2, len(repoCache.bugs))
-	require.Equal(t, 2, len(repoCache.bugExcerpts))
 }
 
 func checkBugPresence(t *testing.T, cache *RepoCache, bug *BugCache, presence bool) {
 	id := bug.Id()
-	require.Equal(t, presence, cache.presentBugs.Contains(id))
+	require.Equal(t, presence, cache.loadedBugs.Contains(id))
 	b, ok := cache.bugs[id]
 	require.Equal(t, presence, ok)
 	if ok {
 		require.Equal(t, bug, b)
 	}
-	_, ok = cache.bugExcerpts[id]
-	require.Equal(t, presence, ok)
 }