Merge pull request #449 from MichaelMure/lru-cache

Michael MurΓ© created

LRU Cache

Change summary

cache/bug_cache.go       |  2 -
cache/lru_id_cache.go    | 56 ++++++++++++++++++++++++++++
cache/repo_cache.go      | 24 ++++++++++--
cache/repo_cache_bug.go  | 62 +++++++++++++++++++++++++++---
cache/repo_cache_test.go | 84 ++++++++++++++++++++++++++++++++++++-----
go.mod                   |  2 
go.sum                   |  1 
7 files changed, 206 insertions(+), 25 deletions(-)

Detailed changes

cache/bug_cache.go πŸ”—

@@ -37,8 +37,6 @@ func (c *BugCache) Snapshot() *bug.Snapshot {
 }
 
 func (c *BugCache) Id() entity.Id {
-	c.mu.RLock()
-	defer c.mu.RUnlock()
 	return c.bug.Id()
 }
 

cache/lru_id_cache.go πŸ”—

@@ -0,0 +1,56 @@
+package cache
+
+import (
+	"math"
+
+	lru "github.com/hashicorp/golang-lru"
+
+	"github.com/MichaelMure/git-bug/entity"
+)
+
+type LRUIdCache struct {
+	parentCache *lru.Cache
+}
+
+func NewLRUIdCache() *LRUIdCache {
+	// we can ignore the error here as it would only fail if the size is negative.
+	cache, _ := lru.New(math.MaxInt32)
+
+	return &LRUIdCache{
+		cache,
+	}
+}
+
+func (c *LRUIdCache) Add(id entity.Id) bool {
+	return c.parentCache.Add(id, nil)
+}
+
+func (c *LRUIdCache) Contains(id entity.Id) bool {
+	return c.parentCache.Contains(id)
+}
+
+func (c *LRUIdCache) Get(id entity.Id) bool {
+	_, present := c.parentCache.Get(id)
+	return present
+}
+
+func (c *LRUIdCache) GetOldest() (entity.Id, bool) {
+	id, _, present := c.parentCache.GetOldest()
+	return id.(entity.Id), present
+}
+
+func (c *LRUIdCache) GetOldestToNewest() (ids []entity.Id) {
+	interfaceKeys := c.parentCache.Keys()
+	for _, id := range interfaceKeys {
+		ids = append(ids, id.(entity.Id))
+	}
+	return
+}
+
+func (c *LRUIdCache) Len() int {
+	return c.parentCache.Len()
+}
+
+func (c *LRUIdCache) Remove(id entity.Id) bool {
+	return c.parentCache.Remove(id)
+}

cache/repo_cache.go πŸ”—

@@ -21,6 +21,9 @@ import (
 // 2: added cache for identities with a reference in the bug cache
 const formatVersion = 2
 
+// The maximum number of bugs loaded in memory. After that, eviction will be done.
+const defaultMaxLoadedBugs = 1000
+
 var _ repository.RepoCommon = &RepoCache{}
 
 // RepoCache is a cache for a Repository. This cache has multiple functions:
@@ -44,11 +47,16 @@ type RepoCache struct {
 	// the name of the repository, as defined in the MultiRepoCache
 	name string
 
+	// maximum number of loaded bugs
+	maxLoadedBugs int
+
 	muBug sync.RWMutex
 	// excerpt of bugs data for all bugs
 	bugExcerpts map[entity.Id]*BugExcerpt
 	// bug loaded in memory
 	bugs map[entity.Id]*BugCache
+	// loadedBugs is an LRU cache that records which bugs the cache has loaded in
+	loadedBugs *LRUIdCache
 
 	muIdentity sync.RWMutex
 	// excerpt of identities data for all identities
@@ -66,10 +74,12 @@ func NewRepoCache(r repository.ClockedRepo) (*RepoCache, error) {
 
 func NewNamedRepoCache(r repository.ClockedRepo, name string) (*RepoCache, error) {
 	c := &RepoCache{
-		repo:       r,
-		name:       name,
-		bugs:       make(map[entity.Id]*BugCache),
-		identities: make(map[entity.Id]*IdentityCache),
+		repo:          r,
+		name:          name,
+		maxLoadedBugs: defaultMaxLoadedBugs,
+		bugs:          make(map[entity.Id]*BugCache),
+		loadedBugs:    NewLRUIdCache(),
+		identities:    make(map[entity.Id]*IdentityCache),
 	}
 
 	err := c.lock()
@@ -91,6 +101,12 @@ func NewNamedRepoCache(r repository.ClockedRepo, name string) (*RepoCache, error
 	return c, c.write()
 }
 
+// setCacheSize change the maximum number of loaded bugs
+func (c *RepoCache) setCacheSize(size int) {
+	c.maxLoadedBugs = size
+	c.evictIfNeeded()
+}
+
 // load will try to read from the disk all the cache files
 func (c *RepoCache) load() error {
 	err := c.loadBugCache()

cache/repo_cache_bug.go πŸ”—

@@ -3,6 +3,7 @@ package cache
 import (
 	"bytes"
 	"encoding/gob"
+	"errors"
 	"fmt"
 	"os"
 	"path"
@@ -17,6 +18,8 @@ import (
 
 const bugCacheFile = "bug-cache"
 
+var errBugNotInCache = errors.New("bug missing from cache")
+
 func bugCacheFilePath(repo repository.Repo) string {
 	return path.Join(repo.GetPath(), "git-bug", bugCacheFile)
 }
@@ -25,13 +28,18 @@ func bugCacheFilePath(repo repository.Repo) string {
 // that is each time a bug is updated
 func (c *RepoCache) bugUpdated(id entity.Id) error {
 	c.muBug.Lock()
-
 	b, ok := c.bugs[id]
 	if !ok {
 		c.muBug.Unlock()
-		panic("missing bug in the cache")
-	}
 
+		// if the bug is not loaded at this point, it means it was loaded before
+		// but got evicted. Which means we potentially have multiple copies in
+		// memory and thus concurrent write.
+		// Failing immediately here is the simple and safe solution to avoid
+		// complicated data loss.
+		return errBugNotInCache
+	}
+	c.loadedBugs.Get(id)
 	c.bugExcerpts[id] = NewBugExcerpt(b.bug, b.Snapshot())
 	c.muBug.Unlock()
 
@@ -109,22 +117,24 @@ func (c *RepoCache) ResolveBugExcerpt(id entity.Id) (*BugExcerpt, error) {
 	c.muBug.RLock()
 	defer c.muBug.RUnlock()
 
-	e, ok := c.bugExcerpts[id]
+	excerpt, ok := c.bugExcerpts[id]
 	if !ok {
 		return nil, bug.ErrBugNotExist
 	}
 
-	return e, nil
+	return excerpt, nil
 }
 
 // ResolveBug retrieve a bug matching the exact given id
 func (c *RepoCache) ResolveBug(id entity.Id) (*BugCache, error) {
 	c.muBug.RLock()
 	cached, ok := c.bugs[id]
-	c.muBug.RUnlock()
 	if ok {
+		c.loadedBugs.Get(id)
+		c.muBug.RUnlock()
 		return cached, nil
 	}
+	c.muBug.RUnlock()
 
 	b, err := bug.ReadLocalBug(c.repo, id)
 	if err != nil {
@@ -135,11 +145,39 @@ func (c *RepoCache) ResolveBug(id entity.Id) (*BugCache, error) {
 
 	c.muBug.Lock()
 	c.bugs[id] = cached
+	c.loadedBugs.Add(id)
 	c.muBug.Unlock()
 
+	c.evictIfNeeded()
+
 	return cached, nil
 }
 
+// evictIfNeeded will evict a bug from the cache if needed
+// it also removes references of the bug from the bugs
+func (c *RepoCache) evictIfNeeded() {
+	c.muBug.Lock()
+	defer c.muBug.Unlock()
+	if c.loadedBugs.Len() <= c.maxLoadedBugs {
+		return
+	}
+
+	for _, id := range c.loadedBugs.GetOldestToNewest() {
+		b := c.bugs[id]
+		if b.NeedCommit() {
+			continue
+		}
+
+		b.mu.Lock()
+		c.loadedBugs.Remove(id)
+		delete(c.bugs, id)
+
+		if c.loadedBugs.Len() <= c.maxLoadedBugs {
+			return
+		}
+	}
+}
+
 // ResolveBugExcerptPrefix retrieve a BugExcerpt matching an id prefix. It fails if multiple
 // bugs match.
 func (c *RepoCache) ResolveBugExcerptPrefix(prefix string) (*BugExcerpt, error) {
@@ -349,8 +387,11 @@ func (c *RepoCache) NewBugRaw(author *IdentityCache, unixTime int64, title strin
 
 	cached := NewBugCache(c, b)
 	c.bugs[b.Id()] = cached
+	c.loadedBugs.Add(b.Id())
 	c.muBug.Unlock()
 
+	c.evictIfNeeded()
+
 	// force the write of the excerpt
 	err = c.bugUpdated(b.Id())
 	if err != nil {
@@ -362,16 +403,23 @@ func (c *RepoCache) NewBugRaw(author *IdentityCache, unixTime int64, title strin
 
 // RemoveBug removes a bug from the cache and repo given a bug id prefix
 func (c *RepoCache) RemoveBug(prefix string) error {
-	b, err := c.ResolveBugPrefix(prefix)
+	c.muBug.RLock()
 
+	b, err := c.ResolveBugPrefix(prefix)
 	if err != nil {
+		c.muBug.RUnlock()
 		return err
 	}
+	c.muBug.RUnlock()
 
+	c.muBug.Lock()
 	err = bug.RemoveBug(c.repo, b.Id())
 
 	delete(c.bugs, b.Id())
 	delete(c.bugExcerpts, b.Id())
+	c.loadedBugs.Remove(b.Id())
+
+	c.muBug.Unlock()
 
 	return c.writeBugCache()
 }

cache/repo_cache_test.go πŸ”—

@@ -1,9 +1,7 @@
 package cache
 
 import (
-	"fmt"
 	"testing"
-	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -85,7 +83,8 @@ func TestCache(t *testing.T) {
 	require.Empty(t, cache.identitiesExcerpts)
 
 	// Reload, only excerpt are loaded
-	require.NoError(t, cache.load())
+	cache, err = NewRepoCache(repo)
+	require.NoError(t, err)
 	require.Empty(t, cache.bugs)
 	require.Empty(t, cache.identities)
 	require.Len(t, cache.bugExcerpts, 2)
@@ -177,17 +176,17 @@ func TestRemove(t *testing.T) {
 	repoCache, err := NewRepoCache(repo)
 	require.NoError(t, err)
 
-	// generate a bunch of bugs
 	rene, err := repoCache.NewIdentity("RenΓ© Descartes", "rene@descartes.fr")
 	require.NoError(t, err)
 
-	for i := 0; i < 100; i++ {
-		_, _, err := repoCache.NewBugRaw(rene, time.Now().Unix(), "title", fmt.Sprintf("message%v", i), nil, nil)
-		require.NoError(t, err)
-	}
+	err = repoCache.SetUserIdentity(rene)
+	require.NoError(t, err)
+
+	_, _, err = repoCache.NewBug("title", "message")
+	require.NoError(t, err)
 
 	// and one more for testing
-	b1, _, err := repoCache.NewBugRaw(rene, time.Now().Unix(), "title", "message", nil, nil)
+	b1, _, err := repoCache.NewBug("title", "message")
 	require.NoError(t, err)
 
 	_, err = repoCache.Push("remoteA")
@@ -204,9 +203,72 @@ func TestRemove(t *testing.T) {
 
 	err = repoCache.RemoveBug(b1.Id().String())
 	require.NoError(t, err)
-	assert.Equal(t, 100, len(repoCache.bugs))
-	assert.Equal(t, 100, len(repoCache.bugExcerpts))
+	assert.Equal(t, 1, len(repoCache.bugs))
+	assert.Equal(t, 1, len(repoCache.bugExcerpts))
 
 	_, err = repoCache.ResolveBug(b1.Id())
 	assert.Error(t, bug.ErrBugNotExist, err)
 }
+
+func TestCacheEviction(t *testing.T) {
+	repo := repository.CreateTestRepo(false)
+	repoCache, err := NewRepoCache(repo)
+	require.NoError(t, err)
+	repoCache.setCacheSize(2)
+
+	require.Equal(t, 2, repoCache.maxLoadedBugs)
+	require.Equal(t, 0, repoCache.loadedBugs.Len())
+	require.Equal(t, 0, len(repoCache.bugs))
+
+	// Generating some bugs
+	rene, err := repoCache.NewIdentity("RenΓ© Descartes", "rene@descartes.fr")
+	require.NoError(t, err)
+	err = repoCache.SetUserIdentity(rene)
+	require.NoError(t, err)
+
+	bug1, _, err := repoCache.NewBug("title", "message")
+	require.NoError(t, err)
+
+	checkBugPresence(t, repoCache, bug1, true)
+	require.Equal(t, 1, repoCache.loadedBugs.Len())
+	require.Equal(t, 1, len(repoCache.bugs))
+
+	bug2, _, err := repoCache.NewBug("title", "message")
+	require.NoError(t, err)
+
+	checkBugPresence(t, repoCache, bug1, true)
+	checkBugPresence(t, repoCache, bug2, true)
+	require.Equal(t, 2, repoCache.loadedBugs.Len())
+	require.Equal(t, 2, len(repoCache.bugs))
+
+	// Number of bugs should not exceed max size of lruCache, oldest one should be evicted
+	bug3, _, err := repoCache.NewBug("title", "message")
+	require.NoError(t, err)
+
+	require.Equal(t, 2, repoCache.loadedBugs.Len())
+	require.Equal(t, 2, len(repoCache.bugs))
+	checkBugPresence(t, repoCache, bug1, false)
+	checkBugPresence(t, repoCache, bug2, true)
+	checkBugPresence(t, repoCache, bug3, true)
+
+	// Accessing bug should update position in lruCache and therefore it should not be evicted
+	repoCache.loadedBugs.Get(bug2.Id())
+	oldestId, _ := repoCache.loadedBugs.GetOldest()
+	require.Equal(t, bug3.Id(), oldestId)
+
+	checkBugPresence(t, repoCache, bug1, false)
+	checkBugPresence(t, repoCache, bug2, true)
+	checkBugPresence(t, repoCache, bug3, true)
+	require.Equal(t, 2, repoCache.loadedBugs.Len())
+	require.Equal(t, 2, len(repoCache.bugs))
+}
+
+func checkBugPresence(t *testing.T, cache *RepoCache, bug *BugCache, presence bool) {
+	id := bug.Id()
+	require.Equal(t, presence, cache.loadedBugs.Contains(id))
+	b, ok := cache.bugs[id]
+	require.Equal(t, presence, ok)
+	if ok {
+		require.Equal(t, bug, b)
+	}
+}

go.mod πŸ”—

@@ -13,7 +13,7 @@ require (
 	github.com/dustin/go-humanize v1.0.0
 	github.com/fatih/color v1.9.0
 	github.com/gorilla/mux v1.7.4
-	github.com/hashicorp/golang-lru v0.5.4 // indirect
+	github.com/hashicorp/golang-lru v0.5.4
 	github.com/icrowley/fake v0.0.0-20180203215853-4178557ae428
 	github.com/mattn/go-isatty v0.0.12
 	github.com/phayes/freeport v0.0.0-20171002181615-b8543db493a5

go.sum πŸ”—

@@ -109,6 +109,7 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf
 github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
 github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
 github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
+github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
 github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
 github.com/hashicorp/go-retryablehttp v0.6.4 h1:BbgctKO892xEyOXnGiaAwIoSq1QZ/SS4AhjoAh9DnfY=
 github.com/hashicorp/go-retryablehttp v0.6.4/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=