feat: global email body cache eviction (#1257)

FromSi created

## What?

Adds a configurable global size limit for cached email bodies.

- Defaults `body_cache_threshold_mb` to 100 MB.
- Applies the limit across the whole `~/.cache/matcha/email_bodies/`
directory, not per folder cache file.
- Evicts least recently accessed cached bodies until the total cache
size is back under the limit.
- Skips caching bodies larger than the configured limit.
- Adds tests for cross-folder LRU eviction, multi-entry eviction,
oversized replacements, and the default threshold.
- Updates the seed script to generate 10 KB email bodies for cache
testing.

## Why?

Email body cache files could grow without a global cap for active users
with large mailboxes. Existing pruning only removed bodies for emails no
longer present in a folder, so valid cached emails could accumulate
indefinitely. A global LRU limit keeps disk usage bounded while
preserving recently used cached messages.

Closes #521

Change summary

config/cache.go            | 151 +++++++++++++++++++++++++++++++++---
config/cache_test.go       | 162 ++++++++++++++++++++++++++++++++++++++++
config/config.go           |   4 
docs/docs/Configuration.md |   4 
4 files changed, 302 insertions(+), 19 deletions(-)

Detailed changes

config/cache.go 🔗

@@ -531,14 +531,134 @@ func calculateTotalCacheSize(cache *EmailBodyCache) int {
 	return total
 }
 
-func evict(cache *EmailBodyCache, newSize int, threshold int) {
-	sort.Slice(cache.Bodies, func(i, j int) bool {
-		return cache.Bodies[i].LastAccessedAt.Before(cache.Bodies[j].LastAccessedAt)
+type bodyCacheFileState struct {
+	path  string
+	cache EmailBodyCache
+}
+
+type bodyCacheEntryRef struct {
+	fileIndex int
+	bodyIndex int
+}
+
+func loadAllEmailBodyCaches() ([]bodyCacheFileState, error) {
+	dir, err := bodyCacheDir()
+	if err != nil {
+		return nil, err
+	}
+
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, nil
+		}
+		return nil, err
+	}
+
+	var caches []bodyCacheFileState
+	for _, entry := range entries {
+		if entry.IsDir() || filepath.Ext(entry.Name()) != ".json" {
+			continue
+		}
+
+		path := filepath.Join(dir, entry.Name())
+		data, err := SecureReadFile(path)
+		if err != nil {
+			return nil, err
+		}
+
+		var cache EmailBodyCache
+		if err := json.Unmarshal(data, &cache); err != nil {
+			return nil, err
+		}
+		for i := range cache.Bodies {
+			if cache.Bodies[i].SizeBytes <= 0 {
+				cache.Bodies[i].SizeBytes = calculateEmailBodySize(&cache.Bodies[i])
+			}
+		}
+
+		caches = append(caches, bodyCacheFileState{
+			path:  path,
+			cache: cache,
+		})
+	}
+
+	return caches, nil
+}
+
+func saveEmailBodyCacheFile(state *bodyCacheFileState) error {
+	if err := os.MkdirAll(filepath.Dir(state.path), 0700); err != nil {
+		return err
+	}
+
+	state.cache.UpdatedAt = time.Now()
+	data, err := json.Marshal(&state.cache)
+	if err != nil {
+		return err
+	}
+	return SecureWriteFile(state.path, data, 0600)
+}
+
+func pruneEmailBodyCacheSize(threshold int) error {
+	if threshold <= 0 {
+		return nil
+	}
+
+	caches, err := loadAllEmailBodyCaches()
+	if err != nil {
+		return err
+	}
+
+	totalSize := 0
+	var refs []bodyCacheEntryRef
+	for fileIndex := range caches {
+		for bodyIndex, body := range caches[fileIndex].cache.Bodies {
+			totalSize += body.SizeBytes
+			refs = append(refs, bodyCacheEntryRef{
+				fileIndex: fileIndex,
+				bodyIndex: bodyIndex,
+			})
+		}
+	}
+	if totalSize <= threshold {
+		return nil
+	}
+
+	sort.Slice(refs, func(i, j int) bool {
+		left := caches[refs[i].fileIndex].cache.Bodies[refs[i].bodyIndex]
+		right := caches[refs[j].fileIndex].cache.Bodies[refs[j].bodyIndex]
+		return left.LastAccessedAt.Before(right.LastAccessedAt)
 	})
 
-	for len(cache.Bodies) > 0 && calculateTotalCacheSize(cache)+newSize > threshold {
-		cache.Bodies = cache.Bodies[1:]
+	remove := make(map[int]map[int]struct{})
+	for _, ref := range refs {
+		if totalSize <= threshold {
+			break
+		}
+
+		body := caches[ref.fileIndex].cache.Bodies[ref.bodyIndex]
+		totalSize -= body.SizeBytes
+		if remove[ref.fileIndex] == nil {
+			remove[ref.fileIndex] = make(map[int]struct{})
+		}
+		remove[ref.fileIndex][ref.bodyIndex] = struct{}{}
+	}
+
+	for fileIndex, bodyIndexes := range remove {
+		bodies := caches[fileIndex].cache.Bodies
+		kept := bodies[:0]
+		for bodyIndex, body := range bodies {
+			if _, ok := bodyIndexes[bodyIndex]; !ok {
+				kept = append(kept, body)
+			}
+		}
+		caches[fileIndex].cache.Bodies = kept
+		if err := saveEmailBodyCacheFile(&caches[fileIndex]); err != nil {
+			return err
+		}
 	}
+
+	return nil
 }
 
 // SaveEmailBody saves or updates a cached email body for a folder.
@@ -556,22 +676,23 @@ func SaveEmailBody(folderName string, body CachedEmailBody, threshold int) error
 	found := false
 	for i, b := range cache.Bodies {
 		if b.UID == body.UID && b.AccountID == body.AccountID {
-			cache.Bodies[i] = body
+			if body.SizeBytes <= threshold {
+				cache.Bodies[i] = body
+			} else {
+				cache.Bodies = append(cache.Bodies[:i], cache.Bodies[i+1:]...)
+			}
 			found = true
 			break
 		}
 	}
-	if !found {
-		if body.SizeBytes <= threshold {
-			if calculateTotalCacheSize(cache)+body.SizeBytes > threshold {
-				evict(cache, body.SizeBytes, threshold)
-			}
-
-			cache.Bodies = append(cache.Bodies, body)
-		}
+	if !found && body.SizeBytes <= threshold {
+		cache.Bodies = append(cache.Bodies, body)
 	}
 
-	return saveEmailBodyCache(cache)
+	if err := saveEmailBodyCache(cache); err != nil {
+		return err
+	}
+	return pruneEmailBodyCacheSize(threshold)
 }
 
 // PruneEmailBodyCache removes cached bodies for emails that are no longer in the folder.

config/cache_test.go 🔗

@@ -0,0 +1,162 @@
+package config
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestSaveEmailBodyEvictsLeastRecentlyAccessedAcrossFolders(t *testing.T) {
+	folderCacheTestSetup(t)
+
+	oldTime := time.Now().Add(-2 * time.Hour)
+	recentTime := time.Now().Add(-1 * time.Hour)
+
+	if err := saveEmailBodyCache(&EmailBodyCache{
+		FolderName: "INBOX",
+		Bodies: []CachedEmailBody{
+			{
+				UID:            1,
+				AccountID:      "acct",
+				Body:           strings.Repeat("a", 10),
+				SizeBytes:      10,
+				CachedAt:       oldTime,
+				LastAccessedAt: oldTime,
+			},
+		},
+	}); err != nil {
+		t.Fatalf("save old cache: %v", err)
+	}
+
+	if err := saveEmailBodyCache(&EmailBodyCache{
+		FolderName: "Archive",
+		Bodies: []CachedEmailBody{
+			{
+				UID:            2,
+				AccountID:      "acct",
+				Body:           strings.Repeat("b", 10),
+				SizeBytes:      10,
+				CachedAt:       recentTime,
+				LastAccessedAt: recentTime,
+			},
+		},
+	}); err != nil {
+		t.Fatalf("save recent cache: %v", err)
+	}
+
+	if err := SaveEmailBody("Sent", CachedEmailBody{
+		UID:       3,
+		AccountID: "acct",
+		Body:      strings.Repeat("c", 10),
+	}, 20); err != nil {
+		t.Fatalf("SaveEmailBody: %v", err)
+	}
+
+	inbox, err := LoadEmailBodyCache("INBOX")
+	if err != nil {
+		t.Fatalf("LoadEmailBodyCache(INBOX): %v", err)
+	}
+	if len(inbox.Bodies) != 0 {
+		t.Fatalf("oldest INBOX body should be evicted, got %d bodies", len(inbox.Bodies))
+	}
+
+	archive, err := LoadEmailBodyCache("Archive")
+	if err != nil {
+		t.Fatalf("LoadEmailBodyCache(Archive): %v", err)
+	}
+	if len(archive.Bodies) != 1 || archive.Bodies[0].UID != 2 {
+		t.Fatalf("recent Archive body should remain, got %+v", archive.Bodies)
+	}
+
+	sent, err := LoadEmailBodyCache("Sent")
+	if err != nil {
+		t.Fatalf("LoadEmailBodyCache(Sent): %v", err)
+	}
+	if len(sent.Bodies) != 1 || sent.Bodies[0].UID != 3 {
+		t.Fatalf("new Sent body should remain, got %+v", sent.Bodies)
+	}
+}
+
+func TestSaveEmailBodyEvictsMultipleEntriesUntilUnderLimit(t *testing.T) {
+	folderCacheTestSetup(t)
+
+	now := time.Now()
+	bodies := make([]CachedEmailBody, 0, 4)
+	for i := 1; i <= 4; i++ {
+		accessedAt := now.Add(-time.Duration(5-i) * time.Minute)
+		bodies = append(bodies, CachedEmailBody{
+			UID:            uint32(i),
+			AccountID:      "acct",
+			Body:           strings.Repeat(string(rune('a'+i-1)), 10),
+			SizeBytes:      10,
+			CachedAt:       accessedAt,
+			LastAccessedAt: accessedAt,
+		})
+	}
+
+	if err := saveEmailBodyCache(&EmailBodyCache{
+		FolderName: "INBOX",
+		Bodies:     bodies,
+	}); err != nil {
+		t.Fatalf("save cache: %v", err)
+	}
+
+	if err := SaveEmailBody("Archive", CachedEmailBody{
+		UID:       5,
+		AccountID: "acct",
+		Body:      strings.Repeat("e", 30),
+	}, 50); err != nil {
+		t.Fatalf("SaveEmailBody: %v", err)
+	}
+
+	inbox, err := LoadEmailBodyCache("INBOX")
+	if err != nil {
+		t.Fatalf("LoadEmailBodyCache(INBOX): %v", err)
+	}
+
+	gotUIDs := make([]uint32, 0, len(inbox.Bodies))
+	for _, body := range inbox.Bodies {
+		gotUIDs = append(gotUIDs, body.UID)
+	}
+	wantUIDs := []uint32{3, 4}
+	if !reflect.DeepEqual(gotUIDs, wantUIDs) {
+		t.Fatalf("remaining INBOX UIDs = %v, want %v", gotUIDs, wantUIDs)
+	}
+
+	archive, err := LoadEmailBodyCache("Archive")
+	if err != nil {
+		t.Fatalf("LoadEmailBodyCache(Archive): %v", err)
+	}
+	if len(archive.Bodies) != 1 || archive.Bodies[0].UID != 5 {
+		t.Fatalf("new Archive body should remain, got %+v", archive.Bodies)
+	}
+}
+
+func TestSaveEmailBodyDropsOversizedReplacement(t *testing.T) {
+	folderCacheTestSetup(t)
+
+	if err := SaveEmailBody("INBOX", CachedEmailBody{
+		UID:       1,
+		AccountID: "acct",
+		Body:      strings.Repeat("a", 10),
+	}, 20); err != nil {
+		t.Fatalf("initial SaveEmailBody: %v", err)
+	}
+
+	if err := SaveEmailBody("INBOX", CachedEmailBody{
+		UID:       1,
+		AccountID: "acct",
+		Body:      strings.Repeat("b", 25),
+	}, 20); err != nil {
+		t.Fatalf("oversized SaveEmailBody: %v", err)
+	}
+
+	cache, err := LoadEmailBodyCache("INBOX")
+	if err != nil {
+		t.Fatalf("LoadEmailBodyCache: %v", err)
+	}
+	if len(cache.Bodies) != 0 {
+		t.Fatalf("oversized replacement should not remain cached, got %+v", cache.Bodies)
+	}
+}

config/config.go 🔗

@@ -100,10 +100,10 @@ type Config struct {
 }
 
 // GetBodyCacheThreshold returns the email body cache threshold in bytes.
-// It defaults to 500MB if unset or zero.
+// It defaults to 100MB if unset or zero.
 func (c *Config) GetBodyCacheThreshold() int {
 	if c.BodyCacheThresholdMB <= 0 {
-		return 500 * 1024 * 1024
+		return 100 * 1024 * 1024
 	}
 	return c.BodyCacheThresholdMB * 1024 * 1024
 }

docs/docs/Configuration.md 🔗

@@ -46,7 +46,7 @@ Configuration is stored in `~/.config/matcha/config.json`.
   "enable_split_pane": true,
   "disable_images": true,
   "hide_tips": true,
-  "body_cache_threshold_mb": 500
+  "body_cache_threshold_mb": 100
 }
 ```
 
@@ -54,7 +54,7 @@ Configuration is stored in `~/.config/matcha/config.json`.
 
 `enable_split_pane` enables a side-by-side view where the email list and the selected email are shown on the same screen.
 
-`body_cache_threshold_mb` sets the maximum size (in megabytes) for the local email body cache. When this limit is reached, older cached emails are evicted to make room for new ones. Defaults to `500` MB if not specified.
+`body_cache_threshold_mb` sets the maximum size (in megabytes) for the local email body cache. When this limit is reached, least recently accessed cached emails are evicted across all folders to make room for new ones. Defaults to `100` MB if not specified.
 
 ## Data Locations