Re-build cache per-request

Timeline

Amolith opened (edited)

NewRepoCacheNoEvents() fetches identity/bug data from the cache without incorporating new bug/identity data. We need to trigger a re-build using:

// Rebuild the identities cache. Build() is ranged over as a stream of
// events; a failure is reported through the Err field of an event, so each
// one has to be checked.
for ev := range rc.Identities().Build() {
    if ev.Err != nil {
        // handle error
    }
}

// Rebuild the bugs cache, checking each event's Err in the same way.
for ev := range rc.Bugs().Build() {
    if ev.Err != nil {
        // handle error
    }
}

This will become more and more expensive the more bugs and identities there are. We should eventually refactor to open the cache at startup, serve web requests from the cache, and only trigger cache rebuilds in a timed goroutine. This ensures web requests are answered quickly and we perform expensive operations asynchronously.

For now, we'll accept the performance trade-off and re-build the cache inline.

Amolith commented

This implementation provides a complete solution for managing git-bug caches across all repositories in soft-serve, addressing the performance issues identified in the main bug.

Create pkg/backend/gitbug_cache.go:

package backend

import (
    "context"
    "os"
    "path/filepath"
    "sync"
    
    "github.com/charmbracelet/log/v2"
    "github.com/charmbracelet/soft-serve/pkg/config"
    "github.com/git-bug/git-bug/cache"
    "github.com/git-bug/git-bug/repository"
)

// GitBugCacheManager manages git-bug caches for all repositories with git-bug data.
type GitBugCacheManager struct {
    mu     sync.RWMutex
    caches map[string]*cache.RepoCache // keyed by repo name
    ctx    context.Context
    cfg    *config.Config
    logger *log.Logger
}

// NewGitBugCacheManager returns a manager that holds no caches yet. Its
// config and its logger (prefixed "gitbug-cache") are both read from ctx.
func NewGitBugCacheManager(ctx context.Context) *GitBugCacheManager {
    mgr := new(GitBugCacheManager)
    mgr.ctx = ctx
    mgr.cfg = config.FromContext(ctx)
    mgr.logger = log.FromContext(ctx).WithPrefix("gitbug-cache")
    mgr.caches = make(map[string]*cache.RepoCache)
    return mgr
}

// GetOrOpen returns the RepoCache for repoName, opening, building and caching
// it on first use.
//
// It returns nil when the repository has no refs/bugs directory, or when the
// cache cannot be opened or built. Open and build failures are logged, not
// returned.
//
// Opening and building happen under the manager's write lock, after a second
// look at the map. Without that, two concurrent first requests for the same
// repository would both open it and the later store would overwrite the
// earlier RepoCache, which would then never be closed. The trade-off is that
// other callers wait while a first-time build is in progress.
func (m *GitBugCacheManager) GetOrOpen(repoName string) *cache.RepoCache {
    // Fast path: the cache is already open.
    m.mu.RLock()
    rc, exists := m.caches[repoName]
    m.mu.RUnlock()
    if exists {
        return rc
    }

    // Cheap filesystem probe, done before taking the write lock so that
    // repositories without git-bug data never contend on it.
    // NOTE(review): this only sees a refs/bugs directory on disk; if these
    // refs can end up solely in packed-refs the probe would miss them — confirm.
    repoPath := filepath.Join(m.cfg.DataPath, "repos", repoName+".git")
    bugsPath := filepath.Join(repoPath, "refs", "bugs")
    if info, err := os.Stat(bugsPath); err != nil || !info.IsDir() {
        return nil
    }

    m.mu.Lock()
    defer m.mu.Unlock()

    // Another caller may have opened the cache while we waited for the lock.
    if existing, ok := m.caches[repoName]; ok {
        return existing
    }

    goGitRepo, err := repository.OpenGoGitRepo(repoPath, "git-bug", nil)
    if err != nil {
        m.logger.Debug("failed to open gogit repo", "repo", repoName, "err", err)
        return nil
    }

    rc, err = cache.NewRepoCacheNoEvents(goGitRepo)
    if err != nil {
        // Best-effort cleanup of the repository handle we just opened.
        goGitRepo.Close()
        m.logger.Debug("failed to create repo cache", "repo", repoName, "err", err)
        return nil
    }

    // Initial build; buildCache logs the failure itself.
    if err := m.buildCache(rc, repoName); err != nil {
        if closeErr := rc.Close(); closeErr != nil {
            m.logger.Debug("failed to close repo cache after build failure", "repo", repoName, "err", closeErr)
        }
        return nil
    }

    m.caches[repoName] = rc
    return rc
}

// buildCache runs the identities build and then the bugs build on rc. The
// first event that carries an error ends the rebuild: the error is logged
// together with the repository name and returned. On success a debug line is
// logged and nil is returned.
func (m *GitBugCacheManager) buildCache(rc *cache.RepoCache, repoName string) error {
    // fail logs a build error for this repository and hands it back unchanged.
    fail := func(msg string, cause error) error {
        m.logger.Error(msg, "repo", repoName, "err", cause)
        return cause
    }

    // Identities first.
    for identityEv := range rc.Identities().Build() {
        if identityEv.Err == nil {
            continue
        }
        return fail("identities build failed", identityEv.Err)
    }

    // Then bugs.
    for bugEv := range rc.Bugs().Build() {
        if bugEv.Err == nil {
            continue
        }
        return fail("bugs build failed", bugEv.Err)
    }

    m.logger.Debug("cache rebuilt", "repo", repoName)
    return nil
}

// RebuildAll rebuilds the cache of every repository that is cached when the
// call starts. The names are snapshotted under the read lock; each cache is
// then looked up again right before its rebuild, and a name that no longer
// maps to a cache is skipped. A failed rebuild is logged and does not stop
// the remaining ones.
func (m *GitBugCacheManager) RebuildAll() {
    m.mu.RLock()
    names := make([]string, 0, len(m.caches))
    for repoName := range m.caches {
        names = append(names, repoName)
    }
    m.mu.RUnlock()

    // lookup reads a single map entry under the read lock.
    lookup := func(repoName string) *cache.RepoCache {
        m.mu.RLock()
        defer m.mu.RUnlock()
        return m.caches[repoName]
    }

    for i := range names {
        current := lookup(names[i])
        if current == nil {
            continue
        }
        if err := m.buildCache(current, names[i]); err != nil {
            m.logger.Error("rebuild failed", "repo", names[i], "err", err)
        }
    }
}

// Close closes every cached RepoCache and empties the manager.
//
// All caches are attempted even if one fails, and each failure is logged with
// its repository name. The first error encountered is returned so the caller
// can tell that closing was not clean; map iteration order is unspecified, so
// which error counts as "first" is arbitrary. It returns nil when every cache
// closed successfully.
func (m *GitBugCacheManager) Close() error {
    m.mu.Lock()
    defer m.mu.Unlock()

    var firstErr error
    for name, rc := range m.caches {
        if err := rc.Close(); err != nil {
            m.logger.Error("failed to close cache", "repo", name, "err", err)
            if firstErr == nil {
                firstErr = err
            }
        }
    }
    // Drop the closed instances so they cannot be handed out again.
    m.caches = make(map[string]*cache.RepoCache)
    return firstErr
}

Modify pkg/backend/backend.go:

// Backend is shown here with its fields as listed in this excerpt; the only
// addition is the gitBugCache field at the end.
type Backend struct {
    ctx          context.Context
    cfg          *config.Config
    db           *db.DB
    store        store.Store
    logger       *log.Logger
    cache        *cache
    manager      *task.Manager
    gitBugCache  *GitBugCacheManager  // ADD THIS: git-bug cache manager, assigned in New and returned by GitBugCache()
}

// New constructs the Backend. Only the addition is shown in this excerpt: a
// GitBugCacheManager is created from ctx and stored on the backend before the
// backend is returned.
func New(ctx context.Context, cfg *config.Config, db *db.DB, st store.Store) *Backend {
    // ... existing code ...
    
    b.gitBugCache = NewGitBugCacheManager(ctx)  // ADD THIS
    
    return b
}

// GitBugCache returns the git-bug cache manager stored in the backend's
// gitBugCache field. It only reads the field; it does not create a manager.
func (b *Backend) GitBugCache() *GitBugCacheManager {
    return b.gitBugCache
}

Create pkg/jobs/gitbug.go:

package jobs

import (
    "context"
    
    "github.com/charmbracelet/log/v2"
    "github.com/charmbracelet/soft-serve/pkg/backend"
    "github.com/charmbracelet/soft-serve/pkg/config"
)

// init registers the cache refresh job under the name "gitbug-cache-refresh".
func init() {
    Register("gitbug-cache-refresh", gitBugCacheRefresh{})
}

// gitBugCacheRefresh is the job value registered as "gitbug-cache-refresh".
// It carries no state of its own.
type gitBugCacheRefresh struct{}

// Spec reports the schedule for the refresh job: the value configured in
// Jobs.GitBugCacheRefresh when it is non-empty, otherwise "@every 5m".
func (g gitBugCacheRefresh) Spec(ctx context.Context) string {
    schedule := config.FromContext(ctx).Jobs.GitBugCacheRefresh
    if schedule == "" {
        schedule = "@every 5m"
    }
    return schedule
}

// Func builds the job body. The logger and the backend are resolved from ctx
// once, when Func is called; the returned function logs a debug line and then
// rebuilds all git-bug caches through the backend's cache manager.
func (g gitBugCacheRefresh) Func(ctx context.Context) func() {
    var (
        jobLog = log.FromContext(ctx).WithPrefix("jobs.gitbug")
        srv    = backend.FromContext(ctx)
    )

    refresh := func() {
        jobLog.Debug("refreshing git-bug caches")
        srv.GitBugCache().RebuildAll()
    }
    return refresh
}

Modify pkg/web/webui_bugs.go to use cached instances:

// openBugCache returns the shared RepoCache for repo, obtained from the
// backend's git-bug cache manager. It returns an error when the manager has
// no cache to offer for that repository (GetOrOpen returned nil).
func openBugCache(ctx context.Context, repo proto.Repository) (*cache.RepoCache, error) {
    manager := backend.FromContext(ctx).GitBugCache()
    if shared := manager.GetOrOpen(repo.Name()); shared != nil {
        return shared, nil
    }
    return nil, fmt.Errorf("no git-bug data or failed to open cache")
}

// repoBugs (excerpt) obtains the bug cache for the repository through
// openBugCache and answers with the not-found page when that fails.
func repoBugs(w http.ResponseWriter, r *http.Request) {
    // ... existing code ...
    
    rc, err := openBugCache(ctx, repo)
    if err != nil {
        // An unavailable cache is logged at debug level and rendered as not found.
        logger.Debug("failed to open bug cache", "repo", repo.Name(), "err", err)
        renderNotFound(w, r)
        return
    }
    // DO NOT defer rc.Close() - cache is managed by backend
    
    // ... rest of handler ...
}

// repoBug (excerpt) obtains the bug cache for the repository through
// openBugCache and answers with the not-found page when that fails.
func repoBug(w http.ResponseWriter, r *http.Request) {
    // ... existing code ...
    
    rc, err := openBugCache(ctx, repo)
    if err != nil {
        // An unavailable cache is logged at debug level and rendered as not found.
        logger.Debug("failed to open bug cache", "repo", repo.Name(), "err", err)
        renderNotFound(w, r)
        return
    }
    // DO NOT defer rc.Close() - cache is managed by backend
    
    // ... rest of handler ...
}

Modify cmd/soft/serve/server.go:

// Shutdown (excerpt) adds one more task to the errgroup of shutdown tasks:
// closing the backend's git-bug cache manager. It returns the result of
// errg.Wait().
func (s *Server) Shutdown(ctx context.Context) error {
    errg, ctx := errgroup.WithContext(ctx)
    // ... existing shutdowns ...
    
    // Close every cached git-bug RepoCache as part of shutdown.
    errg.Go(func() error {
        return s.Backend.GitBugCache().Close()  // ADD THIS
    })
    
    return errg.Wait()
}

This implementation follows soft-serve's existing patterns for cache management and background jobs, ensuring consistency with the codebase architecture.

Co-Authored-By: Crush <crush@charm.land>

Amolith closed the bug

Amolith commented

Comment ^ moved to bug-bfaefd8