graphql: add tree-->lastcommit, further optimize history querying (#1547)

Michael Muré created

Change summary

api/graphql/graph/git.generated.go        | 119 ++++++++++++++++++-
api/graphql/graph/repository.generated.go |   9 
api/graphql/graph/root_.generated.go      |  23 +++
api/graphql/graphql_test.go               |  21 +++
api/graphql/models/models.go              |  12 ++
api/graphql/resolvers/git.go              |  26 +++
api/graphql/resolvers/repo.go             |  10 +
api/graphql/resolvers/root.go             |   8 
api/graphql/schema/git.graphql            |   5 
repository/gogit.go                       | 147 +++++++++++++-----------
repository/repo_testing.go                |  33 +++++
11 files changed, 317 insertions(+), 96 deletions(-)

Detailed changes

api/graphql/graph/git.generated.go 🔗

@@ -28,6 +28,9 @@ type GitCommitResolver interface {
 	Files(ctx context.Context, obj *models.GitCommitMeta, after *string, before *string, first *int, last *int) (*models.GitChangedFileConnection, error)
 	Diff(ctx context.Context, obj *models.GitCommitMeta, path string) (*repository.FileDiff, error)
 }
+type GitTreeEntryResolver interface {
+	LastCommit(ctx context.Context, obj *models.GitTreeEntry) (*models.GitCommitMeta, error)
+}
 
 // endregion ************************** generated!.gotpl **************************
 
@@ -2514,7 +2517,7 @@ func (ec *executionContext) fieldContext_GitRefConnection_totalCount(_ context.C
 	return fc, nil
 }
 
-func (ec *executionContext) _GitTreeEntry_name(ctx context.Context, field graphql.CollectedField, obj *repository.TreeEntry) (ret graphql.Marshaler) {
+func (ec *executionContext) _GitTreeEntry_name(ctx context.Context, field graphql.CollectedField, obj *models.GitTreeEntry) (ret graphql.Marshaler) {
 	fc, err := ec.fieldContext_GitTreeEntry_name(ctx, field)
 	if err != nil {
 		return graphql.Null
@@ -2558,7 +2561,7 @@ func (ec *executionContext) fieldContext_GitTreeEntry_name(_ context.Context, fi
 	return fc, nil
 }
 
-func (ec *executionContext) _GitTreeEntry_type(ctx context.Context, field graphql.CollectedField, obj *repository.TreeEntry) (ret graphql.Marshaler) {
+func (ec *executionContext) _GitTreeEntry_type(ctx context.Context, field graphql.CollectedField, obj *models.GitTreeEntry) (ret graphql.Marshaler) {
 	fc, err := ec.fieldContext_GitTreeEntry_type(ctx, field)
 	if err != nil {
 		return graphql.Null
@@ -2602,7 +2605,7 @@ func (ec *executionContext) fieldContext_GitTreeEntry_type(_ context.Context, fi
 	return fc, nil
 }
 
-func (ec *executionContext) _GitTreeEntry_hash(ctx context.Context, field graphql.CollectedField, obj *repository.TreeEntry) (ret graphql.Marshaler) {
+func (ec *executionContext) _GitTreeEntry_hash(ctx context.Context, field graphql.CollectedField, obj *models.GitTreeEntry) (ret graphql.Marshaler) {
 	fc, err := ec.fieldContext_GitTreeEntry_hash(ctx, field)
 	if err != nil {
 		return graphql.Null
@@ -2646,6 +2649,69 @@ func (ec *executionContext) fieldContext_GitTreeEntry_hash(_ context.Context, fi
 	return fc, nil
 }
 
+func (ec *executionContext) _GitTreeEntry_lastCommit(ctx context.Context, field graphql.CollectedField, obj *models.GitTreeEntry) (ret graphql.Marshaler) {
+	fc, err := ec.fieldContext_GitTreeEntry_lastCommit(ctx, field)
+	if err != nil {
+		return graphql.Null
+	}
+	ctx = graphql.WithFieldContext(ctx, fc)
+	defer func() {
+		if r := recover(); r != nil {
+			ec.Error(ctx, ec.Recover(ctx, r))
+			ret = graphql.Null
+		}
+	}()
+	resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (any, error) {
+		ctx = rctx // use context from middleware stack in children
+		return ec.resolvers.GitTreeEntry().LastCommit(rctx, obj)
+	})
+	if err != nil {
+		ec.Error(ctx, err)
+		return graphql.Null
+	}
+	if resTmp == nil {
+		return graphql.Null
+	}
+	res := resTmp.(*models.GitCommitMeta)
+	fc.Result = res
+	return ec.marshalOGitCommit2ᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋapiᚋgraphqlᚋmodelsᚐGitCommitMeta(ctx, field.Selections, res)
+}
+
+func (ec *executionContext) fieldContext_GitTreeEntry_lastCommit(_ context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
+	fc = &graphql.FieldContext{
+		Object:     "GitTreeEntry",
+		Field:      field,
+		IsMethod:   true,
+		IsResolver: true,
+		Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
+			switch field.Name {
+			case "hash":
+				return ec.fieldContext_GitCommit_hash(ctx, field)
+			case "shortHash":
+				return ec.fieldContext_GitCommit_shortHash(ctx, field)
+			case "message":
+				return ec.fieldContext_GitCommit_message(ctx, field)
+			case "fullMessage":
+				return ec.fieldContext_GitCommit_fullMessage(ctx, field)
+			case "authorName":
+				return ec.fieldContext_GitCommit_authorName(ctx, field)
+			case "authorEmail":
+				return ec.fieldContext_GitCommit_authorEmail(ctx, field)
+			case "date":
+				return ec.fieldContext_GitCommit_date(ctx, field)
+			case "parents":
+				return ec.fieldContext_GitCommit_parents(ctx, field)
+			case "files":
+				return ec.fieldContext_GitCommit_files(ctx, field)
+			case "diff":
+				return ec.fieldContext_GitCommit_diff(ctx, field)
+			}
+			return nil, fmt.Errorf("no field named %q was found under type GitCommit", field.Name)
+		},
+	}
+	return fc, nil
+}
+
 // endregion **************************** field.gotpl *****************************
 
 // region    **************************** input.gotpl *****************************
@@ -3427,7 +3493,7 @@ func (ec *executionContext) _GitRefConnection(ctx context.Context, sel ast.Selec
 
 var gitTreeEntryImplementors = []string{"GitTreeEntry"}
 
-func (ec *executionContext) _GitTreeEntry(ctx context.Context, sel ast.SelectionSet, obj *repository.TreeEntry) graphql.Marshaler {
+func (ec *executionContext) _GitTreeEntry(ctx context.Context, sel ast.SelectionSet, obj *models.GitTreeEntry) graphql.Marshaler {
 	fields := graphql.CollectFields(ec.OperationContext, sel, gitTreeEntryImplementors)
 
 	out := graphql.NewFieldSet(fields)
@@ -3439,18 +3505,51 @@ func (ec *executionContext) _GitTreeEntry(ctx context.Context, sel ast.Selection
 		case "name":
 			out.Values[i] = ec._GitTreeEntry_name(ctx, field, obj)
 			if out.Values[i] == graphql.Null {
-				out.Invalids++
+				atomic.AddUint32(&out.Invalids, 1)
 			}
 		case "type":
 			out.Values[i] = ec._GitTreeEntry_type(ctx, field, obj)
 			if out.Values[i] == graphql.Null {
-				out.Invalids++
+				atomic.AddUint32(&out.Invalids, 1)
 			}
 		case "hash":
 			out.Values[i] = ec._GitTreeEntry_hash(ctx, field, obj)
 			if out.Values[i] == graphql.Null {
-				out.Invalids++
+				atomic.AddUint32(&out.Invalids, 1)
+			}
+		case "lastCommit":
+			field := field
+
+			innerFunc := func(ctx context.Context, _ *graphql.FieldSet) (res graphql.Marshaler) {
+				defer func() {
+					if r := recover(); r != nil {
+						ec.Error(ctx, ec.Recover(ctx, r))
+					}
+				}()
+				res = ec._GitTreeEntry_lastCommit(ctx, field, obj)
+				return res
+			}
+
+			if field.Deferrable != nil {
+				dfs, ok := deferred[field.Deferrable.Label]
+				di := 0
+				if ok {
+					dfs.AddField(field)
+					di = len(dfs.Values) - 1
+				} else {
+					dfs = graphql.NewFieldSet([]graphql.CollectedField{field})
+					deferred[field.Deferrable.Label] = dfs
+				}
+				dfs.Concurrently(di, func(ctx context.Context) graphql.Marshaler {
+					return innerFunc(ctx, dfs)
+				})
+
+				// don't run the out.Concurrently() call below
+				out.Values[i] = graphql.Null
+				continue
 			}
+
+			out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
 		default:
 			panic("unknown field " + strconv.Quote(field.Name))
 		}
@@ -3890,7 +3989,7 @@ var (
 	}
 )
 
-func (ec *executionContext) marshalNGitTreeEntry2ᚕᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋrepositoryᚐTreeEntryᚄ(ctx context.Context, sel ast.SelectionSet, v []*repository.TreeEntry) graphql.Marshaler {
+func (ec *executionContext) marshalNGitTreeEntry2ᚕᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋapiᚋgraphqlᚋmodelsᚐGitTreeEntryᚄ(ctx context.Context, sel ast.SelectionSet, v []*models.GitTreeEntry) graphql.Marshaler {
 	ret := make(graphql.Array, len(v))
 	var wg sync.WaitGroup
 	isLen1 := len(v) == 1
@@ -3914,7 +4013,7 @@ func (ec *executionContext) marshalNGitTreeEntry2ᚕᚖgithubᚗcomᚋgitᚑbug
 			if !isLen1 {
 				defer wg.Done()
 			}
-			ret[i] = ec.marshalNGitTreeEntry2ᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋrepositoryᚐTreeEntry(ctx, sel, v[i])
+			ret[i] = ec.marshalNGitTreeEntry2ᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋapiᚋgraphqlᚋmodelsᚐGitTreeEntry(ctx, sel, v[i])
 		}
 		if isLen1 {
 			f(i)
@@ -3934,7 +4033,7 @@ func (ec *executionContext) marshalNGitTreeEntry2ᚕᚖgithubᚗcomᚋgitᚑbug
 	return ret
 }
 
-func (ec *executionContext) marshalNGitTreeEntry2ᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋrepositoryᚐTreeEntry(ctx context.Context, sel ast.SelectionSet, v *repository.TreeEntry) graphql.Marshaler {
+func (ec *executionContext) marshalNGitTreeEntry2ᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋapiᚋgraphqlᚋmodelsᚐGitTreeEntry(ctx context.Context, sel ast.SelectionSet, v *models.GitTreeEntry) graphql.Marshaler {
 	if v == nil {
 		if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) {
 			ec.Errorf(ctx, "the requested element is null which the schema does not allow")

api/graphql/graph/repository.generated.go 🔗

@@ -13,7 +13,6 @@ import (
 
 	"github.com/99designs/gqlgen/graphql"
 	"github.com/git-bug/git-bug/api/graphql/models"
-	"github.com/git-bug/git-bug/repository"
 	"github.com/vektah/gqlparser/v2/ast"
 )
 
@@ -27,7 +26,7 @@ type RepositoryResolver interface {
 	Identity(ctx context.Context, obj *models.Repository, prefix string) (models.IdentityWrapper, error)
 	UserIdentity(ctx context.Context, obj *models.Repository) (models.IdentityWrapper, error)
 	Refs(ctx context.Context, obj *models.Repository, after *string, before *string, first *int, last *int, typeArg *models.GitRefType) (*models.GitRefConnection, error)
-	Tree(ctx context.Context, obj *models.Repository, ref string, path *string) ([]*repository.TreeEntry, error)
+	Tree(ctx context.Context, obj *models.Repository, ref string, path *string) ([]*models.GitTreeEntry, error)
 	Blob(ctx context.Context, obj *models.Repository, ref string, path string) (*models.GitBlob, error)
 	Commits(ctx context.Context, obj *models.Repository, after *string, first *int, ref string, path *string, since *time.Time, until *time.Time) (*models.GitCommitConnection, error)
 	Commit(ctx context.Context, obj *models.Repository, hash string) (*models.GitCommitMeta, error)
@@ -1353,9 +1352,9 @@ func (ec *executionContext) _Repository_tree(ctx context.Context, field graphql.
 		}
 		return graphql.Null
 	}
-	res := resTmp.([]*repository.TreeEntry)
+	res := resTmp.([]*models.GitTreeEntry)
 	fc.Result = res
-	return ec.marshalNGitTreeEntry2ᚕᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋrepositoryᚐTreeEntryᚄ(ctx, field.Selections, res)
+	return ec.marshalNGitTreeEntry2ᚕᚖgithubᚗcomᚋgitᚑbugᚋgitᚑbugᚋapiᚋgraphqlᚋmodelsᚐGitTreeEntryᚄ(ctx, field.Selections, res)
 }
 
 func (ec *executionContext) fieldContext_Repository_tree(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
@@ -1372,6 +1371,8 @@ func (ec *executionContext) fieldContext_Repository_tree(ctx context.Context, fi
 				return ec.fieldContext_GitTreeEntry_type(ctx, field)
 			case "hash":
 				return ec.fieldContext_GitTreeEntry_hash(ctx, field)
+			case "lastCommit":
+				return ec.fieldContext_GitTreeEntry_lastCommit(ctx, field)
 			}
 			return nil, fmt.Errorf("no field named %q was found under type GitTreeEntry", field.Name)
 		},

api/graphql/graph/root_.generated.go 🔗

@@ -50,6 +50,7 @@ type ResolverRoot interface {
 	BugSetTitleTimelineItem() BugSetTitleTimelineItemResolver
 	Color() ColorResolver
 	GitCommit() GitCommitResolver
+	GitTreeEntry() GitTreeEntryResolver
 	Identity() IdentityResolver
 	Label() LabelResolver
 	Mutation() MutationResolver
@@ -380,6 +381,7 @@ type ComplexityRoot struct {
 
 	GitTreeEntry struct {
 		Hash       func(childComplexity int) int
+		LastCommit func(childComplexity int) int
 		Name       func(childComplexity int) int
 		ObjectType func(childComplexity int) int
 	}
@@ -1875,6 +1877,13 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
 
 		return e.complexity.GitTreeEntry.Hash(childComplexity), true
 
+	case "GitTreeEntry.lastCommit":
+		if e.complexity.GitTreeEntry.LastCommit == nil {
+			break
+		}
+
+		return e.complexity.GitTreeEntry.LastCommit(childComplexity), true
+
 	case "GitTreeEntry.name":
 		if e.complexity.GitTreeEntry.Name == nil {
 			break
@@ -3172,13 +3181,16 @@ type GitRef {
 
 """An entry in a git tree (directory listing)."""
 type GitTreeEntry
-@goModel(model: "github.com/git-bug/git-bug/repository.TreeEntry") {
+@goModel(model: "github.com/git-bug/git-bug/api/graphql/models.GitTreeEntry") {
     """File or directory name within the parent tree."""
     name: String!
     """Whether this entry is a file, directory, symlink, or submodule."""
     type: GitObjectType! @goField(name: "ObjectType")
     """Git object hash."""
     hash: String!
+    """The last git commit that touched this tree entry. Null when the entry
+    cannot be resolved within the history depth limit."""
+    lastCommit: GitCommit
 }
 
 """The content of a git blob (file)."""
@@ -3467,7 +3479,7 @@ type OperationEdge {
 }
 `, BuiltIn: false},
 	{Name: "../schema/repository.graphql", Input: `type Repository {
-    """The name of the repository"""
+    """The name of the repository. Null for the default (unnamed) repository in a single-repo setup."""
     name: String
 
     """All the bugs"""
@@ -3484,6 +3496,7 @@ type OperationEdge {
         query: String
     ): BugConnection!
 
+    """Look up a bug by id prefix. Returns null if no bug matches the prefix."""
     bug(prefix: String!): Bug
 
     """All the identities"""
@@ -3498,6 +3511,7 @@ type OperationEdge {
         last: Int
     ): IdentityConnection!
 
+    """Look up an identity by id prefix. Returns null if no identity matches the prefix."""
     identity(prefix: String!): Identity
 
     """The identity created or selected by the user as its own"""
@@ -3542,7 +3556,7 @@ type OperationEdge {
         until: Time
     ): GitCommitConnection!
 
-    """A single commit by hash."""
+    """A single commit by hash. Returns null if the hash does not exist in the repository."""
     commit(hash: String!): GitCommit
 
     """The most recent commit that touched each of the named entries in the
@@ -3576,7 +3590,8 @@ type RepositoryEdge {
 }
 `, BuiltIn: false},
 	{Name: "../schema/root.graphql", Input: `type Query {
-    """Access a repository by reference/name. If no ref is given, the default repository is returned if any."""
+    """Access a repository by reference/name. If no ref is given, the default repository is returned if any.
+    Returns null if the referenced repository does not exist."""
     repository(ref: String): Repository
 
     """List all registered repositories."""

api/graphql/graphql_test.go 🔗

@@ -393,6 +393,27 @@ func TestGitBrowseQueries(t *testing.T) {
 		require.Equal(t, "TREE", byName["src"])
 	})
 
+	t.Run("tree_lastCommit", func(t *testing.T) {
+		var resp struct {
+			Repository struct {
+				Tree []struct {
+					Name       string
+					LastCommit struct{ Hash string }
+				}
+			}
+		}
+		require.NoError(t, c.Post(`query {
+			repository { tree(ref: "main", path: "") { name lastCommit { hash } } }
+		}`, &resp))
+		byName := make(map[string]string)
+		for _, e := range resp.Repository.Tree {
+			byName[e.Name] = e.LastCommit.Hash
+		}
+		require.Equal(t, string(c3), byName["README.md"]) // changed in c3
+		require.Equal(t, string(c2), byName["main.go"])   // changed in c2
+		require.Equal(t, string(c2), byName["src"])       // util.go added in c2
+	})
+
 	// ── commits ───────────────────────────────────────────────────────────────
 
 	t.Run("commits", func(t *testing.T) {

api/graphql/models/models.go 🔗

@@ -23,3 +23,15 @@ type GitCommitMeta struct {
 	Repo *cache.RepoCache
 	repository.CommitMeta
 }
+
+// GitTreeEntry wraps a TreeEntry with the repository context (Repo, Ref, Path)
+// of the resolution to that tree. SiblingNames lists all entries in the same
+// directory so that the first lastCommit resolver call walks history for the whole
+// directory at once; subsequent sibling calls hit the cache.
+type GitTreeEntry struct {
+	Repo         *cache.RepoCache
+	Ref          string
+	Path         string
+	SiblingNames []string
+	repository.TreeEntry
+}

api/graphql/resolvers/git.go 🔗

@@ -6,7 +6,6 @@ import (
 	"github.com/git-bug/git-bug/api/graphql/connections"
 	"github.com/git-bug/git-bug/api/graphql/graph"
 	"github.com/git-bug/git-bug/api/graphql/models"
-	"github.com/git-bug/git-bug/cache"
 	"github.com/git-bug/git-bug/repository"
 )
 
@@ -14,9 +13,7 @@ const blobTruncateSize = 1 << 20 // 1 MiB
 
 var _ graph.GitCommitResolver = &gitCommitResolver{}
 
-type gitCommitResolver struct {
-	cache *cache.MultiRepoCache
-}
+type gitCommitResolver struct{}
 
 func (r gitCommitResolver) ShortHash(_ context.Context, obj *models.GitCommitMeta) (string, error) {
 	s := string(obj.Hash)
@@ -72,3 +69,24 @@ func (r gitCommitResolver) Diff(_ context.Context, obj *models.GitCommitMeta, pa
 	}
 	return &fd, nil
 }
+
+var _ graph.GitTreeEntryResolver = &gitTreeEntryResolver{}
+
+type gitTreeEntryResolver struct{}
+
+func (r gitTreeEntryResolver) LastCommit(_ context.Context, obj *models.GitTreeEntry) (*models.GitCommitMeta, error) {
+	repo := obj.Repo.BrowseRepo()
+	// Pass all sibling names so the history walk covers the whole directory,
+	// which is nearly the same cost as walking for a single entry.
+	// Concurrent calls for the same directory are deduplicated by a singleflight
+	// inside LastCommitForEntries; subsequent calls hit the LRU cache.
+	commits, err := repo.LastCommitForEntries(obj.Ref, obj.Path, obj.SiblingNames)
+	if err != nil {
+		return nil, err
+	}
+	meta, ok := commits[obj.Name]
+	if !ok {
+		return nil, nil
+	}
+	return &models.GitCommitMeta{Repo: obj.Repo, CommitMeta: meta}, nil
+}

api/graphql/resolvers/repo.go 🔗

@@ -260,7 +260,7 @@ func (repoResolver) Refs(_ context.Context, obj *models.Repository, after *strin
 	return connections.Connection(refs, edger, conMaker, input)
 }
 
-func (repoResolver) Tree(_ context.Context, obj *models.Repository, ref string, path *string) ([]*repository.TreeEntry, error) {
+func (repoResolver) Tree(_ context.Context, obj *models.Repository, ref string, path *string) ([]*models.GitTreeEntry, error) {
 	repo := obj.Repo.BrowseRepo()
 	p := ""
 	if path != nil {
@@ -270,9 +270,13 @@ func (repoResolver) Tree(_ context.Context, obj *models.Repository, ref string,
 	if err != nil {
 		return nil, err
 	}
-	ptrs := make([]*repository.TreeEntry, len(entries))
+	names := make([]string, len(entries))
+	for i, e := range entries {
+		names[i] = e.Name
+	}
+	ptrs := make([]*models.GitTreeEntry, len(entries))
 	for i := range entries {
-		ptrs[i] = &entries[i]
+		ptrs[i] = &models.GitTreeEntry{Repo: obj.Repo, Ref: ref, Path: p, SiblingNames: names, TreeEntry: entries[i]}
 	}
 	return ptrs, nil
 }

api/graphql/resolvers/root.go 🔗

@@ -58,7 +58,9 @@ func (RootResolver) Bug() graph.BugResolver {
 }
 
 func (r RootResolver) GitCommit() graph.GitCommitResolver {
-	return &gitCommitResolver{
-		cache: r.MultiRepoCache,
-	}
+	return &gitCommitResolver{}
+}
+
+func (r RootResolver) GitTreeEntry() graph.GitTreeEntryResolver {
+	return &gitTreeEntryResolver{}
 }

api/graphql/schema/git.graphql 🔗

@@ -14,13 +14,16 @@ type GitRef {
 
 """An entry in a git tree (directory listing)."""
 type GitTreeEntry
-@goModel(model: "github.com/git-bug/git-bug/repository.TreeEntry") {
+@goModel(model: "github.com/git-bug/git-bug/api/graphql/models.GitTreeEntry") {
     """File or directory name within the parent tree."""
     name: String!
     """Whether this entry is a file, directory, symlink, or submodule."""
     type: GitObjectType! @goField(name: "ObjectType")
     """Git object hash."""
     hash: String!
+    """The last git commit that touched this tree entry. Null when the entry
+    cannot be resolved within the history depth limit."""
+    lastCommit: GitCommit
 }
 
 """The content of a git blob (file)."""

repository/gogit.go 🔗

@@ -23,6 +23,7 @@ import (
 	"github.com/go-git/go-git/v5/plumbing/object"
 	lru "github.com/hashicorp/golang-lru/v2"
 	"golang.org/x/sync/errgroup"
+	"golang.org/x/sync/singleflight"
 	"golang.org/x/sys/execabs"
 
 	"github.com/git-bug/git-bug/util/lamport"
@@ -66,6 +67,9 @@ type GoGitRepo struct {
 	// across refs that point to the same directory tree. The LRU bounds
 	// memory to lastCommitCacheSize unique (treeHash, directory) pairs.
 	lastCommitCache *lru.Cache[string, map[string]CommitMeta]
+	// lastCommitSF deduplicates concurrent walks for the same cache key so
+	// that a cold cache under parallel requests triggers only one history walk.
+	lastCommitSF singleflight.Group
 
 	keyring      Keyring
 	localStorage LocalStorage
@@ -1330,93 +1334,106 @@ func (repo *GoGitRepo) LastCommitForEntries(ref, path string, names []string) (m
 		return result, nil
 	}
 
-	// Cache miss: walk history for ALL entries in this directory so the
-	// cached result is complete and valid for any future name subset.
-	remaining := make(map[string]bool, len(startEntries))
-	for name := range startEntries {
-		remaining[name] = true
-	}
-	result := make(map[string]CommitMeta, len(remaining))
-
-	repo.rMutex.Lock()
+	// Cache miss: use singleflight so that concurrent calls for the same
+	// directory share one history walk instead of each doing their own.
+	val, err, _ := repo.lastCommitSF.Do(cacheKey, func() (any, error) {
+		// Re-check inside Do: another goroutine may have populated the cache
+		// between our initial Get and acquiring the singleflight key.
+		if cached, ok := repo.lastCommitCache.Get(cacheKey); ok {
+			return cached, nil
+		}
 
-	iter, err := repo.r.Log(&gogit.LogOptions{
-		From:  startHash,
-		Order: gogit.LogOrderCommitterTime,
-	})
-	if err != nil {
-		repo.rMutex.Unlock()
-		return nil, err
-	}
+		remaining := make(map[string]bool, len(startEntries))
+		for name := range startEntries {
+			remaining[name] = true
+		}
+		result := make(map[string]CommitMeta, len(remaining))
 
-	// Seed the parent-reuse cache with the entries we already fetched above
-	// so the first iteration's current-tree read is skipped for free.
-	// In a linear history this halves tree reads for every subsequent step:
-	// the parent fetched at depth D is the current commit at depth D+1.
-	cachedParentHash := startHash
-	cachedParentEntries := startEntries
+		repo.rMutex.Lock()
 
-	for depth := 0; len(remaining) > 0 && depth < lastCommitDepthLimit; depth++ {
-		c, err := iter.Next()
-		if err == io.EOF {
-			break
-		}
+		iter, err := repo.r.Log(&gogit.LogOptions{
+			From:  startHash,
+			Order: gogit.LogOrderCommitterTime,
+		})
 		if err != nil {
-			iter.Close()
 			repo.rMutex.Unlock()
 			return nil, err
 		}
 
-		var currentEntries map[string]plumbing.Hash
-		if c.Hash == cachedParentHash && cachedParentEntries != nil {
-			currentEntries = cachedParentEntries
-		} else {
-			_, currentEntries, err = treeEntriesAtPath(c, path)
+		// Seed the parent-reuse cache with the entries we already fetched above
+		// so the first iteration's current-tree read is skipped for free.
+		// In a linear history this halves tree reads for every subsequent step:
+		// the parent fetched at depth D is the current commit at depth D+1.
+		cachedParentHash := startHash
+		cachedParentEntries := startEntries
+
+		for depth := 0; len(remaining) > 0 && depth < lastCommitDepthLimit; depth++ {
+			c, err := iter.Next()
+			if err == io.EOF {
+				break
+			}
 			if err != nil {
-				// path may not exist in this commit; treat as empty
-				currentEntries = map[string]plumbing.Hash{}
+				iter.Close()
+				repo.rMutex.Unlock()
+				return nil, err
 			}
-		}
 
-		var parentEntries map[string]plumbing.Hash
-		cachedParentHash = plumbing.ZeroHash
-		cachedParentEntries = nil
-		if len(c.ParentHashes) > 0 {
-			if parent, err := c.Parents().Next(); err == nil {
-				_, parentEntries, _ = treeEntriesAtPath(parent, path)
-				cachedParentHash = c.ParentHashes[0]
-				cachedParentEntries = parentEntries
+			var currentEntries map[string]plumbing.Hash
+			if c.Hash == cachedParentHash && cachedParentEntries != nil {
+				currentEntries = cachedParentEntries
+			} else {
+				_, currentEntries, err = treeEntriesAtPath(c, path)
+				if err != nil {
+					// path may not exist in this commit; treat as empty
+					currentEntries = map[string]plumbing.Hash{}
+				}
+			}
+
+			var parentEntries map[string]plumbing.Hash
+			cachedParentHash = plumbing.ZeroHash
+			cachedParentEntries = nil
+			if len(c.ParentHashes) > 0 {
+				if parent, err := c.Parents().Next(); err == nil {
+					_, parentEntries, _ = treeEntriesAtPath(parent, path)
+					cachedParentHash = c.ParentHashes[0]
+					cachedParentEntries = parentEntries
+				}
 			}
-		}
 
-		meta := commitToMeta(c)
-		for name := range remaining {
-			curHash, inCurrent := currentEntries[name]
-			parentHash, inParent := parentEntries[name]
-			if inCurrent != inParent || (inCurrent && curHash != parentHash) {
-				result[name] = meta
-				delete(remaining, name)
+			meta := commitToMeta(c)
+			for name := range remaining {
+				curHash, inCurrent := currentEntries[name]
+				parentHash, inParent := parentEntries[name]
+				if inCurrent != inParent || (inCurrent && curHash != parentHash) {
+					result[name] = meta
+					delete(remaining, name)
+				}
 			}
 		}
-	}
 
-	iter.Close()
-	repo.rMutex.Unlock()
+		iter.Close()
+		repo.rMutex.Unlock()
 
-	// Store a defensive copy so that callers cannot mutate cached entries.
-	// The cached map contains all directory entries, not just the requested
-	// names, so future calls for the same directory are fully served from
-	// cache regardless of which names they request.
-	cached := make(map[string]CommitMeta, len(result))
-	for k, v := range result {
-		cached[k] = v
+		// Store a defensive copy so that callers cannot mutate cached entries.
+		// The cached map contains all directory entries, not just the requested
+		// names, so future calls for the same directory are fully served from
+		// cache regardless of which names they request.
+		cached := make(map[string]CommitMeta, len(result))
+		for k, v := range result {
+			cached[k] = v
+		}
+		repo.lastCommitCache.Add(cacheKey, cached)
+		return cached, nil
+	})
+	if err != nil {
+		return nil, err
 	}
-	repo.lastCommitCache.Add(cacheKey, cached)
 
 	// Return only the entries that were requested.
+	full := val.(map[string]CommitMeta)
 	filtered := make(map[string]CommitMeta, len(names))
 	for _, n := range names {
-		if m, ok := result[n]; ok {
+		if m, ok := full[n]; ok {
 			filtered[n] = m
 		}
 	}

repository/repo_testing.go 🔗

@@ -4,6 +4,7 @@ import (
 	"io"
 	"math/rand"
 	"os"
+	"sync"
 	"testing"
 	"time"
 
@@ -601,7 +602,7 @@ func RepoBrowseTest(t *testing.T, repo browsable) {
 		require.Equal(t, c1, readmeLog[1].Hash)
 	})
 
-	t.Run("CommitLog/since-until", func(t *testing.T) {
+	t.Run("CommitLog_since-until", func(t *testing.T) {
 		// since = far future → no commits
 		future := time.Now().Add(24 * time.Hour)
 		none, err := repo.CommitLog("main", "", 10, "", &future, nil)
@@ -654,7 +655,7 @@ func RepoBrowseTest(t *testing.T, repo browsable) {
 		require.NotContains(t, partial, "ghost.txt")
 	})
 
-	t.Run("LastCommitForEntries/cache-subset", func(t *testing.T) {
+	t.Run("LastCommitForEntries_cache-subset", func(t *testing.T) {
 		// First call with one name — seeds (or hits) the cache for this directory.
 		r1, err := repo.LastCommitForEntries("main", "", []string{"README.md"})
 		require.NoError(t, err)
@@ -676,6 +677,34 @@ func RepoBrowseTest(t *testing.T, repo browsable) {
 		require.Equal(t, c2, r3["main.go"].Hash)
 	})
 
+	t.Run("LastCommitForEntries_concurrent", func(t *testing.T) {
+		// Use the "feature" ref so the cache is cold for this key.
+		// This exercises the singleflight path.
+		// The race detector will catch any data races in the cache or walk logic.
+		const workers = 20
+		type result struct {
+			m   map[string]CommitMeta
+			err error
+		}
+		results := make([]result, workers)
+		var wg sync.WaitGroup
+		wg.Add(workers)
+		for i := range workers {
+			go func() {
+				defer wg.Done()
+				m, err := repo.LastCommitForEntries("feature", "", []string{"README.md", "main.go", "src"})
+				results[i] = result{m, err}
+			}()
+		}
+		wg.Wait()
+		for _, r := range results {
+			require.NoError(t, r.err)
+			require.Equal(t, c1, r.m["README.md"].Hash) // feature is at c2, README unchanged since c1
+			require.Equal(t, c2, r.m["main.go"].Hash)
+			require.Equal(t, c2, r.m["src"].Hash)
+		}
+	})
+
 	// ── CommitDetail ──────────────────────────────────────────────────────────
 
 	t.Run("CommitDetail", func(t *testing.T) {