api/http: add support for ref+path to get a blob content (#1550)

Michael Muré created

Change summary

api/http/git_file_handler.go       | 104 +++++++++++++---
api/http/git_file_handlers_test.go | 199 +++++++++++++++++++++++++------
commands/webui.go                  |   6 
3 files changed, 247 insertions(+), 62 deletions(-)

Detailed changes

api/http/git_file_handler.go 🔗

@@ -5,6 +5,8 @@ import (
 	"errors"
 	"io"
 	"net/http"
+	"strconv"
+	"strings"
 
 	"github.com/gorilla/mux"
 
@@ -12,11 +14,17 @@ import (
 	"github.com/git-bug/git-bug/repository"
 )
 
-// implement a http.Handler that will read and server git blob.
+// gitFileHandler implements an http.Handler that reads and serves a git blob.
+//
+// The route is /gitfile/{repo}/{rest:.+} where rest is resolved as follows:
+//   - If rest contains no slash and is a valid git hash: serve the blob by hash.
+//   - Otherwise: try each slash as a split point between ref and path, longest
+//     ref first (right to left). This handles refs with slashes (e.g.
+//     "feature/foo") and matches the ref that is most specific.
 //
 // Expected gorilla/mux parameters:
-//   - "repo" : the ref of the repo or "" for the default one
-//   - "hash" : the git hash of the file to retrieve
+//   - "repo": the repo ref or "" for the default one
+//   - "rest": the hash, or the ref+path combined
 type gitFileHandler struct {
 	mrc *cache.MultiRepoCache
 }
@@ -26,47 +34,101 @@ func NewGitFileHandler(mrc *cache.MultiRepoCache) http.Handler {
 }
 
 func (gfh *gitFileHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
+	vars := mux.Vars(r)
+
 	var repo *cache.RepoCache
 	var err error
 
-	repoVar := mux.Vars(r)["repo"]
-	switch repoVar {
+	switch repoVar := vars["repo"]; repoVar {
 	case "":
 		repo, err = gfh.mrc.DefaultRepo()
 	default:
 		repo, err = gfh.mrc.ResolveRepo(repoVar)
 	}
-
 	if err != nil {
 		http.Error(rw, "invalid repo reference", http.StatusBadRequest)
 		return
 	}
 
-	hash := repository.Hash(mux.Vars(r)["hash"])
-	if !hash.IsValid() {
-		http.Error(rw, "invalid git hash", http.StatusBadRequest)
+	rest := vars["rest"]
+	if rest == "" {
+		http.Error(rw, "missing path", http.StatusBadRequest)
 		return
 	}
 
-	reader, err := repo.ReadData(hash)
-	if errors.Is(err, repository.ErrNotFound) {
-		http.Error(rw, "blob not found", http.StatusNotFound)
+	// If rest is a single segment that is a valid git hash, serve by hash.
+	if !strings.Contains(rest, "/") {
+		if hash := repository.Hash(rest); hash.IsValid() {
+			reader, err := repo.ReadData(hash)
+			if errors.Is(err, repository.ErrNotFound) {
+				http.Error(rw, "not found", http.StatusNotFound)
+				return
+			}
+			if err != nil {
+				http.Error(rw, "internal server error", http.StatusInternalServerError)
+				return
+			}
+			defer reader.Close()
+			serveContent(rw, r, reader, -1, hash)
+			return
+		}
+		// Single segment that is not a hash: malformed request.
+		http.Error(rw, "expected <hash> or <ref>/<path>", http.StatusBadRequest)
 		return
 	}
-	if err != nil {
-		http.Error(rw, err.Error(), http.StatusInternalServerError)
+
+	// Greedy ref+path resolution: try split points longest ref first (right to
+	// left) so that refs with slashes (e.g. "feature/foo") take precedence over
+	// shorter prefixes.
+	segments := strings.Split(rest, "/")
+	for i := len(segments) - 1; i >= 1; i-- {
+		ref := strings.Join(segments[:i], "/")
+		path := strings.Join(segments[i:], "/")
+		rc, size, hash, err := repo.BrowseRepo().BlobAtPath(ref, path)
+		if errors.Is(err, repository.ErrNotFound) {
+			continue
+		}
+		if err != nil {
+			http.Error(rw, "internal server error", http.StatusInternalServerError)
+			return
+		}
+		defer rc.Close()
+		serveContent(rw, r, rc, size, hash)
 		return
 	}
-	defer func() {
-		_ = reader.Close()
-	}()
 
-	serveContent(rw, r, reader)
+	http.Error(rw, "not found", http.StatusNotFound)
+}
+
+// ifNoneMatchContains reports whether the If-None-Match header value matches
+// etag per RFC 9110 weak comparison: handles "*", comma-separated lists, and
+// weak validators (W/"...").
+func ifNoneMatchContains(header, etag string) bool {
+	if header == "*" {
+		return true
+	}
+	for _, token := range strings.Split(header, ",") {
+		token = strings.TrimSpace(token)
+		token = strings.TrimPrefix(token, "W/")
+		if token == etag {
+			return true
+		}
+	}
+	return false
 }
 
 // serveContent is a somewhat equivalent of http.serveContent, without support for range request.
 // This is necessary as the repo (and go-git)'s data reader doesn't support Seek().
-func serveContent(w http.ResponseWriter, r *http.Request, content io.Reader) {
+func serveContent(w http.ResponseWriter, r *http.Request, content io.Reader, size int64, hash repository.Hash) {
+	if hash.IsValid() {
+		etag := `"` + string(hash) + `"`
+		w.Header().Set("ETag", etag)
+		if ifNoneMatchContains(r.Header.Get("If-None-Match"), etag) {
+			w.WriteHeader(http.StatusNotModified)
+			return
+		}
+	}
+
 	if w.Header().Get("Content-Type") == "" {
 		// Sniff the type from the first up to 512 bytes.
 		var buf [512]byte
@@ -84,6 +146,10 @@ func serveContent(w http.ResponseWriter, r *http.Request, content io.Reader) {
 		}
 	}
 
+	if size >= 0 {
+		w.Header().Set("Content-Length", strconv.FormatInt(size, 10))
+	}
+
 	w.WriteHeader(http.StatusOK)
 	if r.Method == http.MethodHead {
 		return

api/http/git_file_handlers_test.go 🔗

@@ -2,6 +2,7 @@ package http
 
 import (
 	"bytes"
+	"context"
 	"image"
 	"image/png"
 	"mime/multipart"
@@ -29,66 +30,186 @@ func TestGitFileHandlers(t *testing.T) {
 
 	author, err := repoCache.Identities().New("test identity", "test@test.org")
 	require.NoError(t, err)
-
 	err = repoCache.SetUserIdentity(author)
 	require.NoError(t, err)
 
-	// UPLOAD
-
-	uploadHandler := NewGitUploadFileHandler(mrc)
-
+	// Build a PNG image to use as test content.
 	img := image.NewNRGBA(image.Rect(0, 0, 50, 50))
 	data := &bytes.Buffer{}
 	err = png.Encode(data, img)
 	require.NoError(t, err)
+	imgBytes := data.Bytes()
+
+	// ── Upload ────────────────────────────────────────────────────────────────
+
+	t.Run("Upload", func(t *testing.T) {
+		body := &bytes.Buffer{}
+		writer := multipart.NewWriter(body)
+		part, err := writer.CreateFormFile("uploadfile", "noname")
+		require.NoError(t, err)
+		_, err = part.Write(imgBytes)
+		require.NoError(t, err)
+		require.NoError(t, writer.Close())
+
+		w := httptest.NewRecorder()
+		r, _ := http.NewRequest("POST", "/", body)
+		r.Header.Add("Content-Type", writer.FormDataContentType())
+		r = r.WithContext(auth.CtxWithUser(r.Context(), author.Id()))
+		r = mux.SetURLVars(r, map[string]string{"repo": ""})
+
+		NewGitUploadFileHandler(mrc).ServeHTTP(w, r)
+		assert.Equal(t, http.StatusOK, w.Code)
+		assert.Equal(t, `{"hash":"3426a1488292d8f3f3c59ca679681336542b986f"}`, w.Body.String())
+	})
+
+	// ── Download by hash ──────────────────────────────────────────────────────
+
+	t.Run("DownloadByHash", func(t *testing.T) {
+		w := httptest.NewRecorder()
+		r, _ := http.NewRequest("GET", "/", nil)
+		r = r.WithContext(auth.CtxWithUser(r.Context(), author.Id()))
+		r = mux.SetURLVars(r, map[string]string{
+			"repo": "",
+			"rest": "3426a1488292d8f3f3c59ca679681336542b986f",
+		})
+
+		NewGitFileHandler(mrc).ServeHTTP(w, r)
+		assert.Equal(t, http.StatusOK, w.Code)
+		assert.Equal(t, "image/png", w.Header().Get("Content-Type"))
+		assert.Equal(t, imgBytes, w.Body.Bytes())
+	})
+
+	// Set up commits to test ref+path resolution.
+	//
+	// Ambiguity test: git's filesystem prevents refs/heads/feature and
+	// refs/heads/feature/foo from coexisting (file vs directory). Instead, use
+	// a branch "feature" and a tag "feature/foo" — different namespaces, no
+	// conflict. resolveRefToHash checks both heads and tags, so the tag is a
+	// valid ref. With longest-ref-first resolution, "feature/foo/image.png"
+	// must resolve via the tag (imgBytes), not via the branch (which has no
+	// foo/image.png and would 404).
+	imgHash, err := repo.StoreData(imgBytes)
+	require.NoError(t, err)
 
-	body := &bytes.Buffer{}
-	writer := multipart.NewWriter(body)
-	part, err := writer.CreateFormFile("uploadfile", "noname")
-	assert.NoError(t, err)
+	otherBytes := []byte("other content")
+	otherHash, err := repo.StoreData(otherBytes)
+	require.NoError(t, err)
 
-	_, err = part.Write(data.Bytes())
-	assert.NoError(t, err)
+	imgTreeHash, err := repo.StoreTree([]repository.TreeEntry{
+		{ObjectType: repository.Blob, Hash: imgHash, Name: "image.png"},
+	})
+	require.NoError(t, err)
 
-	err = writer.Close()
-	assert.NoError(t, err)
+	otherTreeHash, err := repo.StoreTree([]repository.TreeEntry{
+		{ObjectType: repository.Blob, Hash: otherHash, Name: "image.png"},
+	})
+	require.NoError(t, err)
 
-	w := httptest.NewRecorder()
-	r, _ := http.NewRequest("GET", "/", body)
-	r.Header.Add("Content-Type", writer.FormDataContentType())
+	mainCommit, err := repo.StoreCommit(imgTreeHash)
+	require.NoError(t, err)
+	featureCommit, err := repo.StoreCommit(otherTreeHash)
+	require.NoError(t, err)
 
-	// Simulate auth
-	r = r.WithContext(auth.CtxWithUser(r.Context(), author.Id()))
+	require.NoError(t, repo.UpdateRef("refs/heads/main", mainCommit))
+	// "feature" branch has otherBytes; "feature/foo" tag has imgBytes.
+	require.NoError(t, repo.UpdateRef("refs/heads/feature", featureCommit))
+	require.NoError(t, repo.UpdateRef("refs/tags/feature/foo", mainCommit))
+
+	handler := NewGitFileHandler(mrc)
+	authCtx := auth.CtxWithUser(context.Background(), author.Id())
+
+	serve := func(rest string) *httptest.ResponseRecorder {
+		w := httptest.NewRecorder()
+		r, _ := http.NewRequest("GET", "/", nil)
+		r = r.WithContext(authCtx)
+		r = mux.SetURLVars(r, map[string]string{"repo": "", "rest": rest})
+		handler.ServeHTTP(w, r)
+		return w
+	}
 
-	// Handler's params
-	r = mux.SetURLVars(r, map[string]string{
-		"repo": "",
+	serveWithHeader := func(rest, headerName, headerVal string) *httptest.ResponseRecorder {
+		w := httptest.NewRecorder()
+		r, _ := http.NewRequest("GET", "/", nil)
+		r = r.WithContext(authCtx)
+		r.Header.Set(headerName, headerVal)
+		r = mux.SetURLVars(r, map[string]string{"repo": "", "rest": rest})
+		handler.ServeHTTP(w, r)
+		return w
+	}
+
+	// ── Download by ref+path (simple ref) ─────────────────────────────────────
+
+	t.Run("DownloadByRefPath", func(t *testing.T) {
+		w := serve("main/image.png")
+		assert.Equal(t, http.StatusOK, w.Code)
+		assert.Equal(t, "image/png", w.Header().Get("Content-Type"))
+		assert.Equal(t, imgBytes, w.Body.Bytes())
 	})
 
-	uploadHandler.ServeHTTP(w, r)
+	// ── Download by ref+path (ref with slash) ─────────────────────────────────
+
+	t.Run("DownloadByRefWithSlash", func(t *testing.T) {
+		// "feature/foo" is a tag; verify multi-segment refs resolve correctly.
+		w := serve("feature/foo/image.png")
+		assert.Equal(t, http.StatusOK, w.Code)
+		assert.Equal(t, "image/png", w.Header().Get("Content-Type"))
+		assert.Equal(t, imgBytes, w.Body.Bytes())
+	})
 
-	assert.Equal(t, http.StatusOK, w.Code)
-	assert.Equal(t, `{"hash":"3426a1488292d8f3f3c59ca679681336542b986f"}`, w.Body.String())
-	// DOWNLOAD
+	// ── Ambiguous ref: longest ref wins ───────────────────────────────────────
+	// Both "feature" (branch, otherBytes) and "feature/foo" (tag, imgBytes)
+	// exist. "feature/foo/image.png" must resolve via the longer ref
+	// "feature/foo" → imgBytes, not via "feature" → foo/image.png (404).
 
-	downloadHandler := NewGitFileHandler(mrc)
+	t.Run("AmbiguousRefLongestWins", func(t *testing.T) {
+		w := serve("feature/foo/image.png")
+		assert.Equal(t, http.StatusOK, w.Code)
+		// Must be imgBytes (from tag feature/foo), not otherBytes (from branch feature).
+		assert.Equal(t, imgBytes, w.Body.Bytes())
+	})
 
-	w = httptest.NewRecorder()
-	r, _ = http.NewRequest("GET", "/", nil)
+	// ── Conditional GET: 304 with matching ETag ───────────────────────────────
 
-	// Simulate auth
-	r = r.WithContext(auth.CtxWithUser(r.Context(), author.Id()))
+	t.Run("ConditionalGet304", func(t *testing.T) {
+		// First request to get the ETag.
+		w := serve("main/image.png")
+		require.Equal(t, http.StatusOK, w.Code)
+		etag := w.Header().Get("ETag")
+		require.NotEmpty(t, etag)
 
-	// Handler's params
-	r = mux.SetURLVars(r, map[string]string{
-		"repo": "",
-		"hash": "3426a1488292d8f3f3c59ca679681336542b986f",
+		// Second request with If-None-Match should get 304.
+		w = serveWithHeader("main/image.png", "If-None-Match", etag)
+		assert.Equal(t, http.StatusNotModified, w.Code)
+		assert.Equal(t, etag, w.Header().Get("ETag"))
+		assert.Empty(t, w.Body.Bytes())
 	})
 
-	downloadHandler.ServeHTTP(w, r)
-	assert.Equal(t, http.StatusOK, w.Code)
-	// check if content type detection is working
-	assert.Equal(t, "image/png", w.Header().Get("Content-Type"))
+	t.Run("ConditionalGetWeakETag", func(t *testing.T) {
+		w := serve("main/image.png")
+		require.Equal(t, http.StatusOK, w.Code)
+		etag := w.Header().Get("ETag")
+
+		// Weak form of the same ETag should also match.
+		w = serveWithHeader("main/image.png", "If-None-Match", "W/"+etag)
+		assert.Equal(t, http.StatusNotModified, w.Code)
+	})
 
-	assert.Equal(t, data.Bytes(), w.Body.Bytes())
+	t.Run("ConditionalGetWildcard", func(t *testing.T) {
+		w := serveWithHeader("main/image.png", "If-None-Match", "*")
+		assert.Equal(t, http.StatusNotModified, w.Code)
+	})
+
+	// ── Not found ─────────────────────────────────────────────────────────────
+
+	t.Run("NotFound", func(t *testing.T) {
+		w := serve("main/nonexistent.png")
+		assert.Equal(t, http.StatusNotFound, w.Code)
+	})
+
+	// ── Malformed: single segment that is not a hash ───────────────────────────
+
+	t.Run("MalformedSingleSegment", func(t *testing.T) {
+		w := serve("main")
+		assert.Equal(t, http.StatusBadRequest, w.Code)
+	})
 }

commands/webui.go 🔗

@@ -96,11 +96,9 @@ func setupRoutes(env *execenv.Env, opts webUIOptions) (*mux.Router, func() error
 		errOut = env.Err
 	}
 
-	graphqlHandler := graphql.NewHandler(mrc, errOut)
-
 	router.Path("/playground").Handler(playground.Handler("git-bug", "/graphql"))
-	router.Path("/graphql").Handler(graphqlHandler)
-	router.Path("/gitfile/{repo}/{hash}").Handler(httpapi.NewGitFileHandler(mrc))
+	router.Path("/graphql").Handler(graphql.NewHandler(mrc, errOut))
+	router.Path("/gitfile/{repo}/{rest:.+}").Handler(httpapi.NewGitFileHandler(mrc))
 	router.Path("/upload/{repo}").Methods("POST").Handler(httpapi.NewGitUploadFileHandler(mrc))
 	router.PathPrefix("/").Handler(webui.NewHandler())