From 88df36188bba0697438a0dcef14c218d61ee5741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Mur=C3=A9?= Date: Wed, 8 Apr 2026 15:31:07 +0200 Subject: [PATCH] repo: ReadData return a reader, improve the http handler (#1549) --- api/http/git_file_handler.go | 46 +++++++++++++++++++++++++----- api/http/git_file_handlers_test.go | 2 ++ cache/repo_cache_common.go | 2 +- entities/identity/identity.go | 3 +- entity/dag/operation_pack.go | 8 +++++- repository/gogit.go | 12 ++------ repository/mock_repo.go | 4 +-- repository/repo.go | 5 ++-- repository/repo_testing.go | 5 +++- 9 files changed, 63 insertions(+), 24 deletions(-) diff --git a/api/http/git_file_handler.go b/api/http/git_file_handler.go index b28c4ff46049ff73b33c59e50d13b87ea71e9610..06ab32a9278c47329ca54f6258b306b9864b91d1 100644 --- a/api/http/git_file_handler.go +++ b/api/http/git_file_handler.go @@ -2,8 +2,9 @@ package http import ( "bytes" + "errors" + "io" "net/http" - "time" "github.com/gorilla/mux" @@ -47,15 +48,46 @@ func (gfh *gitFileHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) { return } - // TODO: this mean that the whole file will be buffered in memory - // This can be a problem for big files. There might be a way around - // that by implementing a io.ReadSeeker that would read and discard - // data when a seek is called. - data, err := repo.ReadData(hash) + reader, err := repo.ReadData(hash) + if errors.Is(err, repository.ErrNotFound) { + http.Error(rw, "blob not found", http.StatusNotFound) + return + } if err != nil { http.Error(rw, err.Error(), http.StatusInternalServerError) return } + defer func() { + _ = reader.Close() + }() + + serveContent(rw, r, reader) +} + +// serveContent is a somewhat equivalent of http.serveContent, without support for range request. +// This is necessary as the repo (and go-git)'s data reader doesn't support Seek(). +func serveContent(w http.ResponseWriter, r *http.Request, content io.Reader) { + if w.Header().Get("Content-Type") == "" { + // Sniff the type from the first up to 512 bytes. + var buf [512]byte + n, err := io.ReadFull(content, buf[:]) + switch err { + case nil: + w.Header().Set("Content-Type", http.DetectContentType(buf[:n])) + content = io.MultiReader(bytes.NewReader(buf[:n]), content) + case io.ErrUnexpectedEOF, io.EOF: + w.Header().Set("Content-Type", http.DetectContentType(buf[:n])) + content = bytes.NewReader(buf[:n]) + default: + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + } + + w.WriteHeader(http.StatusOK) + if r.Method == http.MethodHead { + return + } - http.ServeContent(rw, r, "", time.Now(), bytes.NewReader(data)) + _, _ = io.Copy(w, content) } diff --git a/api/http/git_file_handlers_test.go b/api/http/git_file_handlers_test.go index 830e035e402379aa788e2c7727ad6f8c72616d8b..d7a215322b8d06eb9dd952d67c0eae8d83c5e3e8 100644 --- a/api/http/git_file_handlers_test.go +++ b/api/http/git_file_handlers_test.go @@ -87,6 +87,8 @@ func TestGitFileHandlers(t *testing.T) { downloadHandler.ServeHTTP(w, r) assert.Equal(t, http.StatusOK, w.Code) + // check if content type detection is working + assert.Equal(t, "image/png", w.Header().Get("Content-Type")) assert.Equal(t, data.Bytes(), w.Body.Bytes()) } diff --git a/cache/repo_cache_common.go b/cache/repo_cache_common.go index 89ffe3d453ad2c90e1c3ca60f8d6657956ff7c5a..5ed9e7a3b47b1055c7e18ecf831b53edf3e02da3 100644 --- a/cache/repo_cache_common.go +++ b/cache/repo_cache_common.go @@ -78,7 +78,7 @@ func (c *RepoCache) LocalStorage() repository.LocalStorage { } // ReadData will attempt to read arbitrary data from the given hash -func (c *RepoCache) ReadData(hash repository.Hash) ([]byte, error) { +func (c *RepoCache) ReadData(hash repository.Hash) (io.ReadCloser, error) { return c.repo.ReadData(hash) } diff --git a/entities/identity/identity.go b/entities/identity/identity.go index 9a10098fa21dd798506e9dcd1a364d62277d3313..1436259afccb74a489903e599bdbb354aaae5cf2 100644 --- a/entities/identity/identity.go +++ b/entities/identity/identity.go @@ -136,7 +136,8 @@ func read(repo repository.Repo, ref string) (*Identity, error) { } var version version - err = json.Unmarshal(data, &version) + err = json.NewDecoder(data).Decode(&version) + _ = data.Close() if err != nil { return nil, errors.Wrapf(err, "failed to decode Identity version json %s", hash) } diff --git a/entity/dag/operation_pack.go b/entity/dag/operation_pack.go index 1fd4daf7d7c9ef7af17e381b07c95732bd246cfa..eab42bc57d0d79d5f0d2e351f650f21cd37b874f 100644 --- a/entity/dag/operation_pack.go +++ b/entity/dag/operation_pack.go @@ -3,6 +3,7 @@ package dag import ( "encoding/json" "fmt" + "io" "strconv" "strings" @@ -242,7 +243,12 @@ func readOperationPack(def Definition, repo repository.RepoData, resolvers entit for _, entry := range entries { switch { case entry.Name == opsEntryName: - data, err := repo.ReadData(entry.Hash) + r, err := repo.ReadData(entry.Hash) + if err != nil { + return nil, errors.Wrap(err, "failed to read git blob data") + } + data, err := io.ReadAll(r) + _ = r.Close() if err != nil { return nil, errors.Wrap(err, "failed to read git blob data") } diff --git a/repository/gogit.go b/repository/gogit.go index 1fa716c08a37929ff66c19bee0266cad5d8e8828..aff89fea72c1ba7ee483a22762e8d37d6c89a45b 100644 --- a/repository/gogit.go +++ b/repository/gogit.go @@ -550,25 +550,19 @@ func (repo *GoGitRepo) StoreData(data []byte) (Hash, error) { } // ReadData will attempt to read arbitrary data from the given hash -func (repo *GoGitRepo) ReadData(hash Hash) ([]byte, error) { +func (repo *GoGitRepo) ReadData(hash Hash) (io.ReadCloser, error) { repo.rMutex.Lock() defer repo.rMutex.Unlock() obj, err := repo.r.BlobObject(plumbing.NewHash(hash.String())) - if err == plumbing.ErrObjectNotFound { + if errors.Is(err, plumbing.ErrObjectNotFound) { return nil, ErrNotFound } if err != nil { return nil, err } - r, err := obj.Reader() - if err != nil { - return nil, err - } - - // TODO: return a io.Reader instead - return io.ReadAll(r) + return obj.Reader() } // StoreTree will store a mapping key-->Hash as a Git tree diff --git a/repository/mock_repo.go b/repository/mock_repo.go index 0b5500038e12431b481fb1b5e8b1acd5f7a583a9..3fcb508f4dcd0212d083f58ae5ef7857a4408627 100644 --- a/repository/mock_repo.go +++ b/repository/mock_repo.go @@ -266,13 +266,13 @@ func (r *mockRepoDataBrowse) StoreData(data []byte) (Hash, error) { return hash, nil } -func (r *mockRepoDataBrowse) ReadData(hash Hash) ([]byte, error) { +func (r *mockRepoDataBrowse) ReadData(hash Hash) (io.ReadCloser, error) { data, ok := r.blobs[hash] if !ok { return nil, ErrNotFound } - return data, nil + return io.NopCloser(bytes.NewReader(data)), nil } func (r *mockRepoDataBrowse) StoreTree(entries []TreeEntry) (Hash, error) { diff --git a/repository/repo.go b/repository/repo.go index d799f85dd5185b1cd3d4ab88b8d03287242eb636..8fdf9949aee1be090def8c3a5a11ec6874ae0e67 100644 --- a/repository/repo.go +++ b/repository/repo.go @@ -148,9 +148,10 @@ type RepoData interface { // StoreData will store arbitrary data and return the corresponding hash StoreData(data []byte) (Hash, error) - // ReadData will attempt to read arbitrary data from the given hash + // ReadData returns a reader for arbitrary data associated with the given hash. // Returns ErrNotFound if not found. - ReadData(hash Hash) ([]byte, error) + // The caller must close the reader. + ReadData(hash Hash) (io.ReadCloser, error) // StoreTree will store a mapping key-->Hash as a Git tree StoreTree(mapping []TreeEntry) (Hash, error) diff --git a/repository/repo_testing.go b/repository/repo_testing.go index 58ee39b286f8e2deeab71bf649e18aa6e095f61e..2ba28a1856759f557047d833c95c09fc0cad7706 100644 --- a/repository/repo_testing.go +++ b/repository/repo_testing.go @@ -107,7 +107,10 @@ func RepoDataTest(t *testing.T, repo RepoData) { require.NoError(t, err) require.True(t, blobHash1.IsValid()) - blob1Read, err := repo.ReadData(blobHash1) + blob1Reader, err := repo.ReadData(blobHash1) + require.NoError(t, err) + defer func() { require.NoError(t, blob1Reader.Close()) }() + blob1Read, err := io.ReadAll(blob1Reader) require.NoError(t, err) require.Equal(t, data, blob1Read)