refactor,fix: add rename lfs objects migration (#409)

Ayman Bagabas created

* refactor,fix: add rename lfs objects migration

Rename lfs objects from OID[:2]/OID[2:4]/OID[4:] to OID[:2]/OID[2:4]/OID
to follow specs https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git

* fix: disable pure ssh lfs by default

Change summary

README.md                                  |  4 
go.mod                                     |  2 
go.sum                                     |  4 
pkg/config/config.go                       |  2 
pkg/db/migrate/0003_migrate_lfs_objects.go | 70 ++++++++++++++++++++++
pkg/db/migrate/migrations.go               |  1 
pkg/git/lfs.go                             | 75 ++++++++---------------
pkg/git/lfs_log.go                         | 17 +++++
pkg/lfs/pointer.go                         |  3 
pkg/lfs/pointer_test.go                    |  6 
10 files changed, 128 insertions(+), 56 deletions(-)

Detailed changes

README.md 🔗

@@ -208,7 +208,7 @@ lfs:
   # Enable Git LFS.
   enabled: true
   # Enable Git SSH transfer.
-  ssh_enabled: true
+  ssh_enabled: false
 
 # Cron job configuration
 jobs:
@@ -268,6 +268,8 @@ Soft Serve supports both Git LFS [HTTP](https://github.com/git-lfs/git-lfs/blob/
 
 Use the `lfs` config section to customize your Git LFS server.
 
+> **Note**: The pure-SSH transfer is disabled by default.
+
 ## Server Access
 
 Soft Serve at its core manages your server authentication and authorization. Authentication verifies the identity of a user, while authorization determines their access rights to a repository.

go.mod 🔗

@@ -23,7 +23,7 @@ require (
 	github.com/caarlos0/duration v0.0.0-20220103233809-8df7c22fe305
 	github.com/caarlos0/env/v8 v8.0.0
 	github.com/caarlos0/tablewriter v0.1.0
-	github.com/charmbracelet/git-lfs-transfer v0.1.1-0.20230725143853-5dd0632f9245
+	github.com/charmbracelet/git-lfs-transfer v0.1.1-0.20231027181609-f7ff6baf2ed0
 	github.com/charmbracelet/keygen v0.5.0
 	github.com/charmbracelet/log v0.2.5
 	github.com/charmbracelet/ssh v0.0.0-20230822194956-1a051f898e09

go.sum 🔗

@@ -25,8 +25,8 @@ github.com/charmbracelet/bubbles v0.16.1 h1:6uzpAAaT9ZqKssntbvZMlksWHruQLNxg49H5
 github.com/charmbracelet/bubbles v0.16.1/go.mod h1:2QCp9LFlEsBQMvIYERr7Ww2H2bA7xen1idUDIzm/+Xc=
 github.com/charmbracelet/bubbletea v0.24.2 h1:uaQIKx9Ai6Gdh5zpTbGiWpytMU+CfsPp06RaW2cx/SY=
 github.com/charmbracelet/bubbletea v0.24.2/go.mod h1:XdrNrV4J8GiyshTtx3DNuYkR1FDaJmO3l2nejekbsgg=
-github.com/charmbracelet/git-lfs-transfer v0.1.1-0.20230725143853-5dd0632f9245 h1:PeGKqKX84IAFhFSWjTyPGiLzzEPcv94C9qKsYBk2nbQ=
-github.com/charmbracelet/git-lfs-transfer v0.1.1-0.20230725143853-5dd0632f9245/go.mod h1:eXJuVicxnjRgRMokmutZdistxoMRjBjjfqvrYq7bCIU=
+github.com/charmbracelet/git-lfs-transfer v0.1.1-0.20231027181609-f7ff6baf2ed0 h1:Wi80d2xNAqvi6r8Udlc9UgPMdrJ+ld5ylKH7d8SQ7gE=
+github.com/charmbracelet/git-lfs-transfer v0.1.1-0.20231027181609-f7ff6baf2ed0/go.mod h1:AHLgIZ2TXQCgt3pDNXR6FgmpGHLH1wPM9cEJPHSVaYg=
 github.com/charmbracelet/glamour v0.6.0 h1:wi8fse3Y7nfcabbbDuwolqTqMQPMnVPeZhDM273bISc=
 github.com/charmbracelet/glamour v0.6.0/go.mod h1:taqWV4swIMMbWALc0m7AfE9JkPSU8om2538k9ITBxOc=
 github.com/charmbracelet/keygen v0.5.0 h1:XY0fsoYiCSM9axkrU+2ziE6u6YjJulo/b9Dghnw6MZc=

pkg/config/config.go 🔗

@@ -339,7 +339,7 @@ func DefaultConfig() *Config {
 		},
 		LFS: LFSConfig{
 			Enabled:    true,
-			SSHEnabled: true,
+			SSHEnabled: false,
 		},
 	}
 }

pkg/db/migrate/0003_migrate_lfs_objects.go 🔗

@@ -0,0 +1,70 @@
+package migrate
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strconv"
+
+	"github.com/charmbracelet/log"
+	"github.com/charmbracelet/soft-serve/pkg/config"
+	"github.com/charmbracelet/soft-serve/pkg/db"
+	"github.com/charmbracelet/soft-serve/pkg/db/models"
+)
+
+const (
+	migrateLfsObjectsName    = "migrate_lfs_objects"
+	migrateLfsObjectsVersion = 3
+)
+
+// Correct LFS objects relative path.
+// From OID[:2]/OID[2:4]/OID[4:] to OID[:2]/OID[2:4]/OID
+// See: https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git
+var migrateLfsObjects = Migration{
+	Name:    migrateLfsObjectsName,
+	Version: migrateLfsObjectsVersion,
+	Migrate: func(ctx context.Context, tx *db.Tx) error {
+		cfg := config.FromContext(ctx)
+		logger := log.FromContext(ctx).WithPrefix("migrate_lfs_objects")
+
+		var repoIds []int64
+		if err := tx.Select(&repoIds, "SELECT id FROM repos"); err != nil {
+			return err
+		}
+		for _, r := range repoIds {
+			var objs []models.LFSObject
+			if err := tx.Select(&objs, "SELECT * FROM lfs_objects WHERE repo_id = ?", r); err != nil {
+				return err
+			}
+			objsp := filepath.Join(cfg.DataPath, "lfs", strconv.FormatInt(r, 10), "objects")
+			for _, obj := range objs {
+				oldpath := filepath.Join(objsp, badRelativePath(obj.Oid))
+				newpath := filepath.Join(objsp, goodRelativePath(obj.Oid))
+				if _, err := os.Stat(oldpath); err == nil {
+					if err := os.Rename(oldpath, newpath); err != nil {
+						logger.Error("rename lfs object", "oldpath", oldpath, "newpath", newpath, "err", err)
+						continue
+					}
+				}
+			}
+		}
+		return nil
+	},
+	Rollback: func(ctx context.Context, tx *db.Tx) error {
+		return nil
+	},
+}
+
+func goodRelativePath(oid string) string {
+	if len(oid) < 5 {
+		return oid
+	}
+	return filepath.Join(oid[:2], oid[2:4], oid)
+}
+
+func badRelativePath(oid string) string {
+	if len(oid) < 5 {
+		return oid
+	}
+	return filepath.Join(oid[:2], oid[2:4], oid[4:])
+}

pkg/db/migrate/migrations.go 🔗

@@ -17,6 +17,7 @@ var sqls embed.FS
 var migrations = []Migration{
 	createTables,
 	webhooks,
+	migrateLfsObjects,
 }
 
 func execMigration(ctx context.Context, tx *db.Tx, version int, name string, down bool) error {

pkg/git/lfs.go 🔗

@@ -10,7 +10,6 @@ import (
 	"path"
 	"path/filepath"
 	"strconv"
-	"strings"
 	"time"
 
 	"github.com/charmbracelet/git-lfs-transfer/transfer"
@@ -62,7 +61,7 @@ func LFSTransfer(ctx context.Context, cmd ServiceCommand) error {
 	}
 
 	logger := log.FromContext(ctx).WithPrefix("lfs-transfer")
-	handler := transfer.NewPktline(cmd.Stdin, cmd.Stdout)
+	handler := transfer.NewPktline(cmd.Stdin, cmd.Stdout, &lfsLogger{logger})
 	repo := proto.RepositoryFromContext(ctx)
 	if repo == nil {
 		logger.Error("no repository in context")
@@ -95,43 +94,36 @@ func LFSTransfer(ctx context.Context, cmd ServiceCommand) error {
 		logger:  logger,
 		storage: storage.NewLocalStorage(filepath.Join(cfg.DataPath, "lfs", repoID)),
 		repo:    repo,
-	})
+	}, &lfsLogger{logger})
 
 	return processor.ProcessCommands(op)
 }
 
 // Batch implements transfer.Backend.
-func (t *lfsTransfer) Batch(_ string, pointers []transfer.Pointer, _ map[string]string) ([]transfer.BatchItem, error) {
-	items := make([]transfer.BatchItem, 0)
-	for _, p := range pointers {
-		obj, err := t.store.GetLFSObjectByOid(t.ctx, t.dbx, t.repo.ID(), p.Oid)
+func (t *lfsTransfer) Batch(_ string, pointers []transfer.BatchItem, _ transfer.Args) ([]transfer.BatchItem, error) {
+	for i := range pointers {
+		obj, err := t.store.GetLFSObjectByOid(t.ctx, t.dbx, t.repo.ID(), pointers[i].Oid)
 		if err != nil && !errors.Is(err, db.ErrRecordNotFound) {
-			return items, db.WrapError(err)
+			return pointers, db.WrapError(err)
 		}
 
-		exist, err := t.storage.Exists(path.Join("objects", p.RelativePath()))
+		pointers[i].Present, err = t.storage.Exists(path.Join("objects", pointers[i].RelativePath()))
 		if err != nil {
-			return items, err
+			return pointers, err
 		}
 
-		if exist && obj.ID == 0 {
-			if err := t.store.CreateLFSObject(t.ctx, t.dbx, t.repo.ID(), p.Oid, p.Size); err != nil {
-				return items, db.WrapError(err)
+		if pointers[i].Present && obj.ID == 0 {
+			if err := t.store.CreateLFSObject(t.ctx, t.dbx, t.repo.ID(), pointers[i].Oid, pointers[i].Size); err != nil {
+				return pointers, db.WrapError(err)
 			}
 		}
-
-		item := transfer.BatchItem{
-			Pointer: p,
-			Present: exist,
-		}
-		items = append(items, item)
 	}
 
-	return items, nil
+	return pointers, nil
 }
 
 // Download implements transfer.Backend.
-func (t *lfsTransfer) Download(oid string, _ map[string]string) (fs.File, error) {
+func (t *lfsTransfer) Download(oid string, _ transfer.Args) (fs.File, error) {
 	cfg := config.FromContext(t.ctx)
 	repoID := strconv.FormatInt(t.repo.ID(), 10)
 	strg := storage.NewLocalStorage(filepath.Join(cfg.DataPath, "lfs", repoID))
@@ -145,8 +137,12 @@ type uploadObject struct {
 	object storage.Object
 }
 
+func (u *uploadObject) Close() error {
+	return u.object.Close()
+}
+
 // StartUpload implements transfer.Backend.
-func (t *lfsTransfer) StartUpload(oid string, r io.Reader, _ map[string]string) (interface{}, error) {
+func (t *lfsTransfer) StartUpload(oid string, r io.Reader, _ transfer.Args) (io.Closer, error) {
 	if r == nil {
 		return nil, fmt.Errorf("no reader: %w", transfer.ErrMissingData)
 	}
@@ -174,7 +170,7 @@ func (t *lfsTransfer) StartUpload(oid string, r io.Reader, _ map[string]string)
 
 	t.logger.Infof("Object name: %s", obj.Name())
 
-	return uploadObject{
+	return &uploadObject{
 		oid:    oid,
 		size:   written,
 		object: obj,
@@ -182,20 +178,13 @@ func (t *lfsTransfer) StartUpload(oid string, r io.Reader, _ map[string]string)
 }
 
 // FinishUpload implements transfer.Backend.
-func (t *lfsTransfer) FinishUpload(state interface{}, args map[string]string) error {
-	upl, ok := state.(uploadObject)
+func (t *lfsTransfer) FinishUpload(state io.Closer, args transfer.Args) error {
+	upl, ok := state.(*uploadObject)
 	if !ok {
 		return errors.New("invalid state")
 	}
 
-	var size int64
-	for _, arg := range args {
-		if strings.HasPrefix(arg, "size=") {
-			size, _ = strconv.ParseInt(strings.TrimPrefix(arg, "size="), 10, 64)
-			break
-		}
-	}
-
+	size, _ := transfer.SizeFromArgs(args)
 	pointer := transfer.Pointer{
 		Oid: upl.oid,
 	}
@@ -220,24 +209,16 @@ func (t *lfsTransfer) FinishUpload(state interface{}, args map[string]string) er
 }
 
 // Verify implements transfer.Backend.
-func (t *lfsTransfer) Verify(oid string, args map[string]string) (transfer.Status, error) {
-	var expectedSize int64
-	var err error
-	size, ok := args[transfer.SizeKey]
-	if !ok {
-		return transfer.NewFailureStatus(transfer.StatusBadRequest, "missing size"), nil
-	}
-
-	expectedSize, err = strconv.ParseInt(size, 10, 64)
+func (t *lfsTransfer) Verify(oid string, args transfer.Args) (transfer.Status, error) {
+	expectedSize, err := transfer.SizeFromArgs(args)
 	if err != nil {
-		t.logger.Errorf("invalid size argument: %v", err)
-		return transfer.NewFailureStatus(transfer.StatusBadRequest, "invalid size argument"), nil
+		return transfer.NewStatus(transfer.StatusBadRequest, "missing size"), nil // nolint: nilerr
 	}
 
 	obj, err := t.store.GetLFSObjectByOid(t.ctx, t.dbx, t.repo.ID(), oid)
 	if err != nil {
 		if errors.Is(err, db.ErrRecordNotFound) {
-			return transfer.NewFailureStatus(transfer.StatusNotFound, "object not found"), nil
+			return transfer.NewStatus(transfer.StatusNotFound, "object not found"), nil
 		}
 		t.logger.Errorf("error getting object: %v", err)
 		return nil, err
@@ -245,7 +226,7 @@ func (t *lfsTransfer) Verify(oid string, args map[string]string) (transfer.Statu
 
 	if obj.Size != expectedSize {
 		t.logger.Errorf("size mismatch: %d != %d", obj.Size, expectedSize)
-		return transfer.NewFailureStatus(transfer.StatusConflict, "size mismatch"), nil
+		return transfer.NewStatus(transfer.StatusConflict, "size mismatch"), nil
 	}
 
 	return transfer.SuccessStatus(), nil
@@ -260,7 +241,7 @@ type lfsLockBackend struct {
 var _ transfer.LockBackend = (*lfsLockBackend)(nil)
 
 // LockBackend implements transfer.Backend.
-func (t *lfsTransfer) LockBackend(args map[string]string) transfer.LockBackend {
+func (t *lfsTransfer) LockBackend(args transfer.Args) transfer.LockBackend {
 	user := proto.UserFromContext(t.ctx)
 	if user == nil {
 		t.logger.Errorf("no user in context while creating lock backend, repo %s", t.repo.Name())

pkg/git/lfs_log.go 🔗

@@ -0,0 +1,17 @@
+package git
+
+import (
+	"github.com/charmbracelet/git-lfs-transfer/transfer"
+	"github.com/charmbracelet/log"
+)
+
+type lfsLogger struct {
+	l *log.Logger
+}
+
+var _ transfer.Logger = &lfsLogger{}
+
+// Log implements transfer.Logger.
+func (l *lfsLogger) Log(msg string, kv ...interface{}) {
+	l.l.Debug(msg, kv...)
+}

pkg/lfs/pointer.go 🔗

@@ -102,12 +102,13 @@ func (p Pointer) String() string {
 }
 
 // RelativePath returns the relative storage path of the pointer
+// https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git
 func (p Pointer) RelativePath() string {
 	if len(p.Oid) < 5 {
 		return p.Oid
 	}
 
-	return path.Join(p.Oid[0:2], p.Oid[2:4], p.Oid[4:])
+	return path.Join(p.Oid[0:2], p.Oid[2:4], p.Oid)
 }
 
 // GeneratePointer generates a pointer for arbitrary content

pkg/lfs/pointer_test.go 🔗

@@ -2,8 +2,8 @@ package lfs
 
 import (
 	"errors"
+	"path"
 	"strconv"
-	"strings"
 	"testing"
 )
 
@@ -78,8 +78,8 @@ size abc
 					t.Errorf("Expected a valid pointer")
 					return
 				}
-				if p.Oid != strings.ReplaceAll(p.RelativePath(), "/", "") {
-					t.Errorf("Expected oid to be the relative path without slashes")
+				if path.Join(p.Oid[:2], p.Oid[2:4], p.Oid) != p.RelativePath() {
+					t.Errorf("Expected a valid relative path")
 					return
 				}
 			}