1package lfs
  2
  3import (
  4	"bufio"
  5	"bytes"
  6	"context"
  7	"fmt"
  8	"io"
  9	"strconv"
 10	"strings"
 11	"sync"
 12
 13	"github.com/charmbracelet/soft-serve/git"
 14	gitm "github.com/gogs/git-module"
 15)
 16
 17// SearchPointerBlobs scans the whole repository for LFS pointer files
 18func SearchPointerBlobs(ctx context.Context, repo *git.Repository, pointerChan chan<- PointerBlob, errChan chan<- error) {
 19	basePath := repo.Path
 20
 21	catFileCheckReader, catFileCheckWriter := io.Pipe()
 22	shasToBatchReader, shasToBatchWriter := io.Pipe()
 23	catFileBatchReader, catFileBatchWriter := io.Pipe()
 24
 25	wg := sync.WaitGroup{}
 26	wg.Add(6)
 27
 28	// Create the go-routines in reverse order.
 29
 30	// 4. Take the output of cat-file --batch and check if each file in turn
 31	// to see if they're pointers to files in the LFS store
 32	go createPointerResultsFromCatFileBatch(ctx, catFileBatchReader, &wg, pointerChan)
 33
 34	// 3. Take the shas of the blobs and batch read them
 35	go catFileBatch(ctx, shasToBatchReader, catFileBatchWriter, &wg, basePath)
 36
 37	// 2. From the provided objects restrict to blobs <=1k
 38	go blobsLessThan1024FromCatFileBatchCheck(catFileCheckReader, shasToBatchWriter, &wg)
 39
 40	// 1. Run batch-check on all objects in the repository
 41	revListReader, revListWriter := io.Pipe()
 42	shasToCheckReader, shasToCheckWriter := io.Pipe()
 43	go catFileBatchCheck(ctx, shasToCheckReader, catFileCheckWriter, &wg, basePath)
 44	go blobsFromRevListObjects(revListReader, shasToCheckWriter, &wg)
 45	go revListAllObjects(ctx, revListWriter, &wg, basePath, errChan)
 46	wg.Wait()
 47
 48	close(pointerChan)
 49	close(errChan)
 50}
 51
 52func createPointerResultsFromCatFileBatch(ctx context.Context, catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pointerChan chan<- PointerBlob) {
 53	defer wg.Done()
 54	defer catFileBatchReader.Close() // nolint: errcheck
 55
 56	bufferedReader := bufio.NewReader(catFileBatchReader)
 57	buf := make([]byte, 1025)
 58
 59loop:
 60	for {
 61		select {
 62		case <-ctx.Done():
 63			break loop
 64		default:
 65		}
 66
 67		// File descriptor line: sha
 68		sha, err := bufferedReader.ReadString(' ')
 69		if err != nil {
 70			_ = catFileBatchReader.CloseWithError(err)
 71			break
 72		}
 73		sha = strings.TrimSpace(sha)
 74		// Throw away the blob
 75		if _, err := bufferedReader.ReadString(' '); err != nil {
 76			_ = catFileBatchReader.CloseWithError(err)
 77			break
 78		}
 79		sizeStr, err := bufferedReader.ReadString('\n')
 80		if err != nil {
 81			_ = catFileBatchReader.CloseWithError(err)
 82			break
 83		}
 84		size, err := strconv.Atoi(sizeStr[:len(sizeStr)-1])
 85		if err != nil {
 86			_ = catFileBatchReader.CloseWithError(err)
 87			break
 88		}
 89		pointerBuf := buf[:size+1]
 90		if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil {
 91			_ = catFileBatchReader.CloseWithError(err)
 92			break
 93		}
 94		pointerBuf = pointerBuf[:size]
 95		// Now we need to check if the pointerBuf is an LFS pointer
 96		pointer, _ := ReadPointerFromBuffer(pointerBuf)
 97		if !pointer.IsValid() {
 98			continue
 99		}
100
101		pointerChan <- PointerBlob{Hash: sha, Pointer: pointer}
102	}
103}
104
105func catFileBatch(ctx context.Context, shasToBatchReader *io.PipeReader, catFileBatchWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string) {
106	defer wg.Done()
107	defer shasToBatchReader.Close()  // nolint: errcheck
108	defer catFileBatchWriter.Close() // nolint: errcheck
109
110	stderr := new(bytes.Buffer)
111	var errbuf strings.Builder
112	if err := gitm.NewCommandWithContext(ctx, "cat-file", "--batch").RunInDirWithOptions(basePath, gitm.RunInDirOptions{
113		Stdout: catFileBatchWriter,
114		Stdin:  shasToBatchReader,
115		Stderr: stderr,
116	}); err != nil {
117		_ = shasToBatchReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String()))
118	}
119}
120
// blobsLessThan1024FromCatFileBatchCheck filters the line-oriented output of
// `git cat-file --batch-check` ("<sha> <type> <size>" per line) read from
// catFileCheckReader. For every blob whose size is between 0 and 1024 bytes
// it writes "<sha>\n" to shasToBatchWriter; all other lines, including lines
// whose size field is not a number, are dropped.
//
// When writing fails the reader is closed with the write error and the
// function returns at once. On return shasToBatchWriter is closed with the
// scanner's error (nil on a clean end of input), catFileCheckReader is closed
// and wg.Done is called.
func blobsLessThan1024FromCatFileBatchCheck(catFileCheckReader *io.PipeReader, shasToBatchWriter *io.PipeWriter, wg *sync.WaitGroup) {
	defer wg.Done()
	defer catFileCheckReader.Close() // nolint: errcheck
	scanner := bufio.NewScanner(catFileCheckReader)
	defer func() {
		_ = shasToBatchWriter.CloseWithError(scanner.Err())
	}()

	for scanner.Scan() {
		fields := strings.Split(scanner.Text(), " ")
		if len(fields) < 3 || fields[1] != "blob" {
			continue
		}
		// An unparsable size must not be mistaken for a zero-byte blob.
		size, err := strconv.Atoi(fields[2])
		if err != nil || size < 0 || size > 1024 {
			continue
		}
		// A pipe write either delivers everything or fails, so one call is
		// enough. After a failure no later write can succeed either.
		if _, err := io.WriteString(shasToBatchWriter, fields[0]+"\n"); err != nil {
			_ = catFileCheckReader.CloseWithError(err)
			return
		}
	}
}
152
153func catFileBatchCheck(ctx context.Context, shasToCheckReader *io.PipeReader, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string) {
154	defer wg.Done()
155	defer shasToCheckReader.Close()  // nolint: errcheck
156	defer catFileCheckWriter.Close() // nolint: errcheck
157
158	stderr := new(bytes.Buffer)
159	var errbuf strings.Builder
160	if err := gitm.NewCommandWithContext(ctx, "cat-file", "--batch-check").RunInDirWithOptions(basePath, gitm.RunInDirOptions{
161		Stdout: catFileCheckWriter,
162		Stdin:  shasToCheckReader,
163		Stderr: stderr,
164	}); err != nil {
165		_ = shasToCheckReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String()))
166	}
167}
168
// blobsFromRevListObjects filters the line-oriented output of
// `git rev-list --objects` read from revListReader. Lines of the form
// "<sha> <path>" with a non-empty path have their "<sha>\n" forwarded to
// shasToCheckWriter; lines without a path are dropped.
//
// Only the first space separates the sha from the path, so a path that itself
// begins with a space is still recognised.
//
// When writing fails the reader is closed with the write error and the
// function returns at once. On return shasToCheckWriter is closed with the
// scanner's error (nil on a clean end of input), revListReader is closed and
// wg.Done is called.
func blobsFromRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io.PipeWriter, wg *sync.WaitGroup) {
	defer wg.Done()
	defer revListReader.Close() // nolint: errcheck
	scanner := bufio.NewScanner(revListReader)
	defer func() {
		_ = shasToCheckWriter.CloseWithError(scanner.Err())
	}()

	for scanner.Scan() {
		// Split once: everything after the first space belongs to the path.
		fields := strings.SplitN(scanner.Text(), " ", 2)
		if len(fields) < 2 || len(fields[1]) == 0 {
			continue
		}
		// A pipe write either delivers everything or fails, so one call is
		// enough. After a failure no later write can succeed either.
		if _, err := io.WriteString(shasToCheckWriter, fields[0]+"\n"); err != nil {
			_ = revListReader.CloseWithError(err)
			return
		}
	}
}
197
198func revListAllObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string, errChan chan<- error) {
199	defer wg.Done()
200	defer revListWriter.Close() // nolint: errcheck
201
202	stderr := new(bytes.Buffer)
203	var errbuf strings.Builder
204	if err := gitm.NewCommandWithContext(ctx, "rev-list", "--objects", "--all").RunInDirWithOptions(basePath, gitm.RunInDirOptions{
205		Stdout: revListWriter,
206		Stderr: stderr,
207	}); err != nil {
208		errChan <- fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String())
209	}
210}