1package lfs
2
3import (
4 "bufio"
5 "bytes"
6 "context"
7 "fmt"
8 "io"
9 "strconv"
10 "strings"
11 "sync"
12
13 gitm "github.com/aymanbagabas/git-module"
14 "github.com/charmbracelet/soft-serve/git"
15)
16
17// SearchPointerBlobs scans the whole repository for LFS pointer files.
18func SearchPointerBlobs(ctx context.Context, repo *git.Repository, pointerChan chan<- PointerBlob, errChan chan<- error) {
19 basePath := repo.Path
20
21 catFileCheckReader, catFileCheckWriter := io.Pipe()
22 shasToBatchReader, shasToBatchWriter := io.Pipe()
23 catFileBatchReader, catFileBatchWriter := io.Pipe()
24
25 wg := sync.WaitGroup{}
26 wg.Add(6)
27
28 // Create the go-routines in reverse order.
29
30 // 4. Take the output of cat-file --batch and check if each file in turn
31 // to see if they're pointers to files in the LFS store
32 go createPointerResultsFromCatFileBatch(ctx, catFileBatchReader, &wg, pointerChan)
33
34 // 3. Take the shas of the blobs and batch read them
35 go catFileBatch(ctx, shasToBatchReader, catFileBatchWriter, &wg, basePath)
36
37 // 2. From the provided objects restrict to blobs <=1k
38 go blobsLessThan1024FromCatFileBatchCheck(catFileCheckReader, shasToBatchWriter, &wg)
39
40 // 1. Run batch-check on all objects in the repository
41 revListReader, revListWriter := io.Pipe()
42 shasToCheckReader, shasToCheckWriter := io.Pipe()
43 go catFileBatchCheck(ctx, shasToCheckReader, catFileCheckWriter, &wg, basePath)
44 go blobsFromRevListObjects(revListReader, shasToCheckWriter, &wg)
45 go revListAllObjects(ctx, revListWriter, &wg, basePath, errChan)
46 wg.Wait()
47
48 close(pointerChan)
49 close(errChan)
50}
51
52func createPointerResultsFromCatFileBatch(ctx context.Context, catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pointerChan chan<- PointerBlob) {
53 defer wg.Done()
54 defer catFileBatchReader.Close() //nolint: errcheck
55
56 bufferedReader := bufio.NewReader(catFileBatchReader)
57 buf := make([]byte, 1025)
58
59loop:
60 for {
61 select {
62 case <-ctx.Done():
63 break loop
64 default:
65 }
66
67 // File descriptor line: sha
68 sha, err := bufferedReader.ReadString(' ')
69 if err != nil {
70 _ = catFileBatchReader.CloseWithError(err)
71 break
72 }
73 sha = strings.TrimSpace(sha)
74 // Throw away the blob
75 if _, err := bufferedReader.ReadString(' '); err != nil {
76 _ = catFileBatchReader.CloseWithError(err)
77 break
78 }
79 sizeStr, err := bufferedReader.ReadString('\n')
80 if err != nil {
81 _ = catFileBatchReader.CloseWithError(err)
82 break
83 }
84 size, err := strconv.Atoi(sizeStr[:len(sizeStr)-1])
85 if err != nil {
86 _ = catFileBatchReader.CloseWithError(err)
87 break
88 }
89 pointerBuf := buf[:size+1]
90 if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil {
91 _ = catFileBatchReader.CloseWithError(err)
92 break
93 }
94 pointerBuf = pointerBuf[:size]
95 // Now we need to check if the pointerBuf is an LFS pointer
96 pointer, _ := ReadPointerFromBuffer(pointerBuf)
97 if !pointer.IsValid() {
98 continue
99 }
100
101 pointerChan <- PointerBlob{Hash: sha, Pointer: pointer}
102 }
103}
104
105func catFileBatch(ctx context.Context, shasToBatchReader *io.PipeReader, catFileBatchWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string) {
106 defer wg.Done()
107 defer shasToBatchReader.Close() //nolint: errcheck
108 defer catFileBatchWriter.Close() //nolint: errcheck
109
110 stderr := new(bytes.Buffer)
111 var errbuf strings.Builder
112 if err := gitm.NewCommandWithContext(ctx, "cat-file", "--batch").
113 WithTimeout(-1).
114 RunInDirWithOptions(basePath, gitm.RunInDirOptions{
115 Stdout: catFileBatchWriter,
116 Stdin: shasToBatchReader,
117 Stderr: stderr,
118 }); err != nil {
119 _ = shasToBatchReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String()))
120 }
121}
122
// blobsLessThan1024FromCatFileBatchCheck filters `git cat-file --batch-check`
// style lines ("<sha> <type> <size>") read from catFileCheckReader and writes
// the sha of every blob whose size is at most 1024 bytes, one per line, to
// shasToBatchWriter.
//
// Empty lines, lines with fewer than three space-separated fields, non-blob
// objects and lines whose size is not a valid integer are skipped.
//
// On return the reader is closed, the writer is closed with the scanner's
// error (a plain close when there is none) and wg.Done is called. If writing
// fails, the reader is closed with that error and the function returns
// immediately.
func blobsLessThan1024FromCatFileBatchCheck(catFileCheckReader *io.PipeReader, shasToBatchWriter *io.PipeWriter, wg *sync.WaitGroup) {
	defer wg.Done()
	defer catFileCheckReader.Close() //nolint: errcheck
	scanner := bufio.NewScanner(catFileCheckReader)
	defer func() {
		_ = shasToBatchWriter.CloseWithError(scanner.Err())
	}()

	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 {
			continue
		}
		fields := strings.Split(line, " ")
		if len(fields) < 3 || fields[1] != "blob" {
			continue
		}
		// A size that does not parse cannot be shown to fit the limit, so the
		// object is skipped rather than treated as size 0.
		size, err := strconv.Atoi(fields[2])
		if err != nil || size > 1024 {
			continue
		}
		// A pipe write either delivers the whole slice or fails, so a single
		// call is enough. It only fails once the other end is closed, in
		// which case no further line can be delivered either.
		if _, err := io.WriteString(shasToBatchWriter, fields[0]+"\n"); err != nil {
			_ = catFileCheckReader.CloseWithError(err)
			return
		}
	}
}
154
155func catFileBatchCheck(ctx context.Context, shasToCheckReader *io.PipeReader, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string) {
156 defer wg.Done()
157 defer shasToCheckReader.Close() //nolint: errcheck
158 defer catFileCheckWriter.Close() //nolint: errcheck
159
160 stderr := new(bytes.Buffer)
161 var errbuf strings.Builder
162 if err := gitm.NewCommandWithContext(ctx, "cat-file", "--batch-check").
163 WithTimeout(-1).
164 RunInDirWithOptions(basePath, gitm.RunInDirOptions{
165 Stdout: catFileCheckWriter,
166 Stdin: shasToCheckReader,
167 Stderr: stderr,
168 }); err != nil {
169 _ = shasToCheckReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String()))
170 }
171}
172
// blobsFromRevListObjects filters `git rev-list --objects` style lines
// ("<sha>" or "<sha> <path>") read from revListReader and writes the sha of
// every line that carries a non-empty second field, one per line, to
// shasToCheckWriter.
//
// Empty lines, lines without a second field and lines whose second field is
// empty are skipped. No object type is checked here; anything listed with a
// path is forwarded.
//
// On return the reader is closed, the writer is closed with the scanner's
// error (a plain close when there is none) and wg.Done is called. If writing
// fails, the reader is closed with that error and the function returns
// immediately.
func blobsFromRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io.PipeWriter, wg *sync.WaitGroup) {
	defer wg.Done()
	defer revListReader.Close() //nolint: errcheck
	scanner := bufio.NewScanner(revListReader)
	defer func() {
		_ = shasToCheckWriter.CloseWithError(scanner.Err())
	}()

	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 {
			continue
		}
		fields := strings.Split(line, " ")
		if len(fields) < 2 || len(fields[1]) == 0 {
			continue
		}
		// A pipe write either delivers the whole slice or fails, so a single
		// call is enough. It only fails once the other end is closed, in
		// which case no further line can be delivered either.
		if _, err := io.WriteString(shasToCheckWriter, fields[0]+"\n"); err != nil {
			_ = revListReader.CloseWithError(err)
			return
		}
	}
}
201
202func revListAllObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string, errChan chan<- error) {
203 defer wg.Done()
204 defer revListWriter.Close() //nolint: errcheck
205
206 stderr := new(bytes.Buffer)
207 var errbuf strings.Builder
208 if err := gitm.NewCommandWithContext(ctx, "rev-list", "--objects", "--all").
209 WithTimeout(-1).
210 RunInDirWithOptions(basePath, gitm.RunInDirOptions{
211 Stdout: revListWriter,
212 Stderr: stderr,
213 }); err != nil {
214 errChan <- fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String())
215 }
216}