1package lfs
2
3import (
4 "bufio"
5 "bytes"
6 "context"
7 "fmt"
8 "io"
9 "strconv"
10 "strings"
11 "sync"
12
13 "github.com/charmbracelet/soft-serve/git"
14 gitm "github.com/gogs/git-module"
15)
16
17// SearchPointerBlobs scans the whole repository for LFS pointer files
18func SearchPointerBlobs(ctx context.Context, repo *git.Repository, pointerChan chan<- PointerBlob, errChan chan<- error) {
19 basePath := repo.Path
20
21 catFileCheckReader, catFileCheckWriter := io.Pipe()
22 shasToBatchReader, shasToBatchWriter := io.Pipe()
23 catFileBatchReader, catFileBatchWriter := io.Pipe()
24
25 wg := sync.WaitGroup{}
26 wg.Add(6)
27
28 // Create the go-routines in reverse order.
29
30 // 4. Take the output of cat-file --batch and check if each file in turn
31 // to see if they're pointers to files in the LFS store
32 go createPointerResultsFromCatFileBatch(ctx, catFileBatchReader, &wg, pointerChan)
33
34 // 3. Take the shas of the blobs and batch read them
35 go catFileBatch(ctx, shasToBatchReader, catFileBatchWriter, &wg, basePath)
36
37 // 2. From the provided objects restrict to blobs <=1k
38 go blobsLessThan1024FromCatFileBatchCheck(catFileCheckReader, shasToBatchWriter, &wg)
39
40 // 1. Run batch-check on all objects in the repository
41 revListReader, revListWriter := io.Pipe()
42 shasToCheckReader, shasToCheckWriter := io.Pipe()
43 go catFileBatchCheck(ctx, shasToCheckReader, catFileCheckWriter, &wg, basePath)
44 go blobsFromRevListObjects(revListReader, shasToCheckWriter, &wg)
45 go revListAllObjects(ctx, revListWriter, &wg, basePath, errChan)
46 wg.Wait()
47
48 close(pointerChan)
49 close(errChan)
50}
51
52func createPointerResultsFromCatFileBatch(ctx context.Context, catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pointerChan chan<- PointerBlob) {
53 defer wg.Done()
54 defer catFileBatchReader.Close() // nolint: errcheck
55
56 bufferedReader := bufio.NewReader(catFileBatchReader)
57 buf := make([]byte, 1025)
58
59loop:
60 for {
61 select {
62 case <-ctx.Done():
63 break loop
64 default:
65 }
66
67 // File descriptor line: sha
68 sha, err := bufferedReader.ReadString(' ')
69 if err != nil {
70 _ = catFileBatchReader.CloseWithError(err)
71 break
72 }
73 sha = strings.TrimSpace(sha)
74 // Throw away the blob
75 if _, err := bufferedReader.ReadString(' '); err != nil {
76 _ = catFileBatchReader.CloseWithError(err)
77 break
78 }
79 sizeStr, err := bufferedReader.ReadString('\n')
80 if err != nil {
81 _ = catFileBatchReader.CloseWithError(err)
82 break
83 }
84 size, err := strconv.Atoi(sizeStr[:len(sizeStr)-1])
85 if err != nil {
86 _ = catFileBatchReader.CloseWithError(err)
87 break
88 }
89 pointerBuf := buf[:size+1]
90 if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil {
91 _ = catFileBatchReader.CloseWithError(err)
92 break
93 }
94 pointerBuf = pointerBuf[:size]
95 // Now we need to check if the pointerBuf is an LFS pointer
96 pointer, _ := ReadPointerFromBuffer(pointerBuf)
97 if !pointer.IsValid() {
98 continue
99 }
100
101 pointerChan <- PointerBlob{Hash: sha, Pointer: pointer}
102 }
103}
104
105func catFileBatch(ctx context.Context, shasToBatchReader *io.PipeReader, catFileBatchWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string) {
106 defer wg.Done()
107 defer shasToBatchReader.Close() // nolint: errcheck
108 defer catFileBatchWriter.Close() // nolint: errcheck
109
110 stderr := new(bytes.Buffer)
111 var errbuf strings.Builder
112 if err := gitm.NewCommandWithContext(ctx, "cat-file", "--batch").RunInDirWithOptions(basePath, gitm.RunInDirOptions{
113 Stdout: catFileBatchWriter,
114 Stdin: shasToBatchReader,
115 Stderr: stderr,
116 }); err != nil {
117 _ = shasToBatchReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String()))
118 }
119}
120
// blobsLessThan1024FromCatFileBatchCheck filters the output of
// `git cat-file --batch-check` read from catFileCheckReader.
//
// Every input line is expected to have the form "<sha> <type> <size>". For
// each line whose type is "blob" and whose size is a valid number between 0
// and 1024, "<sha>\n" is written to shasToBatchWriter. Empty lines, other
// object types, lines with fewer than three fields and lines whose size cannot
// be parsed are skipped.
//
// On return the reader is closed, the writer is closed with the scanner's
// error (a plain close if there was none) and wg is marked done. If writing
// fails, the reader is closed with the write error and processing stops.
func blobsLessThan1024FromCatFileBatchCheck(catFileCheckReader *io.PipeReader, shasToBatchWriter *io.PipeWriter, wg *sync.WaitGroup) {
	defer wg.Done()
	defer catFileCheckReader.Close() // nolint: errcheck
	scanner := bufio.NewScanner(catFileCheckReader)
	defer func() {
		_ = shasToBatchWriter.CloseWithError(scanner.Err())
	}()

	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 {
			continue
		}
		fields := strings.Split(line, " ")
		if len(fields) < 3 || fields[1] != "blob" {
			continue
		}
		// A size that does not parse must not be treated as 0, otherwise the
		// object would slip through the size filter.
		size, err := strconv.Atoi(fields[2])
		if err != nil || size < 0 || size > 1024 {
			continue
		}
		if _, err := io.WriteString(shasToBatchWriter, fields[0]+"\n"); err != nil {
			// The consumer is gone; stop instead of retrying for every line
			// the scanner has already buffered.
			_ = catFileCheckReader.CloseWithError(err)
			return
		}
	}
}
152
153func catFileBatchCheck(ctx context.Context, shasToCheckReader *io.PipeReader, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string) {
154 defer wg.Done()
155 defer shasToCheckReader.Close() // nolint: errcheck
156 defer catFileCheckWriter.Close() // nolint: errcheck
157
158 stderr := new(bytes.Buffer)
159 var errbuf strings.Builder
160 if err := gitm.NewCommandWithContext(ctx, "cat-file", "--batch-check").RunInDirWithOptions(basePath, gitm.RunInDirOptions{
161 Stdout: catFileCheckWriter,
162 Stdin: shasToCheckReader,
163 Stderr: stderr,
164 }); err != nil {
165 _ = shasToCheckReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String()))
166 }
167}
168
// blobsFromRevListObjects filters the output of `git rev-list --objects` read
// from revListReader.
//
// A line is forwarded when it has the form "<sha> <path>" with a non-empty
// path; in that case "<sha>\n" is written to shasToCheckWriter. Lines without
// a space, lines with nothing after the first space and lines that start with
// a space are skipped. Only the first space separates the id from the path, so
// paths that begin with or contain spaces are handled like any other path.
//
// On return the reader is closed, the writer is closed with the scanner's
// error (a plain close if there was none) and wg is marked done. If writing
// fails, the reader is closed with the write error and processing stops.
func blobsFromRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io.PipeWriter, wg *sync.WaitGroup) {
	defer wg.Done()
	defer revListReader.Close() // nolint: errcheck
	scanner := bufio.NewScanner(revListReader)
	defer func() {
		_ = shasToCheckWriter.CloseWithError(scanner.Err())
	}()

	for scanner.Scan() {
		line := scanner.Text()
		sep := strings.IndexByte(line, ' ')
		// sep < 0: no path at all; sep == 0: no object id;
		// sep == len(line)-1: the path is empty.
		if sep <= 0 || sep == len(line)-1 {
			continue
		}
		if _, err := io.WriteString(shasToCheckWriter, line[:sep]+"\n"); err != nil {
			// The consumer is gone; stop instead of retrying for every line
			// the scanner has already buffered.
			_ = revListReader.CloseWithError(err)
			return
		}
	}
}
197
198func revListAllObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string, errChan chan<- error) {
199 defer wg.Done()
200 defer revListWriter.Close() // nolint: errcheck
201
202 stderr := new(bytes.Buffer)
203 var errbuf strings.Builder
204 if err := gitm.NewCommandWithContext(ctx, "rev-list", "--objects", "--all").RunInDirWithOptions(basePath, gitm.RunInDirOptions{
205 Stdout: revListWriter,
206 Stderr: stderr,
207 }); err != nil {
208 errChan <- fmt.Errorf("git rev-list [%s]: %w - %s", basePath, err, errbuf.String())
209 }
210}