forked from numtide/treefmt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwalk.go
301 lines (249 loc) · 7.8 KB
/
walk.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
package walk
import (
"context"
"crypto/md5" //nolint:gosec
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"github.com/numtide/treefmt/v2/stats"
bolt "go.etcd.io/bbolt"
)
// Type selects which kind of Reader is built by NewReader and NewCompositeReader.
// Its values are the constants Auto, Stdin, Filesystem and Git.
//
//nolint:recvcheck
//go:generate enumer -type=Type -text -transform=snake -output=./type_enum.go
type Type int
const (
// Auto makes NewReader try a Git reader first and fall back to a Filesystem reader if that fails.
Auto Type = iota
// Stdin is rejected by NewReader; NewCompositeReader handles it itself and requires exactly one path.
Stdin
// Filesystem makes NewReader build its reader with NewFilesystemReader.
Filesystem
// Git makes NewReader build its reader with NewGitReader.
Git
// BatchSize is an untyped constant (not a Type value) that NewReader passes to NewFilesystemReader and
// NewCachedReader.
// NOTE(review): presumably the number of files handled per batch — confirm against those constructors.
BatchSize = 1024
)
// ReleaseFunc is a hook that is registered on a File with AddReleaseFunc and invoked by File.Release,
// which hands it the context it was called with.
type ReleaseFunc func(ctx context.Context) error
// File represents a file object with its path, relative path, file info, and potential cache entry.
type File struct {
// Path is the location passed to os.Stat by Stat and the value returned by String.
Path string
// RelPath is the file's relative path.
// NOTE(review): presumably relative to the tree root — confirm against the readers that populate it.
RelPath string
// Info is the file info from when the file was first read; Stat compares the current state against it.
Info fs.FileInfo
// FormattedInfo is the result of os.Stat after formatting the file.
// When it is nil, NewFormatSignature falls back to Info.
FormattedInfo fs.FileInfo
// FormattersSignature represents the sequence of formatters and their config that was applied to this file.
FormattersSignature []byte
// CachedFormatSignature is the last FormatSignature generated for this file, retrieved from the cache.
CachedFormatSignature []byte
// releaseFuncs holds the hooks added with AddReleaseFunc; Release runs them in slice order.
releaseFuncs []ReleaseFunc
}
// formatSignature returns the MD5 digest of formattersSig followed by the text "<mod time> <size>",
// where the mod time is info's modification time in whole Unix seconds and size is info's size in bytes.
// MD5 is used purely as a fingerprint here, hence the gosec suppressions.
func formatSignature(formattersSig []byte, info fs.FileInfo) []byte {
	digest := md5.New() //nolint:gosec

	digest.Write(formattersSig)

	// mix in the mod time and the size, formatted directly into the hash
	fmt.Fprintf(digest, "%v %v", info.ModTime().Unix(), info.Size())

	return digest.Sum(nil)
}
// FormatSignature combines formattersSig with the file info captured when the file was traversed (f.Info),
// producing a signature that identifies both the formatters applied and the state of the file they were
// applied to.
// It returns an error if the file has no Info.
func (f *File) FormatSignature(formattersSig []byte) ([]byte, error) {
	if info := f.Info; info != nil {
		return formatSignature(formattersSig, info), nil
	}

	return nil, errors.New("file has no info")
}
// NewFormatSignature combines FormattersSignature with the file's info after formatting, producing a
// signature that identifies the formatters applied and their outcome.
// FormattedInfo is used when set; otherwise the info from when the file was first read (Info) is used.
// It returns an error if neither info is available, or if FormattersSignature is nil.
func (f *File) NewFormatSignature() ([]byte, error) {
	// start from the original info and prefer the post-format info when there is one
	info := f.Info
	if f.FormattedInfo != nil {
		info = f.FormattedInfo
	}

	switch {
	case info == nil:
		// ensure info is not nil
		return nil, errors.New("file has no info")
	case f.FormattersSignature == nil:
		// ensure we have a formatters signature
		return nil, errors.New("file has no formatters signature")
	}

	return formatSignature(f.FormattersSignature, info), nil
}
// Release runs the File's registered release functions in the order they were added, passing ctx to each
// so callers can hand parameters to the hooks.
// It stops at the first function that fails and returns that error unchanged; later functions are not run.
func (f *File) Release(ctx context.Context) error {
	hooks := f.releaseFuncs

	for i := range hooks {
		err := hooks[i](ctx)
		if err == nil {
			continue
		}

		return err
	}

	return nil
}
// AddReleaseFunc adds a release function to the File's list of release functions.
// Functions are appended, so Release invokes them in the order they were added.
// NOTE(review): fn is not checked for nil; a nil fn would panic when Release calls it.
func (f *File) AddReleaseFunc(fn ReleaseFunc) {
f.releaseFuncs = append(f.releaseFuncs, fn)
}
// Stat checks if the file has changed by comparing its current state (size, mod time) to when it was first read.
//
// It returns:
//   - changed: true if the size differs, or if the mod time differs at one-second granularity.
//   - info: the current file info when changed is true; nil when the file is unchanged or an error occurred.
//   - err: non-nil if the file has no Info to compare against, or if os.Stat fails for f.Path.
func (f *File) Stat() (changed bool, info fs.FileInfo, err error) {
	// Without the original info there is nothing to compare against. Report that as an error, the same way
	// FormatSignature and NewFormatSignature do, rather than panicking on a nil Info below.
	if f.Info == nil {
		return false, nil, errors.New("file has no info")
	}

	// Get the file's current state
	current, err := os.Stat(f.Path)
	if err != nil {
		return false, nil, fmt.Errorf("failed to stat %s: %w", f.Path, err)
	}

	// Check the size first
	if f.Info.Size() != current.Size() {
		return true, current, nil
	}

	// POSIX specifies EPOCH time for Mod time, but some filesystems give more precision.
	// Some formatters mess with the mod time (e.g. dos2unix) but not to the same precision,
	// triggering false positives.
	// We truncate everything below a second.
	if f.Info.ModTime().Unix() != current.ModTime().Unix() {
		return true, current, nil
	}

	return false, nil, nil
}
// String returns the file's path as a string.
// The value is f.Path, not f.RelPath.
func (f *File) String() string {
return f.Path
}
// Reader is an interface for reading files.
type Reader interface {
// Read is given a slice of files and reports how many it read in this call.
// CompositeReader treats an error matching io.EOF as "this reader is exhausted" and moves on to the next one.
// NOTE(review): presumably implementations populate files[:n] — confirm against the concrete readers.
Read(ctx context.Context, files []*File) (n int, err error)
// Close is called by CompositeReader.Close on every reader it holds.
// NOTE(review): presumably releases whatever the reader holds — confirm against the concrete readers.
Close() error
}
// CompositeReader combines multiple Readers into one.
// It iterates over the given readers, reading each until completion.
type CompositeReader struct {
// idx is the position in readers of the next reader to select.
idx int
// current is the reader being read from; nil means the next Read must select a reader,
// or return io.EOF if none are left.
current Reader
// readers are the underlying readers, read one after another in slice order.
readers []Reader
}
// Read reads from the underlying readers one at a time, in order.
//
// Each call is forwarded to the reader currently being drained. When that reader reports io.EOF the EOF is
// swallowed, the files it returned in that call are still reported, and the following call moves on to the
// next reader. Once every reader has been exhausted Read returns 0 and io.EOF. Any other error is wrapped
// and returned, and the current reader is kept for the next call.
func (c *CompositeReader) Read(ctx context.Context, files []*File) (n int, err error) {
	active := c.current

	if active == nil {
		// nothing is being drained: pick the next reader, or stop if there are none left
		if c.idx >= len(c.readers) {
			return 0, io.EOF
		}

		active = c.readers[c.idx]
		c.current = active
		c.idx++
	}

	n, err = active.Read(ctx, files)

	switch {
	case err == nil:
		return n, nil
	case errors.Is(err, io.EOF):
		// this reader is done; clear it so the next call selects the following reader
		c.current = nil

		return n, nil
	default:
		return n, fmt.Errorf("failed to read from current reader: %w", err)
	}
}
// Close closes every underlying reader, in order.
//
// A reader that fails to close does not stop the remaining readers from being closed; returning early
// would leave them open. Each failure is wrapped as "failed to close reader: ..." and all failures are
// combined with errors.Join, so errors.Is/errors.As still match the individual causes.
// It returns nil if every reader closed cleanly or there are no readers.
func (c *CompositeReader) Close() error {
	var errs []error

	for _, reader := range c.readers {
		if err := reader.Close(); err != nil {
			errs = append(errs, fmt.Errorf("failed to close reader: %w", err))
		}
	}

	// errors.Join returns nil when errs is empty
	return errors.Join(errs...)
}
// NewReader builds a Reader for path under root according to walkType:
//
//   - Auto tries a Git reader first and falls back to a Filesystem reader if that fails.
//   - Filesystem uses NewFilesystemReader.
//   - Git uses NewGitReader.
//   - Stdin and unknown values are rejected with an error.
//
// When db is non-nil the reader is wrapped with NewCachedReader; db is nil if --no-cache is enabled.
// statz is passed through to the reader constructors.
// On any error the returned Reader is nil.
//
//nolint:ireturn
func NewReader(
	walkType Type,
	root string,
	path string,
	db *bolt.DB,
	statz *stats.Stats,
) (Reader, error) {
	var (
		err    error
		reader Reader
	)

	switch walkType {
	case Auto:
		// for now, we keep it simple and try git first, filesystem second
		if reader, err = NewReader(Git, root, path, db, statz); err == nil {
			return reader, nil
		}

		// the recursive calls already apply the cache wrapper, so return their result directly
		return NewReader(Filesystem, root, path, db, statz)
	case Stdin:
		return nil, errors.New("stdin walk type is not supported")
	case Filesystem:
		reader = NewFilesystemReader(root, path, statz, BatchSize)
	case Git:
		reader, err = NewGitReader(root, path, statz)
	default:
		return nil, fmt.Errorf("unknown walk type: %v", walkType)
	}

	if err != nil {
		return nil, err
	}

	if db == nil {
		// db will be nil if --no-cache is enabled: hand back the plain reader
		return reader, nil
	}

	// wrap with cached reader
	cached, err := NewCachedReader(db, BatchSize, reader)
	if err != nil {
		// Return a literal nil rather than whatever came back with the error, so callers never
		// receive a non-nil interface wrapping a nil pointer.
		return nil, err
	}

	return cached, nil
}
// NewCompositeReader returns a reader for the `root` and all `paths`.
//
// With no paths it returns a single reader for the tree root. For the Stdin walk type exactly one path
// is required and a stdin reader is returned. Otherwise every path is resolved against root and
// inspected with os.Lstat (so symlinks are not followed):
//
//   - symlinks are skipped,
//   - directories get a reader of the requested walkType,
//   - anything else gets a Filesystem reader.
//
// The resulting readers are combined, in the order of paths, into a CompositeReader. An error is
// returned if a path cannot be stat'ed or a reader cannot be created.
//
//nolint:ireturn
func NewCompositeReader(
	walkType Type,
	root string,
	paths []string,
	db *bolt.DB,
	statz *stats.Stats,
) (Reader, error) {
	// if no paths are provided we default to processing the tree root
	if len(paths) == 0 {
		return NewReader(walkType, root, "", db, statz)
	}

	// check we have received 1 path for the stdin walk type
	if walkType == Stdin {
		if len(paths) != 1 {
			return nil, errors.New("stdin walk requires exactly one path")
		}

		return NewStdinReader(root, paths[0], statz), nil
	}

	// Readers are appended rather than stored by index: a skipped path must not leave a nil entry
	// behind, because CompositeReader.Read and Close call methods on every element.
	readers := make([]Reader, 0, len(paths))

	// create a reader for each provided path
	for _, relPath := range paths {
		// create a clean path under root
		path := filepath.Clean(filepath.Join(root, relPath))

		// check the path exists (don't follow symlinks)
		info, err := os.Lstat(path)
		if err != nil {
			return nil, fmt.Errorf("failed to stat %s: %w", path, err)
		}

		// for symlinks -> we ignore them since it does not make sense to follow them
		// as normal files in the `root` will be picked up nevertheless.
		if info.Mode()&os.ModeSymlink == os.ModeSymlink {
			continue
		}

		// for files, we enforce a simple filesystem read;
		// for directories, we honour the walk type as we traverse them
		readerType := Filesystem
		if info.IsDir() {
			readerType = walkType
		}

		reader, err := NewReader(readerType, root, relPath, db, statz)
		if err != nil {
			return nil, fmt.Errorf("failed to create reader for %s: %w", relPath, err)
		}

		readers = append(readers, reader)
	}

	return &CompositeReader{
		readers: readers,
	}, nil
}