Skip to content

Commit 404b0a9

Browse files
author
Brian McGee
committed
Merge pull request 'improve caching and tracking' (#34) from feat/improve-caching into main
Reviewed-on: https://git.numtide.com/numtide/treefmt/pulls/34
2 parents 618f6f7 + ed10f97 commit 404b0a9

16 files changed

+383
-296
lines changed

cache/cache.go

+34-52
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ import (
55
"crypto/sha1"
66
"encoding/hex"
77
"fmt"
8-
"io/fs"
98
"os"
10-
"path/filepath"
119
"runtime"
1210
"time"
1311

@@ -57,38 +55,38 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
5755
name := hex.EncodeToString(digest)
5856
path, err := xdg.CacheFile(fmt.Sprintf("treefmt/eval-cache/%v.db", name))
5957
if err != nil {
60-
return fmt.Errorf("%w: could not resolve local path for the cache", err)
58+
return fmt.Errorf("could not resolve local path for the cache: %w", err)
6159
}
6260

6361
db, err = bolt.Open(path, 0o600, nil)
6462
if err != nil {
65-
return fmt.Errorf("%w: failed to open cache", err)
63+
return fmt.Errorf("failed to open cache at %v: %w", path, err)
6664
}
6765

6866
err = db.Update(func(tx *bolt.Tx) error {
6967
// create bucket for tracking paths
7068
pathsBucket, err := tx.CreateBucketIfNotExists([]byte(pathsBucket))
7169
if err != nil {
72-
return fmt.Errorf("%w: failed to create paths bucket", err)
70+
return fmt.Errorf("failed to create paths bucket: %w", err)
7371
}
7472

7573
// create bucket for tracking formatters
7674
formattersBucket, err := tx.CreateBucketIfNotExists([]byte(formattersBucket))
7775
if err != nil {
78-
return fmt.Errorf("%w: failed to create formatters bucket", err)
76+
return fmt.Errorf("failed to create formatters bucket: %w", err)
7977
}
8078

8179
// check for any newly configured or modified formatters
8280
for name, formatter := range formatters {
8381

8482
stat, err := os.Lstat(formatter.Executable())
8583
if err != nil {
86-
return fmt.Errorf("%w: failed to state formatter executable", err)
84+
return fmt.Errorf("failed to stat formatter executable %v: %w", formatter.Executable(), err)
8785
}
8886

8987
entry, err := getEntry(formattersBucket, name)
9088
if err != nil {
91-
return fmt.Errorf("%w: failed to retrieve entry for formatter", err)
89+
return fmt.Errorf("failed to retrieve cache entry for formatter %v: %w", name, err)
9290
}
9391

9492
clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
@@ -107,7 +105,7 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
107105
}
108106

109107
if err = putEntry(formattersBucket, name, entry); err != nil {
110-
return fmt.Errorf("%w: failed to write formatter entry", err)
108+
return fmt.Errorf("failed to write cache entry for formatter %v: %w", name, err)
111109
}
112110
}
113111

@@ -117,22 +115,22 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
117115
if !ok {
118116
// remove the formatter entry from the cache
119117
if err = formattersBucket.Delete(key); err != nil {
120-
return fmt.Errorf("%w: failed to remove formatter entry", err)
118+
return fmt.Errorf("failed to remove cache entry for formatter %v: %w", key, err)
121119
}
122120
// indicate a clean is required
123121
clean = true
124122
}
125123
return nil
126124
}); err != nil {
127-
return fmt.Errorf("%w: failed to check for removed formatters", err)
125+
return fmt.Errorf("failed to check cache for removed formatters: %w", err)
128126
}
129127

130128
if clean {
131129
// remove all path entries
132130
c := pathsBucket.Cursor()
133131
for k, v := c.First(); !(k == nil && v == nil); k, v = c.Next() {
134132
if err = c.Delete(); err != nil {
135-
return fmt.Errorf("%w: failed to remove path entry", err)
133+
return fmt.Errorf("failed to remove path entry: %w", err)
136134
}
137135
}
138136
}
@@ -157,7 +155,7 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
157155
if b != nil {
158156
var cached Entry
159157
if err := msgpack.Unmarshal(b, &cached); err != nil {
160-
return nil, fmt.Errorf("%w: failed to unmarshal cache info for path '%v'", err, path)
158+
return nil, fmt.Errorf("failed to unmarshal cache info for path '%v': %w", path, err)
161159
}
162160
return &cached, nil
163161
} else {
@@ -169,18 +167,18 @@ func getEntry(bucket *bolt.Bucket, path string) (*Entry, error) {
169167
func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
170168
bytes, err := msgpack.Marshal(entry)
171169
if err != nil {
172-
return fmt.Errorf("%w: failed to marshal cache entry", err)
170+
return fmt.Errorf("failed to marshal cache path %v: %w", path, err)
173171
}
174172

175173
if err = bucket.Put([]byte(path), bytes); err != nil {
176-
return fmt.Errorf("%w: failed to put cache entry", err)
174+
return fmt.Errorf("failed to put cache path %v: %w", path, err)
177175
}
178176
return nil
179177
}
180178

181179
// ChangeSet is used to walk a filesystem, starting at root, and output any new or changed files using filesCh.
182180
// It determines if a path is new or has changed by comparing against cache entries.
183-
func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) error {
181+
func ChangeSet(ctx context.Context, walker walk.Walker, filesCh chan<- *walk.File) error {
184182
start := time.Now()
185183

186184
defer func() {
@@ -198,24 +196,21 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
198196
}
199197
}()
200198

201-
// for quick removal of tree root from paths
202-
relPathOffset := len(walker.Root()) + 1
203-
204-
return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error {
199+
return walker.Walk(ctx, func(file *walk.File, err error) error {
205200
select {
206201
case <-ctx.Done():
207202
return ctx.Err()
208203
default:
209204
if err != nil {
210-
return fmt.Errorf("%w: failed to walk path", err)
211-
} else if info.IsDir() {
205+
return fmt.Errorf("failed to walk path: %w", err)
206+
} else if file.Info.IsDir() {
212207
// ignore directories
213208
return nil
214209
}
215210
}
216211

217212
// ignore symlinks
218-
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
213+
if file.Info.Mode()&os.ModeSymlink == os.ModeSymlink {
219214
return nil
220215
}
221216

@@ -224,18 +219,17 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
224219
if tx == nil {
225220
tx, err = db.Begin(false)
226221
if err != nil {
227-
return fmt.Errorf("%w: failed to open a new read tx", err)
222+
return fmt.Errorf("failed to open a new cache read tx: %w", err)
228223
}
229224
bucket = tx.Bucket([]byte(pathsBucket))
230225
}
231226

232-
relPath := path[relPathOffset:]
233-
cached, err := getEntry(bucket, relPath)
227+
cached, err := getEntry(bucket, file.RelPath)
234228
if err != nil {
235229
return err
236230
}
237231

238-
changedOrNew := cached == nil || !(cached.Modified == info.ModTime() && cached.Size == info.Size())
232+
changedOrNew := cached == nil || !(cached.Modified == file.Info.ModTime() && cached.Size == file.Info.Size())
239233

240234
stats.Add(stats.Traversed, 1)
241235
if !changedOrNew {
@@ -250,7 +244,7 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
250244
case <-ctx.Done():
251245
return ctx.Err()
252246
default:
253-
pathsCh <- relPath
247+
filesCh <- file
254248
}
255249

256250
// close the current tx if we have reached the batch size
@@ -266,47 +260,35 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
266260
}
267261

268262
// Update is used to record updated cache information for the specified list of paths.
269-
func Update(treeRoot string, paths []string) (int, error) {
263+
func Update(files []*walk.File) error {
270264
start := time.Now()
271265
defer func() {
272-
logger.Infof("finished updating %v paths in %v", len(paths), time.Since(start))
266+
logger.Infof("finished processing %v paths in %v", len(files), time.Since(start))
273267
}()
274268

275-
if len(paths) == 0 {
276-
return 0, nil
269+
if len(files) == 0 {
270+
return nil
277271
}
278272

279-
var changes int
280-
281-
return changes, db.Update(func(tx *bolt.Tx) error {
273+
return db.Update(func(tx *bolt.Tx) error {
282274
bucket := tx.Bucket([]byte(pathsBucket))
283275

284-
for _, path := range paths {
285-
cached, err := getEntry(bucket, path)
286-
if err != nil {
287-
return err
288-
}
289-
290-
pathInfo, err := os.Stat(filepath.Join(treeRoot, path))
276+
for _, f := range files {
277+
currentInfo, err := os.Stat(f.Path)
291278
if err != nil {
292279
return err
293280
}
294281

295-
if cached == nil || !(cached.Modified == pathInfo.ModTime() && cached.Size == pathInfo.Size()) {
296-
changes += 1
297-
} else {
298-
// no change to write
299-
continue
282+
if !(f.Info.ModTime() == currentInfo.ModTime() && f.Info.Size() == currentInfo.Size()) {
283+
stats.Add(stats.Formatted, 1)
300284
}
301285

302-
stats.Add(stats.Formatted, 1)
303-
304286
entry := Entry{
305-
Size: pathInfo.Size(),
306-
Modified: pathInfo.ModTime(),
287+
Size: currentInfo.Size(),
288+
Modified: currentInfo.ModTime(),
307289
}
308290

309-
if err = putEntry(bucket, path, &entry); err != nil {
291+
if err = putEntry(bucket, f.RelPath, &entry); err != nil {
310292
return err
311293
}
312294
}

cli/cli.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ type Format struct {
2626
Stdin bool `help:"Format the context passed in via stdin"`
2727
}
2828

29-
func (f *Format) Configure() {
29+
func ConfigureLogging() {
3030
log.SetReportTimestamp(false)
3131

32-
if f.Verbosity == 0 {
32+
if Cli.Verbosity == 0 {
3333
log.SetLevel(log.WarnLevel)
34-
} else if f.Verbosity == 1 {
34+
} else if Cli.Verbosity == 1 {
3535
log.SetLevel(log.InfoLevel)
36-
} else if f.Verbosity > 1 {
36+
} else if Cli.Verbosity > 1 {
3737
log.SetLevel(log.DebugLevel)
3838
}
3939
}

0 commit comments

Comments
 (0)