Skip to content

Commit fcce518

Browse files
committed
feat: various perf improvements
Signed-off-by: Brian McGee <[email protected]>
1 parent 6ae0e4f commit fcce518

File tree

8 files changed

+96
-36
lines changed

8 files changed

+96
-36
lines changed

cache/cache.go

+32-11
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"fmt"
88
"io/fs"
99
"os"
10+
"path/filepath"
11+
"runtime"
1012
"time"
1113

1214
"git.numtide.com/numtide/treefmt/format"
@@ -22,8 +24,6 @@ import (
2224
const (
2325
pathsBucket = "paths"
2426
formattersBucket = "formatters"
25-
26-
readBatchSize = 1024
2727
)
2828

2929
// Entry represents a cache entry, indicating the last size and modified time for a file path.
@@ -32,15 +32,19 @@ type Entry struct {
3232
Modified time.Time
3333
}
3434

35-
var db *bolt.DB
35+
var (
36+
db *bolt.DB
37+
ReadBatchSize = 1024 * runtime.NumCPU()
38+
logger *log.Logger
39+
)
3640

3741
// Open creates an instance of bolt.DB for a given treeRoot path.
3842
// If clean is true, Open will delete any existing data in the cache.
3943
//
4044
// The database will be located in `XDG_CACHE_DIR/treefmt/eval-cache/<id>.db`, where <id> is determined by hashing
4145
// the treeRoot path. This associates a given treeRoot with a given instance of the cache.
4246
func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter) (err error) {
43-
l := log.WithPrefix("cache")
47+
logger = log.WithPrefix("cache")
4448

4549
// determine a unique and consistent db name for the tree root
4650
h := sha1.New()
@@ -85,7 +89,7 @@ func Open(treeRoot string, clean bool, formatters map[string]*format.Formatter)
8589
}
8690

8791
clean = clean || entry == nil || !(entry.Size == stat.Size() && entry.Modified == stat.ModTime())
88-
l.Debug(
92+
logger.Debug(
8993
"checking if formatter has changed",
9094
"name", name,
9195
"clean", clean,
@@ -174,6 +178,12 @@ func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
174178
// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using pathsCh.
175179
// It determines if a path is new or has changed by comparing against cache entries.
176180
func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) error {
181+
start := time.Now()
182+
183+
defer func() {
184+
logger.Infof("finished generating change set in %v", time.Since(start))
185+
}()
186+
177187
var tx *bolt.Tx
178188
var bucket *bolt.Bucket
179189
var processed int
@@ -185,6 +195,9 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
185195
}
186196
}()
187197

198+
// for quick removal of tree root from paths
199+
relPathOffset := len(walker.Root()) + 1
200+
188201
return walker.Walk(ctx, func(path string, info fs.FileInfo, err error) error {
189202
select {
190203
case <-ctx.Done():
@@ -213,7 +226,8 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
213226
bucket = tx.Bucket([]byte(pathsBucket))
214227
}
215228

216-
cached, err := getEntry(bucket, path)
229+
relPath := path[relPathOffset:]
230+
cached, err := getEntry(bucket, relPath)
217231
if err != nil {
218232
return err
219233
}
@@ -230,21 +244,28 @@ func ChangeSet(ctx context.Context, walker walk.Walker, pathsCh chan<- string) e
230244
case <-ctx.Done():
231245
return ctx.Err()
232246
default:
233-
pathsCh <- path
247+
pathsCh <- relPath
234248
}
235249

236250
// close the current tx if we have reached the batch size
237251
processed += 1
238-
if processed == readBatchSize {
239-
return tx.Rollback()
252+
if processed == ReadBatchSize {
253+
err = tx.Rollback()
254+
tx = nil
255+
return err
240256
}
241257

242258
return nil
243259
})
244260
}
245261

246262
// Update is used to record updated cache information for the specified list of paths.
247-
func Update(paths []string) (int, error) {
263+
func Update(treeRoot string, paths []string) (int, error) {
264+
start := time.Now()
265+
defer func() {
266+
logger.Infof("finished updating %v paths in %v", len(paths), time.Since(start))
267+
}()
268+
248269
if len(paths) == 0 {
249270
return 0, nil
250271
}
@@ -260,7 +281,7 @@ func Update(paths []string) (int, error) {
260281
return err
261282
}
262283

263-
pathInfo, err := os.Stat(path)
284+
pathInfo, err := os.Stat(filepath.Join(treeRoot, path))
264285
if err != nil {
265286
return err
266287
}

cli/cli.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ type Format struct {
2727
}
2828

2929
func (f *Format) Configure() {
30-
log.SetReportTimestamp(false)
30+
log.SetReportTimestamp(true)
3131

3232
if f.Verbosity == 0 {
3333
log.SetLevel(log.WarnLevel)

cli/format.go

+20-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ import (
88
"io/fs"
99
"os"
1010
"os/signal"
11+
"path/filepath"
12+
"runtime"
1113
"slices"
14+
"strings"
1215
"syscall"
1316
"time"
1417

@@ -83,7 +86,7 @@ func (f *Format) Run() (err error) {
8386

8487
// init formatters
8588
for name, formatterCfg := range cfg.Formatters {
86-
formatter, err := format.NewFormatter(name, formatterCfg, globalExcludes)
89+
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
8790
if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
8891
l.Debugf("formatter not found: %v", name)
8992
continue
@@ -129,7 +132,7 @@ func (f *Format) Run() (err error) {
129132

130133
// create a channel for paths to be processed
131134
// we use a multiple of batch size here to allow for greater concurrency
132-
pathsCh = make(chan string, 10*BatchSize)
135+
pathsCh = make(chan string, BatchSize*runtime.NumCPU())
133136

134137
// create a channel for tracking paths that have been processed
135138
processedCh = make(chan string, cap(pathsCh))
@@ -148,10 +151,22 @@ func walkFilesystem(ctx context.Context) func() error {
148151
paths := Cli.Paths
149152

150153
if len(paths) == 0 && Cli.Stdin {
154+
155+
cwd, err := os.Getwd()
156+
if err != nil {
157+
return fmt.Errorf("%w: failed to determine current working directory", err)
158+
}
159+
151160
// read in all the paths
152161
scanner := bufio.NewScanner(os.Stdin)
153162
for scanner.Scan() {
154-
paths = append(paths, scanner.Text())
163+
path := scanner.Text()
164+
if !strings.HasPrefix(path, "/") {
165+
// append the cwd
166+
path = filepath.Join(cwd, path)
167+
}
168+
169+
paths = append(paths, path)
155170
}
156171
}
157172

@@ -194,7 +209,7 @@ func updateCache(ctx context.Context) func() error {
194209
if Cli.NoCache {
195210
changes += len(batch)
196211
} else {
197-
count, err := cache.Update(batch)
212+
count, err := cache.Update(Cli.TreeRoot, batch)
198213
if err != nil {
199214
return err
200215
}
@@ -278,7 +293,7 @@ func applyFormatters(ctx context.Context) func() error {
278293
if len(batch) > 0 {
279294
fg.Go(func() error {
280295
if err := pipeline.Apply(ctx, batch); err != nil {
281-
return fmt.Errorf("%w: pipeline failure, %s", err, key)
296+
return fmt.Errorf("%s failure: %w", key, err)
282297
}
283298
for _, path := range batch {
284299
processedCh <- path

cli/format_test.go

+14-14
Original file line numberDiff line numberDiff line change
@@ -108,23 +108,23 @@ func TestIncludesAndExcludes(t *testing.T) {
108108
test.WriteConfig(t, configPath, cfg)
109109
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
110110
as.NoError(err)
111-
as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
111+
as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
112112

113113
// globally exclude nix files
114114
cfg.Global.Excludes = []string{"*.nix"}
115115

116116
test.WriteConfig(t, configPath, cfg)
117117
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
118118
as.NoError(err)
119-
as.Contains(string(out), fmt.Sprintf("%d files changed", 29))
119+
as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
120120

121121
// add haskell files to the global exclude
122122
cfg.Global.Excludes = []string{"*.nix", "*.hs"}
123123

124124
test.WriteConfig(t, configPath, cfg)
125125
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
126126
as.NoError(err)
127-
as.Contains(string(out), fmt.Sprintf("%d files changed", 23))
127+
as.Contains(string(out), fmt.Sprintf("%d files changed", 24))
128128

129129
echo := cfg.Formatters["echo"]
130130

@@ -134,15 +134,15 @@ func TestIncludesAndExcludes(t *testing.T) {
134134
test.WriteConfig(t, configPath, cfg)
135135
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
136136
as.NoError(err)
137-
as.Contains(string(out), fmt.Sprintf("%d files changed", 21))
137+
as.Contains(string(out), fmt.Sprintf("%d files changed", 22))
138138

139139
// remove go files from the echo formatter
140140
echo.Excludes = []string{"*.py", "*.go"}
141141

142142
test.WriteConfig(t, configPath, cfg)
143143
out, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
144144
as.NoError(err)
145-
as.Contains(string(out), fmt.Sprintf("%d files changed", 20))
145+
as.Contains(string(out), fmt.Sprintf("%d files changed", 21))
146146

147147
// adjust the includes for echo to only include elm files
148148
echo.Includes = []string{"*.elm"}
@@ -180,7 +180,7 @@ func TestCache(t *testing.T) {
180180
test.WriteConfig(t, configPath, cfg)
181181
out, err := cmd(t, "--config-file", configPath, "--tree-root", tempDir)
182182
as.NoError(err)
183-
as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
183+
as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
184184

185185
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
186186
as.NoError(err)
@@ -189,7 +189,7 @@ func TestCache(t *testing.T) {
189189
// clear cache
190190
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c")
191191
as.NoError(err)
192-
as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
192+
as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
193193

194194
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
195195
as.NoError(err)
@@ -198,7 +198,7 @@ func TestCache(t *testing.T) {
198198
// clear cache
199199
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "-c")
200200
as.NoError(err)
201-
as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
201+
as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
202202

203203
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir)
204204
as.NoError(err)
@@ -207,7 +207,7 @@ func TestCache(t *testing.T) {
207207
// no cache
208208
out, err = cmd(t, "--config-file", configPath, "--tree-root", tempDir, "--no-cache")
209209
as.NoError(err)
210-
as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
210+
as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
211211
}
212212

213213
func TestChangeWorkingDirectory(t *testing.T) {
@@ -241,7 +241,7 @@ func TestChangeWorkingDirectory(t *testing.T) {
241241
// this should fail if the working directory hasn't been changed first
242242
out, err := cmd(t, "-C", tempDir)
243243
as.NoError(err)
244-
as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
244+
as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
245245
}
246246

247247
func TestFailOnChange(t *testing.T) {
@@ -418,16 +418,16 @@ func TestGitWorktree(t *testing.T) {
418418
// add everything to the worktree
419419
as.NoError(wt.AddGlob("."))
420420
as.NoError(err)
421-
run(30)
421+
run(31)
422422

423423
// remove python directory
424424
as.NoError(wt.RemoveGlob("python/*"))
425-
run(27)
425+
run(28)
426426

427427
// walk with filesystem instead of git
428428
out, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem")
429429
as.NoError(err)
430-
as.Contains(string(out), fmt.Sprintf("%d files changed", 57))
430+
as.Contains(string(out), fmt.Sprintf("%d files changed", 59))
431431
}
432432

433433
func TestPathsArg(t *testing.T) {
@@ -462,7 +462,7 @@ func TestPathsArg(t *testing.T) {
462462
// without any path args
463463
out, err := cmd(t, "-C", tempDir)
464464
as.NoError(err)
465-
as.Contains(string(out), fmt.Sprintf("%d files changed", 30))
465+
as.Contains(string(out), fmt.Sprintf("%d files changed", 31))
466466

467467
// specify some explicit paths
468468
out, err = cmd(t, "-C", tempDir, "-c", "elm/elm.json", "haskell/Nested/Foo.hs")

format/formatter.go

+14-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"os"
78
"os/exec"
89
"time"
910

@@ -23,6 +24,7 @@ type Formatter struct {
2324

2425
log *log.Logger
2526
executable string // path to the executable described by Command
27+
workingDir string
2628

2729
// internal compiled versions of Includes and Excludes.
2830
includes []glob.Glob
@@ -37,6 +39,8 @@ func (f *Formatter) Executable() string {
3739
}
3840

3941
func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) error {
42+
start := time.Now()
43+
4044
// construct args, starting with config
4145
args := f.config.Options
4246

@@ -45,7 +49,7 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
4549
// files in a pipeline.
4650
if filter {
4751
// reset the batch
48-
f.batch = f.batch[:]
52+
f.batch = f.batch[:0]
4953

5054
// filter paths
5155
for _, path := range paths {
@@ -72,15 +76,18 @@ func (f *Formatter) Apply(ctx context.Context, paths []string, filter bool) erro
7276
}
7377

7478
// execute the command
75-
start := time.Now()
7679
cmd := exec.CommandContext(ctx, f.config.Command, args...)
80+
cmd.Dir = f.workingDir
7781

7882
if out, err := cmd.CombinedOutput(); err != nil {
79-
f.log.Debugf("\n%v", string(out))
80-
// todo log output
81-
return err
83+
if len(out) > 0 {
84+
_, _ = fmt.Fprintf(os.Stderr, "%s error:\n%s\n", f.name, out)
85+
}
86+
return fmt.Errorf("%w: formatter %s failed to apply", err, f.name)
8287
}
8388

89+
//
90+
8491
f.log.Infof("%v files processed in %v", len(paths), time.Now().Sub(start))
8592

8693
return nil
@@ -99,6 +106,7 @@ func (f *Formatter) Wants(path string) bool {
99106
// NewFormatter is used to create a new Formatter.
100107
func NewFormatter(
101108
name string,
109+
treeRoot string,
102110
config *config.Formatter,
103111
globalExcludes []glob.Glob,
104112
) (*Formatter, error) {
@@ -109,6 +117,7 @@ func NewFormatter(
109117
// capture config and the formatter's name
110118
f.name = name
111119
f.config = config
120+
f.workingDir = treeRoot
112121

113122
// test if the formatter is available
114123
executable, err := exec.LookPath(config.Command)

0 commit comments

Comments
 (0)