Skip to content

Commit ce14ee8

Browse files
committed
feat: simplify pipeline model
For each path we determine the list of formatters that are interested in formatting it. From there, we sort the list of formatters first by priority (lower value, higher priority) and then by name (lexicographically). With this information we create a batch key which is based on the unique sequence of formatters. When enough paths with the same sequence is ready we apply them in order to each formatter. By default, with no special configuration, this model guarantees that a given path will only be processed by one formatter at a time. If a user wishes to influence the order in which formatters are applied they can use the priority field. Signed-off-by: Brian McGee <[email protected]>
1 parent 6ce3d27 commit ce14ee8

12 files changed

+147
-229
lines changed

cli/format.go

+64-88
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ const (
3030
)
3131

3232
var (
33-
globalExcludes []glob.Glob
34-
formatters map[string]*format.Formatter
35-
pipelines map[string]*format.Pipeline
36-
filesCh chan *walk.File
37-
processedCh chan *walk.File
33+
excludes []glob.Glob
34+
formatters map[string]*format.Formatter
35+
36+
filesCh chan *walk.File
37+
processedCh chan *walk.File
3838

3939
ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled")
4040
)
@@ -73,45 +73,25 @@ func (f *Format) Run() (err error) {
7373
}
7474

7575
// compile global exclude globs
76-
if globalExcludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
76+
if excludes, err = format.CompileGlobs(cfg.Global.Excludes); err != nil {
7777
return fmt.Errorf("failed to compile global excludes: %w", err)
7878
}
7979

80-
// initialise pipelines
81-
pipelines = make(map[string]*format.Pipeline)
80+
// initialise formatters
8281
formatters = make(map[string]*format.Formatter)
8382

84-
// iterate the formatters in lexicographical order
85-
for _, name := range cfg.Names {
86-
// init formatter
87-
formatterCfg := cfg.Formatters[name]
88-
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, globalExcludes)
83+
for name, formatterCfg := range cfg.Formatters {
84+
formatter, err := format.NewFormatter(name, Cli.TreeRoot, formatterCfg, excludes)
85+
8986
if errors.Is(err, format.ErrCommandNotFound) && Cli.AllowMissingFormatter {
90-
log.Debugf("formatter not found: %v", name)
87+
log.Debugf("formatter command not found: %v", name)
9188
continue
9289
} else if err != nil {
9390
return fmt.Errorf("%w: failed to initialise formatter: %v", err, name)
9491
}
9592

9693
// store formatter by name
9794
formatters[name] = formatter
98-
99-
// If no pipeline is configured, we add the formatter to a nominal pipeline of size 1 with the key being the
100-
// formatter's name. If a pipeline is configured, we add the formatter to a pipeline keyed by
101-
// 'p:<pipeline_name>' in which it is sorted by priority.
102-
if formatterCfg.Pipeline == "" {
103-
pipeline := format.Pipeline{}
104-
pipeline.Add(formatter)
105-
pipelines[name] = &pipeline
106-
} else {
107-
key := fmt.Sprintf("p:%s", formatterCfg.Pipeline)
108-
pipeline, ok := pipelines[key]
109-
if !ok {
110-
pipeline = &format.Pipeline{}
111-
pipelines[key] = pipeline
112-
}
113-
pipeline.Add(formatter)
114-
}
11595
}
11696

11797
// open the cache
@@ -304,37 +284,43 @@ func walkFilesystem(ctx context.Context) func() error {
304284
func applyFormatters(ctx context.Context) func() error {
305285
// create our own errgroup for concurrent formatting tasks
306286
fg, ctx := errgroup.WithContext(ctx)
287+
// simple optimization to avoid too many concurrent formatting tasks
288+
// we can queue them up faster than the formatters can process them, this paces things a bit
289+
fg.SetLimit(runtime.NumCPU())
307290

308-
// pre-initialise batches keyed by pipeline
309-
batches := make(map[string][]*walk.File)
310-
for key := range pipelines {
311-
batches[key] = make([]*walk.File, 0, BatchSize)
312-
}
313-
314-
// for a given pipeline key, add the provided file to the current batch and trigger a format if the batch size has
315-
// been reached
316-
tryApply := func(key string, file *walk.File) {
317-
// append to batch
318-
batches[key] = append(batches[key], file)
291+
// track batches of formatting task based on their batch keys, which are determined by the unique sequence of
292+
// formatters which should be applied to their respective files
293+
batches := make(map[string][]*format.Task)
319294

320-
// check if the batch is full
295+
apply := func(key string, flush bool) {
296+
// lookup the batch and exit early if it's empty
321297
batch := batches[key]
322-
if len(batch) == BatchSize {
323-
// get the pipeline
324-
pipeline := pipelines[key]
298+
if len(batch) == 0 {
299+
return
300+
}
301+
302+
// process the batch if it's full, or we've been asked to flush partial batches
303+
if flush || len(batch) == BatchSize {
325304

326-
// copy the batch
327-
files := make([]*walk.File, len(batch))
328-
copy(files, batch)
305+
// copy the batch as we re-use it for the next batch
306+
tasks := make([]*format.Task, len(batch))
307+
copy(tasks, batch)
329308

330-
// apply to the pipeline
309+
// asynchronously apply the sequence formatters to the batch
331310
fg.Go(func() error {
332-
if err := pipeline.Apply(ctx, files); err != nil {
333-
return err
311+
// iterate the formatters, applying them in sequence to the batch of tasks
312+
// we get the formatters list from the first task since they have all the same formatters list
313+
for _, f := range tasks[0].Formatters {
314+
if err := f.Apply(ctx, tasks); err != nil {
315+
return err
316+
}
334317
}
335-
for _, path := range files {
336-
processedCh <- path
318+
319+
// pass each file to the processed channel
320+
for _, task := range tasks {
321+
processedCh <- task.File
337322
}
323+
338324
return nil
339325
})
340326

@@ -343,25 +329,12 @@ func applyFormatters(ctx context.Context) func() error {
343329
}
344330
}
345331

346-
// format any partial batches
347-
flushBatches := func() {
348-
for key, pipeline := range pipelines {
349-
350-
batch := batches[key]
351-
pipeline := pipeline // capture for closure
352-
353-
if len(batch) > 0 {
354-
fg.Go(func() error {
355-
if err := pipeline.Apply(ctx, batch); err != nil {
356-
return fmt.Errorf("%s failure: %w", key, err)
357-
}
358-
for _, path := range batch {
359-
processedCh <- path
360-
}
361-
return nil
362-
})
363-
}
364-
}
332+
tryApply := func(task *format.Task) {
333+
// append to batch
334+
key := task.BatchKey
335+
batches[key] = append(batches[key], task)
336+
// try to apply
337+
apply(key, false)
365338
}
366339

367340
return func() error {
@@ -370,35 +343,38 @@ func applyFormatters(ctx context.Context) func() error {
370343
close(processedCh)
371344
}()
372345

373-
// iterate the files channel, checking if any pipeline wants it, and attempting to apply if so.
346+
// iterate the files channel
374347
for file := range filesCh {
375-
var matches []string
376348

377-
for key, pipeline := range pipelines {
378-
if !pipeline.Wants(file) {
379-
continue
349+
// determine a list of formatters that are interested in file
350+
var matches []*format.Formatter
351+
for _, formatter := range formatters {
352+
if formatter.Wants(file) {
353+
matches = append(matches, formatter)
380354
}
381-
matches = append(matches, key)
382-
tryApply(key, file)
383355
}
384-
switch len(matches) {
385-
case 0:
386-
log.Debugf("no match found: %s", file.Path)
356+
357+
if len(matches) == 0 {
387358
// no match, so we send it direct to the processed channel
359+
log.Debugf("no match found: %s", file.Path)
388360
processedCh <- file
389-
case 1:
361+
} else {
362+
// record the match
390363
stats.Add(stats.Matched, 1)
391-
default:
392-
return fmt.Errorf("path '%s' matched multiple formatters/pipelines %v", file.Path, matches)
364+
// create a new format task, add it to a batch based on its batch key and try to apply if the batch is full
365+
task := format.NewTask(file, matches)
366+
tryApply(&task)
393367
}
394368
}
395369

396370
// flush any partial batches which remain
397-
flushBatches()
371+
for key := range batches {
372+
apply(key, true)
373+
}
398374

399375
// wait for all outstanding formatting tasks to complete
400376
if err := fg.Wait(); err != nil {
401-
return fmt.Errorf("pipeline processing failure: %w", err)
377+
return fmt.Errorf("formatting failure: %w", err)
402378
}
403379
return nil
404380
}

cli/format_test.go

+3-36
Original file line numberDiff line numberDiff line change
@@ -196,37 +196,6 @@ func TestIncludesAndExcludes(t *testing.T) {
196196
assertStats(t, as, 31, 31, 2, 0)
197197
}
198198

199-
func TestMatchingMultiplePipelines(t *testing.T) {
200-
as := require.New(t)
201-
202-
tempDir := test.TempExamples(t)
203-
configPath := tempDir + "/multiple.toml"
204-
205-
cfg := config2.Config{
206-
Formatters: map[string]*config2.Formatter{
207-
"echo": {
208-
Command: "echo",
209-
Includes: []string{"*"},
210-
},
211-
"touch": {
212-
Command: "touch",
213-
Includes: []string{"*"},
214-
},
215-
},
216-
}
217-
218-
test.WriteConfig(t, configPath, cfg)
219-
_, err := cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir)
220-
as.ErrorContains(err, "matched multiple formatters/pipelines")
221-
as.ErrorContains(err, "echo")
222-
as.ErrorContains(err, "touch")
223-
224-
// run with only one formatter
225-
test.WriteConfig(t, configPath, cfg)
226-
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "-f", "echo")
227-
as.NoError(err)
228-
}
229-
230199
func TestCache(t *testing.T) {
231200
as := require.New(t)
232201

@@ -604,25 +573,23 @@ func TestDeterministicOrderingInPipeline(t *testing.T) {
604573

605574
test.WriteConfig(t, configPath, config2.Config{
606575
Formatters: map[string]*config2.Formatter{
607-
// a and b should execute in lexicographical order as they have default priority 0, with c last since it has
608-
// priority 1
576+
// a and b have no priority set, which means they default to 0 and should execute first
577+
// a and b should execute in lexicographical order
578+
// c should execute first since it has a priority of 1
609579
"fmt-a": {
610580
Command: "test-fmt",
611581
Options: []string{"fmt-a"},
612582
Includes: []string{"*.py"},
613-
Pipeline: "foo",
614583
},
615584
"fmt-b": {
616585
Command: "test-fmt",
617586
Options: []string{"fmt-b"},
618587
Includes: []string{"*.py"},
619-
Pipeline: "foo",
620588
},
621589
"fmt-c": {
622590
Command: "test-fmt",
623591
Options: []string{"fmt-c"},
624592
Includes: []string{"*.py"},
625-
Pipeline: "foo",
626593
Priority: 1,
627594
},
628595
},

config/config.go

-9
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package config
22

33
import (
44
"fmt"
5-
"sort"
65

76
"github.com/BurntSushi/toml"
87
)
@@ -13,7 +12,6 @@ type Config struct {
1312
// Excludes is an optional list of glob patterns used to exclude certain files from all formatters.
1413
Excludes []string
1514
}
16-
Names []string `toml:"-"`
1715
Formatters map[string]*Formatter `toml:"formatter"`
1816
}
1917

@@ -40,12 +38,5 @@ func ReadFile(path string, names []string) (cfg *Config, err error) {
4038
cfg.Formatters = filtered
4139
}
4240

43-
// sort the formatter names so that, as we construct pipelines, we add formatters in a determinstic fashion. This
44-
// ensures a deterministic order even when all priority values are the same e.g. 0
45-
for name := range cfg.Formatters {
46-
cfg.Names = append(cfg.Names, name)
47-
}
48-
sort.Strings(cfg.Names)
49-
5041
return
5142
}

config/config_test.go

-2
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ func TestReadConfigFile(t *testing.T) {
5959
as.Nil(alejandra.Options)
6060
as.Equal([]string{"*.nix"}, alejandra.Includes)
6161
as.Equal([]string{"examples/nix/sources.nix"}, alejandra.Excludes)
62-
as.Equal("nix", alejandra.Pipeline)
6362
as.Equal(1, alejandra.Priority)
6463

6564
// deadnix
@@ -69,7 +68,6 @@ func TestReadConfigFile(t *testing.T) {
6968
as.Nil(deadnix.Options)
7069
as.Equal([]string{"*.nix"}, deadnix.Includes)
7170
as.Nil(deadnix.Excludes)
72-
as.Equal("nix", deadnix.Pipeline)
7371
as.Equal(2, deadnix.Priority)
7472

7573
// ruby

config/formatter.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ type Formatter struct {
99
Includes []string
1010
// Excludes is an optional list of glob patterns used to exclude certain files from this Formatter.
1111
Excludes []string
12-
// Indicates this formatter should be executed as part of a group of formatters all sharing the same pipeline key.
13-
Pipeline string
14-
// Indicates the order of precedence when executing as part of a pipeline.
12+
// Indicates the order of precedence when executing this Formatter in a sequence of Formatters.
1513
Priority int
1614
}

docs/configure.md

+17-9
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,19 @@ includes = ["*.go"]
2323
command = "black"
2424
includes = ["*.py"]
2525

26+
# use the priority field to control the order of execution
27+
2628
# run shellcheck first
2729
[formatter.shellcheck]
2830
command = "shellcheck"
2931
includes = ["*.sh"]
30-
pipeline = "sh"
31-
priority = 0
32+
priority = 0 # default is 0, but we set it here for clarity
3233

3334
# shfmt second
3435
[formatter.shfmt]
3536
command = "shfmt"
3637
options = ["-s", "-w"]
3738
includes = ["*.sh"]
38-
pipeline = "sh"
3939
priority = 1
4040
```
4141

@@ -49,13 +49,21 @@ priority = 1
4949
- `options` - an optional list of args to be passed to `command`.
5050
- `includes` - a list of glob patterns used to determine whether the formatter should be applied against a given path.
5151
- `excludes` - an optional list of glob patterns used to exclude certain files from this formatter.
52-
- `pipeline` - an optional key used to group related formatters together, ensuring they are executed sequentially
53-
against a given path.
54-
- `priority` - indicates the order of execution when this formatter is operating as part of a pipeline. Greater
55-
precedence is given to lower numbers, with the default being `0`.
52+
- `priority` - influences the order of execution. Greater precedence is given to lower numbers, with the default being `0`.
53+
54+
## Same file, multiple formatters?
55+
56+
For each file, `treefmt` determines a list of formatters based on the configured `includes` / `excludes` rules. This list is
57+
then sorted, first by priority (lower the value, higher the precedence) and secondly by formatter name (lexicographically).
58+
59+
The resultant sequence of formatters is used to create a batch key, and similarly matched files get added to that batch
60+
until it is full, at which point the files are passed to each formatter in turn.
61+
62+
This means that `treefmt` **guarantees only one formatter will be operating on a given file at any point in time**.
63+
Another consequence is that formatting is deterministic for a given file and a given `treefmt` configuration.
5664

57-
> When two or more formatters in a pipeline have the same priority, they are executed in lexicographical order to
58-
> ensure deterministic behaviour over multiple executions.
65+
By setting the priority fields appropriately, you can control the order in which those formatters are applied for any
66+
files they _both happen to match on_.
5967

6068
## Supported Formatters
6169

0 commit comments

Comments
 (0)