Skip to content

Commit 147dcdb

Browse files
committedSep 24, 2024··
fix: path handling and checking git index
- validates that all path arguments exist and are contained within the tree root - fixes a bug with git index checking, where we were ignoring directories Signed-off-by: Brian McGee <[email protected]>
1 parent 601b122 commit 147dcdb

File tree

4 files changed

+203
-50
lines changed

4 files changed

+203
-50
lines changed
 

‎cli/format.go

+25-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"path/filepath"
1111
"runtime"
1212
"runtime/pprof"
13+
"strings"
1314
"syscall"
1415
"time"
1516

@@ -35,6 +36,9 @@ func (f *Format) Run() (err error) {
3536
// set log level and other options
3637
f.configureLogging()
3738

39+
// initialise stats collection
40+
stats.Init()
41+
3842
// ci mode
3943
if f.Ci {
4044
f.NoCache = true
@@ -123,6 +127,27 @@ func (f *Format) Run() (err error) {
123127

124128
log.Debugf("config-file=%s tree-root=%s", f.ConfigFile, f.TreeRoot)
125129

130+
// ensure all path arguments exist and are contained within the tree root
131+
for _, path := range f.Paths {
132+
relPath, err := filepath.Rel(f.TreeRoot, path)
133+
if err != nil {
134+
return fmt.Errorf("failed to determine relative path for %s to the tree root %s: %w", path, f.TreeRoot, err)
135+
}
136+
if strings.Contains(relPath, "..") {
137+
return fmt.Errorf("path %s is outside the tree root %s", path, f.TreeRoot)
138+
}
139+
if f.Stdin {
140+
// skip checking if the file exists if we are processing from stdin
141+
// the file path is just used for matching against glob rules
142+
continue
143+
}
144+
// check the path exists
145+
_, err = os.Stat(path)
146+
if err != nil {
147+
return err
148+
}
149+
}
150+
126151
// read config
127152
cfg, err := config.ReadFile(f.ConfigFile, f.Formatters)
128153
if err != nil {
@@ -173,9 +198,6 @@ func (f *Format) Run() (err error) {
173198
cancel()
174199
}()
175200

176-
// initialise stats collection
177-
stats.Init()
178-
179201
// create an overall error group for executing high level tasks concurrently
180202
eg, ctx := errgroup.WithContext(ctx)
181203

‎cli/format_test.go

+42-1
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,42 @@ func TestGitWorktree(t *testing.T) {
539539
_, err = cmd(t, "-c", "--config-file", configPath, "--tree-root", tempDir, "--walk", "filesystem")
540540
as.NoError(err)
541541
assertStats(t, as, 60, 60, 60, 0)
542+
543+
// capture current cwd, so we can replace it after the test is finished
544+
cwd, err := os.Getwd()
545+
as.NoError(err)
546+
547+
t.Cleanup(func() {
548+
// return to the previous working directory
549+
as.NoError(os.Chdir(cwd))
550+
})
551+
552+
// format specific sub paths
553+
_, err = cmd(t, "-C", tempDir, "-c", "go", "-vv")
554+
as.NoError(err)
555+
assertStats(t, as, 2, 2, 2, 0)
556+
557+
_, err = cmd(t, "-C", tempDir, "-c", "go", "haskell")
558+
as.NoError(err)
559+
assertStats(t, as, 9, 9, 9, 0)
560+
561+
_, err = cmd(t, "-C", tempDir, "-c", "go", "haskell", "ruby")
562+
as.NoError(err)
563+
assertStats(t, as, 10, 10, 10, 0)
564+
565+
// try with a bad path
566+
_, err = cmd(t, "-C", tempDir, "-c", "haskell", "foo")
567+
as.ErrorContains(err, fmt.Sprintf("stat %s: no such file or directory", filepath.Join(tempDir, "foo")))
568+
assertStats(t, as, 0, 0, 0, 0)
569+
570+
// try with a path not in the git index, e.g. it is skipped
571+
_, err = os.Create(filepath.Join(tempDir, "foo.txt"))
572+
as.NoError(err)
573+
assertStats(t, as, 0, 0, 0, 0)
574+
575+
_, err = cmd(t, "-C", tempDir, "-c", "foo.txt")
576+
as.NoError(err)
577+
assertStats(t, as, 0, 0, 0, 0)
542578
}
543579

544580
func TestPathsArg(t *testing.T) {
@@ -583,6 +619,11 @@ func TestPathsArg(t *testing.T) {
583619
// specify a bad path
584620
_, err = cmd(t, "-c", "elm/elm.json", "haskell/Nested/Bar.hs")
585621
as.ErrorContains(err, "no such file or directory")
622+
623+
// specify a path outside the tree root
624+
externalPath := filepath.Join(cwd, "go.mod")
625+
_, err = cmd(t, "-c", externalPath)
626+
as.ErrorContains(err, fmt.Sprintf("%s is outside the tree root %s", externalPath, tempDir))
586627
}
587628

588629
func TestStdIn(t *testing.T) {
@@ -752,7 +793,7 @@ func TestRunInSubdir(t *testing.T) {
752793
as.NoError(err)
753794
assertStats(t, as, 32, 32, 32, 0)
754795

755-
// specify some explicit paths, relative to the elm/ sub-folder
796+
// specify some explicit paths, relative to the tree root
756797
_, err = cmd(t, "-c", "elm.json", "../haskell/Nested/Foo.hs")
757798
as.NoError(err)
758799
assertStats(t, as, 2, 2, 2, 0)

‎walk/git.go

+105-46
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,71 @@ package walk
33
import (
44
"context"
55
"fmt"
6+
"github.com/charmbracelet/log"
7+
"github.com/go-git/go-git/v5/plumbing/filemode"
8+
"github.com/go-git/go-git/v5/plumbing/format/index"
69
"io/fs"
710
"os"
811
"path/filepath"
9-
10-
"github.com/charmbracelet/log"
12+
"strings"
1113

1214
"github.com/go-git/go-git/v5"
13-
"github.com/go-git/go-git/v5/plumbing/filemode"
1415
)
1516

17+
// fileTree represents a hierarchical file structure with directories and files.
18+
type fileTree struct {
19+
name string
20+
entries map[string]*fileTree
21+
}
22+
23+
// add inserts a file path into the fileTree structure, creating necessary parent directories if they do not exist.
24+
func (n *fileTree) add(path []string) {
25+
if len(path) == 0 {
26+
return
27+
} else if n.entries == nil {
28+
n.entries = make(map[string]*fileTree)
29+
}
30+
31+
name := path[0]
32+
child, ok := n.entries[name]
33+
if !ok {
34+
child = &fileTree{name: name}
35+
n.entries[name] = child
36+
}
37+
child.add(path[1:])
38+
}
39+
40+
// addPath splits the given path by the filepath separator and inserts it into the fileTree structure.
41+
func (n *fileTree) addPath(path string) {
42+
n.add(strings.Split(path, string(filepath.Separator)))
43+
}
44+
45+
// has returns true if the specified path exists in the fileTree, false otherwise.
46+
func (n *fileTree) has(path []string) bool {
47+
if len(path) == 0 {
48+
return true
49+
} else if len(n.entries) == 0 {
50+
return false
51+
}
52+
child, ok := n.entries[path[0]]
53+
if !ok {
54+
return false
55+
}
56+
return child.has(path[1:])
57+
}
58+
59+
// hasPath splits the given path by the filepath separator and checks if it exists in the fileTree.
60+
func (n *fileTree) hasPath(path string) bool {
61+
return n.has(strings.Split(path, string(filepath.Separator)))
62+
}
63+
64+
// readIndex traverses the index entries and adds each file path to the fileTree structure.
65+
func (n *fileTree) readIndex(idx *index.Index) {
66+
for _, entry := range idx.Entries {
67+
n.addPath(entry.Name)
68+
}
69+
}
70+
1671
type gitWalker struct {
1772
log *log.Logger
1873
root string
@@ -25,12 +80,7 @@ func (g gitWalker) Root() string {
2580
return g.root
2681
}
2782

28-
func (g gitWalker) relPath(path string) (string, error) {
29-
// quick optimization for the majority of use cases
30-
if len(path) >= g.relPathOffset && path[:len(g.root)] == g.root {
31-
return path[g.relPathOffset:], nil
32-
}
33-
// fallback to proper relative path resolution
83+
func (g gitWalker) relPath(path string) (string, error) { //
3484
return filepath.Rel(g.root, path)
3585
}
3686

@@ -40,12 +90,16 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
4090
return fmt.Errorf("failed to open git index: %w", err)
4191
}
4292

43-
// cache in-memory whether a path is present in the git index
44-
var cache map[string]bool
93+
// if we need to walk a path that is not the root of the repository, we will read the directory structure of the
94+
// git index into memory for faster lookups
95+
var cache *fileTree
4596

4697
for path := range g.paths {
4798

48-
if path == g.root {
99+
switch path {
100+
101+
case g.root:
102+
49103
// we can just iterate the index entries
50104
for _, entry := range idx.Entries {
51105
select {
@@ -86,51 +140,56 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
86140
}
87141
}
88142
}
89-
continue
90-
}
91143

92-
// otherwise we ensure the git index entries are cached and then check if they are in the git index
93-
if cache == nil {
94-
cache = make(map[string]bool)
95-
for _, entry := range idx.Entries {
96-
cache[entry.Name] = true
97-
}
98-
}
99-
100-
relPath, err := filepath.Rel(g.root, path)
101-
if err != nil {
102-
return fmt.Errorf("failed to find relative path for %v: %w", path, err)
103-
}
104-
105-
_, ok := cache[relPath]
106-
if !(path == g.root || ok) {
107-
log.Debugf("path %v not found in git index, skipping", path)
108-
continue
109-
}
144+
default:
110145

111-
return filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error {
112-
if info.IsDir() {
113-
return nil
146+
// read the git index into memory if it hasn't already
147+
if cache == nil {
148+
cache = &fileTree{name: ""}
149+
cache.readIndex(idx)
114150
}
115151

152+
// git index entries are relative to the repository root, so we need to determine a relative path for the
153+
// one we are currently processing before checking if it exists within the git index
116154
relPath, err := g.relPath(path)
117155
if err != nil {
118-
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err)
156+
return fmt.Errorf("failed to find root relative path for %v: %w", path, err)
119157
}
120158

121-
if _, ok := cache[relPath]; !ok {
122-
log.Debugf("path %v not found in git index, skipping", path)
123-
return nil
159+
if !cache.hasPath(relPath) {
160+
log.Debugf("path %s not found in git index, skipping", relPath)
161+
continue
124162
}
125163

126-
file := File{
127-
Path: path,
128-
RelPath: relPath,
129-
Info: info,
130-
}
164+
err = filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error {
165+
// skip directories
166+
if info.IsDir() {
167+
return nil
168+
}
169+
170+
// determine a path relative to g.root before checking presence in the git index
171+
relPath, err := g.relPath(path)
172+
if err != nil {
173+
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err)
174+
}
175+
176+
if !cache.hasPath(relPath) {
177+
log.Debugf("path %v not found in git index, skipping", relPath)
178+
return nil
179+
}
131180

132-
return fn(&file, err)
133-
})
181+
file := File{
182+
Path: path,
183+
RelPath: relPath,
184+
Info: info,
185+
}
186+
187+
return fn(&file, err)
188+
})
189+
if err != nil {
190+
return fmt.Errorf("failed to walk %s: %w", path, err)
191+
}
192+
}
134193
}
135194

136195
return nil

‎walk/git_test.go

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package walk
2+
3+
import (
4+
"github.com/stretchr/testify/require"
5+
"testing"
6+
)
7+
8+
func TestFileTree(t *testing.T) {
9+
10+
as := require.New(t)
11+
12+
node := &fileTree{name: ""}
13+
node.addPath("foo/bar/baz")
14+
node.addPath("fizz/buzz")
15+
node.addPath("hello/world")
16+
node.addPath("foo/bar/fizz")
17+
node.addPath("foo/fizz/baz")
18+
19+
as.True(node.hasPath("foo"))
20+
as.True(node.hasPath("foo/bar"))
21+
as.True(node.hasPath("foo/bar/baz"))
22+
as.True(node.hasPath("fizz"))
23+
as.True(node.hasPath("fizz/buzz"))
24+
as.True(node.hasPath("hello"))
25+
as.True(node.hasPath("hello/world"))
26+
as.True(node.hasPath("foo/bar/fizz"))
27+
as.True(node.hasPath("foo/fizz/baz"))
28+
29+
as.False(node.hasPath("fo"))
30+
as.False(node.hasPath("world"))
31+
}

0 commit comments

Comments
 (0)
Please sign in to comment.