1
- import fnmatch
2
- from itertools import zip_longest
1
+ import os
3
2
from pathlib import Path
4
- from typing import Dict , List , Optional
3
+ from typing import List
4
+
5
+ from gitignore_parser import parse_gitignore_str
5
6
6
7
from dstack ._internal .utils .path import PathLike
7
8
@@ -16,77 +17,83 @@ def __init__(
16
17
if ignore_files is not None
17
18
else [".gitignore" , ".git/info/exclude" , ".dstackignore" ]
18
19
)
19
- self .ignore_globs : Dict [ str , List [ str ]] = { "." : globs or []}
20
- self .load_recursive ( )
20
+ self .parser = None
21
+ self ._create_combined_parser ( globs or [] )
21
22
22
- def load_ignore_file (self , path : str , ignore_file : Path ):
23
- if path != "." and not path .startswith ("./" ):
24
- path = "./" + path
25
- if path not in self .ignore_globs :
26
- self .ignore_globs [path ] = []
27
- with ignore_file .open ("r" ) as f :
28
- for line in f :
29
- line = self .rstrip (line .rstrip ("\n " )).rstrip ("/" )
30
- line = line .replace ("\\ " , " " )
31
- if line .startswith ("#" ) or not line :
32
- continue
33
- self .ignore_globs [path ].append (line )
23
def _create_combined_parser(self, additional_globs: List[str]):
    """Build ``self.parser`` from all ignore files plus the extra globs.

    Patterns gathered from the ignore files come first and
    ``additional_globs`` are appended after them. The combined text is
    passed to ``parse_gitignore_str`` with ``self.root_dir`` as the base
    directory, and the resulting matcher is stored on ``self.parser``.
    """
    collected: List[str] = []

    # Walk the tree from the root; the helper appends into ``collected``.
    self._collect_patterns_recursive(self.root_dir, collected)

    # Caller-supplied globs go last.
    collected += additional_globs

    combined_text = "\n".join(collected)
    self.parser = parse_gitignore_str(combined_text, self.root_dir)
59
34
60
- @ staticmethod
61
- def fnmatch ( name : str , pattern : str , sep = "/" ) -> bool :
62
- if pattern . startswith ( sep ):
63
- name = sep + name
64
- for n , p in zip_longest (
65
- reversed ( name . split ( sep )), reversed ( pattern . split ( sep )), fillvalue = None
66
- ):
67
- if p == "**" :
68
- raise NotImplementedError ()
69
- if p is None :
70
- return True
71
- if n is None or not fnmatch . fnmatch ( n , p ):
72
- return False
73
- return True
35
def _collect_patterns_recursive(self, path: Path, patterns: List[str]):
    """
    Append the patterns of every ignore file at or below ``path`` to ``patterns``.

    Patterns found in the root directory are kept verbatim. Patterns found in
    a subdirectory are rewritten so that they can live in one combined
    gitignore whose base is ``self.root_dir``:

    * the subdirectory's path relative to the root is prepended, always with
      ``/`` separators, because a backslash is an escape character in
      gitignore syntax;
    * a leading ``!`` is kept at the very front so a negation stays a negation;
    * a pattern with a leading or middle ``/`` is anchored to the directory of
      its ignore file and gets the prefixed form only, while a pattern without
      one also gets a ``<prefix>/**/`` form so it matches at any depth below
      that directory.

    Missing or unreadable ignore files and unlistable directories are skipped
    (best effort). Symlinked directories are not followed, so a link cycle
    cannot cause unbounded recursion.

    Args:
        path: Directory to scan; expected to be ``self.root_dir`` or below it.
        patterns: List that collected patterns are appended to, in place.
    """
    # ``parts`` is empty for the root itself, which yields an empty prefix.
    prefix = "/".join(path.relative_to(self.root_dir).parts)

    for ignore_file_name in self.ignore_files:
        try:
            with (path / ignore_file_name).open(
                "r", encoding="utf-8", errors="ignore"
            ) as f:
                raw_lines = f.readlines()
        except OSError:
            # Covers "does not exist", "is a directory" and permission errors.
            continue

        for raw_line in raw_lines:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            if not prefix:
                # Root directory patterns need no rewriting.
                patterns.append(line)
                continue

            negated = line.startswith("!")
            body = line[1:] if negated else line
            # A trailing slash only means "directories"; it does not anchor.
            anchored = "/" in body.rstrip("/")
            body = body.lstrip("/")
            if not body:
                # A lone "/" or "!" must not turn into "<prefix>/", which
                # would ignore the whole subdirectory.
                continue

            marker = "!" if negated else ""
            patterns.append(f"{marker}{prefix}/{body}")
            if not anchored:
                patterns.append(f"{marker}{prefix}/**/{body}")

    # The parser is still being built, so ignored directories cannot be
    # pruned here; every real subdirectory is visited, in sorted order so the
    # collected patterns are deterministic.
    try:
        subdirs = sorted(
            entry
            for entry in path.iterdir()
            if entry.is_dir() and not entry.is_symlink()
        )
    except OSError:
        # Skip directories we can't read.
        return
    for subdir in subdirs:
        self._collect_patterns_recursive(subdir, patterns)
84
+
85
def ignore(self, path: PathLike) -> bool:
    """
    Check if a path should be ignored.

    Args:
        path: Path to test, either absolute or relative to ``self.root_dir``.

    Returns:
        ``True`` if the combined ignore patterns match the path. ``False``
        for an empty path, when no parser has been built, or when the path
        lies outside ``self.root_dir``.
    """
    if not path or self.parser is None:
        return False

    path = Path(path)
    if not path.is_absolute():
        path = self.root_dir / path

    # Normalise both sides lexically (no symlink resolution) so that ".."
    # segments and a relative root are treated the same for absolute and
    # relative input, and the matcher is only ever asked about paths that
    # are inside the root.
    root = os.path.abspath(self.root_dir)
    abs_path = os.path.abspath(path)
    try:
        Path(abs_path).relative_to(root)
    except ValueError:
        # Outside the root: nothing in the ignore files applies.
        return False

    return self.parser(abs_path)
0 commit comments