Skip to content

Commit ec926a5

Browse files
committed
Update gitignore implementation to support more cases
1 parent 4daf142 commit ec926a5

File tree

5 files changed

+338
-67
lines changed

5 files changed

+338
-67
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ build/
2525
.aider*
2626
uv.lock
2727
.local/
28+
.claude/settings.local.json

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ dependencies = [
3535
"gpuhunt==0.1.6",
3636
"argcomplete>=3.5.0",
3737
"humanize>=4.12.3",
38+
"gitignore-parser>=0.1.12",
3839
]
3940

4041
[project.urls]

src/dstack/_internal/core/models/repos/local.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,17 @@
22
from pathlib import Path
33
from typing import BinaryIO, Optional
44

5+
import humanize
56
from typing_extensions import Literal
67

78
from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
89
from dstack._internal.utils.hash import get_sha256, slugify
910
from dstack._internal.utils.ignore import GitIgnore
11+
from dstack._internal.utils.logging import get_logger
1012
from dstack._internal.utils.path import PathLike
1113

14+
logger = get_logger(__name__)
15+
1216

1317
class LocalRepoInfo(BaseRepoInfo):
1418
repo_type: Literal["local"] = "local"
@@ -75,6 +79,8 @@ def write_code_file(self, fp: BinaryIO) -> str:
7579
arcname="",
7680
filter=TarIgnore(self.run_repo_data.repo_dir, globs=[".git"]),
7781
)
82+
83+
logger.debug(f"Code file size: {humanize.naturalsize(fp.tell())} bytes")
7884
return get_sha256(fp)
7985

8086
def get_repo_info(self) -> LocalRepoInfo:

src/dstack/_internal/utils/ignore.py

Lines changed: 74 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
import fnmatch
2-
from itertools import zip_longest
1+
import os
32
from pathlib import Path
4-
from typing import Dict, List, Optional
3+
from typing import List
4+
5+
from gitignore_parser import parse_gitignore_str
56

67
from dstack._internal.utils.path import PathLike
78

@@ -16,77 +17,83 @@ def __init__(
1617
if ignore_files is not None
1718
else [".gitignore", ".git/info/exclude", ".dstackignore"]
1819
)
19-
self.ignore_globs: Dict[str, List[str]] = {".": globs or []}
20-
self.load_recursive()
20+
self.parser = None
21+
self._create_combined_parser(globs or [])
2122

22-
def load_ignore_file(self, path: str, ignore_file: Path):
23-
if path != "." and not path.startswith("./"):
24-
path = "./" + path
25-
if path not in self.ignore_globs:
26-
self.ignore_globs[path] = []
27-
with ignore_file.open("r") as f:
28-
for line in f:
29-
line = self.rstrip(line.rstrip("\n")).rstrip("/")
30-
line = line.replace("\\ ", " ")
31-
if line.startswith("#") or not line:
32-
continue
33-
self.ignore_globs[path].append(line)
23+
def _create_combined_parser(self, additional_globs: List[str]):
24+
"""Create a single parser from all ignore files and additional globs."""
25+
all_patterns = []
3426

35-
def load_recursive(self, path: Optional[Path] = None):
36-
path = path or self.root_dir
37-
for ignore_file in self.ignore_files:
38-
ignore_file = path / ignore_file
39-
if ignore_file.exists():
40-
self.load_ignore_file(str(path.relative_to(self.root_dir)), ignore_file)
27+
# Collect patterns from all ignore files recursively
28+
self._collect_patterns_recursive(self.root_dir, all_patterns)
4129

42-
for subdir in path.iterdir():
43-
if not subdir.is_dir() or self.ignore(subdir.relative_to(self.root_dir)):
44-
continue
45-
self.load_recursive(subdir)
30+
# Add additional glob patterns
31+
all_patterns.extend(additional_globs)
4632

47-
@staticmethod
48-
def rstrip(value: str) -> str:
49-
end = len(value) - 1
50-
while end >= 0:
51-
if not value[end].isspace():
52-
break
53-
if end > 0 and value[end - 1] == "\\":
54-
break # escaped space
55-
end -= 1
56-
else:
57-
return ""
58-
return value[: end + 1]
33+
self.parser = parse_gitignore_str("\n".join(all_patterns), self.root_dir)
5934

60-
@staticmethod
61-
def fnmatch(name: str, pattern: str, sep="/") -> bool:
62-
if pattern.startswith(sep):
63-
name = sep + name
64-
for n, p in zip_longest(
65-
reversed(name.split(sep)), reversed(pattern.split(sep)), fillvalue=None
66-
):
67-
if p == "**":
68-
raise NotImplementedError()
69-
if p is None:
70-
return True
71-
if n is None or not fnmatch.fnmatch(n, p):
72-
return False
73-
return True
35+
def _collect_patterns_recursive(self, path: Path, patterns: List[str]):
    """
    Recursively collect patterns from all ignore files under `path` and append them
    to `patterns`, rewritten so that they are valid relative to the root directory.

    Patterns found in the root directory are passed through unchanged. Patterns found
    in a subdirectory are scoped to that subdirectory following gitignore semantics:

    * a pattern with a leading or inner slash (`/foo`, `docs/build`, `**/x`) is
      relative to the directory of its ignore file and becomes `<dir>/<pattern>`;
    * a pattern without such a slash (`*.pyc`, `build/`) matches at any depth below
      the ignore file and becomes `<dir>/<pattern>` plus `<dir>/**/<pattern>`;
    * a leading `!` (negation) is kept in front of the rewritten pattern;
    * a leading `\\#` or `\\!` escape is dropped, since the character is no longer
      the first one of the rewritten pattern;
    * a pattern consisting only of slashes matches nothing and is skipped.

    Args:
        path: Directory to scan. Must be `self.root_dir` or one of its descendants.
        patterns: Output list, extended in place. Order is root first, then
            subdirectories in sorted order, so that later (deeper) rules win.

    Missing or unreadable ignore files and unreadable directories are skipped.
    Symlinked directories are not followed, which avoids unbounded recursion on
    symlink cycles.
    """
    if path == self.root_dir:
        prefix = ""
    else:
        # Gitignore patterns always use "/" as the separator, regardless of the OS
        prefix = path.relative_to(self.root_dir).as_posix()

    for ignore_file_name in self.ignore_files:
        ignore_file = path / ignore_file_name
        try:
            with ignore_file.open("r", encoding="utf-8", errors="ignore") as f:
                raw_lines = f.readlines()
        except OSError:
            # Missing or unreadable file (also covers a directory with that name)
            continue

        for raw_line in raw_lines:
            line = raw_line.rstrip("\r\n").lstrip()
            trimmed = line.rstrip()
            if trimmed.endswith("\\") and len(trimmed) < len(line):
                # Trailing whitespace escaped with a backslash is part of the pattern
                trimmed += line[len(trimmed)]
            line = trimmed
            if not line or line.startswith("#"):
                continue

            if not prefix:
                # Root directory patterns need no rewriting
                patterns.append(line)
                continue

            negation = ""
            if line.startswith("!"):
                negation, line = "!", line[1:]
            if line[:2] in ("\\#", "\\!"):
                # Once prefixed, the character is not leading anymore; drop the escape
                line = line[1:]
            if not line.strip("/"):
                # A bare "/" (or a bare "!") matches nothing
                continue

            if "/" in line[:-1]:
                # Leading or inner slash: anchored to the ignore file's directory
                patterns.append(f"{negation}{prefix}/{line.lstrip('/')}")
            else:
                # Plain name (optionally with a trailing slash): matches at any depth
                patterns.append(f"{negation}{prefix}/{line}")
                patterns.append(f"{negation}{prefix}/**/{line}")

    # We can't use self.ignore() to prune ignored directories here since the parser
    # is still being built, so every real subdirectory is visited.
    try:
        # Sorted for a deterministic rule order; the last matching rule wins
        children = sorted(path.iterdir())
    except OSError:
        # Skip directories we can't read
        return

    for child in children:
        try:
            descend = child.is_dir() and not child.is_symlink()
        except OSError:
            continue
        if descend:
            self._collect_patterns_recursive(child, patterns)
84+
85+
def ignore(self, path: PathLike) -> bool:
86+
"""Check if a path should be ignored."""
87+
if not path or not self.parser:
7788
return False
89+
7890
path = Path(path)
7991
if path.is_absolute():
80-
path = path.relative_to(self.root_dir)
92+
try:
93+
path = path.relative_to(self.root_dir)
94+
except ValueError:
95+
return False
8196

82-
tokens = ("." + sep + str(path)).split(sep)
83-
for i in range(1, len(tokens)):
84-
parent = sep.join(tokens[:-i])
85-
globs = self.ignore_globs.get(parent)
86-
if not globs:
87-
continue
88-
name = sep.join(tokens[-i:])
89-
for glob in globs:
90-
if self.fnmatch(name, glob, sep=sep):
91-
return True
92-
return False
97+
# Convert to absolute path for gitignore_parser
98+
abs_path = str(self.root_dir / path)
99+
return self.parser(abs_path)

0 commit comments

Comments
 (0)