Skip to content

Commit de4bbf1

Browse files
committed
Update gitignore implementation to support more cases
1 parent ab6c490 commit de4bbf1

File tree

5 files changed

+502
-65
lines changed

5 files changed

+502
-65
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@ build/
2121
.fleet
2222
.env
2323
.aider*
24+
uv.lock
25+
26+
.local/
27+
.claude/settings.local.json

pyproject.toml

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
[project]
2+
name = "dstack"
3+
dynamic = ["version", "readme"]
4+
authors = [{ name = "Andrey Cheptsov", email = "[email protected]" }]
5+
description = "dstack is an open-source orchestration engine for running AI workloads on any cloud or on-premises."
6+
requires-python = ">=3.9"
7+
classifiers = [
8+
"Development Status :: 4 - Beta",
9+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
10+
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
11+
"Programming Language :: Python :: 3",
12+
]
13+
dependencies = [
14+
"pyyaml",
15+
"requests",
16+
"typing-extensions>=4.0.0",
17+
"cryptography",
18+
"packaging",
19+
"python-dateutil",
20+
"cachetools",
21+
"gitpython",
22+
"jsonschema",
23+
"paramiko>=3.2.0",
24+
"cursor",
25+
"rich",
26+
"rich-argparse",
27+
"tqdm",
28+
"simple-term-menu",
29+
"pydantic>=1.10.10,<2.0.0",
30+
"pydantic-duality>=1.2.4",
31+
"websocket-client",
32+
"python-multipart>=0.0.16",
33+
"filelock",
34+
"psutil",
35+
"gpuhunt==0.1.6",
36+
"argcomplete>=3.5.0",
37+
"gitignore-parser>=0.1.12",
38+
]
39+
40+
[project.urls]
41+
Homepage = "https://dstack.ai"
42+
Source = "https://github.com/dstackai/dstack"
43+
Documentation = "https://dstack.ai/docs"
44+
Issues = "https://github.com/dstackai/dstack/issues"
45+
Changelog = "https://github.com/dstackai/dstack/releases"
46+
Discord = "https://discord.gg/u8SmfwPpMd"
47+
48+
[build-system]
49+
requires = ["hatchling", "hatch-fancy-pypi-readme"]
50+
build-backend = "hatchling.build"
51+
52+
[project.scripts]
53+
dstack = "dstack._internal.cli.main:main"
54+
55+
[tool.hatch.version]
56+
path = "src/dstack/version.py"
57+
58+
[tool.hatch.build.targets.sdist]
59+
artifacts = ["src/dstack/_internal/server/statics/**"]
60+
61+
[tool.hatch.build.targets.wheel]
62+
artifacts = ["src/dstack/_internal/server/statics/**"]
63+
64+
[tool.hatch.metadata.hooks.fancy-pypi-readme]
65+
content-type = "text/markdown"
66+
67+
[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
68+
path = "README.md"
69+
70+
[[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]]
71+
pattern = '<picture>\s*|<source[^>]*>\s*|\s*</picture>|<video[^>]*>\s*|</video>\s*|### Demo\s*'
72+
replacement = ''
73+
ignore-case = true
74+
75+
[dependency-groups]
76+
dev = [
77+
"build>=1.2.2.post1",
78+
"httpx>=0.28.1",
79+
"pre-commit>=4.2.0",
80+
"pytest-asyncio>=0.23.8",
81+
"pytest-httpbin>=2.1.0",
82+
"httpbin>=0.10.2", # indirect to make compatible with Werkzeug 3
83+
"pytest~=7.2",
84+
"pytest-socket>=0.7.0",
85+
"requests-mock>=1.12.1",
86+
"openai>=1.68.2",
87+
"freezegun>=1.5.1",
88+
"ruff==0.11.6", # should match .pre-commit-config.yaml
89+
"testcontainers>=4.9.2",
90+
"pytest-xdist>=3.6.1",
91+
]
92+
93+
[project.optional-dependencies]
94+
gateway = [
95+
"fastapi",
96+
"starlette>=0.26.0",
97+
"uvicorn",
98+
"aiorwlock",
99+
"aiocache",
100+
"httpx",
101+
"jinja2",
102+
]
103+
server = [
104+
"fastapi",
105+
"starlette>=0.26.0",
106+
"uvicorn",
107+
"aiorwlock",
108+
"aiocache",
109+
"httpx",
110+
"jinja2",
111+
"watchfiles",
112+
"sqlalchemy[asyncio]>=2.0.0",
113+
"sqlalchemy_utils>=0.40.0",
114+
"alembic>=1.10.2",
115+
"apscheduler<4",
116+
"aiosqlite",
117+
"docker>=6.0.0",
118+
"python-dxf==12.1.0",
119+
"sentry-sdk[fastapi]",
120+
"alembic-postgresql-enum",
121+
"asyncpg",
122+
"python-json-logger>=3.1.0",
123+
"prometheus-client",
124+
"grpcio>=1.50",
125+
"backports.entry-points-selectable",
126+
]
127+
aws = ["boto3>=1.38.13", "botocore", "dstack[server]"]
128+
azure = [
129+
"azure-identity>=1.12.0",
130+
"azure-mgmt-subscription>=3.1.1",
131+
"azure-mgmt-compute>=29.1.0",
132+
"azure-mgmt-network>=23.0.0,<28.0.0",
133+
"azure-mgmt-resource>=22.0.0",
134+
"azure-mgmt-authorization>=3.0.0",
135+
"azure-mgmt-msi>=7.0.0",
136+
"dstack[server]",
137+
]
138+
gcp = [
139+
"google-auth>=2.3.0",
140+
"google-cloud-storage>=2.0.0",
141+
"google-cloud-compute>=1.5.0",
142+
"google-cloud-logging>=2.0.0",
143+
"google-api-python-client>=2.80.0",
144+
"google-cloud-billing>=1.11.0",
145+
"google-cloud-tpu>=1.18.3",
146+
"dstack[server]",
147+
]
148+
datacrunch = ["datacrunch", "dstack[server]"]
149+
kubernetes = ["kubernetes", "dstack[server]"]
150+
lambda = ["boto3>=1.38.13", "botocore", "dstack[server]"]
151+
oci = [
152+
"oci>=2.150.0",
153+
"cryptography>=44.0.3",
154+
# pyopenssl is an indirect dependency, listed explicitly so that uv does not fall back to an old version
155+
# due to an upper limit from oci
156+
"pyopenssl>=23.2.0",
157+
"dstack[server]",
158+
]
159+
nebius = ["nebius>=0.2.19,<0.3; python_version >= '3.10'", "dstack[server]"]
160+
all = [
161+
"dstack[gateway,server,aws,azure,gcp,datacrunch,kubernetes,lambda,nebius,oci]",
162+
]

src/dstack/_internal/core/models/repos/local.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,14 @@
55
from typing_extensions import Literal
66

77
from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
8+
from dstack._internal.utils.common import sizeof_fmt
89
from dstack._internal.utils.hash import get_sha256, slugify
910
from dstack._internal.utils.ignore import GitIgnore
11+
from dstack._internal.utils.logging import get_logger
1012
from dstack._internal.utils.path import PathLike
1113

14+
logger = get_logger(__name__)
15+
1216

1317
class LocalRepoInfo(BaseRepoInfo):
1418
repo_type: Literal["local"] = "local"
@@ -75,6 +79,8 @@ def write_code_file(self, fp: BinaryIO) -> str:
7579
arcname="",
7680
filter=TarIgnore(self.run_repo_data.repo_dir, globs=[".git"]),
7781
)
82+
83+
logger.debug(f"Code file size: {sizeof_fmt(fp.tell())} bytes")
7884
return get_sha256(fp)
7985

8086
def get_repo_info(self) -> LocalRepoInfo:

src/dstack/_internal/utils/ignore.py

Lines changed: 74 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
import fnmatch
2-
from itertools import zip_longest
1+
import os
32
from pathlib import Path
4-
from typing import Dict, List, Optional
3+
from typing import List
4+
5+
from gitignore_parser import parse_gitignore_str
56

67
from dstack._internal.utils.path import PathLike
78

@@ -16,75 +17,83 @@ def __init__(
1617
if ignore_files is not None
1718
else [".gitignore", ".git/info/exclude", ".dstackignore"]
1819
)
19-
self.ignore_globs: Dict[str, List[str]] = {".": globs or []}
20-
self.load_recursive()
20+
self.parser = None
21+
self._create_combined_parser(globs or [])
2122

22-
def load_ignore_file(self, path: str, ignore_file: Path):
23-
if path not in self.ignore_globs:
24-
self.ignore_globs[path] = []
25-
with ignore_file.open("r") as f:
26-
for line in f:
27-
line = self.rstrip(line.rstrip("\n")).rstrip("/")
28-
line = line.replace("\\ ", " ")
29-
if line.startswith("#") or not line:
30-
continue
31-
self.ignore_globs[path].append(line)
23+
def _create_combined_parser(self, additional_globs: List[str]):
24+
"""Create a single parser from all ignore files and additional globs."""
25+
all_patterns = []
3226

33-
def load_recursive(self, path: Optional[Path] = None):
34-
path = path or self.root_dir
35-
for ignore_file in self.ignore_files:
36-
ignore_file = path / ignore_file
37-
if ignore_file.exists():
38-
self.load_ignore_file(str(path.relative_to(self.root_dir)), ignore_file)
27+
# Collect patterns from all ignore files recursively
28+
self._collect_patterns_recursive(self.root_dir, all_patterns)
3929

40-
for subdir in path.iterdir():
41-
if not subdir.is_dir() or self.ignore(subdir.relative_to(self.root_dir)):
42-
continue
43-
self.load_recursive(subdir)
30+
# Add additional glob patterns
31+
all_patterns.extend(additional_globs)
4432

45-
@staticmethod
46-
def rstrip(value: str) -> str:
47-
end = len(value) - 1
48-
while end >= 0:
49-
if not value[end].isspace():
50-
break
51-
if end > 0 and value[end - 1] == "\\":
52-
break # escaped space
53-
end -= 1
54-
else:
55-
return ""
56-
return value[: end + 1]
33+
self.parser = parse_gitignore_str("\n".join(all_patterns), self.root_dir)
5734

58-
@staticmethod
59-
def fnmatch(name: str, pattern: str, sep="/") -> bool:
60-
if pattern.startswith(sep):
61-
name = sep + name
62-
for n, p in zip_longest(
63-
reversed(name.split(sep)), reversed(pattern.split(sep)), fillvalue=None
64-
):
65-
if p == "**":
66-
raise NotImplementedError()
67-
if p is None:
68-
return True
69-
if n is None or not fnmatch.fnmatch(n, p):
70-
return False
71-
return True
35+
def _collect_patterns_recursive(self, path: Path, patterns: List[str]):
36+
"""
37+
Recursively collect patterns from all ignore files and combine them into a single gitignore,
38+
with the root directory as the base path.
39+
"""
40+
for ignore_file_name in self.ignore_files:
41+
ignore_file = path / ignore_file_name
42+
if ignore_file.exists():
43+
try:
44+
# Get relative path from root to this directory
45+
if path == self.root_dir:
46+
prefix = ""
47+
else:
48+
prefix = path.relative_to(self.root_dir)
7249

73-
def ignore(self, path: PathLike, sep="/") -> bool:
74-
if not path:
50+
# Read patterns and prefix them with directory path
51+
with ignore_file.open("r", encoding="utf-8", errors="ignore") as f:
52+
for line in f:
53+
line = line.strip()
54+
if line and not line.startswith("#"):
55+
if prefix:
56+
# Prefix patterns with directory path for subdirectories
57+
if line.startswith("/"):
58+
# Anchored pattern (leading "/"): matches relative to this subdirectory only
59+
patterns.append(os.path.join(prefix, line[1:]))
60+
else:
61+
# Relative pattern within subdirectory
62+
# Add pattern that matches files directly in the subdirectory
63+
patterns.append(os.path.join(prefix, line))
64+
# Add pattern that matches files in deeper subdirectories
65+
patterns.append(os.path.join(prefix, "**", line))
66+
else:
67+
# Root directory patterns
68+
patterns.append(line)
69+
except (OSError, UnicodeDecodeError):
70+
# Skip files we can't read
71+
continue
72+
73+
# Recursively process subdirectories
74+
# Note: We need to check if directories should be ignored, but we can't
75+
# use self.ignore() yet, since we're still building the parser.
76+
# So we process all directories and let gitignore_parser handle the logic.
77+
try:
78+
for subdir in path.iterdir():
79+
if subdir.is_dir():
80+
self._collect_patterns_recursive(subdir, patterns)
81+
except (OSError, PermissionError):
82+
# Skip directories we can't read
83+
pass
84+
85+
def ignore(self, path: PathLike) -> bool:
86+
"""Check if a path should be ignored."""
87+
if not path or not self.parser:
7588
return False
89+
7690
path = Path(path)
7791
if path.is_absolute():
78-
path = path.relative_to(self.root_dir)
92+
try:
93+
path = path.relative_to(self.root_dir)
94+
except ValueError:
95+
return False
7996

80-
tokens = ("." + sep + str(path)).split(sep)
81-
for i in range(1, len(tokens)):
82-
parent = sep.join(tokens[:-i])
83-
globs = self.ignore_globs.get(parent)
84-
if not globs:
85-
continue
86-
name = sep.join(tokens[-i:])
87-
for glob in globs:
88-
if self.fnmatch(name, glob, sep=sep):
89-
return True
90-
return False
97+
# Convert to absolute path for gitignore_parser
98+
abs_path = str(self.root_dir / path)
99+
return self.parser(abs_path)

0 commit comments

Comments
 (0)