Skip to content

Commit 7d8854c

Browse files
authored
Check symlinks support per directory instead of globally (#1077)
* check symlinks support per cache folder, not globally * check symlink creation with relative symlinks * make dir before checking if symlinks work
1 parent 5958f17 commit 7d8854c

File tree

2 files changed

+56
-26
lines changed

2 files changed

+56
-26
lines changed

src/huggingface_hub/file_download.py

+34-15
Original file line numberDiff line numberDiff line change
@@ -172,33 +172,51 @@ def get_jinja_version():
172172
return _jinja_version
173173

174174

175-
_are_symlinks_supported: Optional[bool] = None
175+
_are_symlinks_supported_in_dir: Dict[str, bool] = {}
176176

177177

178-
def are_symlinks_supported() -> bool:
179-
# Check symlink compatibility only once at first time use
180-
global _are_symlinks_supported
178+
def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
179+
"""Return whether the symlinks are supported on the machine.
181180
182-
if _are_symlinks_supported is None:
183-
_are_symlinks_supported = True
181+
Since symlinks support can change depending on the mounted disk, we need to check
182+
on the precise cache folder. By default, the default HF cache directory is checked.
184183
185-
with tempfile.TemporaryDirectory() as tmpdir:
184+
Args:
185+
cache_dir (`str`, `Path`, *optional*):
186+
Path to the folder where cached files are stored.
187+
188+
Returns: [bool] Whether symlinks are supported in the directory.
189+
"""
190+
# Defaults to HF cache
191+
if cache_dir is None:
192+
cache_dir = HUGGINGFACE_HUB_CACHE
193+
cache_dir = str(Path(cache_dir).expanduser().resolve()) # make it unique
194+
195+
# Check symlink compatibility only once (per cache directory) at first time use
196+
if cache_dir not in _are_symlinks_supported_in_dir:
197+
_are_symlinks_supported_in_dir[cache_dir] = True
198+
199+
os.makedirs(cache_dir, exist_ok=True)
200+
with tempfile.TemporaryDirectory(dir=cache_dir) as tmpdir:
186201
src_path = Path(tmpdir) / "dummy_file_src"
187202
src_path.touch()
188203
dst_path = Path(tmpdir) / "dummy_file_dst"
204+
205+
# Relative source path as in `_create_relative_symlink``
206+
relative_src = os.path.relpath(src_path, start=os.path.dirname(dst_path))
189207
try:
190-
os.symlink(src_path, dst_path)
208+
os.symlink(relative_src, dst_path)
191209
except OSError:
192210
# Likely running on Windows
193-
_are_symlinks_supported = False
211+
_are_symlinks_supported_in_dir[cache_dir] = False
194212

195213
if not os.environ.get("DISABLE_SYMLINKS_WARNING"):
196214
message = (
197215
"`huggingface_hub` cache-system uses symlinks by default to"
198-
" efficiently store duplicated files but your machine doesn't"
199-
" support them. Caching files will still work but in a degraded"
200-
" version that might require more space on your disk. This"
201-
" warning can be disabled by setting the"
216+
" efficiently store duplicated files but your machine does not"
217+
f" support them in {cache_dir}. Caching files will still work"
218+
" but in a degraded version that might require more space on"
219+
" your disk. This warning can be disabled by setting the"
202220
" `DISABLE_SYMLINKS_WARNING` environment variable. For more"
203221
" details, see"
204222
" https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations."
@@ -213,7 +231,7 @@ def are_symlinks_supported() -> bool:
213231
)
214232
warnings.warn(message)
215233

216-
return _are_symlinks_supported
234+
return _are_symlinks_supported_in_dir[cache_dir]
217235

218236

219237
# Return value when trying to load a file from cache but the file does not exist in the distant repo.
@@ -920,7 +938,8 @@ def _create_relative_symlink(src: str, dst: str, new_blob: bool = False) -> None
920938
except OSError:
921939
pass
922940

923-
if are_symlinks_supported():
941+
cache_dir = os.path.dirname(os.path.commonpath([src, dst]))
942+
if are_symlinks_supported(cache_dir=cache_dir):
924943
os.symlink(relative_src, dst)
925944
elif new_blob:
926945
os.replace(src, dst)

tests/test_cache_no_symlinks.py

+22-11
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
from huggingface_hub import hf_hub_download, scan_cache_dir
9-
from huggingface_hub.constants import CONFIG_NAME
9+
from huggingface_hub.constants import CONFIG_NAME, HUGGINGFACE_HUB_CACHE
1010
from huggingface_hub.file_download import are_symlinks_supported
1111

1212
from .testing_constants import TOKEN
@@ -18,24 +18,35 @@
1818
class TestCacheLayoutIfSymlinksNotSupported(unittest.TestCase):
1919
cache_dir: Path
2020

21-
@patch("huggingface_hub.file_download._are_symlinks_supported", None)
22-
def test_are_symlinks_supported_normal(self) -> None:
21+
@patch(
22+
"huggingface_hub.file_download._are_symlinks_supported_in_dir",
23+
{HUGGINGFACE_HUB_CACHE: True},
24+
)
25+
def test_are_symlinks_supported_default(self) -> None:
2326
self.assertTrue(are_symlinks_supported())
2427

25-
@patch("huggingface_hub.file_download.os.symlink") # Symlinks not supported
26-
@patch("huggingface_hub.file_download._are_symlinks_supported", None) # first use
27-
def test_are_symlinks_supported_windows(self, mock_symlink: Mock) -> None:
28-
mock_symlink.side_effect = OSError()
28+
@patch("huggingface_hub.file_download.os.symlink")
29+
@patch("huggingface_hub.file_download._are_symlinks_supported_in_dir", {})
30+
def test_are_symlinks_supported_windows_specific_dir(
31+
self, mock_symlink: Mock
32+
) -> None:
33+
mock_symlink.side_effect = [OSError(), None] # First dir not supported then yes
34+
this_dir = Path(__file__).parent
2935

30-
# First time: warning is raised
36+
# First time in `this_dir`: warning is raised
3137
with self.assertWarns(UserWarning):
32-
self.assertFalse(are_symlinks_supported())
38+
self.assertFalse(are_symlinks_supported(this_dir))
3339

34-
# Afterward: value is cached (no warning raised)
3540
with warnings.catch_warnings():
41+
# Assert no warnings raised
3642
# Taken from https://stackoverflow.com/a/45671804
3743
warnings.simplefilter("error")
38-
self.assertFalse(are_symlinks_supported())
44+
45+
# Second time in `this_dir` but with absolute path: value is still cached
46+
self.assertFalse(are_symlinks_supported(this_dir.absolute()))
47+
48+
# Try with another directory: symlinks are supported, no warnings
49+
self.assertTrue(are_symlinks_supported()) # True
3950

4051
@patch("huggingface_hub.file_download.are_symlinks_supported")
4152
def test_download_no_symlink_new_file(

0 commit comments

Comments
 (0)