
Commit 89950c5

Merge pull request pypa#12327 from notatallshaw/Optimize---find-links=<path-to-dir>
2 parents e57cf4f + 0997930

File tree

6 files changed (+130, -23 lines)

news/12327.bugfix.rst

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Optimized usage of ``--find-links=<path-to-dir>``, by only scanning the relevant directory once, only considering file names that are valid wheel or sdist names, and only considering files in the directory that are related to the install.
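
The filtering that this entry describes can be sketched outside pip roughly as follows. This is a minimal, illustrative sketch using the standalone ``packaging`` library (pip consumes the same functions via its vendored copy, ``pip._vendor.packaging.utils``); the ``index_flat_directory`` helper name is hypothetical and not part of pip.

import os
from collections import defaultdict
from typing import Dict, List

from packaging.utils import (
    InvalidSdistFilename,
    InvalidWheelFilename,
    parse_sdist_filename,
    parse_wheel_filename,
)
from packaging.version import InvalidVersion


def index_flat_directory(path: str) -> Dict[str, List[str]]:
    """Scan ``path`` once, mapping canonical project names to file paths."""
    projects: Dict[str, List[str]] = defaultdict(list)
    for entry in os.scandir(path):
        try:
            # Wheel filenames carry the (normalized) project name up front.
            name = parse_wheel_filename(entry.name)[0]
        except (InvalidWheelFilename, InvalidVersion):
            try:
                # Not a wheel; see whether it is a valid sdist name instead.
                name = parse_sdist_filename(entry.name)[0]
            except (InvalidSdistFilename, InvalidVersion):
                # Neither a wheel nor an sdist: not a candidate, skip it.
                continue
        projects[name].append(entry.path)
    return projects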

src/pip/_internal/index/collector.py

Lines changed: 2 additions & 0 deletions
@@ -473,6 +473,7 @@ def collect_sources(
                 page_validator=self.session.is_secure_origin,
                 expand_dir=False,
                 cache_link_parsing=False,
+                project_name=project_name,
             )
             for loc in self.search_scope.get_index_urls_locations(project_name)
         ).values()
@@ -483,6 +484,7 @@ def collect_sources(
                 page_validator=self.session.is_secure_origin,
                 expand_dir=True,
                 cache_link_parsing=True,
+                project_name=project_name,
             )
             for loc in self.find_links
         ).values()
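
Both call sites now thread ``project_name`` into ``build_source`` so that a ``--find-links`` directory source can serve only the files belonging to the project currently being resolved. In terms of the illustrative ``index_flat_directory`` sketch above (hypothetical names, not pip's API), the per-project lookup amounts to:

from packaging.utils import canonicalize_name

# The directory is indexed once; each requirement then looks up only its
# own canonical name instead of re-listing and re-checking every file.
index = index_flat_directory("/path/to/find-links-dir")
files_for_project = index.get(canonicalize_name("Sample.Project"), [])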

src/pip/_internal/index/sources.py

Lines changed: 73 additions & 11 deletions
@@ -1,8 +1,17 @@
 import logging
 import mimetypes
 import os
-import pathlib
-from typing import Callable, Iterable, Optional, Tuple
+from collections import defaultdict
+from typing import Callable, Dict, Iterable, List, Optional, Tuple
+
+from pip._vendor.packaging.utils import (
+    InvalidSdistFilename,
+    InvalidVersion,
+    InvalidWheelFilename,
+    canonicalize_name,
+    parse_sdist_filename,
+    parse_wheel_filename,
+)
 
 from pip._internal.models.candidate import InstallationCandidate
 from pip._internal.models.link import Link
@@ -36,6 +45,53 @@ def _is_html_file(file_url: str) -> bool:
     return mimetypes.guess_type(file_url, strict=False)[0] == "text/html"
 
 
+class _FlatDirectoryToUrls:
+    """Scans directory and caches results"""
+
+    def __init__(self, path: str) -> None:
+        self._path = path
+        self._page_candidates: List[str] = []
+        self._project_name_to_urls: Dict[str, List[str]] = defaultdict(list)
+        self._scanned_directory = False
+
+    def _scan_directory(self) -> None:
+        """Scans directory once and populates both page_candidates
+        and project_name_to_urls at the same time
+        """
+        for entry in os.scandir(self._path):
+            url = path_to_url(entry.path)
+            if _is_html_file(url):
+                self._page_candidates.append(url)
+                continue
+
+            # File must have a valid wheel or sdist name,
+            # otherwise not worth considering as a package
+            try:
+                project_filename = parse_wheel_filename(entry.name)[0]
+            except (InvalidWheelFilename, InvalidVersion):
+                try:
+                    project_filename = parse_sdist_filename(entry.name)[0]
+                except (InvalidSdistFilename, InvalidVersion):
+                    continue
+
+            self._project_name_to_urls[project_filename].append(url)
+        self._scanned_directory = True
+
+    @property
+    def page_candidates(self) -> List[str]:
+        if not self._scanned_directory:
+            self._scan_directory()
+
+        return self._page_candidates
+
+    @property
+    def project_name_to_urls(self) -> Dict[str, List[str]]:
+        if not self._scanned_directory:
+            self._scan_directory()
+
+        return self._project_name_to_urls
+
+
 class _FlatDirectorySource(LinkSource):
     """Link source specified by ``--find-links=<path-to-dir>``.
 
@@ -45,30 +101,34 @@ class _FlatDirectorySource(LinkSource):
     * ``file_candidates``: Archives in the directory.
     """
 
+    _paths_to_urls: Dict[str, _FlatDirectoryToUrls] = {}
+
     def __init__(
         self,
         candidates_from_page: CandidatesFromPage,
         path: str,
+        project_name: str,
     ) -> None:
         self._candidates_from_page = candidates_from_page
-        self._path = pathlib.Path(os.path.realpath(path))
+        self._project_name = canonicalize_name(project_name)
+
+        # Get existing instance of _FlatDirectoryToUrls if it exists
+        if path in self._paths_to_urls:
+            self._path_to_urls = self._paths_to_urls[path]
+        else:
+            self._path_to_urls = _FlatDirectoryToUrls(path=path)
+            self._paths_to_urls[path] = self._path_to_urls
 
     @property
     def link(self) -> Optional[Link]:
         return None
 
     def page_candidates(self) -> FoundCandidates:
-        for path in self._path.iterdir():
-            url = path_to_url(str(path))
-            if not _is_html_file(url):
-                continue
+        for url in self._path_to_urls.page_candidates:
             yield from self._candidates_from_page(Link(url))
 
     def file_links(self) -> FoundLinks:
-        for path in self._path.iterdir():
-            url = path_to_url(str(path))
-            if _is_html_file(url):
-                continue
+        for url in self._path_to_urls.project_name_to_urls[self._project_name]:
             yield Link(url)
 
 
@@ -170,6 +230,7 @@ def build_source(
     page_validator: PageValidator,
    expand_dir: bool,
     cache_link_parsing: bool,
+    project_name: str,
 ) -> Tuple[Optional[str], Optional[LinkSource]]:
     path: Optional[str] = None
     url: Optional[str] = None
@@ -203,6 +264,7 @@
             source = _FlatDirectorySource(
                 candidates_from_page=candidates_from_page,
                 path=path,
+                project_name=project_name,
             )
         else:
             source = _IndexDirectorySource(
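
The new ``_paths_to_urls`` class attribute is what keeps the scan to at most once per directory: every ``_FlatDirectorySource`` built for the same path reuses one ``_FlatDirectoryToUrls`` instance. Stripped of pip specifics, the caching pattern looks roughly like this (a sketch with illustrative names, assuming a single pip process):

import os
from typing import Dict, List


class _LazyDirectoryScan:
    """Scans a directory the first time its entries are requested."""

    def __init__(self, path: str) -> None:
        self._path = path
        self._entries: List[str] = []
        self._scanned = False

    @property
    def entries(self) -> List[str]:
        if not self._scanned:
            self._entries = [entry.path for entry in os.scandir(self._path)]
            self._scanned = True
        return self._entries


class _DirectorySource:
    # Shared across instances: each distinct path is scanned at most once
    # per process, no matter how many requirements point at it.
    _scans: Dict[str, _LazyDirectoryScan] = {}

    def __init__(self, path: str) -> None:
        self._scan = self._scans.setdefault(path, _LazyDirectoryScan(path))

Because the cache lives on the class, it persists for the lifetime of the process, which matches how the resolver consults the same ``--find-links`` directory repeatedly for different requirements.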

tests/functional/test_install_config.py

Lines changed: 0 additions & 6 deletions
@@ -125,9 +125,6 @@ def test_command_line_append_flags(
         "Fetching project page and analyzing links: https://test.pypi.org"
         in result.stdout
     )
-    assert (
-        f"Skipping link: not a file: {data.find_links}" in result.stdout
-    ), f"stdout: {result.stdout}"
 
 
 @pytest.mark.network
@@ -151,9 +148,6 @@ def test_command_line_appends_correctly(
         "Fetching project page and analyzing links: https://test.pypi.org"
         in result.stdout
     ), result.stdout
-    assert (
-        f"Skipping link: not a file: {data.find_links}" in result.stdout
-    ), f"stdout: {result.stdout}"
 
 
 def test_config_file_override_stack(

tests/unit/test_collector.py

Lines changed: 50 additions & 5 deletions
@@ -862,7 +862,7 @@ def test_collect_sources__file_expand_dir(data: TestData) -> None:
     )
     sources = collector.collect_sources(
         # Shouldn't be used.
-        project_name=None,  # type: ignore[arg-type]
+        project_name="",
         candidates_from_page=None,  # type: ignore[arg-type]
     )
     assert (
@@ -960,7 +960,7 @@ def test_fetch_response(self, mock_get_simple_response: mock.Mock) -> None:
             session=link_collector.session,
         )
 
-    def test_collect_sources(
+    def test_collect_page_sources(
         self, caplog: pytest.LogCaptureFixture, data: TestData
     ) -> None:
         caplog.set_level(logging.DEBUG)
@@ -993,9 +993,8 @@ def test_collect_sources(
         files = list(files_it)
         pages = list(pages_it)
 
-        # Spot-check the returned sources.
-        assert len(files) > 20
-        check_links_include(files, names=["simple-1.0.tar.gz"])
+        # Only "twine" should return from collecting sources
+        assert len(files) == 1
 
         assert [page.link for page in pages] == [Link("https://pypi.org/simple/twine/")]
         # Check that index URLs are marked as *un*cacheable.
@@ -1010,6 +1009,52 @@
             ("pip._internal.index.collector", logging.DEBUG, expected_message),
         ]
 
+
+    def test_collect_file_sources(
+        self, caplog: pytest.LogCaptureFixture, data: TestData
+    ) -> None:
+        caplog.set_level(logging.DEBUG)
+
+        link_collector = make_test_link_collector(
+            find_links=[data.find_links],
+            # Include two copies of the URL to check that the second one
+            # is skipped.
+            index_urls=[PyPI.simple_url, PyPI.simple_url],
+        )
+        collected_sources = link_collector.collect_sources(
+            "singlemodule",
+            candidates_from_page=lambda link: [
+                InstallationCandidate("singlemodule", "0.0.1", link)
+            ],
+        )
+
+        files_it = itertools.chain.from_iterable(
+            source.file_links()
+            for sources in collected_sources
+            for source in sources
+            if source is not None
+        )
+        pages_it = itertools.chain.from_iterable(
+            source.page_candidates()
+            for sources in collected_sources
+            for source in sources
+            if source is not None
+        )
+        files = list(files_it)
+        _ = list(pages_it)
+
+        # singlemodule should return files
+        assert len(files) > 0
+        check_links_include(files, names=["singlemodule-0.0.1.tar.gz"])
+
+        expected_message = dedent(
+            """\
+            1 location(s) to search for versions of singlemodule:
+            * https://pypi.org/simple/singlemodule/"""
+        )
+        assert caplog.record_tuples == [
+            ("pip._internal.index.collector", logging.DEBUG, expected_message),
+        ]
 
 @pytest.mark.parametrize(
     "find_links, no_index, suppress_no_index, expected",

tests/unit/test_finder.py

Lines changed: 4 additions & 1 deletion
@@ -128,7 +128,10 @@ def test_skip_invalid_wheel_link(
         with pytest.raises(DistributionNotFound):
             finder.find_requirement(req, True)
 
-        assert "Skipping link: invalid wheel filename:" in caplog.text
+        assert (
+            "Could not find a version that satisfies the requirement invalid"
+            " (from versions:" in caplog.text
+        )
 
     def test_not_find_wheel_not_supported(self, data: TestData) -> None:
         """
