Skip to content

Commit 16ea298

Browse files
committed
Implement "lazy sequence" to avoid Internet
find_matches() is modified to return a special type that implements the sequence protocol (instead of a plain list). This special sequence type tries to use the installed candidate as the first element if possible, and only accesses the indexes when the installed candidate is considered unsatisfactory.
1 parent ccc84a6 commit 16ea298

File tree

4 files changed

+181
-106
lines changed

4 files changed

+181
-106
lines changed

src/pip/_internal/resolution/resolvelib/factory.py

+28-31
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
import collections
21
import logging
32

4-
from pip._vendor import six
53
from pip._vendor.packaging.utils import canonicalize_name
64

75
from pip._internal.exceptions import (
@@ -30,6 +28,7 @@
3028
LinkCandidate,
3129
RequiresPythonCandidate,
3230
)
31+
from .found_candidates import FoundCandidates
3332
from .requirements import (
3433
ExplicitRequirement,
3534
RequiresPythonRequirement,
@@ -41,6 +40,7 @@
4140
Dict,
4241
FrozenSet,
4342
Iterable,
43+
Iterator,
4444
List,
4545
Optional,
4646
Sequence,
@@ -100,7 +100,7 @@ def __init__(
100100
if not ignore_installed:
101101
self._installed_dists = {
102102
canonicalize_name(dist.project_name): dist
103-
for dist in get_installed_distributions()
103+
for dist in get_installed_distributions(local_only=False)
104104
}
105105
else:
106106
self._installed_dists = {}
@@ -154,6 +154,7 @@ def _iter_found_candidates(
154154
ireqs, # type: Sequence[InstallRequirement]
155155
specifier, # type: SpecifierSet
156156
hashes, # type: Hashes
157+
prefers_installed, # type: bool
157158
):
158159
# type: (...) -> Iterable[Candidate]
159160
if not ireqs:
@@ -172,54 +173,49 @@ def _iter_found_candidates(
172173
hashes &= ireq.hashes(trust_internet=False)
173174
extras |= frozenset(ireq.extras)
174175

175-
# We use this to ensure that we only yield a single candidate for
176-
# each version (the finder's preferred one for that version). The
177-
# requirement needs to return only one candidate per version, so we
178-
# implement that logic here so that requirements using this helper
179-
# don't all have to do the same thing later.
180-
candidates = collections.OrderedDict() # type: VersionCandidates
181-
182176
# Get the installed version, if it matches, unless the user
183177
# specified `--force-reinstall`, when we want the version from
184178
# the index instead.
185-
installed_version = None
186179
installed_candidate = None
187180
if not self._force_reinstall and name in self._installed_dists:
188181
installed_dist = self._installed_dists[name]
189-
installed_version = installed_dist.parsed_version
190-
if specifier.contains(installed_version, prereleases=True):
182+
if specifier.contains(installed_dist.version, prereleases=True):
191183
installed_candidate = self._make_candidate_from_dist(
192184
dist=installed_dist,
193185
extras=extras,
194186
template=template,
195187
)
196188

197-
found = self._finder.find_best_candidate(
198-
project_name=name,
199-
specifier=specifier,
200-
hashes=hashes,
201-
)
202-
for ican in found.iter_applicable():
203-
if ican.version == installed_version and installed_candidate:
204-
candidate = installed_candidate
205-
else:
206-
candidate = self._make_candidate_from_link(
189+
def iter_index_candidates():
190+
# type: () -> Iterator[Candidate]
191+
result = self._finder.find_best_candidate(
192+
project_name=name,
193+
specifier=specifier,
194+
hashes=hashes,
195+
)
196+
# PackageFinder returns earlier versions first, so we reverse.
197+
for ican in reversed(list(result.iter_applicable())):
198+
yield self._make_candidate_from_link(
207199
link=ican.link,
208200
extras=extras,
209201
template=template,
210202
name=name,
211203
version=ican.version,
212204
)
213-
candidates[ican.version] = candidate
214-
215-
# Yield the installed version even if it is not found on the index.
216-
if installed_version and installed_candidate:
217-
candidates[installed_version] = installed_candidate
218205

219-
return six.itervalues(candidates)
206+
return FoundCandidates(
207+
iter_index_candidates,
208+
installed_candidate,
209+
prefers_installed,
210+
)
220211

221-
def find_candidates(self, requirements, constraint):
222-
# type: (Sequence[Requirement], Constraint) -> Iterable[Candidate]
212+
def find_candidates(
213+
self,
214+
requirements, # type: Sequence[Requirement]
215+
constraint, # type: Constraint
216+
prefers_installed, # type: bool
217+
):
218+
# type: (...) -> Iterable[Candidate]
223219
explicit_candidates = set() # type: Set[Candidate]
224220
ireqs = [] # type: List[InstallRequirement]
225221
for req in requirements:
@@ -236,6 +232,7 @@ def find_candidates(self, requirements, constraint):
236232
ireqs,
237233
constraint.specifier,
238234
constraint.hashes,
235+
prefers_installed,
239236
)
240237

241238
if constraint:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
from pip._vendor.six.moves import collections_abc
2+
3+
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
4+
5+
if MYPY_CHECK_RUNNING:
6+
from typing import Callable, Iterator, Optional, Set
7+
8+
from pip._vendor.packaging.version import _BaseVersion
9+
10+
from .base import Candidate
11+
12+
13+
class _InstalledFirstCandidatesIterator(collections_abc.Iterator):
    """Iterator backing ``FoundCandidates`` in "keep installed" mode.

    The already-installed candidate (when there is one) is handed out
    first. The index is only consulted once the caller asks for more, and
    any index candidate whose version was already handed out (including
    the installed version) is skipped.
    """
    def __init__(
        self,
        get_others,  # type: Callable[[], Iterator[Candidate]]
        installed,  # type: Optional[Candidate]
    ):
        self._get_others = get_others
        self._installed = installed
        # Created lazily on first need, so the index is not touched when
        # the installed candidate alone satisfies the caller.
        self._others = None  # type: Optional[Iterator[Candidate]]
        # Versions already handed out; used to drop duplicates.
        self._returned = set()  # type: Set[_BaseVersion]

    def __next__(self):
        # type: () -> Candidate
        installed = self._installed
        if installed and installed.version not in self._returned:
            self._returned.add(installed.version)
            return installed

        if self._others is None:
            self._others = self._get_others()

        # Pull from the index until an unseen version shows up; exhaustion
        # of the underlying iterator propagates as StopIteration.
        while True:
            candidate = next(self._others)
            if candidate.version not in self._returned:
                break
        self._returned.add(candidate.version)
        return candidate

    next = __next__  # XXX: Python 2.
45+
46+
47+
class _InstalledReplacesCandidatesIterator(collections_abc.Iterator):
    """Iterator for ``FoundCandidates``.

    This iterator is used when the resolver prefers to upgrade an
    already-installed package. Candidates from index are returned in their
    normal ordering (expected to be newest first). The already-installed
    candidate takes the place of the index candidate with the same version.

    If the index does not offer the installed version at all, the installed
    candidate is still yielded: it is inserted right before the first index
    candidate older than it, or yielded last when no older candidate exists
    (including when the index returns nothing). Each version is yielded at
    most once.
    """
    def __init__(
        self,
        get_others,  # type: Callable[[], Iterator[Candidate]]
        installed,  # type: Optional[Candidate]
    ):
        self._installed = installed
        self._get_others = get_others
        # Merged stream of candidates; built lazily on the first ``next()``
        # so constructing the iterator never touches the index.
        self._others = None  # type: Optional[Iterator[Candidate]]
        self._returned = set()  # type: Set[_BaseVersion]

    def _iter_merged(self):
        # type: () -> Iterator[Candidate]
        """Yield index candidates with the installed one merged in."""
        installed = self._installed
        installed_pending = bool(installed)
        for cand in self._get_others():
            # Emit the installed candidate as soon as we reach an index
            # candidate that is not newer than it. An equal version is then
            # skipped below as a duplicate, which gives the "replace" case.
            if installed_pending and installed.version >= cand.version:
                installed_pending = False
                self._returned.add(installed.version)
                yield installed
            if cand.version in self._returned:
                continue
            self._returned.add(cand.version)
            yield cand
        # The installed version is older than everything on the index, or
        # the index is empty: it must not be dropped silently.
        if installed_pending:
            self._returned.add(installed.version)
            yield installed

    def __next__(self):
        # type: () -> Candidate
        if self._others is None:
            self._others = self._iter_merged()
        return next(self._others)

    next = __next__  # XXX: Python 2.
77+
78+
79+
class FoundCandidates(collections_abc.Sequence):
    """A lazy sequence to provide candidates to the resolver.

    The intended usage is to return this from `find_matches()` so the resolver
    can iterate through the sequence multiple times, but only access the index
    page when remote packages are actually needed. This improves performance
    when suitable candidates are already installed on disk.

    Every iteration builds a fresh iterator, so indexing and ``len()`` walk
    the candidates from the start each time they are called.
    """
    def __init__(
        self,
        get_others,  # type: Callable[[], Iterator[Candidate]]
        installed,  # type: Optional[Candidate]
        prefers_installed,  # type: bool
    ):
        self._prefers_installed = prefers_installed
        self._installed = installed
        self._get_others = get_others

    def __getitem__(self, index):
        # type: (int) -> Candidate
        # Linear scan: there is no random access into a lazy stream.
        position = 0
        for candidate in self:
            if index == position:
                return candidate
            position += 1
        raise IndexError(index)

    def __iter__(self):
        # type: () -> Iterator[Candidate]
        iterator_type = (
            _InstalledFirstCandidatesIterator
            if self._prefers_installed
            else _InstalledReplacesCandidatesIterator
        )
        return iterator_type(self._get_others, self._installed)

    def __len__(self):
        # type: () -> int
        count = 0
        for _ in self:
            count += 1
        return count

    def __bool__(self):
        # type: () -> bool
        # Only fall back to iterating (which may hit the index) when the
        # installed candidate cannot answer the question by itself.
        if not (self._prefers_installed and self._installed):
            return any(self)
        return True

    __nonzero__ = __bool__  # XXX: Python 2.

src/pip/_internal/resolution/resolvelib/provider.py

+24-73
Original file line numberDiff line numberDiff line change
@@ -45,30 +45,26 @@ def __init__(
4545
self._upgrade_strategy = upgrade_strategy
4646
self._user_requested = user_requested
4747

48-
def _sort_matches(self, matches):
49-
# type: (Iterable[Candidate]) -> Sequence[Candidate]
50-
51-
# The requirement is responsible for returning a sequence of potential
52-
# candidates, one per version. The provider handles the logic of
53-
# deciding the order in which these candidates should be passed to
54-
# the resolver.
55-
56-
# The `matches` argument is a sequence of candidates, one per version,
57-
# which are potential options to be installed. The requirement will
58-
# have already sorted out whether to give us an already-installed
59-
# candidate or a version from PyPI (i.e., it will deal with options
60-
# like --force-reinstall and --ignore-installed).
61-
62-
# We now work out the correct order.
63-
#
64-
# 1. If no other considerations apply, later versions take priority.
65-
# 2. An already installed distribution is preferred over any other,
66-
# unless the user has requested an upgrade.
67-
# Upgrades are allowed when:
68-
# * The --upgrade flag is set, and
69-
# - The project was specified on the command line, or
70-
# - The project is a dependency and the "eager" upgrade strategy
71-
# was requested.
48+
def identify(self, dependency):
49+
# type: (Union[Requirement, Candidate]) -> str
50+
return dependency.name
51+
52+
def get_preference(
53+
self,
54+
resolution, # type: Optional[Candidate]
55+
candidates, # type: Sequence[Candidate]
56+
information # type: Sequence[Tuple[Requirement, Candidate]]
57+
):
58+
# type: (...) -> Any
59+
transitive = all(parent is not None for _, parent in information)
60+
return (transitive, bool(candidates))
61+
62+
def find_matches(self, requirements):
63+
# type: (Sequence[Requirement]) -> Iterable[Candidate]
64+
if not requirements:
65+
return []
66+
name = requirements[0].name
67+
7268
def _eligible_for_upgrade(name):
7369
# type: (str) -> bool
7470
"""Are upgrades allowed for this project?
@@ -87,56 +83,11 @@ def _eligible_for_upgrade(name):
8783
return (name in self._user_requested)
8884
return False
8985

90-
def sort_key(c):
91-
# type: (Candidate) -> int
92-
"""Return a sort key for the matches.
93-
94-
The highest priority should be given to installed candidates that
95-
are not eligible for upgrade. We use the integer value in the first
96-
part of the key to sort these before other candidates.
97-
98-
We only pull the installed candidate to the bottom (i.e. most
99-
preferred), but otherwise keep the ordering returned by the
100-
requirement. The requirement is responsible for returning a list
101-
otherwise sorted for the resolver, taking account for versions
102-
and binary preferences as specified by the user.
103-
"""
104-
if c.is_installed and not _eligible_for_upgrade(c.name):
105-
return 1
106-
return 0
107-
108-
return sorted(matches, key=sort_key)
109-
110-
def identify(self, dependency):
111-
# type: (Union[Requirement, Candidate]) -> str
112-
return dependency.name
113-
114-
def get_preference(
115-
self,
116-
resolution, # type: Optional[Candidate]
117-
candidates, # type: Sequence[Candidate]
118-
information # type: Sequence[Tuple[Requirement, Optional[Candidate]]]
119-
):
120-
# type: (...) -> Any
121-
"""Return a sort key to determine what dependency to look next.
122-
123-
A smaller value makes a dependency higher priority. We put direct
124-
(user-requested) dependencies first since they may contain useful
125-
user-specified version ranges. Users tend to expect us to catch
126-
problems in them early as well.
127-
"""
128-
transitive = all(parent is not None for _, parent in information)
129-
return (transitive, len(candidates))
130-
131-
def find_matches(self, requirements):
132-
# type: (Sequence[Requirement]) -> Iterable[Candidate]
133-
if not requirements:
134-
return []
135-
constraint = self._constraints.get(
136-
requirements[0].name, Constraint.empty(),
86+
return self._factory.find_candidates(
87+
requirements,
88+
constraint=self._constraints.get(name, Constraint.empty()),
89+
prefers_installed=(not _eligible_for_upgrade(name)),
13790
)
138-
candidates = self._factory.find_candidates(requirements, constraint)
139-
return reversed(self._sort_matches(candidates))
14091

14192
def is_satisfied_by(self, requirement, candidate):
14293
# type: (Requirement, Candidate) -> bool

tests/unit/resolution_resolvelib/test_requirement.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ def test_new_resolver_correct_number_of_matches(test_cases, factory):
5858
"""Requirements should return the correct number of candidates"""
5959
for spec, _, match_count in test_cases:
6060
req = factory.make_requirement_from_spec(spec, comes_from=None)
61-
matches = factory.find_candidates([req], Constraint.empty())
61+
matches = factory.find_candidates(
62+
[req], Constraint.empty(), prefers_installed=False,
63+
)
6264
assert len(list(matches)) == match_count
6365

6466

@@ -67,7 +69,10 @@ def test_new_resolver_candidates_match_requirement(test_cases, factory):
6769
"""
6870
for spec, _, _ in test_cases:
6971
req = factory.make_requirement_from_spec(spec, comes_from=None)
70-
for c in factory.find_candidates([req], Constraint.empty()):
72+
candidates = factory.find_candidates(
73+
[req], Constraint.empty(), prefers_installed=False,
74+
)
75+
for c in candidates:
7176
assert isinstance(c, Candidate)
7277
assert req.is_satisfied_by(c)
7378

0 commit comments

Comments
 (0)