Skip to content

Commit 745ca16

Browse files
committed
Use git's partial clone feature to speed up pip
Clone with --filter=blob:none, which fetches all of the repository's metadata up front but fetches blobs only on demand, as they are needed by checkout. Since pip typically needs the blobs for only a single revision, this can be a big improvement, especially when fetching from repositories with a lot of history, and particularly over slower network connections. Added a unit test for the rev-less path, and confirmed that both branches of the new if/else are exercised by the unit tests.
1 parent b400ee3 commit 745ca16

File tree

3 files changed

+108
-1
lines changed

3 files changed

+108
-1
lines changed

news/9086.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Use git's partial clone feature (git 2.17 and newer) to speed up source retrieval from a Git URL, whether or not a revision is specified.

src/pip/_internal/vcs/git.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,21 @@ def fetch_new(self, dest, url, rev_options):
259259
# type: (str, HiddenText, RevOptions) -> None
260260
rev_display = rev_options.to_display()
261261
logger.info('Cloning %s%s to %s', url, rev_display, display_path(dest))
262-
self.run_command(make_command('clone', '-q', url, dest))
262+
if self.get_git_version() >= (2, 17):
263+
# Git added support for partial clone in 2.17
264+
# https://git-scm.com/docs/partial-clone
265+
# Speeds up cloning by functioning without a complete copy of repository
266+
self.run_command(make_command(
267+
'clone', '--filter=blob:none', '-q', url, dest,
268+
))
269+
else:
270+
self.run_command(make_command('clone', '-q', url, dest))
263271

264272
if rev_options.rev:
265273
# Then a specific revision was requested.
266274
rev_options = self.resolve_revision(dest, url, rev_options)
267275
branch_name = getattr(rev_options, 'branch_name', None)
276+
logger.debug('Rev options %s, branch_name %s', rev_options, branch_name)
268277
if branch_name is None:
269278
# Only do a checkout if the current commit id doesn't match
270279
# the requested revision.

tests/functional/test_vcs_git.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import os
6+
from unittest.mock import patch
67

78
import pytest
89

@@ -282,3 +283,99 @@ def test_resolve_commit_not_on_branch(script, tmp_path):
282283
# check we can fetch our commit
283284
rev_options = Git.make_rev_options(commit)
284285
Git().fetch_new(str(clone_path), repo_path.as_uri(), rev_options)
286+
287+
288+
def _initialize_clonetest_server(repo_path, script, enable_partial_clone):
    """Create a one-commit git repository that acts as the clone-test "server".

    :param repo_path: Path object for the repository directory; the directory
        is created here via ``mkdir()``.
    :param script: Test environment whose ``run()`` method executes the git
        commands.
    :param enable_partial_clone: When true, set the ``uploadpack`` options
        that enable filtering support on the server side.
    :return: Path of the committed file (``file.txt``) so that callers can
        modify it and create further commits.
    """
    # Every git command runs inside the repository.  Pass the directory as a
    # plain string for all of them so the invocations are consistent.
    repo_dir = str(repo_path)

    repo_path.mkdir()
    script.run("git", "init", cwd=repo_dir)
    repo_file = repo_path / "file.txt"
    repo_file.write_text(".")
    script.run("git", "add", "file.txt", cwd=repo_dir)
    script.run("git", "commit", "-m", "initial commit", cwd=repo_dir)

    # Enable filtering support on server
    if enable_partial_clone:
        script.run(
            "git", "config", "uploadpack.allowFilter", "true", cwd=repo_dir
        )
        script.run(
            "git", "config", "uploadpack.allowanysha1inwant", "true", cwd=repo_dir
        )

    return repo_file
302+
303+
304+
@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone(script, tmp_path):
    """Test partial clone w/ a git-server that supports it.

    Clones a filter-enabled repository twice (once at HEAD, once at an
    explicit commit), adds a second commit on the server, and checks that
    ``git pull`` in each clone does not warn that the server ignored the
    filter.
    """
    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=True
    )
    # Keep the clones beside the server repository rather than inside its
    # working tree.
    clone_path1 = tmp_path / "clone1"
    clone_path2 = tmp_path / "clone2"

    commit = script.run(
        "git", "rev-parse", "HEAD", cwd=str(repo_path)
    ).stdout.strip()

    # Check that we can clone at HEAD
    Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
    # Check that we can clone to commit
    Git().fetch_new(
        str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit)
    )

    # Write some additional stuff to git pull
    repo_file.write_text("..")
    script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

    # Make sure git pull works - with server supporting filtering
    for clone_path in (clone_path1, clone_path2):
        pull_result = script.run("git", "pull", cwd=str(clone_path))
        assert (
            "warning: filtering not recognized by server, ignoring"
            not in pull_result.stderr
        )
332+
333+
334+
@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old")
def test_partial_clone_without_server_support(script, tmp_path):
    """Test partial clone w/ a git-server that does not support it.

    Clones a repository that has not enabled filtering twice (once at HEAD,
    once at an explicit commit), adds a second commit on the server, and
    checks that ``git pull`` in each clone still runs and reports that the
    server ignored the filter.
    """
    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=False
    )
    # Keep the clones beside the server repository rather than inside its
    # working tree.
    clone_path1 = tmp_path / "clone1"
    clone_path2 = tmp_path / "clone2"

    commit = script.run(
        "git", "rev-parse", "HEAD", cwd=str(repo_path)
    ).stdout.strip()

    # Check that we can clone at HEAD
    Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options())
    # Check that we can clone to commit
    Git().fetch_new(
        str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit)
    )

    # Write some additional stuff to git pull
    repo_file.write_text("..")
    script.run("git", "commit", "-am", "second commit", cwd=str(repo_path))

    # Make sure git pull works - even though server doesn't support filtering
    for clone_path in (clone_path1, clone_path2):
        pull_result = script.run("git", "pull", cwd=str(clone_path))
        assert (
            "warning: filtering not recognized by server, ignoring"
            in pull_result.stderr
        )
362+
363+
364+
def test_clone_without_partial_clone_support(script, tmp_path):
    """Older git clients don't support partial clone. Test the fallback path.

    ``Git.get_git_version`` is patched to report 2.16 while cloning, so
    ``fetch_new`` takes the branch that does not pass ``--filter``.  A later
    ``git pull`` in the clone must then not emit the filter warning.
    """
    repo_path = tmp_path / "repo"
    repo_file = _initialize_clonetest_server(
        repo_path, script, enable_partial_clone=True
    )
    # Keep the clone beside the server repository rather than inside its
    # working tree.
    clone_path = tmp_path / "clone1"

    # Check that we can clone w/ old version of git w/o --filter
    with patch("pip._internal.vcs.git.Git.get_git_version", return_value=(2, 16)):
        Git().fetch_new(str(clone_path), repo_path.as_uri(), Git.make_rev_options())

    repo_file.write_text("...")
    script.run("git", "commit", "-am", "third commit", cwd=str(repo_path))

    # Should work fine w/o attempting to use `--filter` args
    pull_result = script.run("git", "pull", cwd=str(clone_path))
    assert (
        "warning: filtering not recognized by server, ignoring"
        not in pull_result.stderr
    )

0 commit comments

Comments
 (0)