Skip to content

Add cache command #3968

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 26 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ Reference Guide
pip_show
pip_search
pip_wheel
pip_cache
pip_hash
24 changes: 24 additions & 0 deletions docs/reference/pip_cache.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
.. _`pip cache`:

pip cache
---------

.. contents::


Usage
*****

.. pip-command-usage:: cache


Description
***********

.. pip-command-description:: cache


Options
*******

.. pip-command-options:: cache
3 changes: 3 additions & 0 deletions pip/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pip.commands.install import InstallCommand
from pip.commands.uninstall import UninstallCommand
from pip.commands.wheel import WheelCommand
from pip.commands.cache import CacheCommand


commands_dict = {
Expand All @@ -30,6 +31,7 @@
ListCommand.name: ListCommand,
CheckCommand.name: CheckCommand,
WheelCommand.name: WheelCommand,
CacheCommand.name: CacheCommand,
}


Expand All @@ -45,6 +47,7 @@
WheelCommand,
HashCommand,
CompletionCommand,
CacheCommand,
HelpCommand,
]

Expand Down
171 changes: 171 additions & 0 deletions pip/commands/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
from __future__ import absolute_import

import fnmatch
import logging
import os
from os.path import basename, isdir, islink
import textwrap

from pip.basecommand import Command
from pip.exceptions import CommandError
from pip.status_codes import SUCCESS, ERROR
from pip.utils import format_size, rmtree
from pip.utils.filesystem import tree_statistics, find_files
from pip.wheel import Wheel, InvalidWheelFilename
from pip._vendor.pkg_resources import safe_name


logger = logging.getLogger(__name__)


class CacheCommand(Command):
"""\
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the trailing backslash here for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better auto-dedenting behavior; IIRC if the first line of the block quote doesn't start with whitespace the rest of the docstring won't be dedented.

Operate on pip's caches.

Subcommands:
info:
Show information about the caches.
list (wheel cache only):
List filenames of wheels stored in the cache.
remove <pattern|packagename> (wheel cache only):
Remove one or more wheels from the cache. `remove` accepts one or more
package names, filenames, or shell glob expressions matching filenames.
purge:
Remove all items from the cache.
""" # noqa
actions = ["info", "list", "remove", "purge"]
name = "cache"
usage = """
%%prog [options] %s""" % "|".join(actions)
summary = "Operate on pip's caches."

def __init__(self, *args, **kw):
    """Initialise the command and register its ``--type``/``-t`` option.

    All positional and keyword arguments are forwarded unchanged to the
    base class initialiser. The option accepts ``all``, ``http`` or
    ``wheel`` and defaults to ``wheel``; the command's option group is
    then inserted at the front of the parser's option groups.
    """
    super(CacheCommand, self).__init__(*args, **kw)

    known_types = ["all", "http", "wheel"]
    # "%%default" survives the interpolation as "%default", which the
    # option parser expands when it renders the help text.
    type_help = (
        "The cache upon which to operate: %s (default: %%default)" %
        ", ".join(known_types)
    )

    self.cmd_opts.add_option(
        "--type", "-t",
        choices=known_types,
        default="wheel",
        help=type_help
    )
    self.parser.insert_option_group(0, self.cmd_opts)

def run(self, options, args):
    """Dispatch to the ``action_<name>`` method named by the first argument.

    :param options: parsed command-line options, passed through to the
        action method.
    :param args: positional arguments; ``args[0]`` selects the
        subcommand and the remainder is handed to the action method.
    :returns: whatever the selected action method returns.
    :raises CommandError: if no subcommand was given or it is not one of
        ``self.actions``.
    """
    subcommand = args[0] if args else None
    if subcommand not in self.actions:
        raise CommandError(
            "Please provide one of these subcommands: %s" %
            ", ".join(self.actions)
        )
    handler = getattr(self, "action_%s" % subcommand)
    return handler(options, args[1:])

@staticmethod
def get_cache_location(cache_root, cache_type):
    """Return the directory that holds the requested cache.

    :param cache_root: root directory of all caches.
    :param cache_type: ``"all"``, ``"wheel"`` or ``"http"``.
    :returns: ``cache_root`` itself for ``"all"``; otherwise
        ``cache_root`` joined with ``"wheels"`` or ``"http"``.
    :raises KeyError: for any other ``cache_type``.
    """
    if cache_type == "all":
        return cache_root
    subdirectory = {"wheel": "wheels", "http": "http"}
    return os.path.join(cache_root, subdirectory[cache_type])

@staticmethod
def wheels_matching(cache_location, pattern):
    """Return paths of cached wheels whose filenames match ``pattern``.

    ``pattern`` is interpreted in one of two ways:

    * If it contains ``*`` or ``?``, or ends with ``.whl``, it is used
      as a glob against file basenames beneath ``cache_location`` and
      only results ending in ``.whl`` are kept.
    * Otherwise it is treated as a package name: every ``*.whl`` file is
      parsed with ``Wheel`` and kept when its lower-cased name equals
      the lower-cased ``safe_name`` of ``pattern``. Files whose names
      are not valid wheel filenames are skipped.

    :returns: list of paths as produced by ``find_files``.
    """
    is_glob = "*" in pattern or "?" in pattern
    if is_glob or pattern.endswith(".whl"):
        candidates = find_files(cache_location, pattern)
        # The glob may have matched non-wheel files; drop them.
        return fnmatch.filter(candidates, "*.whl")

    cached_wheels = find_files(cache_location, "*.whl")
    wanted = safe_name(pattern).lower()
    selected = []
    for path in cached_wheels:
        try:
            parsed = Wheel(basename(path))
        except InvalidWheelFilename:
            continue
        if parsed.name.lower() == wanted:
            selected.append(path)
    return selected

def action_info(self, options, args):
    """Log location, file count and size for the selected cache(s).

    :param options: parsed options; ``options.type`` selects the cache
        (``"all"`` reports the HTTP cache and then the wheel cache) and
        ``options.cache_dir`` is the cache root.
    :param args: unused.
    :returns: ``SUCCESS``.
    """
    labels = {"wheel": "Wheel cache", "http": "HTTP cache"}
    caches = ["http", "wheel"] if options.type == "all" else [options.type]
    sections = []
    for cache_type in caches:
        location = self.get_cache_location(options.cache_dir, cache_type)
        stats = tree_statistics(location)
        sections.append("\n".join([
            "%s info:" % labels[cache_type],
            "Location: %s" % location,
            "Files: %s" % stats["files"],
            "Size: %s" % format_size(stats["size"]),
        ]))
    # Separate sections with a blank line. Use "\n" rather than
    # os.linesep: the log stream is written in text mode, which already
    # translates newlines, so os.linesep would yield "\r\r\n" on Windows.
    logger.info("\n\n".join(sections))
    return SUCCESS

def action_list(self, options, args):
    """Log the sorted basenames of all wheels in the wheel cache.

    Nothing is logged when the cache contains no ``*.whl`` files.

    :param options: parsed options; ``options.type`` must be
        ``"wheel"`` and ``options.cache_dir`` is the cache root.
    :param args: unused.
    :returns: ``SUCCESS``.
    :raises CommandError: if ``options.type`` is not ``"wheel"``.
    """
    if options.type != "wheel":
        raise CommandError(
            "pip cache list only operates on the wheel cache.")
    cache_location = self.get_cache_location(options.cache_dir, "wheel")
    wheels = sorted(
        basename(f) for f in find_files(cache_location, "*.whl"))
    if wheels:
        # "\n" rather than os.linesep: the log stream is text mode and
        # translates newlines itself, so os.linesep would produce
        # "\r\r\n" on Windows.
        logger.info("\n".join(wheels))
    return SUCCESS

def action_remove(self, options, args):
if options.type != "wheel":
raise CommandError(
"pip cache remove only operates on the wheel cache.")
if len(args) == 0:
raise CommandError(
"Must specify the filename of (a) wheel(s) to remove.")
cache_location = self.get_cache_location(options.cache_dir, "wheel")
value = SUCCESS
for pattern in args:
matches = self.wheels_matching(cache_location, pattern)
if not matches:
logger.info("No match found for %s" % pattern)
continue
for match in matches:
try:
os.unlink(match)
Copy link
Contributor Author

@tdsmith tdsmith Sep 20, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On reflection, I think this symlink check narrows the window for a symlink attack race condition but doesn't eliminate it.

Let's assume our hapless user is getting rid of the wheels for a library called passwd by running sudo pip cache remove passwd*.

It's bad luck if an attacker manages to replace an intermediate directory of the cache with a symlink to /etc in the interval between os.walk() (in find_files) deciding to enter the directory and listing its contents. (os.walk tries not to follow symlinks into directories.)

This additional check permits a second race: after winning the first race, the attacker would have to make the intermediate directory real again before the check and then make it a symlink again before the os.unlink() call.

I think, rather than layering races, a better mitigation strategy is just to make sure the file looks like a wheel (i.e. matches the glob *.whl).

except OSError as e:
logger.warning(
"Could not remove %s; %s" % (match, e))
value = ERROR
else:
logger.info("Removed %s" % match)
return value

def action_purge(self, options, args):
    """Delete the directory of each selected cache.

    A location that is a symlink, or is not a directory, is skipped with
    an informational message. A failure to remove one cache is logged as
    a warning and does not stop the remaining caches being processed.

    :param options: parsed options; ``options.type`` selects the cache
        (``"all"`` means the HTTP cache and then the wheel cache) and
        ``options.cache_dir`` is the cache root.
    :param args: unused.
    :returns: ``SUCCESS`` if no removal raised ``OSError``, otherwise
        ``ERROR``.
    """
    if options.type == "all":
        targets = ["http", "wheel"]
    else:
        targets = [options.type]

    status = SUCCESS
    for cache_type in targets:
        cache_location = self.get_cache_location(
            options.cache_dir, cache_type)
        # Only ever remove a real directory, never a symlink to one.
        removable = isdir(cache_location) and not islink(cache_location)
        if not removable:
            logger.info("%s is not a directory; skipping"
                        % cache_location)
            continue
        try:
            rmtree(cache_location)
        except OSError as e:
            logger.warning("Could not remove %s; %s" % (cache_location, e))
            status = ERROR
        else:
            logger.info("Removed %s" % cache_location)
    return status
25 changes: 25 additions & 0 deletions pip/utils/filesystem.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import fnmatch
import os
import os.path

Expand Down Expand Up @@ -26,3 +27,27 @@ def check_path_owner(path):
return os.access(path, os.W_OK)
else:
previous, path = path, os.path.dirname(path)


def tree_statistics(path):
    """Compute statistics on a filesystem tree.

    Walks ``path`` recursively with ``os.walk`` and returns a dictionary
    with keys:

    files: number of non-directory entries found
    size: total size in bytes of the entries that could be stat'ed

    An entry whose size cannot be read (for example a dangling symlink,
    or a file removed while the walk is in progress) is still counted in
    ``files`` but contributes nothing to ``size``, instead of aborting
    the whole computation with ``OSError``.
    """
    result = {"files": 0, "size": 0}
    for root, _dirs, files in os.walk(path):
        for filename in files:
            result["files"] += 1
            try:
                result["size"] += os.path.getsize(
                    os.path.join(root, filename))
            except OSError:
                # Listed by the directory scan but not stat-able now.
                continue
    return result


def find_files(path, pattern):
    """Return paths of files beneath ``path``, searched recursively, whose
    filenames match the UNIX-style shell glob ``pattern``.

    Each returned path is the containing directory, as reported by
    ``os.walk(path)``, joined with the matching filename.
    """
    found = []
    for directory, _subdirs, filenames in os.walk(path):
        for name in fnmatch.filter(filenames, pattern):
            found.append(os.path.join(directory, name))
    return found
61 changes: 61 additions & 0 deletions tests/functional/test_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import os
import re

import pytest

from pip.utils import appdirs


def test_cache_rejects_invalid_cache_type(script):
    """An unrecognised ``--type`` value is reported as an invalid choice."""
    outcome = script.pip(
        "cache", "--type", "wombat", "info",
        expect_error=True
    )
    assert "invalid choice" in outcome.stderr


@pytest.mark.parametrize("cache_type", ["all", "wheel", "http"])
def test_cache_info(script, monkeypatch, cache_type):
    """``pip cache info`` reports exactly the location(s) of the chosen cache."""
    result = script.pip("cache", "-t", cache_type, "info")

    # Mirror the script's environment so appdirs resolves the same
    # cache directory that the pip subprocess used.
    for key, value in script.environ.items():
        monkeypatch.setenv(key, value)
    cache_base = appdirs.user_cache_dir("pip")
    wheel_cache_dir = os.path.join(cache_base, "wheels")
    http_cache_dir = os.path.join(cache_base, "http")

    output = result.stdout
    assert "Size:" in output

    if cache_type in ("all", "wheel"):
        assert "Location: %s" % wheel_cache_dir in output
    else:
        assert wheel_cache_dir not in output

    if cache_type in ("all", "http"):
        assert "Location: %s" % http_cache_dir in output
    else:
        assert http_cache_dir not in output


def test_cache_list(script, monkeypatch):
    """``pip cache list`` finds wheels nested anywhere under the wheel cache."""
    # Mirror the script's environment so appdirs resolves the same
    # cache directory that the pip subprocess will use.
    for key, value in script.environ.items():
        monkeypatch.setenv(key, value)
    wheel_cache_dir = os.path.join(appdirs.user_cache_dir("pip"), "wheels")
    nested_dir = os.path.join(wheel_cache_dir, "arbitrary", "pathname")
    os.makedirs(nested_dir)
    wheel_path = os.path.join(nested_dir, "zzz.whl")
    with open(wheel_path, "w"):
        pass  # an empty file is enough; only the filename is listed

    result = script.pip("cache", "list")
    assert "zzz.whl" in result.stdout


def test_cache_remove(script, monkeypatch):
    """``pip cache remove`` requires an argument and deletes the named wheel."""
    # Mirror the script's environment so appdirs resolves the same
    # cache directory that the pip subprocess will use.
    for k, v in script.environ.items():
        monkeypatch.setenv(k, v)
    cache_base = appdirs.user_cache_dir("pip")
    wheel_cache_dir = os.path.join(cache_base, "wheels")
    os.makedirs(wheel_cache_dir)
    wheel_path = os.path.join(wheel_cache_dir, "zzz.whl")
    with open(wheel_path, "w"):
        pass

    # Without a pattern the command must fail and leave the cache alone.
    script.pip("cache", "remove", expect_error=True)
    assert os.path.exists(wheel_path)

    result = script.pip("cache", "remove", "zzz.whl")
    assert re.match(r"^Removed.*zzz\.whl$", result.stdout)
    # Check the side effect itself, not just the log message.
    assert not os.path.exists(wheel_path)