Skip to content

Commit e82b567

Browse files
authored
Add is_resource() and contents() (python#37)
Support for is_resource() and contents()
1 parent 3e9cda3 commit e82b567

File tree

14 files changed

+492
-54
lines changed

14 files changed

+492
-54
lines changed

importlib_resources/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77

88
if sys.version_info >= (3,):
9-
from importlib_resources._py3 import open, path, read
9+
from importlib_resources._py3 import (
10+
contents, is_resource, open, path, read)
1011
else:
11-
from importlib_resources._py2 import open, path, read
12+
from importlib_resources._py2 import (
13+
contents, is_resource, open, path, read)

importlib_resources/_py2.py

+114
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import errno
23
import tempfile
34

45
from ._compat import FileNotFoundError
@@ -7,6 +8,7 @@
78
from importlib import import_module
89
from io import BytesIO, open as io_open
910
from pathlib2 import Path
11+
from zipfile import ZipFile
1012

1113

1214
def _get_package(package):
@@ -120,3 +122,115 @@ def path(package, file_name):
120122
os.remove(raw_path)
121123
except FileNotFoundError:
122124
pass
125+
126+
127+
def is_resource(package, file_name):
    """True if file_name is a resource inside package.

    Directories are *not* resources.

    The package may live on the file system or inside a zip archive.  An
    OSError other than ENOENT/ENOTDIR raised while listing the package is
    propagated; a missing package directory simply yields False.
    """
    package = _get_package(package)
    # Called for its checking side effect only; the return value is not
    # used here.  NOTE(review): presumably this rejects names that contain
    # a path separator -- the helper is not visible in this view.
    _normalize_path(file_name)
    try:
        package_contents = set(contents(package))
    except OSError as error:
        if error.errno not in (errno.ENOENT, errno.ENOTDIR):
            # We won't hit this in the Python 2 tests, so it'll appear
            # uncovered.  We could mock os.listdir() to return a non-ENOENT
            # or ENOTDIR, but then we'd have to depend on another external
            # library since Python 2 doesn't have unittest.mock.  It's not
            # worth it.
            raise                                   # pragma: ge3
        return False
    if file_name not in package_contents:
        return False
    # Just because the given file_name lives as an entry in the package's
    # contents doesn't necessarily mean it's a resource.  Directories are not
    # resources, so let's try to find out if it's a directory or not.
    package_directory = Path(package.__file__).parent
    path = package_directory / file_name
    if path.is_file():
        return True
    if path.is_dir():
        return False
    # If it's not a file and it's not a directory, what is it?  Well, this
    # means the file doesn't exist on the file system, so it probably lives
    # inside a zip file.  We have to crack open the zip, look at its table of
    # contents, and make sure that this entry doesn't have sub-entries.
    archive_path = package.__loader__.archive       # type: ignore
    with ZipFile(archive_path) as zf:
        toc = zf.namelist()
    candidate_path = package_directory.relative_to(archive_path) / file_name
    for entry in toc:                               # pragma: nobranch
        try:
            relative_to_candidate = Path(entry).relative_to(candidate_path)
        except ValueError:
            # The two paths aren't relative to each other so we can ignore it.
            continue
        # Directories are usually implicit in a zip file, so we infer
        # 'directory-ness' from the number of path components below the
        # candidate: zero extra parts means a file (a resource), more than
        # zero means a directory.  Some archives do list directories
        # explicitly, with a trailing '/'.  Path() drops that slash, so such
        # an entry would look like an exact match; check for it so that an
        # explicit directory entry is never reported as a resource.
        if entry.endswith('/'):
            return False
        return len(relative_to_candidate.parts) == 0
    # I think it's impossible to get here.  It would mean that we are looking
    # for a resource in a zip file, there's an entry matching it in the return
    # value of contents(), but we never actually found it in the zip's table of
    # contents.
    raise AssertionError('Impossible situation')
183+
184+
185+
def contents(package):
    """Return the list of entries in package.

    Note that not all entries are resources.  Specifically, directories are
    not considered resources.  Use `is_resource()` on each entry returned here
    to check if it is a resource or not.

    This is a generator.  Each entry name is produced once.  If the package
    directory cannot be listed and the package's loader has no `archive`
    attribute, the original OSError is re-raised.
    """
    package = _get_package(package)
    package_directory = Path(package.__file__).parent
    try:
        # Only the listdir() call can fail in the way we care about, so keep
        # it alone inside the try block.
        entries = os.listdir(str(package_directory))
    except OSError as error:
        if error.errno not in (errno.ENOENT, errno.ENOTDIR):
            # We won't hit this in the Python 2 tests, so it'll appear
            # uncovered.  We could mock os.listdir() to return a non-ENOENT
            # or ENOTDIR, but then we'd have to depend on another external
            # library since Python 2 doesn't have unittest.mock.  It's not
            # worth it.
            raise                                   # pragma: ge3
        # The package is probably in a zip file.
        archive_path = getattr(package.__loader__, 'archive', None)
        if archive_path is None:
            raise
    else:
        # Python 2 doesn't support `yield from`.
        for entry in entries:
            yield entry
        return
    relpath = package_directory.relative_to(archive_path)
    with ZipFile(archive_path) as zf:
        toc = zf.namelist()
    seen = set()                                    # type: Set
    for filename in toc:
        # Keep only the archive members that really live under the package
        # directory.  Merely slicing off len(relpath.parts) leading
        # components would also accept members of unrelated packages stored
        # in the same archive.
        try:
            subparts = Path(filename).relative_to(relpath).parts
        except ValueError:
            continue
        if not subparts:
            # This is the package directory itself (an explicit 'pkg/'
            # entry); it is not something inside the package.
            continue
        # A single remaining part is an entry directly inside the package.
        # More than one part means the member lives in a subdirectory, which
        # usually has no entry of its own in the archive, so we report the
        # first component instead.  Either way each name is reported only
        # once, even when the archive also lists the directory explicitly.
        name = subparts[0]
        if name not in seen:
            seen.add(name)
            yield name

importlib_resources/_py3.py

+142-28
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
from io import BytesIO, TextIOWrapper
1212
from pathlib import Path
1313
from types import ModuleType
14-
from typing import Iterator, Union
14+
from typing import Iterator, Optional, Set, Union # noqa: F401
1515
from typing import cast
1616
from typing.io import IO
17+
from zipfile import ZipFile
1718

1819

1920
Package = Union[ModuleType, str]
@@ -47,42 +48,53 @@ def _normalize_path(path) -> str:
4748
return file_name
4849

4950

51+
def _get_resource_reader(
        package: ModuleType) -> Optional[resources_abc.ResourceReader]:
    """Return the package's loader as a ResourceReader, or None.

    The loader counts as a ResourceReader when it has an `open_resource`
    attribute.
    """
    # Duck-type instead of using an issubclass() check: abc's
    # __subclasscheck__() hook wants to create a weak reference to the
    # object, but zipimport.zipimporter does not support weak references,
    # which results in a TypeError.
    if not hasattr(package.__spec__.loader, 'open_resource'):
        return None
    return cast(resources_abc.ResourceReader, package.__spec__.loader)
61+
62+
5063
def open(package: Package,
5164
file_name: FileName,
5265
encoding: str = None,
5366
errors: str = None) -> IO:
5467
"""Return a file-like object opened for reading of the resource."""
5568
file_name = _normalize_path(file_name)
5669
package = _get_package(package)
57-
if hasattr(package.__spec__.loader, 'open_resource'):
58-
reader = cast(resources_abc.ResourceReader, package.__spec__.loader)
70+
reader = _get_resource_reader(package)
71+
if reader is not None:
5972
return _wrap_file(reader.open_resource(file_name), encoding, errors)
73+
# Using pathlib doesn't work well here due to the lack of 'strict'
74+
# argument for pathlib.Path.resolve() prior to Python 3.6.
75+
absolute_package_path = os.path.abspath(package.__spec__.origin)
76+
package_path = os.path.dirname(absolute_package_path)
77+
full_path = os.path.join(package_path, file_name)
78+
if encoding is None:
79+
args = dict(mode='rb')
6080
else:
61-
# Using pathlib doesn't work well here due to the lack of 'strict'
62-
# argument for pathlib.Path.resolve() prior to Python 3.6.
63-
absolute_package_path = os.path.abspath(package.__spec__.origin)
64-
package_path = os.path.dirname(absolute_package_path)
65-
full_path = os.path.join(package_path, file_name)
66-
if encoding is None:
67-
args = dict(mode='rb')
68-
else:
69-
args = dict(mode='r', encoding=encoding, errors=errors)
81+
args = dict(mode='r', encoding=encoding, errors=errors)
82+
try:
83+
return builtins_open(full_path, **args) # type: ignore
84+
except IOError:
85+
# Just assume the loader is a resource loader; all the relevant
86+
# importlib.machinery loaders are and an AttributeError for
87+
# get_data() will make it clear what is needed from the loader.
88+
loader = cast(ResourceLoader, package.__spec__.loader)
7089
try:
71-
return builtins_open(full_path, **args) # type: ignore
90+
data = loader.get_data(full_path)
7291
except IOError:
73-
# Just assume the loader is a resource loader; all the relevant
74-
# importlib.machinery loaders are and an AttributeError for
75-
# get_data() will make it clear what is needed from the loader.
76-
loader = cast(ResourceLoader, package.__spec__.loader)
77-
try:
78-
data = loader.get_data(full_path)
79-
except IOError:
80-
package_name = package.__spec__.name
81-
message = '{!r} resource not found in {!r}'.format(
82-
file_name, package_name)
83-
raise FileNotFoundError(message)
84-
else:
85-
return _wrap_file(BytesIO(data), encoding, errors)
92+
package_name = package.__spec__.name
93+
message = '{!r} resource not found in {!r}'.format(
94+
file_name, package_name)
95+
raise FileNotFoundError(message)
96+
else:
97+
return _wrap_file(BytesIO(data), encoding, errors)
8698

8799

88100
def read(package: Package,
@@ -119,8 +131,8 @@ def path(package: Package, file_name: FileName) -> Iterator[Path]:
119131
"""
120132
file_name = _normalize_path(file_name)
121133
package = _get_package(package)
122-
if hasattr(package.__spec__.loader, 'resource_path'):
123-
reader = cast(resources_abc.ResourceReader, package.__spec__.loader)
134+
reader = _get_resource_reader(package)
135+
if reader is not None:
124136
try:
125137
yield Path(reader.resource_path(file_name))
126138
return
@@ -148,3 +160,105 @@ def path(package: Package, file_name: FileName) -> Iterator[Path]:
148160
os.remove(raw_path)
149161
except FileNotFoundError:
150162
pass
163+
164+
165+
def is_resource(package: Package, file_name: str) -> bool:
    """True if file_name is a resource inside package.

    Directories are *not* resources.

    If the package's loader provides the resource reader API, the question
    is delegated to it.  Otherwise the file system is consulted, and as a
    last resort the zip archive the package was loaded from.
    """
    package = _get_package(package)
    # Keep the normalized name, exactly as open() and path() do, so that
    # every lookup below works with the same plain string.
    file_name = _normalize_path(file_name)
    reader = _get_resource_reader(package)
    if reader is not None:
        return reader.is_resource(file_name)
    try:
        package_contents = set(contents(package))
    except (NotADirectoryError, FileNotFoundError):
        return False
    if file_name not in package_contents:
        return False
    # Just because the given file_name lives as an entry in the package's
    # contents doesn't necessarily mean it's a resource.  Directories are not
    # resources, so let's try to find out if it's a directory or not.
    package_directory = Path(package.__spec__.origin).parent
    path = package_directory / file_name
    if path.is_file():
        return True
    if path.is_dir():
        return False
    # If it's not a file and it's not a directory, what is it?  Well, this
    # means the file doesn't exist on the file system, so it probably lives
    # inside a zip file.  We have to crack open the zip, look at its table of
    # contents, and make sure that this entry doesn't have sub-entries.
    archive_path = package.__spec__.loader.archive   # type: ignore
    with ZipFile(archive_path) as zf:
        toc = zf.namelist()
    candidate_path = package_directory.relative_to(archive_path) / file_name
    for entry in toc:                               # pragma: nobranch
        try:
            relative_to_candidate = Path(entry).relative_to(candidate_path)
        except ValueError:
            # The two paths aren't relative to each other so we can ignore it.
            continue
        # Directories are usually implicit in a zip file, so we infer
        # 'directory-ness' from the number of path components below the
        # candidate: zero extra parts means a file (a resource), more than
        # zero means a directory.  Some archives do list directories
        # explicitly, with a trailing '/'.  Path() drops that slash, so such
        # an entry would look like an exact match; check for it so that an
        # explicit directory entry is never reported as a resource.
        if entry.endswith('/'):
            return False
        return len(relative_to_candidate.parts) == 0
    # I think it's impossible to get here.  It would mean that we are looking
    # for a resource in a zip file, there's an entry matching it in the return
    # value of contents(), but we never actually found it in the zip's table of
    # contents.
    raise AssertionError('Impossible situation')
217+
218+
219+
def contents(package: Package) -> Iterator[str]:
    """Return the list of entries in package.

    Note that not all entries are resources.  Specifically, directories are
    not considered resources.  Use `is_resource()` on each entry returned here
    to check if it is a resource or not.

    This is a generator.  If the package's loader provides the resource
    reader API, its contents() are yielded unchanged.  Otherwise the package
    directory is listed, falling back to the table of contents of the zip
    archive named by the loader's `archive` attribute.  When the directory
    cannot be listed and there is no such attribute, the original
    NotADirectoryError or FileNotFoundError is re-raised.
    """
    package = _get_package(package)
    reader = _get_resource_reader(package)
    if reader is not None:
        yield from reader.contents()
        return
    package_directory = Path(package.__spec__.origin).parent
    try:
        # Only the listdir() call can fail in the way we care about, so keep
        # it alone inside the try block.
        entries = os.listdir(str(package_directory))
    except (NotADirectoryError, FileNotFoundError):
        # The package is probably in a zip file.
        archive_path = getattr(package.__spec__.loader, 'archive', None)
        if archive_path is None:
            raise
    else:
        yield from entries
        return
    relpath = package_directory.relative_to(archive_path)
    with ZipFile(archive_path) as zf:
        toc = zf.namelist()
    seen = set()                                    # type: Set[str]
    for filename in toc:
        # Keep only the archive members that really live under the package
        # directory.  Merely slicing off len(relpath.parts) leading
        # components would also accept members of unrelated packages stored
        # in the same archive.
        try:
            subparts = Path(filename).relative_to(relpath).parts
        except ValueError:
            continue
        if not subparts:
            # This is the package directory itself (an explicit 'pkg/'
            # entry); it is not something inside the package.
            continue
        # A single remaining part is an entry directly inside the package.
        # More than one part means the member lives in a subdirectory, which
        # usually has no entry of its own in the archive, so we report the
        # first component instead.  Either way each name is reported only
        # once, even when the archive also lists the directory explicitly.
        name = subparts[0]
        if name not in seen:
            seen.add(name)
            yield name

importlib_resources/abc.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# We use mypy's comment syntax here since this file must be compatible with
77
# both Python 2 and 3.
88
try:
9-
from typing import BinaryIO, Text # noqa: F401
9+
from typing import BinaryIO, Iterator, Text # noqa: F401
1010
except ImportError:
1111
# Python 2
1212
pass
@@ -41,3 +41,18 @@ def resource_path(self, path):
4141
# NotImplementedError so that if this method is accidentally called,
4242
# it'll still do the right thing.
4343
raise FileNotFoundError
44+
45+
@abstractmethod
def is_resource(self, path):
    # type: (Text) -> bool
    """Return True if the named path is a resource.

    Files are resources, directories are not.

    This method is abstract, so concrete readers must override it.  The
    default body raises FileNotFoundError if it is ever called.
    """
    raise FileNotFoundError
53+
54+
@abstractmethod
def contents(self):
    # type: () -> Iterator[str]
    """Return an iterator over the names of the entries in the package.

    The names are strings.  This method is abstract, so concrete readers
    must override it.  The default body raises FileNotFoundError if it is
    ever called.
    """
    raise FileNotFoundError

0 commit comments

Comments
 (0)