|
11 | 11 | from io import BytesIO, TextIOWrapper
|
12 | 12 | from pathlib import Path
|
13 | 13 | from types import ModuleType
|
14 |
| -from typing import Iterator, Union |
| 14 | +from typing import Iterator, Optional, Set, Union # noqa: F401 |
15 | 15 | from typing import cast
|
16 | 16 | from typing.io import IO
|
| 17 | +from zipfile import ZipFile |
17 | 18 |
|
18 | 19 |
|
19 | 20 | Package = Union[ModuleType, str]
|
@@ -47,42 +48,53 @@ def _normalize_path(path) -> str:
|
47 | 48 | return file_name
|
48 | 49 |
|
49 | 50 |
|
def _get_resource_reader(
        package: ModuleType) -> Optional[resources_abc.ResourceReader]:
    """Return the loader of `package` if it looks like a ResourceReader.

    The loader is duck-typed on the presence of an ``open_resource``
    attribute; when that attribute is missing, None is returned.

    An issubclass() check can't be used here because apparently abc's
    __subclasscheck__() hook wants to create a weak reference to the
    object, but zipimport.zipimporter does not support weak references,
    resulting in a TypeError.
    """
    loader = package.__spec__.loader
    if not hasattr(loader, 'open_resource'):
        return None
    return cast(resources_abc.ResourceReader, loader)
| 61 | + |
| 62 | + |
def open(package: Package,
         file_name: FileName,
         encoding: str = None,
         errors: str = None) -> IO:
    """Return a file-like object opened for reading of the resource.

    If the package's loader provides ``open_resource``, the stream it
    returns is passed through ``_wrap_file`` together with `encoding` and
    `errors`.  Otherwise the resource is opened directly from the directory
    holding the package's ``__spec__.origin``: in binary mode when
    `encoding` is None, in text mode with `encoding`/`errors` otherwise.
    If that direct open fails with IOError, the loader's ``get_data()`` is
    tried as a last resort.

    Raises FileNotFoundError when ``get_data()`` fails with IOError too.
    """
    file_name = _normalize_path(file_name)
    package = _get_package(package)
    reader = _get_resource_reader(package)
    if reader is not None:
        return _wrap_file(reader.open_resource(file_name), encoding, errors)
    # os.path is used instead of pathlib because pathlib.Path.resolve()
    # lacks the 'strict' argument prior to Python 3.6.
    package_dir = os.path.dirname(os.path.abspath(package.__spec__.origin))
    full_path = os.path.join(package_dir, file_name)
    open_kwargs = (
        {'mode': 'rb'} if encoding is None
        else {'mode': 'r', 'encoding': encoding, 'errors': errors})
    try:
        return builtins_open(full_path, **open_kwargs)  # type: ignore
    except IOError:
        # Just assume the loader is a resource loader; all the relevant
        # importlib.machinery loaders are, and an AttributeError for
        # get_data() will make it clear what is needed from the loader.
        loader = cast(ResourceLoader, package.__spec__.loader)
        try:
            data = loader.get_data(full_path)
        except IOError:
            raise FileNotFoundError(
                '{!r} resource not found in {!r}'.format(
                    file_name, package.__spec__.name))
        return _wrap_file(BytesIO(data), encoding, errors)
86 | 98 |
|
87 | 99 |
|
88 | 100 | def read(package: Package,
|
@@ -119,8 +131,8 @@ def path(package: Package, file_name: FileName) -> Iterator[Path]:
|
119 | 131 | """
|
120 | 132 | file_name = _normalize_path(file_name)
|
121 | 133 | package = _get_package(package)
|
122 |
| - if hasattr(package.__spec__.loader, 'resource_path'): |
123 |
| - reader = cast(resources_abc.ResourceReader, package.__spec__.loader) |
| 134 | + reader = _get_resource_reader(package) |
| 135 | + if reader is not None: |
124 | 136 | try:
|
125 | 137 | yield Path(reader.resource_path(file_name))
|
126 | 138 | return
|
@@ -148,3 +160,105 @@ def path(package: Package, file_name: FileName) -> Iterator[Path]:
|
148 | 160 | os.remove(raw_path)
|
149 | 161 | except FileNotFoundError:
|
150 | 162 | pass
|
| 163 | + |
| 164 | + |
def is_resource(package: Package, file_name: str) -> bool:
    """True if file_name is a resource inside package.

    Directories are *not* resources.

    `file_name` is validated and normalized with ``_normalize_path`` first.
    If the package's loader is a resource reader, the answer is delegated
    to its ``is_resource()``.  Otherwise the name must appear in
    ``contents(package)`` and must be a regular file on the file system or
    a file entry (not a directory) inside the loader's zip archive.
    Entries that are neither, and that are not backed by a zip archive,
    are reported as not being resources.
    """
    package = _get_package(package)
    # Use the normalized name, as open() and path() do.
    file_name = _normalize_path(file_name)
    reader = _get_resource_reader(package)
    if reader is not None:
        return reader.is_resource(file_name)
    try:
        package_contents = set(contents(package))
    except (NotADirectoryError, FileNotFoundError):
        return False
    if file_name not in package_contents:
        return False
    # Just because the given file_name lives as an entry in the package's
    # contents doesn't necessarily mean it's a resource.  Directories are not
    # resources, so let's try to find out if it's a directory or not.
    package_directory = Path(package.__spec__.origin).parent
    path = package_directory / file_name
    if path.is_file():
        return True
    if path.is_dir():
        return False
    # If it's not a file and it's not a directory, it probably lives inside
    # a zip file.  Loaders without an 'archive' attribute are not zip
    # backed; whatever the entry is (a dangling symlink, say), it cannot be
    # read as a resource.
    archive_path = getattr(package.__spec__.loader, 'archive', None)
    if archive_path is None:
        return False
    # Crack open the zip, look at its table of contents, and make sure that
    # this entry is a file rather than a directory.
    with ZipFile(archive_path) as zf:
        toc = zf.namelist()
    relpath = package_directory.relative_to(archive_path)
    candidate_path = relpath / file_name
    for entry in toc:                               # pragma: nobranch
        try:
            relative_to_candidate = Path(entry).relative_to(candidate_path)
        except ValueError:
            # The two paths aren't relative to each other so we can ignore it.
            continue
        # Zero additional path components means the entry *is* the
        # candidate; more than zero means the candidate is a directory
        # containing the entry.  Zip archives may also list directories
        # explicitly, with a trailing '/', and those would otherwise look
        # like an exact match for a file.
        return (len(relative_to_candidate.parts) == 0
                and not entry.endswith('/'))
    # I think it's impossible to get here.  It would mean that we are looking
    # for a resource in a zip file, there's an entry matching it in the return
    # value of contents(), but we never actually found it in the zip's table of
    # contents.
    raise AssertionError('Impossible situation')
| 217 | + |
| 218 | + |
def contents(package: Package) -> Iterator[str]:
    """Return the list of entries in package.

    Note that not all entries are resources.  Specifically, directories are
    not considered resources.  Use `is_resource()` on each entry returned here
    to check if it is a resource or not.

    If the package's loader is a resource reader, its ``contents()`` is
    used.  Otherwise the directory holding the package's
    ``__spec__.origin`` is listed; if that directory does not exist on the
    file system and the loader exposes an ``archive`` attribute, the zip
    archive's table of contents is consulted instead.  Each name directly
    under the package is yielded once in the zip case.

    Raises NotADirectoryError or FileNotFoundError when the directory can't
    be listed and the loader has no ``archive`` attribute.
    """
    package = _get_package(package)
    reader = _get_resource_reader(package)
    if reader is not None:
        yield from reader.contents()
        return
    package_directory = Path(package.__spec__.origin).parent
    try:
        yield from os.listdir(str(package_directory))
    except (NotADirectoryError, FileNotFoundError):
        # The package is probably in a zip file.
        archive_path = getattr(package.__spec__.loader, 'archive', None)
        if archive_path is None:
            raise
        relpath = package_directory.relative_to(archive_path)
        with ZipFile(archive_path) as zf:
            toc = zf.namelist()
        prefix = relpath.parts
        seen = set()                                # type: Set[str]
        for filename in toc:
            parts = Path(filename).parts
            # Only entries that actually live under the package directory
            # belong to this package.  Slicing off len(prefix) components
            # without comparing them would also pick up entries of sibling
            # packages that happen to sit at the same depth in the archive.
            if parts[:len(prefix)] != prefix:
                continue
            subparts = parts[len(prefix):]
            if not subparts:
                # The entry is the package directory itself.
                continue
            # One remaining component is an entry directly in the package;
            # more than one means the first component is a subdirectory.
            # A subdirectory can show up many times (once per file below
            # it, plus possibly as an explicit 'dir/' entry), so every name
            # is reported only once.
            name = subparts[0]
            if name not in seen:
                seen.add(name)
                yield name
0 commit comments