Skip to content

Commit 2c673d5

Browse files
authored
pythonGH-101362: Omit path anchor from pathlib.PurePath()._parts (pythonGH-102476)
Improve performance of path construction by skipping the addition of the path anchor (`drive + root`) to the internal `_parts` list. Rename this attribute to `_tail` for clarity.
1 parent 0a675f4 commit 2c673d5

File tree

3 files changed

+108
-67
lines changed

3 files changed

+108
-67
lines changed

Lib/pathlib.py

+106-65
Original file line numberDiff line numberDiff line change
@@ -210,20 +210,17 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
210210
class _PathParents(Sequence):
211211
"""This object provides sequence-like access to the logical ancestors
212212
of a path. Don't try to construct it yourself."""
213-
__slots__ = ('_pathcls', '_drv', '_root', '_parts')
213+
__slots__ = ('_pathcls', '_drv', '_root', '_tail')
214214

215215
def __init__(self, path):
216216
# We don't store the instance to avoid reference cycles
217217
self._pathcls = type(path)
218218
self._drv = path.drive
219219
self._root = path.root
220-
self._parts = path._parts
220+
self._tail = path._tail
221221

222222
def __len__(self):
223-
if self._drv or self._root:
224-
return len(self._parts) - 1
225-
else:
226-
return len(self._parts)
223+
return len(self._tail)
227224

228225
def __getitem__(self, idx):
229226
if isinstance(idx, slice):
@@ -234,7 +231,7 @@ def __getitem__(self, idx):
234231
if idx < 0:
235232
idx += len(self)
236233
return self._pathcls._from_parsed_parts(self._drv, self._root,
237-
self._parts[:-idx - 1])
234+
self._tail[:-idx - 1])
238235

239236
def __repr__(self):
240237
return "<{}.parents>".format(self._pathcls.__name__)
@@ -249,9 +246,41 @@ class PurePath(object):
249246
PureWindowsPath object. You can also instantiate either of these classes
250247
directly, regardless of your system.
251248
"""
249+
252250
__slots__ = (
253-
'_raw_path', '_drv', '_root', '_parts_cached',
254-
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
251+
# The `_raw_path` slot stores an unnormalized string path. This is set
252+
# in the `__init__()` method.
253+
'_raw_path',
254+
255+
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
256+
# normalized parts of the path. They are set when any of the `drive`,
257+
# `root` or `_tail` properties are accessed for the first time. The
258+
# three-part division corresponds to the result of
259+
# `os.path.splitroot()`, except that the tail is further split on path
260+
# separators (i.e. it is a list of strings), and that the root and
261+
# tail are normalized.
262+
'_drv', '_root', '_tail_cached',
263+
264+
# The `_str` slot stores the string representation of the path,
265+
# computed from the drive, root and tail when `__str__()` is called
266+
# for the first time. It's used to implement `_str_normcase`.
267+
'_str',
268+
269+
# The `_str_normcase_cached` slot stores the string path with
270+
# normalized case. It is set when the `_str_normcase` property is
271+
# accessed for the first time. It's used to implement `__eq__()`,
272+
# `__hash__()`, and `_parts_normcase`.
273+
'_str_normcase_cached',
274+
275+
# The `_parts_normcase_cached` slot stores the case-normalized
276+
# string path after splitting on path separators. It's set when the
277+
# `_parts_normcase` property is accessed for the first time. It's used
278+
# to implement comparison methods like `__lt__()`.
279+
'_parts_normcase_cached',
280+
281+
# The `_hash` slot stores the hash of the case-normalized string
282+
# path. It's set when `__hash__()` is called for the first time.
283+
'_hash',
255284
)
256285
_flavour = os.path
257286

@@ -277,10 +306,7 @@ def __init__(self, *args):
277306
path = os.fspath(args[0])
278307
else:
279308
path = self._flavour.join(*args)
280-
if isinstance(path, str):
281-
# Force-cast str subclasses to str (issue #21127)
282-
path = str(path)
283-
else:
309+
if not isinstance(path, str):
284310
raise TypeError(
285311
"argument should be a str or an os.PathLike "
286312
"object where __fspath__ returns a str, "
@@ -299,33 +325,32 @@ def _parse_path(cls, path):
299325
if drv.startswith(sep):
300326
# pathlib assumes that UNC paths always have a root.
301327
root = sep
302-
unfiltered_parsed = [drv + root] + rel.split(sep)
303-
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
328+
parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
304329
return drv, root, parsed
305330

306331
def _load_parts(self):
307-
drv, root, parts = self._parse_path(self._raw_path)
332+
drv, root, tail = self._parse_path(self._raw_path)
308333
self._drv = drv
309334
self._root = root
310-
self._parts_cached = parts
335+
self._tail_cached = tail
311336

312337
@classmethod
313-
def _from_parsed_parts(cls, drv, root, parts):
314-
path = cls._format_parsed_parts(drv, root, parts)
338+
def _from_parsed_parts(cls, drv, root, tail):
339+
path = cls._format_parsed_parts(drv, root, tail)
315340
self = cls(path)
316341
self._str = path or '.'
317342
self._drv = drv
318343
self._root = root
319-
self._parts_cached = parts
344+
self._tail_cached = tail
320345
return self
321346

322347
@classmethod
323-
def _format_parsed_parts(cls, drv, root, parts):
348+
def _format_parsed_parts(cls, drv, root, tail):
324349
if drv or root:
325-
return drv + root + cls._flavour.sep.join(parts[1:])
326-
elif parts and cls._flavour.splitdrive(parts[0])[0]:
327-
parts = ['.'] + parts
328-
return cls._flavour.sep.join(parts)
350+
return drv + root + cls._flavour.sep.join(tail)
351+
elif tail and cls._flavour.splitdrive(tail[0])[0]:
352+
tail = ['.'] + tail
353+
return cls._flavour.sep.join(tail)
329354

330355
def __str__(self):
331356
"""Return the string representation of the path, suitable for
@@ -334,7 +359,7 @@ def __str__(self):
334359
return self._str
335360
except AttributeError:
336361
self._str = self._format_parsed_parts(self.drive, self.root,
337-
self._parts) or '.'
362+
self._tail) or '.'
338363
return self._str
339364

340365
def __fspath__(self):
@@ -374,25 +399,34 @@ def as_uri(self):
374399
path = str(self)
375400
return prefix + urlquote_from_bytes(os.fsencode(path))
376401

402+
@property
403+
def _str_normcase(self):
404+
# String with normalized case, for hashing and equality checks
405+
try:
406+
return self._str_normcase_cached
407+
except AttributeError:
408+
self._str_normcase_cached = self._flavour.normcase(str(self))
409+
return self._str_normcase_cached
410+
377411
@property
378412
def _parts_normcase(self):
379-
# Cached parts with normalized case, for hashing and comparison.
413+
# Cached parts with normalized case, for comparisons.
380414
try:
381415
return self._parts_normcase_cached
382416
except AttributeError:
383-
self._parts_normcase_cached = [self._flavour.normcase(p) for p in self._parts]
417+
self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)
384418
return self._parts_normcase_cached
385419

386420
def __eq__(self, other):
387421
if not isinstance(other, PurePath):
388422
return NotImplemented
389-
return self._parts_normcase == other._parts_normcase and self._flavour is other._flavour
423+
return self._str_normcase == other._str_normcase and self._flavour is other._flavour
390424

391425
def __hash__(self):
392426
try:
393427
return self._hash
394428
except AttributeError:
395-
self._hash = hash(tuple(self._parts_normcase))
429+
self._hash = hash(self._str_normcase)
396430
return self._hash
397431

398432
def __lt__(self, other):
@@ -434,12 +468,12 @@ def root(self):
434468
return self._root
435469

436470
@property
437-
def _parts(self):
471+
def _tail(self):
438472
try:
439-
return self._parts_cached
473+
return self._tail_cached
440474
except AttributeError:
441475
self._load_parts()
442-
return self._parts_cached
476+
return self._tail_cached
443477

444478
@property
445479
def anchor(self):
@@ -450,10 +484,10 @@ def anchor(self):
450484
@property
451485
def name(self):
452486
"""The final path component, if any."""
453-
parts = self._parts
454-
if len(parts) == (1 if (self.drive or self.root) else 0):
487+
tail = self._tail
488+
if not tail:
455489
return ''
456-
return parts[-1]
490+
return tail[-1]
457491

458492
@property
459493
def suffix(self):
@@ -501,7 +535,7 @@ def with_name(self, name):
501535
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
502536
raise ValueError("Invalid name %r" % (name))
503537
return self._from_parsed_parts(self.drive, self.root,
504-
self._parts[:-1] + [name])
538+
self._tail[:-1] + [name])
505539

506540
def with_stem(self, stem):
507541
"""Return a new path with the stem changed."""
@@ -526,7 +560,7 @@ def with_suffix(self, suffix):
526560
else:
527561
name = name[:-len(old_suffix)] + suffix
528562
return self._from_parsed_parts(self.drive, self.root,
529-
self._parts[:-1] + [name])
563+
self._tail[:-1] + [name])
530564

531565
def relative_to(self, other, /, *_deprecated, walk_up=False):
532566
"""Return the relative path to another path identified by the passed
@@ -551,7 +585,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False):
551585
raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors")
552586
if step and not walk_up:
553587
raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")
554-
parts = ('..',) * step + self.parts[len(path.parts):]
588+
parts = ['..'] * step + self._tail[len(path._tail):]
555589
return path_cls(*parts)
556590

557591
def is_relative_to(self, other, /, *_deprecated):
@@ -570,13 +604,10 @@ def is_relative_to(self, other, /, *_deprecated):
570604
def parts(self):
571605
"""An object providing sequence-like access to the
572606
components in the filesystem path."""
573-
# We cache the tuple to avoid building a new one each time .parts
574-
# is accessed. XXX is this necessary?
575-
try:
576-
return self._parts_tuple
577-
except AttributeError:
578-
self._parts_tuple = tuple(self._parts)
579-
return self._parts_tuple
607+
if self.drive or self.root:
608+
return (self.drive + self.root,) + tuple(self._tail)
609+
else:
610+
return tuple(self._tail)
580611

581612
def joinpath(self, *args):
582613
"""Combine this path with one or several arguments, and return a
@@ -603,10 +634,10 @@ def parent(self):
603634
"""The logical parent of the path."""
604635
drv = self.drive
605636
root = self.root
606-
parts = self._parts
607-
if len(parts) == 1 and (drv or root):
637+
tail = self._tail
638+
if not tail:
608639
return self
609-
return self._from_parsed_parts(drv, root, parts[:-1])
640+
return self._from_parsed_parts(drv, root, tail[:-1])
610641

611642
@property
612643
def parents(self):
@@ -624,29 +655,29 @@ def is_absolute(self):
624655
def is_reserved(self):
625656
"""Return True if the path contains one of the special names reserved
626657
by the system, if any."""
627-
if self._flavour is posixpath or not self._parts:
658+
if self._flavour is posixpath or not self._tail:
628659
return False
629660

630661
# NOTE: the rules for reserved names seem somewhat complicated
631662
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
632663
# exist). We err on the side of caution and return True for paths
633664
# which are not considered reserved by Windows.
634-
if self._parts[0].startswith('\\\\'):
665+
if self.drive.startswith('\\\\'):
635666
# UNC paths are never reserved.
636667
return False
637-
name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
668+
name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ')
638669
return name.upper() in _WIN_RESERVED_NAMES
639670

640671
def match(self, path_pattern):
641672
"""
642673
Return True if this path matches the given pattern.
643674
"""
644-
path_pattern = self._flavour.normcase(path_pattern)
645-
drv, root, pat_parts = self._parse_path(path_pattern)
646-
if not pat_parts:
675+
pat = type(self)(path_pattern)
676+
if not pat.parts:
647677
raise ValueError("empty pattern")
678+
pat_parts = pat._parts_normcase
648679
parts = self._parts_normcase
649-
if drv or root:
680+
if pat.drive or pat.root:
650681
if len(pat_parts) != len(parts):
651682
return False
652683
elif len(pat_parts) > len(parts):
@@ -707,11 +738,21 @@ def __new__(cls, *args, **kwargs):
707738
cls = WindowsPath if os.name == 'nt' else PosixPath
708739
return object.__new__(cls)
709740

710-
def _make_child_relpath(self, part):
711-
# This is an optimization used for dir walking. `part` must be
712-
# a single part relative to this path.
713-
parts = self._parts + [part]
714-
return self._from_parsed_parts(self.drive, self.root, parts)
741+
def _make_child_relpath(self, name):
742+
path_str = str(self)
743+
tail = self._tail
744+
if tail:
745+
path_str = f'{path_str}{self._flavour.sep}{name}'
746+
elif path_str != '.':
747+
path_str = f'{path_str}{name}'
748+
else:
749+
path_str = name
750+
path = type(self)(path_str)
751+
path._str = path_str
752+
path._drv = self.drive
753+
path._root = self.root
754+
path._tail_cached = tail + [name]
755+
return path
715756

716757
def __enter__(self):
717758
# In previous versions of pathlib, __exit__() marked this path as
@@ -1196,12 +1237,12 @@ def expanduser(self):
11961237
(as returned by os.path.expanduser)
11971238
"""
11981239
if (not (self.drive or self.root) and
1199-
self._parts and self._parts[0][:1] == '~'):
1200-
homedir = self._flavour.expanduser(self._parts[0])
1240+
self._tail and self._tail[0][:1] == '~'):
1241+
homedir = self._flavour.expanduser(self._tail[0])
12011242
if homedir[:1] == "~":
12021243
raise RuntimeError("Could not determine home directory.")
1203-
drv, root, parts = self._parse_path(homedir)
1204-
return self._from_parsed_parts(drv, root, parts + self._parts[1:])
1244+
drv, root, tail = self._parse_path(homedir)
1245+
return self._from_parsed_parts(drv, root, tail + self._tail[1:])
12051246

12061247
return self
12071248

Lib/test/test_pathlib.py

-2
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,6 @@ def test_parts_common(self):
346346
p = P('a/b')
347347
parts = p.parts
348348
self.assertEqual(parts, ('a', 'b'))
349-
# The object gets reused.
350-
self.assertIs(parts, p.parts)
351349
# When the path is absolute, the anchor is a separate part.
352350
p = P('/a/b')
353351
parts = p.parts
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Speed up :class:`pathlib.Path` construction by omitting the path anchor from
2+
the internal list of path parts.

0 commit comments

Comments
 (0)