Skip to content

[WIP] GH-102783: Speed up pathlib.PurePath.__fspath__() by returning raw path #112397

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 28 additions & 21 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,12 @@ class PurePath:
# in the `__init__()` method.
'_raw_paths',

# The `_raw_path_cached` slot stores a joined but unnormalized string
# path. It is set eagerly in `__init__()` when zero or one non-empty
# segment is given, and otherwise lazily by the `_raw_path` property on
# first access. It's returned from `__fspath__()`, and used as the basis
# for the normalized path parts (see following slots).
'_raw_path_cached',

# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The
Expand Down Expand Up @@ -255,8 +261,6 @@ def with_segments(self, *pathsegments):

@classmethod
def _parse_path(cls, path):
if not path:
return '', '', []
sep = cls.pathmod.sep
altsep = cls.pathmod.altsep
if altsep:
Expand All @@ -273,19 +277,6 @@ def _parse_path(cls, path):
parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
return drv, root, parsed

def _load_parts(self):
    """Parse the raw path segments and cache the normalized components.

    Joins the segments stored in `_raw_paths` into a single string, parses
    it with `_parse_path()`, and stores the result in the `_drv`, `_root`
    and `_tail_cached` slots.
    """
    paths = self._raw_paths
    if not paths:
        # No segments at all: parse the empty path.
        path = ''
    elif len(paths) == 1:
        # A single segment needs no joining.
        path = paths[0]
    else:
        path = self.pathmod.join(*paths)
    self._drv, self._root, self._tail_cached = self._parse_path(path)

def _from_parsed_parts(self, drv, root, tail):
path_str = self._format_parsed_parts(drv, root, tail)
path = self.with_segments(path_str)
Expand Down Expand Up @@ -321,13 +312,22 @@ def as_posix(self):
def __repr__(self):
return "{}({!r})".format(self.__class__.__name__, self.as_posix())

@property
def _raw_path(self):
    """Return the path segments joined into one string, unnormalized.

    The joined string is memoized in the `_raw_path_cached` slot, so the
    join is performed at most once per instance.
    """
    try:
        joined = self._raw_path_cached
    except AttributeError:
        # NOTE: `__init__()` fills the cache itself when it collected zero
        # or one segment, so this join is expected to see two or more.
        joined = self._raw_path_cached = self.pathmod.join(*self._raw_paths)
    return joined

@property
def drive(self):
    """The drive prefix (letter or UNC path), if any."""
    try:
        return self._drv
    except AttributeError:
        # First access: parse the joined raw path exactly once.
        # `_parse_path()` yields drive, root and tail together, so all
        # three slots are cached in one go.
        parsed = self._parse_path(self._raw_path)
        self._drv, self._root, self._tail_cached = parsed
        return self._drv

@property
Expand All @@ -336,15 +336,15 @@ def root(self):
try:
return self._root
except AttributeError:
self._load_parts()
self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path)
return self._root

@property
def _tail(self):
    """Return the list of normalized path components after the anchor."""
    try:
        return self._tail_cached
    except AttributeError:
        # First access: parse the joined raw path exactly once.
        # `_parse_path()` yields drive, root and tail together, so all
        # three slots are cached in one go.
        parsed = self._parse_path(self._raw_path)
        self._drv, self._root, self._tail_cached = parsed
        return self._tail_cached

@property
Expand Down Expand Up @@ -603,8 +603,15 @@ def __init__(self, *args):
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
paths.append(path)
if path:
paths.append(path)
self._raw_paths = paths
if len(paths) == 1:
self._raw_path_cached = paths[0]
elif len(paths) == 0:
self._str = self._raw_path_cached = '.'
self._drv = self._root = ''
self._tail_cached = []
self._resolving = False

def __reduce__(self):
Expand All @@ -613,12 +620,12 @@ def __reduce__(self):
return (self.__class__, self.parts)

def __fspath__(self):
    """Return the joined, unnormalized path string for `os.fspath()`.

    This is the raw path as given by the caller (for example
    ``P('.', '.')`` yields ``'./.'`` on POSIX), not the normalized form
    produced by `str()`.
    """
    return self._raw_path

def __bytes__(self):
    """Return the bytes representation of the path.  This is only
    recommended to use under Unix."""
    # Encode `str(self)` explicitly: `os.fsencode(self)` would go through
    # `__fspath__()` and therefore encode the unnormalized raw path.
    return os.fsencode(str(self))

@property
def _str_normcase(self):
Expand Down
7 changes: 7 additions & 0 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,13 @@ def test_fspath_common(self):
p = P('a/b')
self._check_str(p.__fspath__(), ('a/b',))
self._check_str(os.fspath(p), ('a/b',))
self.assertEqual('.', P().__fspath__())
self.assertEqual('.', P('').__fspath__())
self.assertEqual('.', P('.').__fspath__())
self.assertEqual('.', P('', '').__fspath__())
self.assertEqual('.', P('.', '').__fspath__())
self.assertEqual('.', P('', '.').__fspath__())
self.assertEqual(f'.{self.sep}.', P('.', '.').__fspath__())

def test_bytes(self):
P = self.cls
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Speed up ``pathlib.PurePath.__fspath__()`` by returning an unnormalized
path.