Skip to content

Commit 011d169

Browse files
barneygale, nineteendo
authored and committed
pathlib ABCs: remove duplicate realpath() implementation. (python#119178)
Add private `posixpath._realpath()` function, which is a generic version of `realpath()` that can be parameterised with string tokens (`sep`, `curdir`, `pardir`) and query functions (`getcwd`, `lstat`, `readlink`). Also add support for limiting the number of symlink traversals.

In the private `pathlib._abc.PathBase` class, call `posixpath._realpath()` and remove our re-implementation of the same algorithm.

No change to any public APIs, either in `posixpath` or `pathlib`.

Co-authored-by: Nice Zombies <[email protected]>
1 parent 7b39399 commit 011d169

File tree

2 files changed

+57
-70
lines changed

2 files changed

+57
-70
lines changed

Lib/pathlib/_abc.py

Lines changed: 28 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
"""
1313

1414
import functools
15+
import posixpath
1516
from glob import _Globber, _no_recurse_symlinks
16-
from errno import ENOTDIR, ELOOP
1717
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
1818

1919

@@ -696,65 +696,34 @@ def resolve(self, strict=False):
696696
"""
697697
if self._resolving:
698698
return self
699-
path_root, parts = self._stack
700-
path = self.with_segments(path_root)
701-
try:
702-
path = path.absolute()
703-
except UnsupportedOperation:
704-
path_tail = []
705-
else:
706-
path_root, path_tail = path._stack
707-
path_tail.reverse()
708-
709-
# If the user has *not* overridden the `readlink()` method, then symlinks are unsupported
710-
# and (in non-strict mode) we can improve performance by not calling `stat()`.
711-
querying = strict or getattr(self.readlink, '_supported', True)
712-
link_count = 0
713-
while parts:
714-
part = parts.pop()
715-
if not part or part == '.':
716-
continue
717-
if part == '..':
718-
if not path_tail:
719-
if path_root:
720-
# Delete '..' segment immediately following root
721-
continue
722-
elif path_tail[-1] != '..':
723-
# Delete '..' segment and its predecessor
724-
path_tail.pop()
725-
continue
726-
path_tail.append(part)
727-
if querying and part != '..':
728-
path = self.with_segments(path_root + self.parser.sep.join(path_tail))
699+
700+
def getcwd():
701+
return str(self.with_segments().absolute())
702+
703+
if strict or getattr(self.readlink, '_supported', True):
704+
def lstat(path_str):
705+
path = self.with_segments(path_str)
729706
path._resolving = True
730-
try:
731-
st = path.stat(follow_symlinks=False)
732-
if S_ISLNK(st.st_mode):
733-
# Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are
734-
# encountered during resolution.
735-
link_count += 1
736-
if link_count >= self._max_symlinks:
737-
raise OSError(ELOOP, "Too many symbolic links in path", self._raw_path)
738-
target_root, target_parts = path.readlink()._stack
739-
# If the symlink target is absolute (like '/etc/hosts'), set the current
740-
# path to its uppermost parent (like '/').
741-
if target_root:
742-
path_root = target_root
743-
path_tail.clear()
744-
else:
745-
path_tail.pop()
746-
# Add the symlink target's reversed tail parts (like ['hosts', 'etc']) to
747-
# the stack of unresolved path parts.
748-
parts.extend(target_parts)
749-
continue
750-
elif parts and not S_ISDIR(st.st_mode):
751-
raise NotADirectoryError(ENOTDIR, "Not a directory", self._raw_path)
752-
except OSError:
753-
if strict:
754-
raise
755-
else:
756-
querying = False
757-
return self.with_segments(path_root + self.parser.sep.join(path_tail))
707+
return path.lstat()
708+
709+
def readlink(path_str):
710+
path = self.with_segments(path_str)
711+
path._resolving = True
712+
return str(path.readlink())
713+
else:
714+
# If the user has *not* overridden the `readlink()` method, then
715+
# symlinks are unsupported and (in non-strict mode) we can improve
716+
# performance by not calling `path.lstat()`.
717+
def skip(path_str):
718+
# This exception will be internally consumed by `_realpath()`.
719+
raise OSError("Operation skipped.")
720+
721+
lstat = readlink = skip
722+
723+
return self.with_segments(posixpath._realpath(
724+
str(self), strict, self.parser.sep,
725+
getcwd=getcwd, lstat=lstat, readlink=readlink,
726+
maxlinks=self._max_symlinks))
758727

759728
def symlink_to(self, target, target_is_directory=False):
760729
"""

Lib/posixpath.py

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
altsep = None
2323
devnull = '/dev/null'
2424

25+
import errno
2526
import os
2627
import sys
2728
import stat
@@ -401,7 +402,10 @@ def realpath(filename, *, strict=False):
401402
curdir = '.'
402403
pardir = '..'
403404
getcwd = os.getcwd
405+
return _realpath(filename, strict, sep, curdir, pardir, getcwd)
404406

407+
def _realpath(filename, strict=False, sep=sep, curdir=curdir, pardir=pardir,
408+
getcwd=os.getcwd, lstat=os.lstat, readlink=os.readlink, maxlinks=None):
405409
# The stack of unresolved path parts. When popped, a special value of None
406410
# indicates that a symlink target has been resolved, and that the original
407411
# symlink path can be retrieved by popping again. The [::-1] slice is a
@@ -418,6 +422,10 @@ def realpath(filename, *, strict=False):
418422
# the same links.
419423
seen = {}
420424

425+
# Number of symlinks traversed. When the number of traversals is limited
426+
# by *maxlinks*, this is used instead of *seen* to detect symlink loops.
427+
link_count = 0
428+
421429
while rest:
422430
name = rest.pop()
423431
if name is None:
@@ -436,38 +444,48 @@ def realpath(filename, *, strict=False):
436444
else:
437445
newpath = path + sep + name
438446
try:
439-
st = os.lstat(newpath)
447+
st = lstat(newpath)
440448
if not stat.S_ISLNK(st.st_mode):
441449
path = newpath
442450
continue
443-
if newpath in seen:
451+
elif maxlinks is not None:
452+
link_count += 1
453+
if link_count > maxlinks:
454+
if strict:
455+
raise OSError(errno.ELOOP, os.strerror(errno.ELOOP),
456+
newpath)
457+
path = newpath
458+
continue
459+
elif newpath in seen:
444460
# Already seen this path
445461
path = seen[newpath]
446462
if path is not None:
447463
# use cached value
448464
continue
449465
# The symlink is not resolved, so we must have a symlink loop.
450466
if strict:
451-
# Raise OSError(errno.ELOOP)
452-
os.stat(newpath)
467+
raise OSError(errno.ELOOP, os.strerror(errno.ELOOP),
468+
newpath)
453469
path = newpath
454470
continue
455-
target = os.readlink(newpath)
471+
target = readlink(newpath)
456472
except OSError:
457473
if strict:
458474
raise
459475
path = newpath
460476
continue
461477
# Resolve the symbolic link
462-
seen[newpath] = None # not resolved symlink
463478
if target.startswith(sep):
464479
# Symlink target is absolute; reset resolved path.
465480
path = sep
466-
# Push the symlink path onto the stack, and signal its specialness by
467-
# also pushing None. When these entries are popped, we'll record the
468-
# fully-resolved symlink target in the 'seen' mapping.
469-
rest.append(newpath)
470-
rest.append(None)
481+
if maxlinks is None:
482+
# Mark this symlink as seen but not fully resolved.
483+
seen[newpath] = None
484+
# Push the symlink path onto the stack, and signal its specialness
485+
# by also pushing None. When these entries are popped, we'll
486+
# record the fully-resolved symlink target in the 'seen' mapping.
487+
rest.append(newpath)
488+
rest.append(None)
471489
# Push the unresolved symlink target parts onto the stack.
472490
rest.extend(target.split(sep)[::-1])
473491

0 commit comments

Comments
 (0)