Skip to content

Commit

Permalink
bpo-9949: Enable symlink traversal for ntpath.realpath (GH-15287)
Browse files Browse the repository at this point in the history
  • Loading branch information
zooba authored Aug 21, 2019
1 parent e1c638d commit 75e0649
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 32 deletions.
10 changes: 9 additions & 1 deletion Doc/library/os.path.rst
Original file line number Diff line number Diff line change
Expand Up @@ -350,11 +350,19 @@ the :mod:`glob` module.)
.. function:: realpath(path)

Return the canonical path of the specified filename, eliminating any symbolic
links encountered in the path (if they are supported by the operating system).
links encountered in the path (if they are supported by the operating
system).

.. note::
When symbolic link cycles occur, the returned path will be one member of
the cycle, but no guarantee is made about which member that will be.

.. versionchanged:: 3.6
Accepts a :term:`path-like object`.

.. versionchanged:: 3.8
Symbolic links and junctions are now resolved on Windows.


.. function:: relpath(path, start=os.curdir)

Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.8.rst
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,9 @@ characters or bytes unrepresentable at the OS level.
environment variable and does not use :envvar:`HOME`, which is not normally set
for regular user accounts.

:func:`~os.path.realpath` on Windows now resolves reparse points, including
symlinks and directory junctions.


ncurses
-------
Expand Down
107 changes: 88 additions & 19 deletions Lib/ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,94 @@ def abspath(path):
except (OSError, ValueError):
return _abspath_fallback(path)

# realpath is a no-op on systems without islink support
realpath = abspath
try:
from nt import _getfinalpathname, readlink as _nt_readlink
except ImportError:
# realpath is a no-op on systems without _getfinalpathname support.
realpath = abspath
else:
def _readlink_deep(path, seen=None):
if seen is None:
seen = set()

while normcase(path) not in seen:
seen.add(normcase(path))
try:
path = _nt_readlink(path)
except OSError as ex:
# Stop on file (2) or directory (3) not found, or
# paths that are not reparse points (4390)
if ex.winerror in (2, 3, 4390):
break
raise
except ValueError:
# Stop on reparse points that are not symlinks
break
return path

def _getfinalpathname_nonstrict(path):
# Fast path to get the final path name. If this succeeds, there
# is no need to go any further.
try:
return _getfinalpathname(path)
except OSError:
pass

# Allow file (2) or directory (3) not found, invalid syntax (123),
# and symlinks that cannot be followed (1921)
allowed_winerror = 2, 3, 123, 1921

# Non-strict algorithm is to find as much of the target directory
# as we can and join the rest.
tail = ''
seen = set()
while path:
try:
path = _readlink_deep(path, seen)
path = _getfinalpathname(path)
return join(path, tail) if tail else path
except OSError as ex:
if ex.winerror not in allowed_winerror:
raise
path, name = split(path)
if path and not name:
return abspath(path + tail)
tail = join(name, tail) if tail else name
return abspath(tail)

def realpath(path):
path = os.fspath(path)
if isinstance(path, bytes):
prefix = b'\\\\?\\'
unc_prefix = b'\\\\?\\UNC\\'
new_unc_prefix = b'\\\\'
cwd = os.getcwdb()
else:
prefix = '\\\\?\\'
unc_prefix = '\\\\?\\UNC\\'
new_unc_prefix = '\\\\'
cwd = os.getcwd()
had_prefix = path.startswith(prefix)
path = _getfinalpathname_nonstrict(path)
# The path returned by _getfinalpathname will always start with \\?\ -
# strip off that prefix unless it was already provided on the original
# path.
if not had_prefix and path.startswith(prefix):
# For UNC paths, the prefix will actually be \\?\UNC\
# Handle that case as well.
if path.startswith(unc_prefix):
spath = new_unc_prefix + path[len(unc_prefix):]
else:
spath = path[len(prefix):]
# Ensure that the non-prefixed path resolves to the same path
try:
if _getfinalpathname(spath) == path:
path = spath
except OSError as ex:
pass
return path


# Win9x family and earlier have no Unicode filename support.
supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
sys.getwindowsversion()[3] >= 2)
Expand Down Expand Up @@ -633,23 +719,6 @@ def commonpath(paths):
raise


# determine if two files are in fact the same file
try:
# GetFinalPathNameByHandle is available starting with Windows 6.0.
# Windows XP and non-Windows OS'es will mock _getfinalpathname.
if sys.getwindowsversion()[:2] >= (6, 0):
from nt import _getfinalpathname
else:
raise ImportError
except (AttributeError, ImportError):
# On Windows XP and earlier, two files are the same if their absolute
# pathnames are the same.
# Non-Windows operating systems fake this method with an XP
# approximation.
def _getfinalpathname(f):
return normcase(abspath(f))


try:
# The genericpath.isdir implementation uses os.stat and checks the mode
# attribute to tell whether or not the path is a directory.
Expand Down
198 changes: 195 additions & 3 deletions Lib/test/test_ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,22 @@
from test import support, test_genericpath
from tempfile import TemporaryFile


try:
import nt
except ImportError:
# Most tests can complete without the nt module,
# but for those that require it we import here.
nt = None

try:
ntpath._getfinalpathname
except AttributeError:
HAVE_GETFINALPATHNAME = False
else:
HAVE_GETFINALPATHNAME = True


def tester(fn, wantResult):
fn = fn.replace("\\", "\\\\")
gotResult = eval(fn)
Expand Down Expand Up @@ -194,6 +203,189 @@ def test_normpath(self):
tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL')
tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z')

def test_realpath_curdir(self):
expected = ntpath.normpath(os.getcwd())
tester("ntpath.realpath('.')", expected)
tester("ntpath.realpath('./.')", expected)
tester("ntpath.realpath('/'.join(['.'] * 100))", expected)
tester("ntpath.realpath('.\\.')", expected)
tester("ntpath.realpath('\\'.join(['.'] * 100))", expected)

def test_realpath_pardir(self):
expected = ntpath.normpath(os.getcwd())
tester("ntpath.realpath('..')", ntpath.dirname(expected))
tester("ntpath.realpath('../..')",
ntpath.dirname(ntpath.dirname(expected)))
tester("ntpath.realpath('/'.join(['..'] * 50))",
ntpath.splitdrive(expected)[0] + '\\')
tester("ntpath.realpath('..\\..')",
ntpath.dirname(ntpath.dirname(expected)))
tester("ntpath.realpath('\\'.join(['..'] * 50))",
ntpath.splitdrive(expected)[0] + '\\')

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_basic(self):
ABSTFN = ntpath.abspath(support.TESTFN)
open(ABSTFN, "wb").close()
self.addCleanup(support.unlink, ABSTFN)
self.addCleanup(support.unlink, ABSTFN + "1")

os.symlink(ABSTFN, ABSTFN + "1")
self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
self.assertEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")),
os.fsencode(ABSTFN))

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_relative(self):
ABSTFN = ntpath.abspath(support.TESTFN)
open(ABSTFN, "wb").close()
self.addCleanup(support.unlink, ABSTFN)
self.addCleanup(support.unlink, ABSTFN + "1")

os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1"))
self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_broken_symlinks(self):
ABSTFN = ntpath.abspath(support.TESTFN)
os.mkdir(ABSTFN)
self.addCleanup(support.rmtree, ABSTFN)

with support.change_cwd(ABSTFN):
os.mkdir("subdir")
os.chdir("subdir")
os.symlink(".", "recursive")
os.symlink("..", "parent")
os.chdir("..")
os.symlink(".", "self")
os.symlink("missing", "broken")
os.symlink(r"broken\bar", "broken1")
os.symlink(r"self\self\broken", "broken2")
os.symlink(r"subdir\parent\subdir\parent\broken", "broken3")
os.symlink(ABSTFN + r"\broken", "broken4")
os.symlink(r"recursive\..\broken", "broken5")

self.assertEqual(ntpath.realpath("broken"),
ABSTFN + r"\missing")
self.assertEqual(ntpath.realpath(r"broken\foo"),
ABSTFN + r"\missing\foo")
self.assertEqual(ntpath.realpath(r"broken1"),
ABSTFN + r"\missing\bar")
self.assertEqual(ntpath.realpath(r"broken1\baz"),
ABSTFN + r"\missing\bar\baz")
self.assertEqual(ntpath.realpath("broken2"),
ABSTFN + r"\missing")
self.assertEqual(ntpath.realpath("broken3"),
ABSTFN + r"\missing")
self.assertEqual(ntpath.realpath("broken4"),
ABSTFN + r"\missing")
self.assertEqual(ntpath.realpath("broken5"),
ABSTFN + r"\missing")

self.assertEqual(ntpath.realpath(b"broken"),
os.fsencode(ABSTFN + r"\missing"))
self.assertEqual(ntpath.realpath(rb"broken\foo"),
os.fsencode(ABSTFN + r"\missing\foo"))
self.assertEqual(ntpath.realpath(rb"broken1"),
os.fsencode(ABSTFN + r"\missing\bar"))
self.assertEqual(ntpath.realpath(rb"broken1\baz"),
os.fsencode(ABSTFN + r"\missing\bar\baz"))
self.assertEqual(ntpath.realpath(b"broken2"),
os.fsencode(ABSTFN + r"\missing"))
self.assertEqual(ntpath.realpath(rb"broken3"),
os.fsencode(ABSTFN + r"\missing"))
self.assertEqual(ntpath.realpath(b"broken4"),
os.fsencode(ABSTFN + r"\missing"))
self.assertEqual(ntpath.realpath(b"broken5"),
os.fsencode(ABSTFN + r"\missing"))

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_symlink_loops(self):
# Bug #930024, return the path unchanged if we get into an infinite
# symlink loop.
ABSTFN = ntpath.abspath(support.TESTFN)
self.addCleanup(support.unlink, ABSTFN)
self.addCleanup(support.unlink, ABSTFN + "1")
self.addCleanup(support.unlink, ABSTFN + "2")
self.addCleanup(support.unlink, ABSTFN + "y")
self.addCleanup(support.unlink, ABSTFN + "c")
self.addCleanup(support.unlink, ABSTFN + "a")

P = "\\\\?\\"

os.symlink(ABSTFN, ABSTFN)
self.assertEqual(ntpath.realpath(ABSTFN), P + ABSTFN)

# cycles are non-deterministic as to which path is returned, but
# it will always be the fully resolved path of one member of the cycle
os.symlink(ABSTFN + "1", ABSTFN + "2")
os.symlink(ABSTFN + "2", ABSTFN + "1")
expected = (P + ABSTFN + "1", P + ABSTFN + "2")
self.assertIn(ntpath.realpath(ABSTFN + "1"), expected)
self.assertIn(ntpath.realpath(ABSTFN + "2"), expected)

self.assertIn(ntpath.realpath(ABSTFN + "1\\x"),
(ntpath.join(r, "x") for r in expected))
self.assertEqual(ntpath.realpath(ABSTFN + "1\\.."),
ntpath.dirname(ABSTFN))
self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\x"),
ntpath.dirname(P + ABSTFN) + "\\x")
os.symlink(ABSTFN + "x", ABSTFN + "y")
self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\"
+ ntpath.basename(ABSTFN) + "y"),
P + ABSTFN + "x")
self.assertIn(ntpath.realpath(ABSTFN + "1\\..\\"
+ ntpath.basename(ABSTFN) + "1"),
expected)

os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a")
self.assertEqual(ntpath.realpath(ABSTFN + "a"), P + ABSTFN + "a")

os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN))
+ "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c")
self.assertEqual(ntpath.realpath(ABSTFN + "c"), P + ABSTFN + "c")

# Test using relative path as well.
self.assertEqual(ntpath.realpath(ntpath.basename(ABSTFN)), P + ABSTFN)

@support.skip_unless_symlink
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
def test_realpath_symlink_prefix(self):
ABSTFN = ntpath.abspath(support.TESTFN)
self.addCleanup(support.unlink, ABSTFN + "3")
self.addCleanup(support.unlink, "\\\\?\\" + ABSTFN + "3.")
self.addCleanup(support.unlink, ABSTFN + "3link")
self.addCleanup(support.unlink, ABSTFN + "3.link")

with open(ABSTFN + "3", "wb") as f:
f.write(b'0')
os.symlink(ABSTFN + "3", ABSTFN + "3link")

with open("\\\\?\\" + ABSTFN + "3.", "wb") as f:
f.write(b'1')
os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link")

self.assertEqual(ntpath.realpath(ABSTFN + "3link"),
ABSTFN + "3")
self.assertEqual(ntpath.realpath(ABSTFN + "3.link"),
"\\\\?\\" + ABSTFN + "3.")

# Resolved paths should be usable to open target files
with open(ntpath.realpath(ABSTFN + "3link"), "rb") as f:
self.assertEqual(f.read(), b'0')
with open(ntpath.realpath(ABSTFN + "3.link"), "rb") as f:
self.assertEqual(f.read(), b'1')

# When the prefix is included, it is not stripped
self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"),
"\\\\?\\" + ABSTFN + "3")
self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"),
"\\\\?\\" + ABSTFN + "3.")

def test_expandvars(self):
with support.EnvironmentVarGuard() as env:
env.clear()
Expand Down Expand Up @@ -288,11 +480,11 @@ def test_abspath(self):

def test_relpath(self):
tester('ntpath.relpath("a")', 'a')
tester('ntpath.relpath(os.path.abspath("a"))', 'a')
tester('ntpath.relpath(ntpath.abspath("a"))', 'a')
tester('ntpath.relpath("a/b")', 'a\\b')
tester('ntpath.relpath("../a/b")', '..\\a\\b')
with support.temp_cwd(support.TESTFN) as cwd_dir:
currentdir = os.path.basename(cwd_dir)
currentdir = ntpath.basename(cwd_dir)
tester('ntpath.relpath("a", "../b")', '..\\'+currentdir+'\\a')
tester('ntpath.relpath("a/b", "../c")', '..\\'+currentdir+'\\a\\b')
tester('ntpath.relpath("a", "b/c")', '..\\..\\a')
Expand Down Expand Up @@ -417,7 +609,7 @@ def test_ismount(self):
# locations below cannot then refer to mount points
#
drive, path = ntpath.splitdrive(sys.executable)
with support.change_cwd(os.path.dirname(sys.executable)):
with support.change_cwd(ntpath.dirname(sys.executable)):
self.assertFalse(ntpath.ismount(drive.lower()))
self.assertFalse(ntpath.ismount(drive.upper()))

Expand Down
5 changes: 1 addition & 4 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -3358,10 +3358,7 @@ def test_oserror_filename(self):
if hasattr(os, "lchmod"):
funcs.append((self.filenames, os.lchmod, 0o777))
if hasattr(os, "readlink"):
if sys.platform == "win32":
funcs.append((self.unicode_filenames, os.readlink,))
else:
funcs.append((self.filenames, os.readlink,))
funcs.append((self.filenames, os.readlink,))


for filenames, func, *func_args in funcs:
Expand Down
Loading

0 comments on commit 75e0649

Please sign in to comment.