Skip to content

Commit ae5e111

Browse files
finnaginzooba
authored and committed
pythongh-106242: Fix path truncation in os.path.normpath (pythonGH-106816)
1 parent db4400b commit ae5e111

File tree

5 files changed

+31
-10
lines changed

5 files changed

+31
-10
lines changed

Include/internal/pycore_fileutils.h

+2 -1
Original file line number | Diff line number | Diff line change
@@ -244,7 +244,8 @@ extern int _Py_add_relfile(wchar_t *dirname,
244244
const wchar_t *relfile,
245245
size_t bufsize);
246246
extern size_t _Py_find_basename(const wchar_t *filename);
247-
PyAPI_FUNC(wchar_t *) _Py_normpath(wchar_t *path, Py_ssize_t size);
247+
PyAPI_FUNC(wchar_t*) _Py_normpath(wchar_t *path, Py_ssize_t size);
248+
extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *length);
248249

249250

250251
// Macros to protect CRT calls against instant termination when passed an

Lib/test/test_genericpath.py

+4
Original file line number | Diff line number | Diff line change
@@ -460,6 +460,10 @@ def test_normpath_issue5827(self):
460460
for path in ('', '.', '/', '\\', '///foo/.//bar//'):
461461
self.assertIsInstance(self.pathmodule.normpath(path), str)
462462

463+
def test_normpath_issue106242(self):
464+
for path in ('\x00', 'foo\x00bar', '\x00\x00', '\x00foo', 'foo\x00'):
465+
self.assertEqual(self.pathmodule.normpath(path), path)
466+
463467
def test_abspath_issue3426(self):
464468
# Check that abspath returns unicode when the arg is unicode
465469
# with both ASCII and non-ASCII cwds.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Fixes :func:`os.path.normpath` to handle embedded null characters without truncating the path.

Modules/posixmodule.c

+3 -1
Original file line number | Diff line number | Diff line change
@@ -4552,7 +4552,9 @@ os__path_normpath_impl(PyObject *module, PyObject *path)
45524552
if (!buffer) {
45534553
return NULL;
45544554
}
4555-
PyObject *result = PyUnicode_FromWideChar(_Py_normpath(buffer, len), -1);
4555+
Py_ssize_t norm_len;
4556+
wchar_t *norm_path = _Py_normpath_and_size(buffer, len, &norm_len);
4557+
PyObject *result = PyUnicode_FromWideChar(norm_path, norm_len);
45564558
PyMem_Free(buffer);
45574559
return result;
45584560
}

Python/fileutils.c

+21 -8
Original file line number | Diff line number | Diff line change
@@ -2179,12 +2179,14 @@ _Py_find_basename(const wchar_t *filename)
21792179
path, which will be within the original buffer. Guaranteed to not
21802180
make the path longer, and will not fail. 'size' is the length of
21812181
the path, if known. If -1, the first null character will be assumed
2182-
to be the end of the path. */
2182+
to be the end of the path. 'normsize' will be set to contain the
2183+
length of the resulting normalized path. */
21832184
wchar_t *
2184-
_Py_normpath(wchar_t *path, Py_ssize_t size)
2185+
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
21852186
{
21862187
assert(path != NULL);
2187-
if (!path[0] || size == 0) {
2188+
if (!path[0] && size < 0 || size == 0) {
2189+
*normsize = 0;
21882190
return path;
21892191
}
21902192
wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
@@ -2233,11 +2235,7 @@ _Py_normpath(wchar_t *path, Py_ssize_t size)
22332235
*p2++ = lastC = *p1;
22342236
}
22352237
}
2236-
if (sepCount) {
2237-
minP2 = p2; // Invalid path
2238-
} else {
2239-
minP2 = p2 - 1; // Absolute path has SEP at minP2
2240-
}
2238+
minP2 = p2 - 1;
22412239
}
22422240
#else
22432241
// Skip past two leading SEPs
@@ -2297,13 +2295,28 @@ _Py_normpath(wchar_t *path, Py_ssize_t size)
22972295
while (--p2 != minP2 && *p2 == SEP) {
22982296
*p2 = L'\0';
22992297
}
2298+
} else {
2299+
--p2;
23002300
}
2301+
*normsize = p2 - path + 1;
23012302
#undef SEP_OR_END
23022303
#undef IS_SEP
23032304
#undef IS_END
23042305
return path;
23052306
}
23062307

2308+
/* In-place path normalisation. Returns the start of the normalized
2309+
path, which will be within the original buffer. Guaranteed to not
2310+
make the path longer, and will not fail. 'size' is the length of
2311+
the path, if known. If -1, the first null character will be assumed
2312+
to be the end of the path. */
2313+
wchar_t *
2314+
_Py_normpath(wchar_t *path, Py_ssize_t size)
2315+
{
2316+
Py_ssize_t norm_length;
2317+
return _Py_normpath_and_size(path, size, &norm_length);
2318+
}
2319+
23072320

23082321
/* Get the current directory. buflen is the buffer size in wide characters
23092322
including the null character. Decode the path from the locale encoding.

0 commit comments

Comments
 (0)