Skip to content

Commit ede9895

Browse files
zooba, finnagin, serhiy-storchaka
authored
[3.12] gh-106242: Fix path truncation in os.path.normpath (GH-106816) (#107981)
* gh-106242: Fix path truncation in os.path.normpath (GH-106816)
* gh-106242: Minor fixup to avoid compiler warnings

---------

Co-authored-by: Finn Womack <[email protected]>
Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 2a00cf2 commit ede9895

File tree

5 files changed

+31
-10
lines changed

5 files changed

+31
-10
lines changed

Include/internal/pycore_fileutils.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,8 @@ extern int _Py_add_relfile(wchar_t *dirname,
252252
const wchar_t *relfile,
253253
size_t bufsize);
254254
extern size_t _Py_find_basename(const wchar_t *filename);
255-
PyAPI_FUNC(wchar_t *) _Py_normpath(wchar_t *path, Py_ssize_t size);
255+
PyAPI_FUNC(wchar_t*) _Py_normpath(wchar_t *path, Py_ssize_t size);
256+
extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *length);
256257

257258
// The Windows Games API family does not provide these functions
258259
// so provide our own implementations. Remove them in case they get added

Lib/test/test_genericpath.py

+4
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,10 @@ def test_normpath_issue5827(self):
460460
for path in ('', '.', '/', '\\', '///foo/.//bar//'):
461461
self.assertIsInstance(self.pathmodule.normpath(path), str)
462462

463+
def test_normpath_issue106242(self):
464+
for path in ('\x00', 'foo\x00bar', '\x00\x00', '\x00foo', 'foo\x00'):
465+
self.assertEqual(self.pathmodule.normpath(path), path)
466+
463467
def test_abspath_issue3426(self):
464468
# Check that abspath returns unicode when the arg is unicode
465469
# with both ASCII and non-ASCII cwds.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixes :func:`os.path.normpath` to handle embedded null characters without truncating the path.

Modules/posixmodule.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -5275,7 +5275,9 @@ os__path_normpath_impl(PyObject *module, PyObject *path)
52755275
if (!buffer) {
52765276
return NULL;
52775277
}
5278-
PyObject *result = PyUnicode_FromWideChar(_Py_normpath(buffer, len), -1);
5278+
Py_ssize_t norm_len;
5279+
wchar_t *norm_path = _Py_normpath_and_size(buffer, len, &norm_len);
5280+
PyObject *result = PyUnicode_FromWideChar(norm_path, norm_len);
52795281
PyMem_Free(buffer);
52805282
return result;
52815283
}

Python/fileutils.c

+21-8
Original file line numberDiff line numberDiff line change
@@ -2377,12 +2377,14 @@ _Py_find_basename(const wchar_t *filename)
23772377
path, which will be within the original buffer. Guaranteed to not
23782378
make the path longer, and will not fail. 'size' is the length of
23792379
the path, if known. If -1, the first null character will be assumed
2380-
to be the end of the path. */
2380+
to be the end of the path. 'normsize' will be set to contain the
2381+
length of the resulting normalized path. */
23812382
wchar_t *
2382-
_Py_normpath(wchar_t *path, Py_ssize_t size)
2383+
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
23832384
{
23842385
assert(path != NULL);
2385-
if (!path[0] || size == 0) {
2386+
if ((size < 0 && !path[0]) || size == 0) {
2387+
*normsize = 0;
23862388
return path;
23872389
}
23882390
wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
@@ -2431,11 +2433,7 @@ _Py_normpath(wchar_t *path, Py_ssize_t size)
24312433
*p2++ = lastC = *p1;
24322434
}
24332435
}
2434-
if (sepCount) {
2435-
minP2 = p2; // Invalid path
2436-
} else {
2437-
minP2 = p2 - 1; // Absolute path has SEP at minP2
2438-
}
2436+
minP2 = p2 - 1;
24392437
}
24402438
#else
24412439
// Skip past two leading SEPs
@@ -2495,13 +2493,28 @@ _Py_normpath(wchar_t *path, Py_ssize_t size)
24952493
while (--p2 != minP2 && *p2 == SEP) {
24962494
*p2 = L'\0';
24972495
}
2496+
} else {
2497+
--p2;
24982498
}
2499+
*normsize = p2 - path + 1;
24992500
#undef SEP_OR_END
25002501
#undef IS_SEP
25012502
#undef IS_END
25022503
return path;
25032504
}
25042505

2506+
/* In-place path normalisation. Returns the start of the normalized
2507+
path, which will be within the original buffer. Guaranteed to not
2508+
make the path longer, and will not fail. 'size' is the length of
2509+
the path, if known. If -1, the first null character will be assumed
2510+
to be the end of the path. */
2511+
wchar_t *
2512+
_Py_normpath(wchar_t *path, Py_ssize_t size)
2513+
{
2514+
Py_ssize_t norm_length;
2515+
return _Py_normpath_and_size(path, size, &norm_length);
2516+
}
2517+
25052518

25062519
/* Get the current directory. buflen is the buffer size in wide characters
25072520
including the null character. Decode the path from the locale encoding.

0 commit comments

Comments
 (0)