Skip to content

Commit 2b9e987

Browse files
lordmauve, picnixz, vstinner
authored and committed
pythongh-129349: Accept bytes in bytes.fromhex()/bytearray.fromhex() (python#129844)
Co-authored-by: Bénédikt Tran <[email protected]> Co-authored-by: Victor Stinner <[email protected]>
1 parent fb06201 commit 2b9e987

File tree

8 files changed

+90
-69
lines changed

8 files changed

+90
-69
lines changed

Doc/library/stdtypes.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2744,6 +2744,10 @@ data and are closely related to string objects in a variety of other ways.
27442744
:meth:`bytes.fromhex` now skips all ASCII whitespace in the string,
27452745
not just spaces.
27462746

2747+
.. versionchanged:: next
2748+
:meth:`bytes.fromhex` now accepts ASCII :class:`bytes` and
2749+
:term:`bytes-like objects <bytes-like object>` as input.
2750+
27472751
A reverse conversion function exists to transform a bytes object into its
27482752
hexadecimal representation.
27492753

@@ -2829,6 +2833,10 @@ objects.
28292833
:meth:`bytearray.fromhex` now skips all ASCII whitespace in the string,
28302834
not just spaces.
28312835

2836+
.. versionchanged:: next
2837+
:meth:`bytearray.fromhex` now accepts ASCII :class:`bytes` and
2838+
:term:`bytes-like objects <bytes-like object>` as input.
2839+
28322840
A reverse conversion function exists to transform a bytearray object into its
28332841
hexadecimal representation.
28342842

Doc/whatsnew/3.14.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,10 @@ Other language changes
354354
(with :func:`format` or :ref:`f-strings`).
355355
(Contributed by Sergey B Kirpichev in :gh:`87790`.)
356356

357+
* The :meth:`bytes.fromhex` and :meth:`bytearray.fromhex` methods now accept
358+
ASCII :class:`bytes` and :term:`bytes-like objects <bytes-like object>`.
359+
(Contributed by Daniel Pope in :gh:`129349`.)
360+
357361
* ``\B`` in :mod:`regular expression <re>` now matches empty input string.
358362
Now it is always the opposite of ``\b``.
359363
(Contributed by Serhiy Storchaka in :gh:`124130`.)

Lib/test/test_bytes.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,13 +450,34 @@ def test_fromhex(self):
450450

451451
# check that ASCII whitespace is ignored
452452
self.assertEqual(self.type2test.fromhex(' 1A\n2B\t30\v'), b)
453+
self.assertEqual(self.type2test.fromhex(b' 1A\n2B\t30\v'), b)
453454
for c in "\x09\x0A\x0B\x0C\x0D\x20":
454455
self.assertEqual(self.type2test.fromhex(c), self.type2test())
455456
for c in "\x1C\x1D\x1E\x1F\x85\xa0\u2000\u2002\u2028":
456457
self.assertRaises(ValueError, self.type2test.fromhex, c)
457458

459+
# Check that we can parse bytes and bytearray
460+
tests = [
461+
("bytes", bytes),
462+
("bytearray", bytearray),
463+
("memoryview", memoryview),
464+
("array.array", lambda bs: array.array('B', bs)),
465+
]
466+
for name, factory in tests:
467+
with self.subTest(name=name):
468+
self.assertEqual(self.type2test.fromhex(factory(b' 1A 2B 30 ')), b)
469+
470+
# Invalid bytes are rejected
471+
for u8 in b"\0\x1C\x1D\x1E\x1F\x85\xa0":
472+
b = bytes([30, 31, u8])
473+
self.assertRaises(ValueError, self.type2test.fromhex, b)
474+
458475
self.assertEqual(self.type2test.fromhex('0000'), b'\0\0')
459-
self.assertRaises(TypeError, self.type2test.fromhex, b'1B')
476+
with self.assertRaisesRegex(
477+
TypeError,
478+
r'fromhex\(\) argument must be str or bytes-like, not tuple',
479+
):
480+
self.type2test.fromhex(())
460481
self.assertRaises(ValueError, self.type2test.fromhex, 'a')
461482
self.assertRaises(ValueError, self.type2test.fromhex, 'rt')
462483
self.assertRaises(ValueError, self.type2test.fromhex, '1a b cd')
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:meth:`bytes.fromhex` and :meth:`bytearray.fromhex` now accept ASCII
2+
:class:`bytes` and :term:`bytes-like objects <bytes-like object>`.

Objects/bytearrayobject.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2533,7 +2533,7 @@ bytearray_splitlines_impl(PyByteArrayObject *self, int keepends)
25332533
@classmethod
25342534
bytearray.fromhex
25352535
2536-
string: unicode
2536+
string: object
25372537
/
25382538
25392539
Create a bytearray object from a string of hexadecimal numbers.
@@ -2543,8 +2543,8 @@ Example: bytearray.fromhex('B9 01EF') -> bytearray(b'\\xb9\\x01\\xef')
25432543
[clinic start generated code]*/
25442544

25452545
static PyObject *
2546-
bytearray_fromhex_impl(PyTypeObject *type, PyObject *string)
2547-
/*[clinic end generated code: output=8f0f0b6d30fb3ba0 input=f033a16d1fb21f48]*/
2546+
bytearray_fromhex(PyTypeObject *type, PyObject *string)
2547+
/*[clinic end generated code: output=da84dc708e9c4b36 input=7e314e5b2d7ab484]*/
25482548
{
25492549
PyObject *result = _PyBytes_FromHex(string, type == &PyByteArray_Type);
25502550
if (type != &PyByteArray_Type && result != NULL) {

Objects/bytesobject.c

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2484,7 +2484,7 @@ bytes_splitlines_impl(PyBytesObject *self, int keepends)
24842484
@classmethod
24852485
bytes.fromhex
24862486
2487-
string: unicode
2487+
string: object
24882488
/
24892489
24902490
Create a bytes object from a string of hexadecimal numbers.
@@ -2494,8 +2494,8 @@ Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
24942494
[clinic start generated code]*/
24952495

24962496
static PyObject *
2497-
bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2498-
/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2497+
bytes_fromhex(PyTypeObject *type, PyObject *string)
2498+
/*[clinic end generated code: output=d458ec88195da6b3 input=f37d98ed51088a21]*/
24992499
{
25002500
PyObject *result = _PyBytes_FromHex(string, 0);
25012501
if (type != &PyBytes_Type && result != NULL) {
@@ -2510,37 +2510,55 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
25102510
char *buf;
25112511
Py_ssize_t hexlen, invalid_char;
25122512
unsigned int top, bot;
2513-
const Py_UCS1 *str, *end;
2513+
const Py_UCS1 *str, *start, *end;
25142514
_PyBytesWriter writer;
2515+
Py_buffer view;
2516+
view.obj = NULL;
25152517

25162518
_PyBytesWriter_Init(&writer);
25172519
writer.use_bytearray = use_bytearray;
25182520

2519-
assert(PyUnicode_Check(string));
2520-
hexlen = PyUnicode_GET_LENGTH(string);
2521+
if (PyUnicode_Check(string)) {
2522+
hexlen = PyUnicode_GET_LENGTH(string);
25212523

2522-
if (!PyUnicode_IS_ASCII(string)) {
2523-
const void *data = PyUnicode_DATA(string);
2524-
int kind = PyUnicode_KIND(string);
2525-
Py_ssize_t i;
2524+
if (!PyUnicode_IS_ASCII(string)) {
2525+
const void *data = PyUnicode_DATA(string);
2526+
int kind = PyUnicode_KIND(string);
2527+
Py_ssize_t i;
25262528

2527-
/* search for the first non-ASCII character */
2528-
for (i = 0; i < hexlen; i++) {
2529-
if (PyUnicode_READ(kind, data, i) >= 128)
2530-
break;
2529+
/* search for the first non-ASCII character */
2530+
for (i = 0; i < hexlen; i++) {
2531+
if (PyUnicode_READ(kind, data, i) >= 128)
2532+
break;
2533+
}
2534+
invalid_char = i;
2535+
goto error;
25312536
}
2532-
invalid_char = i;
2533-
goto error;
2534-
}
25352537

2536-
assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2537-
str = PyUnicode_1BYTE_DATA(string);
2538+
assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2539+
str = PyUnicode_1BYTE_DATA(string);
2540+
}
2541+
else if (PyObject_CheckBuffer(string)) {
2542+
if (PyObject_GetBuffer(string, &view, PyBUF_SIMPLE) != 0) {
2543+
return NULL;
2544+
}
2545+
hexlen = view.len;
2546+
str = view.buf;
2547+
}
2548+
else {
2549+
PyErr_Format(PyExc_TypeError,
2550+
"fromhex() argument must be str or bytes-like, not %T",
2551+
string);
2552+
return NULL;
2553+
}
25382554

25392555
/* This overestimates if there are spaces */
25402556
buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2541-
if (buf == NULL)
2542-
return NULL;
2557+
if (buf == NULL) {
2558+
goto release_buffer;
2559+
}
25432560

2561+
start = str;
25442562
end = str + hexlen;
25452563
while (str < end) {
25462564
/* skip over spaces in the input */
@@ -2554,7 +2572,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
25542572

25552573
top = _PyLong_DigitValue[*str];
25562574
if (top >= 16) {
2557-
invalid_char = str - PyUnicode_1BYTE_DATA(string);
2575+
invalid_char = str - start;
25582576
goto error;
25592577
}
25602578
str++;
@@ -2565,7 +2583,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
25652583
if (str >= end){
25662584
invalid_char = -1;
25672585
} else {
2568-
invalid_char = str - PyUnicode_1BYTE_DATA(string);
2586+
invalid_char = str - start;
25692587
}
25702588
goto error;
25712589
}
@@ -2574,6 +2592,9 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
25742592
*buf++ = (unsigned char)((top << 4) + bot);
25752593
}
25762594

2595+
if (view.obj != NULL) {
2596+
PyBuffer_Release(&view);
2597+
}
25772598
return _PyBytesWriter_Finish(&writer, buf);
25782599

25792600
error:
@@ -2586,6 +2607,11 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray)
25862607
"fromhex() arg at position %zd", invalid_char);
25872608
}
25882609
_PyBytesWriter_Dealloc(&writer);
2610+
2611+
release_buffer:
2612+
if (view.obj != NULL) {
2613+
PyBuffer_Release(&view);
2614+
}
25892615
return NULL;
25902616
}
25912617

Objects/clinic/bytearrayobject.c.h

Lines changed: 1 addition & 21 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Objects/clinic/bytesobject.c.h

Lines changed: 1 addition & 21 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)