Skip to content

Commit 0c2f930

Browse files
authored
bpo-22385: Support output separators in hex methods. (#13578)
* bpo-22385: Support output separators in hex methods. Also in binascii.hexlify aka b2a_hex. The underlying implementation behind all hex generation in CPython uses the same pystrhex.c implementation. This adds support to bytes, bytearray, and memoryview objects. The binascii module functions exist rather than being slated for deprecation because they return bytes rather than requiring an intermediate step through a str object. This change was inspired by MicroPython which supports sep in its binascii implementation (and does not yet support the .hex methods). https://bugs.python.org/issue22385
1 parent aacc77f commit 0c2f930

16 files changed

+624
-64
lines changed

Doc/library/binascii.rst

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ The :mod:`binascii` module defines the following functions:
145145
platforms, use ``crc32(data) & 0xffffffff``.
146146

147147

148-
.. function:: b2a_hex(data)
149-
hexlify(data)
148+
.. function:: b2a_hex(data[, sep[, bytes_per_sep=1]])
149+
hexlify(data[, sep[, bytes_per_sep=1]])
150150

151151
Return the hexadecimal representation of the binary *data*. Every byte of
152152
*data* is converted into the corresponding 2-digit hex representation. The
@@ -155,6 +155,24 @@ The :mod:`binascii` module defines the following functions:
155155
Similar functionality (but returning a text string) is also conveniently
156156
accessible using the :meth:`bytes.hex` method.
157157

158+
If *sep* is specified, it must be a single character str or bytes object.
159+
It will be inserted in the output after every *bytes_per_sep* input bytes.
160+
Separator placement is counted from the right end of the output by default;
161+
if you wish to count from the left, supply a negative *bytes_per_sep* value.
162+
163+
>>> import binascii
164+
>>> binascii.b2a_hex(b'\xb9\x01\xef')
165+
b'b901ef'
166+
>>> binascii.hexlify(b'\xb9\x01\xef', '-')
167+
b'b9-01-ef'
168+
>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
169+
b'b9_01ef'
170+
>>> binascii.b2a_hex(b'\xb9\x01\xef', b' ', -2)
171+
b'b901 ef'
172+
173+
.. versionchanged:: 3.8
174+
The *sep* and *bytes_per_sep* parameters were added.
175+
158176
.. function:: a2b_hex(hexstr)
159177
unhexlify(hexstr)
160178

Doc/library/stdtypes.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2404,8 +2404,26 @@ data and are closely related to string objects in a variety of other ways.
24042404
>>> b'\xf0\xf1\xf2'.hex()
24052405
'f0f1f2'
24062406

2407+
If you want to make the hex string easier to read, you can specify a
2408+
single character separator *sep* parameter to include in the output.
2409+
By default, this separator is placed between each byte. A second optional *bytes_per_sep*
2410+
parameter controls the spacing. Positive values calculate the
2411+
separator position from the right, negative values from the left.
2412+
2413+
>>> value = b'\xf0\xf1\xf2'
2414+
>>> value.hex('-')
2415+
'f0-f1-f2'
2416+
>>> value.hex('_', 2)
2417+
'f0_f1f2'
2418+
>>> b'UUDDLRLRAB'.hex(' ', -4)
2419+
'55554444 4c524c52 4142'
2420+
24072421
.. versionadded:: 3.5
24082422

2423+
.. versionchanged:: 3.8
2424+
:meth:`bytes.hex` now supports optional *sep* and *bytes_per_sep*
2425+
parameters to insert separators between bytes in the hex output.
2426+
24092427
Since bytes objects are sequences of integers (akin to a tuple), for a bytes
24102428
object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytes
24112429
object of length 1. (This contrasts with text strings, where both indexing

Include/pystrhex.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ extern "C" {
1010
PyAPI_FUNC(PyObject*) _Py_strhex(const char* argbuf, const Py_ssize_t arglen);
1111
/* Returns a bytes() containing the ASCII hex representation of argbuf. */
1212
PyAPI_FUNC(PyObject*) _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen);
13+
/* These variants include support for a separator between every N bytes: */
14+
PyAPI_FUNC(PyObject*) _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group);
15+
PyAPI_FUNC(PyObject*) _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group);
1316
#endif /* !Py_LIMITED_API */
1417

1518
#ifdef __cplusplus

Lib/test/test_binascii.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,18 @@ def test_hex(self):
240240
self.assertEqual(binascii.hexlify(self.type2test(s)), t)
241241
self.assertEqual(binascii.unhexlify(self.type2test(t)), u)
242242

243+
def test_hex_separator(self):
244+
"""Test that hexlify and b2a_hex are binary versions of bytes.hex."""
245+
# Logic of separators is tested in test_bytes.py. This checks that
246+
# arg parsing works and exercises the direct to bytes object code
247+
# path within pystrhex.c.
248+
s = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
249+
self.assertEqual(binascii.hexlify(self.type2test(s)), s.hex().encode('ascii'))
250+
expected8 = s.hex('.', 8).encode('ascii')
251+
self.assertEqual(binascii.hexlify(self.type2test(s), '.', 8), expected8)
252+
expected1 = s.hex(':').encode('ascii')
253+
self.assertEqual(binascii.b2a_hex(self.type2test(s), ':'), expected1)
254+
243255
def test_qp(self):
244256
type2test = self.type2test
245257
a2b_qp = binascii.a2b_qp

Lib/test/test_bytes.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,63 @@ def test_hex(self):
417417
self.assertEqual(self.type2test(b"\x1a\x2b\x30").hex(), '1a2b30')
418418
self.assertEqual(memoryview(b"\x1a\x2b\x30").hex(), '1a2b30')
419419

420+
def test_hex_separator_basics(self):
421+
three_bytes = self.type2test(b'\xb9\x01\xef')
422+
self.assertEqual(three_bytes.hex(), 'b901ef')
423+
with self.assertRaises(ValueError):
424+
three_bytes.hex('')
425+
with self.assertRaises(ValueError):
426+
three_bytes.hex('xx')
427+
self.assertEqual(three_bytes.hex(':', 0), 'b901ef')
428+
with self.assertRaises(TypeError):
429+
three_bytes.hex(None, 0)
430+
with self.assertRaises(ValueError):
431+
three_bytes.hex('\xff')
432+
with self.assertRaises(ValueError):
433+
three_bytes.hex(b'\xff')
434+
with self.assertRaises(ValueError):
435+
three_bytes.hex(b'\x80')
436+
with self.assertRaises(ValueError):
437+
three_bytes.hex(chr(0x100))
438+
self.assertEqual(three_bytes.hex(':', 0), 'b901ef')
439+
self.assertEqual(three_bytes.hex(b'\x00'), 'b9\x0001\x00ef')
440+
self.assertEqual(three_bytes.hex('\x00'), 'b9\x0001\x00ef')
441+
self.assertEqual(three_bytes.hex(b'\x7f'), 'b9\x7f01\x7fef')
442+
self.assertEqual(three_bytes.hex('\x7f'), 'b9\x7f01\x7fef')
443+
self.assertEqual(three_bytes.hex(':', 3), 'b901ef')
444+
self.assertEqual(three_bytes.hex(':', 4), 'b901ef')
445+
self.assertEqual(three_bytes.hex(':', -4), 'b901ef')
446+
self.assertEqual(three_bytes.hex(':'), 'b9:01:ef')
447+
self.assertEqual(three_bytes.hex(b'$'), 'b9$01$ef')
448+
self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef')
449+
self.assertEqual(three_bytes.hex(':', -1), 'b9:01:ef')
450+
self.assertEqual(three_bytes.hex(':', 2), 'b9:01ef')
451+
self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef')
452+
self.assertEqual(three_bytes.hex('*', -2), 'b901*ef')
453+
454+
value = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000'
455+
self.assertEqual(value.hex('.', 8), '7b7305000000776f.726c646902000000.730500000068656c.6c6f690100000030')
456+
457+
def test_hex_separator_five_bytes(self):
458+
five_bytes = self.type2test(range(90,95))
459+
self.assertEqual(five_bytes.hex(), '5a5b5c5d5e')
460+
461+
def test_hex_separator_six_bytes(self):
462+
six_bytes = self.type2test(x*3 for x in range(1, 7))
463+
self.assertEqual(six_bytes.hex(), '0306090c0f12')
464+
self.assertEqual(six_bytes.hex('.', 1), '03.06.09.0c.0f.12')
465+
self.assertEqual(six_bytes.hex(' ', 2), '0306 090c 0f12')
466+
self.assertEqual(six_bytes.hex('-', 3), '030609-0c0f12')
467+
self.assertEqual(six_bytes.hex(':', 4), '0306:090c0f12')
468+
self.assertEqual(six_bytes.hex(':', 5), '03:06090c0f12')
469+
self.assertEqual(six_bytes.hex(':', 6), '0306090c0f12')
470+
self.assertEqual(six_bytes.hex(':', 95), '0306090c0f12')
471+
self.assertEqual(six_bytes.hex('_', -3), '030609_0c0f12')
472+
self.assertEqual(six_bytes.hex(':', -4), '0306090c:0f12')
473+
self.assertEqual(six_bytes.hex(b'@', -5), '0306090c0f@12')
474+
self.assertEqual(six_bytes.hex(':', -6), '0306090c0f12')
475+
self.assertEqual(six_bytes.hex(' ', -95), '0306090c0f12')
476+
420477
def test_join(self):
421478
self.assertEqual(self.type2test(b"").join([]), b"")
422479
self.assertEqual(self.type2test(b"").join([b""]), b"")

Lib/test/test_doctest.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -665,18 +665,21 @@ def non_Python_modules(): r"""
665665
True
666666
>>> real_tests = [t for t in tests if len(t.examples) > 0]
667667
>>> len(real_tests) # objects that actually have doctests
668-
9
668+
12
669669
>>> for t in real_tests:
670670
... print('{} {}'.format(len(t.examples), t.name))
671671
...
672672
1 builtins.bin
673+
5 builtins.bytearray.hex
674+
5 builtins.bytes.hex
673675
3 builtins.float.as_integer_ratio
674676
2 builtins.float.fromhex
675677
2 builtins.float.hex
676678
1 builtins.hex
677679
1 builtins.int
678680
3 builtins.int.as_integer_ratio
679681
2 builtins.int.bit_length
682+
5 builtins.memoryview.hex
680683
1 builtins.oct
681684
682685
Note here that 'bin', 'oct', and 'hex' are functions; 'float.as_integer_ratio',
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
The `bytes.hex`, `bytearray.hex`, and `memoryview.hex` methods as well as
2+
the `binascii.hexlify` and `binascii.b2a_hex` functions now have the ability to
3+
include an optional separator between hex bytes. This functionality was
4+
inspired by MicroPython's hexlify implementation.

Modules/binascii.c

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,34 +1159,51 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
11591159
binascii.b2a_hex
11601160
11611161
data: Py_buffer
1162-
/
1162+
sep: object = NULL
1163+
An optional single character or byte to separate hex bytes.
1164+
bytes_per_sep: int = 1
1165+
How many bytes between separators. Positive values count from the
1166+
right, negative values count from the left.
11631167
11641168
Hexadecimal representation of binary data.
11651169
11661170
The return value is a bytes object. This function is also
11671171
available as "hexlify()".
1172+
1173+
Example:
1174+
>>> binascii.b2a_hex(b'\xb9\x01\xef')
1175+
b'b901ef'
1176+
>>> binascii.hexlify(b'\xb9\x01\xef', ':')
1177+
b'b9:01:ef'
1178+
>>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
1179+
b'b9_01ef'
11681180
[clinic start generated code]*/
11691181

11701182
static PyObject *
1171-
binascii_b2a_hex_impl(PyObject *module, Py_buffer *data)
1172-
/*[clinic end generated code: output=92fec1a95c9897a0 input=96423cfa299ff3b1]*/
1183+
binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1184+
int bytes_per_sep)
1185+
/*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
11731186
{
1174-
return _Py_strhex_bytes((const char *)data->buf, data->len);
1187+
return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1188+
sep, bytes_per_sep);
11751189
}
11761190

11771191
/*[clinic input]
11781192
binascii.hexlify = binascii.b2a_hex
11791193
11801194
Hexadecimal representation of binary data.
11811195
1182-
The return value is a bytes object.
1196+
The return value is a bytes object. This function is also
1197+
available as "b2a_hex()".
11831198
[clinic start generated code]*/
11841199

11851200
static PyObject *
1186-
binascii_hexlify_impl(PyObject *module, Py_buffer *data)
1187-
/*[clinic end generated code: output=749e95e53c14880c input=2e3afae7f083f061]*/
1201+
binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
1202+
int bytes_per_sep)
1203+
/*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
11881204
{
1189-
return _Py_strhex_bytes((const char *)data->buf, data->len);
1205+
return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
1206+
sep, bytes_per_sep);
11901207
}
11911208

11921209
/*[clinic input]

0 commit comments

Comments
 (0)