Skip to content

Commit 4ce6faa

Browse files
authored
bpo-16995: add support for base32 extended hex (base32hex) (pythonGH-20441)
cc @pganssle Automerge-Triggered-By: @pganssle
1 parent 39042e0 commit 4ce6faa

File tree

5 files changed

+155
-32
lines changed

5 files changed

+155
-32
lines changed

Doc/library/base64.rst

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ The modern interface provides:
124124
whether a lowercase alphabet is acceptable as input. For security purposes,
125125
the default is ``False``.
126126

127-
:rfc:`3548` allows for optional mapping of the digit 0 (zero) to the letter O
127+
:rfc:`4648` allows for optional mapping of the digit 0 (zero) to the letter O
128128
(oh), and for optional mapping of the digit 1 (one) to either the letter I (eye)
129129
or letter L (el). The optional argument *map01* when not ``None``, specifies
130130
which letter the digit 1 should be mapped to (when *map01* is not ``None``, the
@@ -136,6 +136,27 @@ The modern interface provides:
136136
input.
137137

138138

139+
.. function:: b32hexencode(s)
140+
141+
Similar to :func:`b32encode` but uses the Extended Hex Alphabet, as defined in
142+
:rfc:`4648`.
143+
144+
.. versionadded:: 3.10
145+
146+
147+
.. function:: b32hexdecode(s, casefold=False)
148+
149+
Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in
150+
:rfc:`4648`.
151+
152+
This version does not allow the digit 0 (zero) to the letter O (oh) and digit
153+
1 (one) to either the letter I (eye) or letter L (el) mappings; all these
154+
characters are included in the Extended Hex Alphabet and are not
155+
interchangeable.
156+
157+
.. versionadded:: 3.10
158+
159+
139160
.. function:: b16encode(s)
140161

141162
Encode the :term:`bytes-like object` *s* using Base16 and return the

Doc/whatsnew/3.10.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,12 @@ New Modules
103103
Improved Modules
104104
================
105105

106+
base64
107+
------
108+
109+
Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
110+
Base32 Encoding with Extended Hex Alphabet.
111+
106112
curses
107113
------
108114

Lib/base64.py

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
'encode', 'decode', 'encodebytes', 'decodebytes',
1717
# Generalized interface for other encodings
1818
'b64encode', 'b64decode', 'b32encode', 'b32decode',
19-
'b16encode', 'b16decode',
19+
'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
2020
# Base85 and Ascii85 encodings
2121
'b85encode', 'b85decode', 'a85encode', 'a85decode',
2222
# Standard Base64 encoding
@@ -135,19 +135,40 @@ def urlsafe_b64decode(s):
135135

136136

137137
# Base32 encoding/decoding must be done in Python
138+
_B32_ENCODE_DOCSTRING = '''
139+
Encode the bytes-like objects using {encoding} and return a bytes object.
140+
'''
141+
_B32_DECODE_DOCSTRING = '''
142+
Decode the {encoding} encoded bytes-like object or ASCII string s.
143+
144+
Optional casefold is a flag specifying whether a lowercase alphabet is
145+
acceptable as input. For security purposes, the default is False.
146+
{extra_args}
147+
The result is returned as a bytes object. A binascii.Error is raised if
148+
the input is incorrectly padded or if there are non-alphabet
149+
characters present in the input.
150+
'''
151+
_B32_DECODE_MAP01_DOCSTRING = '''
152+
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
153+
letter O (oh), and for optional mapping of the digit 1 (one) to
154+
either the letter I (eye) or letter L (el). The optional argument
155+
map01 when not None, specifies which letter the digit 1 should be
156+
mapped to (when map01 is not None, the digit 0 is always mapped to
157+
the letter O). For security purposes the default is None, so that
158+
0 and 1 are not allowed in the input.
159+
'''
138160
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
139-
_b32tab2 = None
140-
_b32rev = None
161+
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
162+
_b32tab2 = {}
163+
_b32rev = {}
141164

142-
def b32encode(s):
143-
"""Encode the bytes-like object s using Base32 and return a bytes object.
144-
"""
165+
def _b32encode(alphabet, s):
145166
global _b32tab2
146167
# Delay the initialization of the table to not waste memory
147168
# if the function is never called
148-
if _b32tab2 is None:
149-
b32tab = [bytes((i,)) for i in _b32alphabet]
150-
_b32tab2 = [a + b for a in b32tab for b in b32tab]
169+
if alphabet not in _b32tab2:
170+
b32tab = [bytes((i,)) for i in alphabet]
171+
_b32tab2[alphabet] = [a + b for a in b32tab for b in b32tab]
151172
b32tab = None
152173

153174
if not isinstance(s, bytes_types):
@@ -158,7 +179,7 @@ def b32encode(s):
158179
s = s + b'\0' * (5 - leftover) # Don't use += !
159180
encoded = bytearray()
160181
from_bytes = int.from_bytes
161-
b32tab2 = _b32tab2
182+
b32tab2 = _b32tab2[alphabet]
162183
for i in range(0, len(s), 5):
163184
c = from_bytes(s[i: i + 5], 'big')
164185
encoded += (b32tab2[c >> 30] + # bits 1 - 10
@@ -177,29 +198,12 @@ def b32encode(s):
177198
encoded[-1:] = b'='
178199
return bytes(encoded)
179200

180-
def b32decode(s, casefold=False, map01=None):
181-
"""Decode the Base32 encoded bytes-like object or ASCII string s.
182-
183-
Optional casefold is a flag specifying whether a lowercase alphabet is
184-
acceptable as input. For security purposes, the default is False.
185-
186-
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
187-
letter O (oh), and for optional mapping of the digit 1 (one) to
188-
either the letter I (eye) or letter L (el). The optional argument
189-
map01 when not None, specifies which letter the digit 1 should be
190-
mapped to (when map01 is not None, the digit 0 is always mapped to
191-
the letter O). For security purposes the default is None, so that
192-
0 and 1 are not allowed in the input.
193-
194-
The result is returned as a bytes object. A binascii.Error is raised if
195-
the input is incorrectly padded or if there are non-alphabet
196-
characters present in the input.
197-
"""
201+
def _b32decode(alphabet, s, casefold=False, map01=None):
198202
global _b32rev
199203
# Delay the initialization of the table to not waste memory
200204
# if the function is never called
201-
if _b32rev is None:
202-
_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
205+
if alphabet not in _b32rev:
206+
_b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
203207
s = _bytes_from_decode_data(s)
204208
if len(s) % 8:
205209
raise binascii.Error('Incorrect padding')
@@ -220,7 +224,7 @@ def b32decode(s, casefold=False, map01=None):
220224
padchars = l - len(s)
221225
# Now decode the full quanta
222226
decoded = bytearray()
223-
b32rev = _b32rev
227+
b32rev = _b32rev[alphabet]
224228
for i in range(0, len(s), 8):
225229
quanta = s[i: i + 8]
226230
acc = 0
@@ -241,6 +245,26 @@ def b32decode(s, casefold=False, map01=None):
241245
return bytes(decoded)
242246

243247

248+
def b32encode(s):
249+
return _b32encode(_b32alphabet, s)
250+
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
251+
252+
def b32decode(s, casefold=False, map01=None):
253+
return _b32decode(_b32alphabet, s, casefold, map01)
254+
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
255+
extra_args=_B32_DECODE_MAP01_DOCSTRING)
256+
257+
def b32hexencode(s):
258+
return _b32encode(_b32hexalphabet, s)
259+
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
260+
261+
def b32hexdecode(s, casefold=False):
262+
# base32hex does not have the 01 mapping
263+
return _b32decode(_b32hexalphabet, s, casefold)
264+
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
265+
extra_args='')
266+
267+
244268
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
245269
# lowercase. The RFC also recommends against accepting input case
246270
# insensitively.

Lib/test/test_base64.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,76 @@ def test_b32decode_error(self):
351351
with self.assertRaises(binascii.Error):
352352
base64.b32decode(data.decode('ascii'))
353353

354+
def test_b32hexencode(self):
355+
test_cases = [
356+
# to_encode, expected
357+
(b'', b''),
358+
(b'\x00', b'00======'),
359+
(b'a', b'C4======'),
360+
(b'ab', b'C5H0===='),
361+
(b'abc', b'C5H66==='),
362+
(b'abcd', b'C5H66P0='),
363+
(b'abcde', b'C5H66P35'),
364+
]
365+
for to_encode, expected in test_cases:
366+
with self.subTest(to_decode=to_encode):
367+
self.assertEqual(base64.b32hexencode(to_encode), expected)
368+
369+
def test_b32hexencode_other_types(self):
370+
self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=')
371+
self.check_encode_type_errors(base64.b32hexencode)
372+
373+
def test_b32hexdecode(self):
374+
test_cases = [
375+
# to_decode, expected, casefold
376+
(b'', b'', False),
377+
(b'00======', b'\x00', False),
378+
(b'C4======', b'a', False),
379+
(b'C5H0====', b'ab', False),
380+
(b'C5H66===', b'abc', False),
381+
(b'C5H66P0=', b'abcd', False),
382+
(b'C5H66P35', b'abcde', False),
383+
(b'', b'', True),
384+
(b'00======', b'\x00', True),
385+
(b'C4======', b'a', True),
386+
(b'C5H0====', b'ab', True),
387+
(b'C5H66===', b'abc', True),
388+
(b'C5H66P0=', b'abcd', True),
389+
(b'C5H66P35', b'abcde', True),
390+
(b'c4======', b'a', True),
391+
(b'c5h0====', b'ab', True),
392+
(b'c5h66===', b'abc', True),
393+
(b'c5h66p0=', b'abcd', True),
394+
(b'c5h66p35', b'abcde', True),
395+
]
396+
for to_decode, expected, casefold in test_cases:
397+
with self.subTest(to_decode=to_decode, casefold=casefold):
398+
self.assertEqual(base64.b32hexdecode(to_decode, casefold),
399+
expected)
400+
self.assertEqual(base64.b32hexdecode(to_decode.decode('ascii'),
401+
casefold), expected)
402+
403+
def test_b32hexdecode_other_types(self):
404+
self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc')
405+
self.check_decode_type_errors(base64.b32hexdecode)
406+
407+
def test_b32hexdecode_error(self):
408+
tests = [b'abc', b'ABCDEF==', b'==ABCDEF', b'c4======']
409+
prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF']
410+
for i in range(0, 17):
411+
if i:
412+
tests.append(b'='*i)
413+
for prefix in prefixes:
414+
if len(prefix) + i != 8:
415+
tests.append(prefix + b'='*i)
416+
for data in tests:
417+
with self.subTest(to_decode=data):
418+
with self.assertRaises(binascii.Error):
419+
base64.b32hexdecode(data)
420+
with self.assertRaises(binascii.Error):
421+
base64.b32hexdecode(data.decode('ascii'))
422+
423+
354424
def test_b16encode(self):
355425
eq = self.assertEqual
356426
eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support the
2+
Base32 Encoding with Extended Hex Alphabet.

0 commit comments

Comments
 (0)