Skip to content

Commit ceaf4d1

Browse files
msprotz and blhsing
authored and committed
pythongh-99108: Add HACL* Blake2 implementation to hashlib (pythonGH-119316)
This replaces the existing hashlib Blake2 module with a single implementation that uses HACL*'s Blake2b/Blake2s implementations. We added support for all the modes exposed by the Python API, including tree hashing, leaf nodes, and so on. We ported and merged all of these changes upstream in HACL*, added test vectors based on Python's existing implementation, and exposed everything needed for hashlib. This was joint work done with @R1kM. See the PR for extensive discussion and benchmarking details. TL;DR: on many systems it is 8-50% faster (!) than `libb2`; on some systems it appeared 10-20% slower than `libb2`.
1 parent 8c46cb4 commit ceaf4d1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+11594
-5133
lines changed

Lib/test/test_hashlib.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,17 @@ def test_sha3_256_update_over_4gb(self):
368368
h.update(b"hello world")
369369
self.assertEqual(h.hexdigest(), "e2d4535e3b613135c14f2fe4e026d7ad8d569db44901740beffa30d430acb038")
370370

371+
@requires_resource('cpu')
372+
def test_blake2_update_over_4gb(self):
373+
# blake2s or blake2b doesn't matter based on how our C code is structured; this tests the
374+
# common loop macro logic.
375+
zero_1mb = b"\0" * 1024 * 1024
376+
h = hashlib.blake2s()
377+
for i in range(0, 4096):
378+
h.update(zero_1mb)
379+
h.update(b"hello world")
380+
self.assertEqual(h.hexdigest(), "8a268e83dd30528bc0907fa2008c91de8f090a0b6e0e60a5ff0d999d8485526f")
381+
371382
def check(self, name, data, hexdigest, shake=False, **kwargs):
372383
length = len(hexdigest)//2
373384
hexdigest = hexdigest.lower()

Makefile.pre.in

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,11 @@ ENSUREPIP= @ENSUREPIP@
221221
LIBMPDEC_A= Modules/_decimal/libmpdec/libmpdec.a
222222
LIBEXPAT_A= Modules/expat/libexpat.a
223223
LIBHACL_SHA2_A= Modules/_hacl/libHacl_Hash_SHA2.a
224+
LIBHACL_BLAKE2_A= Modules/_hacl/libHacl_Hash_Blake2.a
225+
LIBHACL_SIMD128_FLAGS=@LIBHACL_SIMD128_FLAGS@
226+
LIBHACL_SIMD256_FLAGS=@LIBHACL_SIMD256_FLAGS@
227+
LIBHACL_SIMD128_OBJS=@LIBHACL_SIMD128_OBJS@
228+
LIBHACL_SIMD256_OBJS=@LIBHACL_SIMD256_OBJS@
224229

225230
# Module state, compiler flags and linker flags
226231
# Empty CFLAGS and LDFLAGS are omitted.
@@ -646,6 +651,13 @@ LIBEXPAT_HEADERS= \
646651
LIBHACL_SHA2_OBJS= \
647652
Modules/_hacl/Hacl_Hash_SHA2.o
648653

654+
LIBHACL_BLAKE2_OBJS= \
655+
Modules/_hacl/Hacl_Hash_Blake2s.o \
656+
Modules/_hacl/Hacl_Hash_Blake2b.o \
657+
Modules/_hacl/Lib_Memzero0.o \
658+
$(LIBHACL_SIMD128_OBJS) \
659+
$(LIBHACL_SIMD256_OBJS)
660+
649661
LIBHACL_HEADERS= \
650662
Modules/_hacl/include/krml/FStar_UInt128_Verified.h \
651663
Modules/_hacl/include/krml/FStar_UInt_8_16_32_64.h \
@@ -661,6 +673,18 @@ LIBHACL_SHA2_HEADERS= \
661673
Modules/_hacl/internal/Hacl_Hash_SHA2.h \
662674
$(LIBHACL_HEADERS)
663675

676+
LIBHACL_BLAKE2_HEADERS= \
677+
Modules/_hacl/Hacl_Hash_Blake2b.h \
678+
Modules/_hacl/Hacl_Hash_Blake2s.h \
679+
Modules/_hacl/Hacl_Hash_Blake2s_Simd128.h \
680+
Modules/_hacl/Hacl_Hash_Blake2b_Simd256.h \
681+
Modules/_hacl/internal/Hacl_Hash_Blake2b.h \
682+
Modules/_hacl/internal/Hacl_Hash_Blake2s.h \
683+
Modules/_hacl/internal/Hacl_Impl_Blake2_Constants.h \
684+
Modules/_hacl/internal/Hacl_Hash_Blake2s_Simd128.h \
685+
Modules/_hacl/internal/Hacl_Hash_Blake2b_Simd256.h \
686+
$(LIBHACL_HEADERS)
687+
664688
#########################################################################
665689
# Rules
666690

@@ -840,7 +864,7 @@ coverage-lcov:
840864
@ # remove 3rd party modules, system headers and internal files with
841865
@ # debug, test or dummy functions.
842866
@lcov $(COVERAGE_LCOV_OPTIONS) --remove $(COVERAGE_INFO) \
843-
'*/Modules/_blake2/impl/*' \
867+
'*/Modules/_hacl/*' \
844868
'*/Modules/_ctypes/libffi*/*' \
845869
'*/Modules/_decimal/libmpdec/*' \
846870
'*/Modules/expat/*' \
@@ -870,7 +894,7 @@ coverage-report: regen-token regen-frozen
870894

871895
# Run "Argument Clinic" over all source files
872896
.PHONY: clinic
873-
clinic: check-clean-src $(srcdir)/Modules/_blake2/blake2s_impl.c
897+
clinic: check-clean-src
874898
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/clinic/clinic.py --make --exclude Lib/test/clinic.test.c --srcdir $(srcdir)
875899

876900
.PHONY: clinic-tests
@@ -900,11 +924,6 @@ pybuilddir.txt: $(PYTHON_FOR_BUILD_DEPS)
900924
exit 1 ; \
901925
fi
902926

903-
# blake2s is auto-generated from blake2b
904-
$(srcdir)/Modules/_blake2/blake2s_impl.c: $(srcdir)/Modules/_blake2/blake2b_impl.c $(srcdir)/Modules/_blake2/blake2b2s.py
905-
$(PYTHON_FOR_REGEN) $(srcdir)/Modules/_blake2/blake2b2s.py
906-
$(PYTHON_FOR_REGEN) $(srcdir)/Tools/clinic/clinic.py -f $@
907-
908927
# Build static library
909928
$(LIBRARY): $(LIBRARY_OBJS)
910929
-rm -f $@
@@ -1346,8 +1365,10 @@ $(LIBEXPAT_A): $(LIBEXPAT_OBJS)
13461365
$(AR) $(ARFLAGS) $@ $(LIBEXPAT_OBJS)
13471366

13481367
##########################################################################
1349-
# Build HACL* static libraries for hashlib: libHacl_Hash_SHA2.a
1350-
LIBHACL_CFLAGS=-I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)
1368+
# Build HACL* static libraries for hashlib: libHacl_Hash_SHA2.a, and
1369+
# libHacl_Hash_Blake2.a -- the contents of the latter vary depending on whether we
1370+
# have the ability to compile vectorized versions
1371+
LIBHACL_CFLAGS=-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)
13511372

13521373
Modules/_hacl/Hacl_Hash_SHA2.o: $(srcdir)/Modules/_hacl/Hacl_Hash_SHA2.c $(LIBHACL_SHA2_HEADERS)
13531374
$(CC) -c $(LIBHACL_CFLAGS) -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_SHA2.c
@@ -1356,6 +1377,25 @@ $(LIBHACL_SHA2_A): $(LIBHACL_SHA2_OBJS)
13561377
-rm -f $@
13571378
$(AR) $(ARFLAGS) $@ $(LIBHACL_SHA2_OBJS)
13581379

1380+
Modules/_hacl/Hacl_Hash_Blake2s.o: $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2s.c $(LIBHACL_BLAKE2_HEADERS)
1381+
$(CC) -c $(LIBHACL_CFLAGS) -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2s.c
1382+
1383+
Modules/_hacl/Hacl_Hash_Blake2b.o: $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2b.c $(LIBHACL_BLAKE2_HEADERS)
1384+
$(CC) -c $(LIBHACL_CFLAGS) -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2b.c
1385+
1386+
Modules/_hacl/Hacl_Hash_Blake2s_Simd128.o: $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c $(LIBHACL_BLAKE2_HEADERS)
1387+
$(CC) -c $(LIBHACL_CFLAGS) $(LIBHACL_SIMD128_FLAGS) -DHACL_CAN_COMPILE_VEC128 -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c
1388+
1389+
Modules/_hacl/Hacl_Hash_Blake2b_Simd256.o: $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c $(LIBHACL_BLAKE2_HEADERS)
1390+
$(CC) -c $(LIBHACL_CFLAGS) $(LIBHACL_SIMD256_FLAGS) -DHACL_CAN_COMPILE_VEC256 -o $@ $(srcdir)/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c
1391+
1392+
Modules/_hacl/Lib_Memzero0.o: $(srcdir)/Modules/_hacl/Lib_Memzero0.c $(LIBHACL_BLAKE2_HEADERS)
1393+
$(CC) -c $(LIBHACL_CFLAGS) -o $@ $(srcdir)/Modules/_hacl/Lib_Memzero0.c
1394+
1395+
$(LIBHACL_BLAKE2_A): $(LIBHACL_BLAKE2_OBJS)
1396+
-rm -f $@
1397+
$(AR) $(ARFLAGS) $@ $(LIBHACL_BLAKE2_OBJS)
1398+
13591399
# create relative links from build/lib.platform/egg.so to Modules/egg.so
13601400
# pybuilddir.txt is created too late. We cannot use it in Makefile
13611401
# targets. ln --relative is not portable.
@@ -3136,18 +3176,18 @@ MODULE_CMATH_DEPS=$(srcdir)/Modules/_math.h
31363176
MODULE_MATH_DEPS=$(srcdir)/Modules/_math.h
31373177
MODULE_PYEXPAT_DEPS=@LIBEXPAT_INTERNAL@
31383178
MODULE_UNICODEDATA_DEPS=$(srcdir)/Modules/unicodedata_db.h $(srcdir)/Modules/unicodename_db.h
3139-
MODULE__BLAKE2_DEPS=$(srcdir)/Modules/_blake2/impl/blake2-config.h $(srcdir)/Modules/_blake2/impl/blake2-impl.h $(srcdir)/Modules/_blake2/impl/blake2.h $(srcdir)/Modules/_blake2/impl/blake2b-load-sse2.h $(srcdir)/Modules/_blake2/impl/blake2b-load-sse41.h $(srcdir)/Modules/_blake2/impl/blake2b-ref.c $(srcdir)/Modules/_blake2/impl/blake2b-round.h $(srcdir)/Modules/_blake2/impl/blake2b.c $(srcdir)/Modules/_blake2/impl/blake2s-load-sse2.h $(srcdir)/Modules/_blake2/impl/blake2s-load-sse41.h $(srcdir)/Modules/_blake2/impl/blake2s-load-xop.h $(srcdir)/Modules/_blake2/impl/blake2s-ref.c $(srcdir)/Modules/_blake2/impl/blake2s-round.h $(srcdir)/Modules/_blake2/impl/blake2s.c $(srcdir)/Modules/_blake2/blake2module.h $(srcdir)/Modules/hashlib.h
31403179
MODULE__CTYPES_DEPS=$(srcdir)/Modules/_ctypes/ctypes.h $(srcdir)/Modules/_complex.h
31413180
MODULE__CTYPES_TEST_DEPS=$(srcdir)/Modules/_ctypes/_ctypes_test_generated.c.h
31423181
MODULE__CTYPES_MALLOC_CLOSURE=@MODULE__CTYPES_MALLOC_CLOSURE@
31433182
MODULE__DECIMAL_DEPS=$(srcdir)/Modules/_decimal/docstrings.h @LIBMPDEC_INTERNAL@
31443183
MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@
31453184
MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h
31463185
MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h
3147-
MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_MD5.h Modules/_hacl/Hacl_Hash_MD5.c
3148-
MODULE__SHA1_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_SHA1.h Modules/_hacl/Hacl_Hash_SHA1.c
3186+
MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_MD5.h Modules/_hacl/internal/Hacl_Hash_MD5.h Modules/_hacl/Hacl_Hash_MD5.c
3187+
MODULE__SHA1_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_SHA1.h Modules/_hacl/internal/Hacl_Hash_SHA1.h Modules/_hacl/Hacl_Hash_SHA1.c
31493188
MODULE__SHA2_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_SHA2_HEADERS) $(LIBHACL_SHA2_A)
3150-
MODULE__SHA3_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_SHA3.h Modules/_hacl/Hacl_Hash_SHA3.c
3189+
MODULE__SHA3_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HEADERS) Modules/_hacl/Hacl_Hash_SHA3.h Modules/_hacl/internal/Hacl_Hash_SHA3.h Modules/_hacl/Hacl_Hash_SHA3.c
3190+
MODULE__BLAKE2_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_BLAKE2_HEADERS) $(LIBHACL_BLAKE2_A)
31513191
MODULE__SOCKET_DEPS=$(srcdir)/Modules/socketmodule.h $(srcdir)/Modules/addrinfo.h $(srcdir)/Modules/getaddrinfo.c $(srcdir)/Modules/getnameinfo.c
31523192
MODULE__SSL_DEPS=$(srcdir)/Modules/_ssl.h $(srcdir)/Modules/_ssl/cert.c $(srcdir)/Modules/_ssl/debughelpers.c $(srcdir)/Modules/_ssl/misc.c $(srcdir)/Modules/_ssl_data_111.h $(srcdir)/Modules/_ssl_data_300.h $(srcdir)/Modules/socketmodule.h
31533193
MODULE__TESTCAPI_DEPS=$(srcdir)/Modules/_testcapi/parts.h $(srcdir)/Modules/_testcapi/util.h
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Python's hashlib now unconditionally uses the vendored HACL* library for
2+
Blake2. Python no longer accepts libb2 as an optional dependency for Blake2.
3+
4+
We refreshed HACL* to the latest version, and now vendor HACL*'s 128-bit and
5+
256-bit wide vector implementations for Blake2, which are used on x86/x64
6+
toolchains when the required CPU features are available at runtime.
7+
8+
HACL*'s 128-bit wide vector implementation of Blake2 can also run on ARM
9+
NEON and Power8, but lacking evidence of a performance gain, these are not
10+
enabled (yet).

0 commit comments

Comments
 (0)