diff --git a/Lib/hashlib.py b/Lib/hashlib.py
index 562501860a72b3..fb7a904c3c35f8 100644
--- a/Lib/hashlib.py
+++ b/Lib/hashlib.py
@@ -53,6 +53,8 @@
 
 """
 
+import io
+
 # This tuple and __get_builtin_constructor() must be modified if a new
 # always available algorithm is added.
 __always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
@@ -65,7 +67,8 @@
 algorithms_available = set(__always_supported)
 
 __all__ = __always_supported + ('new', 'algorithms_guaranteed',
-                                'algorithms_available', 'pbkdf2_hmac')
+                                'algorithms_available', 'pbkdf2_hmac',
+                                'from_file', 'from_raw_file')
 
 
 __builtin_constructor_cache = {}
@@ -95,6 +98,8 @@ def __get_builtin_constructor(name):
             import _sha256
             cache['SHA224'] = cache['sha224'] = _sha256.sha224
             cache['SHA256'] = cache['sha256'] = _sha256.sha256
+            cache['sha224_fd'] = _sha256._sha224_from_file_descriptor
+            cache['sha256_fd'] = _sha256._sha256_from_file_descriptor
         elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}:
             import _sha512
             cache['SHA384'] = cache['sha384'] = _sha512.sha384
@@ -264,6 +269,63 @@ def prf(msg, inner=inner, outer=outer):
         logging.exception('code for hash %s was not found.', __func_name)
 
 
+_READ_BUFFER_SIZE = 65536
+
+
+def _from_file_chunks(name, fobj, **kwargs):
+    """Return a new *name* hash fed with fobj.read() chunks until EOF."""
+    hash_obj = new(name, **kwargs)
+    while True:
+        chunk = fobj.read(_READ_BUFFER_SIZE)
+        if not chunk:
+            return hash_obj
+        hash_obj.update(chunk)
+
+
+def from_raw_file(name, fobj=None, *, usedforsecurity=True):
+    """from_raw_file(name, fobj=None, *, usedforsecurity=True) - Return a
+    new hashing object using the named algorithm; initialized from a
+    non-buffered file object (RawIOBase instance), which you can get with
+    e.g. `open(path, mode='rb', buffering=0)`.
+
+    For the builtin sha224 and sha256 implementations the Python
+    :term:`GIL` is released while initializing the hash with the contents
+    of the file.  Any other algorithm is read through fobj.read().
+
+    Raises TypeError if fobj is not a RawIOBase instance.
+    """
+    if not isinstance(fobj, (io.RawIOBase, io._io._RawIOBase)):
+        raise TypeError(f'from_raw_file() must get a non-buffered file '
+                        f'object. {fobj} is not an instance of '
+                        f'io.RawIOBase')
+    try:
+        # Fills the cache, including the optional '<name>_fd' reader.
+        __get_builtin_constructor(name)
+    except ValueError:
+        # No builtin implementation; new() may still find one in OpenSSL.
+        fd_reader = None
+    else:
+        fd_reader = __builtin_constructor_cache.get(name.lower() + '_fd')
+    if fd_reader is None:
+        return _from_file_chunks(name, fobj, usedforsecurity=usedforsecurity)
+    # The C readers take both arguments positionally only.
+    return fd_reader(fobj.fileno(), usedforsecurity)
+
+
+def from_file(name, fobj=None, **kwargs):
+    """from_file(name, fobj=None, **kwargs) - Return a new hashing object
+    using the named algorithm; initialized from a file object.  If a
+    non-buffered file object (RawIOBase instance) is passed, which you can
+    get with e.g. `open(path, mode='rb', buffering=0)`, the Python
+    :term:`GIL` is released while initializing the hash with the contents
+    of the file (builtin sha224 and sha256 only).  Keyword arguments are
+    passed on to from_raw_file() or new().
+    """
+    if isinstance(fobj, (io.RawIOBase, io._io._RawIOBase)):
+        return from_raw_file(name, fobj, **kwargs)
+    return _from_file_chunks(name, fobj, **kwargs)
+
+
 # Cleanup locals()
 del __always_supported, __func_name, __get_hash
 del __py_new, __hash_new, __get_openssl_constructor
diff --git a/Modules/clinic/sha256module.c.h b/Modules/clinic/sha256module.c.h
index 89205c4f14f4e4..6d33f04194796d 100644
--- a/Modules/clinic/sha256module.c.h
+++ b/Modules/clinic/sha256module.c.h
@@ -76,6 +76,88 @@ PyDoc_STRVAR(SHA256Type_update__doc__,
 #define SHA256TYPE_UPDATE_METHODDEF    \
     {"update", (PyCFunction)SHA256Type_update, METH_O, SHA256Type_update__doc__},
 
+PyDoc_STRVAR(_sha256__sha256_from_file_descriptor__doc__,
+"_sha256_from_file_descriptor($module, fd, usedforsecurity=True, /)\n"
+"--\n"
+"\n"
+"Create hash object initialized with the content of the file given as a file descriptor.");
+
+#define _SHA256__SHA256_FROM_FILE_DESCRIPTOR_METHODDEF    \
+    {"_sha256_from_file_descriptor", (PyCFunction)(void(*)(void))_sha256__sha256_from_file_descriptor, METH_FASTCALL, _sha256__sha256_from_file_descriptor__doc__},
+
+static PyObject *
+_sha256__sha256_from_file_descriptor_impl(PyObject *module, int fd,
+                                          int usedforsecurity);
+
+static PyObject *
+_sha256__sha256_from_file_descriptor(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    int fd;
+    int usedforsecurity = 1;
+
+    if (!_PyArg_CheckPositional("_sha256_from_file_descriptor", nargs, 1, 2)) {
+        goto exit;
+    }
+    fd = _PyLong_AsInt(args[0]);
+    if (fd == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    usedforsecurity = PyObject_IsTrue(args[1]);
+    if (usedforsecurity < 0) {
+        goto exit;
+    }
+skip_optional:
+    return_value = _sha256__sha256_from_file_descriptor_impl(module, fd, usedforsecurity);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_sha256__sha224_from_file_descriptor__doc__,
+"_sha224_from_file_descriptor($module, fd, usedforsecurity=True, /)\n"
+"--\n"
+"\n"
+"Create hash object initialized with the content of the file given as a file descriptor.");
+
+#define _SHA256__SHA224_FROM_FILE_DESCRIPTOR_METHODDEF    \
+    {"_sha224_from_file_descriptor", (PyCFunction)(void(*)(void))_sha256__sha224_from_file_descriptor, METH_FASTCALL, _sha256__sha224_from_file_descriptor__doc__},
+
+static PyObject *
+_sha256__sha224_from_file_descriptor_impl(PyObject *module, int fd,
+                                          int usedforsecurity);
+
+static PyObject *
+_sha256__sha224_from_file_descriptor(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    int fd;
+    int usedforsecurity = 1;
+
+    if (!_PyArg_CheckPositional("_sha224_from_file_descriptor", nargs, 1, 2)) {
+        goto exit;
+    }
+    fd = _PyLong_AsInt(args[0]);
+    if (fd == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    usedforsecurity = PyObject_IsTrue(args[1]);
+    if (usedforsecurity < 0) {
+        goto exit;
+    }
+skip_optional:
+    return_value = _sha256__sha224_from_file_descriptor_impl(module, fd, usedforsecurity);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_sha256_sha256__doc__,
 "sha256($module, /, string=b\'\', *, usedforsecurity=True)\n"
 "--\n"
@@ -177,4 +259,4 @@ _sha256_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=b7283f75c9d08f30 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=b57afe09426b4e37 input=a9049054013a1b77]*/
diff --git a/Modules/hashlib.h b/Modules/hashlib.h
index 56ae7a5e50bf58..0677905e889c26 100644
--- a/Modules/hashlib.h
+++ b/Modules/hashlib.h
@@ -57,3 +57,4 @@
  * to allow the user to optimize based on the platform they're using. */
 
 #define HASHLIB_GIL_MINSIZE 2048
+#define HASHLIB_READ_BUFFER_SIZE 65536
diff --git a/Modules/sha256module.c b/Modules/sha256module.c
index 17ee86683b7a89..8e445119fdf9c9 100644
--- a/Modules/sha256module.c
+++ b/Modules/sha256module.c
@@ -22,6 +22,7 @@
 #include "Python.h"
 #include "pycore_bitutils.h"      // _Py_bswap32()
 #include "pycore_strhex.h"        // _Py_strhex()
+#include "pycore_fileutils.h"     // *_SUPPRESS_IPH
 #include "structmember.h"         // PyMemberDef
 #include "hashlib.h"
 
@@ -509,6 +510,114 @@ SHA256Type_update(SHAobject *self, PyObject *obj)
     Py_RETURN_NONE;
 }
 
+/* used by _sha*_from_file_descriptor_impl
+ *
+ * Feed everything readable from fd into sha.  Steals the reference to sha:
+ * returns it on success, or releases it and returns NULL with an exception
+ * set on failure. */
+static PyObject *
+_from_file_descriptor(SHAobject *sha, int fd)
+{
+    /* Heap-allocated: 64 KiB is too much for the stack of a thread that
+     * was started with a small stack size. */
+    SHA_BYTE *buf = PyMem_Malloc(HASHLIB_READ_BUFFER_SIZE);
+    Py_ssize_t count = 0;  /* read() returns ssize_t, not int */
+    int err = 0, async_err = 0;
+
+    if (buf == NULL) {
+        Py_DECREF(sha);
+        return PyErr_NoMemory();
+    }
+    /* invariant: New objects can't be accessed by other code yet,
+     * thus it's safe to release the GIL without locking the object.
+     */
+    _Py_BEGIN_SUPPRESS_IPH
+    Py_BEGIN_ALLOW_THREADS
+    while (1) {
+        while (!async_err) {
+            errno = 0;
+            count = read(fd, buf, HASHLIB_READ_BUFFER_SIZE);
+            /* save errno: PyErr_CheckSignals() and re-acquiring the GIL
+             * can modify it */
+            err = errno;
+            if (count < 0 && err == EINTR) {
+                PyEval_RestoreThread(_save);
+                async_err = PyErr_CheckSignals();
+                _save = PyEval_SaveThread();
+            } else {
+                break;
+            }
+        }
+        if (count <= 0) {
+            break;
+        }
+        sha_update(sha, buf, count);
+    }
+    Py_END_ALLOW_THREADS
+    _Py_END_SUPPRESS_IPH
+    PyMem_Free(buf);
+    if (async_err) {
+        /* read() was interrupted by a signal (failed with EINTR)
+         * and the Python signal handler raised an exception.
+         * Report it with NULL; PyErr_Occurred() is a borrowed, non-NULL
+         * reference and must not be returned as a result. */
+        Py_DECREF(sha);
+        return NULL;
+    }
+    if (count < 0) {
+        Py_DECREF(sha);
+        /* restore the errno saved right after the failed read() */
+        errno = err;
+        return PyErr_SetFromErrno(PyExc_OSError);
+    }
+
+    return (PyObject *)sha;
+}
+
+/*[clinic input]
+_sha256._sha256_from_file_descriptor
+
+    fd: int
+    usedforsecurity: bool = True
+    /
+
+Create hash object initialized with the content of the file given as a file descriptor.
+[clinic start generated code]*/
+
+static PyObject *
+_sha256__sha256_from_file_descriptor_impl(PyObject *module, int fd,
+                                          int usedforsecurity)
+/*[clinic end generated code: output=2cb7fd5ffad8fcd6 input=925c7ddd28d59dc8]*/
+{
+    SHAobject *sha = (SHAobject*)_sha256_sha256_impl(module, NULL, usedforsecurity);
+    if (sha == NULL) {
+        return NULL;
+    }
+    return _from_file_descriptor(sha, fd);
+}
+
+/*[clinic input]
+_sha256._sha224_from_file_descriptor
+
+    fd: int
+    usedforsecurity: bool = True
+    /
+
+Create hash object initialized with the content of the file given as a file descriptor.
+[clinic start generated code]*/
+
+static PyObject *
+_sha256__sha224_from_file_descriptor_impl(PyObject *module, int fd,
+                                          int usedforsecurity)
+/*[clinic end generated code: output=4efbc5c69598e4db input=259aa08ff2f58c2d]*/
+{
+    SHAobject *sha = (SHAobject*)_sha256_sha224_impl(module, NULL, usedforsecurity);
+    if (sha == NULL) {
+        return NULL;
+    }
+    return _from_file_descriptor(sha, fd);
+}
+
 static PyMethodDef SHA_methods[] = {
     SHA256TYPE_COPY_METHODDEF
     SHA256TYPE_DIGEST_METHODDEF
@@ -674,6 +783,8 @@ _sha256_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity)
 static struct PyMethodDef SHA_functions[] = {
     _SHA256_SHA256_METHODDEF
     _SHA256_SHA224_METHODDEF
+    _SHA256__SHA256_FROM_FILE_DESCRIPTOR_METHODDEF
+    _SHA256__SHA224_FROM_FILE_DESCRIPTOR_METHODDEF
     {NULL,      NULL}            /* Sentinel */
 };
 