Skip to content

bpo-45150: draft implementation only for sha224,sha256 #31928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion Lib/hashlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@

"""

import io

# This tuple and __get_builtin_constructor() must be modified if a new
# always available algorithm is added.
__always_supported = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512',
Expand All @@ -65,7 +67,8 @@
algorithms_available = set(__always_supported)

__all__ = __always_supported + ('new', 'algorithms_guaranteed',
'algorithms_available', 'pbkdf2_hmac')
'algorithms_available', 'pbkdf2_hmac',
'from_file', 'from_raw_file')


__builtin_constructor_cache = {}
Expand Down Expand Up @@ -95,6 +98,8 @@ def __get_builtin_constructor(name):
import _sha256
cache['SHA224'] = cache['sha224'] = _sha256.sha224
cache['SHA256'] = cache['sha256'] = _sha256.sha256
cache['sha224_fd'] = _sha256._sha224_from_file_descriptor
cache['sha256_fd'] = _sha256._sha256_from_file_descriptor
elif name in {'SHA512', 'sha512', 'SHA384', 'sha384'}:
import _sha512
cache['SHA384'] = cache['sha384'] = _sha512.sha384
Expand Down Expand Up @@ -264,6 +269,34 @@ def prf(msg, inner=inner, outer=outer):
logging.exception('code for hash %s was not found.', __func_name)


def from_raw_file(name, fobj=None, **kwargs):
    """from_raw_file(name, fobj=None, **kwargs) - Return a new hashing object
    using the named algorithm; initialized from a non-buffered file object.

    *fobj* must be a RawIOBase instance, which you can get with e.g.
    `open(path, mode='rb', buffering=0)`.  Its file descriptor is handed to
    the C constructor registered for *name*, which reads the file and
    releases the Python :term:`GIL` while doing so.

    The only keyword argument accepted is *usedforsecurity* (default True).

    Raises TypeError if *fobj* is not a raw file object or if an unknown
    keyword argument is given, and ValueError if no file-descriptor based
    constructor is registered for *name*.
    """
    if not isinstance(fobj, (io.RawIOBase, io._io._RawIOBase)):
        raise TypeError(
            f'from_raw_file() must get a non-buffered file object. '
            f'{fobj} is not an instance of io.RawIOBase')
    # The C constructors take (fd, usedforsecurity) as positional-only
    # parameters, so the keyword has to be unpacked and passed by position.
    usedforsecurity = kwargs.pop('usedforsecurity', True)
    if kwargs:
        unexpected = ', '.join(sorted(kwargs))
        raise TypeError(
            f'from_raw_file() got unexpected keyword argument(s): '
            f'{unexpected}')
    # Called for its side effect: it fills the constructor cache, including
    # the '<algorithm>_fd' entries looked up below.
    __get_builtin_constructor(name)
    # The '_fd' entries are registered under lower-case names only, while
    # algorithm names are accepted in upper case as well (e.g. 'SHA256').
    func = __builtin_constructor_cache.get(name.lower() + '_fd')
    if func is None:
        raise ValueError(
            'no file descriptor based constructor for hash type ' + name)
    return func(fobj.fileno(), usedforsecurity)

# Number of bytes requested per read() call by from_file().
_READ_BUFFER_SIZE = 65536


def from_file(name, fobj=None, **kwargs):
    """from_file(name, fobj=None, **kwargs) - Return a new hashing object
    using the named algorithm; initialized from a file object.

    If a non-buffered file object (RawIOBase instance) is passed, which you
    can get with e.g. `open(path, mode='rb', buffering=0)`, and a
    file-descriptor based constructor is registered for *name*, the work is
    delegated to from_raw_file() and the Python :term:`GIL` is released
    while initializing the hash with the contents of the file.

    In every other case the file is read in chunks of _READ_BUFFER_SIZE
    bytes through fobj.read() and fed to a hash object created with
    new(name, **kwargs).

    Raises TypeError if no file object is given.
    """
    if fobj is None:
        raise TypeError('from_file() requires a file object')
    if isinstance(fobj, (io.RawIOBase, io._io._RawIOBase)):
        # Called for its side effect of filling the constructor cache.
        __get_builtin_constructor(name)
        # Only some algorithms have a file-descriptor based constructor;
        # for all the others fall through to the generic read loop instead
        # of failing inside from_raw_file().
        if (name + '_fd') in __builtin_constructor_cache:
            return from_raw_file(name, fobj, **kwargs)
    hash_obj = new(name, **kwargs)
    while True:
        chunk = fobj.read(_READ_BUFFER_SIZE)
        if not chunk:
            # An empty read marks the end of the file.
            return hash_obj
        hash_obj.update(chunk)

# Cleanup locals(): unbind the listed double-underscore helper names so they
# do not remain in the module namespace.
# NOTE: __get_builtin_constructor and __builtin_constructor_cache are not in
# this list; from_raw_file() looks both up at call time.
del __always_supported, __func_name, __get_hash
del __py_new, __hash_new, __get_openssl_constructor
84 changes: 83 additions & 1 deletion Modules/clinic/sha256module.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Modules/hashlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,4 @@
* to allow the user to optimize based on the platform they're using. */
#define HASHLIB_GIL_MINSIZE 2048

#define HASHLIB_READ_BUFFER_SIZE 65536
96 changes: 96 additions & 0 deletions Modules/sha256module.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "Python.h"
#include "pycore_bitutils.h" // _Py_bswap32()
#include "pycore_strhex.h" // _Py_strhex()
#include "pycore_fileutils.h" // *_SUPPRESS_IPH
#include "structmember.h" // PyMemberDef
#include "hashlib.h"

Expand Down Expand Up @@ -509,6 +510,99 @@ SHA256Type_update(SHAobject *self, PyObject *obj)
Py_RETURN_NONE;
}

/* Feed everything that can be read from file descriptor `fd` into `sha`.
 *
 * Used by the _sha*_from_file_descriptor_impl() functions.
 *
 * Takes over the caller's reference to `sha`: on success that same object is
 * returned as the result; on failure the reference is released and NULL is
 * returned with an exception set (OSError for a failed read(), or whatever
 * a Python signal handler raised while read() was being retried).
 *
 * NOTE(review): `buf` is HASHLIB_READ_BUFFER_SIZE bytes of stack; confirm
 * this is acceptable for threads created with a small stack.
 */
static PyObject *
_from_file_descriptor(SHAobject *sha, int fd)
{
    SHA_BYTE buf[HASHLIB_READ_BUFFER_SIZE];
    Py_ssize_t count = 0;   /* read() result: bytes read, 0 at EOF, <0 on error */
    int err = 0;            /* errno captured right after read() */
    int async_err = 0;      /* non-zero once a signal handler has raised */

    /* invariant: New objects can't be accessed by other code yet,
     * thus it's safe to release the GIL without locking the object.
     */
    _Py_BEGIN_SUPPRESS_IPH
    Py_BEGIN_ALLOW_THREADS
    for (;;) {
        /* Retry read() for as long as it only fails with EINTR and the
         * Python-level signal handlers do not raise. */
        do {
            errno = 0;
            count = read(fd, buf, HASHLIB_READ_BUFFER_SIZE);
            /* save errno because PyErr_CheckSignals()
             * and PyErr_SetFromErrno() can modify it */
            err = errno;
            if (count >= 0 || err != EINTR) {
                break;
            }
            /* PyErr_CheckSignals() must be called with the GIL held. */
            PyEval_RestoreThread(_save);
            async_err = PyErr_CheckSignals();
            _save = PyEval_SaveThread();
        } while (!async_err);

        if (count <= 0) {
            /* end of file (0) or error (<0) */
            break;
        }
        sha_update(sha, buf, count);
    }
    Py_END_ALLOW_THREADS
    _Py_END_SUPPRESS_IPH

    if (async_err) {
        /* read() was interrupted by a signal (failed with EINTR)
         * and the Python signal handler raised an exception.  The exception
         * is already set, so just drop the half-initialized object and
         * report failure with NULL. */
        assert(err == EINTR);
        Py_DECREF(sha);
        return NULL;
    }
    if (count < 0) {
        Py_DECREF(sha);
        /* restore the errno of the failed read() before it is converted */
        errno = err;
        return PyErr_SetFromErrno(PyExc_OSError);
    }

    return (PyObject *)sha;
}

/*[clinic input]
_sha256._sha256_from_file_descriptor

fd: int
usedforsecurity: bool = True
/

Create hash object initialized with the content of the file given as a file descriptor.
[clinic start generated code]*/

static PyObject *
_sha256__sha256_from_file_descriptor_impl(PyObject *module, int fd,
                                          int usedforsecurity)
/*[clinic end generated code: output=2cb7fd5ffad8fcd6 input=925c7ddd28d59dc8]*/
{
    /* Build a SHA-256 object with no initial data (string == NULL), then
     * let the shared helper feed it from the file descriptor. */
    PyObject *hash = _sha256_sha256_impl(module, NULL, usedforsecurity);

    if (!hash) {
        /* constructor failed; propagate its exception */
        return NULL;
    }
    return _from_file_descriptor((SHAobject *)hash, fd);
}

/*[clinic input]
_sha256._sha224_from_file_descriptor

fd: int
usedforsecurity: bool = True
/

Create hash object initialized with the content of the file given as a file descriptor.
[clinic start generated code]*/

static PyObject *
_sha256__sha224_from_file_descriptor_impl(PyObject *module, int fd,
                                          int usedforsecurity)
/*[clinic end generated code: output=4efbc5c69598e4db input=259aa08ff2f58c2d]*/
{
    /* Build a SHA-224 object with no initial data (string == NULL), then
     * let the shared helper feed it from the file descriptor. */
    PyObject *hash = _sha256_sha224_impl(module, NULL, usedforsecurity);

    if (!hash) {
        /* constructor failed; propagate its exception */
        return NULL;
    }
    return _from_file_descriptor((SHAobject *)hash, fd);
}

static PyMethodDef SHA_methods[] = {
SHA256TYPE_COPY_METHODDEF
SHA256TYPE_DIGEST_METHODDEF
Expand Down Expand Up @@ -674,6 +768,8 @@ _sha256_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity)
/* Module-level function table, terminated by a {NULL, NULL} sentinel entry.
 * Each *_METHODDEF macro contributes one entry; presumably they come from
 * the Argument Clinic output in clinic/sha256module.c.h (verify there). */
static struct PyMethodDef SHA_functions[] = {
_SHA256_SHA256_METHODDEF
_SHA256_SHA224_METHODDEF
/* private constructors that hash the content read from a file descriptor */
_SHA256__SHA256_FROM_FILE_DESCRIPTOR_METHODDEF
_SHA256__SHA224_FROM_FILE_DESCRIPTOR_METHODDEF
{NULL, NULL} /* Sentinel */
};

Expand Down