diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h
index 90214a314031d1..b07db953e2c6bb 100644
--- a/Include/internal/pycore_global_objects_fini_generated.h
+++ b/Include/internal/pycore_global_objects_fini_generated.h
@@ -916,6 +916,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(entrypoint));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(env));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(errors));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(estimate));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask));
     _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type));
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index 97a75d0c46c867..a93cc79bda63c6 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -405,6 +405,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(entrypoint)
         STRUCT_FOR_ID(env)
         STRUCT_FOR_ID(errors)
+        STRUCT_FOR_ID(estimate)
         STRUCT_FOR_ID(event)
         STRUCT_FOR_ID(eventmask)
         STRUCT_FOR_ID(exc_type)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 4f928cc050bf8e..81bd0aefc668ee 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -914,6 +914,7 @@ extern "C" {
     INIT_ID(entrypoint), \
     INIT_ID(env), \
     INIT_ID(errors), \
+    INIT_ID(estimate), \
     INIT_ID(event), \
     INIT_ID(eventmask), \
     INIT_ID(exc_type), \
diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h
index 5b78d038fc1192..cad8f2731fc222 100644
--- a/Include/internal/pycore_unicodeobject_generated.h
+++ b/Include/internal/pycore_unicodeobject_generated.h
@@ -1416,6 +1416,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
     assert(PyUnicode_GET_LENGTH(string) != 1);
+    string = &_Py_ID(estimate);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
     string = &_Py_ID(event);
     _PyUnicode_InternStatic(interp, &string);
     assert(_PyUnicode_CheckConsistency(string, 1));
diff --git a/Lib/_compression.py b/Lib/_compression.py
index e8b70aa0a3e680..d3530d32cf6d3b 100644
--- a/Lib/_compression.py
+++ b/Lib/_compression.py
@@ -111,14 +111,10 @@ def read(self, size=-1):
         return data
 
     def readall(self):
-        chunks = []
-        # sys.maxsize means the max length of output buffer is unlimited,
-        # so that the whole input buffer can be decompressed within one
-        # .decompress() call.
-        while data := self.read(sys.maxsize):
-            chunks.append(data)
-
-        return b"".join(chunks)
+        # FIXME(cmaloney): non-blocking support?
+        bio = io.BytesIO()
+        bio.readfrom(self)
+        return bio.getvalue()
 
     # Rewind the file to the beginning of the data stream.
     def _rewind(self):
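Reviewer sketch of the behavioral equivalence in the _compression.py change above: the removed loop and the new BytesIO-based implementation both read until EOF, but the new one grows a single buffer in place instead of accumulating chunks and joining them. readfrom() only exists with this patch applied; the helper names below are illustrative, not part of the patch.

    import io
    import sys

    def readall_via_chunks(stream):
        # The removed implementation: accumulate chunks, join at the end.
        chunks = []
        while data := stream.read(sys.maxsize):
            chunks.append(data)
        return b"".join(chunks)

    def readall_via_readfrom(stream):
        # The new implementation (requires the patched io.BytesIO).
        bio = io.BytesIO()
        bio.readfrom(stream)
        return bio.getvalue()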
diff --git a/Lib/_pyio.py b/Lib/_pyio.py
index f7370dff19efc8..19445848e12f07 100644
--- a/Lib/_pyio.py
+++ b/Lib/_pyio.py
@@ -926,6 +926,86 @@ def read1(self, size=-1):
         """
         return self.read(size)
 
+    def readfrom(self, file, /, *, estimate=None, limit=None):
+        """Efficiently read from the provided file, returning True if EOF was hit.
+
+        Returns True if and only if a read into a non-zero length buffer
+        returns 0 bytes. On most systems this indicates end of file /
+        stream.
+        """
+        if self.closed:
+            raise ValueError("read from closed file")
+
+        # In order to detect end of file, need a read() of at least 1
+        # byte which returns size 0. Oversize the buffer by 1 byte so the
+        # I/O can be completed with two read() calls (one for all data,
+        # one for EOF) without needing to resize the buffer.
+        if estimate is not None:
+            target_read = max(int(estimate), 0) + 1
+        else:
+            target_read = DEFAULT_BUFFER_SIZE
+
+        # Cap to limit.
+        if limit is not None:
+            limit = int(limit)
+            if limit < 0:
+                raise ValueError(f"limit must be non-negative, got {limit}")
+            if limit == 0:  # Nothing to read.
+                return False
+            target_read = min(target_read, limit)
+
+        # Expand buffer to get target read in one read when possible.
+        if len(self._buffer) < target_read + self._pos:
+            self._buffer.resize(self._pos + target_read)
+
+        if isinstance(file, int):  # File descriptor
+            read_fn = lambda: os.readinto(file, memoryview(self._buffer)[self._pos:])
+        elif file_readinto := getattr(file, "readinto", None):
+            read_fn = lambda: file_readinto(memoryview(self._buffer)[self._pos:])
+        elif file_read := getattr(file, "read", None):
+            def read_fn():
+                data = file_read(len(self._buffer) - self._pos)
+                if data is None:  # Non-blocking read with no data ready.
+                    return None
+                self._buffer[self._pos:self._pos + len(data)] = data
+                return len(data)
+        else:
+            raise TypeError("file must be a file descriptor or provide "
+                            "readinto or read")
+
+        found_eof = False
+        start_pos = self._pos
+        try:
+            while n := read_fn():
+                self._pos += n
+                # Expand buffer if needed.
+                if len(self._buffer) <= self._pos:
+                    bytes_read = self._pos - start_pos
+                    target_size = _new_buffersize(bytes_read)
+
+                    # Keep buffer size <= limit, so only need to check
+                    # against limit when resizing.
+                    if limit is not None:
+                        remaining = limit - bytes_read
+                        if remaining <= 0:
+                            assert remaining == 0, "should never pass limit"
+                            break
+                        target_size = min(limit, target_size)
+
+                    self._buffer.resize(start_pos + target_size)
+            else:
+                assert len(self._buffer) > self._pos, \
+                    "a zero length read buffer would report a spurious EOF"
+                # A read of 0 bytes is EOF; None means a non-blocking read
+                # found no data ready.
+                found_eof = n == 0
+        except BlockingIOError:
+            pass
+
+        # Remove all excess bytes.
+        self._buffer.resize(self._pos)
+        return found_eof
+
     def write(self, b):
         if self.closed:
             raise ValueError("write to closed file")
@@ -1666,38 +1746,22 @@ def readall(self):
         """
         self._checkClosed()
         self._checkReadable()
-        if self._stat_atopen is None or self._stat_atopen.st_size <= 0:
-            bufsize = DEFAULT_BUFFER_SIZE
-        else:
-            # In order to detect end of file, need a read() of at least 1
-            # byte which returns size 0. Oversize the buffer by 1 byte so the
-            # I/O can be completed with two read() calls (one for all data, one
-            # for EOF) without needing to resize the buffer.
-            bufsize = self._stat_atopen.st_size + 1
-
-            if self._stat_atopen.st_size > 65536:
+        estimate = None
+        if self._stat_atopen and self._stat_atopen.st_size >= 0:
+            estimate = self._stat_atopen.st_size
+            if estimate > 65536:
                 try:
                     pos = os.lseek(self._fd, 0, SEEK_CUR)
-                    if self._stat_atopen.st_size >= pos:
-                        bufsize = self._stat_atopen.st_size - pos + 1
+                    estimate = estimate - pos if estimate > pos else 0
                 except OSError:
                     pass
 
-        result = bytearray(bufsize)
-        bytes_read = 0
-        try:
-            while n := os.readinto(self._fd, memoryview(result)[bytes_read:]):
-                bytes_read += n
-                if bytes_read >= len(result):
-                    result.resize(_new_buffersize(bytes_read))
-        except BlockingIOError:
-            if not bytes_read:
-                return None
+        bio = BytesIO()
+        found_eof = bio.readfrom(self._fd, estimate=estimate)
+        result = bio.getvalue()
+        # readfrom() was called without a limit, so if EOF was not found
+        # the read must have blocked before any data arrived.
+        return result if result or found_eof else None
 
-        assert len(result) - bytes_read >= 1, \
-            "os.readinto buffer size 0 will result in erroneous EOF / returns 0"
-        result.resize(bytes_read)
-        return bytes(result)
-
     def readinto(self, buffer):
         """Same as RawIOBase.readinto()."""
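A usage sketch of the new BytesIO.readfrom() defined above. It only runs on an interpreter with this patch applied, and shows the True/False EOF signal plus the limit cap:

    import io
    import os

    r, w = os.pipe()
    os.write(w, b"hello world")
    os.close(w)
    bio = io.BytesIO()
    # estimate oversizes the buffer by one byte, so this completes in two
    # reads: one returning all the data, one returning 0 bytes (EOF).
    assert bio.readfrom(r, estimate=11) is True
    assert bio.getvalue() == b"hello world"
    os.close(r)

    # With a limit smaller than the available data, EOF is not observed.
    r, w = os.pipe()
    os.write(w, b"hello world")
    os.close(w)
    bio = io.BytesIO()
    assert bio.readfrom(r, limit=5) is False
    assert bio.getvalue() == b"hello"
    os.close(r)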
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
index 2044d2a42897e9..e0f449b72011dc 100644
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -1921,12 +1921,9 @@ def _execute_child(self, args, executable, preexec_fn, close_fds,
 
             # Wait for exec to fail or succeed; possibly raising an
             # exception (limited in size)
-            errpipe_data = bytearray()
-            while True:
-                part = os.read(errpipe_read, 50000)
-                errpipe_data += part
-                if not part or len(errpipe_data) > 50000:
-                    break
+            bio = io.BytesIO()
+            bio.readfrom(errpipe_read, estimate=0, limit=50_000)
+            errpipe_data = bio.getvalue()
         finally:
             # be sure the FD is closed no matter what
             os.close(errpipe_read)
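For comparison, the removed subprocess loop reconstructed as a sketch. Note a small behavior difference: the old loop checked the total only after each 50000-byte read, so it could return up to roughly twice the cap, while readfrom(..., limit=50_000) stops at exactly the limit, and estimate=0 optimizes for the common case where exec succeeded and the pipe is empty.

    import os

    def read_errpipe_old(errpipe_read):
        # Removed implementation: stop on EOF or once the total exceeds
        # 50000 bytes, so up to ~100000 bytes could accumulate.
        errpipe_data = bytearray()
        while True:
            part = os.read(errpipe_read, 50000)
            errpipe_data += part
            if not part or len(errpipe_data) > 50000:
                return bytes(errpipe_data)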
diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
index dc4e40b9f09a1d..571204f4f2bbef 100644
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -5,6 +5,10 @@
 #include <stddef.h>               // offsetof()
 #include "_iomodule.h"
 
+#include "pycore_fileutils.h"     // _Py_read()
+
+#define STACK_BUFFER_SIZE 1024
+
 /*[clinic input]
 module _io
 class _io.BytesIO "bytesio *" "clinic_state()->PyBytesIO_Type"
@@ -465,6 +469,229 @@ _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size)
     return _io_BytesIO_read_impl(self, size);
 }
 
+static size_t
+_bytesio_new_buffersize(size_t bytes_read)
+{
+    size_t addend;
+
+    /* Expand the buffer by an amount proportional to the current size,
+       giving us amortized linear-time behavior. For bigger sizes, use a
+       less-than-double growth factor to avoid excessive allocation. */
+    assert(bytes_read <= PY_SSIZE_T_MAX);
+    if (bytes_read > 65536)
+        addend = bytes_read >> 3;
+    else
+        addend = 256 + bytes_read;
+    if (addend < 8 * 1024)
+        /* Avoid tiny read() calls. */
+        addend = 8 * 1024;
+    return bytes_read + addend;
+}
+
+/* Read from a fd when no data is expected to be read.
+
+This is faster (fewer allocations and copies) when there is no data, at the
+expense of being slightly slower when there is actual data to read. Falls
+back to the normal read loop if there is more than one buffer of data.
+
+-1 == error, 0 == hit limit or blocked / False return,
+ 1 == hit EOF / True return, 2 == read more
+*/
+static int
+_bytesio_readfrom_small_fast(bytesio *self, int fd, Py_ssize_t *limit)
+{
+    assert(*limit > 0 && "Must attempt to read at least one byte.");
+    char local_buffer[STACK_BUFFER_SIZE];
+    Py_ssize_t read_size = Py_MIN(STACK_BUFFER_SIZE, *limit);
+    Py_ssize_t result = _Py_read(fd, local_buffer, read_size);
+
+    /* Hit EOF in a single read, return True. */
+    if (result == 0) {
+        return 1;
+    }
+    if (result == -1) {
+        /* BlockingIOError -> return False (didn't find EOF). */
+        if (errno == EAGAIN) {
+            PyErr_Clear();
+            return 0;
+        }
+        return -1;
+    }
+
+    /* Got data; copy it into the buffer, then proceed with the normal
+       read loop.
+
+       FIXME? The temporary bytes object is an unnecessary copy + allocation.
+       yea: faster / fewer copies, removes some redundant checks
+       nay: resizing, appending, copying, updating pointers is a lot. */
+    PyObject *bytes = PyBytes_FromStringAndSize(local_buffer, result);
+    if (!bytes) {
+        return -1;
+    }
+    result = write_bytes(self, bytes);
+    Py_DECREF(bytes);
+    if (result < 0) {
+        return -1;
+    }
+    /* Hit the limit, nothing left to do. */
+    if (result == *limit) {
+        return 0;
+    }
+    *limit -= result;
+    return 2;
+}
+
+
+/*[clinic input]
+_io.BytesIO.readfrom -> bool
+    file: int
+    /
+    *
+    estimate: Py_ssize_t(accept={int, NoneType}) = -1
+    limit: Py_ssize_t(accept={int, NoneType}) = -1
+
+Efficiently read from the provided file, returning True if EOF was hit.
+
+Returns True if and only if a read into a non-zero length buffer returns 0
+bytes. On most systems this indicates end of file / stream.
+
+FIXME? Allow a fileobj that provides readinto?
+FIXME? Allow a fileobj that only has read?
+
+If a readinto call raises BlockingIOError or returns None, the data read up
+to that point will be stored in the buffer, and False will be returned. For
+other exceptions raised while reading, as much data as possible will be in
+the buffer.
+
+FIXME: BlockingIOError contains data from partial reads. Append it.
+    -> Include test that no data is lost w/ multiple repeated blocks
+       (There is one already in tests, make sure this is exercised and passes
+       it)
+FIXME: Does this need to document that all reads are limited to PY_SSIZE_T_MAX?
+FIXME? It would be nice if this could support a timeout, but that is probably
+    a feature for later.
+[clinic start generated code]*/
+
+static int
+_io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
+                          Py_ssize_t limit)
+/*[clinic end generated code: output=71dcfcf7e9a50527 input=9bce10ea48db6415]*/
+{
+    /* FIXME: Cap to _PY_READ_MAX */
+    if (check_closed(self)) {
+        return -1;
+    }
+    if (check_exports(self)) {
+        return -1;
+    }
+    /* Limit all reads to PY_SSIZE_T_MAX. */
+    if (limit < 0) {
+        limit = PY_SSIZE_T_MAX;
+    }
+    else if (limit == 0) {
+        // limit == 0 -> nothing to read.
+        // FIXME(cmaloney): Should this guarantee at least one read?
+        // (os.readinto technically accepts a 0 length buffer...)
+        return 0;
+    }
+    assert(limit > 0);
+
+    /* Try to get `estimate` bytes in a single read. */
+    Py_ssize_t read_size = DEFAULT_BUFFER_SIZE;
+    if (estimate > 0) {
+        /* In order to detect end of file, need a read() of at
+           least 1 byte which returns size 0. Oversize the buffer
+           by 1 byte so the I/O can be completed with two read()
+           calls (one for all data, one for EOF) without needing
+           to resize the buffer. */
+        read_size = estimate + ((estimate <= PY_SSIZE_T_MAX - 1) ? 1 : 0);
+    }
+    else if (estimate == 0 || limit < STACK_BUFFER_SIZE) {
+        /* A number of callers in the normal path expect no data; use a
+           small stack buffer for those, only expanding the heap buffer
+           if absolutely needed. */
+        int small_result = _bytesio_readfrom_small_fast(self, file, &limit);
+        if (small_result != 2) {
+            return small_result;
+        }
+    }
+
+    /* Never read more than limit. */
+    read_size = Py_MIN(read_size, limit);
+    assert(read_size > 0);
+
+    Py_ssize_t current_size = PyBytes_GET_SIZE(self->buf);
+    if (PY_SSIZE_T_MAX - read_size > current_size) {
+        current_size += read_size;
+    }
+    else {
+        current_size = PY_SSIZE_T_MAX;
+    }
+    if (_PyBytes_Resize(&self->buf, current_size)) {
+        return -1;
+    }
+    Py_ssize_t bytes_read = 0;
+    int found_eof = 0;
+    while (true) {
+        /* Expand buffer if needed. */
+        if (self->string_size >= current_size) {
+            if (current_size >= PY_SSIZE_T_MAX) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "unbounded read returned more bytes "
+                                "than a Python bytes object can hold");
+                return -1;
+            }
+            Py_ssize_t target_read = _bytesio_new_buffersize(bytes_read);
+            /* Never read more than limit bytes in total. */
+            target_read = Py_MIN(target_read, limit - bytes_read);
+
+            /* Buffer can't get larger than PY_SSIZE_T_MAX. */
+            if (PY_SSIZE_T_MAX - current_size < target_read) {
+                target_read = PY_SSIZE_T_MAX - current_size;
+            }
+
+            current_size += target_read;
+            if (_PyBytes_Resize(&self->buf, current_size)) {
+                return -1;
+            }
+        }
+        read_size = Py_MIN(current_size - self->string_size, limit - bytes_read);
+        assert(read_size > 0);  // Should always be reading some bytes.
+        assert(self->string_size + read_size <= current_size);
+        Py_ssize_t result = _Py_read(file,
+                                     PyBytes_AS_STRING(self->buf) + self->string_size,
+                                     read_size);
+        if (result == -1) {
+            // Blocked -> early exit without error.
+            if (errno == EAGAIN) {
+                PyErr_Clear();
+                break;
+            }
+            return -1;
+        }
+        // Found EOF.
+        if (result == 0) {
+            found_eof = 1;
+            break;
+        }
+        assert(result > 0);  // Should have read some bytes.
+        self->string_size += result;
+        bytes_read += result;
+        assert(bytes_read <= limit);  // Should never pass limit.
+        if (bytes_read >= limit) {
+            found_eof = 0;
+            break;
+        }
+    }
+    // FIXME? There could be quite a bit of space between current_size and
+    // self->string_size; should this downsize then?
+    //
+    // yea: Save excess memory
+    // nay: Efficient pre-allocated buffer reuse if long lived; getting the
+    //      bytes() out will copy anyway.
+    return found_eof;
+}
+
 /*[clinic input]
 _io.BytesIO.readline
     size: Py_ssize_t(accept={int, NoneType}) = -1
@@ -1027,6 +1254,7 @@ static struct PyMethodDef bytesio_methods[] = {
     _IO_BYTESIO_WRITE_METHODDEF
     _IO_BYTESIO_WRITELINES_METHODDEF
     _IO_BYTESIO_READ1_METHODDEF
+    _IO_BYTESIO_READFROM_METHODDEF
    _IO_BYTESIO_READINTO_METHODDEF
     _IO_BYTESIO_READLINE_METHODDEF
     _IO_BYTESIO_READLINES_METHODDEF
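A Python model of the growth policy in _bytesio_new_buffersize() above, as a sketch to make the amortized behavior easy to check interactively:

    def new_buffersize(bytes_read):
        # Mirrors _bytesio_new_buffersize(): roughly double small buffers,
        # grow by ~12.5% past 64 KiB, and never add less than 8 KiB so
        # read() calls stay reasonably large.
        if bytes_read > 65536:
            addend = bytes_read >> 3
        else:
            addend = 256 + bytes_read
        return bytes_read + max(addend, 8 * 1024)

    sizes = [0]
    while sizes[-1] < 10**6:
        sizes.append(new_buffersize(sizes[-1]))
    # -> [0, 8192, 16640, 33536, 67328, 75744, 85212, ...]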
diff --git a/Modules/_io/clinic/bytesio.c.h b/Modules/_io/clinic/bytesio.c.h
index 5528df952c33fb..797fdf1027463b 100644
--- a/Modules/_io/clinic/bytesio.c.h
+++ b/Modules/_io/clinic/bytesio.c.h
@@ -233,6 +233,108 @@ _io_BytesIO_read1(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
     return return_value;
 }
 
+PyDoc_STRVAR(_io_BytesIO_readfrom__doc__,
+"readfrom($self, file, /, *, estimate=-1, limit=-1)\n"
+"--\n"
+"\n"
+"Efficiently read from the provided file, returning True if EOF was hit.\n"
+"\n"
+"Returns True if and only if a read into a non-zero length buffer returns 0\n"
+"bytes. On most systems this indicates end of file / stream.\n"
+"\n"
+"FIXME? Allow a fileobj that provides readinto?\n"
+"FIXME? Allow a fileobj that only has read?\n"
+"\n"
+"If a readinto call raises BlockingIOError or returns None, the data read up\n"
+"to that point will be stored in the buffer, and False will be returned. For\n"
+"other exceptions raised while reading, as much data as possible will be in\n"
+"the buffer.\n"
+"\n"
+"FIXME: BlockingIOError contains data from partial reads. Append it.\n"
+"    -> Include test that no data is lost w/ multiple repeated blocks\n"
+"       (There is one already in tests, make sure this is exercised and passes\n"
+"       it)\n"
+"FIXME: Does this need to document that all reads are limited to PY_SSIZE_T_MAX?\n"
+"FIXME? It would be nice if this could support a timeout, but that is probably\n"
+"    a feature for later.");
+
+#define _IO_BYTESIO_READFROM_METHODDEF    \
+    {"readfrom", _PyCFunction_CAST(_io_BytesIO_readfrom), METH_FASTCALL|METH_KEYWORDS, _io_BytesIO_readfrom__doc__},
+
+static int
+_io_BytesIO_readfrom_impl(bytesio *self, int file, Py_ssize_t estimate,
+                          Py_ssize_t limit);
+
+static PyObject *
+_io_BytesIO_readfrom(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(estimate), &_Py_ID(limit), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"", "estimate", "limit", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "readfrom",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[3];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
+    int file;
+    Py_ssize_t estimate = -1;
+    Py_ssize_t limit = -1;
+    int _return_value;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser,
+            /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    file = PyLong_AsInt(args[0]);
+    if (file == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_kwonly;
+    }
+    if (args[1]) {
+        if (!_Py_convert_optional_to_ssize_t(args[1], &estimate)) {
+            goto exit;
+        }
+        if (!--noptargs) {
+            goto skip_optional_kwonly;
+        }
+    }
+    if (!_Py_convert_optional_to_ssize_t(args[2], &limit)) {
+        goto exit;
+    }
+skip_optional_kwonly:
+    _return_value = _io_BytesIO_readfrom_impl((bytesio *)self, file, estimate, limit);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_io_BytesIO_readline__doc__,
 "readline($self, size=-1, /)\n"
 "--\n"
@@ -535,4 +637,4 @@ _io_BytesIO___init__(PyObject *self, PyObject *args, PyObject *kwargs)
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=8a5e153bc7584b55 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=bab5a4081d518e22 input=a9049054013a1b77]*/
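The generated parser above maps None to the -1 sentinel through _Py_convert_optional_to_ssize_t, so the following call shapes are equivalent ways to say "no estimate / no limit". A sketch, runnable only on a patched build:

    import io
    import os

    r, w = os.pipe()
    os.close(w)                   # empty stream: immediate EOF
    bio = io.BytesIO()
    assert bio.readfrom(r) is True                             # defaults
    assert bio.readfrom(r, estimate=None, limit=None) is True  # explicit None
    os.close(r)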
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c
index 89f1cfe6b20935..80c9649598a800 100644
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -703,25 +703,6 @@ _io_FileIO_readinto_impl(fileio *self, PyTypeObject *cls, Py_buffer *buffer)
     return PyLong_FromSsize_t(n);
 }
 
-static size_t
-new_buffersize(fileio *self, size_t currentsize)
-{
-    size_t addend;
-
-    /* Expand the buffer by an amount proportional to the current size,
-       giving us amortized linear-time behavior. For bigger sizes, use a
-       less-than-double growth factor to avoid excessive allocation. */
-    assert(currentsize <= PY_SSIZE_T_MAX);
-    if (currentsize > LARGE_BUFFER_CUTOFF_SIZE)
-        addend = currentsize >> 3;
-    else
-        addend = 256 + currentsize;
-    if (addend < SMALLCHUNK)
-        /* Avoid tiny read() calls. */
-        addend = SMALLCHUNK;
-    return addend + currentsize;
-}
-
 /*[clinic input]
 _io.FileIO.readall
 
@@ -735,46 +716,26 @@ static PyObject *
 _io_FileIO_readall_impl(fileio *self)
 /*[clinic end generated code: output=faa0292b213b4022 input=dbdc137f55602834]*/
 {
-    Py_off_t pos, end;
-    PyObject *result;
-    Py_ssize_t bytes_read = 0;
-    Py_ssize_t n;
-    size_t bufsize;
+    PyObject *estimate_obj = Py_None;
+    PyObject *args[3] = {NULL, NULL, NULL};
+    PyObject *fn_name = NULL;
+    PyObject *keyword = NULL;
+    PyObject *result = NULL;
+    PyObject *found_eof = NULL;
 
     if (self->fd < 0) {
         return err_closed();
     }
-    if (self->stat_atopen != NULL && self->stat_atopen->st_size < _PY_READ_MAX) {
-        end = (Py_off_t)self->stat_atopen->st_size;
-    }
-    else {
-        end = -1;
-    }
-    if (end <= 0) {
-        /* Use a default size and resize as needed. */
-        bufsize = SMALLCHUNK;
-    }
-    else {
-        /* This is probably a real file. */
-        if (end > _PY_READ_MAX - 1) {
-            bufsize = _PY_READ_MAX;
-        }
-        else {
-            /* In order to detect end of file, need a read() of at
-               least 1 byte which returns size 0. Oversize the buffer
-               by 1 byte so the I/O can be completed with two read()
-               calls (one for all data, one for EOF) without needing
-               to resize the buffer. */
-            bufsize = (size_t)end + 1;
-        }
-
+    if (self->stat_atopen != NULL && self->stat_atopen->st_size >= 0) {
+        Py_off_t pos = 0;
+        Py_ssize_t estimate = self->stat_atopen->st_size;
         /* While a lot of code does open().read() to get the whole contents
           of a file it is possible a caller seeks/reads a ways into the file
           then calls readall() to get the rest, which would result in
           allocating more than required. Guard against that for larger
           files where we expect the I/O time to dominate anyways while
           keeping small files fast. */
-        if (bufsize > LARGE_BUFFER_CUTOFF_SIZE) {
+        if (estimate > LARGE_BUFFER_CUTOFF_SIZE) {
             Py_BEGIN_ALLOW_THREADS
             _Py_BEGIN_SUPPRESS_IPH
 #ifdef MS_WINDOWS
@@ -785,56 +746,80 @@ _io_FileIO_readall_impl(fileio *self)
             _Py_END_SUPPRESS_IPH
             Py_END_ALLOW_THREADS
 
-            if (end >= pos && pos >= 0 && (end - pos) < (_PY_READ_MAX - 1)) {
-                bufsize = (size_t)(end - pos) + 1;
+            if (pos >= 0 && estimate >= pos) {
+                estimate -= (Py_ssize_t)pos;
             }
         }
+        estimate_obj = PyLong_FromSsize_t(estimate);
+        if (!estimate_obj) {
+            return NULL;
+        }
     }
-
-    result = PyBytes_FromStringAndSize(NULL, bufsize);
-    if (result == NULL)
-        return NULL;
+
+    /* bio = io.BytesIO();
+       found_eof = bio.readfrom(self->fd, estimate=estimate) */
+    PyObject *bytesio_class = PyImport_ImportModuleAttrString("_io", "BytesIO");
+    if (!bytesio_class) {
+        Py_DECREF(estimate_obj);
+        return NULL;
+    }
+    args[2] = estimate_obj;
+    estimate_obj = NULL;
+
+    args[0] = PyObject_CallNoArgs(bytesio_class);
+    Py_DECREF(bytesio_class);
+    bytesio_class = NULL;
+    if (!args[0]) {
+        /* args[2] (the estimate reference) is released at "leave". */
+        goto leave;
+    }
 
-    while (1) {
-        if (bytes_read >= (Py_ssize_t)bufsize) {
-            bufsize = new_buffersize(self, bytes_read);
-            if (bufsize > PY_SSIZE_T_MAX || bufsize <= 0) {
-                PyErr_SetString(PyExc_OverflowError,
-                                "unbounded read returned more bytes "
-                                "than a Python bytes object can hold");
-                Py_DECREF(result);
-                return NULL;
-            }
-
-            if (PyBytes_GET_SIZE(result) < (Py_ssize_t)bufsize) {
-                if (_PyBytes_Resize(&result, bufsize) < 0)
-                    return NULL;
-            }
-        }
-
-        n = _Py_read(self->fd,
-                     PyBytes_AS_STRING(result) + bytes_read,
-                     bufsize - bytes_read);
-
-        if (n == 0)
-            break;
-        if (n == -1) {
-            if (errno == EAGAIN) {
-                PyErr_Clear();
-                if (bytes_read > 0)
-                    break;
-                Py_DECREF(result);
-                Py_RETURN_NONE;
-            }
-            Py_DECREF(result);
-            return NULL;
-        }
-        bytes_read += n;
+    args[1] = PyLong_FromLong(self->fd);
+    if (!args[1]) {
+        goto leave;
+    }
+    fn_name = PyUnicode_InternFromString("readfrom");
+    if (!fn_name) {
+        goto leave;
+    }
+    keyword = Py_BuildValue("(s)", "estimate");
+    if (!keyword) {
+        goto leave;
+    }
+    found_eof = PyObject_VectorcallMethod(
+        fn_name,
+        args,
+        2 | PY_VECTORCALL_ARGUMENTS_OFFSET,
+        keyword
+    );
+    if (!found_eof) {
+        goto leave;
     }
 
-    if (PyBytes_GET_SIZE(result) > bytes_read) {
-        if (_PyBytes_Resize(&result, bytes_read) < 0)
-            return NULL;
-    }
+    /* result = bio.getvalue()
+       return result if result or found_eof else None */
+    Py_DECREF(keyword);
+    keyword = PyUnicode_InternFromString("getvalue");
+    if (!keyword) {
+        goto leave;
+    }
+    result = PyObject_CallMethodNoArgs(args[0], keyword);
+    if (!result) {
+        goto leave;
+    }
+
+    /* Read was blocked (did not reach EOF and got no data). */
+    if (!PyObject_IsTrue(result) && !PyObject_IsTrue(found_eof)) {
+        Py_DECREF(result);
+        result = Py_NewRef(Py_None);
+    }
+
+leave:
+    Py_XDECREF(args[0]);
+    Py_XDECREF(args[1]);
+    Py_XDECREF(args[2]);
+    Py_XDECREF(fn_name);
+    Py_XDECREF(keyword);
+    Py_XDECREF(found_eof);
     return result;
 }
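End-to-end semantics the rewritten FileIO.readall() keeps through readfrom()'s return value, as a sketch. It needs a patched build and a POSIX pipe (os.set_blocking on pipes is POSIX-only): blocked with no data gives None, while data followed by EOF gives the bytes.

    import os

    r, w = os.pipe()
    os.set_blocking(r, False)
    f = open(r, "rb", buffering=0, closefd=False)

    assert f.readall() is None       # would block: no data, no EOF

    os.write(w, b"data")
    os.close(w)
    assert f.readall() == b"data"    # drains the pipe, then sees EOF

    f.close()
    os.close(r)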