Skip to content

Commit 1ed4487

Browse files
cmaloneypicnixzvstinner
authored
gh-129205: Add os.readinto() API for reading data into a caller provided buffer (#129211)
Add a new OS API which will read data directly into a caller provided writeable buffer protocol object. Co-authored-by: Bénédikt Tran <[email protected]> Co-authored-by: Victor Stinner <[email protected]>
1 parent 0ef8d47 commit 1ed4487

File tree

7 files changed

+267
-1
lines changed

7 files changed

+267
-1
lines changed

Doc/library/os.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,6 +1659,33 @@ or `the MSDN <https://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx>`_ on Windo
16591659
:exc:`InterruptedError` exception (see :pep:`475` for the rationale).
16601660

16611661

1662+
.. function:: readinto(fd, buffer, /)
1663+
1664+
Read from a file descriptor *fd* into a mutable
1665+
:ref:`buffer object <bufferobjects>` *buffer*.
1666+
1667+
The *buffer* should be mutable and :term:`bytes-like <bytes-like object>`. On
1668+
success, returns the number of bytes read. Fewer bytes may be read than the
1669+
size of the buffer. The underlying system call will be retried when
1670+
interrupted by a signal, unless the signal handler raises an exception.
1671+
Other errors will not be retried and an error will be raised.
1672+
1673+
Returns 0 if *fd* is at end of file or if the provided *buffer* has
1674+
length 0 (which can be used to check for errors without reading data).
1675+
Never returns a negative value.
1676+
1677+
.. note::
1678+
1679+
This function is intended for low-level I/O and must be applied to a file
1680+
descriptor as returned by :func:`os.open` or :func:`os.pipe`. To read a
1681+
"file object" returned by the built-in function :func:`open`, or
1682+
:data:`sys.stdin`, use its member functions, for example
1683+
:meth:`io.BufferedIOBase.readinto`, :meth:`io.BufferedIOBase.read`, or
1684+
:meth:`io.TextIOBase.read`.
1685+
1686+
.. versionadded:: next
1687+
1688+
16621689
.. function:: sendfile(out_fd, in_fd, offset, count)
16631690
sendfile(out_fd, in_fd, offset, count, headers=(), trailers=(), flags=0)
16641691

Doc/whatsnew/3.14.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,10 @@ os
561561
to the :mod:`os` module.
562562
(Contributed by James Roy in :gh:`127688`.)
563563

564+
* Add the :func:`os.readinto` function to read into a
565+
:ref:`buffer object <bufferobjects>` from a file descriptor.
566+
(Contributed by Cody Maloney in :gh:`129205`.)
567+
564568

565569
pathlib
566570
-------

Lib/test/_test_eintr.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,37 @@ def test_read(self):
152152
self.assertEqual(data, os.read(rd, len(data)))
153153
self.assertEqual(proc.wait(), 0)
154154

155+
def test_readinto(self):
156+
rd, wr = os.pipe()
157+
self.addCleanup(os.close, rd)
158+
# wr closed explicitly by parent
159+
160+
# the payloads below are smaller than PIPE_BUF, hence the writes will be
161+
# atomic
162+
datas = [b"hello", b"world", b"spam"]
163+
164+
code = '\n'.join((
165+
'import os, sys, time',
166+
'',
167+
'wr = int(sys.argv[1])',
168+
'datas = %r' % datas,
169+
'sleep_time = %r' % self.sleep_time,
170+
'',
171+
'for data in datas:',
172+
' # let the parent block on read()',
173+
' time.sleep(sleep_time)',
174+
' os.write(wr, data)',
175+
))
176+
177+
proc = self.subprocess(code, str(wr), pass_fds=[wr])
178+
with kill_on_error(proc):
179+
os.close(wr)
180+
for data in datas:
181+
buffer = bytearray(len(data))
182+
self.assertEqual(os.readinto(rd, buffer), len(data))
183+
self.assertEqual(buffer, data)
184+
self.assertEqual(proc.wait(), 0)
185+
155186
def test_write(self):
156187
rd, wr = os.pipe()
157188
self.addCleanup(os.close, wr)

Lib/test/test_os.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,93 @@ def test_read(self):
230230
self.assertEqual(type(s), bytes)
231231
self.assertEqual(s, b"spam")
232232

233+
def test_readinto(self):
234+
with open(os_helper.TESTFN, "w+b") as fobj:
235+
fobj.write(b"spam")
236+
fobj.flush()
237+
fd = fobj.fileno()
238+
os.lseek(fd, 0, 0)
239+
# Oversized buffer so readinto does not reach the end of the buffer.
240+
buffer = bytearray(7)
241+
s = os.readinto(fd, buffer)
242+
self.assertEqual(type(s), int)
243+
self.assertEqual(s, 4)
244+
# Should overwrite the first 4 bytes of the buffer.
245+
self.assertEqual(buffer[:4], b"spam")
246+
247+
# Readinto at EOF should return 0 and not touch buffer.
248+
buffer[:] = b"notspam"
249+
s = os.readinto(fd, buffer)
250+
self.assertEqual(type(s), int)
251+
self.assertEqual(s, 0)
252+
self.assertEqual(bytes(buffer), b"notspam")
253+
s = os.readinto(fd, buffer)
254+
self.assertEqual(s, 0)
255+
self.assertEqual(bytes(buffer), b"notspam")
256+
257+
# Readinto a 0 length bytearray when at EOF should return 0
258+
self.assertEqual(os.readinto(fd, bytearray()), 0)
259+
260+
# Readinto a 0 length bytearray with data available should return 0.
261+
os.lseek(fd, 0, 0)
262+
self.assertEqual(os.readinto(fd, bytearray()), 0)
263+
264+
@unittest.skipUnless(hasattr(os, 'get_blocking'),
265+
'needs os.get_blocking() and os.set_blocking()')
266+
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
267+
def test_readinto_non_blocking(self):
268+
# Verify behavior of a readinto which would block on a non-blocking fd.
269+
r, w = os.pipe()
270+
try:
271+
os.set_blocking(r, False)
272+
with self.assertRaises(BlockingIOError):
273+
os.readinto(r, bytearray(5))
274+
275+
# Pass some data through
276+
os.write(w, b"spam")
277+
self.assertEqual(os.readinto(r, bytearray(4)), 4)
278+
279+
# Still don't block or return 0.
280+
with self.assertRaises(BlockingIOError):
281+
os.readinto(r, bytearray(5))
282+
283+
# At EOF should return size 0
284+
os.close(w)
285+
w = None
286+
self.assertEqual(os.readinto(r, bytearray(5)), 0)
287+
self.assertEqual(os.readinto(r, bytearray(5)), 0) # Still EOF
288+
289+
finally:
290+
os.close(r)
291+
if w is not None:
292+
os.close(w)
293+
294+
def test_readinto_badarg(self):
295+
with open(os_helper.TESTFN, "w+b") as fobj:
296+
fobj.write(b"spam")
297+
fobj.flush()
298+
fd = fobj.fileno()
299+
os.lseek(fd, 0, 0)
300+
301+
for bad_arg in ("test", bytes(), 14):
302+
with self.subTest(f"bad buffer {type(bad_arg)}"):
303+
with self.assertRaises(TypeError):
304+
os.readinto(fd, bad_arg)
305+
306+
with self.subTest("doesn't work on file objects"):
307+
with self.assertRaises(TypeError):
308+
os.readinto(fobj, bytearray(5))
309+
310+
# takes two args
311+
with self.assertRaises(TypeError):
312+
os.readinto(fd)
313+
314+
# No data should have been read with the bad arguments.
315+
buffer = bytearray(4)
316+
s = os.readinto(fd, buffer)
317+
self.assertEqual(s, 4)
318+
self.assertEqual(buffer, b"spam")
319+
233320
@support.cpython_only
234321
# Skip the test on 32-bit platforms: the number of bytes must fit in a
235322
# Py_ssize_t type
@@ -249,6 +336,29 @@ def test_large_read(self, size):
249336
# operating system is free to return fewer bytes than requested.
250337
self.assertEqual(data, b'test')
251338

339+
340+
@support.cpython_only
341+
# Skip the test on 32-bit platforms: the number of bytes must fit in a
342+
# Py_ssize_t type
343+
@unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX,
344+
"needs INT_MAX < PY_SSIZE_T_MAX")
345+
@support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False)
346+
def test_large_readinto(self, size):
347+
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
348+
create_file(os_helper.TESTFN, b'test')
349+
350+
# Issue #21932: For readinto, the length comes from the buffer rather than
351+
# being passed explicitly as it is for read. It should still be capped to a
352+
# valid size and not raise an OverflowError for sizes larger than INT_MAX.
353+
buffer = bytearray(INT_MAX + 10)
354+
with open(os_helper.TESTFN, "rb") as fp:
355+
length = os.readinto(fp.fileno(), buffer)
356+
357+
# The test does not try to read more than 2 GiB at once because the
358+
# operating system is free to return fewer bytes than requested.
359+
self.assertEqual(length, 4)
360+
self.assertEqual(buffer[:4], b'test')
361+
252362
def test_write(self):
253363
# os.write() accepts bytes- and buffer-like objects but not strings
254364
fd = os.open(os_helper.TESTFN, os.O_CREAT | os.O_WRONLY)
@@ -2467,6 +2577,10 @@ def test_lseek(self):
24672577
def test_read(self):
24682578
self.check(os.read, 1)
24692579

2580+
@unittest.skipUnless(hasattr(os, 'readinto'), 'test needs os.readinto()')
2581+
def test_readinto(self):
2582+
self.check(os.readinto, bytearray(5))
2583+
24702584
@unittest.skipUnless(hasattr(os, 'readv'), 'test needs os.readv()')
24712585
def test_readv(self):
24722586
buf = bytearray(10)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add :func:`os.readinto` to read into a :ref:`buffer object <bufferobjects>` from a file descriptor.

Modules/clinic/posixmodule.c.h

Lines changed: 57 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Modules/posixmodule.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11433,6 +11433,38 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length)
1143311433
return buffer;
1143411434
}
1143511435

11436+
/*[clinic input]
11437+
os.readinto -> Py_ssize_t
11438+
fd: int
11439+
buffer: Py_buffer(accept={rwbuffer})
11440+
/
11441+
11442+
Read into a buffer object from a file descriptor.
11443+
11444+
The buffer should be mutable and bytes-like. On success, returns the number of
11445+
bytes read. Less bytes may be read than the size of the buffer. The underlying
11446+
system call will be retried when interrupted by a signal, unless the signal
11447+
handler raises an exception. Other errors will not be retried and an error will
11448+
be raised.
11449+
11450+
Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0
11451+
(which can be used to check for errors without reading data). Never returns
11452+
negative.
11453+
[clinic start generated code]*/
11454+
11455+
static Py_ssize_t
11456+
os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer)
11457+
/*[clinic end generated code: output=8091a3513c683a80 input=d40074d0a68de575]*/
11458+
{
11459+
assert(buffer->len >= 0);
11460+
Py_ssize_t result = _Py_read(fd, buffer->buf, buffer->len);
11461+
/* Ensure negative is never returned without an error. Simplifies calling
11462+
code. _Py_read should succeed, possibly reading 0 bytes, _or_ set an
11463+
error. */
11464+
assert(result >= 0 || (result == -1 && PyErr_Occurred()));
11465+
return result;
11466+
}
11467+
1143611468
#if (defined(HAVE_SENDFILE) && (defined(__FreeBSD__) || defined(__DragonFly__) \
1143711469
|| defined(__APPLE__))) \
1143811470
|| defined(HAVE_READV) || defined(HAVE_PREADV) || defined (HAVE_PREADV2) \
@@ -16973,6 +17005,7 @@ static PyMethodDef posix_methods[] = {
1697317005
OS_LOCKF_METHODDEF
1697417006
OS_LSEEK_METHODDEF
1697517007
OS_READ_METHODDEF
17008+
OS_READINTO_METHODDEF
1697617009
OS_READV_METHODDEF
1697717010
OS_PREAD_METHODDEF
1697817011
OS_PREADV_METHODDEF

0 commit comments

Comments
 (0)