Skip to content

Commit 676ee14

Browse files
committed
Auto merge of #79930 - tgnottingham:bufwriter_performance, r=m-ou-se
Optimize BufWriter
2 parents 377d1a9 + 01e7018 commit 676ee14

File tree

1 file changed

+178
-39
lines changed

1 file changed

+178
-39
lines changed

Diff for: library/std/src/io/buffered/bufwriter.rs

+178-39
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::io::{
44
self, Error, ErrorKind, IntoInnerError, IoSlice, Seek, SeekFrom, Write, DEFAULT_BUF_SIZE,
55
};
66
use crate::mem;
7+
use crate::ptr;
78

89
/// Wraps a writer and buffers its output.
910
///
@@ -68,6 +69,10 @@ use crate::mem;
6869
#[stable(feature = "rust1", since = "1.0.0")]
6970
pub struct BufWriter<W: Write> {
7071
inner: Option<W>,
72+
// The buffer. Avoid using this like a normal `Vec` in common code paths.
73+
// That is, don't use `buf.push`, `buf.extend_from_slice`, or any other
74+
// methods that require bounds checking or the like. This makes an enormous
75+
// difference to performance (we may want to stop using a `Vec` entirely).
7176
buf: Vec<u8>,
7277
// #30888: If the inner writer panics in a call to write, we don't want to
7378
// write the buffered data a second time in BufWriter's destructor. This
@@ -181,9 +186,14 @@ impl<W: Write> BufWriter<W> {
181186
/// data. Writes as much as possible without exceeding capacity. Returns
182187
/// the number of bytes written.
183188
pub(super) fn write_to_buf(&mut self, buf: &[u8]) -> usize {
184-
let available = self.buf.capacity() - self.buf.len();
189+
let available = self.spare_capacity();
185190
let amt_to_buffer = available.min(buf.len());
186-
self.buf.extend_from_slice(&buf[..amt_to_buffer]);
191+
192+
// SAFETY: `amt_to_buffer` is <= buffer's spare capacity by construction.
193+
unsafe {
194+
self.write_to_buffer_unchecked(&buf[..amt_to_buffer]);
195+
}
196+
187197
amt_to_buffer
188198
}
189199

@@ -331,6 +341,103 @@ impl<W: Write> BufWriter<W> {
331341
let buf = if !self.panicked { Ok(buf) } else { Err(WriterPanicked { buf }) };
332342
(self.inner.take().unwrap(), buf)
333343
}
344+
345+
// Ensure this function does not get inlined into `write`, so that it
346+
// remains inlineable and its common path remains as short as possible.
347+
// If this function ends up being called frequently relative to `write`,
348+
// it's likely a sign that the client is using an improperly sized buffer
349+
// or their write patterns are somewhat pathological.
350+
#[cold]
351+
#[inline(never)]
352+
fn write_cold(&mut self, buf: &[u8]) -> io::Result<usize> {
353+
if buf.len() > self.spare_capacity() {
354+
self.flush_buf()?;
355+
}
356+
357+
// Why not len > capacity? To avoid a needless trip through the buffer when the input
358+
// exactly fills it. We'd just need to flush it to the underlying writer anyway.
359+
if buf.len() >= self.buf.capacity() {
360+
self.panicked = true;
361+
let r = self.get_mut().write(buf);
362+
self.panicked = false;
363+
r
364+
} else {
365+
// Write to the buffer. In this case, we write to the buffer even if it fills it
366+
// exactly. Doing otherwise would mean flushing the buffer, then writing this
367+
// input to the inner writer, which in many cases would be a worse strategy.
368+
369+
// SAFETY: There was either enough spare capacity already, or there wasn't and we
370+
// flushed the buffer to ensure that there is. In the latter case, we know that there
371+
// is because flushing ensured that our entire buffer is spare capacity, and we entered
372+
// this block because the input buffer length is less than that capacity. In either
373+
// case, it's safe to write the input buffer to our buffer.
374+
unsafe {
375+
self.write_to_buffer_unchecked(buf);
376+
}
377+
378+
Ok(buf.len())
379+
}
380+
}
381+
382+
// Ensure this function does not get inlined into `write_all`, so that it
383+
// remains inlineable and its common path remains as short as possible.
384+
// If this function ends up being called frequently relative to `write_all`,
385+
// it's likely a sign that the client is using an improperly sized buffer
386+
// or their write patterns are somewhat pathological.
387+
#[cold]
388+
#[inline(never)]
389+
fn write_all_cold(&mut self, buf: &[u8]) -> io::Result<()> {
390+
// Normally, `write_all` just calls `write` in a loop. We can do better
391+
// by calling `self.get_mut().write_all()` directly, which avoids
392+
// round trips through the buffer in the event of a series of partial
393+
// writes in some circumstances.
394+
395+
if buf.len() > self.spare_capacity() {
396+
self.flush_buf()?;
397+
}
398+
399+
// Why not len > capacity? To avoid a needless trip through the buffer when the input
400+
// exactly fills it. We'd just need to flush it to the underlying writer anyway.
401+
if buf.len() >= self.buf.capacity() {
402+
self.panicked = true;
403+
let r = self.get_mut().write_all(buf);
404+
self.panicked = false;
405+
r
406+
} else {
407+
// Write to the buffer. In this case, we write to the buffer even if it fills it
408+
// exactly. Doing otherwise would mean flushing the buffer, then writing this
409+
// input to the inner writer, which in many cases would be a worse strategy.
410+
411+
// SAFETY: There was either enough spare capacity already, or there wasn't and we
412+
// flushed the buffer to ensure that there is. In the latter case, we know that there
413+
// is because flushing ensured that our entire buffer is spare capacity, and we entered
414+
// this block because the input buffer length is less than that capacity. In either
415+
// case, it's safe to write the input buffer to our buffer.
416+
unsafe {
417+
self.write_to_buffer_unchecked(buf);
418+
}
419+
420+
Ok(())
421+
}
422+
}
423+
424+
// SAFETY: Requires `buf.len() <= self.buf.capacity() - self.buf.len()`,
425+
// i.e., that input buffer length is less than or equal to spare capacity.
426+
#[inline]
427+
unsafe fn write_to_buffer_unchecked(&mut self, buf: &[u8]) {
428+
debug_assert!(buf.len() <= self.spare_capacity());
429+
let old_len = self.buf.len();
430+
let buf_len = buf.len();
431+
let src = buf.as_ptr();
432+
let dst = self.buf.as_mut_ptr().add(old_len);
433+
ptr::copy_nonoverlapping(src, dst, buf_len);
434+
self.buf.set_len(old_len + buf_len);
435+
}
436+
437+
#[inline]
438+
fn spare_capacity(&self) -> usize {
439+
self.buf.capacity() - self.buf.len()
440+
}
334441
}
335442

336443
#[unstable(feature = "bufwriter_into_raw_parts", issue = "80690")]
@@ -402,63 +509,82 @@ impl fmt::Debug for WriterPanicked {
402509

403510
#[stable(feature = "rust1", since = "1.0.0")]
404511
impl<W: Write> Write for BufWriter<W> {
512+
#[inline]
405513
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
406-
if self.buf.len() + buf.len() > self.buf.capacity() {
407-
self.flush_buf()?;
408-
}
409-
// FIXME: Why no len > capacity? Why not buffer len == capacity? #72919
410-
if buf.len() >= self.buf.capacity() {
411-
self.panicked = true;
412-
let r = self.get_mut().write(buf);
413-
self.panicked = false;
414-
r
415-
} else {
416-
self.buf.extend_from_slice(buf);
514+
// Use < instead of <= to avoid a needless trip through the buffer in some cases.
515+
// See `write_cold` for details.
516+
if buf.len() < self.spare_capacity() {
517+
// SAFETY: safe by above conditional.
518+
unsafe {
519+
self.write_to_buffer_unchecked(buf);
520+
}
521+
417522
Ok(buf.len())
523+
} else {
524+
self.write_cold(buf)
418525
}
419526
}
420527

528+
#[inline]
421529
fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
422-
// Normally, `write_all` just calls `write` in a loop. We can do better
423-
// by calling `self.get_mut().write_all()` directly, which avoids
424-
// round trips through the buffer in the event of a series of partial
425-
// writes in some circumstances.
426-
if self.buf.len() + buf.len() > self.buf.capacity() {
427-
self.flush_buf()?;
428-
}
429-
// FIXME: Why no len > capacity? Why not buffer len == capacity? #72919
430-
if buf.len() >= self.buf.capacity() {
431-
self.panicked = true;
432-
let r = self.get_mut().write_all(buf);
433-
self.panicked = false;
434-
r
435-
} else {
436-
self.buf.extend_from_slice(buf);
530+
// Use < instead of <= to avoid a needless trip through the buffer in some cases.
531+
// See `write_all_cold` for details.
532+
if buf.len() < self.spare_capacity() {
533+
// SAFETY: safe by above conditional.
534+
unsafe {
535+
self.write_to_buffer_unchecked(buf);
536+
}
537+
437538
Ok(())
539+
} else {
540+
self.write_all_cold(buf)
438541
}
439542
}
440543

441544
fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result<usize> {
545+
// FIXME: Consider applying `#[inline]` / `#[inline(never)]` optimizations already applied
546+
// to `write` and `write_all`. The performance benefits can be significant. See #79930.
442547
if self.get_ref().is_write_vectored() {
443-
let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
444-
if self.buf.len() + total_len > self.buf.capacity() {
548+
// We have to handle the possibility that the total length of the buffers overflows
549+
// `usize` (even though this can only happen if multiple `IoSlice`s reference the
550+
// same underlying buffer, as otherwise the buffers wouldn't fit in memory). If the
551+
// computation overflows, then surely the input cannot fit in our buffer, so we forward
552+
// to the inner writer's `write_vectored` method to let it handle it appropriately.
553+
let saturated_total_len =
554+
bufs.iter().fold(0usize, |acc, b| acc.saturating_add(b.len()));
555+
556+
if saturated_total_len > self.spare_capacity() {
557+
// Flush if the total length of the input exceeds our buffer's spare capacity.
558+
// If we would have overflowed, this condition also holds, and we need to flush.
445559
self.flush_buf()?;
446560
}
447-
if total_len >= self.buf.capacity() {
561+
562+
if saturated_total_len >= self.buf.capacity() {
563+
// Forward to our inner writer if the total length of the input is greater than or
564+
// equal to our buffer capacity. If we would have overflowed, this condition also
565+
// holds, and we punt to the inner writer.
448566
self.panicked = true;
449567
let r = self.get_mut().write_vectored(bufs);
450568
self.panicked = false;
451569
r
452570
} else {
453-
bufs.iter().for_each(|b| self.buf.extend_from_slice(b));
454-
Ok(total_len)
571+
// `saturated_total_len < self.buf.capacity()` implies that we did not saturate.
572+
573+
// SAFETY: We checked whether or not the spare capacity was large enough above. If
574+
// it was, then we're safe already. If it wasn't, we flushed, making sufficient
575+
// room for any input <= the buffer size, which includes this input.
576+
unsafe {
577+
bufs.iter().for_each(|b| self.write_to_buffer_unchecked(b));
578+
};
579+
580+
Ok(saturated_total_len)
455581
}
456582
} else {
457583
let mut iter = bufs.iter();
458584
let mut total_written = if let Some(buf) = iter.by_ref().find(|&buf| !buf.is_empty()) {
459585
// This is the first non-empty slice to write, so if it does
460586
// not fit in the buffer, we still get to flush and proceed.
461-
if self.buf.len() + buf.len() > self.buf.capacity() {
587+
if buf.len() > self.spare_capacity() {
462588
self.flush_buf()?;
463589
}
464590
if buf.len() >= self.buf.capacity() {
@@ -469,19 +595,32 @@ impl<W: Write> Write for BufWriter<W> {
469595
self.panicked = false;
470596
return r;
471597
} else {
472-
self.buf.extend_from_slice(buf);
598+
// SAFETY: We checked whether or not the spare capacity was large enough above.
599+
// If it was, then we're safe already. If it wasn't, we flushed, making
600+
// sufficient room for any input <= the buffer size, which includes this input.
601+
unsafe {
602+
self.write_to_buffer_unchecked(buf);
603+
}
604+
473605
buf.len()
474606
}
475607
} else {
476608
return Ok(0);
477609
};
478610
debug_assert!(total_written != 0);
479611
for buf in iter {
480-
if self.buf.len() + buf.len() > self.buf.capacity() {
481-
break;
482-
} else {
483-
self.buf.extend_from_slice(buf);
612+
if buf.len() <= self.spare_capacity() {
613+
// SAFETY: safe by above conditional.
614+
unsafe {
615+
self.write_to_buffer_unchecked(buf);
616+
}
617+
618+
// This cannot overflow `usize`. If we are here, we've written all of the bytes
619+
// so far to our buffer, and we've ensured that we never exceed the buffer's
620+
// capacity. Therefore, `total_written` <= `self.buf.capacity()` <= `usize::MAX`.
484621
total_written += buf.len();
622+
} else {
623+
break;
485624
}
486625
}
487626
Ok(total_written)

0 commit comments

Comments
 (0)