Skip to content

Commit 12899e7

Browse files
committed
Rollup merge of rust-lang#26950 - AlisdairO:memset, r=alexcrichton
In general, it's undesirable to have read_to_end use a buffer with uninitialized memory, as that could lead to undefined behaviour in the event of a bad Read implementation. Since we control the implementations of Read for Stdin and File, however, it should be okay for us to specialise them to improve performance. This PR does that, adding some unsafe code to deal with creating the buffers. Since the read_to_end function needed to be used from both the io and fs modules, I moved it into a newly-created sys::common::io module. Alternatively, we could expose the new read_to_end functions to allow people to create their own read_to_end implementations for code they trust.

Benchmarks:

Read a 2.5MB file:
sys_common::io::tests::bench_init_file ... bench: 27,473,317 ns/iter (+/- 2,490,767)
sys_common::io::tests::bench_uninit_file ... bench: 25,611,793 ns/iter (+/- 2,137,387)

Read a buffer full of constant values:
sys_common::io::tests::bench_uninitialized ... bench: 12,877,645 ns/iter (+/- 931,025)
sys_common::io::tests::bench_zeroed ... bench: 18,581,082 ns/iter (+/- 1,541,108)

So, approximately a 7% speedup for file reading, which I think is worthwhile.
2 parents 1da1a46 + 98f2872 commit 12899e7

File tree

6 files changed

+166
-0
lines changed

6 files changed

+166
-0
lines changed

src/libstd/fs.rs

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use io::{self, SeekFrom, Seek, Read, Write};
2525
use path::{Path, PathBuf};
2626
use sys::fs as fs_imp;
2727
use sys_common::{AsInnerMut, FromInner, AsInner};
28+
use sys_common::io::read_to_end_uninitialized;
2829
use vec::Vec;
2930

3031
/// A reference to an open file on the filesystem.
@@ -328,6 +329,9 @@ impl Read for File {
328329
// Reads into `buf` by forwarding to the wrapped `inner` handle's `read`.
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
329330
self.inner.read(buf)
330331
}
332+
// Reads everything up to EOF into `buf` through the shared
// `read_to_end_uninitialized` helper instead of the default `read_to_end`.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
333+
// SAFETY: the helper passes `read` a slice over uninitialized memory, so
// this relies on `File`'s `read` never reading from the buffer it is given
// and reporting the number of bytes written correctly.
unsafe { read_to_end_uninitialized(self, buf) }
334+
}
331335
}
332336
#[stable(feature = "rust1", since = "1.0.0")]
333337
impl Write for File {

src/libstd/io/mod.rs

+11
Original file line numberDiff line numberDiff line change
@@ -906,6 +906,8 @@ mod tests {
906906
use io::prelude::*;
907907
use io;
908908
use super::Cursor;
909+
use test;
910+
use super::repeat;
909911

910912
#[test]
911913
fn read_until() {
@@ -1024,4 +1026,13 @@ mod tests {
10241026
let mut buf = [0; 1];
10251027
assert_eq!(0, R.take(0).read(&mut buf).unwrap());
10261028
}
1029+
1030+
// Benchmarks the generic `super::read_to_end` on a reader that yields
// 10,000,000 bytes of the constant value 1, starting from a vector with
// 1024 bytes of capacity. The result of the read is intentionally ignored.
#[bench]
1031+
fn bench_read_to_end(b: &mut test::Bencher) {
1032+
b.iter(|| {
1033+
let mut lr = repeat(1).take(10000000);
1034+
let mut vec = Vec::with_capacity(1024);
1035+
super::read_to_end(&mut lr, &mut vec);
1036+
});
1037+
}
10271038
}

src/libstd/io/stdio.rs

+4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use io::lazy::Lazy;
1818
use io::{self, BufReader, LineWriter};
1919
use sync::{Arc, Mutex, MutexGuard};
2020
use sys::stdio;
21+
use sys_common::io::{read_to_end_uninitialized};
2122
use sys_common::remutex::{ReentrantMutex, ReentrantMutexGuard};
2223
use libc;
2324

@@ -277,6 +278,9 @@ impl<'a> Read for StdinLock<'a> {
277278
// Reads into `buf` by forwarding to the wrapped `inner` reader's `read`.
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
278279
self.inner.read(buf)
279280
}
281+
// Reads everything up to EOF into `buf` through the shared
// `read_to_end_uninitialized` helper instead of the default `read_to_end`.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
282+
// SAFETY: the helper passes `read` a slice over uninitialized memory, so
// this relies on `StdinLock`'s `read` never reading from the buffer it is
// given and reporting the number of bytes written correctly.
unsafe { read_to_end_uninitialized(self, buf) }
283+
}
280284
}
281285

282286
#[stable(feature = "rust1", since = "1.0.0")]

src/libstd/net/tcp.rs

+7
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use io;
1919
use net::{ToSocketAddrs, SocketAddr, Shutdown};
2020
use sys_common::net as net_imp;
2121
use sys_common::{AsInner, FromInner};
22+
use sys_common::io::read_to_end_uninitialized;
2223
use time::Duration;
2324

2425
/// A structure which represents a TCP stream between a local socket and a
@@ -189,6 +190,9 @@ impl TcpStream {
189190
#[stable(feature = "rust1", since = "1.0.0")]
190191
impl Read for TcpStream {
191192
// Reads into `buf` by forwarding to the wrapped inner value (`self.0`).
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { self.0.read(buf) }
193+
// Reads everything up to EOF into `buf` through the shared
// `read_to_end_uninitialized` helper instead of the default `read_to_end`.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
194+
// SAFETY: the helper passes `read` a slice over uninitialized memory, so
// this relies on `TcpStream`'s `read` never reading from the buffer it is
// given and reporting the number of bytes written correctly.
unsafe { read_to_end_uninitialized(self, buf) }
195+
}
192196
}
193197
#[stable(feature = "rust1", since = "1.0.0")]
194198
impl Write for TcpStream {
@@ -198,6 +202,9 @@ impl Write for TcpStream {
198202
#[stable(feature = "rust1", since = "1.0.0")]
199203
impl<'a> Read for &'a TcpStream {
200204
// Reads into `buf` by forwarding to the wrapped inner value (`self.0`).
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { self.0.read(buf) }
205+
// Reads everything up to EOF into `buf` through the shared
// `read_to_end_uninitialized` helper instead of the default `read_to_end`.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
206+
// SAFETY: the helper passes `read` a slice over uninitialized memory, so
// this relies on the `read` implementation for `&TcpStream` never reading
// from the buffer it is given and reporting the number of bytes written
// correctly.
unsafe { read_to_end_uninitialized(self, buf) }
207+
}
201208
}
202209
#[stable(feature = "rust1", since = "1.0.0")]
203210
impl<'a> Write for &'a TcpStream {

src/libstd/sys/common/io.rs

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
use prelude::v1::*;
11+
use io;
12+
use io::ErrorKind;
13+
use io::Read;
14+
use slice::from_raw_parts_mut;
15+
16+
// Provides read_to_end functionality over an uninitialized buffer.
17+
// This function is unsafe because it calls the underlying
18+
// read function with a slice into uninitialized memory. The default
19+
// implementation of read_to_end for readers will zero out new memory in
20+
// the buf before passing it to read, but avoiding this zero can often
21+
// lead to a fairly significant performance win.
22+
//
23+
// Implementations using this method have to adhere to two guarantees:
24+
// * The implementation of read never reads the buffer provided.
25+
// * The implementation of read correctly reports how many bytes were written.
26+
// Appends everything `r` yields until EOF to `buf` and returns the number of
// bytes appended (not the total length of `buf`). On a non-`Interrupted`
// error the error is returned and any bytes already read stay in `buf`.
pub unsafe fn read_to_end_uninitialized(r: &mut Read, buf: &mut Vec<u8>) -> io::Result<usize> {
27+
28+
// Remember the initial length so the return value counts only new bytes.
let start_len = buf.len();
29+
// Make sure there is some spare capacity before the first read.
buf.reserve(16);
30+
31+
// Always try to read into the empty space of the vector (from the length to the capacity).
32+
// If the vector ever fills up then we reserve an extra byte which should trigger the normal
33+
// reallocation routines for the vector, which will likely double the size.
34+
//
35+
// This function is similar to the read_to_end function in std::io, but the logic about
36+
// reservations and slicing is different enough that this is duplicated here.
37+
loop {
38+
if buf.len() == buf.capacity() {
39+
buf.reserve(1);
40+
}
41+
42+
// Build a slice over the spare capacity `[len, capacity)`. This memory has
// not been initialized by this function, which is why `r.read` must never
// read from the slice it receives.
let buf_slice = from_raw_parts_mut(buf.as_mut_ptr().offset(buf.len() as isize),
43+
buf.capacity() - buf.len());
44+
45+
match r.read(buf_slice) {
46+
// A zero-length read is treated as EOF.
Ok(0) => { return Ok(buf.len() - start_len); }
47+
// Trust the reported count and extend the length over the bytes just
// written; an over-reported `n` would expose uninitialized memory.
Ok(n) => { let len = buf.len() + n; buf.set_len(len); },
48+
// Interrupted reads are simply retried on the next loop iteration.
Err(ref e) if e.kind() == ErrorKind::Interrupted => { }
49+
Err(e) => { return Err(e); }
50+
}
51+
}
52+
}
53+
54+
// Tests and a benchmark for `read_to_end_uninitialized`.
#[cfg(test)]
55+
mod tests {
56+
use prelude::v1::*;
57+
use io::prelude::*;
58+
use super::*;
59+
use io;
60+
use io::{ErrorKind, Take, Repeat, repeat};
61+
use test;
62+
use slice::from_raw_parts;
63+
64+
// Reader that wraps a `Take<Repeat>` and reports an error where the wrapped
// reader would report a zero-length read.
struct ErrorRepeat {
65+
lr: Take<Repeat>
66+
}
67+
68+
// Builds an `ErrorRepeat` around `repeat(byte).take(limit)`.
fn error_repeat(byte: u8, limit: u64) -> ErrorRepeat {
69+
ErrorRepeat { lr: repeat(byte).take(limit) }
70+
}
71+
72+
impl Read for ErrorRepeat {
73+
// Forwards to the inner reader, but turns an `Ok(0)` result into an
// `ErrorKind::Other` error so callers see a failure instead of EOF.
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
74+
let ret = self.lr.read(buf);
75+
if let Ok(0) = ret {
76+
return Err(io::Error::new(ErrorKind::Other, ""))
77+
}
78+
ret
79+
}
80+
}
81+
82+
// Returns an empty vector whose backing storage was first filled with 200
// bytes of the sentinel value 10, so untouched spare capacity is recognizable.
fn init_vec_data() -> Vec<u8> {
83+
let mut vec = vec![10u8; 200];
84+
unsafe { vec.set_len(0); }
85+
vec
86+
}
87+
88+
// Asserts that every byte in `buf` equals `value`.
fn assert_all_eq(buf: &[u8], value: u8) {
89+
for n in buf {
90+
assert_eq!(*n, value);
91+
}
92+
}
93+
94+
// Checks that every element of `buf` is 1 and that the storage from offset
// `good_read_len` up to the capacity still holds the sentinel value 10.
fn validate(buf: &Vec<u8>, good_read_len: usize) {
95+
assert_all_eq(buf, 1u8);
96+
let cap = buf.capacity();
97+
// Look past the vector's length into its spare capacity.
let end_slice = unsafe { from_raw_parts(buf.as_ptr().offset(good_read_len as isize),
98+
cap - good_read_len) };
99+
assert_all_eq(end_slice, 10u8);
100+
}
101+
102+
// A reader that fails after its data must make the call return `Err`, with
// the bytes read so far kept in the vector and the rest left untouched.
#[test]
103+
fn read_to_end_uninit_error() {
104+
let mut er = error_repeat(1,100);
105+
let mut vec = init_vec_data();
106+
if let Err(_) = unsafe { read_to_end_uninitialized(&mut er, &mut vec) } {
107+
validate(&vec, 100);
108+
} else {
109+
assert!(false);
110+
}
111+
}
112+
113+
// Starting from `Vec::new()`, the returned count must match the final length
// and every byte read must be 1.
#[test]
114+
fn read_to_end_uninit_zero_len_vec() {
115+
let mut er = repeat(1).take(100);
116+
let mut vec = Vec::new();
117+
let n = unsafe{ read_to_end_uninitialized(&mut er, &mut vec).unwrap() };
118+
assert_all_eq(&vec, 1u8);
119+
assert_eq!(vec.len(), n);
120+
}
121+
122+
// Successful read into the sentinel-filled vector: the data is all 1s, the
// spare capacity past offset 100 is untouched, and the count matches the length.
#[test]
123+
fn read_to_end_uninit_good() {
124+
let mut er = repeat(1).take(100);
125+
let mut vec = init_vec_data();
126+
let n = unsafe{ read_to_end_uninitialized(&mut er, &mut vec).unwrap() };
127+
validate(&vec, 100);
128+
assert_eq!(vec.len(), n);
129+
}
130+
131+
// Benchmarks `read_to_end_uninitialized` on 10,000,000 bytes of the constant
// value 1, starting from a vector with 1024 bytes of capacity. The result of
// the read is intentionally ignored.
#[bench]
132+
fn bench_uninitialized(b: &mut test::Bencher) {
133+
b.iter(|| {
134+
let mut lr = repeat(1).take(10000000);
135+
let mut vec = Vec::with_capacity(1024);
136+
unsafe { read_to_end_uninitialized(&mut lr, &mut vec) };
137+
});
138+
}
139+
}

src/libstd/sys/common/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pub mod backtrace;
1616
pub mod condvar;
1717
pub mod mutex;
1818
pub mod net;
19+
pub mod io;
1920
pub mod poison;
2021
pub mod remutex;
2122
pub mod rwlock;

0 commit comments

Comments
 (0)