Skip to content

Commit 8234db5

Browse files
committed
Auto merge of rust-lang#80463 - tgnottingham:incr_comp_serial_mem_usage, r=oli-obk
Serialize incr comp structures to file via fixed-size buffer

Reduce a large memory spike that happens during serialization by writing the incr comp structures to file by way of a fixed-size buffer, rather than an unbounded vector.

Effort was made to keep the instruction count close to that of the previous implementation. However, buffered writing to a file inherently has more overhead than writing to a vector, because each write may result in a handleable error. To reduce this overhead, arrangements are made so that each LEB128-encoded integer can be written to the buffer with only one capacity and error check. Higher-level optimizations in which entire composite structures can be written with one capacity and error check are possible, but would require much more work.

The performance is mostly on par with the previous implementation, with small to moderate instruction count regressions. The memory reduction is significant, however, so it seems like a worthwhile trade-off.
2 parents 467f5e9 + f15fae8 commit 8234db5

File tree

11 files changed

+686
-220
lines changed

11 files changed

+686
-220
lines changed

compiler/rustc_data_structures/src/fingerprint.rs

+12-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::stable_hasher;
22
use rustc_serialize::{
3-
opaque::{self, EncodeResult},
3+
opaque::{self, EncodeResult, FileEncodeResult},
44
Decodable, Encodable,
55
};
66
use std::hash::{Hash, Hasher};
@@ -53,13 +53,6 @@ impl Fingerprint {
5353
format!("{:x}{:x}", self.0, self.1)
5454
}
5555

56-
pub fn encode_opaque(&self, encoder: &mut opaque::Encoder) -> EncodeResult {
57-
let bytes: [u8; 16] = unsafe { mem::transmute([self.0.to_le(), self.1.to_le()]) };
58-
59-
encoder.emit_raw_bytes(&bytes);
60-
Ok(())
61-
}
62-
6356
pub fn decode_opaque(decoder: &mut opaque::Decoder<'_>) -> Result<Fingerprint, String> {
6457
let mut bytes: [MaybeUninit<u8>; 16] = MaybeUninit::uninit_array();
6558

@@ -142,7 +135,16 @@ impl<E: rustc_serialize::Encoder> FingerprintEncoder for E {
142135

143136
impl FingerprintEncoder for opaque::Encoder {
144137
fn encode_fingerprint(&mut self, f: &Fingerprint) -> EncodeResult {
145-
f.encode_opaque(self)
138+
let bytes: [u8; 16] = unsafe { mem::transmute([f.0.to_le(), f.1.to_le()]) };
139+
self.emit_raw_bytes(&bytes);
140+
Ok(())
141+
}
142+
}
143+
144+
impl FingerprintEncoder for opaque::FileEncoder {
145+
fn encode_fingerprint(&mut self, f: &Fingerprint) -> FileEncodeResult {
146+
let bytes: [u8; 16] = unsafe { mem::transmute([f.0.to_le(), f.1.to_le()]) };
147+
self.emit_raw_bytes(&bytes)
146148
}
147149
}
148150

@@ -198,7 +200,7 @@ impl<E: rustc_serialize::Encoder> Encodable<E> for PackedFingerprint {
198200
impl<D: rustc_serialize::Decoder> Decodable<D> for PackedFingerprint {
199201
#[inline]
200202
fn decode(d: &mut D) -> Result<Self, D::Error> {
201-
Fingerprint::decode(d).map(|f| PackedFingerprint(f))
203+
Fingerprint::decode(d).map(PackedFingerprint)
202204
}
203205
}
204206

compiler/rustc_incremental/src/persist/file_format.rs

+9-7
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use std::fs;
1414
use std::io::{self, Read};
1515
use std::path::Path;
1616

17-
use rustc_serialize::opaque::Encoder;
17+
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
1818

1919
/// The first few bytes of files generated by incremental compilation.
2020
const FILE_MAGIC: &[u8] = b"RSIC";
@@ -27,15 +27,17 @@ const HEADER_FORMAT_VERSION: u16 = 0;
2727
/// the Git commit hash.
2828
const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION");
2929

30-
pub fn write_file_header(stream: &mut Encoder, nightly_build: bool) {
31-
stream.emit_raw_bytes(FILE_MAGIC);
32-
stream
33-
.emit_raw_bytes(&[(HEADER_FORMAT_VERSION >> 0) as u8, (HEADER_FORMAT_VERSION >> 8) as u8]);
30+
pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult {
31+
stream.emit_raw_bytes(FILE_MAGIC)?;
32+
stream.emit_raw_bytes(&[
33+
(HEADER_FORMAT_VERSION >> 0) as u8,
34+
(HEADER_FORMAT_VERSION >> 8) as u8,
35+
])?;
3436

3537
let rustc_version = rustc_version(nightly_build);
3638
assert_eq!(rustc_version.len(), (rustc_version.len() as u8) as usize);
37-
stream.emit_raw_bytes(&[rustc_version.len() as u8]);
38-
stream.emit_raw_bytes(rustc_version.as_bytes());
39+
stream.emit_raw_bytes(&[rustc_version.len() as u8])?;
40+
stream.emit_raw_bytes(rustc_version.as_bytes())
3941
}
4042

4143
/// Reads the contents of a file with a file header as defined in this module.

compiler/rustc_incremental/src/persist/save.rs

+38-32
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use rustc_data_structures::fx::FxHashMap;
22
use rustc_data_structures::sync::join;
33
use rustc_middle::dep_graph::{DepGraph, DepKind, WorkProduct, WorkProductId};
44
use rustc_middle::ty::TyCtxt;
5-
use rustc_serialize::opaque::Encoder;
5+
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
66
use rustc_serialize::Encodable as RustcEncodable;
77
use rustc_session::Session;
88
use std::fs;
@@ -33,12 +33,12 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) {
3333
join(
3434
move || {
3535
sess.time("incr_comp_persist_result_cache", || {
36-
save_in(sess, query_cache_path, |e| encode_query_cache(tcx, e));
36+
save_in(sess, query_cache_path, "query cache", |e| encode_query_cache(tcx, e));
3737
});
3838
},
3939
|| {
4040
sess.time("incr_comp_persist_dep_graph", || {
41-
save_in(sess, dep_graph_path, |e| {
41+
save_in(sess, dep_graph_path, "dependency graph", |e| {
4242
sess.time("incr_comp_encode_dep_graph", || encode_dep_graph(tcx, e))
4343
});
4444
});
@@ -65,7 +65,7 @@ pub fn save_work_product_index(
6565
debug!("save_work_product_index()");
6666
dep_graph.assert_ignored();
6767
let path = work_products_path(sess);
68-
save_in(sess, path, |e| encode_work_product_index(&new_work_products, e));
68+
save_in(sess, path, "work product index", |e| encode_work_product_index(&new_work_products, e));
6969

7070
// We also need to clean out old work-products, as not all of them are
7171
// deleted during invalidation. Some object files don't change their
@@ -92,13 +92,13 @@ pub fn save_work_product_index(
9292
});
9393
}
9494

95-
fn save_in<F>(sess: &Session, path_buf: PathBuf, encode: F)
95+
fn save_in<F>(sess: &Session, path_buf: PathBuf, name: &str, encode: F)
9696
where
97-
F: FnOnce(&mut Encoder),
97+
F: FnOnce(&mut FileEncoder) -> FileEncodeResult,
9898
{
9999
debug!("save: storing data in {}", path_buf.display());
100100

101-
// delete the old dep-graph, if any
101+
// Delete the old file, if any.
102102
// Note: It's important that we actually delete the old file and not just
103103
// truncate and overwrite it, since it might be a shared hard-link, the
104104
// underlying data of which we don't want to modify
@@ -109,34 +109,44 @@ where
109109
Err(err) if err.kind() == io::ErrorKind::NotFound => (),
110110
Err(err) => {
111111
sess.err(&format!(
112-
"unable to delete old dep-graph at `{}`: {}",
112+
"unable to delete old {} at `{}`: {}",
113+
name,
113114
path_buf.display(),
114115
err
115116
));
116117
return;
117118
}
118119
}
119120

120-
// generate the data in a memory buffer
121-
let mut encoder = Encoder::new(Vec::new());
122-
file_format::write_file_header(&mut encoder, sess.is_nightly_build());
123-
encode(&mut encoder);
124-
125-
// write the data out
126-
let data = encoder.into_inner();
127-
match fs::write(&path_buf, data) {
128-
Ok(_) => {
129-
debug!("save: data written to disk successfully");
130-
}
121+
let mut encoder = match FileEncoder::new(&path_buf) {
122+
Ok(encoder) => encoder,
131123
Err(err) => {
132-
sess.err(&format!("failed to write dep-graph to `{}`: {}", path_buf.display(), err));
124+
sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err));
125+
return;
133126
}
127+
};
128+
129+
if let Err(err) = file_format::write_file_header(&mut encoder, sess.is_nightly_build()) {
130+
sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err));
131+
return;
132+
}
133+
134+
if let Err(err) = encode(&mut encoder) {
135+
sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err));
136+
return;
134137
}
138+
139+
if let Err(err) = encoder.flush() {
140+
sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err));
141+
return;
142+
}
143+
144+
debug!("save: data written to disk successfully");
135145
}
136146

137-
fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
147+
fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut FileEncoder) -> FileEncodeResult {
138148
// First encode the commandline arguments hash
139-
tcx.sess.opts.dep_tracking_hash().encode(encoder).unwrap();
149+
tcx.sess.opts.dep_tracking_hash().encode(encoder)?;
140150

141151
// Encode the graph data.
142152
let serialized_graph =
@@ -214,15 +224,13 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
214224
println!("[incremental]");
215225
}
216226

217-
tcx.sess.time("incr_comp_encode_serialized_dep_graph", || {
218-
serialized_graph.encode(encoder).unwrap();
219-
});
227+
tcx.sess.time("incr_comp_encode_serialized_dep_graph", || serialized_graph.encode(encoder))
220228
}
221229

222230
fn encode_work_product_index(
223231
work_products: &FxHashMap<WorkProductId, WorkProduct>,
224-
encoder: &mut Encoder,
225-
) {
232+
encoder: &mut FileEncoder,
233+
) -> FileEncodeResult {
226234
let serialized_products: Vec<_> = work_products
227235
.iter()
228236
.map(|(id, work_product)| SerializedWorkProduct {
@@ -231,11 +239,9 @@ fn encode_work_product_index(
231239
})
232240
.collect();
233241

234-
serialized_products.encode(encoder).unwrap();
242+
serialized_products.encode(encoder)
235243
}
236244

237-
fn encode_query_cache(tcx: TyCtxt<'_>, encoder: &mut Encoder) {
238-
tcx.sess.time("incr_comp_serialize_result_cache", || {
239-
tcx.serialize_query_result_cache(encoder).unwrap();
240-
})
245+
fn encode_query_cache(tcx: TyCtxt<'_>, encoder: &mut FileEncoder) -> FileEncodeResult {
246+
tcx.sess.time("incr_comp_serialize_result_cache", || tcx.serialize_query_result_cache(encoder))
241247
}

compiler/rustc_metadata/src/rmeta/encoder.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
308308

309309
impl<'a, 'tcx> FingerprintEncoder for EncodeContext<'a, 'tcx> {
310310
fn encode_fingerprint(&mut self, f: &Fingerprint) -> Result<(), Self::Error> {
311-
f.encode_opaque(&mut self.opaque)
311+
self.opaque.encode_fingerprint(f)
312312
}
313313
}
314314

compiler/rustc_middle/src/ty/codec.rs

-16
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,6 @@ impl<'tcx, E: TyEncoder<'tcx>> EncodableWithShorthand<'tcx, E> for ty::Predicate
5050
}
5151
}
5252

53-
pub trait OpaqueEncoder: Encoder {
54-
fn opaque(&mut self) -> &mut rustc_serialize::opaque::Encoder;
55-
fn encoder_position(&self) -> usize;
56-
}
57-
58-
impl OpaqueEncoder for rustc_serialize::opaque::Encoder {
59-
#[inline]
60-
fn opaque(&mut self) -> &mut rustc_serialize::opaque::Encoder {
61-
self
62-
}
63-
#[inline]
64-
fn encoder_position(&self) -> usize {
65-
self.position()
66-
}
67-
}
68-
6953
pub trait TyEncoder<'tcx>: Encoder {
7054
const CLEAR_CROSS_CRATE: bool;
7155

compiler/rustc_middle/src/ty/context.rs

+2-4
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ use rustc_hir::{
4747
};
4848
use rustc_index::vec::{Idx, IndexVec};
4949
use rustc_macros::HashStable;
50+
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
5051
use rustc_session::config::{BorrowckMode, CrateType, OutputFilenames};
5152
use rustc_session::lint::{Level, Lint};
5253
use rustc_session::Session;
@@ -1336,10 +1337,7 @@ impl<'tcx> TyCtxt<'tcx> {
13361337
}
13371338
}
13381339

1339-
pub fn serialize_query_result_cache<E>(self, encoder: &mut E) -> Result<(), E::Error>
1340-
where
1341-
E: ty::codec::OpaqueEncoder,
1342-
{
1340+
pub fn serialize_query_result_cache(self, encoder: &mut FileEncoder) -> FileEncodeResult {
13431341
self.queries.on_disk_cache.as_ref().map(|c| c.serialize(self, encoder)).unwrap_or(Ok(()))
13441342
}
13451343

0 commit comments

Comments
 (0)