Skip to content

Commit 3269f5a

Browse files
committed
Add index to the dep graph format and encode via MemEncoder
1 parent daa2f5e commit 3269f5a

File tree

3 files changed

+206
-39
lines changed

compiler/rustc_query_system/src/dep_graph/serialized.rs

+83-39
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use rustc_data_structures::profiling::SelfProfilerRef;
4646
use rustc_data_structures::sync::Lock;
4747
use rustc_data_structures::unhash::UnhashMap;
4848
use rustc_index::{Idx, IndexVec};
49+
use rustc_serialize::opaque::mem_encoder::MemEncoder;
4950
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
5051
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
5152
use tracing::{debug, instrument};
@@ -102,18 +103,11 @@ impl SerializedDepGraph {
102103
) -> impl Iterator<Item = SerializedDepNodeIndex> + Clone {
103104
let header = self.edge_list_indices[source];
104105
let mut raw = &self.edge_list_data[header.start()..];
105-
// Figure out where the edge list for `source` ends by getting the start index of the next
106-
// edge list, or the end of the array if this is the last edge.
107-
let end = self
108-
.edge_list_indices
109-
.get(source + 1)
110-
.map(|h| h.start())
111-
.unwrap_or_else(|| self.edge_list_data.len() - DEP_NODE_PAD);
112106

113107
// The number of edges for this node is stored explicitly in the edge header (`header.edges`);
114108
// the byte width of each encoded index comes from the same header.
115109
let bytes_per_index = header.bytes_per_index();
116-
let len = (end - header.start()) / bytes_per_index;
110+
let len = header.edges;
117111

118112
// LLVM doesn't hoist EdgeHeader::mask so we do it ourselves.
119113
let mask = header.mask();
@@ -155,6 +149,7 @@ impl SerializedDepGraph {
155149
#[derive(Debug, Clone, Copy)]
156150
struct EdgeHeader {
157151
repr: usize,
152+
edges: u32,
158153
}
159154

160155
impl EdgeHeader {
@@ -197,9 +192,17 @@ impl SerializedDepGraph {
197192

198193
let graph_bytes = d.len() - (2 * IntEncodedWithFixedSize::ENCODED_SIZE) - d.position();
199194

200-
let mut nodes = IndexVec::with_capacity(node_count);
201-
let mut fingerprints = IndexVec::with_capacity(node_count);
202-
let mut edge_list_indices = IndexVec::with_capacity(node_count);
195+
let mut nodes: IndexVec<SerializedDepNodeIndex, _> = (0..node_count)
196+
.map(|_| DepNode {
197+
kind: D::DEP_KIND_NULL,
198+
hash: PackedFingerprint::from(Fingerprint::ZERO),
199+
})
200+
.collect();
201+
let mut fingerprints: IndexVec<SerializedDepNodeIndex, _> =
202+
(0..node_count).map(|_| Fingerprint::ZERO).collect();
203+
let mut edge_list_indices: IndexVec<SerializedDepNodeIndex, _> =
204+
(0..node_count).map(|_| EdgeHeader { repr: 0, edges: 0 }).collect();
205+
203206
// This estimation assumes that all of the encoded bytes are for the edge lists or for the
204207
// fixed-size node headers. But that's not necessarily true; if any edge list has a length
205208
// that spills out of the size we can bit-pack into SerializedNodeHeader then some of the
@@ -218,11 +221,10 @@ impl SerializedDepGraph {
218221
let node_header =
219222
SerializedNodeHeader::<D> { bytes: d.read_array(), _marker: PhantomData };
220223

221-
let _i: SerializedDepNodeIndex = nodes.push(node_header.node());
222-
debug_assert_eq!(_i.index(), _index);
224+
let index = node_header.index();
223225

224-
let _i: SerializedDepNodeIndex = fingerprints.push(node_header.fingerprint());
225-
debug_assert_eq!(_i.index(), _index);
226+
nodes[index] = node_header.node();
227+
fingerprints[index] = node_header.fingerprint();
226228

227229
// If the length of this node's edge list is small, the length is stored in the header.
228230
// If it is not, we fall back to another decoder call.
@@ -234,12 +236,11 @@ impl SerializedDepGraph {
234236
let edges_len_bytes = node_header.bytes_per_index() * (num_edges as usize);
235237
// The in-memory structure for the edges list stores the byte width of the edges on
236238
// this node with the offset into the global edge data array.
237-
let edges_header = node_header.edges_header(&edge_list_data);
239+
let edges_header = node_header.edges_header(&edge_list_data, num_edges);
238240

239241
edge_list_data.extend(d.read_raw_bytes(edges_len_bytes));
240242

241-
let _i: SerializedDepNodeIndex = edge_list_indices.push(edges_header);
242-
debug_assert_eq!(_i.index(), _index);
243+
edge_list_indices[index] = edges_header;
243244
}
244245

245246
// When we access the edge list data, we do a fixed-size read from the edge list data then
@@ -287,9 +288,10 @@ impl SerializedDepGraph {
287288
/// * In whatever bits remain, the length of the edge list for this node, if it fits
288289
struct SerializedNodeHeader<D> {
289290
// 2 bytes for the packed head (DepKind, index byte width and, when it fits, the edge count)
291+
// 4 bytes for the index
290292
// 16 for Fingerprint in DepNode
291293
// 16 for Fingerprint in NodeInfo
292-
bytes: [u8; 34],
294+
bytes: [u8; 38],
293295
_marker: PhantomData<D>,
294296
}
295297

@@ -299,6 +301,7 @@ struct Unpacked {
299301
len: Option<u32>,
300302
bytes_per_index: usize,
301303
kind: DepKind,
304+
index: SerializedDepNodeIndex,
302305
hash: PackedFingerprint,
303306
fingerprint: Fingerprint,
304307
}
@@ -320,6 +323,7 @@ impl<D: Deps> SerializedNodeHeader<D> {
320323
#[inline]
321324
fn new(
322325
node: DepNode,
326+
index: DepNodeIndex,
323327
fingerprint: Fingerprint,
324328
edge_max_index: u32,
325329
edge_count: usize,
@@ -341,10 +345,11 @@ impl<D: Deps> SerializedNodeHeader<D> {
341345
let hash: Fingerprint = node.hash.into();
342346

343347
// Using half-open ranges ensures an unconditional panic if we get the magic numbers wrong.
344-
let mut bytes = [0u8; 34];
348+
let mut bytes = [0u8; 38];
345349
bytes[..2].copy_from_slice(&head.to_le_bytes());
346-
bytes[2..18].copy_from_slice(&hash.to_le_bytes());
347-
bytes[18..].copy_from_slice(&fingerprint.to_le_bytes());
350+
bytes[2..6].copy_from_slice(&index.as_u32().to_le_bytes());
351+
bytes[6..22].copy_from_slice(&hash.to_le_bytes());
352+
bytes[22..].copy_from_slice(&fingerprint.to_le_bytes());
348353

349354
#[cfg(debug_assertions)]
350355
{
@@ -361,8 +366,9 @@ impl<D: Deps> SerializedNodeHeader<D> {
361366
#[inline]
362367
fn unpack(&self) -> Unpacked {
363368
let head = u16::from_le_bytes(self.bytes[..2].try_into().unwrap());
364-
let hash = self.bytes[2..18].try_into().unwrap();
365-
let fingerprint = self.bytes[18..].try_into().unwrap();
369+
let index = u32::from_le_bytes(self.bytes[2..6].try_into().unwrap());
370+
let hash = self.bytes[6..22].try_into().unwrap();
371+
let fingerprint = self.bytes[22..].try_into().unwrap();
366372

367373
let kind = head & mask(Self::KIND_BITS) as u16;
368374
let bytes_per_index = (head >> Self::KIND_BITS) & mask(Self::WIDTH_BITS) as u16;
@@ -372,6 +378,7 @@ impl<D: Deps> SerializedNodeHeader<D> {
372378
len: len.checked_sub(1),
373379
bytes_per_index: bytes_per_index as usize + 1,
374380
kind: DepKind::new(kind),
381+
index: SerializedDepNodeIndex::from_u32(index),
375382
hash: Fingerprint::from_le_bytes(hash).into(),
376383
fingerprint: Fingerprint::from_le_bytes(fingerprint),
377384
}
@@ -387,6 +394,11 @@ impl<D: Deps> SerializedNodeHeader<D> {
387394
self.unpack().bytes_per_index
388395
}
389396

397+
#[inline]
398+
fn index(&self) -> SerializedDepNodeIndex {
399+
self.unpack().index
400+
}
401+
390402
#[inline]
391403
fn fingerprint(&self) -> Fingerprint {
392404
self.unpack().fingerprint
@@ -399,9 +411,10 @@ impl<D: Deps> SerializedNodeHeader<D> {
399411
}
400412

401413
#[inline]
402-
fn edges_header(&self, edge_list_data: &[u8]) -> EdgeHeader {
414+
fn edges_header(&self, edge_list_data: &[u8], edges: u32) -> EdgeHeader {
403415
EdgeHeader {
404416
repr: (edge_list_data.len() << DEP_NODE_WIDTH_BITS) | (self.bytes_per_index() - 1),
417+
edges,
405418
}
406419
}
407420
}
@@ -414,10 +427,15 @@ struct NodeInfo {
414427
}
415428

416429
impl NodeInfo {
417-
fn encode<D: Deps>(&self, e: &mut FileEncoder) {
430+
fn encode<D: Deps>(&self, e: &mut MemEncoder, index: DepNodeIndex) {
418431
let NodeInfo { node, fingerprint, ref edges } = *self;
419-
let header =
420-
SerializedNodeHeader::<D>::new(node, fingerprint, edges.max_index(), edges.len());
432+
let header = SerializedNodeHeader::<D>::new(
433+
node,
434+
index,
435+
fingerprint,
436+
edges.max_index(),
437+
edges.len(),
438+
);
421439
e.write_array(header.bytes);
422440

423441
if header.len().is_none() {
@@ -439,8 +457,9 @@ impl NodeInfo {
439457
/// This avoids the overhead of constructing `EdgesVec`, which would be needed to call `encode`.
440458
#[inline]
441459
fn encode_promoted<D: Deps>(
442-
e: &mut FileEncoder,
460+
e: &mut MemEncoder,
443461
node: DepNode,
462+
index: DepNodeIndex,
444463
fingerprint: Fingerprint,
445464
prev_index: SerializedDepNodeIndex,
446465
colors: &DepNodeColorMap,
@@ -453,7 +472,7 @@ impl NodeInfo {
453472
let edge_max =
454473
edges.clone().map(|i| colors.current(i).unwrap().as_u32()).max().unwrap_or(0);
455474

456-
let header = SerializedNodeHeader::<D>::new(node, fingerprint, edge_max, edge_count);
475+
let header = SerializedNodeHeader::<D>::new(node, index, fingerprint, edge_max, edge_count);
457476
e.write_array(header.bytes);
458477

459478
if header.len().is_none() {
@@ -487,6 +506,8 @@ struct EncoderState<D: Deps> {
487506
total_edge_count: usize,
488507
stats: Option<FxHashMap<DepKind, Stat>>,
489508

509+
mem_encoder: MemEncoder,
510+
490511
/// Stores the number of times we've encoded each dep kind.
491512
kind_stats: Vec<u32>,
492513
marker: PhantomData<D>,
@@ -500,22 +521,28 @@ impl<D: Deps> EncoderState<D> {
500521
total_edge_count: 0,
501522
total_node_count: 0,
502523
stats: record_stats.then(FxHashMap::default),
524+
mem_encoder: MemEncoder::new(),
503525
kind_stats: iter::repeat(0).take(D::DEP_KIND_MAX as usize + 1).collect(),
504526
marker: PhantomData,
505527
}
506528
}
507529

530+
#[inline]
531+
fn alloc_index(&mut self) -> DepNodeIndex {
532+
let index = DepNodeIndex::new(self.total_node_count);
533+
self.total_node_count += 1;
534+
index
535+
}
536+
508537
#[inline]
509538
fn record(
510539
&mut self,
511540
node: DepNode,
541+
index: DepNodeIndex,
512542
edge_count: usize,
513543
edges: impl FnOnce(&mut Self) -> Vec<DepNodeIndex>,
514544
record_graph: &Option<Lock<DepGraphQuery>>,
515545
) -> DepNodeIndex {
516-
let index = DepNodeIndex::new(self.total_node_count);
517-
518-
self.total_node_count += 1;
519546
self.kind_stats[node.kind.as_usize()] += 1;
520547
self.total_edge_count += edge_count;
521548

@@ -547,14 +574,25 @@ impl<D: Deps> EncoderState<D> {
547574
index
548575
}
549576

577+
#[inline]
578+
fn flush_mem_encoder(&mut self) {
579+
let data = &mut self.mem_encoder.data;
580+
if data.len() > 64 * 1024 {
581+
self.encoder.emit_raw_bytes(&data[..]);
582+
data.clear();
583+
}
584+
}
585+
550586
/// Encodes a node to the current graph.
551587
fn encode_node(
552588
&mut self,
553589
node: &NodeInfo,
554590
record_graph: &Option<Lock<DepGraphQuery>>,
555591
) -> DepNodeIndex {
556-
node.encode::<D>(&mut self.encoder);
557-
self.record(node.node, node.edges.len(), |_| node.edges[..].to_vec(), record_graph)
592+
let index = self.alloc_index();
593+
node.encode::<D>(&mut self.mem_encoder, index);
594+
self.flush_mem_encoder();
595+
self.record(node.node, index, node.edges.len(), |_| node.edges[..].to_vec(), record_graph)
558596
}
559597

560598
/// Encodes a node that was promoted from the previous graph. It reads the information directly from
@@ -570,20 +608,22 @@ impl<D: Deps> EncoderState<D> {
570608
record_graph: &Option<Lock<DepGraphQuery>>,
571609
colors: &DepNodeColorMap,
572610
) -> DepNodeIndex {
611+
let index = self.alloc_index();
573612
let node = self.previous.index_to_node(prev_index);
574-
575613
let fingerprint = self.previous.fingerprint_by_index(prev_index);
576614
let edge_count = NodeInfo::encode_promoted::<D>(
577-
&mut self.encoder,
615+
&mut self.mem_encoder,
578616
node,
617+
index,
579618
fingerprint,
580619
prev_index,
581620
colors,
582621
&self.previous,
583622
);
584-
623+
self.flush_mem_encoder();
585624
self.record(
586625
node,
626+
index,
587627
edge_count,
588628
|this| {
589629
this.previous
@@ -592,12 +632,14 @@ impl<D: Deps> EncoderState<D> {
592632
.collect()
593633
},
594634
record_graph,
595-
)
635+
);
636+
index
596637
}
597638

598639
fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult {
599640
let Self {
600641
mut encoder,
642+
mem_encoder,
601643
total_node_count,
602644
total_edge_count,
603645
stats: _,
@@ -606,6 +648,8 @@ impl<D: Deps> EncoderState<D> {
606648
previous: _,
607649
} = self;
608650

651+
encoder.emit_raw_bytes(&mem_encoder.data);
652+
609653
let node_count = total_node_count.try_into().unwrap();
610654
let edge_count = total_edge_count.try_into().unwrap();
611655

compiler/rustc_serialize/src/opaque.rs

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use crate::int_overflow::DebugStrictAdd;
1010
use crate::leb128;
1111
use crate::serialize::{Decodable, Decoder, Encodable, Encoder};
1212

13+
pub mod mem_encoder;
14+
1315
// -----------------------------------------------------------------------------
1416
// Encoder
1517
// -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)