Skip to content

Commit c6735f9

Browse files
committed
Add index to the dep graph format and encode via MemEncoder
1 parent 6bc57c6 commit c6735f9

File tree

3 files changed

+206
-39
lines changed

3 files changed

+206
-39
lines changed

compiler/rustc_query_system/src/dep_graph/serialized.rs

Lines changed: 83 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use rustc_data_structures::profiling::SelfProfilerRef;
4646
use rustc_data_structures::sync::Lock;
4747
use rustc_data_structures::unhash::UnhashMap;
4848
use rustc_index::{Idx, IndexVec};
49+
use rustc_serialize::opaque::mem_encoder::MemEncoder;
4950
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
5051
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
5152
use tracing::{debug, instrument};
@@ -105,18 +106,11 @@ impl SerializedDepGraph {
105106
) -> impl Iterator<Item = SerializedDepNodeIndex> + Clone {
106107
let header = self.edge_list_indices[source];
107108
let mut raw = &self.edge_list_data[header.start()..];
108-
// Figure out where the edge list for `source` ends by getting the start index of the next
109-
// edge list, or the end of the array if this is the last edge.
110-
let end = self
111-
.edge_list_indices
112-
.get(source + 1)
113-
.map(|h| h.start())
114-
.unwrap_or_else(|| self.edge_list_data.len() - DEP_NODE_PAD);
115109

116110
// The number of edges for this node is implicitly stored in the combination of the byte
117111
// width and the length.
118112
let bytes_per_index = header.bytes_per_index();
119-
let len = (end - header.start()) / bytes_per_index;
113+
let len = header.edges;
120114

121115
// LLVM doesn't hoist EdgeHeader::mask so we do it ourselves.
122116
let mask = header.mask();
@@ -163,6 +157,7 @@ impl SerializedDepGraph {
163157
#[derive(Debug, Clone, Copy)]
164158
struct EdgeHeader {
165159
repr: usize,
160+
edges: u32,
166161
}
167162

168163
impl EdgeHeader {
@@ -205,9 +200,17 @@ impl SerializedDepGraph {
205200

206201
let graph_bytes = d.len() - (2 * IntEncodedWithFixedSize::ENCODED_SIZE) - d.position();
207202

208-
let mut nodes = IndexVec::with_capacity(node_count);
209-
let mut fingerprints = IndexVec::with_capacity(node_count);
210-
let mut edge_list_indices = IndexVec::with_capacity(node_count);
203+
let mut nodes: IndexVec<SerializedDepNodeIndex, _> = (0..node_count)
204+
.map(|_| DepNode {
205+
kind: D::DEP_KIND_NULL,
206+
hash: PackedFingerprint::from(Fingerprint::ZERO),
207+
})
208+
.collect();
209+
let mut fingerprints: IndexVec<SerializedDepNodeIndex, _> =
210+
(0..node_count).map(|_| Fingerprint::ZERO).collect();
211+
let mut edge_list_indices: IndexVec<SerializedDepNodeIndex, _> =
212+
(0..node_count).map(|_| EdgeHeader { repr: 0, edges: 0 }).collect();
213+
211214
// This estimation assumes that all of the encoded bytes are for the edge lists or for the
212215
// fixed-size node headers. But that's not necessarily true; if any edge list has a length
213216
// that spills out of the size we can bit-pack into SerializedNodeHeader then some of the
@@ -226,11 +229,10 @@ impl SerializedDepGraph {
226229
let node_header =
227230
SerializedNodeHeader::<D> { bytes: d.read_array(), _marker: PhantomData };
228231

229-
let _i: SerializedDepNodeIndex = nodes.push(node_header.node());
230-
debug_assert_eq!(_i.index(), _index);
232+
let index = node_header.index();
231233

232-
let _i: SerializedDepNodeIndex = fingerprints.push(node_header.fingerprint());
233-
debug_assert_eq!(_i.index(), _index);
234+
nodes[index] = node_header.node();
235+
fingerprints[index] = node_header.fingerprint();
234236

235237
// If the length of this node's edge list is small, the length is stored in the header.
236238
// If it is not, we fall back to another decoder call.
@@ -242,12 +244,11 @@ impl SerializedDepGraph {
242244
let edges_len_bytes = node_header.bytes_per_index() * (num_edges as usize);
243245
// The in-memory structure for the edges list stores the byte width of the edges on
244246
// this node with the offset into the global edge data array.
245-
let edges_header = node_header.edges_header(&edge_list_data);
247+
let edges_header = node_header.edges_header(&edge_list_data, num_edges);
246248

247249
edge_list_data.extend(d.read_raw_bytes(edges_len_bytes));
248250

249-
let _i: SerializedDepNodeIndex = edge_list_indices.push(edges_header);
250-
debug_assert_eq!(_i.index(), _index);
251+
edge_list_indices[index] = edges_header;
251252
}
252253

253254
// When we access the edge list data, we do a fixed-size read from the edge list data then
@@ -298,9 +299,10 @@ impl SerializedDepGraph {
298299
/// * In whatever bits remain, the length of the edge list for this node, if it fits
299300
struct SerializedNodeHeader<D> {
300301
// 2 bytes for the DepNode
302+
// 4 bytes for the index
301303
// 16 for Fingerprint in DepNode
302304
// 16 for Fingerprint in NodeInfo
303-
bytes: [u8; 34],
305+
bytes: [u8; 38],
304306
_marker: PhantomData<D>,
305307
}
306308

@@ -310,6 +312,7 @@ struct Unpacked {
310312
len: Option<u32>,
311313
bytes_per_index: usize,
312314
kind: DepKind,
315+
index: SerializedDepNodeIndex,
313316
hash: PackedFingerprint,
314317
fingerprint: Fingerprint,
315318
}
@@ -331,6 +334,7 @@ impl<D: Deps> SerializedNodeHeader<D> {
331334
#[inline]
332335
fn new(
333336
node: DepNode,
337+
index: DepNodeIndex,
334338
fingerprint: Fingerprint,
335339
edge_max_index: u32,
336340
edge_count: usize,
@@ -352,10 +356,11 @@ impl<D: Deps> SerializedNodeHeader<D> {
352356
let hash: Fingerprint = node.hash.into();
353357

354358
// Using half-open ranges ensures an unconditional panic if we get the magic numbers wrong.
355-
let mut bytes = [0u8; 34];
359+
let mut bytes = [0u8; 38];
356360
bytes[..2].copy_from_slice(&head.to_le_bytes());
357-
bytes[2..18].copy_from_slice(&hash.to_le_bytes());
358-
bytes[18..].copy_from_slice(&fingerprint.to_le_bytes());
361+
bytes[2..6].copy_from_slice(&index.as_u32().to_le_bytes());
362+
bytes[6..22].copy_from_slice(&hash.to_le_bytes());
363+
bytes[22..].copy_from_slice(&fingerprint.to_le_bytes());
359364

360365
#[cfg(debug_assertions)]
361366
{
@@ -372,8 +377,9 @@ impl<D: Deps> SerializedNodeHeader<D> {
372377
#[inline]
373378
fn unpack(&self) -> Unpacked {
374379
let head = u16::from_le_bytes(self.bytes[..2].try_into().unwrap());
375-
let hash = self.bytes[2..18].try_into().unwrap();
376-
let fingerprint = self.bytes[18..].try_into().unwrap();
380+
let index = u32::from_le_bytes(self.bytes[2..6].try_into().unwrap());
381+
let hash = self.bytes[6..22].try_into().unwrap();
382+
let fingerprint = self.bytes[22..].try_into().unwrap();
377383

378384
let kind = head & mask(Self::KIND_BITS) as u16;
379385
let bytes_per_index = (head >> Self::KIND_BITS) & mask(Self::WIDTH_BITS) as u16;
@@ -383,6 +389,7 @@ impl<D: Deps> SerializedNodeHeader<D> {
383389
len: len.checked_sub(1),
384390
bytes_per_index: bytes_per_index as usize + 1,
385391
kind: DepKind::new(kind),
392+
index: SerializedDepNodeIndex::from_u32(index),
386393
hash: Fingerprint::from_le_bytes(hash).into(),
387394
fingerprint: Fingerprint::from_le_bytes(fingerprint),
388395
}
@@ -398,6 +405,11 @@ impl<D: Deps> SerializedNodeHeader<D> {
398405
self.unpack().bytes_per_index
399406
}
400407

408+
#[inline]
409+
fn index(&self) -> SerializedDepNodeIndex {
410+
self.unpack().index
411+
}
412+
401413
#[inline]
402414
fn fingerprint(&self) -> Fingerprint {
403415
self.unpack().fingerprint
@@ -410,9 +422,10 @@ impl<D: Deps> SerializedNodeHeader<D> {
410422
}
411423

412424
#[inline]
413-
fn edges_header(&self, edge_list_data: &[u8]) -> EdgeHeader {
425+
fn edges_header(&self, edge_list_data: &[u8], edges: u32) -> EdgeHeader {
414426
EdgeHeader {
415427
repr: (edge_list_data.len() << DEP_NODE_WIDTH_BITS) | (self.bytes_per_index() - 1),
428+
edges,
416429
}
417430
}
418431
}
@@ -425,10 +438,15 @@ struct NodeInfo {
425438
}
426439

427440
impl NodeInfo {
428-
fn encode<D: Deps>(&self, e: &mut FileEncoder) {
441+
fn encode<D: Deps>(&self, e: &mut MemEncoder, index: DepNodeIndex) {
429442
let NodeInfo { node, fingerprint, ref edges } = *self;
430-
let header =
431-
SerializedNodeHeader::<D>::new(node, fingerprint, edges.max_index(), edges.len());
443+
let header = SerializedNodeHeader::<D>::new(
444+
node,
445+
index,
446+
fingerprint,
447+
edges.max_index(),
448+
edges.len(),
449+
);
432450
e.write_array(header.bytes);
433451

434452
if header.len().is_none() {
@@ -450,8 +468,9 @@ impl NodeInfo {
450468
/// This avoids the overhead of constructing `EdgesVec`, which would be needed to call `encode`.
451469
#[inline]
452470
fn encode_promoted<D: Deps>(
453-
e: &mut FileEncoder,
471+
e: &mut MemEncoder,
454472
node: DepNode,
473+
index: DepNodeIndex,
455474
fingerprint: Fingerprint,
456475
prev_index: SerializedDepNodeIndex,
457476
colors: &DepNodeColorMap,
@@ -464,7 +483,7 @@ impl NodeInfo {
464483
let edge_max =
465484
edges.clone().map(|i| colors.current(i).unwrap().as_u32()).max().unwrap_or(0);
466485

467-
let header = SerializedNodeHeader::<D>::new(node, fingerprint, edge_max, edge_count);
486+
let header = SerializedNodeHeader::<D>::new(node, index, fingerprint, edge_max, edge_count);
468487
e.write_array(header.bytes);
469488

470489
if header.len().is_none() {
@@ -498,6 +517,8 @@ struct EncoderState<D: Deps> {
498517
total_edge_count: usize,
499518
stats: Option<FxHashMap<DepKind, Stat>>,
500519

520+
mem_encoder: MemEncoder,
521+
501522
/// Stores the number of times we've encoded each dep kind.
502523
kind_stats: Vec<u32>,
503524
marker: PhantomData<D>,
@@ -511,22 +532,28 @@ impl<D: Deps> EncoderState<D> {
511532
total_edge_count: 0,
512533
total_node_count: 0,
513534
stats: record_stats.then(FxHashMap::default),
535+
mem_encoder: MemEncoder::new(),
514536
kind_stats: iter::repeat(0).take(D::DEP_KIND_MAX as usize + 1).collect(),
515537
marker: PhantomData,
516538
}
517539
}
518540

541+
#[inline]
542+
fn alloc_index(&mut self) -> DepNodeIndex {
543+
let index = DepNodeIndex::new(self.total_node_count);
544+
self.total_node_count += 1;
545+
index
546+
}
547+
519548
#[inline]
520549
fn record(
521550
&mut self,
522551
node: DepNode,
552+
index: DepNodeIndex,
523553
edge_count: usize,
524554
edges: impl FnOnce(&mut Self) -> Vec<DepNodeIndex>,
525555
record_graph: &Option<Lock<DepGraphQuery>>,
526556
) -> DepNodeIndex {
527-
let index = DepNodeIndex::new(self.total_node_count);
528-
529-
self.total_node_count += 1;
530557
self.kind_stats[node.kind.as_usize()] += 1;
531558
self.total_edge_count += edge_count;
532559

@@ -558,14 +585,25 @@ impl<D: Deps> EncoderState<D> {
558585
index
559586
}
560587

588+
#[inline]
589+
fn flush_mem_encoder(&mut self) {
590+
let data = &mut self.mem_encoder.data;
591+
if data.len() > 64 * 1024 {
592+
self.encoder.emit_raw_bytes(&data[..]);
593+
data.clear();
594+
}
595+
}
596+
561597
/// Encodes a node to the current graph.
562598
fn encode_node(
563599
&mut self,
564600
node: &NodeInfo,
565601
record_graph: &Option<Lock<DepGraphQuery>>,
566602
) -> DepNodeIndex {
567-
node.encode::<D>(&mut self.encoder);
568-
self.record(node.node, node.edges.len(), |_| node.edges[..].to_vec(), record_graph)
603+
let index = self.alloc_index();
604+
node.encode::<D>(&mut self.mem_encoder, index);
605+
self.flush_mem_encoder();
606+
self.record(node.node, index, node.edges.len(), |_| node.edges[..].to_vec(), record_graph)
569607
}
570608

571609
/// Encodes a node that was promoted from the previous graph. It reads the information directly from
@@ -581,20 +619,22 @@ impl<D: Deps> EncoderState<D> {
581619
record_graph: &Option<Lock<DepGraphQuery>>,
582620
colors: &DepNodeColorMap,
583621
) -> DepNodeIndex {
622+
let index = self.alloc_index();
584623
let node = self.previous.index_to_node(prev_index);
585-
586624
let fingerprint = self.previous.fingerprint_by_index(prev_index);
587625
let edge_count = NodeInfo::encode_promoted::<D>(
588-
&mut self.encoder,
626+
&mut self.mem_encoder,
589627
node,
628+
index,
590629
fingerprint,
591630
prev_index,
592631
colors,
593632
&self.previous,
594633
);
595-
634+
self.flush_mem_encoder();
596635
self.record(
597636
node,
637+
index,
598638
edge_count,
599639
|this| {
600640
this.previous
@@ -603,12 +643,14 @@ impl<D: Deps> EncoderState<D> {
603643
.collect()
604644
},
605645
record_graph,
606-
)
646+
);
647+
index
607648
}
608649

609650
fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult {
610651
let Self {
611652
mut encoder,
653+
mem_encoder,
612654
total_node_count,
613655
total_edge_count,
614656
stats: _,
@@ -617,6 +659,8 @@ impl<D: Deps> EncoderState<D> {
617659
previous,
618660
} = self;
619661

662+
encoder.emit_raw_bytes(&mem_encoder.data);
663+
620664
let node_count = total_node_count.try_into().unwrap();
621665
let edge_count = total_edge_count.try_into().unwrap();
622666

compiler/rustc_serialize/src/opaque.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use crate::int_overflow::DebugStrictAdd;
1010
use crate::leb128;
1111
use crate::serialize::{Decodable, Decoder, Encodable, Encoder};
1212

13+
pub mod mem_encoder;
14+
1315
// -----------------------------------------------------------------------------
1416
// Encoder
1517
// -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)