Skip to content

Commit 2714e13

Browse files
committed
Auto merge of #139756 - Zoxc:out-of-order-dep-graph, r=oli-obk
Allow out-of-order dep graph node encoding. This allows dep graph nodes to be encoded out of order by also encoding each node's index, instead of using the node's position in the file as its index. `MemEncoder` is also brought back to life and used for encoding. Both changes are made to enable thread-local encoding of dep graph nodes. This is based on #139636.
2 parents afa859f + 8fc610e commit 2714e13

File tree

3 files changed

+206
-39
lines changed

3 files changed

+206
-39
lines changed

Diff for: compiler/rustc_query_system/src/dep_graph/serialized.rs

+83-39
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use rustc_data_structures::profiling::SelfProfilerRef;
4646
use rustc_data_structures::sync::Lock;
4747
use rustc_data_structures::unhash::UnhashMap;
4848
use rustc_index::{Idx, IndexVec};
49+
use rustc_serialize::opaque::mem_encoder::MemEncoder;
4950
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
5051
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
5152
use tracing::{debug, instrument};
@@ -102,18 +103,11 @@ impl SerializedDepGraph {
102103
) -> impl Iterator<Item = SerializedDepNodeIndex> + Clone {
103104
let header = self.edge_list_indices[source];
104105
let mut raw = &self.edge_list_data[header.start()..];
105-
// Figure out where the edge list for `source` ends by getting the start index of the next
106-
// edge list, or the end of the array if this is the last edge.
107-
let end = self
108-
.edge_list_indices
109-
.get(source + 1)
110-
.map(|h| h.start())
111-
.unwrap_or_else(|| self.edge_list_data.len() - DEP_NODE_PAD);
112106

113107
// The number of edges for this node is stored explicitly in the edge header, alongside the
114108
// byte width of each index.
115109
let bytes_per_index = header.bytes_per_index();
116-
let len = (end - header.start()) / bytes_per_index;
110+
let len = header.edges;
117111

118112
// LLVM doesn't hoist EdgeHeader::mask so we do it ourselves.
119113
let mask = header.mask();
@@ -155,6 +149,7 @@ impl SerializedDepGraph {
155149
#[derive(Debug, Clone, Copy)]
156150
struct EdgeHeader {
157151
repr: usize,
152+
edges: u32,
158153
}
159154

160155
impl EdgeHeader {
@@ -197,9 +192,17 @@ impl SerializedDepGraph {
197192

198193
let graph_bytes = d.len() - (2 * IntEncodedWithFixedSize::ENCODED_SIZE) - d.position();
199194

200-
let mut nodes = IndexVec::with_capacity(node_count);
201-
let mut fingerprints = IndexVec::with_capacity(node_count);
202-
let mut edge_list_indices = IndexVec::with_capacity(node_count);
195+
let mut nodes: IndexVec<SerializedDepNodeIndex, _> = (0..node_count)
196+
.map(|_| DepNode {
197+
kind: D::DEP_KIND_NULL,
198+
hash: PackedFingerprint::from(Fingerprint::ZERO),
199+
})
200+
.collect();
201+
let mut fingerprints: IndexVec<SerializedDepNodeIndex, _> =
202+
(0..node_count).map(|_| Fingerprint::ZERO).collect();
203+
let mut edge_list_indices: IndexVec<SerializedDepNodeIndex, _> =
204+
(0..node_count).map(|_| EdgeHeader { repr: 0, edges: 0 }).collect();
205+
203206
// This estimation assumes that all of the encoded bytes are for the edge lists or for the
204207
// fixed-size node headers. But that's not necessarily true; if any edge list has a length
205208
// that spills out of the size we can bit-pack into SerializedNodeHeader then some of the
@@ -218,11 +221,10 @@ impl SerializedDepGraph {
218221
let node_header =
219222
SerializedNodeHeader::<D> { bytes: d.read_array(), _marker: PhantomData };
220223

221-
let _i: SerializedDepNodeIndex = nodes.push(node_header.node());
222-
debug_assert_eq!(_i.index(), _index);
224+
let index = node_header.index();
223225

224-
let _i: SerializedDepNodeIndex = fingerprints.push(node_header.fingerprint());
225-
debug_assert_eq!(_i.index(), _index);
226+
nodes[index] = node_header.node();
227+
fingerprints[index] = node_header.fingerprint();
226228

227229
// If the length of this node's edge list is small, the length is stored in the header.
228230
// If it is not, we fall back to another decoder call.
@@ -234,12 +236,11 @@ impl SerializedDepGraph {
234236
let edges_len_bytes = node_header.bytes_per_index() * (num_edges as usize);
235237
// The in-memory structure for the edges list stores the byte width of the edges on
236238
// this node, the offset into the global edge data array, and the number of edges.
237-
let edges_header = node_header.edges_header(&edge_list_data);
239+
let edges_header = node_header.edges_header(&edge_list_data, num_edges);
238240

239241
edge_list_data.extend(d.read_raw_bytes(edges_len_bytes));
240242

241-
let _i: SerializedDepNodeIndex = edge_list_indices.push(edges_header);
242-
debug_assert_eq!(_i.index(), _index);
243+
edge_list_indices[index] = edges_header;
243244
}
244245

245246
// When we access the edge list data, we do a fixed-size read from the edge list data then
@@ -287,9 +288,10 @@ impl SerializedDepGraph {
287288
/// * In whatever bits remain, the length of the edge list for this node, if it fits
288289
struct SerializedNodeHeader<D> {
289290
// 2 bytes for the DepNode
291+
// 4 bytes for the index
290292
// 16 for Fingerprint in DepNode
291293
// 16 for Fingerprint in NodeInfo
292-
bytes: [u8; 34],
294+
bytes: [u8; 38],
293295
_marker: PhantomData<D>,
294296
}
295297

@@ -299,6 +301,7 @@ struct Unpacked {
299301
len: Option<u32>,
300302
bytes_per_index: usize,
301303
kind: DepKind,
304+
index: SerializedDepNodeIndex,
302305
hash: PackedFingerprint,
303306
fingerprint: Fingerprint,
304307
}
@@ -320,6 +323,7 @@ impl<D: Deps> SerializedNodeHeader<D> {
320323
#[inline]
321324
fn new(
322325
node: DepNode,
326+
index: DepNodeIndex,
323327
fingerprint: Fingerprint,
324328
edge_max_index: u32,
325329
edge_count: usize,
@@ -341,10 +345,11 @@ impl<D: Deps> SerializedNodeHeader<D> {
341345
let hash: Fingerprint = node.hash.into();
342346

343347
// Using half-open ranges ensures an unconditional panic if we get the magic numbers wrong.
344-
let mut bytes = [0u8; 34];
348+
let mut bytes = [0u8; 38];
345349
bytes[..2].copy_from_slice(&head.to_le_bytes());
346-
bytes[2..18].copy_from_slice(&hash.to_le_bytes());
347-
bytes[18..].copy_from_slice(&fingerprint.to_le_bytes());
350+
bytes[2..6].copy_from_slice(&index.as_u32().to_le_bytes());
351+
bytes[6..22].copy_from_slice(&hash.to_le_bytes());
352+
bytes[22..].copy_from_slice(&fingerprint.to_le_bytes());
348353

349354
#[cfg(debug_assertions)]
350355
{
@@ -361,8 +366,9 @@ impl<D: Deps> SerializedNodeHeader<D> {
361366
#[inline]
362367
fn unpack(&self) -> Unpacked {
363368
let head = u16::from_le_bytes(self.bytes[..2].try_into().unwrap());
364-
let hash = self.bytes[2..18].try_into().unwrap();
365-
let fingerprint = self.bytes[18..].try_into().unwrap();
369+
let index = u32::from_le_bytes(self.bytes[2..6].try_into().unwrap());
370+
let hash = self.bytes[6..22].try_into().unwrap();
371+
let fingerprint = self.bytes[22..].try_into().unwrap();
366372

367373
let kind = head & mask(Self::KIND_BITS) as u16;
368374
let bytes_per_index = (head >> Self::KIND_BITS) & mask(Self::WIDTH_BITS) as u16;
@@ -372,6 +378,7 @@ impl<D: Deps> SerializedNodeHeader<D> {
372378
len: len.checked_sub(1),
373379
bytes_per_index: bytes_per_index as usize + 1,
374380
kind: DepKind::new(kind),
381+
index: SerializedDepNodeIndex::from_u32(index),
375382
hash: Fingerprint::from_le_bytes(hash).into(),
376383
fingerprint: Fingerprint::from_le_bytes(fingerprint),
377384
}
@@ -387,6 +394,11 @@ impl<D: Deps> SerializedNodeHeader<D> {
387394
self.unpack().bytes_per_index
388395
}
389396

397+
#[inline]
398+
fn index(&self) -> SerializedDepNodeIndex {
399+
self.unpack().index
400+
}
401+
390402
#[inline]
391403
fn fingerprint(&self) -> Fingerprint {
392404
self.unpack().fingerprint
@@ -399,9 +411,10 @@ impl<D: Deps> SerializedNodeHeader<D> {
399411
}
400412

401413
#[inline]
402-
fn edges_header(&self, edge_list_data: &[u8]) -> EdgeHeader {
414+
fn edges_header(&self, edge_list_data: &[u8], edges: u32) -> EdgeHeader {
403415
EdgeHeader {
404416
repr: (edge_list_data.len() << DEP_NODE_WIDTH_BITS) | (self.bytes_per_index() - 1),
417+
edges,
405418
}
406419
}
407420
}
@@ -414,10 +427,15 @@ struct NodeInfo {
414427
}
415428

416429
impl NodeInfo {
417-
fn encode<D: Deps>(&self, e: &mut FileEncoder) {
430+
fn encode<D: Deps>(&self, e: &mut MemEncoder, index: DepNodeIndex) {
418431
let NodeInfo { node, fingerprint, ref edges } = *self;
419-
let header =
420-
SerializedNodeHeader::<D>::new(node, fingerprint, edges.max_index(), edges.len());
432+
let header = SerializedNodeHeader::<D>::new(
433+
node,
434+
index,
435+
fingerprint,
436+
edges.max_index(),
437+
edges.len(),
438+
);
421439
e.write_array(header.bytes);
422440

423441
if header.len().is_none() {
@@ -439,8 +457,9 @@ impl NodeInfo {
439457
/// This avoids the overhead of constructing `EdgesVec`, which would be needed to call `encode`.
440458
#[inline]
441459
fn encode_promoted<D: Deps>(
442-
e: &mut FileEncoder,
460+
e: &mut MemEncoder,
443461
node: DepNode,
462+
index: DepNodeIndex,
444463
fingerprint: Fingerprint,
445464
prev_index: SerializedDepNodeIndex,
446465
colors: &DepNodeColorMap,
@@ -453,7 +472,7 @@ impl NodeInfo {
453472
let edge_max =
454473
edges.clone().map(|i| colors.current(i).unwrap().as_u32()).max().unwrap_or(0);
455474

456-
let header = SerializedNodeHeader::<D>::new(node, fingerprint, edge_max, edge_count);
475+
let header = SerializedNodeHeader::<D>::new(node, index, fingerprint, edge_max, edge_count);
457476
e.write_array(header.bytes);
458477

459478
if header.len().is_none() {
@@ -487,6 +506,8 @@ struct EncoderState<D: Deps> {
487506
total_edge_count: usize,
488507
stats: Option<FxHashMap<DepKind, Stat>>,
489508

509+
mem_encoder: MemEncoder,
510+
490511
/// Stores the number of times we've encoded each dep kind.
491512
kind_stats: Vec<u32>,
492513
marker: PhantomData<D>,
@@ -500,22 +521,28 @@ impl<D: Deps> EncoderState<D> {
500521
total_edge_count: 0,
501522
total_node_count: 0,
502523
stats: record_stats.then(FxHashMap::default),
524+
mem_encoder: MemEncoder::new(),
503525
kind_stats: iter::repeat(0).take(D::DEP_KIND_MAX as usize + 1).collect(),
504526
marker: PhantomData,
505527
}
506528
}
507529

530+
#[inline]
531+
fn alloc_index(&mut self) -> DepNodeIndex {
532+
let index = DepNodeIndex::new(self.total_node_count);
533+
self.total_node_count += 1;
534+
index
535+
}
536+
508537
#[inline]
509538
fn record(
510539
&mut self,
511540
node: DepNode,
541+
index: DepNodeIndex,
512542
edge_count: usize,
513543
edges: impl FnOnce(&mut Self) -> Vec<DepNodeIndex>,
514544
record_graph: &Option<Lock<DepGraphQuery>>,
515545
) -> DepNodeIndex {
516-
let index = DepNodeIndex::new(self.total_node_count);
517-
518-
self.total_node_count += 1;
519546
self.kind_stats[node.kind.as_usize()] += 1;
520547
self.total_edge_count += edge_count;
521548

@@ -547,14 +574,25 @@ impl<D: Deps> EncoderState<D> {
547574
index
548575
}
549576

577+
#[inline]
578+
fn flush_mem_encoder(&mut self) {
579+
let data = &mut self.mem_encoder.data;
580+
if data.len() > 64 * 1024 {
581+
self.encoder.emit_raw_bytes(&data[..]);
582+
data.clear();
583+
}
584+
}
585+
550586
/// Encodes a node to the current graph.
551587
fn encode_node(
552588
&mut self,
553589
node: &NodeInfo,
554590
record_graph: &Option<Lock<DepGraphQuery>>,
555591
) -> DepNodeIndex {
556-
node.encode::<D>(&mut self.encoder);
557-
self.record(node.node, node.edges.len(), |_| node.edges[..].to_vec(), record_graph)
592+
let index = self.alloc_index();
593+
node.encode::<D>(&mut self.mem_encoder, index);
594+
self.flush_mem_encoder();
595+
self.record(node.node, index, node.edges.len(), |_| node.edges[..].to_vec(), record_graph)
558596
}
559597

560598
/// Encodes a node that was promoted from the previous graph. It reads the information directly from
@@ -570,20 +608,22 @@ impl<D: Deps> EncoderState<D> {
570608
record_graph: &Option<Lock<DepGraphQuery>>,
571609
colors: &DepNodeColorMap,
572610
) -> DepNodeIndex {
611+
let index = self.alloc_index();
573612
let node = self.previous.index_to_node(prev_index);
574-
575613
let fingerprint = self.previous.fingerprint_by_index(prev_index);
576614
let edge_count = NodeInfo::encode_promoted::<D>(
577-
&mut self.encoder,
615+
&mut self.mem_encoder,
578616
node,
617+
index,
579618
fingerprint,
580619
prev_index,
581620
colors,
582621
&self.previous,
583622
);
584-
623+
self.flush_mem_encoder();
585624
self.record(
586625
node,
626+
index,
587627
edge_count,
588628
|this| {
589629
this.previous
@@ -592,12 +632,14 @@ impl<D: Deps> EncoderState<D> {
592632
.collect()
593633
},
594634
record_graph,
595-
)
635+
);
636+
index
596637
}
597638

598639
fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult {
599640
let Self {
600641
mut encoder,
642+
mem_encoder,
601643
total_node_count,
602644
total_edge_count,
603645
stats: _,
@@ -606,6 +648,8 @@ impl<D: Deps> EncoderState<D> {
606648
previous: _,
607649
} = self;
608650

651+
encoder.emit_raw_bytes(&mem_encoder.data);
652+
609653
let node_count = total_node_count.try_into().unwrap();
610654
let edge_count = total_edge_count.try_into().unwrap();
611655

Diff for: compiler/rustc_serialize/src/opaque.rs

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use crate::int_overflow::DebugStrictAdd;
1010
use crate::leb128;
1111
use crate::serialize::{Decodable, Decoder, Encodable, Encoder};
1212

13+
pub mod mem_encoder;
14+
1315
// -----------------------------------------------------------------------------
1416
// Encoder
1517
// -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)