Skip to content

Commit 7ad670d

Browse files
Specialize DefPathHash table to skip crate IDs
Instead, we store just the local crate hash as a bare u64. On decoding, we recombine it with the crate's stable crate ID stored separately in metadata. The end result is that we save ~8 bytes/DefIndex in metadata size. One key detail here is that we no longer distinguish in encoded metadata between present and non-present DefPathHashes. It used to be highly likely we could distinguish as we used DefPathHash::default(), an all-zero representation. However in theory even that is fallible as nothing strictly prevents the StableCrateId from being zero.
1 parent 5151b8c commit 7ad670d

File tree

5 files changed

+25
-31
lines changed

5 files changed

+25
-31
lines changed

compiler/rustc_metadata/src/rmeta/decoder.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::rmeta::*;
66

77
use rustc_ast as ast;
88
use rustc_data_structures::captures::Captures;
9+
use rustc_data_structures::fingerprint::Fingerprint;
910
use rustc_data_structures::owned_slice::OwnedSlice;
1011
use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceLock};
1112
use rustc_data_structures::unhash::UnhashMap;
@@ -1489,9 +1490,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
14891490
index: DefIndex,
14901491
def_path_hashes: &mut FxHashMap<DefIndex, DefPathHash>,
14911492
) -> DefPathHash {
1492-
*def_path_hashes
1493-
.entry(index)
1494-
.or_insert_with(|| self.root.tables.def_path_hashes.get(self, index))
1493+
*def_path_hashes.entry(index).or_insert_with(|| {
1494+
// This is a hack to workaround the fact that we can't easily encode/decode a Hash64
1495+
// into the FixedSizeEncoding, as Hash64 lacks a Default impl. A future refactor to
1496+
// relax the Default restriction will likely fix this.
1497+
let fingerprint = Fingerprint::new(
1498+
self.root.stable_crate_id.as_u64(),
1499+
self.root.tables.def_path_hashes.get(self, index),
1500+
);
1501+
DefPathHash::new(self.root.stable_crate_id, fingerprint.split().1)
1502+
})
14951503
}
14961504

14971505
#[inline]

compiler/rustc_metadata/src/rmeta/encoder.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -467,13 +467,17 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
467467
let def_key = self.lazy(table.def_key(def_index));
468468
let def_path_hash = table.def_path_hash(def_index);
469469
self.tables.def_keys.set_some(def_index, def_key);
470-
self.tables.def_path_hashes.set(def_index, def_path_hash);
470+
assert_eq!(
471+
def_path_hash.stable_crate_id(),
472+
self.tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id()
473+
);
474+
self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
471475
}
472476
} else {
473477
for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() {
474478
let def_key = self.lazy(def_key);
475479
self.tables.def_keys.set_some(def_index, def_key);
476-
self.tables.def_path_hashes.set(def_index, *def_path_hash);
480+
self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64());
477481
}
478482
}
479483
}

compiler/rustc_metadata/src/rmeta/mod.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,14 @@ define_tables! {
386386
is_type_alias_impl_trait: Table<DefIndex, bool>,
387387
type_alias_is_lazy: Table<DefIndex, bool>,
388388
attr_flags: Table<DefIndex, AttrFlags>,
389-
def_path_hashes: Table<DefIndex, DefPathHash>,
389+
// The u64 is the crate-local part of the DefPathHash. All hashes in this crate have the same
390+
// StableCrateId, so we omit encoding those into the table.
391+
//
392+
// N.B. this means that we can't distinguish between non-present items and a present but zero
393+
// local hash item. In practice the compiler shouldn't care about non-present items in a foreign
394+
// crate, so this should be OK. If we do start to care we should most likely adjust our hashing
395+
// to reserve a bit (e.g., NonZeroU64).
396+
def_path_hashes: Table<DefIndex, u64>,
390397
explicit_item_bounds: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
391398
inferred_outlives_of: Table<DefIndex, LazyArray<(ty::Clause<'static>, Span)>>,
392399
inherent_impls: Table<DefIndex, LazyArray<DefIndex>>,

compiler/rustc_metadata/src/rmeta/table.rs

-23
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use crate::rmeta::*;
22

3-
use rustc_data_structures::fingerprint::Fingerprint;
43
use rustc_hir::def::CtorOf;
54
use rustc_index::Idx;
65

@@ -44,12 +43,6 @@ impl<T> IsDefault for LazyArray<T> {
4443
}
4544
}
4645

47-
impl IsDefault for DefPathHash {
48-
fn is_default(&self) -> bool {
49-
self.0 == Fingerprint::ZERO
50-
}
51-
}
52-
5346
impl IsDefault for UnusedGenericParams {
5447
fn is_default(&self) -> bool {
5548
// UnusedGenericParams encodes the *un*usedness as a bitset.
@@ -219,22 +212,6 @@ fixed_size_enum! {
219212
}
220213
}
221214

222-
// We directly encode `DefPathHash` because a `LazyValue` would incur a 25% cost.
223-
impl FixedSizeEncoding for DefPathHash {
224-
type ByteArray = [u8; 16];
225-
226-
#[inline]
227-
fn from_bytes(b: &[u8; 16]) -> Self {
228-
DefPathHash(Fingerprint::from_le_bytes(*b))
229-
}
230-
231-
#[inline]
232-
fn write_to_bytes(self, b: &mut [u8; 16]) {
233-
debug_assert!(!self.is_default());
234-
*b = self.0.to_le_bytes();
235-
}
236-
}
237-
238215
// We directly encode RawDefId because using a `LazyValue` would incur a 50% overhead in the worst case.
239216
impl FixedSizeEncoding for Option<RawDefId> {
240217
type ByteArray = [u8; 8];

compiler/rustc_span/src/def_id.rs

-2
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,6 @@ impl DefPathHash {
114114
}
115115

116116
/// Returns the crate-local part of the [DefPathHash].
117-
///
118-
/// Used for tests.
119117
#[inline]
120118
pub fn local_hash(&self) -> Hash64 {
121119
self.0.split().1

0 commit comments

Comments
 (0)