Lazily decode SourceFile from metadata #100209


Merged · 8 commits · Aug 19, 2022
Changes from 1 commit
33 changes: 11 additions & 22 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
@@ -527,6 +527,9 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
bug!("Cannot decode Span without Session.")
};

// Index of the file in the corresponding crate's list of encoded files.
let metadata_index = usize::decode(decoder);

// There are two possibilities here:
// 1. This is a 'local span', which is located inside a `SourceFile`
// that came from this crate. In this case, we use the source map data
@@ -587,27 +590,9 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
foreign_data.imported_source_files(sess)
};

let source_file = {
// Optimize for the case that most spans within a translated item
// originate from the same source_file.
let last_source_file = &imported_source_files[decoder.last_source_file_index];

if lo >= last_source_file.original_start_pos && lo <= last_source_file.original_end_pos
{
last_source_file
} else {
let index = imported_source_files
.binary_search_by_key(&lo, |source_file| source_file.original_start_pos)
.unwrap_or_else(|index| index - 1);

// Don't try to cache the index for foreign spans,
// as this would require a map from CrateNums to indices
if tag == TAG_VALID_SPAN_LOCAL {
decoder.last_source_file_index = index;
}
&imported_source_files[index]
}
};
// Optimize for the case that most spans within a translated item
// originate from the same source_file.
let source_file = &imported_source_files[metadata_index];

// Make sure our binary search above is correct.
debug_assert!(
@@ -1545,7 +1530,8 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
let external_source_map = self.root.source_map.decode(self);

external_source_map
.map(|source_file_to_import| {
.enumerate()
.map(|(source_file_index, source_file_to_import)| {
// We can't reuse an existing SourceFile, so allocate a new one
// containing the information we need.
let rustc_span::SourceFile {
@@ -1605,6 +1591,9 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
normalized_pos,
start_pos,
end_pos,
source_file_index
.try_into()
.expect("cannot import more than U32_MAX files"),
);
debug!(
"CrateMetaData::imported_source_files alloc \
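For readers skimming the decoder change: the hunks above replace the per-span binary search over imported SourceFiles with a direct lookup through the newly decoded metadata_index. Below is a minimal standalone sketch of that idea with simplified types; it is not the rustc code itself, only an illustration of the old search versus the new indexed lookup.

```rust
// Standalone sketch (simplified types, not rustc's) of the decoder-side change.

struct ImportedFile {
    original_start_pos: u32,
}

// Old approach: find the file containing `lo` by binary search, O(log n) per span.
// (Equivalent in effect to the removed `binary_search_by_key` + `unwrap_or_else`.)
fn find_by_search(files: &[ImportedFile], lo: u32) -> &ImportedFile {
    let idx = files
        .partition_point(|file| file.original_start_pos <= lo)
        .saturating_sub(1);
    &files[idx]
}

// New approach: the encoder wrote the file's index next to the span, so the
// decoder can index directly, O(1) per span and no last-file cache needed.
fn find_by_index(files: &[ImportedFile], metadata_index: usize) -> &ImportedFile {
    &files[metadata_index]
}

fn main() {
    let files = vec![
        ImportedFile { original_start_pos: 0 },
        ImportedFile { original_start_pos: 100 },
        ImportedFile { original_start_pos: 200 },
    ];
    // A span with lo = 150 lives in the second file either way.
    assert!(std::ptr::eq(find_by_search(&files, 150), &files[1]));
    assert!(std::ptr::eq(find_by_index(&files, 1), &files[1]));
}
```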
96 changes: 49 additions & 47 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
@@ -17,7 +17,6 @@ use rustc_hir::definitions::DefPathData;
use rustc_hir::intravisit::{self, Visitor};
use rustc_hir::lang_items;
use rustc_hir::{AnonConst, GenericParamKind};
use rustc_index::bit_set::GrowableBitSet;
use rustc_middle::hir::nested_filter;
use rustc_middle::middle::dependency_format::Linkage;
use rustc_middle::middle::exported_symbols::{
@@ -66,13 +65,10 @@ pub(super) struct EncodeContext<'a, 'tcx> {
// The indices (into the `SourceMap`'s `MonotonicVec`)
// of all of the `SourceFiles` that we need to serialize.
// When we serialize a `Span`, we insert the index of its
// `SourceFile` into the `GrowableBitSet`.
//
// This needs to be a `GrowableBitSet` and not a
// regular `BitSet` because we may actually import new `SourceFiles`
// during metadata encoding, due to executing a query
// with a result containing a foreign `Span`.
required_source_files: Option<GrowableBitSet<usize>>,
// `SourceFile` into the `FxIndexSet`.
// The order inside the `FxIndexSet` is used as on-disk
// order of `SourceFiles`, and encoded inside `Span`s.
Review comment (Member):
Is it worth making this order sensitive like that? I could imagine some kind of poor effects on deterministic metadata contents -- maybe we can emit in order or even sorted by (remapped) file path?

Reply (Contributor, PR author):
The order of the SourceFiles here should be exactly the order in which spans are encoded. Therefore, a change in this order can only be caused by a change in encoded spans. As we can't really know which SourceFiles we will need in the end, I don't see how we can pre-sort them.
required_source_files: Option<FxIndexSet<usize>>,
is_proc_macro: bool,
hygiene_ctxt: &'a HygieneEncodeContext,
}
@@ -245,10 +241,6 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
return TAG_PARTIAL_SPAN.encode(s);
}

let source_files = s.required_source_files.as_mut().expect("Already encoded SourceMap!");
// Record the fact that we need to encode the data for this `SourceFile`
source_files.insert(s.source_file_cache.1);

// There are two possible cases here:
// 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the
// crate we are writing metadata for. When the metadata for *this* crate gets
@@ -265,30 +257,38 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
// if we're a proc-macro crate.
// This allows us to avoid loading the dependencies of proc-macro crates: all of
// the information we need to decode `Span`s is stored in the proc-macro crate.
let (tag, lo, hi) = if s.source_file_cache.0.is_imported() && !s.is_proc_macro {
// To simplify deserialization, we 'rebase' this span onto the crate it originally came from
// (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values
// are relative to the source map information for the 'foreign' crate whose CrateNum
// we write into the metadata. This allows `imported_source_files` to binary
// search through the 'foreign' crate's source map information, using the
// deserialized 'lo' and 'hi' values directly.
//
// All of this logic ensures that the final result of deserialization is a 'normal'
// Span that can be used without any additional trouble.
let external_start_pos = {
// Introduce a new scope so that we drop the 'lock()' temporary
match &*s.source_file_cache.0.external_src.lock() {
ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos,
src => panic!("Unexpected external source {:?}", src),
}
};
let lo = (span.lo - s.source_file_cache.0.start_pos) + external_start_pos;
let hi = (span.hi - s.source_file_cache.0.start_pos) + external_start_pos;
let (tag, lo, hi, metadata_index) =
if s.source_file_cache.0.is_imported() && !s.is_proc_macro {
// To simplify deserialization, we 'rebase' this span onto the crate it originally came from
// (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values
// are relative to the source map information for the 'foreign' crate whose CrateNum
// we write into the metadata. This allows `imported_source_files` to binary
// search through the 'foreign' crate's source map information, using the
// deserialized 'lo' and 'hi' values directly.
//
// All of this logic ensures that the final result of deserialization is a 'normal'
// Span that can be used without any additional trouble.
let (external_start_pos, metadata_index) = {
// Introduce a new scope so that we drop the 'lock()' temporary
match &*s.source_file_cache.0.external_src.lock() {
ExternalSource::Foreign { original_start_pos, metadata_index, .. } => {
(*original_start_pos, *metadata_index as usize)
}
src => panic!("Unexpected external source {:?}", src),
}
};
let lo = (span.lo - s.source_file_cache.0.start_pos) + external_start_pos;
let hi = (span.hi - s.source_file_cache.0.start_pos) + external_start_pos;

(TAG_VALID_SPAN_FOREIGN, lo, hi)
} else {
(TAG_VALID_SPAN_LOCAL, span.lo, span.hi)
};
(TAG_VALID_SPAN_FOREIGN, lo, hi, metadata_index)
} else {
// Record the fact that we need to encode the data for this `SourceFile`
let source_files =
s.required_source_files.as_mut().expect("Already encoded SourceMap!");
let (source_file_index, _) = source_files.insert_full(s.source_file_cache.1);

(TAG_VALID_SPAN_LOCAL, span.lo, span.hi, source_file_index)
};

tag.encode(s);
lo.encode(s);
@@ -298,6 +298,9 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
let len = hi - lo;
len.encode(s);

// Encode the index of the `SourceFile` for the span, in order to make decoding faster.
metadata_index.encode(s);

if tag == TAG_VALID_SPAN_FOREIGN {
// This needs to be two lines to avoid holding the `s.source_file_cache`
// while calling `cnum.encode(s)`
@@ -460,18 +463,17 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {

let working_directory = &self.tcx.sess.opts.working_dir;

let adapted = all_source_files
// Only serialize `SourceFile`s that were used during the encoding of a `Span`.
//
// The order in which we encode source files is important here: the on-disk format for
// `Span` contains the index of the corresponding `SourceFile`.
let adapted = required_source_files
.iter()
.enumerate()
.filter(|(idx, source_file)| {
// Only serialize `SourceFile`s that were used
// during the encoding of a `Span`
required_source_files.contains(*idx) &&
// Don't serialize imported `SourceFile`s, unless
// we're in a proc-macro crate.
(!source_file.is_imported() || self.is_proc_macro)
})
.map(|(_, source_file)| {
.map(|&source_file_index| &all_source_files[source_file_index])
.map(|source_file| {
// Don't serialize imported `SourceFile`s, unless we're in a proc-macro crate.
assert!(!source_file.is_imported() || self.is_proc_macro);

// At export time we expand all source file paths to absolute paths because
// downstream compilation sessions can have a different compiler working
// directory, so relative paths from this or any other upstream crate
@@ -2228,7 +2230,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) {

let source_map_files = tcx.sess.source_map().files();
let source_file_cache = (source_map_files[0].clone(), 0);
let required_source_files = Some(GrowableBitSet::with_capacity(source_map_files.len()));
let required_source_files = Some(FxIndexSet::default());
drop(source_map_files);

let hygiene_ctxt = HygieneEncodeContext::default();
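The encoder side relies on one property of the replacement data structure: FxIndexSet::insert_full returns a stable, insertion-order index for each element, whether it was newly inserted or already present. Below is a small sketch of that behaviour using plain indexmap::IndexSet (rustc's FxIndexSet is the same type parameterised with the Fx hasher; depending on the indexmap crate directly is an assumption of this sketch, not something the PR does).

```rust
// Sketch of the index-assignment behaviour the encoder depends on.
use indexmap::IndexSet;

fn main() {
    let mut required_source_files: IndexSet<usize> = IndexSet::default();

    // Suppose spans referencing SourceMap files 7, 3, 7 and 0 are encoded in this order.
    for &source_map_index in &[7usize, 3, 7, 0] {
        // `insert_full` returns (index, newly_inserted); the index is stable
        // and equals the element's position in insertion order.
        let (metadata_index, _newly_inserted) =
            required_source_files.insert_full(source_map_index);
        println!("SourceMap file {source_map_index} -> metadata index {metadata_index}");
    }

    // Iterating the set yields insertion order, which becomes the on-disk
    // order of the serialized SourceFiles, matching the indices written
    // next to the spans.
    let on_disk_order: Vec<usize> = required_source_files.iter().copied().collect();
    assert_eq!(on_disk_order, vec![7, 3, 0]);
}
```

This is also the substance of the review exchange above: the on-disk file order equals the order in which spans are first encoded, so it only changes when the set or order of encoded spans changes.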
2 changes: 2 additions & 0 deletions compiler/rustc_span/src/lib.rs
@@ -1098,6 +1098,8 @@ pub enum ExternalSource {
original_start_pos: BytePos,
/// The end of this SourceFile within the source_map of its original crate.
original_end_pos: BytePos,
/// Index of the file inside metadata.
metadata_index: u32,
},
}

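The new metadata_index field on ExternalSource::Foreign is what lets a downstream crate re-encode a foreign span without any search: it records which slot the file occupied in the owning crate's metadata, alongside the positions used for rebasing. A simplified sketch of that flow follows (toy types and values, not rustc's).

```rust
// Toy model of re-encoding a foreign span: rebase its positions onto the
// owning crate's coordinates and reuse that crate's file index.
#[allow(dead_code)]
#[derive(Debug)]
enum ExternalSource {
    Unneeded,
    Foreign {
        /// Start of this file within the owning crate's source map.
        original_start_pos: u32,
        /// Index of the file inside the owning crate's metadata.
        metadata_index: u32,
    },
}

fn rebase_foreign_span(src: &ExternalSource, lo: u32, hi: u32, start_pos: u32) -> (u32, u32, u32) {
    match src {
        ExternalSource::Foreign { original_start_pos, metadata_index } => (
            (lo - start_pos) + *original_start_pos,
            (hi - start_pos) + *original_start_pos,
            *metadata_index,
        ),
        other => panic!("unexpected external source {other:?}"),
    }
}

fn main() {
    let src = ExternalSource::Foreign { original_start_pos: 500, metadata_index: 3 };
    // A span at [120, 140) in a file that starts at 100 locally and at 500
    // in the owning crate rebases to [520, 540), keeping metadata index 3.
    assert_eq!(rebase_foreign_span(&src, 120, 140, 100), (520, 540, 3));
}
```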
2 changes: 2 additions & 0 deletions compiler/rustc_span/src/source_map.rs
@@ -337,6 +337,7 @@ impl SourceMap {
mut file_local_normalized_pos: Vec<NormalizedPos>,
original_start_pos: BytePos,
original_end_pos: BytePos,
metadata_index: u32,
) -> Lrc<SourceFile> {
let start_pos = self
.allocate_address_space(source_len)
@@ -383,6 +384,7 @@ impl SourceMap {
kind: ExternalSourceKind::AbsentOk,
original_start_pos,
original_end_pos,
metadata_index,
}),
start_pos,
end_pos,
1 change: 1 addition & 0 deletions compiler/rustc_span/src/source_map/tests.rs
@@ -252,6 +252,7 @@ fn t10() {
normalized_pos,
start_pos,
end_pos,
0,
);

assert!(