-
Notifications
You must be signed in to change notification settings - Fork 13.4k
compute and cache HIR hashes at beginning #35854
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
6bd80d1
0595a3a
226e1e5
484da37
f923083
c42e0a3
ea2d90e
c21fa64
1cc7c90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,106 +8,122 @@ | |
// option. This file may not be copied, modified, or distributed | ||
// except according to those terms. | ||
|
||
//! Calculation of a Strict Version Hash for crates. For a length | ||
//! comment explaining the general idea, see `librustc/middle/svh.rs`. | ||
|
||
//! Calculation of the (misnamed) "strict version hash" for crates and | ||
//! items. This hash is used to tell when the HIR changed in such a | ||
//! way that results from previous compilations may no longer be | ||
//! applicable and hence must be recomputed. It should probably be | ||
//! renamed to the ICH (incremental compilation hash). | ||
//! | ||
//! The hashes for all items are computed once at the beginning of | ||
//! compilation and stored into a map. In addition, a hash is computed | ||
//! of the **entire crate**. | ||
//! | ||
//! Storing the hashes in a map avoids the need to compute them twice | ||
//! (once when loading prior incremental results and once when | ||
//! saving), but it is also important for correctness: at least as of | ||
//! the time of this writing, the typeck passes rewrites entries in | ||
//! the dep-map in-place to accommodate UFCS resolutions. Since name | ||
//! resolution is part of the hash, the result is that hashes computed | ||
//! at the end of compilation would be different from those computed | ||
//! at the beginning. | ||
|
||
use syntax::ast; | ||
use syntax::attr::AttributeMethods; | ||
use std::hash::{Hash, SipHasher, Hasher}; | ||
use rustc::dep_graph::DepNode; | ||
use rustc::hir; | ||
use rustc::hir::def_id::{CRATE_DEF_INDEX, DefId}; | ||
use rustc::hir::map::{NodeItem, NodeForeignItem}; | ||
use rustc::hir::svh::Svh; | ||
use rustc::hir::intravisit as visit; | ||
use rustc::ty::TyCtxt; | ||
use rustc::hir::intravisit::{self, Visitor}; | ||
use rustc_data_structures::fnv::FnvHashMap; | ||
|
||
use self::svh_visitor::StrictVersionHashVisitor; | ||
|
||
mod svh_visitor; | ||
|
||
pub trait SvhCalculate { | ||
/// Calculate the SVH for an entire krate. | ||
fn calculate_krate_hash(self) -> Svh; | ||
pub type HashesMap = FnvHashMap<DepNode<DefId>, u64>; | ||
|
||
/// Calculate the SVH for a particular item. | ||
fn calculate_item_hash(self, def_id: DefId) -> u64; | ||
pub fn compute_hashes_map<'a, 'tcx: 'a>(tcx: TyCtxt<'a, 'tcx, 'tcx>) -> HashesMap { | ||
let _ignore = tcx.dep_graph.in_ignore(); | ||
let krate = tcx.map.krate(); | ||
let mut visitor = HashItemsVisitor { tcx: tcx, hashes: FnvHashMap() }; | ||
visitor.calculate_def_id(DefId::local(CRATE_DEF_INDEX), |v| visit::walk_crate(v, krate)); | ||
krate.visit_all_items(&mut visitor); | ||
visitor.compute_crate_hash(); | ||
visitor.hashes | ||
} | ||
|
||
impl<'a, 'tcx> SvhCalculate for TyCtxt<'a, 'tcx, 'tcx> { | ||
fn calculate_krate_hash(self) -> Svh { | ||
// FIXME (#14132): This is better than it used to be, but it still not | ||
// ideal. We now attempt to hash only the relevant portions of the | ||
// Crate AST as well as the top-level crate attributes. (However, | ||
// the hashing of the crate attributes should be double-checked | ||
// to ensure it is not incorporating implementation artifacts into | ||
// the hash that are not otherwise visible.) | ||
struct HashItemsVisitor<'a, 'tcx: 'a> { | ||
tcx: TyCtxt<'a, 'tcx, 'tcx>, | ||
hashes: HashesMap, | ||
} | ||
|
||
let crate_disambiguator = self.sess.local_crate_disambiguator(); | ||
let krate = self.map.krate(); | ||
impl<'a, 'tcx> HashItemsVisitor<'a, 'tcx> { | ||
fn calculate_node_id<W>(&mut self, id: ast::NodeId, walk_op: W) | ||
where W: for<'v> FnMut(&mut StrictVersionHashVisitor<'v, 'tcx>) | ||
{ | ||
let def_id = self.tcx.map.local_def_id(id); | ||
self.calculate_def_id(def_id, walk_op) | ||
} | ||
|
||
// FIXME: this should use SHA1, not SipHash. SipHash is not built to | ||
// avoid collisions. | ||
fn calculate_def_id<W>(&mut self, def_id: DefId, mut walk_op: W) | ||
where W: for<'v> FnMut(&mut StrictVersionHashVisitor<'v, 'tcx>) | ||
{ | ||
assert!(def_id.is_local()); | ||
debug!("HashItemsVisitor::calculate(def_id={:?})", def_id); | ||
// FIXME: this should use SHA1, not SipHash. SipHash is not | ||
// built to avoid collisions. | ||
let mut state = SipHasher::new(); | ||
debug!("state: {:?}", state); | ||
walk_op(&mut StrictVersionHashVisitor::new(&mut state, self.tcx)); | ||
let item_hash = state.finish(); | ||
self.hashes.insert(DepNode::Hir(def_id), item_hash); | ||
debug!("calculate_item_hash: def_id={:?} hash={:?}", def_id, item_hash); | ||
} | ||
|
||
fn compute_crate_hash(&mut self) { | ||
let krate = self.tcx.map.krate(); | ||
|
||
// FIXME(#32753) -- at (*) we `to_le` for endianness, but is | ||
// this enough, and does it matter anyway? | ||
"crate_disambiguator".hash(&mut state); | ||
crate_disambiguator.len().to_le().hash(&mut state); // (*) | ||
crate_disambiguator.hash(&mut state); | ||
let mut crate_state = SipHasher::new(); | ||
|
||
debug!("crate_disambiguator: {:?}", crate_disambiguator); | ||
debug!("state: {:?}", state); | ||
let crate_disambiguator = self.tcx.sess.local_crate_disambiguator(); | ||
"crate_disambiguator".hash(&mut crate_state); | ||
crate_disambiguator.len().hash(&mut crate_state); | ||
crate_disambiguator.hash(&mut crate_state); | ||
|
||
// add each item (in some deterministic order) to the overall | ||
// crate hash. | ||
// | ||
// FIXME -- it'd be better to sort by the hash of the def-path, | ||
// so that reordering items would not affect the crate hash. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't ordering by hash value have the same effect? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Hmm, interesting. I imagine so -- of course, I'm not doing that. Perhaps I should. (In fact, not hashing the def-paths is probably just wrong, since I guess in theory one could have two items w/ distinct def-paths but the same hash and I would not notice). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, that's true, you're ordering by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yeah, I was just reproducing the older behavior. |
||
{ | ||
let mut visit = StrictVersionHashVisitor::new(&mut state, self); | ||
krate.visit_all_items(&mut visit); | ||
let mut keys: Vec<_> = self.hashes.keys().collect(); | ||
keys.sort(); | ||
for key in keys { | ||
self.hashes[key].hash(&mut crate_state); | ||
} | ||
} | ||
|
||
// FIXME (#14132): This hash is still sensitive to e.g. the | ||
// spans of the crate Attributes and their underlying | ||
// MetaItems; we should make ContentHashable impl for those | ||
// types and then use hash_content. But, since all crate | ||
// attributes should appear near beginning of the file, it is | ||
// not such a big deal to be sensitive to their spans for now. | ||
// | ||
// We hash only the MetaItems instead of the entire Attribute | ||
// to avoid hashing the AttrId | ||
for attr in &krate.attrs { | ||
debug!("krate attr {:?}", attr); | ||
attr.meta().hash(&mut state); | ||
attr.meta().hash(&mut crate_state); | ||
} | ||
|
||
Svh::new(state.finish()) | ||
let crate_hash = crate_state.finish(); | ||
self.hashes.insert(DepNode::Krate, crate_hash); | ||
debug!("calculate_crate_hash: crate_hash={:?}", crate_hash); | ||
} | ||
} | ||
|
||
fn calculate_item_hash(self, def_id: DefId) -> u64 { | ||
assert!(def_id.is_local()); | ||
|
||
debug!("calculate_item_hash(def_id={:?})", def_id); | ||
|
||
let mut state = SipHasher::new(); | ||
|
||
{ | ||
let mut visit = StrictVersionHashVisitor::new(&mut state, self); | ||
if def_id.index == CRATE_DEF_INDEX { | ||
// the crate root itself is not registered in the map | ||
// as an item, so we have to fetch it this way | ||
let krate = self.map.krate(); | ||
intravisit::walk_crate(&mut visit, krate); | ||
} else { | ||
let node_id = self.map.as_local_node_id(def_id).unwrap(); | ||
match self.map.find(node_id) { | ||
Some(NodeItem(item)) => visit.visit_item(item), | ||
Some(NodeForeignItem(item)) => visit.visit_foreign_item(item), | ||
r => bug!("calculate_item_hash: expected an item for node {} not {:?}", | ||
node_id, r), | ||
} | ||
} | ||
} | ||
|
||
let hash = state.finish(); | ||
|
||
debug!("calculate_item_hash: def_id={:?} hash={:?}", def_id, hash); | ||
impl<'a, 'tcx> visit::Visitor<'tcx> for HashItemsVisitor<'a, 'tcx> { | ||
fn visit_item(&mut self, item: &'tcx hir::Item) { | ||
self.calculate_node_id(item.id, |v| v.visit_item(item)); | ||
visit::walk_item(self, item); | ||
} | ||
|
||
hash | ||
fn visit_foreign_item(&mut self, item: &'tcx hir::ForeignItem) { | ||
self.calculate_node_id(item.id, |v| v.visit_foreign_item(item)); | ||
visit::walk_foreign_item(self, item); | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer a less generic name than
hashes_map
here and above.incr_comp_hashes
oric_hashes_map
orich_map
, something that makes it visible that this has to do with incr. comp.