Skip to content

Commit 404889d

Browse files
committed
Auto merge of #11044 - Eh2406:file_hash, r=weihanglo
Cache index files based on contents hash. Since #10507, Cargo has known how to read registry cached files whose index version starts with the hash of the file contents. Git makes it very cheap to determine the hash of a file. This PR switches Cargo to start writing the new format. Cargo versions from before #10507 will not know how to read, and will therefore overwrite, cached files written by Cargo versions after this PR. Cargo versions after this PR can still read, and will consider up-to-date, cached files written by all older Cargo versions. As I'm writing this out, I'm thinking that there may not be any point in writing a file that has both. An alternative implementation just writes the file contents hash. 🤔
2 parents 9ecc7f8 + e24222e commit 404889d

File tree

1 file changed

+12
-18
lines changed

1 file changed

+12
-18
lines changed

src/cargo/sources/registry/remote.rs

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,9 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
163163
// Older versions of Cargo used the single value of the hash of the HEAD commit as a `index_version`.
164164
// This is technically correct but a little too conservative. If a new commit is fetched all cached
165165
// files need to be regenerated even if a particular file was not changed.
166-
// Cargo now reads the `index_version` in two parts the cache file is considered valid if `index_version`
167-
// ends with the hash of the HEAD commit OR if it starts with the hash of the file's contents.
168-
// In the future cargo can write cached files with `index_version` = `git_file_hash + ":" + `git_commit_hash`,
169-
// but for now it still uses `git_commit_hash` to be compatible with older Cargoes.
166+
// However if an old cargo has written such a file we still know how to read it, as long as we check for that hash value.
167+
//
168+
// Cargo now uses a hash of the file's contents as provided by git.
170169
fn load(
171170
&mut self,
172171
_root: &Path,
@@ -178,10 +177,9 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
178177
}
179178
// Check if the cache is valid.
180179
let git_commit_hash = self.current_version();
181-
if let (Some(c), Some(i)) = (git_commit_hash, index_version) {
182-
if i.ends_with(c.as_str()) {
183-
return Poll::Ready(Ok(LoadResponse::CacheValid));
184-
}
180+
if index_version.is_some() && index_version == git_commit_hash.as_deref() {
181+
// This file was written by an old version of cargo, but it is still up-to-date.
182+
return Poll::Ready(Ok(LoadResponse::CacheValid));
185183
}
186184
// Note that the index calls this method and the filesystem is locked
187185
// in the index, so we don't need to worry about an `update_index`
@@ -190,18 +188,16 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
190188
registry: &RemoteRegistry<'_>,
191189
path: &Path,
192190
index_version: Option<&str>,
193-
git_commit_hash: Option<&str>,
194191
) -> CargoResult<LoadResponse> {
195192
let repo = registry.repo()?;
196193
let tree = registry.tree()?;
197194
let entry = tree.get_path(path);
198195
let entry = entry?;
199-
let git_file_hash = entry.id().to_string();
196+
let git_file_hash = Some(entry.id().to_string());
200197

201-
if let Some(i) = index_version {
202-
if i.starts_with(git_file_hash.as_str()) {
203-
return Ok(LoadResponse::CacheValid);
204-
}
198+
// Check if the cache is valid.
199+
if index_version.is_some() && index_version == git_file_hash.as_deref() {
200+
return Ok(LoadResponse::CacheValid);
205201
}
206202

207203
let object = entry.to_object(repo)?;
@@ -212,13 +208,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
212208

213209
Ok(LoadResponse::Data {
214210
raw_data: blob.content().to_vec(),
215-
index_version: git_commit_hash.map(String::from),
216-
// TODO: When the reading code has been stable for long enough (Say 8/2022)
217-
// change to `git_file_hash + ":" + git_commit_hash`
211+
index_version: git_file_hash,
218212
})
219213
}
220214

221-
match load_helper(&self, path, index_version, git_commit_hash.as_deref()) {
215+
match load_helper(&self, path, index_version) {
222216
Ok(result) => Poll::Ready(Ok(result)),
223217
Err(_) if !self.updated => {
224218
// If git returns an error and we haven't updated the repo, return

0 commit comments

Comments
 (0)