Skip to content

Commit 0416666

Browse files
committed
Support for skipping various safety checks during traversal
1 parent 06638d0 commit 0416666

File tree

4 files changed

+107
-56
lines changed

4 files changed

+107
-56
lines changed

git-odb/src/pack/index/traverse/indexed.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::{Error, Reducer};
1+
use super::{Error, Reducer, SafetyCheck};
22
use crate::{
33
pack,
44
pack::index::access::PackOffset,
@@ -14,6 +14,7 @@ use std::collections::BTreeMap;
1414
impl index::File {
1515
pub(crate) fn traverse_with_index_lookup<P, Processor>(
1616
&self,
17+
check: SafetyCheck,
1718
thread_limit: Option<usize>,
1819
new_processor: impl Fn() -> Processor + Send + Sync,
1920
mut root: P,
@@ -145,6 +146,7 @@ impl index::File {
145146

146147
let shared_cache = &mut SharedCache(&mut cache);
147148
let mut stat = self.process_entry_dispatch(
149+
check,
148150
pack,
149151
shared_cache,
150152
buf,
@@ -163,7 +165,7 @@ impl index::File {
163165

164166
Ok(stats)
165167
},
166-
Reducer::from_progress(&reduce_progress, pack.data_len()),
168+
Reducer::from_progress(&reduce_progress, pack.data_len(), check),
167169
)
168170
}
169171
}

git-odb/src/pack/index/traverse/lookup.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::{Error, Reducer};
1+
use super::{Error, Reducer, SafetyCheck};
22
use crate::pack::{self, data::decode, index, index::util};
33
use git_features::{
44
parallel::{self, in_parallel_if},
@@ -9,6 +9,7 @@ use git_features::{
99
impl index::File {
1010
pub(crate) fn traverse_with_lookup<P, C, Processor>(
1111
&self,
12+
check: SafetyCheck,
1213
thread_limit: Option<usize>,
1314
new_processor: impl Fn() -> Processor + Send + Sync,
1415
make_cache: impl Fn() -> C + Send + Sync,
@@ -61,6 +62,7 @@ impl index::File {
6162
let mut header_buf = [0u8; 64];
6263
for index_entry in entries.iter() {
6364
stats.push(self.process_entry_dispatch(
65+
check,
6466
pack,
6567
cache,
6668
buf,
@@ -73,7 +75,7 @@ impl index::File {
7375
}
7476
Ok(stats)
7577
},
76-
Reducer::from_progress(&reduce_progress, pack.data_len()),
78+
Reducer::from_progress(&reduce_progress, pack.data_len(), check),
7779
)
7880
}
7981
}

git-odb/src/pack/index/traverse/mod.rs

+78-46
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ pub struct Outcome {
6363
pub pack_size: u64,
6464
}
6565

66-
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
66+
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
6767
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
6868
pub enum SafetyCheck {
6969
/// Don't verify the validity of the checksums stored in the index and pack file
@@ -75,13 +75,36 @@ pub enum SafetyCheck {
7575
/// All of the above, and only log object decode errors.
7676
///
7777
/// Useful if there is a damaged pack and you would like to traverse as many objects as possible.
78-
SkipFileAndObjectChecksumVerificationNoAbortOnDecodeError,
78+
SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError,
7979

8080
/// Perform all available safety checks before operating on the pack and
8181
/// abort if any of them fails
8282
All,
8383
}
8484

85+
impl SafetyCheck {
86+
pub fn file_checksum(&self) -> bool {
87+
match self {
88+
SafetyCheck::All => true,
89+
_ => false,
90+
}
91+
}
92+
pub fn object_checksum(&self) -> bool {
93+
match self {
94+
SafetyCheck::All | SafetyCheck::SkipFileChecksumVerification => true,
95+
_ => false,
96+
}
97+
}
98+
pub fn fatal_decode_error(&self) -> bool {
99+
match self {
100+
SafetyCheck::All
101+
| SafetyCheck::SkipFileChecksumVerification
102+
| SafetyCheck::SkipFileAndObjectChecksumVerification => true,
103+
SafetyCheck::SkipFileAndObjectChecksumVerificationAndNoAbortOnDecodeError => false,
104+
}
105+
}
106+
}
107+
85108
impl Default for SafetyCheck {
86109
fn default() -> Self {
87110
SafetyCheck::All
@@ -136,7 +159,7 @@ impl index::File {
136159
Context {
137160
algorithm,
138161
thread_limit,
139-
check: _,
162+
check,
140163
}: Context,
141164
progress: Option<P>,
142165
new_processor: impl Fn() -> Processor + Send + Sync,
@@ -156,38 +179,45 @@ impl index::File {
156179
{
157180
let mut root = progress::DoOrDiscard::from(progress);
158181

159-
let progress = root.add_child("Sha1 of index");
160-
let verify_self = move || self.verify_checksum(progress);
182+
let id = if check.file_checksum() {
183+
let progress = root.add_child("Sha1 of index");
184+
let verify_self = move || self.verify_checksum(progress);
161185

162-
if self.pack_checksum() != pack.checksum() {
163-
return Err(Error::PackMismatch {
164-
actual: pack.checksum(),
165-
expected: self.pack_checksum(),
166-
});
167-
}
168-
let mut progress = root.add_child("Sha1 of pack");
169-
let (pack_res, id) = parallel::join(
170-
move || {
171-
let throughput = TimeThroughput::new(pack.data_len());
172-
let res = pack.verify_checksum();
173-
progress.done(throughput);
174-
res
175-
},
176-
verify_self,
177-
);
178-
pack_res?;
179-
let id = id?;
186+
if self.pack_checksum() != pack.checksum() {
187+
return Err(Error::PackMismatch {
188+
actual: pack.checksum(),
189+
expected: self.pack_checksum(),
190+
});
191+
}
192+
let mut progress = root.add_child("Sha1 of pack");
193+
let (pack_res, id) = parallel::join(
194+
move || {
195+
let throughput = TimeThroughput::new(pack.data_len());
196+
let res = pack.verify_checksum();
197+
progress.done(throughput);
198+
res
199+
},
200+
verify_self,
201+
);
202+
pack_res?;
203+
id?
204+
} else {
205+
self.index_checksum()
206+
};
180207

181208
match algorithm {
182-
Algorithm::Lookup => self.traverse_with_lookup(thread_limit, new_processor, make_cache, root, pack),
183-
Algorithm::DeltaTreeLookup => self.traverse_with_index_lookup(thread_limit, new_processor, root, pack),
209+
Algorithm::Lookup => self.traverse_with_lookup(check, thread_limit, new_processor, make_cache, root, pack),
210+
Algorithm::DeltaTreeLookup => {
211+
self.traverse_with_index_lookup(check, thread_limit, new_processor, root, pack)
212+
}
184213
}
185214
.map(|stats| (id, stats))
186215
}
187216

188217
#[allow(clippy::too_many_arguments)]
189218
pub(crate) fn process_entry_dispatch<C, P>(
190219
&self,
220+
check: SafetyCheck,
191221
pack: &pack::data::File,
192222
cache: &mut C,
193223
buf: &mut Vec<u8>,
@@ -223,32 +253,34 @@ impl index::File {
223253
let object_kind = entry_stats.kind;
224254
let consumed_input = entry_stats.compressed_size;
225255

226-
let header_size = crate::loose::object::header::encode(object_kind, buf.len() as u64, &mut header_buf[..])
227-
.expect("header buffer to be big enough");
228-
let mut hasher = git_features::hash::Sha1::default();
229-
hasher.update(&header_buf[..header_size]);
230-
hasher.update(buf.as_slice());
256+
if check.object_checksum() {
257+
let header_size = crate::loose::object::header::encode(object_kind, buf.len() as u64, &mut header_buf[..])
258+
.expect("header buffer to be big enough");
259+
let mut hasher = git_features::hash::Sha1::default();
260+
hasher.update(&header_buf[..header_size]);
261+
hasher.update(buf.as_slice());
231262

232-
let actual_oid = owned::Id::new_sha1(hasher.digest());
233-
if actual_oid != index_entry.oid {
234-
return Err(Error::PackObjectMismatch {
235-
actual: actual_oid,
236-
expected: index_entry.oid,
237-
offset: index_entry.pack_offset,
238-
kind: object_kind,
239-
});
240-
}
241-
if let Some(desired_crc32) = index_entry.crc32 {
242-
let header_size = (pack_entry_data_offset - index_entry.pack_offset) as usize;
243-
let actual_crc32 = pack.entry_crc32(index_entry.pack_offset, header_size + consumed_input);
244-
if actual_crc32 != desired_crc32 {
245-
return Err(Error::Crc32Mismatch {
246-
actual: actual_crc32,
247-
expected: desired_crc32,
263+
let actual_oid = owned::Id::new_sha1(hasher.digest());
264+
if actual_oid != index_entry.oid {
265+
return Err(Error::PackObjectMismatch {
266+
actual: actual_oid,
267+
expected: index_entry.oid,
248268
offset: index_entry.pack_offset,
249269
kind: object_kind,
250270
});
251271
}
272+
if let Some(desired_crc32) = index_entry.crc32 {
273+
let header_size = (pack_entry_data_offset - index_entry.pack_offset) as usize;
274+
let actual_crc32 = pack.entry_crc32(index_entry.pack_offset, header_size + consumed_input);
275+
if actual_crc32 != desired_crc32 {
276+
return Err(Error::Crc32Mismatch {
277+
actual: actual_crc32,
278+
expected: desired_crc32,
279+
offset: index_entry.pack_offset,
280+
kind: object_kind,
281+
});
282+
}
283+
}
252284
}
253285
processor(object_kind, buf.as_slice(), &index_entry, &entry_stats, progress)?;
254286
Ok(entry_stats)

git-odb/src/pack/index/traverse/reduce.rs

+21-6
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,25 @@ fn div_decode_result(lhs: &mut decode::Outcome, div: usize) {
1919
}
2020

2121
pub struct Reducer<'a, P> {
22-
pub progress: &'a std::sync::Mutex<P>,
23-
pub then: Instant,
24-
pub entries_seen: u32,
25-
pub stats: traverse::Outcome,
22+
progress: &'a std::sync::Mutex<P>,
23+
check: traverse::SafetyCheck,
24+
then: Instant,
25+
entries_seen: u32,
26+
stats: traverse::Outcome,
2627
}
2728

2829
impl<'a, P> Reducer<'a, P>
2930
where
3031
P: Progress,
3132
{
32-
pub fn from_progress(progress: &'a std::sync::Mutex<P>, pack_data_len_in_bytes: usize) -> Self {
33+
pub fn from_progress(
34+
progress: &'a std::sync::Mutex<P>,
35+
pack_data_len_in_bytes: usize,
36+
check: traverse::SafetyCheck,
37+
) -> Self {
3338
Reducer {
3439
progress: &progress,
40+
check,
3541
then: Instant::now(),
3642
entries_seen: 0,
3743
stats: traverse::Outcome {
@@ -55,7 +61,16 @@ where
5561
type Error = traverse::Error;
5662

5763
fn feed(&mut self, input: Self::Input) -> Result<(), Self::Error> {
58-
let chunk_stats: Vec<_> = input?;
64+
let chunk_stats: Vec<_> = match input {
65+
Err(err @ traverse::Error::PackDecode(_, _, _)) if !self.check.fatal_decode_error() => {
66+
self.progress
67+
.lock()
68+
.unwrap()
69+
.fail(format!("Ignoring decode error: {}", err));
70+
return Ok(());
71+
}
72+
res => res,
73+
}?;
5974
self.entries_seen += chunk_stats.len() as u32;
6075

6176
let chunk_total = chunk_stats.into_iter().fold(

0 commit comments

Comments
 (0)