Skip to content

Commit afae684

Browse files
committed
prepare for re-encoding each pack object
1 parent e851cbe commit afae684

File tree

8 files changed

+58
-13
lines changed

8 files changed

+58
-13
lines changed

Diff for: git-odb/src/pack/bundle.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ impl Bundle {
7777
pub fn verify_checksums<P, C>(
7878
&self,
7979
thread_limit: Option<usize>,
80+
mode: pack::index::verify::Mode,
8081
progress: Option<P>,
8182
make_cache: impl Fn() -> C + Send + Sync,
8283
) -> Result<(owned::Id, Option<pack::index::verify::Outcome>), pack::index::verify::Error>
@@ -86,7 +87,7 @@ impl Bundle {
8687
C: pack::cache::DecodeEntry,
8788
{
8889
self.index
89-
.verify_checksum_of_index(Some(&self.pack), thread_limit, progress, make_cache)
90+
.verify_checksum_of_index(Some(&self.pack), thread_limit, mode, progress, make_cache)
9091
}
9192
}
9293

Diff for: git-odb/src/pack/data/decoded.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ pub struct Entry {
1515
pub header: Header,
1616
/// The decompressed size of the object in bytes
1717
pub decompressed_size: u64,
18-
/// absolute offset to compressed object data in the pack
18+
/// absolute offset to compressed object data in the pack, just behind the header
1919
pub data_offset: u64,
2020
}
2121

Diff for: git-odb/src/pack/index/verify.rs

+15-4
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,17 @@ pub struct Outcome {
7676
pub pack_size: u64,
7777
}
7878

79+
/// Various ways in which a pack and index can be verified
80+
#[derive(Debug, Eq, PartialEq, Hash, Clone, Copy)]
81+
pub enum Mode {
82+
/// Validate SHA1 and CRC32
83+
Sha1CRC32,
84+
/// Validate SHA1 and CRC32, and decode each non-Blob object
85+
Sha1CRC32Decode,
86+
/// Validate SHA1 and CRC32, and decode and encode each non-Blob object
87+
Sha1CRC32DecodeEncode,
88+
}
89+
7990
/// Verify and validate the content of the index file
8091
impl index::File {
8192
pub fn checksum_of_index(&self) -> owned::Id {
@@ -94,6 +105,7 @@ impl index::File {
94105
&self,
95106
pack: Option<&pack::data::File>,
96107
thread_limit: Option<usize>,
108+
mode: Mode,
97109
progress: Option<P>,
98110
make_cache: impl Fn() -> C + Send + Sync,
99111
) -> Result<(owned::Id, Option<Outcome>), Error>
@@ -281,10 +293,9 @@ impl index::File {
281293
});
282294
}
283295
if let Some(desired_crc32) = index_entry.crc32 {
284-
let actual_crc32 = pack.entry_crc32(
285-
index_entry.pack_offset,
286-
(pack_entry_data_offset - index_entry.pack_offset) as usize + consumed_input,
287-
);
296+
let header_size = (pack_entry_data_offset - index_entry.pack_offset) as usize;
297+
let actual_crc32 =
298+
pack.entry_crc32(index_entry.pack_offset, header_size + consumed_input);
288299
if actual_crc32 != desired_crc32 {
289300
return Err(Error::Crc32Mismatch {
290301
actual: actual_crc32,

Diff for: git-odb/tests/pack/index.rs

+12-4
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,14 @@ fn pack_lookup() {
155155
assert_eq!(pack.kind(), pack::data::Kind::V2);
156156
assert_eq!(pack.num_objects(), idx.num_objects());
157157
assert_eq!(
158-
idx.verify_checksum_of_index(Some(&pack), None, Discard.into(), || DecodeEntryNoop)
159-
.unwrap(),
158+
idx.verify_checksum_of_index(
159+
Some(&pack),
160+
None,
161+
index::verify::Mode::Sha1CRC32DecodeEncode,
162+
Discard.into(),
163+
|| DecodeEntryNoop
164+
)
165+
.unwrap(),
160166
(idx.checksum_of_index(), Some(stats.to_owned()))
161167
);
162168
for idx_entry in idx.iter() {
@@ -199,8 +205,10 @@ fn iter() {
199205
assert_eq!(idx.version(), *version);
200206
assert_eq!(idx.num_objects(), *num_objects);
201207
assert_eq!(
202-
idx.verify_checksum_of_index(None, None, Discard.into(), || DecodeEntryNoop)
203-
.unwrap(),
208+
idx.verify_checksum_of_index(None, None, index::verify::Mode::Sha1CRC32Decode, Discard.into(), || {
209+
DecodeEntryNoop
210+
})
211+
.unwrap(),
204212
(idx.checksum_of_index(), None)
205213
);
206214
assert_eq!(idx.checksum_of_index(), hex_to_id(index_checksum));

Diff for: gitoxide-core/src/lib.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,15 @@ pub struct Context<W1: io::Write, W2: io::Write> {
5151
/// Otherwise, usually use as many threads as there are logical cores.
5252
/// A value of 0 is interpreted as no-limit
5353
pub thread_limit: Option<usize>,
54+
pub mode: index::verify::Mode,
5455
}
5556

5657
impl Default for Context<Vec<u8>, Vec<u8>> {
5758
fn default() -> Self {
5859
Context {
5960
output_statistics: None,
6061
thread_limit: None,
62+
mode: VerifyMode::Sha1CRC32,
6163
out: Vec::new(),
6264
err: Vec::new(),
6365
}
@@ -95,6 +97,7 @@ pub fn verify_pack_or_pack_index<P, W1, W2>(
9597
Context {
9698
mut out,
9799
mut err,
100+
mode,
98101
output_statistics,
99102
thread_limit,
100103
}: Context<W1, W2>,
@@ -132,7 +135,7 @@ where
132135
Err(e)
133136
})
134137
.ok();
135-
idx.verify_checksum_of_index(pack.as_ref(), thread_limit, progress, || -> EitherCache {
138+
idx.verify_checksum_of_index(pack.as_ref(), thread_limit, mode, progress, || -> EitherCache {
136139
if output_statistics.is_some() {
137140
// turn off acceleration as we need to see entire chains all the time
138141
EitherCache::Left(pack::cache::DecodeEntryNoop)

Diff for: src/plumbing/lean.rs

+16
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,22 @@ mod options {
1010
/// print the program version.
1111
pub version: bool,
1212

13+
#[argh(switch)]
14+
/// Decode and parse tags, commits and trees to validate their correctness beyond hashing correctly.
15+
///
16+
/// Malformed objects should not usually occur, but could be injected on purpose or accident.
17+
/// This will reduce overall performance.
18+
pub decode: bool,
19+
20+
#[argh(switch)]
21+
/// Decode and parse tags, commits and trees to validate their correctness, and re-encode them.
22+
///
23+
/// This flag is primarily to test the implementation of encoding, and requires to decode the object first.
24+
/// Encoding an object after decoding it should yield exactly the same bytes.
25+
/// This will reduce overall performance even more, as re-encoding requires to transform zero-copy objects into
26+
/// owned objects, causing plenty of allocation to occour.
27+
pub re_encode: bool,
28+
1329
#[argh(option, short = 't')]
1430
/// the amount of threads to use for some operations.
1531
///

Diff for: tasks.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
* [ ] ~~support streamed objects (similar to how it's done with loose objects)~~ - no need, all slices support io::Read, and we don't
77
actually support streaming, so let's net unify 'interfaces' on a low level like this.
88
* **owned objects**
9-
* [ ] encode object
9+
* [x] encode object
1010
* [x] blob
1111
* [x] tag
1212
* [x] tree
1313
* [x] commit
14-
* [ ] write loose
1514
* **pack verify**
1615
* add '--some-flag' to run every non-blob through a decode/encode cycle to see if all objects can be parsed
1716
* add that to the stress test
1817
* **plumbing - explode pack**
18+
* [ ] write loose object
1919
* [ ] single threaded
2020
* [ ] multi-threaded
2121
* [ ] progress

Diff for: tests/stateless-journey.sh

+6
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ title "CLI ${kind}"
6161
expect_run $SUCCESSFULLY "$exe_plumbing" verify-pack --statistics "$PACK_INDEX_FILE"
6262
}
6363
)
64+
(with "decode"
65+
it "verifies the pack index successfully and with desired output, and decodes all objects" && {
66+
WITH_SNAPSHOT="$snapshot/plumbing-verify-pack-index-success" \
67+
expect_run $SUCCESSFULLY "$exe_plumbing" verify-pack --decode "$PACK_INDEX_FILE"
68+
}
69+
)
6470
if test "$kind" = "max"; then
6571
(with "statistics (JSON)"
6672
it "verifies the pack index successfully and with desired output" && {

0 commit comments

Comments
 (0)