Skip to content

Commit ee45c7f

Browse files
committed
prepare full 'verify' implementation
1 parent 0a33b24 commit ee45c7f

File tree

6 files changed

+68
-28
lines changed

6 files changed

+68
-28
lines changed

Diff for: git-odb/src/pack/data/decode.rs

+4
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,10 @@ impl File {
363363
// FIXME(Performance) If delta-chains are uneven, we know we will have to copy bytes over here
364364
// Instead we could use a different start buffer, to naturally end up with the result in the
365365
// right one.
366+
// However, this is a bit more complicated than just that - you have to deal with the base
367+
// object, which should also be placed in the second buffer right away. You don't have that
368+
// control/knowledge for out-of-pack bases, so this is a special case to deal with, too.
369+
// Maybe these invariants can be represented in the type system though.
366370
if chain.len() % 2 == 1 {
367371
// this seems inverted, but remember: we swapped the buffers on the last iteration
368372
target_buf[..last_result_size].copy_from_slice(&source_buf[..last_result_size]);

Diff for: gitoxide-core/src/pack/explode.rs

+42-22
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,7 @@ use anyhow::{anyhow, Result};
22
use git_features::progress::{self, Progress};
33
use git_object::{owned, HashKind};
44
use git_odb::{loose, pack, Write};
5-
use std::{
6-
fs,
7-
io::{self, Read},
8-
path::{Path, PathBuf},
9-
};
5+
use std::{fs, io::Read, path::Path};
106

117
#[derive(PartialEq, Debug)]
128
pub enum SafetyCheck {
@@ -79,8 +75,11 @@ quick_error! {
7975
ObjectEncodeMismatch(kind: git_object::Kind, actual: owned::Id, expected: owned::Id) {
8076
display("{} object {} wasn't re-encoded without change - new hash is {}", kind, expected, actual)
8177
}
82-
RemoveFile(err: io::Error, index: PathBuf, data: PathBuf) {
83-
display("Failed to delete pack index file at '{} or data file at '{}'", index.display(), data.display())
78+
WrittenFileMissing(id: owned::Id) {
79+
display("The recently written file for loose object {} could not be found", id)
80+
}
81+
WrittenFileCorrupt(err: loose::db::locate::Error, id: owned::Id) {
82+
display("The recently written file for loose object {} cold not be read", id)
8483
source(err)
8584
}
8685
}
@@ -124,10 +123,12 @@ impl OutputWriter {
124123
}
125124
}
126125

126+
#[derive(Default)]
127127
pub struct Context {
128128
pub thread_limit: Option<usize>,
129129
pub delete_pack: bool,
130130
pub sink_compress: bool,
131+
pub verify: bool,
131132
}
132133

133134
pub fn pack_or_pack_index<P>(
@@ -139,6 +140,7 @@ pub fn pack_or_pack_index<P>(
139140
thread_limit,
140141
delete_pack,
141142
sink_compress,
143+
verify,
142144
}: Context,
143145
) -> Result<()>
144146
where
@@ -183,22 +185,34 @@ where
183185
{
184186
let object_path = object_path.map(|p| p.as_ref().to_owned());
185187
move || {
186-
let out = OutputWriter::new(object_path.clone(), sink_compress);
187-
move |object_kind, buf, index_entry, _entry_stats, progress| {
188-
let written_id = out
189-
.write_buf(object_kind, buf, HashKind::Sha1)
190-
.map_err(|err| Error::Write(Box::new(err) as Box<dyn std::error::Error + Send + Sync>, object_kind, index_entry.oid))
191-
.map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
192-
if written_id != index_entry.oid {
193-
if let git_object::Kind::Tree = object_kind {
194-
progress.info(format!("The tree in pack named {} was written as {} due to modes 100664 and 100640 rewritten as 100644.", index_entry.oid, written_id));
195-
} else {
196-
return Err(Box::new(Error::ObjectEncodeMismatch(object_kind, index_entry.oid, written_id)))
197-
}
188+
let out = OutputWriter::new(object_path.clone(), sink_compress);
189+
let object_verifier = if verify {
190+
object_path.as_ref().map(|p| loose::Db::at(p))
191+
} else {
192+
None
193+
};
194+
move |object_kind, buf, index_entry, _entry_stats, progress| {
195+
let written_id = out
196+
.write_buf(object_kind, buf, HashKind::Sha1)
197+
.map_err(|err| Error::Write(Box::new(err) as Box<dyn std::error::Error + Send + Sync>, object_kind, index_entry.oid))
198+
.map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync>)?;
199+
if written_id != index_entry.oid {
200+
if let git_object::Kind::Tree = object_kind {
201+
progress.info(format!("The tree in pack named {} was written as {} due to modes 100664 and 100640 rewritten as 100644.", index_entry.oid, written_id));
202+
} else {
203+
return Err(Box::new(Error::ObjectEncodeMismatch(object_kind, index_entry.oid, written_id)))
204+
}
205+
}
206+
if let Some(verifier) = object_verifier.as_ref() {
207+
let obj = verifier.locate(written_id.to_borrowed())
208+
.ok_or_else(|| Error::WrittenFileMissing(written_id))?
209+
.map_err(|err| Error::WrittenFileCorrupt(err, written_id))?;
210+
// obj.checksum_matches(written_id);
211+
}
212+
Ok(())
198213
}
199-
Ok(())
200214
}
201-
}},
215+
},
202216
pack::cache::DecodeEntryLRU::default,
203217
).map(|(_,_,c)|progress::DoOrDiscard::from(c)).with_context(|| "Failed to explode the entire pack - some loose objects may have been created nonetheless")?;
204218

@@ -208,7 +222,13 @@ where
208222
if delete_pack {
209223
fs::remove_file(&index_path)
210224
.and_then(|_| fs::remove_file(&data_path))
211-
.map_err(|err| Error::RemoveFile(err, index_path.clone(), data_path.clone()))?;
225+
.with_context(|| {
226+
format!(
227+
"Failed to delete pack index file at '{} or data file at '{}'",
228+
index_path.display(),
229+
data_path.display()
230+
)
231+
})?;
212232
progress.info(format!(
213233
"Removed '{}' and '{}'",
214234
index_path.display(),

Diff for: src/plumbing/lean.rs

+8
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ mod options {
3434
#[derive(FromArgs, PartialEq, Debug)]
3535
#[argh(subcommand, name = "pack-explode")]
3636
pub struct PackExplode {
37+
#[argh(switch)]
38+
/// read written objects back and assert they match their source. Fail the operation otherwise.
39+
///
40+
/// Only relevant if an object directory is set.
41+
pub verify: bool,
42+
3743
/// delete the pack and index file after the operation is successful
3844
#[argh(switch)]
3945
pub delete_pack: bool,
@@ -147,6 +153,7 @@ pub fn main() -> Result<()> {
147153
sink_compress,
148154
object_path,
149155
verbose,
156+
verify,
150157
check,
151158
delete_pack,
152159
}) => {
@@ -160,6 +167,7 @@ pub fn main() -> Result<()> {
160167
thread_limit,
161168
delete_pack,
162169
sink_compress,
170+
verify,
163171
},
164172
)
165173
}

Diff for: src/plumbing/pretty.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,13 @@ mod options {
3030
/// Verify the integrity of a pack or index file
3131
#[structopt(setting = AppSettings::ColoredHelp)]
3232
PackExplode {
33-
/// Delete the pack and index file after the operation is successful
33+
#[structopt(long, requires("object_path"))]
34+
/// Read written objects back and assert they match their source. Fail the operation otherwise.
35+
///
36+
/// Only relevant if an object directory is set.
37+
verify: bool,
38+
39+
/// delete the pack and index file after the operation is successful
3440
#[structopt(long)]
3541
delete_pack: bool,
3642

@@ -48,7 +54,7 @@ mod options {
4854
/// This helps to determine overhead related to compression. If unset, the sink will
4955
/// only create hashes from bytes, which is usually limited by the speed at which input
5056
/// can be obtained.
51-
#[structopt(long)]
57+
#[structopt(long, conflicts_with("object_path"))]
5258
sink_compress: bool,
5359

5460
/// Display verbose messages and progress information
@@ -236,6 +242,7 @@ pub fn main() -> Result<()> {
236242
delete_pack,
237243
pack_path,
238244
object_path,
245+
verify,
239246
} => prepare_and_run(
240247
"pack-explode",
241248
verbose,
@@ -251,6 +258,7 @@ pub fn main() -> Result<()> {
251258
thread_limit,
252259
delete_pack,
253260
sink_compress,
261+
verify,
254262
},
255263
)
256264
},

Diff for: tasks.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
* [x] to disk
4141
* [x] progress
4242
* [x] option to compress sink input too
43-
* [ ] unrelated: see if delta-decode buffer optimization can work easily
43+
* [x] unrelated: see if delta-decode buffer optimization can work easily
4444
* [ ] --verify
4545
* [ ] statistics
4646

Diff for: tests/stateless-journey.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ title "CLI ${kind}"
7272
(with "and all safety checks"
7373
it "does not explode the file at all" && {
7474
WITH_SNAPSHOT="$snapshot/plumbing-broken-pack-explode-delete-pack-to-sink-failure" \
75-
expect_run $WITH_FAILURE "$exe_plumbing" pack-explode --check all --delete-pack "${PACK_FILE}.pack"
75+
expect_run $WITH_FAILURE "$exe_plumbing" pack-explode --sink-compress --check all --delete-pack "${PACK_FILE}.pack"
7676
}
7777

7878
it "did not touch index or pack file" && {
@@ -84,8 +84,8 @@ title "CLI ${kind}"
8484
(with "and no safety checks at all (and an output directory)"
8585
it "does explode the file" && {
8686
WITH_SNAPSHOT="$snapshot/plumbing-broken-pack-explode-delete-pack-to-sink-skip-checks-success" \
87-
expect_run $SUCCESSFULLY "$exe_plumbing" pack-explode --check skip-file-and-object-checksum-and-no-abort-on-decode \
88-
--delete-pack --sink-compress "${PACK_FILE}.pack" .
87+
expect_run $SUCCESSFULLY "$exe_plumbing" pack-explode --verify --check skip-file-and-object-checksum-and-no-abort-on-decode \
88+
--delete-pack "${PACK_FILE}.pack" .
8989
}
9090

9191
it "removes the original files" && {

0 commit comments

Comments
 (0)