Skip to content

Commit 53d835a

Browse files
committed
support for loose object statistics in odb store (#287)
1 parent 3dfec81 commit 53d835a

File tree

4 files changed

+120
-21
lines changed

4 files changed

+120
-21
lines changed

Diff for: git-odb/src/store_impls/dynamic/verify.rs

+76-13
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
use crate::pack;
2+
use crate::store::verify::integrity::{IndexStatistics, SingleOrMultiStatistics};
23
use crate::types::IndexAndPacks;
34
use git_features::progress::Progress;
45
use std::ops::Deref;
56
use std::sync::atomic::{AtomicBool, Ordering};
67

7-
#[allow(missing_docs, unused)]
8-
98
///
109
pub mod integrity {
1110
use crate::pack;
11+
use std::path::PathBuf;
1212

1313
/// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()].
1414
#[derive(Debug, thiserror::Error)]
@@ -21,6 +21,8 @@ pub mod integrity {
2121
#[error(transparent)]
2222
IndexOpen(#[from] pack::index::init::Error),
2323
#[error(transparent)]
24+
LooseObjectStoreIntegrity(#[from] crate::loose::verify::integrity::Error),
25+
#[error(transparent)]
2426
MultiIndexOpen(#[from] pack::multi_index::init::Error),
2527
#[error(transparent)]
2628
PackOpen(#[from] pack::data::init::Error),
@@ -30,10 +32,41 @@ pub mod integrity {
3032
NeedsRetryDueToChangeOnDisk,
3133
}
3234

35+
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
36+
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
37+
/// Integrity information about a single loose object database.
38+
pub struct LooseObjectStatistics {
39+
/// The path to the root directory of the loose objects database.
40+
pub path: PathBuf,
41+
/// The statistics created after verifying the loose object database.
42+
pub statistics: crate::loose::verify::integrity::Statistics,
43+
}
44+
45+
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
46+
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
47+
/// Traversal statistics of packs governed by single indices or multi-pack indices.
48+
#[allow(missing_docs)]
49+
pub enum SingleOrMultiStatistics {
50+
/// Traversal statistics of the pack that belongs to a single index.
Single(pack::index::traverse::Statistics),
51+
/// Traversal statistics for each pack of a multi-pack index, each paired with a path built by
/// joining the multi-pack index's parent directory with the corresponding index name.
Multi(Vec<(PathBuf, pack::index::traverse::Statistics)>),
52+
}
53+
54+
/// Statistics gathered when traversing packs of various kinds of indices.
55+
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
56+
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
57+
pub struct IndexStatistics {
58+
/// The path to the index or multi-pack index for which statistics were gathered.
// NOTE(review): the multi-pack index arm of `verify_integrity()` currently sets this field to
// `Default::default()` (an empty path) instead of the multi-pack index path - confirm whether
// that is intended, as it contradicts the documentation above.
59+
pub path: PathBuf,
60+
/// The actual statistics for the index at `path`.
61+
pub statistics: SingleOrMultiStatistics,
62+
}
63+
3364
/// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()].
3465
pub struct Outcome<P> {
35-
/// Pack traversal statistics for each pack whose objects were checked.
36-
pub pack_traverse_statistics: Vec<pack::index::traverse::Statistics>,
66+
/// Statistics for each validated loose object store.
67+
pub loose_object_stores: Vec<LooseObjectStatistics>,
68+
/// Pack traversal statistics for each index and its pack(s).
69+
pub index_statistics: Vec<IndexStatistics>,
3770
/// The provided progress instance.
3871
pub progress: P,
3972
}
@@ -104,11 +137,14 @@ impl super::Store {
104137
progress.add_child("Checking integrity"),
105138
should_interrupt,
106139
)?;
107-
statistics.push(
108-
outcome
109-
.pack_traverse_statistics
110-
.expect("pack provided so there are stats"),
111-
);
140+
statistics.push(IndexStatistics {
141+
path: bundle.index.path().to_owned(),
142+
statistics: SingleOrMultiStatistics::Single(
143+
outcome
144+
.pack_traverse_statistics
145+
.expect("pack provided so there are stats"),
146+
),
147+
});
112148
}
113149
IndexAndPacks::MultiIndex(bundle) => {
114150
let index;
@@ -124,18 +160,45 @@ impl super::Store {
124160
should_interrupt,
125161
options.clone(),
126162
)?;
127-
statistics.extend(outcome.pack_traverse_statistics);
163+
164+
let index_dir = bundle.multi_index.path().parent().expect("file in a directory");
165+
statistics.push(IndexStatistics {
166+
path: Default::default(),
167+
statistics: SingleOrMultiStatistics::Multi(
168+
outcome
169+
.pack_traverse_statistics
170+
.into_iter()
171+
.zip(index.index_names())
172+
.map(|(statistics, index_name)| (index_dir.join(index_name), statistics))
173+
.collect(),
174+
),
175+
});
128176
}
129177
}
130178
progress.inc();
131179
}
132180

133-
for _loose_db in &*index.loose_dbs {
134-
// TODO: impl verify integrity for loose object databases
181+
progress.init(
182+
Some(index.loose_dbs.len()),
183+
git_features::progress::count("loose object stores"),
184+
);
185+
let mut loose_object_stores = Vec::new();
186+
for loose_db in &*index.loose_dbs {
187+
let out = loose_db
188+
.verify_integrity(
189+
progress.add_child(loose_db.path().display().to_string()),
190+
should_interrupt,
191+
)
192+
.map(|statistics| integrity::LooseObjectStatistics {
193+
path: loose_db.path().to_owned(),
194+
statistics,
195+
})?;
196+
loose_object_stores.push(out);
135197
}
136198

137199
Ok(integrity::Outcome {
138-
pack_traverse_statistics: statistics,
200+
loose_object_stores,
201+
index_statistics: statistics,
139202
progress,
140203
})
141204
}

Diff for: git-odb/src/store_impls/loose/verify.rs

+22-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use crate::loose::Store;
22
use crate::Write;
3+
use git_features::progress::Progress;
4+
use std::sync::atomic::{AtomicBool, Ordering};
35

46
///
57
pub mod integrity {
@@ -21,22 +23,32 @@ pub mod integrity {
2123
},
2224
#[error("Objects were deleted during iteration - try again")]
2325
Retry,
26+
#[error("Interrupted")]
27+
Interrupted,
2428
}
2529

2630
/// The outcome returned by [`verify_integrity()`][super::Store::verify_integrity()].
27-
pub struct Outcome {
31+
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)]
32+
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
33+
pub struct Statistics {
2834
/// The number of loose objects that were checked.
2935
pub num_objects: usize,
3036
}
3137
}
3238

3339
impl Store {
3440
/// Check the integrity of all loose objects by verifying that their hash matches the actual data and by decoding them fully.
35-
pub fn verify_integrity(&self) -> Result<integrity::Outcome, integrity::Error> {
41+
pub fn verify_integrity(
42+
&self,
43+
mut progress: impl Progress,
44+
should_interrupt: &AtomicBool,
45+
) -> Result<integrity::Statistics, integrity::Error> {
3646
let mut buf = Vec::new();
37-
let mut num_objects = 0;
3847
let sink = crate::sink(self.object_hash);
3948

49+
let mut num_objects = 0;
50+
let mut progress = progress.add_child("validating");
51+
progress.init(None, git_features::progress::count("objects"));
4052
for id in self.iter().filter_map(Result::ok) {
4153
let object = self
4254
.try_find(id, &mut buf)
@@ -55,8 +67,14 @@ impl Store {
5567
kind: object.kind,
5668
id,
5769
})?;
70+
71+
progress.inc();
5872
num_objects += 1;
73+
if should_interrupt.load(Ordering::SeqCst) {
74+
return Err(integrity::Error::Interrupted);
75+
}
5976
}
60-
Ok(integrity::Outcome { num_objects })
77+
78+
Ok(integrity::Statistics { num_objects })
6179
}
6280
}

Diff for: git-odb/tests/odb/store/dynamic.rs

+19-3
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,7 @@ fn auto_refresh_with_and_without_id_stability() -> crate::Result {
571571
mod verify {
572572
use crate::store::dynamic::db;
573573
use git_features::progress;
574+
use git_testtools::fixture_path;
574575
use std::sync::atomic::AtomicBool;
575576

576577
#[test]
@@ -580,10 +581,25 @@ mod verify {
580581
.store_ref()
581582
.verify_integrity(progress::Discard, &AtomicBool::new(false), Default::default())
582583
.unwrap();
584+
assert_eq!(outcome.index_statistics.len(), 3, "there are only three packs to check");
583585
assert_eq!(
584-
outcome.pack_traverse_statistics.len(),
585-
3,
586-
"there are only three packs to check"
586+
outcome.index_statistics[0].path,
587+
fixture_path("objects/pack/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx")
588+
);
589+
assert_eq!(
590+
outcome.index_statistics[1].path,
591+
fixture_path("objects/pack/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx")
592+
);
593+
assert_eq!(
594+
outcome.index_statistics[2].path,
595+
fixture_path("objects/pack/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx")
596+
);
597+
assert_eq!(
598+
outcome.loose_object_stores,
599+
vec![git_odb::store::verify::integrity::LooseObjectStatistics {
600+
path: fixture_path("objects"),
601+
statistics: git_odb::loose::verify::integrity::Statistics { num_objects: 7 }
602+
}]
587603
);
588604

589605
assert_eq!(

Diff for: git-odb/tests/odb/store/loose.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use git_actor::{Sign, Time};
22
use git_object::bstr::ByteSlice;
3+
use std::sync::atomic::AtomicBool;
34

5+
use git_features::progress;
46
use git_odb::loose::Store;
57
use pretty_assertions::assert_eq;
68

@@ -35,7 +37,7 @@ pub fn locate_oid(id: git_hash::ObjectId, buf: &mut Vec<u8>) -> git_object::Data
3537
// Verifies the loose object database returned by `ldb()` and checks the reported object count.
#[test]
3638
fn verify_integrity() {
3739
let db = ldb();
38-
let outcome = db.verify_integrity().unwrap();
40+
// Progress output is discarded and the interrupt flag is `false`, so the run is not interrupted.
let outcome = db.verify_integrity(progress::Discard, &AtomicBool::new(false)).unwrap();
3941
assert_eq!(outcome.num_objects, 7);
4042
}
4143

0 commit comments

Comments
 (0)