Skip to content

Commit ad3c803

Browse files
committed
Merge branch 'index-verification'
2 parents a605b67 + afdeca1 commit ad3c803

File tree

26 files changed

+532
-130
lines changed

26 files changed

+532
-130
lines changed

Diff for: Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: README.md

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ Please see _'Development Status'_ for a listing of all crates and their capabili
4242
* [x] **verify** - assure that a commit-graph is consistent
4343
* **repository**
4444
* **verify** - validate a whole repository, for now only the object database.
45+
* **index**
46+
* [x] **entries** - show detailed entry information for human or machine consumption (via JSON)
47+
* [x] **info** - display general information about the index itself, with detailed extension information by default
4548
* **remote**
4649
* [ref-list](https://asciinema.org/a/359320) - list all (or given) references from a remote at the given URL
4750

Diff for: git-index/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@ internal-testing-to-avoid-being-run-by-cargo-test-all = []
3030
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
3131

3232
[dependencies]
33-
git-features = { version = "^0.19.1", path = "../git-features", features = ["rustsha1"] }
33+
git-features = { version = "^0.19.1", path = "../git-features", features = ["rustsha1", "progress"] }
3434
git-hash = { version = "^0.9.1", path = "../git-hash" }
3535
git-bitmap = { version = "^0.0.1", path = "../git-bitmap" }
36+
git-object = { version = "^0.17.0", path = "../git-object" }
3637

3738
quick-error = "2.0.0"
3839
memmap2 = "0.5.0"

Diff for: git-index/src/access.rs

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
use crate::{extension, Entry, State, Version};
2+
3+
impl State {
4+
pub fn version(&self) -> Version {
5+
self.version
6+
}
7+
8+
pub fn entries(&self) -> &[Entry] {
9+
&self.entries
10+
}
11+
pub fn tree(&self) -> Option<&extension::Tree> {
12+
self.tree.as_ref()
13+
}
14+
pub fn link(&self) -> Option<&extension::Link> {
15+
self.link.as_ref()
16+
}
17+
pub fn resolve_undo(&self) -> Option<&extension::resolve_undo::Paths> {
18+
self.resolve_undo.as_ref()
19+
}
20+
pub fn untracked(&self) -> Option<&extension::UntrackedCache> {
21+
self.untracked.as_ref()
22+
}
23+
pub fn fs_monitor(&self) -> Option<&extension::FsMonitor> {
24+
self.fs_monitor.as_ref()
25+
}
26+
}

Diff for: git-index/src/entry.rs

+24
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,9 @@ impl Flags {
114114
#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)]
115115
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
116116
pub struct Time {
117+
/// The number of seconds elapsed since EPOCH
117118
pub secs: u32,
119+
/// The number of nanoseconds elapsed in the current second, ranging from 0 to 999,999,999.
118120
pub nsecs: u32,
119121
}
120122

@@ -140,5 +142,27 @@ mod access {
140142
pub fn path<'a>(&self, state: &'a State) -> &'a BStr {
141143
(&state.path_backing[self.path.clone()]).as_bstr()
142144
}
145+
146+
pub fn stage(&self) -> u32 {
147+
self.flags.stage()
148+
}
149+
}
150+
}
151+
152+
mod _impls {
153+
use std::cmp::Ordering;
154+
155+
use crate::{Entry, State};
156+
157+
impl Entry {
158+
pub fn cmp(&self, other: &Self, state: &State) -> Ordering {
159+
let lhs = self.path(state);
160+
let rhs = other.path(state);
161+
let common_len = lhs.len().min(rhs.len());
162+
lhs[..common_len]
163+
.cmp(&rhs[..common_len])
164+
.then_with(|| lhs.len().cmp(&rhs.len()))
165+
.then_with(|| self.stage().cmp(&other.stage()))
166+
}
143167
}
144168
}

Diff for: git-index/src/extension/mod.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@ pub struct Iter<'a> {
1616
/// if there was no change to them. Portions of this tree are invalidated as the index is changed.
1717
pub struct Tree {
1818
pub name: SmallVec<[u8; 23]>,
19-
/// Only set if there are any entries in the index we are associated with.
20-
pub id: Option<tree::NodeId>,
19+
/// The id of the directory tree of the associated tree object.
20+
pub id: git_hash::ObjectId,
21+
/// The number of non-tree items in this directory tree, including those in sub-trees, recursively.
22+
/// The value of the top-level tree is thus equal to the total number of entries.
23+
pub num_entries: u32,
2124
pub children: Vec<Tree>,
2225
}
2326

@@ -26,6 +29,7 @@ pub struct Link {
2629
pub bitmaps: Option<link::Bitmaps>,
2730
}
2831

32+
#[allow(dead_code)]
2933
pub struct UntrackedCache {
3034
/// Something identifying the location and machine that this cache is for.
3135
/// Should the repository be copied to a different machine, the entire cache can immediately be invalidated.
@@ -42,6 +46,7 @@ pub struct UntrackedCache {
4246
directories: Vec<untracked_cache::Directory>,
4347
}
4448

49+
#[allow(dead_code)]
4550
pub struct FsMonitor {
4651
token: fs_monitor::Token,
4752
/// if a bit is true, the respective entry is NOT valid as per the fs monitor.
@@ -54,7 +59,7 @@ pub(crate) mod fs_monitor;
5459

5560
pub(crate) mod decode;
5661

57-
pub(crate) mod tree;
62+
pub mod tree;
5863

5964
pub(crate) mod end_of_index_entry;
6065

Diff for: git-index/src/extension/resolve_undo.rs

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::{
88

99
pub type Paths = Vec<ResolvePath>;
1010

11+
#[allow(dead_code)]
1112
pub struct ResolvePath {
1213
/// relative to the root of the repository, or what would be stored in the index
1314
name: BString,
@@ -16,6 +17,7 @@ pub struct ResolvePath {
1617
stages: [Option<Stage>; 3],
1718
}
1819

20+
#[allow(dead_code)]
1921
pub struct Stage {
2022
mode: u32,
2123
id: ObjectId,

Diff for: git-index/src/extension/tree.rs

+136-21
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,136 @@
1+
use std::cmp::Ordering;
2+
3+
use bstr::ByteSlice;
14
use git_hash::ObjectId;
25

36
use crate::{
47
extension::{Signature, Tree},
5-
util::split_at_byte_exclusive,
8+
util::{split_at_byte_exclusive, split_at_pos},
69
};
710

811
pub const SIGNATURE: Signature = *b"TREE";
912

10-
pub struct NodeId {
11-
/// The id of the directory tree of the associated tree object.
12-
pub id: git_hash::ObjectId,
13-
/// The amount of non-tree entries contained within, and definitely not zero.
14-
pub entry_count: u32,
13+
pub mod verify {
14+
use bstr::BString;
15+
use quick_error::quick_error;
16+
17+
quick_error! {
18+
#[derive(Debug)]
19+
pub enum Error {
20+
MissingTreeDirectory { parent_id: git_hash::ObjectId, entry_id: git_hash::ObjectId, name: BString } {
21+
display("The entry {} at path '{}' in parent tree {} wasn't found in the nodes children, making it incomplete", entry_id, name, parent_id)
22+
}
23+
TreeNodeNotFound { oid: git_hash::ObjectId } {
24+
display("The tree with id {} wasn't found in the object database", oid)
25+
}
26+
TreeNodeChildcountMismatch { oid: git_hash::ObjectId, expected_childcount: usize, actual_childcount: usize } {
27+
display("The tree with id {} should have {} children, but its cached representation had {} of them", oid, expected_childcount, actual_childcount)
28+
}
29+
RootWithName { name: BString } {
30+
display("The root tree was named '{}', even though it should be empty", name)
31+
}
32+
EntriesCount {actual: u32, expected: u32 } {
33+
display("Expected not more than {} entries to be reachable from the top-level, but actual count was {}", expected, actual)
34+
}
35+
OutOfOrder { parent_id: git_hash::ObjectId, current_path: BString, previous_path: BString } {
36+
display("Parent tree '{}' contained out-of order trees prev = '{}' and next = '{}'", parent_id, previous_path, current_path)
37+
}
38+
}
39+
}
40+
}
41+
42+
impl Tree {
43+
pub fn verify<F>(&self, use_find: bool, mut find: F) -> Result<(), verify::Error>
44+
where
45+
F: for<'a> FnMut(&git_hash::oid, &'a mut Vec<u8>) -> Option<git_object::TreeRefIter<'a>>,
46+
{
47+
fn verify_recursive<F>(
48+
parent_id: git_hash::ObjectId,
49+
children: &[Tree],
50+
mut find_buf: Option<&mut Vec<u8>>,
51+
find: &mut F,
52+
) -> Result<Option<u32>, verify::Error>
53+
where
54+
F: for<'a> FnMut(&git_hash::oid, &'a mut Vec<u8>) -> Option<git_object::TreeRefIter<'a>>,
55+
{
56+
if children.is_empty() {
57+
return Ok(None);
58+
}
59+
let mut entries = 0;
60+
let mut prev = None::<&Tree>;
61+
for child in children {
62+
entries += child.num_entries;
63+
if let Some(prev) = prev {
64+
if prev.name.cmp(&child.name) != Ordering::Less {
65+
return Err(verify::Error::OutOfOrder {
66+
parent_id,
67+
previous_path: prev.name.as_bstr().into(),
68+
current_path: child.name.as_bstr().into(),
69+
});
70+
}
71+
}
72+
prev = Some(child);
73+
}
74+
if let Some(buf) = find_buf.as_mut() {
75+
let tree_entries = find(&parent_id, *buf).ok_or(verify::Error::TreeNodeNotFound { oid: parent_id })?;
76+
let mut num_entries = 0;
77+
for entry in tree_entries
78+
.filter_map(Result::ok)
79+
.filter(|e| e.mode == git_object::tree::EntryMode::Tree)
80+
{
81+
children
82+
.binary_search_by(|e| e.name.as_bstr().cmp(entry.filename))
83+
.map_err(|_| verify::Error::MissingTreeDirectory {
84+
parent_id,
85+
entry_id: entry.oid.to_owned(),
86+
name: entry.filename.to_owned(),
87+
})?;
88+
num_entries += 1;
89+
}
90+
91+
if num_entries != children.len() {
92+
return Err(verify::Error::TreeNodeChildcountMismatch {
93+
oid: parent_id,
94+
expected_childcount: num_entries,
95+
actual_childcount: children.len(),
96+
});
97+
}
98+
}
99+
for child in children {
100+
// This is actually needed here as it's a mut ref, which isn't copy. We do a re-borrow here.
101+
#[allow(clippy::needless_option_as_deref)]
102+
let actual_num_entries = verify_recursive(child.id, &child.children, find_buf.as_deref_mut(), find)?;
103+
if let Some(actual) = actual_num_entries {
104+
if actual > child.num_entries {
105+
return Err(verify::Error::EntriesCount {
106+
actual,
107+
expected: child.num_entries,
108+
});
109+
}
110+
}
111+
}
112+
Ok(entries.into())
113+
}
114+
115+
if !self.name.is_empty() {
116+
return Err(verify::Error::RootWithName {
117+
name: self.name.as_bstr().into(),
118+
});
119+
}
120+
121+
let mut buf = Vec::new();
122+
let declared_entries = verify_recursive(self.id, &self.children, use_find.then(|| &mut buf), &mut find)?;
123+
if let Some(actual) = declared_entries {
124+
if actual > self.num_entries {
125+
return Err(verify::Error::EntriesCount {
126+
actual,
127+
expected: self.num_entries,
128+
});
129+
}
130+
}
131+
132+
Ok(())
133+
}
15134
}
16135

17136
/// A recursive data structure
@@ -29,32 +148,28 @@ pub fn one_recursive(data: &[u8], hash_len: usize) -> Option<(Tree, &[u8])> {
29148
let (path, data) = split_at_byte_exclusive(data, 0)?;
30149

31150
let (entry_count, data) = split_at_byte_exclusive(data, b' ')?;
32-
let entry_count: u32 = atoi::atoi(entry_count)?;
151+
let num_entries: u32 = atoi::atoi(entry_count)?;
33152

34-
let (subtree_count, mut data) = split_at_byte_exclusive(data, b'\n')?;
153+
let (subtree_count, data) = split_at_byte_exclusive(data, b'\n')?;
35154
let subtree_count: usize = atoi::atoi(subtree_count)?;
36155

37-
let node_id = (entry_count != 0)
38-
.then(|| {
39-
(data.len() >= hash_len).then(|| {
40-
let (hash, rest) = data.split_at(hash_len);
41-
data = rest;
42-
ObjectId::from(hash)
43-
})
44-
})
45-
.flatten()
46-
.map(|id| NodeId { id, entry_count });
156+
let (hash, mut data) = split_at_pos(data, hash_len)?;
157+
let id = ObjectId::from(hash);
47158

48159
let mut subtrees = Vec::with_capacity(subtree_count);
49160
for _ in 0..subtree_count {
50161
let (tree, rest) = one_recursive(data, hash_len)?;
51-
subtrees.push(tree);
162+
match subtrees.binary_search_by(|t: &Tree| t.name.cmp(&tree.name)) {
163+
Ok(_existing_index) => return None,
164+
Err(insert_position) => subtrees.insert(insert_position, tree),
165+
}
52166
data = rest;
53167
}
54168

55169
Some((
56170
Tree {
57-
id: node_id,
171+
id,
172+
num_entries,
58173
name: path.into(),
59174
children: subtrees,
60175
},
@@ -68,6 +183,6 @@ mod tests {
68183

69184
#[test]
70185
fn size_of_tree() {
71-
assert_eq!(std::mem::size_of::<Tree>(), 88);
186+
assert_eq!(std::mem::size_of::<Tree>(), 80);
72187
}
73188
}

Diff for: git-index/src/file.rs

+2-46
Original file line numberDiff line numberDiff line change
@@ -18,49 +18,5 @@ mod impls {
1818
}
1919
}
2020

21-
pub mod init {
22-
#![allow(unused)]
23-
24-
use std::path::{Path, PathBuf};
25-
26-
use memmap2::Mmap;
27-
28-
use crate::{decode, extension, File, State};
29-
30-
mod error {
31-
use quick_error::quick_error;
32-
33-
quick_error! {
34-
#[derive(Debug)]
35-
pub enum Error {
36-
Io(err: std::io::Error) {
37-
display("An IO error occurred while opening the index")
38-
source(err)
39-
from()
40-
}
41-
Decode(err: crate::decode::Error) {
42-
display("The file could not be decoded")
43-
source(err)
44-
from()
45-
}
46-
}
47-
}
48-
}
49-
pub use error::Error;
50-
51-
impl File {
52-
pub fn at(path: impl Into<PathBuf>, options: decode::Options) -> Result<Self, Error> {
53-
let path = path.into();
54-
let (data, mtime) = {
55-
// SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
56-
let file = std::fs::File::open(&path)?;
57-
#[allow(unsafe_code)]
58-
let data = unsafe { Mmap::map(&file)? };
59-
(data, filetime::FileTime::from_last_modification_time(&file.metadata()?))
60-
};
61-
62-
let (state, checksum) = State::from_bytes(&data, mtime, options)?;
63-
Ok(File { state, path, checksum })
64-
}
65-
}
66-
}
21+
pub mod init;
22+
pub mod verify;

0 commit comments

Comments
 (0)