Skip to content

Commit 805f432

Browse files
committed
Merge branch 'index-write-refactor'
2 parents 0f8680a + 3af5121 commit 805f432

33 files changed

+431
-323
lines changed

Diff for: Cargo.lock

+9-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: git-index/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ bstr = { version = "0.2.13", default-features = false }
4545
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
4646
smallvec = "1.7.0"
4747
atoi = "1.0.0"
48+
itoa = "1.0.3"
4849
bitflags = "1.3.2"
4950

5051
document-features = { version = "0.2.0", optional = true }

Diff for: git-index/src/decode/header.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ pub(crate) const SIZE: usize = 4 /*signature*/ + 4 /*version*/ + 4 /* num entrie
22

33
use crate::{util::from_be_u32, Version};
44

5+
pub(crate) const SIGNATURE: &[u8] = b"DIRC";
6+
57
mod error {
68

79
/// The error produced when failing to decode an index header.
@@ -23,7 +25,6 @@ pub(crate) fn decode(data: &[u8], object_hash: git_hash::Kind) -> Result<(Versio
2325
));
2426
}
2527

26-
const SIGNATURE: &[u8] = b"DIRC";
2728
let (signature, data) = data.split_at(4);
2829
if signature != SIGNATURE {
2930
return Err(Error::Corrupt(

Diff for: git-index/src/entry/flags.rs

+18-10
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
use crate::entry::Stage;
2-
use crate::Version;
32
use bitflags::bitflags;
43

54
bitflags! {
65
/// In-memory flags
76
pub struct Flags: u32 {
87
/// The mask to apply to obtain the stage number of an entry.
98
const STAGE_MASK = 0x3000;
9+
/// If set, additional bits need to be written to storage.
10+
const EXTENDED = 0x4000;
1011
// TODO: could we use the pathlen ourselves to save 8 bytes? And how to handle longer paths than that? 0 as sentinel maybe?
1112
/// The mask to obtain the length of the path associated with this entry.
1213
const PATH_LEN = 0x0fff;
@@ -49,9 +50,6 @@ bitflags! {
4950
/// Stored at rest
5051
const SKIP_WORKTREE = 1 << 30;
5152

52-
/// flags that need to be stored on disk in a V3 formatted index.
53-
const EXTENDED_FLAGS = 1 << 29 | 1 << 30;
54-
5553
/// For future extension
5654
const EXTENDED_2 = 1 << 31;
5755
}
@@ -64,10 +62,17 @@ impl Flags {
6462
}
6563

6664
/// Transform ourselves to a storage representation to keep all flags which are to be persisted,
67-
/// with the caller intending to write `version`.
68-
pub fn to_storage(&self, version: Version) -> at_rest::Flags {
69-
assert_eq!(version, Version::V2, "Can only encode V2 flags at the moment");
70-
at_rest::Flags::from_bits(self.bits() as u16).unwrap()
65+
/// skipping all extended flags. Note that the caller has to check for the `EXTENDED` bit to be present
66+
/// and write extended flags as well if so.
67+
pub fn to_storage(mut self) -> at_rest::Flags {
68+
at_rest::Flags::from_bits(
69+
{
70+
self.remove(Self::PATH_LEN);
71+
self
72+
}
73+
.bits() as u16,
74+
)
75+
.unwrap()
7176
}
7277
}
7378

@@ -89,8 +94,7 @@ pub(crate) mod at_rest {
8994

9095
impl Flags {
9196
pub fn to_memory(self) -> super::Flags {
92-
super::Flags::from_bits((self & (Flags::PATH_LEN | Flags::STAGE_MASK | Flags::ASSUME_VALID)).bits as u32)
93-
.expect("PATHLEN is part of memory representation")
97+
super::Flags::from_bits(self.bits as u32).expect("PATHLEN is part of memory representation")
9498
}
9599
}
96100

@@ -103,6 +107,10 @@ pub(crate) mod at_rest {
103107
}
104108

105109
impl FlagsExtended {
110+
pub fn from_flags(flags: super::Flags) -> Self {
111+
Self::from_bits(((flags & (super::Flags::INTENT_TO_ADD | super::Flags::SKIP_WORKTREE)).bits >> 16) as u16)
112+
.expect("valid")
113+
}
106114
pub fn to_flags(self) -> Option<super::Flags> {
107115
super::Flags::from_bits((self.bits as u32) << 16)
108116
}

Diff for: git-index/src/entry/mod.rs

-3
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,3 @@ mod _impls {
8282
}
8383
}
8484
}
85-
86-
#[cfg(test)]
87-
mod tests;

Diff for: git-index/src/entry/tests.rs

-13
This file was deleted.

Diff for: git-index/src/entry/write.rs

+17-12
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
use crate::{Entry, State, Version};
1+
use crate::{entry, Entry, State};
22
use std::convert::TryInto;
33

44
impl Entry {
5-
/// Serialize ourselves to `out` with path access via `state`.
5+
/// Serialize ourselves to `out` with path access via `state`, without padding.
66
pub fn write_to(&self, mut out: impl std::io::Write, state: &State) -> std::io::Result<()> {
77
let stat = self.stat;
88
out.write_all(&stat.ctime.secs.to_be_bytes())?;
@@ -17,16 +17,21 @@ impl Entry {
1717
out.write_all(&stat.size.to_be_bytes())?;
1818
out.write_all(self.id.as_bytes())?;
1919
let path = self.path(state);
20-
let path_len: u16 = path
21-
.len()
22-
.try_into()
23-
.expect("Cannot handle paths longer than 16bits ever");
24-
assert!(
25-
path_len <= 0xFFF,
26-
"Paths can't be longer than 12 bits as they share space with bit flags in a u16"
27-
);
28-
let version = Version::V2; // TODO: don't hardcode once `to_storage()` can do its work without assertion
29-
out.write_all(&(self.flags.to_storage(version).bits() | path_len).to_be_bytes())?;
20+
let path_len: u16 = if path.len() >= entry::Flags::PATH_LEN.bits() as usize {
21+
entry::Flags::PATH_LEN.bits() as u16
22+
} else {
23+
path.len()
24+
.try_into()
25+
.expect("we just checked that the length is smaller than 0xfff")
26+
};
27+
out.write_all(&(self.flags.to_storage().bits() | path_len).to_be_bytes())?;
28+
if self.flags.contains(entry::Flags::EXTENDED) {
29+
out.write_all(
30+
&entry::at_rest::FlagsExtended::from_flags(self.flags)
31+
.bits()
32+
.to_be_bytes(),
33+
)?;
34+
}
3035
out.write_all(path)?;
3136
out.write_all(b"\0")
3237
}

Diff for: git-index/src/extension/end_of_index_entry.rs renamed to git-index/src/extension/end_of_index_entry/decode.rs

+17-10
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,27 @@
1-
use crate::{decode::header, extension, extension::Signature, util::from_be_u32};
2-
3-
pub const SIGNATURE: Signature = *b"EOIE";
4-
pub const SIZE: usize = 4 /* offset to extensions */ + git_hash::Kind::Sha1.len_in_bytes();
5-
pub const SIZE_WITH_HEADER: usize = crate::extension::MIN_SIZE + SIZE;
6-
1+
use crate::decode::header;
2+
use crate::extension;
3+
use crate::extension::end_of_index_entry::{MIN_SIZE, MIN_SIZE_WITH_HEADER, SIGNATURE};
4+
use crate::util::from_be_u32;
5+
6+
/// Decode the end of index entry extension, which is no more than a glorified offset to the first byte of all extensions to allow
7+
/// loading entries and extensions in parallel.
8+
///
9+
/// It is itself located at the end of the index file, which allows its location to be known and thus addressable.
10+
/// From there it's possible to traverse the chunks of all set extensions, hash them, and compare that hash with all extensions
11+
/// stored prior to this one to assure they are correct.
12+
///
13+
/// If the checksum wasn't matched, we will ignore this extension entirely.
714
pub fn decode(data: &[u8], object_hash: git_hash::Kind) -> Option<usize> {
815
let hash_len = object_hash.len_in_bytes();
9-
if data.len() < SIZE_WITH_HEADER + hash_len {
16+
if data.len() < MIN_SIZE_WITH_HEADER + hash_len {
1017
return None;
1118
}
1219

13-
let start_of_eoie = data.len() - SIZE_WITH_HEADER - hash_len;
20+
let start_of_eoie = data.len() - MIN_SIZE_WITH_HEADER - hash_len;
1421
let ext_data = &data[start_of_eoie..data.len() - hash_len];
1522

1623
let (signature, ext_size, ext_data) = extension::decode::header(ext_data);
17-
if signature != SIGNATURE || ext_size as usize != SIZE {
24+
if signature != SIGNATURE || ext_size as usize != MIN_SIZE {
1825
return None;
1926
}
2027

@@ -26,7 +33,7 @@ pub fn decode(data: &[u8], object_hash: git_hash::Kind) -> Option<usize> {
2633

2734
let mut hasher = git_features::hash::hasher(git_hash::Kind::Sha1);
2835
let mut last_chunk = None;
29-
for (signature, chunk) in extension::Iter::new(&data[offset..data.len() - SIZE_WITH_HEADER - hash_len]) {
36+
for (signature, chunk) in extension::Iter::new(&data[offset..data.len() - MIN_SIZE_WITH_HEADER - hash_len]) {
3037
hasher.update(&signature);
3138
hasher.update(&(chunk.len() as u32).to_be_bytes());
3239
last_chunk = Some(chunk);

Diff for: git-index/src/extension/end_of_index_entry/mod.rs

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
use crate::{extension, extension::Signature};
2+
3+
/// The signature of the end-of-index-entry extension
4+
pub const SIGNATURE: Signature = *b"EOIE";
5+
/// The minimal size of the extension, depending on the shortest hash.
6+
pub const MIN_SIZE: usize = 4 /* offset to extensions */ + git_hash::Kind::shortest().len_in_bytes();
7+
/// The smallest size of the extension varying by hash kind, along with the standard extension header.
8+
pub const MIN_SIZE_WITH_HEADER: usize = extension::MIN_SIZE + MIN_SIZE;
9+
10+
mod decode;
11+
pub use decode::decode;
12+
13+
mod write;
14+
pub use write::write_to;

Diff for: git-index/src/extension/end_of_index_entry/write.rs

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
use crate::extension::end_of_index_entry::SIGNATURE;
2+
use crate::extension::Signature;
3+
4+
/// Write this extension to out and generate a hash of `hash_kind` over all `prior_extensions` which are specified as `(signature, size)`
5+
/// pair. `offset_to_extensions` is the offset to the first byte past the entries, which is also the first byte of the signature of the
6+
/// first extension in `prior_extensions`. Note that `prior_extensions` must have been written prior to this one, as the name suggests,
7+
/// allowing this extension to be the last one in the index file.
8+
///
9+
/// Even if there are no `prior_extensions`, this extension will be written unconditionally.
10+
pub fn write_to(
11+
mut out: impl std::io::Write,
12+
hash_kind: git_hash::Kind,
13+
offset_to_extensions: u32,
14+
prior_extensions: impl IntoIterator<Item = (Signature, u32)>,
15+
) -> Result<(), std::io::Error> {
16+
out.write_all(&SIGNATURE)?;
17+
let extension_size: u32 = 4 + hash_kind.len_in_bytes() as u32;
18+
out.write_all(&extension_size.to_be_bytes())?;
19+
20+
out.write_all(&offset_to_extensions.to_be_bytes())?;
21+
22+
let mut hasher = git_features::hash::hasher(hash_kind);
23+
for (signature, size) in prior_extensions {
24+
hasher.update(&signature);
25+
hasher.update(&size.to_be_bytes());
26+
}
27+
out.write_all(&hasher.digest())?;
28+
29+
Ok(())
30+
}

Diff for: git-index/src/extension/mod.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
use bstr::BString;
22
use smallvec::SmallVec;
33

4-
const MIN_SIZE: usize = 4 /* signature */ + 4 /* size */;
4+
/// The size of the smallest possible extension, which is no more than a signature and a 0 indicating its size.
5+
pub const MIN_SIZE: usize = 4 /* signature */ + 4 /* size */;
56

67
/// The kind of index extension.
78
pub type Signature = [u8; 4];
@@ -25,7 +26,8 @@ pub struct Tree {
2526
pub id: git_hash::ObjectId,
2627
/// The amount of non-tree items in this directory tree, including sub-trees, recursively.
2728
/// The value of the top-level tree is thus equal to the value of the total amount of entries.
28-
pub num_entries: u32,
29+
/// If `None`, the tree is considered invalid and needs to be refreshed
30+
pub num_entries: Option<u32>,
2931
/// The child-trees below the current tree.
3032
pub children: Vec<Tree>,
3133
}
@@ -77,7 +79,8 @@ pub(crate) mod decode;
7779
///
7880
pub mod tree;
7981

80-
pub(crate) mod end_of_index_entry;
82+
///
83+
pub mod end_of_index_entry;
8184

8285
pub(crate) mod index_entry_offset_table;
8386

Diff for: git-index/src/extension/tree/decode.rs

+12-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::extension::Tree;
22
use crate::util::{split_at_byte_exclusive, split_at_pos};
33
use git_hash::ObjectId;
4+
use std::convert::TryInto;
45

56
/// A recursive data structure
67
pub fn decode(data: &[u8], object_hash: git_hash::Kind) -> Option<Tree> {
@@ -17,13 +18,20 @@ fn one_recursive(data: &[u8], hash_len: usize) -> Option<(Tree, &[u8])> {
1718
let (path, data) = split_at_byte_exclusive(data, 0)?;
1819

1920
let (entry_count, data) = split_at_byte_exclusive(data, b' ')?;
20-
let num_entries: u32 = atoi::atoi(entry_count)?;
21+
let num_entries: i32 = atoi::atoi(entry_count)?;
2122

2223
let (subtree_count, data) = split_at_byte_exclusive(data, b'\n')?;
2324
let subtree_count: usize = atoi::atoi(subtree_count)?;
2425

25-
let (hash, mut data) = split_at_pos(data, hash_len)?;
26-
let id = ObjectId::from(hash);
26+
let (id, mut data) = if num_entries >= 0 {
27+
let (hash, data) = split_at_pos(data, hash_len)?;
28+
(ObjectId::from(hash), data)
29+
} else {
30+
(
31+
ObjectId::null(git_hash::Kind::from_hex_len(hash_len * 2).expect("valid hex_len")),
32+
data,
33+
)
34+
};
2735

2836
let mut subtrees = Vec::with_capacity(subtree_count);
2937
for _ in 0..subtree_count {
@@ -42,7 +50,7 @@ fn one_recursive(data: &[u8], hash_len: usize) -> Option<(Tree, &[u8])> {
4250
Some((
4351
Tree {
4452
id,
45-
num_entries,
53+
num_entries: num_entries.try_into().ok(),
4654
name: path.into(),
4755
children: subtrees,
4856
},

0 commit comments

Comments
 (0)