Skip to content

Commit 69faad0

Browse files
committed
Merge branch 'fix-819'
2 parents b5d2654 + 07e11cf commit 69faad0

File tree

19 files changed

+304
-44
lines changed

19 files changed

+304
-44
lines changed

Diff for: gix-object/src/data.rs

+1-5
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,7 @@ pub mod verify {
6969
/// hash of `self`.
7070
pub fn verify_checksum(&self, desired: impl AsRef<gix_hash::oid>) -> Result<(), Error> {
7171
let desired = desired.as_ref();
72-
let mut hasher = gix_features::hash::hasher(desired.kind());
73-
hasher.update(&crate::encode::loose_header(self.kind, self.data.len()));
74-
hasher.update(self.data);
75-
76-
let actual_id = gix_hash::ObjectId::from(hasher.digest());
72+
let actual_id = crate::compute_hash(desired.kind(), self.kind, self.data);
7773
if desired != actual_id {
7874
return Err(Error::ChecksumMismatch {
7975
desired: desired.into(),

Diff for: gix-object/src/lib.rs

+11
Original file line numberDiff line numberDiff line change
@@ -375,3 +375,14 @@ pub mod decode {
375375
Ok((kind, size, size_end + 1))
376376
}
377377
}
378+
379+
/// A standalone function to compute a hash of kind `hash_kind` for an object of `object_kind` and its `data`.
///
/// The hasher is fed the loose header built from `object_kind` and `data.len()` first, followed by `data` itself,
/// and the resulting digest is returned as object id.
380+
pub fn compute_hash(hash_kind: gix_hash::Kind, object_kind: Kind, data: &[u8]) -> gix_hash::ObjectId {
381+
let header = encode::loose_header(object_kind, data.len());
382+
383+
let mut hasher = gix_features::hash::hasher(hash_kind);
384+
hasher.update(&header);
385+
hasher.update(data);
386+
387+
hasher.digest().into()
388+
}
File renamed without changes.

Diff for: gix-object/tests/object.rs

+14
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,20 @@ use gix_hash::ObjectId;
44

55
mod encode;
66
mod immutable;
7+
mod loose;
8+
9+
// Hashing empty data as blob and as tree must yield the ids that `ObjectId` reports for the empty blob and empty tree.
#[test]
10+
fn compute_hash() {
11+
let hk = gix_hash::Kind::Sha1;
12+
assert_eq!(
13+
gix_object::compute_hash(hk, gix_object::Kind::Blob, &[]),
14+
gix_hash::ObjectId::empty_blob(hk)
15+
);
16+
assert_eq!(
17+
gix_object::compute_hash(hk, gix_object::Kind::Tree, &[]),
18+
gix_hash::ObjectId::empty_tree(hk)
19+
);
20+
}
721

822
type Result<T = ()> = std::result::Result<T, Box<dyn std::error::Error>>;
923

Diff for: gix-odb/src/lib.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ pub fn sink(object_hash: gix_hash::Kind) -> Sink {
6565
}
6666
}
6767

68-
///
69-
pub mod sink;
68+
mod sink;
7069

7170
///
7271
pub mod find;

Diff for: gix-odb/src/sink.rs

+16-20
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ impl crate::traits::Write for Sink {
3030
mut from: impl io::Read,
3131
) -> Result<gix_hash::ObjectId, Self::Error> {
3232
let mut size = size.try_into().expect("object size to fit into usize");
33-
use gix_features::hash::Sha1;
3433
let mut buf = [0u8; 8096];
3534
let header = gix_object::encode::loose_header(kind, size);
3635

@@ -40,27 +39,24 @@ impl crate::traits::Write for Sink {
4039
}
4140
Ok(())
4241
};
43-
match self.object_hash {
44-
gix_hash::Kind::Sha1 => {
45-
let mut hasher = Sha1::default();
46-
hasher.update(&header);
47-
possibly_compress(&header)?;
4842

49-
while size != 0 {
50-
let bytes = size.min(buf.len());
51-
from.read_exact(&mut buf[..bytes])?;
52-
hasher.update(&buf[..bytes]);
53-
possibly_compress(&buf[..bytes])?;
54-
size -= bytes;
55-
}
56-
if let Some(compressor) = self.compressor.as_ref() {
57-
let mut c = compressor.borrow_mut();
58-
c.flush()?;
59-
c.reset();
60-
}
43+
let mut hasher = gix_features::hash::hasher(self.object_hash);
44+
hasher.update(&header);
45+
possibly_compress(&header)?;
6146

62-
Ok(hasher.digest().into())
63-
}
47+
while size != 0 {
48+
let bytes = size.min(buf.len());
49+
from.read_exact(&mut buf[..bytes])?;
50+
hasher.update(&buf[..bytes]);
51+
possibly_compress(&buf[..bytes])?;
52+
size -= bytes;
53+
}
54+
if let Some(compressor) = self.compressor.as_ref() {
55+
let mut c = compressor.borrow_mut();
56+
c.flush()?;
57+
c.reset();
6458
}
59+
60+
Ok(hasher.digest().into())
6561
}
6662
}

Diff for: gix-odb/src/store_impls/loose/write.rs

+40-1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,16 @@ impl crate::traits::Write for Store {
9898

9999
type CompressedTempfile = deflate::Write<NamedTempFile>;
100100

101+
/// Access
102+
impl Store {
103+
/// Return the path to the object with `id`.
104+
///
105+
/// Note that it may not exist yet.
106+
pub fn object_path(&self, id: &gix_hash::oid) -> PathBuf {
107+
loose::hash_path(id, self.path.clone())
108+
}
109+
}
110+
101111
impl Store {
102112
fn dest(&self) -> Result<hash::Write<CompressedTempfile>, Error> {
103113
Ok(hash::Write::new(
@@ -126,7 +136,36 @@ impl Store {
126136
}
127137
}
128138
let file = file.into_inner();
129-
file.persist(&object_path).map_err(|err| Error::Persist {
139+
let res = file.persist(&object_path);
140+
// On windows, we assume that such errors are due to its special filesystem semantics,
141+
// on any other platform that would be a legitimate error though.
142+
#[cfg(windows)]
143+
if let Err(err) = &res {
144+
if err.error.kind() == std::io::ErrorKind::PermissionDenied
145+
|| err.error.kind() == std::io::ErrorKind::AlreadyExists
146+
{
147+
return Ok(id);
148+
}
149+
}
150+
#[cfg(unix)]
151+
if let Ok(mut perm) = object_path.metadata().map(|m| m.permissions()) {
152+
use std::os::unix::fs::PermissionsExt;
153+
/// For now we assume the default with standard umask. This can be more sophisticated,
154+
/// but we have the bare minimum.
155+
fn comp_mode(_mode: u32) -> u32 {
156+
0o444
157+
}
158+
let new_mode = comp_mode(perm.mode());
159+
if (perm.mode() ^ new_mode) & !0o170000 != 0 {
160+
perm.set_mode(new_mode);
161+
std::fs::set_permissions(&object_path, perm).map_err(|err| Error::Io {
162+
source: err,
163+
message: "Failed to set permission bits",
164+
path: object_path.clone(),
165+
})?;
166+
}
167+
}
168+
res.map_err(|err| Error::Persist {
130169
source: err,
131170
target: object_path,
132171
})?;

Diff for: gix-odb/tests/fixtures/generated-archives/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
repo_with_loose_objects.tar.xz

Diff for: gix-odb/tests/fixtures/repo_with_loose_objects.sh

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
2+
set -eu -o pipefail
3+
4+
git init -q
5+
6+
git checkout -b main
7+
touch this
8+
git add this
9+
git commit -q -m c1
10+
echo hello >> this
11+
git commit -q -am c2
12+

Diff for: gix-odb/tests/odb/store/loose.rs

+56
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,62 @@ mod write {
7070
}
7171
Ok(())
7272
}
73+
74+
// Compare the permissions of the empty blob found in the scripted fixture's `.git/objects`
// with those of an empty blob written by our own loose store into a temporary directory.
#[test]
75+
#[cfg(unix)]
76+
fn it_writes_objects_with_similar_permissions() -> crate::Result {
77+
let hk = gix_hash::Kind::Sha1;
78+
let git_store = loose::Store::at(
79+
gix_testtools::scripted_fixture_read_only("repo_with_loose_objects.sh")?.join(".git/objects"),
80+
hk,
81+
);
82+
let expected_perm = git_store
83+
.object_path(&gix_hash::ObjectId::empty_blob(hk))
84+
.metadata()?
85+
.permissions();
86+
87+
let tmp = tempfile::TempDir::new()?;
88+
let store = loose::Store::at(tmp.path(), hk);
89+
store.write_buf(gix_object::Kind::Blob, &[])?;
90+
let actual_perm = store
91+
.object_path(&gix_hash::ObjectId::empty_blob(hk))
92+
.metadata()?
93+
.permissions();
94+
assert_eq!(
95+
actual_perm, expected_perm,
96+
"we explicitly equalize permissions to be similar to what `git` would do"
97+
);
98+
Ok(())
99+
}
100+
101+
// Two threads each write the same empty tree and empty blob twice into one shared store directory;
// every write is expected to succeed and the objects must be present afterwards.
#[test]
102+
fn collisions_do_not_cause_failure() -> crate::Result {
103+
let dir = tempfile::tempdir()?;
104+
105+
fn write_empty_trees(dir: &std::path::Path) {
106+
let db = loose::Store::at(dir, gix_hash::Kind::Sha1);
107+
let empty_tree = gix_object::Tree::empty();
108+
for _ in 0..2 {
109+
let id = db.write(&empty_tree).expect("works");
110+
assert!(db.contains(id), "written objects are actually available");
111+
112+
let empty_blob = db.write_buf(gix_object::Kind::Blob, &[]).expect("works");
113+
assert!(db.contains(empty_blob), "written objects are actually available");
114+
let id = db
115+
.write_stream(gix_object::Kind::Blob, 0, &mut [].as_slice())
116+
.expect("works");
117+
assert_eq!(id, empty_blob);
118+
assert!(db.contains(empty_blob), "written objects are actually available");
119+
}
120+
}
121+
122+
gix_features::parallel::threads(|scope| {
123+
scope.spawn(|| write_empty_trees(dir.path()));
124+
scope.spawn(|| write_empty_trees(dir.path()));
125+
});
126+
127+
Ok(())
128+
}
73129
}
74130

75131
mod contains {

Diff for: gix-pack/src/index/traverse/mod.rs

+1-5
Original file line numberDiff line numberDiff line change
@@ -216,11 +216,7 @@ where
216216
E: std::error::Error + Send + Sync + 'static,
217217
{
218218
if check.object_checksum() {
219-
let mut hasher = gix_features::hash::hasher(index_entry.oid.kind());
220-
hasher.update(&gix_object::encode::loose_header(object_kind, decompressed.len()));
221-
hasher.update(decompressed);
222-
223-
let actual_oid = gix_hash::ObjectId::from(hasher.digest());
219+
let actual_oid = gix_object::compute_hash(index_entry.oid.kind(), object_kind, decompressed);
224220
if actual_oid != index_entry.oid {
225221
return Err(Error::PackObjectMismatch {
226222
actual: actual_oid,

Diff for: gix-worktree/src/status/content.rs

+1-5
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,7 @@ impl CompareBlobs for FastEq {
5353
return Ok(Some(()));
5454
}
5555
let blob = worktree_blob.read_data()?;
56-
let header = loose_header(gix_object::Kind::Blob, blob.len());
57-
let mut hasher = hash::hasher(entry.id.kind());
58-
hasher.update(&header);
59-
hasher.update(blob);
60-
let file_hash: ObjectId = hasher.digest().into();
56+
let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob);
6157
Ok((entry.id != file_hash).then_some(()))
6258
}
6359
}

Diff for: gix/src/object/mod.rs

+8
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ impl<'repo> Object<'repo> {
9090
}
9191
}
9292

93+
/// Transform this object into a tag, or panic if it is not one.
///
/// The panic message names the id of the object and the kind it actually has.
94+
pub fn into_tag(self) -> Tag<'repo> {
95+
match self.try_into() {
96+
Ok(tag) => tag,
97+
Err(this) => panic!("Tried to use {} as tag, but was {}", this.id, this.kind),
98+
}
99+
}
100+
93101
/// Transform this object into a commit, or return it as part of the `Err` if it is no commit.
94102
pub fn try_into_commit(self) -> Result<Commit<'repo>, try_into::Error> {
95103
self.try_into().map_err(|this: Self| try_into::Error {

Diff for: gix/src/object/tag.rs

+11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,17 @@
11
use crate::{ext::ObjectIdExt, Tag};
22

33
impl<'repo> Tag<'repo> {
4+
/// Decode the entire tag object and return it for accessing all tag information.
5+
///
6+
/// This never allocates.
7+
///
8+
/// Note that the returned tag object does make lookup easy and should be
9+
/// used for successive calls to string-ish information to avoid decoding the object
10+
/// more than once.
11+
pub fn decode(&self) -> Result<gix_object::TagRef<'_>, gix_object::decode::Error> {
12+
gix_object::TagRef::from_bytes(&self.data)
13+
}
14+
415
/// Decode this tag partially and return the id of its target.
516
pub fn target_id(&self) -> Result<crate::Id<'repo>, gix_object::decode::Error> {
617
gix_object::TagRefIter::from_bytes(&self.data)

Diff for: gix/src/repository/object.rs

+46-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#![allow(clippy::result_large_err)]
22
use std::convert::TryInto;
3+
use std::ops::DerefMut;
34

45
use gix_hash::ObjectId;
56
use gix_odb::{Find, FindExt, Write};
@@ -58,32 +59,71 @@ impl crate::Repository {
5859
}
5960
}
6061

62+
/// Mutably borrow `self.bufs` and return its last buffer after clearing it.
///
/// If there is no buffer yet, one with a capacity of 512 bytes is pushed first.
/// The returned guard keeps `self.bufs` borrowed for as long as it is alive.
fn shared_empty_buf(&self) -> std::cell::RefMut<'_, Vec<u8>> {
63+
let mut bufs = self.bufs.borrow_mut();
64+
if bufs.last().is_none() {
65+
bufs.push(Vec::with_capacity(512));
66+
}
67+
std::cell::RefMut::map(bufs, |bufs| {
68+
let buf = bufs.last_mut().expect("we assure one is present");
69+
buf.clear();
70+
buf
71+
})
72+
}
73+
6174
/// Write the given object into the object database and return its object id.
75+
///
76+
/// Note that we hash the object in memory to avoid storing objects that are already present. That way,
77+
/// we avoid writing duplicate objects using slow disks that will eventually have to be garbage collected.
6278
pub fn write_object(&self, object: impl gix_object::WriteTo) -> Result<Id<'_>, object::write::Error> {
79+
let mut buf = self.shared_empty_buf();
80+
object.write_to(buf.deref_mut())?;
81+
82+
let oid = gix_object::compute_hash(self.object_hash(), object.kind(), &buf);
83+
if self.objects.contains(oid) {
84+
return Ok(oid.attach(self));
85+
}
86+
6387
self.objects
64-
.write(object)
88+
.write_buf(object.kind(), &buf)
6589
.map(|oid| oid.attach(self))
6690
.map_err(Into::into)
6791
}
6892

6993
/// Write a blob from the given `bytes`.
94+
///
95+
/// We avoid writing duplicate objects to slow disks that will eventually have to be garbage collected by
96+
/// pre-hashing the data, and checking if the object is already present.
7097
pub fn write_blob(&self, bytes: impl AsRef<[u8]>) -> Result<Id<'_>, object::write::Error> {
98+
let bytes = bytes.as_ref();
99+
let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, bytes);
100+
if self.objects.contains(oid) {
101+
return Ok(oid.attach(self));
102+
}
71103
self.objects
72-
.write_buf(gix_object::Kind::Blob, bytes.as_ref())
104+
.write_buf(gix_object::Kind::Blob, bytes)
73105
.map(|oid| oid.attach(self))
74106
}
75107

76108
/// Write a blob from the given `Read` implementation.
109+
///
110+
/// Note that we hash the object in memory to avoid storing objects that are already present. That way,
111+
/// we avoid writing duplicate objects using slow disks that will eventually have to be garbage collected.
112+
///
113+
/// If that is prohibitive, use the object database directly.
77114
pub fn write_blob_stream(
78115
&self,
79116
mut bytes: impl std::io::Read + std::io::Seek,
80117
) -> Result<Id<'_>, object::write::Error> {
81-
let current = bytes.stream_position()?;
82-
let len = bytes.seek(std::io::SeekFrom::End(0))? - current;
83-
bytes.seek(std::io::SeekFrom::Start(current))?;
118+
let mut buf = self.shared_empty_buf();
119+
std::io::copy(&mut bytes, buf.deref_mut())?;
120+
let oid = gix_object::compute_hash(self.object_hash(), gix_object::Kind::Blob, &buf);
121+
if self.objects.contains(oid) {
122+
return Ok(oid.attach(self));
123+
}
84124

85125
self.objects
86-
.write_stream(gix_object::Kind::Blob, len, bytes)
126+
.write_buf(gix_object::Kind::Blob, &buf)
87127
.map(|oid| oid.attach(self))
88128
}
89129

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:930d562f57aa9c1a1894d715dbb3e4f70beeb27b356b9c88a9d39eb7d211dc6f
3+
size 10872

0 commit comments

Comments
 (0)