GitoxideLabs
diff --git a/Cargo.lock b/Cargo.lock
+7-7
diff --git a/Cargo.toml b/Cargo.toml
+1-2
diff --git a/SHORTCOMINGS.md b/SHORTCOMINGS.md
-4
diff --git a/gix-features/Cargo.toml b/gix-features/Cargo.toml
+7-26
diff --git a/gix-features/src/hash.rs b/gix-features/src/hash.rs
+6-177
diff --git a/gix-features/tests/hash.rs b/gix-features/tests/hash.rs
-16
diff --git a/gix-hash/Cargo.toml b/gix-hash/Cargo.toml
+6
@@ -205,8 +205,7 @@ gix-hash = { opt-level = 3 }
 gix-actor = { opt-level = 3 }
 gix-config = { opt-level = 3 }
 miniz_oxide = { opt-level = 3 }
-sha1 = { opt-level = 3 }
-sha1_smol = { opt-level = 3 }
+sha1-checked = { opt-level = 3 }
 
 [profile.release]
 overflow-checks = false
 
@@ -35,7 +35,3 @@ This file is for tracking features that are less well implemented or less powerf
 * **gix-url** _might_ be more restrictive than what git allows as for the most part, it uses a browser grade URL parser.
     * Thus far there is no proof for this, and as _potential remedy_ we could certainly re-implement exactly what git does
       to handle its URLs.
-
-### `gix-features`
-
-* **sha1** isn't hardened (i.e. doesn't have collision detection). Needs [to be contributed](https://github.com/GitoxideLabs/gitoxide/issues/585).
@@ -24,7 +24,7 @@ progress = ["prodash"]
 ## Provide human-readable numbers as well as easier to read byte units for progress bars.
 progress-unit-human-numbers = ["prodash?/unit-human"]
 ## Provide human readable byte units for progress bars.
-progress-unit-bytes = ["dep:bytesize", "prodash?/unit-bytes"]
+progress-unit-bytes = ["dep:bytesize", "prodash?/unit-bytes", "gix-hash/progress-unit-bytes"]
 
 ## Provide utilities suitable for working with the `std::fs::read_dir()`.
 fs-read-dir = ["dep:gix-utils"]
@@ -77,40 +77,29 @@ zlib-stock = ["zlib", "flate2?/zlib"]
 ## may build in environments where other backends don't.
 zlib-rust-backend = ["zlib", "flate2?/rust_backend"]
 
-#! ### Mutually Exclusive SHA1
-## A fast SHA1 implementation is critical to `gitoxide's` object database performance
-## A multi-crate implementation that can use hardware acceleration, thus bearing the potential for up to 2Gb/s throughput on
-## CPUs that support it, like AMD Ryzen or Intel Core i3, as well as Apple Silicon like M1.
-## Takes precedence over `rustsha1` if both are specified.
-fast-sha1 = ["dep:sha1"]
-## A standard and well performing pure Rust implementation of Sha1. Will significantly slow down various git operations.
-rustsha1 = ["dep:sha1_smol"]
+# TODO: Remove these features; they no longer enable any SHA1 dependency and only gate the re-exports in `src/hash.rs`.
+fast-sha1 = []
+rustsha1 = []
 
 #! ### Other
 
 ## Count cache hits and misses and print that debug information on drop.
 ## Caches implement this by default, which costs nothing unless this feature is enabled
 cache-efficiency-debug = []
 
-[[test]]
-name = "hash"
-path = "tests/hash.rs"
-required-features = ["rustsha1"]
-
 [[test]]
 name = "parallel"
 path = "tests/parallel_threaded.rs"
-required-features = ["parallel", "rustsha1"]
+required-features = ["parallel"]
 
 [[test]]
 name = "multi-threaded"
 path = "tests/parallel_shared_threaded.rs"
-required-features = ["parallel", "rustsha1"]
+required-features = ["parallel"]
 
 [[test]]
 name = "single-threaded"
 path = "tests/parallel_shared.rs"
-required-features = ["rustsha1"]
 
 [[test]]
 name = "pipe"
@@ -130,10 +119,8 @@ parking_lot = { version = "0.12.0", default-features = false, optional = true }
 
 walkdir = { version = "2.3.2", optional = true } # used when parallel is off
 
-# hashing and 'fast-sha1' feature
-sha1_smol = { version = "1.0.0", optional = true }
+# hashing
 crc32fast = { version = "1.2.1", optional = true }
-sha1 = { version = "0.10.0", optional = true }
 
 # progress
 prodash = { version = "29.0.1", optional = true }
@@ -156,12 +143,6 @@ libc = { version = "0.2.119" }
 [dev-dependencies]
 bstr = { version = "1.3.0", default-features = false }
 
-
-# Assembly doesn't yet compile on MSVC on windows, but does on GNU, see https://github.com/RustCrypto/asm-hashes/issues/17
-# At this time, only aarch64, x86 and x86_64 are supported.
-[target.'cfg(all(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"), not(target_os = "windows")))'.dependencies]
-sha1 = { version = "0.10.0", optional = true, features = ["asm"] }
-
 [package.metadata.docs.rs]
 all-features = true
 features = ["document-features"]
@@ -1,54 +1,12 @@
 //! Hash functions and hash utilities
-//!
-//! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support
-//! in case it is available. Otherwise the `rustsha1` feature should be set. `fast-sha1` will take precedence.
-//! Otherwise, a minimal yet performant implementation is used instead for a decent trade-off between compile times and run-time performance.
-#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
-mod _impl {
-    use super::Digest;
-
-    /// A implementation of the Sha1 hash, which can be used once.
-    #[derive(Default, Clone)]
-    pub struct Sha1(sha1_smol::Sha1);
-
-    impl Sha1 {
-        /// Digest the given `bytes`.
-        pub fn update(&mut self, bytes: &[u8]) {
-            self.0.update(bytes);
-        }
-        /// Finalize the hash and produce a digest.
-        pub fn digest(self) -> Digest {
-            self.0.digest().bytes()
-        }
-    }
-}
-
-/// A hash-digest produced by a [`Hasher`] hash implementation.
-#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
-pub type Digest = [u8; 20];
-
-#[cfg(feature = "fast-sha1")]
-mod _impl {
-    use sha1::Digest;
-
-    /// A implementation of the Sha1 hash, which can be used once.
-    #[derive(Default, Clone)]
-    pub struct Sha1(sha1::Sha1);
-
-    impl Sha1 {
-        /// Digest the given `bytes`.
-        pub fn update(&mut self, bytes: &[u8]) {
-            self.0.update(bytes);
-        }
-        /// Finalize the hash and produce a digest.
-        pub fn digest(self) -> super::Digest {
-            self.0.finalize().into()
-        }
-    }
-}
 
+// TODO: Remove this re-export (and its feature gate); the hashing API now lives in `gix_hash::hasher`.
 #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
-pub use _impl::Sha1 as Hasher;
+pub use gix_hash::hasher::{
+    hasher,
+    io::{bytes, bytes_of_file, bytes_with_hasher, Write},
+    Digest, Hasher,
+};
 
 /// Compute a CRC32 hash from the given `bytes`, returning the CRC32 hash.
 ///
@@ -71,132 +29,3 @@ pub fn crc32(bytes: &[u8]) -> u32 {
     h.update(bytes);
     h.finalize()
 }
-
-/// Produce a hasher suitable for the given kind of hash.
-#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
-pub fn hasher(kind: gix_hash::Kind) -> Hasher {
-    match kind {
-        gix_hash::Kind::Sha1 => Hasher::default(),
-    }
-}
-
-/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
-/// while initializing and calling `progress`.
-///
-/// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself,
-/// denoting the amount of bytes to hash starting from the beginning of the file.
-///
-/// # Note
-///
-/// * Only available with the `gix-object` feature enabled due to usage of the [`gix_hash::Kind`] enum and the
-///   [`gix_hash::ObjectId`] return value.
-/// * [Interrupts][crate::interrupt] are supported.
-#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
-pub fn bytes_of_file(
-    path: &std::path::Path,
-    num_bytes_from_start: u64,
-    kind: gix_hash::Kind,
-    progress: &mut dyn crate::progress::Progress,
-    should_interrupt: &std::sync::atomic::AtomicBool,
-) -> std::io::Result<gix_hash::ObjectId> {
-    bytes(
-        &mut std::fs::File::open(path)?,
-        num_bytes_from_start,
-        kind,
-        progress,
-        should_interrupt,
-    )
-}
-
-/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
-#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
-pub fn bytes(
-    read: &mut dyn std::io::Read,
-    num_bytes_from_start: u64,
-    kind: gix_hash::Kind,
-    progress: &mut dyn crate::progress::Progress,
-    should_interrupt: &std::sync::atomic::AtomicBool,
-) -> std::io::Result<gix_hash::ObjectId> {
-    bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
-}
-
-/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
-#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
-pub fn bytes_with_hasher(
-    read: &mut dyn std::io::Read,
-    num_bytes_from_start: u64,
-    mut hasher: Hasher,
-    progress: &mut dyn crate::progress::Progress,
-    should_interrupt: &std::sync::atomic::AtomicBool,
-) -> std::io::Result<gix_hash::ObjectId> {
-    let start = std::time::Instant::now();
-    // init progress before the possibility for failure, as convenience in case people want to recover
-    progress.init(
-        Some(num_bytes_from_start as prodash::progress::Step),
-        crate::progress::bytes(),
-    );
-
-    const BUF_SIZE: usize = u16::MAX as usize;
-    let mut buf = [0u8; BUF_SIZE];
-    let mut bytes_left = num_bytes_from_start;
-
-    while bytes_left > 0 {
-        let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
-        read.read_exact(out)?;
-        bytes_left -= out.len() as u64;
-        progress.inc_by(out.len());
-        hasher.update(out);
-        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
-            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
-        }
-    }
-
-    let id = gix_hash::ObjectId::from(hasher.digest());
-    progress.show_throughput(start);
-    Ok(id)
-}
-
-#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
-mod write {
-    use crate::hash::Hasher;
-
-    /// A utility to automatically generate a hash while writing into an inner writer.
-    pub struct Write<T> {
-        /// The hash implementation.
-        pub hash: Hasher,
-        /// The inner writer.
-        pub inner: T,
-    }
-
-    impl<T> std::io::Write for Write<T>
-    where
-        T: std::io::Write,
-    {
-        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-            let written = self.inner.write(buf)?;
-            self.hash.update(&buf[..written]);
-            Ok(written)
-        }
-
-        fn flush(&mut self) -> std::io::Result<()> {
-            self.inner.flush()
-        }
-    }
-
-    impl<T> Write<T>
-    where
-        T: std::io::Write,
-    {
-        /// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`.
-        pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
-            match object_hash {
-                gix_hash::Kind::Sha1 => Write {
-                    inner,
-                    hash: Hasher::default(),
-                },
-            }
-        }
-    }
-}
-#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
-pub use write::Write;
@@ -16,13 +16,19 @@ doctest = false
 test = false
 
 [features]
+# Temporary, to avoid a circular dependency on `gix-features`.
+progress-unit-bytes = ["prodash/unit-bytes"]
 ## Data structures implement `serde::Serialize` and `serde::Deserialize`.
 serde = ["dep:serde"]
 
 [dependencies]
+# Temporary, to avoid a circular dependency on `gix-features`.
+prodash = "29.0.1"
+
 thiserror = "2.0.0"
 faster-hex = { version = "0.9.0" }
 serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
+sha1-checked = { version = "0.10.0", default-features = false }
 
 document-features = { version = "0.2.0", optional = true }