diff --git a/cargo-smart-release/src/command/release/git.rs b/cargo-smart-release/src/command/release/git.rs index 9fc6c184bed..7552368add1 100644 --- a/cargo-smart-release/src/command/release/git.rs +++ b/cargo-smart-release/src/command/release/git.rs @@ -65,7 +65,7 @@ pub(in crate::command::release_impl) fn create_version_tag<'repo>( tag_name, target, git_repository::objs::Kind::Commit, - Some(&crate::git::author()?.to_ref()), + Some(crate::git::author()?.to_ref()), message, constraint, )?; diff --git a/cargo-smart-release/src/utils.rs b/cargo-smart-release/src/utils.rs index 8f9b80b974b..76a29a0cd93 100644 --- a/cargo-smart-release/src/utils.rs +++ b/cargo-smart-release/src/utils.rs @@ -52,7 +52,7 @@ pub fn is_pre_release_version(semver: &Version) -> bool { pub fn is_top_level_package(manifest_path: &Utf8Path, repo: &git::Repository) -> bool { manifest_path - .strip_prefix(repo.work_tree().as_ref().expect("repo with working tree")) + .strip_prefix(repo.work_dir().as_ref().expect("repo with working tree")) .map_or(false, |p| p.components().count() == 1) } diff --git a/experiments/traversal/src/main.rs b/experiments/traversal/src/main.rs index 1ea917b7668..ff1f880eec1 100644 --- a/experiments/traversal/src/main.rs +++ b/experiments/traversal/src/main.rs @@ -209,7 +209,7 @@ where for commit in commits { let tree_id = db .try_find(commit, &mut buf)? - .and_then(|o| o.try_into_commit_iter().and_then(|mut c| c.tree_id())) + .and_then(|o| o.try_into_commit_iter().and_then(|mut c| c.tree_id().ok())) .expect("commit as starting point"); let mut count = Count { entries: 0, seen }; diff --git a/git-actor/Cargo.toml b/git-actor/Cargo.toml index 402b7d4c57c..414454c27be 100644 --- a/git-actor/Cargo.toml +++ b/git-actor/Cargo.toml @@ -22,7 +22,7 @@ local-time-support = ["git-features/time"] git-features = { version = "^0.19.1", path = "../git-features", optional = true } quick-error = "2.0.0" btoi = "0.4.2" -bstr = { version = "0.2.13", default-features = false, features = ["std"]} +bstr = { version = "0.2.13", default-features = false, features = ["std", "unicode"]} nom = { version = "7", default-features = false, features = ["std"]} itoa = "1.0.1" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} diff --git a/git-actor/src/lib.rs b/git-actor/src/lib.rs index 884c8f988b0..647dcc27b89 100644 --- a/git-actor/src/lib.rs +++ b/git-actor/src/lib.rs @@ -32,7 +32,7 @@ pub struct Signature { /// A immutable signature is created by an actor at a certain time. /// /// Note that this is not a cryptographical signature. -#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy, Default)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct SignatureRef<'a> { /// The actor's name. diff --git a/git-actor/src/signature/mod.rs b/git-actor/src/signature/mod.rs index 08e3c8e05d4..5295c4dca6c 100644 --- a/git-actor/src/signature/mod.rs +++ b/git-actor/src/signature/mod.rs @@ -1,5 +1,6 @@ mod _ref { use crate::{signature::decode, Signature, SignatureRef}; + use bstr::ByteSlice; impl<'a> SignatureRef<'a> { /// Deserialize a signature from the given `data`. @@ -18,6 +19,15 @@ mod _ref { time: self.time, } } + + /// Trim whitespace surrounding the name and email and return a new signature. + pub fn trim(&self) -> SignatureRef<'a> { + SignatureRef { + name: self.name.trim().as_bstr(), + email: self.email.trim().as_bstr(), + time: self.time, + } + } } } diff --git a/git-actor/src/time.rs b/git-actor/src/time.rs index 47c924a24fd..e538e5b9e4a 100644 --- a/git-actor/src/time.rs +++ b/git-actor/src/time.rs @@ -23,6 +23,20 @@ impl Default for Time { } impl Time { + /// Create a new instance from seconds and offset. + pub fn new(seconds_since_unix_epoch: u32, offset_in_seconds: i32) -> Self { + Time { + seconds_since_unix_epoch, + offset_in_seconds, + sign: offset_in_seconds.into(), + } + } + + /// Return the passed seconds since epoch since this signature was made. + pub fn seconds(&self) -> u32 { + self.seconds_since_unix_epoch + } + /// Serialize this instance to `out` in a format suitable for use in header fields of serialized git commits or tags. pub fn write_to(&self, mut out: impl io::Write) -> io::Result<()> { let mut itoa = itoa::Buffer::new(); diff --git a/git-actor/tests/signature/mod.rs b/git-actor/tests/signature/mod.rs index 1c77382442d..a351319db7e 100644 --- a/git-actor/tests/signature/mod.rs +++ b/git-actor/tests/signature/mod.rs @@ -54,12 +54,20 @@ mod write_to { use bstr::ByteSlice; use git_actor::Signature; +#[test] +fn trim() { + let sig = git_actor::SignatureRef::from_bytes::<()>(b" \t hello there \t < \t email \t > 1 -0030").unwrap(); + let sig = sig.trim(); + assert_eq!(sig.name, "hello there"); + assert_eq!(sig.email, "email"); +} + #[test] fn round_trip() -> Result<(), Box> { for input in &[ &b"Sebastian Thiel 1 -0030"[..], ".. ☺️Sebastian 王知明 Thiel🙌 .. 1528473343 +0230".as_bytes(), - ".. whitespace \t is explicitly allowed - unicode aware trimming must be done elsewhere 1528473343 +0230" + ".. whitespace \t is explicitly allowed - unicode aware trimming must be done elsewhere 1528473343 +0230" .as_bytes(), ] { let signature: Signature = git_actor::SignatureRef::from_bytes::<()>(input)?.into(); diff --git a/git-mailmap/src/snapshot.rs b/git-mailmap/src/snapshot.rs index ec63aa9f4bd..e61ea56ce80 100644 --- a/git-mailmap/src/snapshot.rs +++ b/git-mailmap/src/snapshot.rs @@ -14,7 +14,15 @@ pub struct ResolvedSignature<'a> { } impl<'a> ResolvedSignature<'a> { - fn try_new(new_email: Option<&'a BString>, new_name: Option<&'a BString>) -> Option { + fn try_new( + new_email: Option<&'a BString>, + matched_email: &'a BStr, + current_email: &'_ BStr, + new_name: Option<&'a BString>, + ) -> Option { + let new_email = new_email + .map(|n| n.as_bstr()) + .or_else(|| (matched_email != current_email).then(|| matched_email)); match (new_email, new_name) { (None, None) => None, (new_email, new_name) => Some(ResolvedSignature { @@ -244,8 +252,12 @@ impl Snapshot { /// Try to resolve `signature` by its contained email and name and provide resolved/mapped names as reference. /// Return `None` if no such mapping was found. /// + /// Note that opposed to what git seems to do, we also normalize the case of email addresses to match the one + /// given in the mailmap. That is, if `Alex@example.com` is the current email, it will be matched and replaced with + /// `alex@example.com`. This leads to better mapping results and saves entries in the mailmap. + /// /// This is the fastest possible lookup as there is no allocation. - pub fn try_resolve_ref<'a>(&'a self, signature: &git_actor::SignatureRef<'_>) -> Option> { + pub fn try_resolve_ref<'a>(&'a self, signature: git_actor::SignatureRef<'_>) -> Option> { let email: EncodedStringRef<'_> = signature.email.into(); let pos = self .entries_by_old_email @@ -257,10 +269,20 @@ impl Snapshot { match entry.entries_by_old_name.binary_search_by(|e| e.old_name.cmp_ref(name)) { Ok(pos) => { - let entry = &entry.entries_by_old_name[pos]; - ResolvedSignature::try_new(entry.new_email.as_ref(), entry.new_name.as_ref()) + let name_entry = &entry.entries_by_old_name[pos]; + ResolvedSignature::try_new( + name_entry.new_email.as_ref(), + entry.old_email.as_bstr(), + signature.email, + name_entry.new_name.as_ref(), + ) } - Err(_) => ResolvedSignature::try_new(entry.new_email.as_ref(), entry.new_name.as_ref()), + Err(_) => ResolvedSignature::try_new( + entry.new_email.as_ref(), + entry.old_email.as_bstr(), + signature.email, + entry.new_name.as_ref(), + ), } } @@ -268,7 +290,7 @@ impl Snapshot { /// with the mapped name and/or email replaced accordingly. /// /// Return `None` if no such mapping was found. - pub fn try_resolve(&self, signature: &git_actor::SignatureRef<'_>) -> Option { + pub fn try_resolve(&self, signature: git_actor::SignatureRef<'_>) -> Option { let new = self.try_resolve_ref(signature)?; enriched_signature(signature, new) } @@ -277,16 +299,15 @@ impl Snapshot { /// of `signature` if no mapping was found. /// /// Note that this method will always allocate. - pub fn resolve(&self, signature: &git_actor::SignatureRef<'_>) -> git_actor::Signature { + pub fn resolve(&self, signature: git_actor::SignatureRef<'_>) -> git_actor::Signature { self.try_resolve(signature).unwrap_or_else(|| signature.to_owned()) } } fn enriched_signature( - SignatureRef { name, email, time }: &SignatureRef<'_>, + SignatureRef { name, email, time }: SignatureRef<'_>, new: ResolvedSignature<'_>, ) -> Option { - let time = *time; match (new.email, new.name) { (Some(new_email), Some(new_name)) => git_actor::Signature { email: new_email.to_owned(), diff --git a/git-mailmap/tests/snapshot/mod.rs b/git-mailmap/tests/snapshot/mod.rs index db52a924707..91aa860973f 100644 --- a/git-mailmap/tests/snapshot/mod.rs +++ b/git-mailmap/tests/snapshot/mod.rs @@ -5,49 +5,49 @@ use git_testtools::fixture_bytes; fn try_resolve() { let snapshot = Snapshot::from_bytes(&fixture_bytes("typical.txt")); assert_eq!( - snapshot.try_resolve(&signature("Foo", "Joe@example.com").to_ref()), - Some(signature("Joe R. Developer", "Joe@example.com")), - "resolved signatures contain all original fields, but normalizes only what's in the mapping, lookup is case-insensitive" + snapshot.try_resolve(signature("Foo", "Joe@example.com").to_ref()), + Some(signature("Joe R. Developer", "joe@example.com")), + "resolved signatures contain all original fields, and normalize the email as well to match the one that it was looked up with" ); assert_eq!( - snapshot.try_resolve(&signature("Joe", "bugs@example.com").to_ref()), + snapshot.try_resolve(signature("Joe", "bugs@example.com").to_ref()), Some(signature("Joe R. Developer", "joe@example.com")), "name and email can be mapped specifically" ); assert_eq!( - snapshot.try_resolve(&signature("Jane", "jane@laptop.(none)").to_ref()), + snapshot.try_resolve(signature("Jane", "jane@laptop.(none)").to_ref()), Some(signature("Jane Doe", "jane@example.com")), "fix name and email by email" ); assert_eq!( - snapshot.try_resolve(&signature("Jane", "jane@desktop.(none)").to_ref()), + snapshot.try_resolve(signature("Jane", "jane@desktop.(none)").to_ref()), Some(signature("Jane Doe", "jane@example.com")), "fix name and email by other email" ); assert_eq!( - snapshot.try_resolve(&signature("janE", "Bugs@example.com").to_ref()), + snapshot.try_resolve(signature("janE", "Bugs@example.com").to_ref()), Some(signature("Jane Doe", "jane@example.com")), "name and email can be mapped specifically, case insensitive matching of name" ); let sig = signature("Jane", "other@example.com"); - assert_eq!(snapshot.try_resolve(&sig.to_ref()), None, "unmatched email"); + assert_eq!(snapshot.try_resolve(sig.to_ref()), None, "unmatched email"); assert_eq!( - snapshot.resolve(&sig.to_ref()), + snapshot.resolve(sig.to_ref()), sig, "resolution always works here, returning a copy of the original" ); let sig = signature("Jean", "bugs@example.com"); assert_eq!( - snapshot.try_resolve(&sig.to_ref()), + snapshot.try_resolve(sig.to_ref()), None, "matched email, unmatched name" ); - assert_eq!(snapshot.resolve(&sig.to_ref()), sig); + assert_eq!(snapshot.resolve(sig.to_ref()), sig); assert_eq!(snapshot.entries().len(), 5); } @@ -56,7 +56,7 @@ fn try_resolve() { fn non_name_and_name_mappings_will_not_clash() { let entries = vec![ // add mapping from email - git_mailmap::Entry::change_name_and_email_by_email("new-name", "new-email", "old-email"), + git_mailmap::Entry::change_name_by_email("new-name", "old-email"), // add mapping from name and email git_mailmap::Entry::change_name_and_email_by_name_and_email( "other-new-name", @@ -69,12 +69,12 @@ fn non_name_and_name_mappings_will_not_clash() { let snapshot = Snapshot::new(entries); assert_eq!( - snapshot.try_resolve(&signature("replace-by-email", "old-email").to_ref()), - Some(signature("new-name", "new-email")), - "it can match by email only" + snapshot.try_resolve(signature("replace-by-email", "Old-Email").to_ref()), + Some(signature("new-name", "old-email")), + "it can match by email only, and the email is normalized" ); assert_eq!( - snapshot.try_resolve(&signature("old-name", "old-email").to_ref()), + snapshot.try_resolve(signature("old-name", "Old-Email").to_ref()), Some(signature("other-new-name", "other-new-email")), "it can match by email and name as well" ); @@ -87,25 +87,25 @@ fn non_name_and_name_mappings_will_not_clash() { fn overwrite_entries() { let snapshot = Snapshot::from_bytes(&fixture_bytes("overwrite.txt")); assert_eq!( - snapshot.try_resolve(&signature("does not matter", "old-a-email").to_ref()), + snapshot.try_resolve(signature("does not matter", "old-a-email").to_ref()), Some(signature("A-overwritten", "old-a-email")), "email only by email" ); assert_eq!( - snapshot.try_resolve(&signature("to be replaced", "old-b-EMAIL").to_ref()), + snapshot.try_resolve(signature("to be replaced", "old-b-EMAIL").to_ref()), Some(signature("B-overwritten", "new-b-email-overwritten")), "name and email by email" ); assert_eq!( - snapshot.try_resolve(&signature("old-c", "old-C-email").to_ref()), + snapshot.try_resolve(signature("old-c", "old-C-email").to_ref()), Some(signature("C-overwritten", "new-c-email-overwritten")), "name and email by name and email" ); assert_eq!( - snapshot.try_resolve(&signature("unchanged", "old-d-email").to_ref()), + snapshot.try_resolve(signature("unchanged", "old-d-email").to_ref()), Some(signature("unchanged", "new-d-email-overwritten")), "email by email" ); diff --git a/git-object/src/commit/mod.rs b/git-object/src/commit/mod.rs index 5b84b41ab6d..9b4b999b9ab 100644 --- a/git-object/src/commit/mod.rs +++ b/git-object/src/commit/mod.rs @@ -48,10 +48,29 @@ impl<'a> CommitRef<'a> { crate::commit::ExtraHeaders::new(self.extra_headers.iter().map(|(k, v)| (*k, v.as_ref()))) } + /// Return the author, with whitespace trimmed. + /// + /// This is different from the `author` field which may contain whitespace. + pub fn author(&self) -> git_actor::SignatureRef<'a> { + self.author.trim() + } + + /// Return the committer, with whitespace trimmed. + /// + /// This is different from the `committer` field which may contain whitespace. + pub fn committer(&self) -> git_actor::SignatureRef<'a> { + self.committer.trim() + } + /// Returns a partially parsed message from which more information can be derived. pub fn message(&self) -> MessageRef<'a> { MessageRef::from_bytes(self.message) } + + /// Returns the time at which this commit was created. + pub fn time(&self) -> git_actor::Time { + self.committer.time + } } impl Commit { diff --git a/git-object/src/commit/ref_iter.rs b/git-object/src/commit/ref_iter.rs index 22b5e06e895..d934c84ce99 100644 --- a/git-object/src/commit/ref_iter.rs +++ b/git-object/src/commit/ref_iter.rs @@ -17,6 +17,7 @@ pub(crate) enum SignatureKind { Committer, } +#[derive(Copy, Clone)] pub(crate) enum State { Tree, Parents, @@ -48,8 +49,19 @@ impl<'a> CommitRefIter<'a> { /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again. /// `next()`. - pub fn tree_id(&mut self) -> Option { - self.next().and_then(Result::ok).and_then(Token::try_into_id) + pub fn tree_id(&mut self) -> Result { + let tree_id = self.next().ok_or_else(missing_field)??; + Token::try_into_id(tree_id).ok_or_else(missing_field) + } + + /// Return all parent_ids as iterator. + /// + /// Parsing errors are ignored quietly. + pub fn parent_ids(self) -> impl Iterator + 'a { + self.filter_map(|t| match t { + Ok(Token::Parent { id }) => Some(id), + _ => None, + }) } /// Returns all signatures, first the author, then the committer, if there is no decoding error. @@ -57,7 +69,7 @@ impl<'a> CommitRefIter<'a> { /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not /// if not exactly two signatures were iterable. /// Errors are not the common case - if an error needs to be detectable, use this instance as iterator. - pub fn signatures(&'a mut self) -> impl Iterator> + 'a { + pub fn signatures(self) -> impl Iterator> + 'a { self.filter_map(|t| match t { Ok(Token::Author { signature }) | Ok(Token::Committer { signature }) => Some(signature), _ => None, @@ -66,12 +78,46 @@ impl<'a> CommitRefIter<'a> { /// Returns the committer signature if there is no decoding error. /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not. - pub fn committer(&mut self) -> Option> { + pub fn committer(mut self) -> Result, crate::decode::Error> { self.find_map(|t| match t { - Ok(Token::Committer { signature }) => Some(signature), + Ok(Token::Committer { signature }) => Some(Ok(signature)), + Err(err) => Some(Err(err)), _ => None, }) + .ok_or_else(missing_field)? } + + /// Returns the author signature if there is no decoding error. + /// + /// It may contain white space surrounding it, and is exactly as parsed. + /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not. + pub fn author(mut self) -> Result, crate::decode::Error> { + self.find_map(|t| match t { + Ok(Token::Author { signature }) => Some(Ok(signature)), + Err(err) => Some(Err(err)), + _ => None, + }) + .ok_or_else(missing_field)? + } + + /// Returns the message if there is no decoding error. + /// + /// It may contain white space surrounding it, and is exactly as + // parsed. + /// Errors are coerced into options, hiding whether there was an error or not. The caller knows if there was an error or not. + pub fn message(mut self) -> Result<&'a BStr, crate::decode::Error> { + self.find_map(|t| match t { + Ok(Token::Message(msg)) => Some(Ok(msg)), + Err(err) => Some(Err(err)), + _ => None, + }) + .transpose() + .map(|msg| msg.unwrap_or_default()) + } +} + +fn missing_field() -> crate::decode::Error { + crate::decode::empty_error() } impl<'a> CommitRefIter<'a> { diff --git a/git-object/src/lib.rs b/git-object/src/lib.rs index 3e30aedb958..06d8caa79c7 100644 --- a/git-object/src/lib.rs +++ b/git-object/src/lib.rs @@ -76,9 +76,13 @@ pub struct CommitRef<'a> { pub tree: &'a BStr, /// HEX hash of each parent commit. Empty for first commit in repository. pub parents: SmallVec<[&'a BStr; 2]>, - /// Who wrote this commit. + /// Who wrote this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping. + /// + /// Use the [`author()`][CommitRef::author()] method to received a trimmed version of it. pub author: git_actor::SignatureRef<'a>, - /// Who committed this commit. + /// Who committed this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping. + /// + /// Use the [`committer()`][CommitRef::committer()] method to received a trimmed version of it. /// /// This may be different from the `author` in case the author couldn't write to the repository themselves and /// is commonly encountered with contributed commits. @@ -93,6 +97,7 @@ pub struct CommitRef<'a> { /// Like [`CommitRef`][crate::CommitRef], but as `Iterator` to support (up to) entirely allocation free parsing. /// It's particularly useful to traverse the commit graph without ever allocating arrays for parents. +#[derive(Copy, Clone)] pub struct CommitRefIter<'a> { data: &'a [u8], state: commit::ref_iter::State, @@ -212,10 +217,8 @@ pub struct TreeRef<'a> { /// A directory snapshot containing files (blobs), directories (trees) and submodules (commits), lazily evaluated. #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] -#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct TreeRefIter<'a> { /// The directories and files contained in this tree. - #[cfg_attr(feature = "serde1", serde(borrow))] data: &'a [u8], } @@ -254,6 +257,12 @@ pub mod decode { /// The owned type to be used for parse errors. pub type ParseErrorOwned = nom::error::VerboseError; + pub(crate) fn empty_error() -> Error { + Error { + inner: nom::error::VerboseError:: { errors: Vec::new() }, + } + } + /// A type to indicate errors during parsing and to abstract away details related to `nom`. #[derive(Debug, Clone)] pub struct Error { @@ -293,6 +302,10 @@ pub mod decode { /// The owned type to be used for parse errors, discards everything and is zero size pub type ParseErrorOwned = (); + pub(crate) fn empty_error() -> Error { + Error { inner: () } + } + /// A type to indicate errors during parsing and to abstract away details related to `nom`. #[derive(Debug, Clone)] pub struct Error { @@ -317,6 +330,7 @@ pub mod decode { } } } + pub(crate) use _decode::empty_error; pub use _decode::{Error, ParseError, ParseErrorOwned}; impl std::error::Error for Error {} diff --git a/git-object/src/tag/ref_iter.rs b/git-object/src/tag/ref_iter.rs index 7cf5250417e..94e6f654919 100644 --- a/git-object/src/tag/ref_iter.rs +++ b/git-object/src/tag/ref_iter.rs @@ -39,11 +39,16 @@ impl<'a> TagRefIter<'a> { /// Errors are coerced into options, hiding whether there was an error or not. The caller should assume an error if they /// call the method as intended. Such a squelched error cannot be recovered unless the objects data is retrieved and parsed again. /// `next()`. - pub fn target_id(&mut self) -> Option { - self.next().and_then(Result::ok).and_then(Token::into_id) + pub fn target_id(&mut self) -> Result { + let token = self.next().ok_or_else(missing_field)??; + Token::into_id(token).ok_or_else(missing_field) } } +fn missing_field() -> crate::decode::Error { + crate::decode::empty_error() +} + impl<'a> TagRefIter<'a> { fn next_inner(i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { use State::*; diff --git a/git-object/tests/immutable/commit/from_bytes.rs b/git-object/tests/immutable/commit/from_bytes.rs index 79495912935..d483f9f92bc 100644 --- a/git-object/tests/immutable/commit/from_bytes.rs +++ b/git-object/tests/immutable/commit/from_bytes.rs @@ -151,7 +151,7 @@ fn with_trailer() -> crate::Result { CommitRef { tree: b"25a19c29c5e36884c1ad85d8faf23f1246b7961b".as_bstr(), parents: SmallVec::from(vec![b"699ae71105dddfcbb9711ed3a92df09e91a04e90".as_bstr()]), - author: kim.clone(), + author: kim, committer: kim, encoding: None, message: b"test: use gitoxide for link-git-protocol tests diff --git a/git-object/tests/immutable/commit/iter.rs b/git-object/tests/immutable/commit/iter.rs index 7aa937e6a09..bbb512a75d6 100644 --- a/git-object/tests/immutable/commit/iter.rs +++ b/git-object/tests/immutable/commit/iter.rs @@ -24,9 +24,10 @@ fn newline_right_after_signature_multiline_header() -> crate::Result { #[test] fn signed_with_encoding() -> crate::Result { + let input = fixture_bytes("commit", "signed-with-encoding.txt"); + let iter = CommitRefIter::from_bytes(&input); assert_eq!( - CommitRefIter::from_bytes(&fixture_bytes("commit", "signed-with-encoding.txt")) - .collect::, _>>()?, + iter.collect::, _>>()?, vec![ Token::Tree { id: hex_to_id("1973afa74d87b2bb73fa884aaaa8752aec43ea88") @@ -45,6 +46,9 @@ fn signed_with_encoding() -> crate::Result { Token::Message(b"encoding & sig".as_bstr()), ] ); + + assert_eq!(iter.author().ok(), Some(signature(1592448995))); + assert_eq!(iter.committer().ok(), Some(signature(1592449083))); Ok(()) } @@ -112,6 +116,12 @@ fn signed_singleline() -> crate::Result { Token::Message(b"update tasks\n".as_bstr()), ] ); + assert_eq!( + CommitRefIter::from_bytes(&fixture_bytes("commit", "signed-singleline.txt")) + .parent_ids() + .collect::>(), + vec![hex_to_id("09d8d3a12e161a7f6afb522dbe8900a9c09bce06")] + ); Ok(()) } @@ -129,8 +139,10 @@ fn error_handling() -> crate::Result { #[test] fn mergetag() -> crate::Result { + let input = fixture_bytes("commit", "mergetag.txt"); + let iter = CommitRefIter::from_bytes(&input); assert_eq!( - CommitRefIter::from_bytes(&fixture_bytes("commit", "mergetag.txt")).collect::, _>>()?, + iter.collect::, _>>()?, vec![ Token::Tree { id: hex_to_id("1c61918031bf2c7fab9e17dde3c52a6a9884fcb5") @@ -148,9 +160,17 @@ fn mergetag() -> crate::Result { signature: linus_signature(1591996221) }, Token::ExtraHeader((b"mergetag".as_bstr(), MERGE_TAG.as_bytes().as_bstr().into())), - Token::Message(LONG_MESSAGE.as_bytes().as_bstr()), + Token::Message(LONG_MESSAGE.into()), + ] + ); + assert_eq!( + iter.parent_ids().collect::>(), + vec![ + hex_to_id("44ebe016df3aad96e3be8f95ec52397728dd7701"), + hex_to_id("8d485da0ddee79d0e6713405694253d401e41b93") ] ); + assert_eq!(iter.message().ok(), Some(LONG_MESSAGE.into())); Ok(()) } @@ -164,27 +184,31 @@ mod method { #[test] fn tree_id() -> crate::Result { + let input = fixture_bytes("commit", "unsigned.txt"); + let iter = CommitRefIter::from_bytes(&input); assert_eq!( - CommitRefIter::from_bytes(&fixture_bytes("commit", "unsigned.txt")).tree_id(), + iter.clone().tree_id().ok(), Some(hex_to_id("1b2dfb4ac5e42080b682fc676e9738c94ce6d54d")) ); assert_eq!( - CommitRefIter::from_bytes(&fixture_bytes("commit", "unsigned.txt")) - .signatures() - .collect::>(), + iter.signatures().collect::>(), vec![signature(1592437401), signature(1592437401)] ); + assert_eq!(iter.parent_ids().count(), 0); Ok(()) } #[test] fn signatures() -> crate::Result { + let input = fixture_bytes("commit", "unsigned.txt"); + let iter = CommitRefIter::from_bytes(&input); assert_eq!( - CommitRefIter::from_bytes(&fixture_bytes("commit", "unsigned.txt")) - .signatures() - .collect::>(), + iter.signatures().collect::>(), vec![signature(1592437401), signature(1592437401)] ); + assert_eq!(iter.author().ok(), Some(signature(1592437401))); + assert_eq!(iter.committer().ok(), Some(signature(1592437401))); + assert_eq!(iter.author().ok(), Some(signature(1592437401)), "it's not consuming"); Ok(()) } } diff --git a/git-object/tests/immutable/commit/message.rs b/git-object/tests/immutable/commit/message.rs index 0a5bf0e79dc..f0ef655e017 100644 --- a/git-object/tests/immutable/commit/message.rs +++ b/git-object/tests/immutable/commit/message.rs @@ -245,7 +245,7 @@ mod summary { CommitRef { tree: "tree".into(), parents: Default::default(), - author: actor.clone(), + author: actor, committer: actor, encoding: None, message: input.as_bstr(), diff --git a/git-ref/src/fullname.rs b/git-ref/src/fullname.rs index 041aaa13f09..2e577bf1a60 100644 --- a/git-ref/src/fullname.rs +++ b/git-ref/src/fullname.rs @@ -105,6 +105,18 @@ impl FullName { } self } + + /// Strip well-known prefixes from the name and return it. + /// + /// If there is no such prefix, the original name is returned. + pub fn shorten(&self) -> &BStr { + self.to_ref().shorten() + } + + /// Classify this name, or return `None` if it's unclassified. + pub fn category(&self) -> Option { + self.to_ref().category() + } } impl<'a> FullNameRef<'a> { diff --git a/git-ref/src/lib.rs b/git-ref/src/lib.rs index c19e75cc355..00c3966f441 100644 --- a/git-ref/src/lib.rs +++ b/git-ref/src/lib.rs @@ -118,6 +118,22 @@ pub enum Kind { Symbolic, } +/// The various known categories of references. +/// +/// This translates into a prefix containing all references of a given category. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] +pub enum Category { + /// A tag in `refs/tags` + Tag, + /// A branch in `refs/heads` + LocalBranch, + /// A branch in `refs/remotes` + RemoteBranch, + /// A tag in `refs/notes` + Note, + // NOTE: when adding something here, add it to `kind()` and `strip_prefix()` too. +} + /// Denotes a ref target, equivalent to [`Kind`], but with mutable data. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] pub enum Target { diff --git a/git-ref/src/name.rs b/git-ref/src/name.rs index 09c99056fce..620f7cfba73 100644 --- a/git-ref/src/name.rs +++ b/git-ref/src/name.rs @@ -6,11 +6,23 @@ use std::{ use git_object::bstr::{BStr, BString, ByteSlice, ByteVec}; -use crate::{FullNameRef, PartialNameRef}; +use crate::{Category, FullNameRef, PartialNameRef}; /// The error used in the [`PartialNameRef`][super::PartialNameRef]::try_from(…) implementations. pub type Error = git_validate::reference::name::Error; +impl Category { + /// Return the prefix that would contain all references of our kind. + pub fn prefix(&self) -> &BStr { + match self { + Category::Tag => b"refs/tags/".as_bstr(), + Category::LocalBranch => b"refs/heads/".as_bstr(), + Category::RemoteBranch => b"refs/remotes/".as_bstr(), + Category::Note => b"refs/notes/".as_bstr(), + } + } +} + impl<'a> FullNameRef<'a> { /// Convert this name into the relative path identifying the reference location. pub fn to_path(self) -> &'a Path { @@ -21,6 +33,34 @@ impl<'a> FullNameRef<'a> { pub fn as_bstr(&self) -> &'a BStr { self.0 } + + /// Strip well-known prefixes from the name and return it. + /// + /// If there is no such prefix, the original name is returned. + pub fn shorten(&self) -> &'a BStr { + let n = self.as_bstr(); + n.strip_prefix(b"refs/tags/") + .or_else(|| n.strip_prefix(b"refs/heads/")) + .or_else(|| n.strip_prefix(b"refs/remotes/")) + .or_else(|| n.strip_prefix(b"refs/")) + .map(|n| n.as_bstr()) + .unwrap_or(n) + } + + /// Classify this name, or return `None` if it's unclassified. + pub fn category(&self) -> Option { + for kind in &[ + Category::Tag, + Category::LocalBranch, + Category::RemoteBranch, + Category::Note, + ] { + if self.0.starts_with(kind.prefix()) { + return (*kind).into(); + } + } + None + } } impl<'a> PartialNameRef<'a> { diff --git a/git-ref/src/store/file/raw_ext.rs b/git-ref/src/store/file/raw_ext.rs index 2add07d3aa4..dfc0e7bc14d 100644 --- a/git-ref/src/store/file/raw_ext.rs +++ b/git-ref/src/store/file/raw_ext.rs @@ -118,7 +118,7 @@ impl ReferenceExt for Reference { })?; match kind { git_object::Kind::Tag => { - oid = git_object::TagRefIter::from_bytes(data).target_id().ok_or_else(|| { + oid = git_object::TagRefIter::from_bytes(data).target_id().map_err(|_err| { peel::to_id::Error::NotFound { oid, name: self.name.0.clone(), diff --git a/git-ref/src/store/packed/transaction.rs b/git-ref/src/store/packed/transaction.rs index a2238c73331..f23ba93d50d 100644 --- a/git-ref/src/store/packed/transaction.rs +++ b/git-ref/src/store/packed/transaction.rs @@ -68,7 +68,7 @@ impl packed::Transaction { let kind = find(next_id, &mut buf)?; match kind { Some(kind) if kind == git_object::Kind::Tag => { - next_id = git_object::TagRefIter::from_bytes(&buf).target_id().ok_or_else(|| { + next_id = git_object::TagRefIter::from_bytes(&buf).target_id().map_err(|_| { prepare::Error::Resolve( format!("Couldn't get target object id from tag {}", next_id).into(), ) diff --git a/git-ref/tests/fullname/mod.rs b/git-ref/tests/fullname/mod.rs index e2cc9c2356e..7208f5ced88 100644 --- a/git-ref/tests/fullname/mod.rs +++ b/git-ref/tests/fullname/mod.rs @@ -1,3 +1,4 @@ +use git_ref::Category; use std::convert::TryInto; #[test] @@ -5,6 +6,31 @@ fn file_name() { let name: git_ref::FullName = "refs/heads/main".try_into().unwrap(); assert_eq!(name.to_ref().file_name(), "main"); } +#[test] +fn shorten_and_category() { + for (input, expected, category) in [ + ("refs/tags/tag-name", "tag-name", Category::Tag), + ("refs/heads/main", "main", Category::LocalBranch), + ("refs/remotes/origin/main", "origin/main", Category::RemoteBranch), + ("refs/notes/note-name", "notes/note-name", Category::Note), + ] { + let name: git_ref::FullName = input.try_into().unwrap(); + let category = Some(category); + assert_eq!(name.to_ref().shorten(), expected); + assert_eq!(name.shorten(), expected); + assert_eq!(name.category(), category); + assert_eq!(name.to_ref().category(), category); + } + + let special = "HEAD"; + let name: git_ref::FullName = special.try_into().unwrap(); + assert_eq!( + name.shorten(), + special, + "the whole name is returned if there is no prefix" + ); + assert_eq!(name.category(), None); +} #[test] fn prefix_with_namespace_and_stripping() { diff --git a/git-repository/examples/stats.rs b/git-repository/examples/stats.rs index 1450bb8c730..ac88843a75e 100644 --- a/git-repository/examples/stats.rs +++ b/git-repository/examples/stats.rs @@ -2,19 +2,32 @@ use git_repository as git; use git_repository::Reference; fn main() -> Result<(), Box> { - let repo = git::discover(".")?.apply_environment(); + let mut repo = git::discover(".")?.apply_environment(); println!( "Repo: {}", - repo.work_tree().as_deref().unwrap_or(repo.git_dir()).display() + repo.work_dir().as_deref().unwrap_or(repo.git_dir()).display() ); + let mut max_commit_size = 0; + let mut avg_commit_size = 0; + repo.object_cache_size(32 * 1024); let commit_ids = repo .head()? .into_fully_peeled_id() .ok_or_else(|| "There are no commits - nothing to do here.")?? .ancestors() .all() + .inspect(|id| { + if let Ok(Ok(object)) = id.as_ref().map(|id| id.object()) { + avg_commit_size += object.data.len(); + if object.data.len() > max_commit_size { + max_commit_size = object.data.len(); + } + } + }) .collect::, _>>()?; println!("Num Commits: {}", commit_ids.len()); + println!("Max commit Size: {}", max_commit_size); + println!("Avg commit Size: {}", avg_commit_size / commit_ids.len()); assert!(!commit_ids.is_empty(), "checked that before"); let last_commit_id = &commit_ids[0]; diff --git a/git-repository/src/head.rs b/git-repository/src/head.rs index 58138aaefba..0837e677a81 100644 --- a/git-repository/src/head.rs +++ b/git-repository/src/head.rs @@ -124,7 +124,8 @@ pub mod peel { use crate::head::Kind; - mod peel_to_commit { + /// + pub mod to_commit { use crate::object; /// The error returned by [Head::peel_to_commit_in_place()][super::Head::peel_to_commit_in_place()]. @@ -192,14 +193,12 @@ pub mod peel { /// more object to follow, transform the id into a commit if possible and return that. /// /// Returns an error if the head is unborn or if it doesn't point to a commit. - pub fn peel_to_commit_in_place(&mut self) -> Result, peel_to_commit::Error> { - let id = self - .peel_to_id_in_place() - .ok_or_else(|| peel_to_commit::Error::Unborn { - name: self.referent_name().expect("unborn").to_owned(), - })??; + pub fn peel_to_commit_in_place(&mut self) -> Result, to_commit::Error> { + let id = self.peel_to_id_in_place().ok_or_else(|| to_commit::Error::Unborn { + name: self.referent_name().expect("unborn").to_owned(), + })??; id.object() - .map_err(|err| peel_to_commit::Error::Peel(Error::FindExistingObject(err))) + .map_err(|err| to_commit::Error::Peel(Error::FindExistingObject(err))) .and_then(|object| object.try_into_commit().map_err(Into::into)) } diff --git a/git-repository/src/id.rs b/git-repository/src/id.rs index 04ff7300599..b2be98e1333 100644 --- a/git-repository/src/id.rs +++ b/git-repository/src/id.rs @@ -26,15 +26,15 @@ impl<'repo> Id<'repo> { } /// Turn this object id into a shortened id with a length in hex as configured by `core.abbrev`. - pub fn prefix(&self) -> Result { + pub fn shorten(&self) -> Result { let hex_len = self.repo.config_int("core.abbrev", 7); - let hex_len = hex_len.try_into().map_err(|_| prefix::Error::ConfigValue { + let hex_len = hex_len.try_into().map_err(|_| shorten::Error::ConfigValue { actual: hex_len, max_range: self.inner.kind().len_in_hex(), err: None, })?; let prefix = - git_odb::find::PotentialPrefix::new(self.inner, hex_len).map_err(|err| prefix::Error::ConfigValue { + git_odb::find::PotentialPrefix::new(self.inner, hex_len).map_err(|err| shorten::Error::ConfigValue { actual: hex_len as i64, max_range: self.inner.kind().len_in_hex(), err: Some(err), @@ -49,8 +49,8 @@ impl<'repo> Id<'repo> { } /// -pub mod prefix { - /// Returned by [`Id::prefix()`][super::Id::prefix()]. +pub mod shorten { + /// Returned by [`Id::prefix()`][super::Id::shorten()]. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { @@ -125,17 +125,18 @@ pub mod ancestors { } /// Return an iterator to traverse all commits in the history of the commit the parent [Id] is pointing to. - pub fn all(&mut self) -> Iter<'_, 'repo> { + pub fn all(&mut self) -> Iter<'repo> { let tips = std::mem::replace(&mut self.tips, Box::new(None.into_iter())); let parents = self.parents; let sorting = self.sorting; + let repo = self.repo; Iter { - repo: self.repo, + repo, inner: Box::new( git_traverse::commit::Ancestors::new( tips, git_traverse::commit::ancestors::State::default(), - move |oid, buf| self.repo.objects.find_commit_iter(oid, buf), + move |oid, buf| repo.objects.find_commit_iter(oid, buf), ) .sorting(sorting) .parents(parents), @@ -145,12 +146,12 @@ pub mod ancestors { } /// The iterator returned by [`Ancestors::all()`]. - pub struct Iter<'a, 'repo> { + pub struct Iter<'repo> { repo: &'repo crate::Repository, - inner: Box> + 'a>, + inner: Box> + 'repo>, } - impl<'a, 'repo> Iterator for Iter<'a, 'repo> { + impl<'repo> Iterator for Iter<'repo> { type Item = Result, git_traverse::commit::ancestors::Error>; fn next(&mut self) -> Option { @@ -165,6 +166,7 @@ mod impls { use git_hash::{oid, ObjectId}; use crate::{DetachedObject, Id, Object}; + // Eq, Hash, Ord, PartialOrd, impl<'a> std::hash::Hash for Id<'a> { diff --git a/git-repository/src/object/commit.rs b/git-repository/src/object/commit.rs index 10e1b0adeab..21da53aea32 100644 --- a/git-repository/src/object/commit.rs +++ b/git-repository/src/object/commit.rs @@ -1,4 +1,4 @@ -use crate::{bstr::BStr, Commit, Tree}; +use crate::{bstr::BStr, Commit, DetachedObject, Tree}; mod error { use crate::object; @@ -9,7 +9,7 @@ mod error { #[error(transparent)] FindExistingObject(#[from] object::find::existing::OdbError), #[error("The commit could not be decoded fully or partially")] - Decode, + Decode(#[from] git_object::decode::Error), #[error("Expected object of type {}, but got {}", .expected, .actual)] ObjectKind { expected: git_object::Kind, @@ -18,42 +18,91 @@ mod error { } } +use crate::id::Ancestors; + pub use error::Error; +impl<'repo> Commit<'repo> { + /// Create an owned instance of this object, copying our data in the process. + pub fn detached(&self) -> DetachedObject { + DetachedObject { + id: self.id, + kind: git_object::Kind::Commit, + data: self.data.clone(), + } + } + + /// Sever the connection to the `Repository` and turn this instance into a standalone object. + pub fn detach(self) -> DetachedObject { + self.into() + } +} + impl<'repo> Commit<'repo> { /// Turn this objects id into a shortened id with a length in hex as configured by `core.abbrev`. - pub fn short_id(&self) -> Result { + pub fn short_id(&self) -> Result { use crate::ext::ObjectIdExt; - self.id.attach(self.repo).prefix() + self.id.attach(self.repo).shorten() } - /// Parse the commits message into a [`MessageRef`][git_object::commit::MessageRef], after decoding the entire commit object. + /// Parse the commits message into a [`MessageRef`][git_object::commit::MessageRef] pub fn message(&self) -> Result, git_object::decode::Error> { - Ok(self.decode()?.message()) + Ok(git_object::commit::MessageRef::from_bytes(self.message_raw()?)) } /// Decode the entire commit object in full and return the raw message bytes. pub fn message_raw(&self) -> Result<&'_ BStr, git_object::decode::Error> { - Ok(self.decode()?.message) + git_object::CommitRefIter::from_bytes(&self.data).message() } /// Decode the commit and obtain the time at which the commit was created. /// /// For the time at which it was authored, refer to `.decode()?.author.time`. - pub fn time(&self) -> Result { - Ok(self.decode()?.committer.time) + pub fn time(&self) -> Result { + Ok(self.committer()?.time) } /// Decode the entire commit object and return it for accessing all commit information. /// - /// Note that the returned commit object doesn't make object lookup easy but should be + /// It will allocate only if there are more than 2 parents. + /// + /// Note that the returned commit object does make lookup easy and should be /// used for successive calls to string-ish information to avoid decoding the object - /// unnecessarily. + /// more than once. pub fn decode(&self) -> Result, git_object::decode::Error> { git_object::CommitRef::from_bytes(&self.data) } + /// Return an iterator over tokens, representing this commit piece by piece. + pub fn iter(&self) -> git_object::CommitRefIter<'_> { + git_object::CommitRefIter::from_bytes(&self.data) + } + + /// Return the commits author, with surrounding whitespace trimmed. + pub fn author(&self) -> Result, git_object::decode::Error> { + git_object::CommitRefIter::from_bytes(&self.data) + .author() + .map(|s| s.trim()) + } + + /// Return the commits committer. with surrounding whitespace trimmed. + pub fn committer(&self) -> Result, git_object::decode::Error> { + git_object::CommitRefIter::from_bytes(&self.data) + .committer() + .map(|s| s.trim()) + } + + /// Decode this commits parent ids on the fly without allocating. + // TODO: tests + pub fn parent_ids(&self) -> impl Iterator> + '_ { + use crate::ext::ObjectIdExt; + let repo = self.repo; + git_object::CommitRefIter::from_bytes(&self.data) + .parent_ids() + .map(move |id| id.attach(repo)) + } + /// Parse the commit and return the the tree object it points to. pub fn tree(&self) -> Result, Error> { - let tree_id = self.tree_id().ok_or(Error::Decode)?; + let tree_id = self.tree_id()?; match self.repo.find_object(tree_id)?.try_into_tree() { Ok(tree) => Ok(tree), Err(crate::object::try_into::Error { actual, expected, .. }) => Err(Error::ObjectKind { actual, expected }), @@ -61,7 +110,18 @@ impl<'repo> Commit<'repo> { } /// Parse the commit and return the the tree id it points to. - pub fn tree_id(&self) -> Option { + pub fn tree_id(&self) -> Result { git_object::CommitRefIter::from_bytes(&self.data).tree_id() } + + /// Return our id own id with connection to this repository. + pub fn id(&self) -> crate::Id<'repo> { + use crate::ext::ObjectIdExt; + self.id.attach(self.repo) + } + + /// Obtain a platform for traversing ancestors of this commit. + pub fn ancestors(&self) -> Ancestors<'repo> { + self.id().ancestors() + } } diff --git a/git-repository/src/object/impls.rs b/git-repository/src/object/impls.rs index c3f5a22d841..8d0c63ec38c 100644 --- a/git-repository/src/object/impls.rs +++ b/git-repository/src/object/impls.rs @@ -9,18 +9,32 @@ impl<'repo> std::fmt::Debug for Object<'repo> { } impl<'repo> From> for DetachedObject { - fn from(r: Object<'repo>) -> Self { - r.into_owned() + fn from(mut v: Object<'repo>) -> Self { + DetachedObject { + id: v.id, + kind: v.kind, + data: std::mem::take(&mut v.data), + } + } +} + +impl<'repo> From> for DetachedObject { + fn from(mut v: Commit<'repo>) -> Self { + DetachedObject { + id: v.id, + kind: git_object::Kind::Commit, + data: std::mem::take(&mut v.data), + } } } impl<'repo> From> for Object<'repo> { - fn from(mut r: Commit<'repo>) -> Self { + fn from(mut v: Commit<'repo>) -> Self { Object { - id: r.id, + id: v.id, kind: git_object::Kind::Commit, - data: steal_from_freelist(&mut r.data), - repo: r.repo, + data: steal_from_freelist(&mut v.data), + repo: v.repo, } } } diff --git a/git-repository/src/object/mod.rs b/git-repository/src/object/mod.rs index 4b5baa7fccb..78232036c68 100644 --- a/git-repository/src/object/mod.rs +++ b/git-repository/src/object/mod.rs @@ -94,7 +94,7 @@ impl<'repo> Object<'repo> { impl<'repo> Object<'repo> { /// Create an owned instance of this object, copying our data in the process. - pub fn to_owned(&self) -> DetachedObject { + pub fn detached(&self) -> DetachedObject { DetachedObject { id: self.id, kind: self.kind, @@ -102,18 +102,7 @@ impl<'repo> Object<'repo> { } } - /// Turn this instance into an owned one, copying our data in the process. - pub fn into_owned(mut self) -> DetachedObject { - DetachedObject { - id: self.id, - kind: self.kind, - data: std::mem::take(&mut self.data), - } - } - - /// Sever the connection to `Easy` and turn this instance into a standalone object. - /// - /// Note that the data buffer will be copied in the process. + /// Sever the connection to the `Repository` and turn this instance into a standalone object. pub fn detach(self) -> DetachedObject { self.into() } diff --git a/git-repository/src/reference/errors.rs b/git-repository/src/reference/errors.rs index 0e25f8fb4b5..ca24ad816b9 100644 --- a/git-repository/src/reference/errors.rs +++ b/git-repository/src/reference/errors.rs @@ -28,6 +28,34 @@ pub mod peel { } } +/// +pub mod head_id { + /// The error returned by [Repository::head_id(…)][crate::Repository::head_id()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Head(#[from] crate::reference::find::existing::Error), + #[error(transparent)] + PeelToId(#[from] crate::head::peel::Error), + #[error("Branch '{name}' does not have any commits")] + Unborn { name: git_ref::FullName }, + } +} + +/// +pub mod head_commit { + /// The error returned by [Repository::head_commit(…)][crate::Repository::head_commit()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + Head(#[from] crate::reference::find::existing::Error), + #[error(transparent)] + PeelToCommit(#[from] crate::head::peel::to_commit::Error), + } +} + /// pub mod find { /// diff --git a/git-repository/src/reference/iter.rs b/git-repository/src/reference/iter.rs index ea0523d744f..725fa4f8ff1 100644 --- a/git-repository/src/reference/iter.rs +++ b/git-repository/src/reference/iter.rs @@ -43,6 +43,42 @@ impl<'r> Platform<'r> { repo: self.repo, }) } + + // TODO: tests + /// Return an iterator over all references that are tags. + /// + /// They are all prefixed with `refs/tags`. + pub fn tags(&self) -> Result, init::Error> { + Ok(Iter { + inner: self.platform.prefixed("refs/tags/")?, + peel: false, + repo: self.repo, + }) + } + + // TODO: tests + /// Return an iterator over all local branches. + /// + /// They are all prefixed with `refs/heads`. + pub fn local_branches(&self) -> Result, init::Error> { + Ok(Iter { + inner: self.platform.prefixed("refs/heads/")?, + peel: false, + repo: self.repo, + }) + } + + // TODO: tests + /// Return an iterator over all remote branches. + /// + /// They are all prefixed with `refs/remotes`. + pub fn remote_branches(&self) -> Result, init::Error> { + Ok(Iter { + inner: self.platform.prefixed("refs/remotes/")?, + peel: false, + repo: self.repo, + }) + } } impl<'r> Iter<'r> { diff --git a/git-repository/src/reference/mod.rs b/git-repository/src/reference/mod.rs index 7e4e0c1bfd6..13537e733ea 100644 --- a/git-repository/src/reference/mod.rs +++ b/git-repository/src/reference/mod.rs @@ -8,13 +8,14 @@ use crate::{Id, Reference}; pub mod iter; mod errors; - -pub use errors::{edit, find, peel}; +pub use errors::{edit, find, head_commit, head_id, peel}; use crate::ext::ObjectIdExt; pub mod log; +pub use git_ref::{Category, Kind}; + /// Access impl<'repo> Reference<'repo> { /// Return the target to which this reference points to. diff --git a/git-repository/src/repository/location.rs b/git-repository/src/repository/location.rs index 0d05f779773..bc391b914ed 100644 --- a/git-repository/src/repository/location.rs +++ b/git-repository/src/repository/location.rs @@ -5,13 +5,18 @@ impl crate::Repository { } /// Return the work tree containing all checked out files, if there is one. - pub fn work_tree(&self) -> Option<&std::path::Path> { + pub fn work_dir(&self) -> Option<&std::path::Path> { self.work_tree.as_deref() } + // TODO: tests /// The directory of the binary path of the current process. - pub fn install_directory(&self) -> std::io::Result { - std::env::current_exe() + pub fn install_dir(&self) -> std::io::Result { + std::env::current_exe().and_then(|exe| { + exe.parent() + .map(ToOwned::to_owned) + .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::Other, "no parent for current executable")) + }) } /// Return the kind of repository, either bare or one with a work tree. diff --git a/git-repository/src/repository/object.rs b/git-repository/src/repository/object.rs index 63d172ca241..75bc5b63f6b 100644 --- a/git-repository/src/repository/object.rs +++ b/git-repository/src/repository/object.rs @@ -77,7 +77,7 @@ impl crate::Repository { name: impl AsRef, target: impl AsRef, target_kind: git_object::Kind, - tagger: Option<&git_actor::SignatureRef<'_>>, + tagger: Option>, message: impl AsRef, constraint: PreviousValue, ) -> Result, tag::Error> { @@ -108,8 +108,8 @@ impl crate::Repository { pub fn commit( &self, reference: Name, - author: &git_actor::SignatureRef<'_>, - committer: &git_actor::SignatureRef<'_>, + author: git_actor::SignatureRef<'_>, + committer: git_actor::SignatureRef<'_>, message: impl AsRef, tree: impl Into, parents: impl IntoIterator>, diff --git a/git-repository/src/repository/reference.rs b/git-repository/src/repository/reference.rs index 012558dcdb4..a8d39f57069 100644 --- a/git-repository/src/repository/reference.rs +++ b/git-repository/src/repository/reference.rs @@ -184,6 +184,31 @@ impl crate::Repository { .attach(self)) } + /// Resolve the `HEAD` reference, follow and peel its target and obtain its object id. + /// + /// Note that this may fail for various reasons, most notably because the repository + /// is freshly initialized and doesn't have any commits yet. + /// + /// Also note that the returned id is likely to point to a commit, but could also + /// point to a tree or blob. It won't, however, point to a tag as these are always peeled. + pub fn head_id(&self) -> Result, crate::reference::head_id::Error> { + let mut head = self.head()?; + head.peel_to_id_in_place() + .ok_or_else(|| crate::reference::head_id::Error::Unborn { + name: head.referent_name().expect("unborn").to_owned(), + })? + .map_err(Into::into) + } + + /// Return the commit object the `HEAD` reference currently points to after peeling it fully. + /// + /// Note that this may fail for various reasons, most notably because the repository + /// is freshly initialized and doesn't have any commits yet. It could also fail if the + /// head does not point to a commit. + pub fn head_commit(&self) -> Result, crate::reference::head_commit::Error> { + Ok(self.head()?.peel_to_commit_in_place()?) + } + /// Find the reference with the given partial or full `name`, like `main`, `HEAD`, `heads/branch` or `origin/other`, /// or return an error if it wasn't found. /// diff --git a/git-repository/src/repository/snapshots.rs b/git-repository/src/repository/snapshots.rs index e894fefae02..da5b6ba34a8 100644 --- a/git-repository/src/repository/snapshots.rs +++ b/git-repository/src/repository/snapshots.rs @@ -55,14 +55,14 @@ impl crate::Repository { .map_err(|e| err.get_or_insert(e.into())) .ok() }); - match self.work_tree() { + match self.work_dir() { None => { // TODO: replace with ref-spec `HEAD:.mailmap` for less verbose way of getting the blob id blob_id = blob_id.or_else(|| { self.head().ok().and_then(|mut head| { let commit = head.peel_to_commit_in_place().ok()?; let tree = commit.tree().ok()?; - tree.lookup_path(std::iter::once(".mailmap")).ok()?.map(|e| e.oid) + tree.lookup_path(Some(".mailmap")).ok()?.map(|e| e.oid) }) }); } @@ -94,7 +94,7 @@ impl crate::Repository { .value::>("mailmap", None, "file") .ok() .and_then(|path| { - let install_dir = self.install_directory().ok()?; + let install_dir = self.install_dir().ok()?; match path.interpolate(Some(install_dir.as_path())) { Ok(path) => Some(path), Err(e) => { diff --git a/git-repository/tests/easy/ext/object.rs b/git-repository/tests/easy/ext/object.rs index 181eacb5d4d..a623bad8fb8 100644 --- a/git-repository/tests/easy/ext/object.rs +++ b/git-repository/tests/easy/ext/object.rs @@ -45,13 +45,13 @@ mod tag { #[test] fn simple() -> crate::Result { let (repo, _keep) = crate::repo_rw("make_basic_repo.sh")?; - let current_head_id = repo.head()?.peeled()?.id().expect("born"); + let current_head_id = repo.head_id()?; let message = "a multi\nline message"; let tag_ref = repo.tag( "v1.0.0", ¤t_head_id, git_object::Kind::Commit, - Some(&repo.committer().to_ref()), + Some(repo.committer().to_ref()), message, git_ref::transaction::PreviousValue::MustNotExist, )?; @@ -80,8 +80,8 @@ mod commit { let err = repo .commit( "HEAD", - &author.to_ref(), - &author.to_ref(), + author.to_ref(), + author.to_ref(), "initial", empty_tree_id, [empty_tree_id], @@ -102,8 +102,8 @@ mod commit { let author = git::actor::Signature::empty(); let commit_id = repo.commit( "HEAD", - &author.to_ref(), - &author.to_ref(), + author.to_ref(), + author.to_ref(), "initial", empty_tree_id, git::commit::NO_PARENT_IDS, @@ -146,8 +146,8 @@ mod commit { let author = git::actor::Signature::empty(); let first_commit_id = repo.commit( "HEAD", - &author.to_ref(), - &author.to_ref(), + author.to_ref(), + author.to_ref(), "hello there \r\n\nthe body", empty_tree_id, Some(parent), @@ -176,8 +176,8 @@ mod commit { let second_commit_id = repo.commit( "refs/heads/new-branch", - &author.to_ref(), - &author.to_ref(), + author.to_ref(), + author.to_ref(), "committing into a new branch creates it", empty_tree_id, Some(first_commit_id), diff --git a/git-repository/tests/easy/id.rs b/git-repository/tests/easy/id.rs index cfce5fa02fd..021bbca54e1 100644 --- a/git-repository/tests/easy/id.rs +++ b/git-repository/tests/easy/id.rs @@ -7,7 +7,7 @@ use git_testtools::hex_to_id; fn prefix() -> crate::Result { let (repo, worktree_dir) = crate::repo_rw("make_repo_with_fork_and_dates.sh")?; let id = hex_to_id("288e509293165cb5630d08f4185bdf2445bf6170").attach(&repo); - let prefix = id.prefix()?; + let prefix = id.shorten()?; assert_eq!(prefix.cmp_oid(&id), Ordering::Equal); assert_eq!(prefix.hex_len(), 7, "preconfigured via core.abbrev default value"); @@ -19,7 +19,7 @@ fn prefix() -> crate::Result { let repo = git_repository::open(worktree_dir.path()).unwrap(); let id = id.detach().attach(&repo); - let prefix = id.prefix()?; + let prefix = id.shorten()?; assert_eq!(prefix.cmp_oid(&id), Ordering::Equal); assert_eq!(prefix.hex_len(), 5, "preconfigured via core.abbrev in the repo file"); Ok(()) diff --git a/git-repository/tests/easy/object.rs b/git-repository/tests/easy/object.rs index fb28a217a96..c29bd5ac1dc 100644 --- a/git-repository/tests/easy/object.rs +++ b/git-repository/tests/easy/object.rs @@ -1,7 +1,6 @@ mod commit { use std::cmp::Ordering; - use git_repository::{Commit, Repository}; use git_testtools::hex_to_id; use crate::basic_repo; @@ -9,7 +8,7 @@ mod commit { #[test] fn short_id() -> crate::Result { let handle = basic_repo()?; - let commit = head_commit(&handle); + let commit = handle.head_commit()?; assert_eq!(commit.short_id()?.cmp_oid(&commit.id), Ordering::Equal); Ok(()) } @@ -17,11 +16,11 @@ mod commit { #[test] fn tree() -> crate::Result { let handle = basic_repo()?; - let commit = head_commit(&handle); + let commit = handle.head_commit()?; assert_eq!(commit.tree()?.id, commit.tree_id().expect("id present")); assert_eq!( - commit.tree_id(), + commit.tree_id().ok(), Some(hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c")) ); Ok(()) @@ -30,16 +29,12 @@ mod commit { #[test] fn decode() -> crate::Result { let handle = basic_repo()?; - let commit = head_commit(&handle); + let commit = handle.head_commit()?; assert_eq!(commit.decode()?.message, commit.message_raw()?); assert_eq!(commit.decode()?.message(), commit.message()?); assert_eq!(commit.decode()?.message, "c2\n"); Ok(()) } - - fn head_commit(repo: &Repository) -> Commit<'_> { - repo.head().unwrap().peel_to_commit_in_place().unwrap() - } } #[test] diff --git a/git-repository/tests/init/mod.rs b/git-repository/tests/init/mod.rs index 1911f0d3111..3b235cfb477 100644 --- a/git-repository/tests/init/mod.rs +++ b/git-repository/tests/init/mod.rs @@ -5,7 +5,7 @@ mod bare { let repo = git_repository::init_bare(tmp.path()).unwrap(); assert_eq!(repo.kind(), git_repository::Kind::Bare); assert!( - repo.work_tree().is_none(), + repo.work_dir().is_none(), "a worktree isn't present in bare repositories" ); assert_eq!( @@ -34,7 +34,7 @@ mod non_bare { let tmp = tempfile::tempdir()?; let repo = git_repository::init(tmp.path())?; assert_eq!(repo.kind(), git_repository::Kind::WorkTree); - assert_eq!(repo.work_tree(), Some(tmp.path()), "there is a work tree by default"); + assert_eq!(repo.work_dir(), Some(tmp.path()), "there is a work tree by default"); assert_eq!( repo.git_dir(), tmp.path().join(".git"), @@ -42,7 +42,7 @@ mod non_bare { ); assert_eq!(git_repository::open(repo.git_dir())?, repo); assert_eq!( - git_repository::open(repo.work_tree().as_ref().expect("non-bare repo"))?, + git_repository::open(repo.work_dir().as_ref().expect("non-bare repo"))?, repo ); Ok(()) diff --git a/git-revision/src/describe.rs b/git-revision/src/describe.rs index c330171c5fc..f35f578f6db 100644 --- a/git-revision/src/describe.rs +++ b/git-revision/src/describe.rs @@ -145,7 +145,7 @@ pub(crate) mod function { Ok(git_object::commit::ref_iter::Token::Parent { id: parent_id }) => { match seen.entry(parent_id) { hash_map::Entry::Vacant(entry) => { - let mut parent = find(&parent_id, &mut parent_buf)?; + let parent = find(&parent_id, &mut parent_buf)?; // TODO: figure out if not having a date is a hard error. let parent_commit_date = parent .committer() diff --git a/git-traverse/src/commit.rs b/git-traverse/src/commit.rs index a74b3e7fef3..9e1a0d93977 100644 --- a/git-traverse/src/commit.rs +++ b/git-traverse/src/commit.rs @@ -215,7 +215,7 @@ pub mod ancestors { let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok(); let parent_committer_date = parent - .and_then(|mut parent| parent.committer().map(|committer| committer.time)); + .and_then(|parent| parent.committer().ok().map(|committer| committer.time)); if let Some(parent_committer_date) = parent_committer_date { state diff --git a/gitoxide-core/src/hours.rs b/gitoxide-core/src/hours.rs index 0b8fc88eb41..4d40f342a49 100644 --- a/gitoxide-core/src/hours.rs +++ b/gitoxide-core/src/hours.rs @@ -85,27 +85,13 @@ where #[allow(clippy::redundant_closure)] let mut all_commits: Vec = all_commits .into_par_iter() - .map(|commit_data: Vec| { + .filter_map(|commit_data: Vec| { objs::CommitRefIter::from_bytes(&commit_data) - .signatures() - .next() - .map(|author| mailmap.resolve(&author)) + .author() + .map(|author| mailmap.resolve(author.trim())) + .ok() }) - .try_fold( - || Vec::new(), - |mut out: Vec<_>, item| { - out.push(item?); - Some(out) - }, - ) - .try_reduce( - || Vec::new(), - |mut out, vec| { - out.extend(vec.into_iter()); - Some(out) - }, - ) - .ok_or_else(|| anyhow!("An error occurred when decoding commits - one commit could not be parsed"))?; + .collect::>(); all_commits.sort_by(|a, b| { a.email.cmp(&b.email).then( a.time