From 1b08fd937056d0a674b1d4bba40ad3098f54ffbf Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Jun 2025 13:47:45 +0200 Subject: [PATCH 1/4] feat: add `commit::Simple::hide()` to hide a given set of tips. That means, these tips and all their ancestors will be hidden from the traversal. --- Cargo.lock | 1 + gix-traverse/src/commit/simple.rs | 492 +++++++++++++----- gix-traverse/tests/Cargo.toml | 1 + .../generated-archives/make_repos.tar | Bin 143360 -> 193024 bytes gix-traverse/tests/fixtures/make_repos.sh | 14 + gix-traverse/tests/traverse/commit/simple.rs | 245 ++++++++- 6 files changed, 618 insertions(+), 135 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ac290281766..0b04d4447a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2501,6 +2501,7 @@ dependencies = [ "gix-hash", "gix-object", "gix-odb", + "gix-path", "gix-testtools", "gix-traverse", "insta", diff --git a/gix-traverse/src/commit/simple.rs b/gix-traverse/src/commit/simple.rs index f5047bfd985..32b2b4cf440 100644 --- a/gix-traverse/src/commit/simple.rs +++ b/gix-traverse/src/commit/simple.rs @@ -2,7 +2,6 @@ use std::{cmp::Reverse, collections::VecDeque}; use gix_date::SecondsSinceUnixEpoch; use gix_hash::ObjectId; -use gix_hashtable::HashSet; use smallvec::SmallVec; #[derive(Default, Debug, Copy, Clone)] @@ -61,7 +60,7 @@ pub enum Sorting { ByCommitTimeCutoff { /// The order in which to prioritize lookups. order: CommitTimeOrder, - /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. + /// The number of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. seconds: gix_date::SecondsSinceUnixEpoch, }, } @@ -77,34 +76,60 @@ pub enum Error { } use Result as Either; + type QueueKey = Either>; +type CommitDateQueue = gix_revwalk::PriorityQueue, ObjectId>; +type Candidates = VecDeque; /// The state used and potentially shared by multiple graph traversals. #[derive(Clone)] pub(super) struct State { next: VecDeque, - queue: gix_revwalk::PriorityQueue, ObjectId>, + queue: CommitDateQueue, buf: Vec, - seen: HashSet, + seen: gix_revwalk::graph::IdMap, parents_buf: Vec, parent_ids: SmallVec<[(ObjectId, SecondsSinceUnixEpoch); 2]>, + /// The list (FIFO) of thus far interesting commits. + /// + /// As they may turn hidden later, we have to keep them until the conditions are met to return them. + /// If `None`, there is nothing to do with hidden commits. + candidates: Option, +} + +#[derive(Debug, Clone, Copy)] +enum CommitState { + /// The commit may be returned, it hasn't been hidden yet. + Interesting, + /// The commit should not be returned. + Hidden, +} + +impl CommitState { + pub fn is_hidden(&self) -> bool { + matches!(self, CommitState::Hidden) + } + pub fn is_interesting(&self) -> bool { + matches!(self, CommitState::Interesting) + } } /// mod init { use std::cmp::Reverse; + use super::{ + super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple}, + collect_parents, Candidates, CommitDateQueue, CommitState, CommitTimeOrder, Error, State, + }; use gix_date::SecondsSinceUnixEpoch; use gix_hash::{oid, ObjectId}; + use gix_hashtable::hash_map::Entry; use gix_object::{CommitRefIter, FindExt}; + use std::collections::VecDeque; use Err as Oldest; use Ok as Newest; - use super::{ - super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple}, - collect_parents, CommitTimeOrder, Error, State, - }; - impl Default for State { fn default() -> Self { State { @@ -114,16 +139,27 @@ mod init { seen: Default::default(), parents_buf: vec![], parent_ids: Default::default(), + candidates: None, } } } impl State { fn clear(&mut self) { - self.next.clear(); - self.queue.clear(); - self.buf.clear(); - self.seen.clear(); + let Self { + next, + queue, + buf, + seen, + parents_buf: _, + parent_ids: _, + candidates, + } = self; + next.clear(); + queue.clear(); + buf.clear(); + seen.clear(); + *candidates = None; } } @@ -147,21 +183,16 @@ mod init { self.queue_to_vecdeque(); } Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => { - let cutoff_time = self.sorting.cutoff_time(); let state = &mut self.state; for commit_id in state.next.drain(..) { - let commit_iter = self.objects.find_commit_iter(&commit_id, &mut state.buf)?; - let time = commit_iter.committer()?.seconds(); - let key = to_queue_key(time, order); - match (cutoff_time, order) { - (Some(cutoff_time), _) if time >= cutoff_time => { - state.queue.insert(key, commit_id); - } - (Some(_), _) => {} - (None, _) => { - state.queue.insert(key, commit_id); - } - } + add_to_queue( + commit_id, + order, + sorting.cutoff_time(), + &mut state.queue, + &self.objects, + &mut state.buf, + )?; } } } @@ -177,9 +208,53 @@ mod init { self } + /// Hide the given `tips`, along with all commits reachable by them so that they will not be returned + /// by the traversal. + /// + /// Note that this will force the traversal into a non-intermediate mode and queue return candidates, + /// to be released when it's clear that they truly are not hidden. + /// + /// Note that hidden objects are expected to exist. + pub fn hide(mut self, tips: impl IntoIterator) -> Result { + self.state.candidates = Some(VecDeque::new()); + let state = &mut self.state; + for id_to_ignore in tips { + let previous = state.seen.insert(id_to_ignore, CommitState::Hidden); + // If there was something, it will pick up whatever commit-state we have set last + // upon iteration. Also, hidden states always override everything else. + if previous.is_none() { + // Assure we *start* traversing hidden variants of a commit first, give them a head-start. + match self.sorting { + Sorting::BreadthFirst => { + state.next.push_front(id_to_ignore); + } + Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => { + add_to_queue( + id_to_ignore, + order, + self.sorting.cutoff_time(), + &mut state.queue, + &self.objects, + &mut state.buf, + )?; + } + } + } + } + if !self + .state + .seen + .values() + .any(|state| matches!(state, CommitState::Hidden)) + { + self.state.candidates = None; + } + Ok(self) + } + /// Set the commitgraph as `cache` to greatly accelerate any traversal. /// - /// The cache will be used if possible, but we will fall-back without error to using the object + /// The cache will be used if possible, but we will fall back without error to using the object /// database for commit lookup. If the cache is corrupt, we will fall back to the object database as well. pub fn commit_graph(mut self, cache: Option) -> Self { self.cache = cache; @@ -196,6 +271,29 @@ mod init { } } + fn add_to_queue( + commit_id: ObjectId, + order: CommitTimeOrder, + cutoff_time: Option, + queue: &mut CommitDateQueue, + objects: &impl gix_object::Find, + buf: &mut Vec, + ) -> Result<(), Error> { + let commit_iter = objects.find_commit_iter(&commit_id, buf)?; + let time = commit_iter.committer()?.seconds(); + let key = to_queue_key(time, order); + match (cutoff_time, order) { + (Some(cutoff_time), _) if time >= cutoff_time => { + queue.insert(key, commit_id); + } + (Some(_), _) => {} + (None, _) => { + queue.insert(key, commit_id); + } + } + Ok(()) + } + /// Lifecycle impl Simple bool> where @@ -241,8 +339,9 @@ mod init { state.clear(); state.next.reserve(tips.size_hint().0); for tip in tips.map(Into::into) { - let was_inserted = state.seen.insert(tip); - if was_inserted && predicate(&tip) { + let seen = state.seen.insert(tip, CommitState::Interesting); + // We know there can only be duplicate interesting ones. + if seen.is_none() && predicate(&tip) { state.next.push_back(tip); } } @@ -262,7 +361,7 @@ mod init { impl Simple { /// Return an iterator for accessing data of the current commit, parsed lazily. pub fn commit_iter(&self) -> CommitRefIter<'_> { - CommitRefIter::from_bytes(&self.state.buf) + CommitRefIter::from_bytes(self.commit_data()) } /// Return the current commits' raw data, which can be parsed using [`gix_object::CommitRef::from_bytes()`]. @@ -288,6 +387,12 @@ mod init { Sorting::ByCommitTimeCutoff { seconds, order } => self.next_by_commit_date(order, seconds.into()), } } + .or_else(|| { + self.state + .candidates + .as_mut() + .and_then(|candidates| candidates.pop_front().map(Ok)) + }) } } @@ -314,65 +419,89 @@ mod init { ) -> Option> { let state = &mut self.state; - let (commit_time, oid) = match state.queue.pop()? { - (Newest(t) | Oldest(Reverse(t)), o) => (t, o), - }; - let mut parents: ParentIds = Default::default(); - match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) { - Ok(Either::CachedCommit(commit)) => { - if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { - // drop corrupt caches and try again with ODB - self.cache = None; - return self.next_by_commit_date(order, cutoff); - } - for (id, parent_commit_time) in state.parent_ids.drain(..) { - parents.push(id); - let was_inserted = state.seen.insert(id); - if !(was_inserted && (self.predicate)(&id)) { - continue; + 'skip_hidden: loop { + let (commit_time, oid) = match state.queue.pop()? { + (Newest(t) | Oldest(Reverse(t)), o) => (t, o), + }; + let mut parents: ParentIds = Default::default(); + // TODO(perf): can avoid this lookup by storing state on `queue` respectively. + // ALSO: need to look ahead for early aborts, i.e. if there is only hidden left to traverse. + // Maybe this can be counted? + let commit_state = *state.seen.get(&oid).expect("every commit we traverse has state added"); + match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) { + Ok(Either::CachedCommit(commit)) => { + if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { + // drop corrupt caches and try again with ODB + self.cache = None; + return self.next_by_commit_date(order, cutoff); } - - let key = to_queue_key(parent_commit_time, order); - match cutoff { - Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, - Some(_) | None => state.queue.insert(key, id), + for (id, parent_commit_time) in state.parent_ids.drain(..) { + parents.push(id); + insert_into_seen_and_queue( + &mut state.seen, + id, + &mut state.candidates, + commit_state, + &mut self.predicate, + &mut state.queue, + order, + cutoff, + || parent_commit_time, + ); } } - } - Ok(Either::CommitRefIter(commit_iter)) => { - for token in commit_iter { - match token { - Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, - Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { - parents.push(id); - let was_inserted = state.seen.insert(id); - if !(was_inserted && (self.predicate)(&id)) { - continue; - } - - let parent = self.objects.find_commit_iter(id.as_ref(), &mut state.parents_buf).ok(); - let parent_commit_time = parent - .and_then(|parent| parent.committer().ok().map(|committer| committer.seconds())) - .unwrap_or_default(); - - let time = to_queue_key(parent_commit_time, order); - match cutoff { - Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue, - Some(_) | None => state.queue.insert(time, id), + Ok(Either::CommitRefIter(commit_iter)) => { + for token in commit_iter { + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { + parents.push(id); + insert_into_seen_and_queue( + &mut state.seen, + id, + &mut state.candidates, + commit_state, + &mut self.predicate, + &mut state.queue, + order, + cutoff, + || { + let parent = + self.objects.find_commit_iter(id.as_ref(), &mut state.parents_buf).ok(); + parent + .and_then(|parent| { + parent.committer().ok().map(|committer| committer.seconds()) + }) + .unwrap_or_default() + }, + ); } + Ok(_unused_token) => break, + Err(err) => return Some(Err(err.into())), + } + } + } + Err(err) => return Some(Err(err.into())), + } + match commit_state { + CommitState::Interesting => { + let info = Info { + id: oid, + parent_ids: parents, + commit_time: Some(commit_time), + }; + match state.candidates.as_mut() { + None => return Some(Ok(info)), + Some(candidates) => { + // assure candidates aren't prematurely returned - hidden commits may catch up with + // them later. + candidates.push_back(info); } - Ok(_unused_token) => break, - Err(err) => return Some(Err(err.into())), } } + CommitState::Hidden => continue 'skip_hidden, } - Err(err) => return Some(Err(err.into())), } - Some(Ok(Info { - id: oid, - parent_ids: parents, - commit_time: Some(commit_time), - })) } } @@ -384,53 +513,182 @@ mod init { { fn next_by_topology(&mut self) -> Option> { let state = &mut self.state; - let oid = state.next.pop_front()?; - let mut parents: ParentIds = Default::default(); - match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) { - Ok(Either::CachedCommit(commit)) => { - if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { - // drop corrupt caches and try again with ODB - self.cache = None; - return self.next_by_topology(); - } + 'skip_hidden: loop { + let oid = state.next.pop_front()?; + let mut parents: ParentIds = Default::default(); + // TODO(perf): can avoid this lookup by storing state on `next` respectively. + // ALSO: need to look ahead for early aborts, i.e. if there is only hidden left to traverse. + // Maybe this can be counted? + let commit_state = *state.seen.get(&oid).expect("every commit we traverse has state added"); + + match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) { + Ok(Either::CachedCommit(commit)) => { + if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { + // drop corrupt caches and try again with ODB + self.cache = None; + return self.next_by_topology(); + } - for (id, _commit_time) in state.parent_ids.drain(..) { - parents.push(id); - let was_inserted = state.seen.insert(id); - if was_inserted && (self.predicate)(&id) { - state.next.push_back(id); + for (pid, _commit_time) in state.parent_ids.drain(..) { + parents.push(pid); + insert_into_seen_and_next( + &mut state.seen, + pid, + &mut state.candidates, + commit_state, + &mut self.predicate, + &mut state.next, + ); + if commit_state.is_interesting() && matches!(self.parents, Parents::First) { + break; + } } - if matches!(self.parents, Parents::First) { - break; + } + Ok(Either::CommitRefIter(commit_iter)) => { + for token in commit_iter { + match token { + Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, + Ok(gix_object::commit::ref_iter::Token::Parent { id: pid }) => { + parents.push(pid); + insert_into_seen_and_next( + &mut state.seen, + pid, + &mut state.candidates, + commit_state, + &mut self.predicate, + &mut state.next, + ); + if commit_state.is_interesting() && matches!(self.parents, Parents::First) { + break; + } + } + Ok(_a_token_past_the_parents) => break, + Err(err) => return Some(Err(err.into())), + } } } + Err(err) => return Some(Err(err.into())), } - Ok(Either::CommitRefIter(commit_iter)) => { - for token in commit_iter { - match token { - Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue, - Ok(gix_object::commit::ref_iter::Token::Parent { id }) => { - parents.push(id); - let was_inserted = state.seen.insert(id); - if was_inserted && (self.predicate)(&id) { - state.next.push_back(id); - } - if matches!(self.parents, Parents::First) { - break; - } + match commit_state { + CommitState::Interesting => { + let info = Info { + id: oid, + parent_ids: parents, + commit_time: None, + }; + match state.candidates.as_mut() { + None => return Some(Ok(info)), + Some(candidates) => { + // assure candidates aren't prematurely returned - hidden commits may catch up with + // them later. + candidates.push_back(info); } - Ok(_a_token_past_the_parents) => break, - Err(err) => return Some(Err(err.into())), } } + CommitState::Hidden => continue 'skip_hidden, + } + } + } + } + + #[inline] + fn remove_candidate(candidates: Option<&mut Candidates>, remove: ObjectId) -> Option<()> { + let candidates = candidates?; + let pos = candidates + .iter_mut() + .enumerate() + .find_map(|(idx, info)| (info.id == remove).then_some(idx))?; + candidates.remove(pos); + None + } + + fn insert_into_seen_and_next( + seen: &mut gix_revwalk::graph::IdMap, + parent_id: ObjectId, + candidates: &mut Option, + commit_state: CommitState, + predicate: &mut impl FnMut(&oid) -> bool, + next: &mut VecDeque, + ) { + let enqueue = match seen.entry(parent_id) { + Entry::Occupied(mut e) => { + let enqueue = handle_seen(commit_state, *e.get(), parent_id, candidates); + if commit_state.is_hidden() { + e.insert(commit_state); + } + enqueue + } + Entry::Vacant(e) => { + e.insert(commit_state); + match commit_state { + CommitState::Interesting => predicate(&parent_id), + CommitState::Hidden => true, + } + } + }; + if enqueue { + next.push_back(parent_id); + } + } + + #[allow(clippy::too_many_arguments)] + fn insert_into_seen_and_queue( + seen: &mut gix_revwalk::graph::IdMap, + parent_id: ObjectId, + candidates: &mut Option, + commit_state: CommitState, + predicate: &mut impl FnMut(&oid) -> bool, + queue: &mut CommitDateQueue, + order: CommitTimeOrder, + cutoff: Option, + get_parent_commit_time: impl FnOnce() -> gix_date::SecondsSinceUnixEpoch, + ) { + let enqueue = match seen.entry(parent_id) { + Entry::Occupied(mut e) => { + let enqueue = handle_seen(commit_state, *e.get(), parent_id, candidates); + if commit_state.is_hidden() { + e.insert(commit_state); + } + enqueue + } + Entry::Vacant(e) => { + e.insert(commit_state); + match commit_state { + CommitState::Interesting => (predicate)(&parent_id), + CommitState::Hidden => true, } - Err(err) => return Some(Err(err.into())), } - Some(Ok(Info { - id: oid, - parent_ids: parents, - commit_time: None, - })) + }; + + if enqueue { + let parent_commit_time = get_parent_commit_time(); + let key = to_queue_key(parent_commit_time, order); + match cutoff { + Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => {} + Some(_) | None => queue.insert(key, parent_id), + } + } + } + + #[inline] + #[must_use] + fn handle_seen( + next_state: CommitState, + current_state: CommitState, + id: ObjectId, + candidates: &mut Option, + ) -> bool { + match (current_state, next_state) { + (CommitState::Hidden, CommitState::Hidden) => false, + (CommitState::Interesting, CommitState::Interesting) => false, + (CommitState::Hidden, CommitState::Interesting) => { + // keep traversing to paint more hidden. After all, the commit_state overrides the current parent state + true + } + (CommitState::Interesting, CommitState::Hidden) => { + remove_candidate(candidates.as_mut(), id); + true + } } } } diff --git a/gix-traverse/tests/Cargo.toml b/gix-traverse/tests/Cargo.toml index 029c4dba5ec..64a04af9996 100644 --- a/gix-traverse/tests/Cargo.toml +++ b/gix-traverse/tests/Cargo.toml @@ -22,3 +22,4 @@ gix-odb = { path = "../../gix-odb" } gix-hash = { path = "../../gix-hash" } gix-object = { path = "../../gix-object" } gix-commitgraph = { path = "../../gix-commitgraph" } +gix-path = { version = "^0.10.18", path = "../../gix-path" } diff --git a/gix-traverse/tests/fixtures/generated-archives/make_repos.tar b/gix-traverse/tests/fixtures/generated-archives/make_repos.tar index 4cf3b1c9132547a9c5f10f19c8cfb70628d2f821..6f8b96a0fbf0f25488a9acd24ba72bb1a444d4ad 100644 GIT binary patch delta 5698 zcmd^Ddr%bj9lzh+lk+flJdTH8&YB=UQvI!SR6Ei8vOd9hVO={ZIB${GlCKzq1sXDp-_V(^j*;qQ$=^rgK z?Cx*(^ZoAk@%es#-`nk<>b8GA;~)~{@-%F!@l-c%@EQ%Ew9BI1lcMrgtp=D1#S|3J zYF#`40doZAsAsF4Ra`@+wbIkrWm*B@e3b@OlO#R~H%)=_dfQKeP+O*kqM>RutQt53 zkQD%OB5{&PT&9MkNJ0`3KzsoC9&^0LG$0w2eWUqPm3#GiAZbrW)dayz+Wx(|b3?kJ z07+BR5XcJ&e3z-ANtBs%Io(0gG=|3*NTVtlN#Fb|L5U zqoB)@v|vg+gW=Dt=iCjMRUGST$P6R8Oj*G=!9FY9)~hcH-%h&q?re5RMX)a5W+iACur?nHr3uLv#?p>+*&VNZ*!|9C(pXLyT*Zh@+4 zCZyUAehC6`a_9*OJrN8F7OF)7oR>FyR6>uYCPFhXl%z2fnna)q6rM5hES9nWURtXb zHoV%a?6Sya!X>G3i4I}m1|R@V-o#D`-5ErOLo0wTdG{|#=u5D-Fm%MA%kustN?@@? z(34dx7pF7{DRv0*x`LO3rb|#nEOp|##wmwgAYpYWNi>7s6j6*JChLU)^T9lZWSuO^ z(3HbYP^^>U7&pV&S%-~a-SELT++q%$9Kk?ME*oceQy9n6 z1dekqhO;?b7-@sIaT~u9C;8h`b)_t>pRSB$x(yC0U&se$n*hs$tinhW+i=4!lewj) zvO&JrQNdm(8%fAfI#M=RI!Gr-j&i#x=(3A&uy!}fqNoeQSjNq|7@VRox7&@{Xw18^ z2$0^pZ9v06m|`e(VEXAwTI{6LO}Gh`bTT&F0kaF!N#X>@up~w?EQ;GH#^oYy;wu*N zt#e{a^wSlwOtXFCkO&LBN3uYy7*RR;C5!a)R=GiHBBa`l1&vV{7HJ>E`->h$WB6u|y*^sWjO8n19jx3=;I>>;!rWoe1)}QIDH0hofW$8~t>9ig0k| zMmRM6NX{!=sP)1&jpC6lIO-M?B^esxjH!vRuisQHBnBtP;P&SS3&Cwb7oNgGgWG&L z8}5>9fV&FH5VzidN+hVbtH5q{YwUUnyB^L(7!jF^nt=wSlWvy{wcAO|E>4&fOK>zx zJ5iRQP@1&S97)my2HT>Lx$%A3$*)w>otKitpy*9Oz)=zljhO;ir3Tq5Me)+Se1-Iv z2d|5yOI6-Ic{!z_>)#gS<>1n_daraAkLsuSIWfoiM^`uT8}du}qxntV^W~t>+gb;z z_~HU1zqx?)7FU3K`M{cV{y@QE{%pabR`M@_@-xRr59k%|C$~ORzv}jYj;&nwdo4FY zJ-xp5{+<`JE??eL|H6^I@7|qN)U>d2PD5^XOU{~u^Vj^j@CEnEwxdNv$IxcuAJuo9 zoYnp8#i&oWU6|K;c?|8m@btA^0~_sUPsP-KJ=bvN=Q~e+@%Pz>aO)rb`f}XEkEAc( zQSnCZTh%Exz)3FhgKbrqi8pPf zHI7Z3v#}wwmUV6p9x|p&4y(u2#9x>d%U3}fh(pSawSbc}jfY3UCHa6yWA?&4mcty@ zdKXpzLokQpVR1N#r989tNtnRJ{Ii8d@1yk~BTg=pJi08d^}mtMcZ%-##cRX4Ynfu_ zhuYD!k3Xxg$hdgq-PfYCMwYw{Gy0W60nWhxQFz@0&k68sfaj{>lTmw<4wjTBo+jFV>HKv?xw#X|S$&RhScF0IUD=nGuu8uU=qHLZ?pTl2F{#j&TX-d6k8lLt~?xl&1=emgd) zDEq=$)%KZ9`(HZNw6AW*;O?QNK2KSXr`}iBQCHH(_2l%m<#hD2ea&T#j*+gxLAI={ zr)}51&&Jl?mp1pU_KFX;<#)fKAJvR&H09+>^UD=Vb?i?kz;;$t`QoqYcAOfsWu(nL z6GXlNk*y)*nlx8l371WrI>_J)GyXD*jX$bNsBTtvsS|_r8&(j{3f~ zk$s*$j}Lhq&v|^U%{|RN&H-QGNbdA_T-AGBJuVbm{p&a1t95v(1HHXNI}W#vw1I>* zeWnD(`Q%vSE3m_Qy*v3k+aII1X%0pFZimk7sMF?Hn)WNj&I9BK8KRxOG13uu{Y9yJ z7*Z2q@3^s^F_J+eyGS&$4*W36Q(0Y8&pFu!&a%kTSihCiN7cf0&3&~s4cyjhkF&-V zeggNuCNP`=45}v#B>*0*K;<{YA!@`MuLs56jc%axSKWaO0KdVEm^A)W3-XQ9YhDIc zbXnAJKJ{0nAOlLjVH?mQd_wn+LlQ@TFPb->SP@!IfWsy}`Pj11avb=yym?%D`a%`* zl#)*v4vW4A#O1m0Qib@Gym>6F=@dAu<&y`(nqC1uHE(VSKivy_@w|B=ywpL{yoM8? zdFn4>IOfAKVd^j9;rl_1nKyTaRrtVw`U!&(flyZ zmI1Yjk53Ll=K6P%pnZnlU9#^ z3|E!?MjiCW2u1GFAwpsayq&uKZxC9l(+i2&1R^ZYdp(*rUTl;LDEtf=qRzh^(BYXN zM&pl31QXyk62nQ{#@K9F1}w@ZzbP4<2j1^u!2^l#BkGhYXCj~H;z2ZsBkWenlAcpr dyM?n9cp5V>iku&uX8o3A@F|dve64oFKLPR_I~D){ delta 5948 zcmeI0eQXrBVgsfyxZ@Dp*up&ATLR}}Z@z1oC4;5)l-89OCL@7M) zy?4$>QwGxhjHUDW?%wzLexCa!$NpS<>{9g!JbxL5{C4Ox4q$8J!k#Ho_1I~@ojrqR zeAA&h?4J(Jv#%=%7JbWEbf&!M1fHq2LyzF~La_m(yPS|jkC2jc^tnBBHm&f zU%=Ja+WcC|lm1)$C*;%V_|&@fu*u2_4I4UQ74cZID;`V467V|~OC%HVWJSyp1CoW% zzlR6=`-UqjDv*2l;2dy&!>OVYj%qln;PAtNU;69wU;Y$sUN|>#{+sP@{tiEL27a$gP z$d9YAwdvuzJs%W2_ff0i;BJ0h%!O7rnPA@R#@%;GbbDs8bRC6Ng~mVm{-g;rg1`66r!oh zBuQs!`Y;F$7Hry^LQi)sPfo{-I~ zmN!XSQ!|R3HVq=H5EFyK4N24|$%LjKhAlKodNU@~K^_Y>*eZjL79*cdQ^UyQ!9Ja+ zn%N3E9gT`;m~JrP4wJ|t+U7iSWfoXq9pAumi7M2j68C=yOwMSEqD`=PG27%(&nU5nN?|o!Ei^hBB}R@y5jyn;IcbdwHKv}Y!b3~+xs7zA zqLganA?VA?mWTzXShvsU-4YeFQ;!J@{^7fs7oLOy5haO4GH^tk46q^$ zH+hf4Bn&ni?K}cTdlUb`&Y7^|(z1}~ypba!w8@Q;xYr${OXU{?su)zL@f5BI&rM?L zNoo}`GHVLxYMRb!BwSWncD;vCUDxy;l7W{b3_O?>@NUYn0myU~*jx{jyP6Xq;wS_r z&e}Pzvbnmue`utyf6zJKJ23d!k)gp@vg2|HwQNxQ8XN=jL3jGM@#n|`<`#U2iI5I_09NNmDSKzCZgN%nRRx3 z6#vU-ZQ5T(tS&s~vl_PDZCT`Ww0x*`Z4qR~B!33mY2}9V`q1TF`H2pGBv9)3DD!9ef`7 zPX^zGHIF!J7@hMq-$$ov?ck$`)A>$k-*?du9Xc-po%c-nQJxbqsJ5JlY=k4p1hcT;QGsTsaIbl=YM|3+$W#x{QUL{KLd0> zf#xrw+bgZjzXn|AMlYbBpdvs#lnwXqm*}+1^L)XJ;IAF5L>|xcyDFM>d7fYIb*>r8 z^XrGvsem1NKXjB&qK~*J&q(GbDBs!b0o^Jn{~5Htx(F!0*8{BW(vaf?3#q4`1?gyV8vM-^(*CIRkM|9O!B3CXU=WB}W_z%yG Bf*=3@ diff --git a/gix-traverse/tests/fixtures/make_repos.sh b/gix-traverse/tests/fixtures/make_repos.sh index c7d29e06b8a..2450663bc18 100755 --- a/gix-traverse/tests/fixtures/make_repos.sh +++ b/gix-traverse/tests/fixtures/make_repos.sh @@ -76,3 +76,17 @@ function optimize() { optimize ) + +(git init disjoint_branches && cd disjoint_branches + git checkout -b main + commit a1 + commit a2 + commit a3 + + git checkout --orphan disjoint + commit b1 + commit b2 + commit b3 + + optimize +) \ No newline at end of file diff --git a/gix-traverse/tests/traverse/commit/simple.rs b/gix-traverse/tests/traverse/commit/simple.rs index 0b5bc02b7db..1d8043c6fb1 100644 --- a/gix-traverse/tests/traverse/commit/simple.rs +++ b/gix-traverse/tests/traverse/commit/simple.rs @@ -1,7 +1,8 @@ +use crate::hex_to_id; use gix_hash::{oid, ObjectId}; +use gix_object::bstr::{ByteSlice, ByteVec}; use gix_traverse::commit; - -use crate::hex_to_id; +use std::path::PathBuf; struct TraversalAssertion<'a> { init_script: &'a str, @@ -10,6 +11,9 @@ struct TraversalAssertion<'a> { expected: &'a [&'a str], mode: commit::Parents, sorting: commit::simple::Sorting, + expected_without_tips: bool, + // commit-ids that should be hidden (along with all their history. + hidden: &'a [&'a str], } impl<'a> TraversalAssertion<'a> { @@ -25,6 +29,8 @@ impl<'a> TraversalAssertion<'a> { expected, mode: Default::default(), sorting: Default::default(), + hidden: Default::default(), + expected_without_tips: false, } } @@ -37,19 +43,42 @@ impl<'a> TraversalAssertion<'a> { self.sorting = sorting; self } + + /// Set the commits that should be hidden. + fn with_hidden(&mut self, hidden: &'a [&'a str]) -> &mut Self { + self.hidden = hidden; + self + } + + /// Do not automatically add tips to the set of expected items. + fn expected_without_tips(&mut self) -> &mut Self { + self.expected_without_tips = true; + self + } + + /// Execute the fixture and get the repository worktree path. + pub fn worktree_dir(&self) -> crate::Result { + let dir = gix_testtools::scripted_fixture_read_only_standalone(self.init_script)?; + Ok(dir.join(self.repo_name)) + } } impl TraversalAssertion<'_> { - fn setup(&self) -> crate::Result<(gix_odb::Handle, Vec, Vec)> { - let dir = gix_testtools::scripted_fixture_read_only_standalone(self.init_script)?; - let store = gix_odb::at(dir.join(self.repo_name).join(".git").join("objects"))?; + #[allow(clippy::type_complexity)] + fn setup(&self) -> crate::Result<(gix_odb::Handle, Vec, Vec, Vec)> { + let repo_path = self.worktree_dir()?; + let store = gix_odb::at(repo_path.join(".git").join("objects"))?; let tips: Vec<_> = self.tips.iter().copied().map(hex_to_id).collect(); - let expected: Vec = tips - .clone() - .into_iter() - .chain(self.expected.iter().map(|hex_id| hex_to_id(hex_id))) - .collect(); - Ok((store, tips, expected)) + let expected: Vec = if self.expected_without_tips { + self.expected.iter().map(|hex_id| hex_to_id(hex_id)).collect() + } else { + tips.clone() + .into_iter() + .chain(self.expected.iter().map(|hex_id| hex_to_id(hex_id))) + .collect() + }; + let hidden: Vec<_> = self.hidden.iter().copied().map(hex_to_id).collect(); + Ok((store, tips, expected, hidden)) } fn setup_commitgraph(&self, store: &gix_odb::Store, use_graph: bool) -> Option { @@ -60,12 +89,13 @@ impl TraversalAssertion<'_> { } fn check_with_predicate(&mut self, predicate: impl FnMut(&oid) -> bool + Clone) -> crate::Result<()> { - let (store, tips, expected) = self.setup()?; + let (store, tips, expected, hidden) = self.setup()?; for use_commitgraph in [false, true] { let oids = commit::Simple::filtered(tips.clone(), &store, predicate.clone()) .sorting(self.sorting)? .parents(self.mode) + .hide(hidden.clone())? .commit_graph(self.setup_commitgraph(store.store_ref(), use_commitgraph)) .map(|res| res.map(|info| info.id)) .collect::, _>>()?; @@ -76,17 +106,167 @@ impl TraversalAssertion<'_> { } fn check(&self) -> crate::Result { - let (store, tips, expected) = self.setup()?; + let (store, tips, expected, hidden) = self.setup()?; for use_commitgraph in [false, true] { let oids = commit::Simple::new(tips.clone(), &store) .sorting(self.sorting)? .parents(self.mode) + .hide(hidden.clone())? .commit_graph(self.setup_commitgraph(store.store_ref(), use_commitgraph)) .map(|res| res.map(|info| info.id)) .collect::, _>>()?; - assert_eq!(oids, expected); + assert_eq!( + oids, expected, + "use_commitgraph = {use_commitgraph}, sorting = {:?}", + self.sorting + ); + } + Ok(()) + } +} + +mod hide { + use crate::commit::simple::{git_graph, TraversalAssertion}; + use gix_traverse::commit::simple::{CommitTimeOrder, Sorting}; + use gix_traverse::commit::Parents; + + fn all_sortings() -> impl Iterator { + [ + Sorting::ByCommitTime(CommitTimeOrder::NewestFirst), + Sorting::ByCommitTime(CommitTimeOrder::OldestFirst), + Sorting::BreadthFirst, + ] + .into_iter() + } + + #[test] + fn disjoint_hidden_and_interesting() -> crate::Result { + let mut assertion = TraversalAssertion::new_at( + "make_repos.sh", + "disjoint_branches", + &["e07cf1277ff7c43090f1acfc85a46039e7de1272"], /* b3 */ + &[ + "94cf3f3a4c782b672173423e7a4157a02957dd48", /* b2 */ + "34e5ff5ce3d3ba9f0a00d11a7fad72551fff0861", /* b1 */ + ], + ); + insta::assert_snapshot!(git_graph(assertion.worktree_dir()?)?, @r" + * e07cf1277ff7c43090f1acfc85a46039e7de1272 (HEAD -> disjoint) b3 + * 94cf3f3a4c782b672173423e7a4157a02957dd48 b2 + * 34e5ff5ce3d3ba9f0a00d11a7fad72551fff0861 b1 + * b5665181bf4c338ab16b10da0524d81b96aff209 (main) a3 + * f0230ce37b83d8e9f51ea6322ed7e8bd148d8e28 a2 + * 674aca0765b935ac5e7f7e9ab83af7f79272b5b0 a1 + "); + + for sorting in all_sortings() { + assertion + .with_hidden(&["b5665181bf4c338ab16b10da0524d81b96aff209" /* a3 */]) + .with_sorting(sorting) + .check()?; + } + Ok(()) + } + + #[test] + fn all_hidden() -> crate::Result { + let mut assertion = TraversalAssertion::new_at( + "make_repos.sh", + "disjoint_branches", + &[ + "e07cf1277ff7c43090f1acfc85a46039e7de1272", /* b3 */ + "b5665181bf4c338ab16b10da0524d81b96aff209", /* a3 */ + ], + // The start positions are also declared hidden, so nothing should be visible. + &[], + ); + insta::assert_snapshot!(git_graph(assertion.worktree_dir()?)?, @r" + * e07cf1277ff7c43090f1acfc85a46039e7de1272 (HEAD -> disjoint) b3 + * 94cf3f3a4c782b672173423e7a4157a02957dd48 b2 + * 34e5ff5ce3d3ba9f0a00d11a7fad72551fff0861 b1 + * b5665181bf4c338ab16b10da0524d81b96aff209 (main) a3 + * f0230ce37b83d8e9f51ea6322ed7e8bd148d8e28 a2 + * 674aca0765b935ac5e7f7e9ab83af7f79272b5b0 a1 + "); + + for sorting in all_sortings() { + assertion + .with_hidden(&[ + "e07cf1277ff7c43090f1acfc85a46039e7de1272", /* b3 */ + "b5665181bf4c338ab16b10da0524d81b96aff209", /* a3 */ + ]) + .with_sorting(sorting) + .expected_without_tips() + .check()?; + } + Ok(()) + } + + #[test] + fn some_hidden_and_all_hidden() -> crate::Result { + // Hidden has to catch up with non-hidden. + let mut assertion = TraversalAssertion::new_at( + "make_repos.sh", + "simple", + &["ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83"], /* c2 */ + &[], + ); + + insta::assert_snapshot!(git_graph(assertion.worktree_dir()?)?, @r" + *-. f49838d84281c3988eeadd988d97dd358c9f9dc4 (HEAD -> main) merge + |\ \ + | | * 48e8dac19508f4238f06c8de2b10301ce64a641c (branch2) b2c2 + | | * cb6a6befc0a852ac74d74e0354e0f004af29cb79 b2c1 + | * | 66a309480201c4157b0eae86da69f2d606aadbe7 (branch1) b1c2 + | * | 80947acb398362d8236fcb8bf0f8a9dac640583f b1c1 + | |/ + * / 0edb95c0c0d9933d88f532ec08fcd405d0eee882 c5 + |/ + * 8cb5f13b66ce52a49399a2c49f537ee2b812369c c4 + * 33aa07785dd667c0196064e3be3c51dd9b4744ef c3 + * ad33ff2d0c4fc77d56b5fbff6f86f332fe792d83 c2 + * 65d6af66f60b8e39fd1ba6a1423178831e764ec5 c1 + "); + + for sorting in all_sortings() { + assertion + .with_hidden(&["0edb95c0c0d9933d88f532ec08fcd405d0eee882" /* c5 */]) + .expected_without_tips() + .with_sorting(sorting) + .check()?; + } + let mut assertion = TraversalAssertion::new_at( + "make_repos.sh", + "simple", + &["f49838d84281c3988eeadd988d97dd358c9f9dc4"], /* merge */ + &["0edb95c0c0d9933d88f532ec08fcd405d0eee882" /* c5 */], + ); + + for sorting in all_sortings() { + assertion + .with_hidden(&[ + "48e8dac19508f4238f06c8de2b10301ce64a641c", /* b2c2 */ + "66a309480201c4157b0eae86da69f2d606aadbe7", /* b1c2 */ + ]) + .with_sorting(sorting) + .check()?; } + + let mut assertion = TraversalAssertion::new_at( + "make_repos.sh", + "simple", + &["80947acb398362d8236fcb8bf0f8a9dac640583f"], /* b1c1 */ + // Single-parent is only for commits that we are/ought to be interested in. + // Hence, hidden commits still catch up. + &[], + ); + + assertion + .with_hidden(&["f49838d84281c3988eeadd988d97dd358c9f9dc4" /* merge */]) + .with_parents(Parents::First) + .expected_without_tips() + .check()?; Ok(()) } } @@ -403,6 +583,7 @@ mod adjusted_dates { Parents, Simple, }; + use crate::commit::simple::git_graph; use crate::{commit::simple::TraversalAssertion, hex_to_id}; #[test] @@ -424,7 +605,7 @@ mod adjusted_dates { #[test] fn head_date_order() -> crate::Result { - TraversalAssertion::new( + let mut assertion = TraversalAssertion::new( "make_traversal_repo_for_commits_with_dates.sh", &["288e509293165cb5630d08f4185bdf2445bf6170"], /* m1b1 */ &[ @@ -432,9 +613,18 @@ mod adjusted_dates { "9902e3c3e8f0c569b4ab295ddf473e6de763e1e7", /* c2 */ "134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */ ], - ) - .with_sorting(Sorting::ByCommitTime(CommitTimeOrder::NewestFirst)) - .check()?; + ); + insta::assert_snapshot!(git_graph(assertion.worktree_dir()?)?, @r" + * 288e509293165cb5630d08f4185bdf2445bf6170 (HEAD -> main) m1b1 + |\ + | * bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac (branch1) b1c1 + * | 9902e3c3e8f0c569b4ab295ddf473e6de763e1e7 c2 + |/ + * 134385f6d781b7e97062102c6a483440bfda2a03 c1 + "); + assertion + .with_sorting(Sorting::ByCommitTime(CommitTimeOrder::NewestFirst)) + .check()?; TraversalAssertion::new( "make_traversal_repo_for_commits_with_dates.sh", &["288e509293165cb5630d08f4185bdf2445bf6170"], /* m1b1 */ @@ -545,3 +735,22 @@ mod adjusted_dates { [CommitTimeOrder::NewestFirst, CommitTimeOrder::OldestFirst] } } + +/// Execute a git status in the given repository path. +fn git_graph(repo_dir: impl AsRef) -> crate::Result { + let out = std::process::Command::new(gix_path::env::exe_invocation()) + .current_dir(repo_dir) + .args([ + "log", + "--oneline", + "--graph", + "--decorate", + "--all", + "--pretty=format:%H %d %s", + ]) + .output()?; + if !out.status.success() { + return Err(format!("git status failed: {err}", err = out.stderr.to_str_lossy()).into()); + } + Ok(out.stdout.into_string_lossy()) +} From 219655fb0001b4e88a56fdcaebed1679ff6e7118 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Jun 2025 14:07:08 +0200 Subject: [PATCH 2/4] Improve traversal performance when hidden tips are used. Now it will abort early if there are only hidden tips to be traversed, which cannot change the result anymore. --- gix-traverse/src/commit/simple.rs | 83 ++++++++++++++------ gix-traverse/tests/traverse/commit/simple.rs | 2 +- 2 files changed, 60 insertions(+), 25 deletions(-) diff --git a/gix-traverse/src/commit/simple.rs b/gix-traverse/src/commit/simple.rs index 32b2b4cf440..7b851765bf0 100644 --- a/gix-traverse/src/commit/simple.rs +++ b/gix-traverse/src/commit/simple.rs @@ -78,13 +78,13 @@ pub enum Error { use Result as Either; type QueueKey = Either>; -type CommitDateQueue = gix_revwalk::PriorityQueue, ObjectId>; +type CommitDateQueue = gix_revwalk::PriorityQueue, (ObjectId, CommitState)>; type Candidates = VecDeque; /// The state used and potentially shared by multiple graph traversals. #[derive(Clone)] pub(super) struct State { - next: VecDeque, + next: VecDeque<(ObjectId, CommitState)>, queue: CommitDateQueue, buf: Vec, seen: gix_revwalk::graph::IdMap, @@ -184,9 +184,10 @@ mod init { } Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => { let state = &mut self.state; - for commit_id in state.next.drain(..) { + for (commit_id, commit_state) in state.next.drain(..) { add_to_queue( commit_id, + commit_state, order, sorting.cutoff_time(), &mut state.queue, @@ -226,11 +227,12 @@ mod init { // Assure we *start* traversing hidden variants of a commit first, give them a head-start. match self.sorting { Sorting::BreadthFirst => { - state.next.push_front(id_to_ignore); + state.next.push_front((id_to_ignore, CommitState::Hidden)); } Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => { add_to_queue( id_to_ignore, + CommitState::Hidden, order, self.sorting.cutoff_time(), &mut state.queue, @@ -273,6 +275,7 @@ mod init { fn add_to_queue( commit_id: ObjectId, + commit_state: CommitState, order: CommitTimeOrder, cutoff_time: Option, queue: &mut CommitDateQueue, @@ -284,11 +287,11 @@ mod init { let key = to_queue_key(time, order); match (cutoff_time, order) { (Some(cutoff_time), _) if time >= cutoff_time => { - queue.insert(key, commit_id); + queue.insert(key, (commit_id, commit_state)); } (Some(_), _) => {} (None, _) => { - queue.insert(key, commit_id); + queue.insert(key, (commit_id, commit_state)); } } Ok(()) @@ -339,10 +342,11 @@ mod init { state.clear(); state.next.reserve(tips.size_hint().0); for tip in tips.map(Into::into) { - let seen = state.seen.insert(tip, CommitState::Interesting); + let commit_state = CommitState::Interesting; + let seen = state.seen.insert(tip, commit_state); // We know there can only be duplicate interesting ones. if seen.is_none() && predicate(&tip) { - state.next.push_back(tip); + state.next.push_back((tip, commit_state)); } } } @@ -418,16 +422,23 @@ mod init { cutoff: Option, ) -> Option> { let state = &mut self.state; + let next = &mut state.queue; 'skip_hidden: loop { - let (commit_time, oid) = match state.queue.pop()? { + let (commit_time, (oid, _queued_commit_state)) = match next.pop()? { (Newest(t) | Oldest(Reverse(t)), o) => (t, o), }; let mut parents: ParentIds = Default::default(); - // TODO(perf): can avoid this lookup by storing state on `queue` respectively. - // ALSO: need to look ahead for early aborts, i.e. if there is only hidden left to traverse. - // Maybe this can be counted? + + // Always use the state that is actually stored, as we may change the type as we go. let commit_state = *state.seen.get(&oid).expect("every commit we traverse has state added"); + if can_deplete_candidates_early( + next.iter_unordered().map(|t| t.1), + commit_state, + state.candidates.as_ref(), + ) { + return None; + } match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) { Ok(Either::CachedCommit(commit)) => { if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { @@ -443,7 +454,7 @@ mod init { &mut state.candidates, commit_state, &mut self.predicate, - &mut state.queue, + next, order, cutoff, || parent_commit_time, @@ -462,7 +473,7 @@ mod init { &mut state.candidates, commit_state, &mut self.predicate, - &mut state.queue, + next, order, cutoff, || { @@ -505,6 +516,28 @@ mod init { } } + /// Returns `true` if we have only hidden cursors queued for traversal, assuming that we don't see interesting ones ever again. + /// + /// `unqueued_commit_state` is the state of the commit that is currently being processed. + fn can_deplete_candidates_early( + mut queued_states: impl Iterator, + unqueued_commit_state: CommitState, + candidates: Option<&Candidates>, + ) -> bool { + if candidates.is_none() { + return false; + } + if unqueued_commit_state.is_interesting() { + return false; + } + + let mut is_empty = true; + queued_states.all(|state| { + is_empty = false; + state.is_hidden() + }) && !is_empty + } + /// Utilities impl Simple where @@ -513,14 +546,16 @@ mod init { { fn next_by_topology(&mut self) -> Option> { let state = &mut self.state; + let next = &mut state.next; 'skip_hidden: loop { - let oid = state.next.pop_front()?; + let (oid, _queued_commit_state) = next.pop_front()?; let mut parents: ParentIds = Default::default(); - // TODO(perf): can avoid this lookup by storing state on `next` respectively. - // ALSO: need to look ahead for early aborts, i.e. if there is only hidden left to traverse. - // Maybe this can be counted? - let commit_state = *state.seen.get(&oid).expect("every commit we traverse has state added"); + // Always use the state that is actually stored, as we may change the type as we go. + let commit_state = *state.seen.get(&oid).expect("every commit we traverse has state added"); + if can_deplete_candidates_early(next.iter().map(|t| t.1), commit_state, state.candidates.as_ref()) { + return None; + } match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) { Ok(Either::CachedCommit(commit)) => { if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) { @@ -537,7 +572,7 @@ mod init { &mut state.candidates, commit_state, &mut self.predicate, - &mut state.next, + next, ); if commit_state.is_interesting() && matches!(self.parents, Parents::First) { break; @@ -556,7 +591,7 @@ mod init { &mut state.candidates, commit_state, &mut self.predicate, - &mut state.next, + next, ); if commit_state.is_interesting() && matches!(self.parents, Parents::First) { break; @@ -608,7 +643,7 @@ mod init { candidates: &mut Option, commit_state: CommitState, predicate: &mut impl FnMut(&oid) -> bool, - next: &mut VecDeque, + next: &mut VecDeque<(ObjectId, CommitState)>, ) { let enqueue = match seen.entry(parent_id) { Entry::Occupied(mut e) => { @@ -627,7 +662,7 @@ mod init { } }; if enqueue { - next.push_back(parent_id); + next.push_back((parent_id, commit_state)); } } @@ -665,7 +700,7 @@ mod init { let key = to_queue_key(parent_commit_time, order); match cutoff { Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => {} - Some(_) | None => queue.insert(key, parent_id), + Some(_) | None => queue.insert(key, (parent_id, commit_state)), } } } diff --git a/gix-traverse/tests/traverse/commit/simple.rs b/gix-traverse/tests/traverse/commit/simple.rs index 1d8043c6fb1..3529f101985 100644 --- a/gix-traverse/tests/traverse/commit/simple.rs +++ b/gix-traverse/tests/traverse/commit/simple.rs @@ -133,9 +133,9 @@ mod hide { fn all_sortings() -> impl Iterator { [ + Sorting::BreadthFirst, Sorting::ByCommitTime(CommitTimeOrder::NewestFirst), Sorting::ByCommitTime(CommitTimeOrder::OldestFirst), - Sorting::BreadthFirst, ] .into_iter() } From a9befb284dc17d3656cf83859836bc221a42d67e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Jun 2025 11:24:15 +0200 Subject: [PATCH 3/4] feat: add `revision::walk::Platform::hide()`. This finally makes safe traversals possible and is what most people would want to use instead of `boundary()`. --- gix/src/config/tree/sections/core.rs | 11 ----------- gix/src/revision/walk.rs | 22 ++++++++++++++++++++-- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/gix/src/config/tree/sections/core.rs b/gix/src/config/tree/sections/core.rs index 44d46317f08..fa14d12114d 100644 --- a/gix/src/config/tree/sections/core.rs +++ b/gix/src/config/tree/sections/core.rs @@ -442,17 +442,6 @@ mod abbrev { mod validate { use crate::{bstr::BStr, config::tree::keys}; - pub struct LockTimeout; - impl keys::Validate for LockTimeout { - fn validate(&self, value: &BStr) -> Result<(), Box> { - let value = gix_config::Integer::try_from(value)? - .to_decimal() - .ok_or_else(|| format!("integer {value} cannot be represented as integer")); - super::Core::FILES_REF_LOCK_TIMEOUT.try_into_lock_timeout(Ok(value?))?; - Ok(()) - } - } - pub struct Disambiguate; impl keys::Validate for Disambiguate { #[cfg_attr(not(feature = "revision"), allow(unused_variables))] diff --git a/gix/src/revision/walk.rs b/gix/src/revision/walk.rs index 004df027b9e..2dab00f5ed8 100644 --- a/gix/src/revision/walk.rs +++ b/gix/src/revision/walk.rs @@ -155,6 +155,7 @@ pub struct Platform<'repo> { /// The owning repository. pub repo: &'repo Repository, pub(crate) tips: Vec, + pub(crate) hidden: Vec, pub(crate) boundary: Vec, pub(crate) sorting: Sorting, pub(crate) parents: gix_traverse::commit::Parents, @@ -167,6 +168,7 @@ impl<'repo> Platform<'repo> { revision::walk::Platform { repo, tips: tips.into_iter().map(Into::into).collect(), + hidden: Vec::new(), sorting: Default::default(), parents: Default::default(), use_commit_graph: None, @@ -210,13 +212,13 @@ impl Platform<'_> { self } - /// Don't cross the given `ids` during traversal. + /// Don't cross the given `ids` (commits) during traversal. /// /// Note that this forces the [sorting](Self::sorting()) to [`ByCommitTimeCutoff`](Sorting::ByCommitTimeCutoff) /// configured with the oldest available commit time, ensuring that no commits older than the oldest of `ids` will be returned either. /// Also note that commits that can't be accessed or are missing are simply ignored for the purpose of obtaining the cutoff date. /// - /// A boundary is distinctly different from exclusive refsepcs `^branch-to-not-list` in Git log. + /// A boundary is distinctly different from exclusive revspecs `^branch-to-not-list` in Git log. /// /// If this is not desired, [set the sorting](Self::sorting()) to something else right after this call. pub fn with_boundary(mut self, ids: impl IntoIterator>) -> Self { @@ -242,6 +244,20 @@ impl Platform<'_> { } self } + + /// Don't cross the given `tips` (commits) during traversal or return them, and also don't return any of their ancestors. + /// + /// This allows achieving revspecs like `^branch-to-not-list`, where the commit behind that name would be passed as `ids`. + /// + /// In other words, each of the `tips` acts like a starting point for an iteration that will paint commits as unwanted, and + /// wanted commits cannot cross it. + /// + /// The side effect of this is that commits can't be returned immediately as one still has to wait and see if they may be unwanted later. + /// This makes traversals with hidden commits more costly, with a chance to traverse all commits if the hidden and non-hidden commits are disjoint. + pub fn with_hidden(mut self, tips: impl IntoIterator>) -> Self { + self.hidden = tips.into_iter().map(Into::into).collect(); + self + } } /// Produce the iterator @@ -262,6 +278,7 @@ impl<'repo> Platform<'repo> { use_commit_graph, commit_graph, mut boundary, + hidden, } = self; boundary.sort(); Ok(revision::Walk { @@ -301,6 +318,7 @@ impl<'repo> Platform<'repo> { }) .sorting(sorting.into_simple().expect("for now there is nothing else"))? .parents(parents) + .hide(hidden)? .commit_graph( commit_graph.or(use_commit_graph .map_or_else(|| self.repo.config.may_use_commit_graph(), Ok)? From c5bc49f2a02e9b28c2466ea4c7ae711d091ffc96 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Jun 2025 10:53:14 +0200 Subject: [PATCH 4/4] feat: support for `commitgraph list from..to` to exercise the new 'hide' capability. --- Cargo.toml | 4 + .../src/repository/commitgraph/list.rs | 85 +++++++++++++------ src/plumbing/main.rs | 4 +- src/plumbing/options/mod.rs | 3 + 4 files changed, 70 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e0f002ded7a..6bbd0fe4da8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -206,6 +206,10 @@ lto = "fat" codegen-units = 1 strip = "symbols" +[profile.bench] +debug = 1 +strip = "none" + [workspace] members = [ "gix-actor", diff --git a/gitoxide-core/src/repository/commitgraph/list.rs b/gitoxide-core/src/repository/commitgraph/list.rs index 6df11b24e1b..c0d1182b819 100644 --- a/gitoxide-core/src/repository/commitgraph/list.rs +++ b/gitoxide-core/src/repository/commitgraph/list.rs @@ -1,55 +1,92 @@ pub(crate) mod function { - use std::{borrow::Cow, ffi::OsString}; - + use crate::OutputFormat; use anyhow::{bail, Context}; + use gix::odb::store::RefreshMode; + use gix::revision::plumbing::Spec; use gix::{prelude::ObjectIdExt, revision::walk::Sorting}; - - use crate::OutputFormat; + use std::fmt::Formatter; + use std::{borrow::Cow, ffi::OsString}; pub fn list( mut repo: gix::Repository, spec: OsString, mut out: impl std::io::Write, + long_hashes: bool, format: OutputFormat, ) -> anyhow::Result<()> { if format != OutputFormat::Human { bail!("Only human output is currently supported"); } let graph = repo - .commit_graph() + .commit_graph_if_enabled() .context("a commitgraph is required, but none was found")?; repo.object_cache_size_if_unset(4 * 1024 * 1024); + repo.objects.refresh = RefreshMode::Never; let spec = gix::path::os_str_into_bstr(&spec)?; - let id = repo - .rev_parse_single(spec) - .context("Only single revisions are currently supported")?; - let commits = id - .object()? - .peel_to_kind(gix::object::Kind::Commit) - .context("Need committish as starting point")? - .id() - .ancestors() - .sorting(Sorting::ByCommitTime(Default::default())) - .all()?; + let spec = repo.rev_parse(spec)?.detach(); + let commits = match spec { + Spec::Include(id) => connected_commit_id(&repo, id)? + .ancestors() + .sorting(Sorting::ByCommitTime(Default::default())) + .all()?, + Spec::Range { from, to } => connected_commit_id(&repo, to)? + .ancestors() + .sorting(Sorting::ByCommitTime(Default::default())) + .with_hidden(Some(connected_commit_id(&repo, from)?)) + .all()?, + Spec::Exclude(_) | Spec::Merge { .. } | Spec::IncludeOnlyParents(_) | Spec::ExcludeParents(_) => { + bail!("The spec isn't currently supported: {spec:?}") + } + }; for commit in commits { let commit = commit?; writeln!( out, "{} {} {} {}", - commit.id().shorten_or_id(), + HexId::new(commit.id(), long_hashes), commit.commit_time.expect("traversal with date"), commit.parent_ids.len(), - graph.commit_by_id(commit.id).map_or_else( - || Cow::Borrowed(""), - |c| Cow::Owned(format!( - "{} {}", - c.root_tree_id().to_owned().attach(&repo).shorten_or_id(), - c.generation() + graph + .as_ref() + .map_or(Cow::Borrowed(""), |graph| graph.commit_by_id(commit.id).map_or_else( + || Cow::Borrowed(""), + |c| Cow::Owned(format!( + "{} {}", + HexId::new(c.root_tree_id().to_owned().attach(&repo), long_hashes), + c.generation() + )) )) - ) )?; } Ok(()) } + + fn connected_commit_id(repo: &gix::Repository, id: gix::ObjectId) -> anyhow::Result> { + Ok(id + .attach(repo) + .object()? + .peel_to_kind(gix::object::Kind::Commit) + .context("Need committish as starting point")? + .id()) + } + + struct HexId<'a>(gix::Id<'a>, bool); + + impl<'a> HexId<'a> { + pub fn new(id: gix::Id<'a>, long_hex: bool) -> Self { + HexId(id, long_hex) + } + } + + impl std::fmt::Display for HexId<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let HexId(id, long_hex) = self; + if *long_hex { + id.fmt(f) + } else { + id.shorten_or_id().fmt(f) + } + } + } } diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index fe111b065cf..9b57bb3af76 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -543,7 +543,7 @@ pub fn main() -> Result<()> { ) } Subcommands::CommitGraph(cmd) => match cmd { - commitgraph::Subcommands::List { spec } => prepare_and_run( + commitgraph::Subcommands::List { long_hashes, spec } => prepare_and_run( "commitgraph-list", trace, auto_verbose, @@ -551,7 +551,7 @@ pub fn main() -> Result<()> { progress_keep_open, None, move |_progress, out, _err| { - core::repository::commitgraph::list(repository(Mode::Lenient)?, spec, out, format) + core::repository::commitgraph::list(repository(Mode::Lenient)?, spec, out, long_hashes, format) }, ) .map(|_| ()), diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index beab5743928..f0d02c2c6bc 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -955,6 +955,9 @@ pub mod commitgraph { }, /// List all entries in the commit-graph file as reachable by starting from `HEAD`. List { + /// Display long hashes, instead of expensively shortened versions for best performance. + #[clap(long, short = 'l')] + long_hashes: bool, /// The rev-spec to list reachable commits from. #[clap(default_value = "@")] spec: std::ffi::OsString,