Skip to content

Commit 6cdd692

Browse files
committed
fix: enforce diff.renamelimit even for identical name matching
The problem here is that our implementation avoids using a hashmap, but pays the price by having bad performance if there are too many possible candidates (binary search doesn't seem to be up to the task). This can lead to impossibly long runtimes, so for now, limit it explicitly.
1 parent 03b7ce2 commit 6cdd692

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

Diff for: gix-diff/src/rewrites/tracker.rs

+27
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,10 @@ impl<T: Change> Tracker<T> {
341341
) -> Result<(), emit::Error> {
342342
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
343343
let needs_second_pass = !needs_exact_match(percentage);
344+
345+
// https://github.com/git/git/blob/cc01bad4a9f566cf4453c7edd6b433851b0835e2/diffcore-rename.c#L350-L369
346+
// We would need a hashmap to be OK to not use the limit here, otherwise the performance is too bad.
347+
// This also means we don't find all renames if we hit the rename limit.
344348
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects, filter)? == Action::Cancel {
345349
return Ok(());
346350
}
@@ -384,6 +388,19 @@ impl<T: Change> Tracker<T> {
384388
filter: Option<fn(&T) -> bool>,
385389
) -> Result<Action, emit::Error> {
386390
let mut dest_ofs = 0;
391+
let mut num_checks = 0;
392+
let max_checks = {
393+
let limit = self.rewrites.limit.saturating_pow(2);
394+
// There can be trees with a lot of entries and pathological search behaviour, as they can be repeated
395+
// and then have a lot of similar hashes. This also means we have to search a lot of candidates which
396+
// can be too slow despite best attempts. So play it save and detect such cases 'roughly' by amount of items.
397+
if self.items.len() < 100_000 {
398+
0
399+
} else {
400+
limit
401+
}
402+
};
403+
387404
while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
388405
(!item.emitted
389406
&& matches!(item.change.kind(), ChangeKind::Addition)
@@ -403,6 +420,7 @@ impl<T: Change> Tracker<T> {
403420
objects,
404421
diff_cache,
405422
&self.path_backing,
423+
&mut num_checks,
406424
)?
407425
.map(|(src_idx, src, diff)| {
408426
let (id, entry_mode) = src.change.id_and_entry_mode();
@@ -420,6 +438,12 @@ impl<T: Change> Tracker<T> {
420438
src_idx,
421439
)
422440
});
441+
if max_checks != 0 && num_checks > max_checks {
442+
gix_trace::warn!(
443+
"Cancelled rename matching as there were too many iterations ({num_checks} > {max_checks})"
444+
);
445+
return Ok(Action::Cancel);
446+
}
423447
let Some((src, src_idx)) = src else {
424448
continue;
425449
};
@@ -631,6 +655,7 @@ fn find_match<'a, T: Change>(
631655
objects: &impl gix_object::FindObjectOrHeader,
632656
diff_cache: &mut crate::blob::Platform,
633657
path_backing: &[u8],
658+
num_checks: &mut usize,
634659
) -> Result<Option<SourceTuple<'a, T>>, emit::Error> {
635660
let (item_id, item_mode) = item.change.id_and_entry_mode();
636661
if needs_exact_match(percentage) || item_mode.is_link() {
@@ -651,6 +676,7 @@ fn find_match<'a, T: Change>(
651676
}
652677
let res = items[range.clone()].iter().enumerate().find_map(|(mut src_idx, src)| {
653678
src_idx += range.start;
679+
*num_checks += 1;
654680
(src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)).then_some((src_idx, src, None))
655681
});
656682
if let Some(src) = res {
@@ -685,6 +711,7 @@ fn find_match<'a, T: Change>(
685711
)?;
686712
let prep = diff_cache.prepare_diff()?;
687713
stats.num_similarity_checks += 1;
714+
*num_checks += 1;
688715
match prep.operation {
689716
Operation::InternalDiff { algorithm } => {
690717
let tokens =

0 commit comments

Comments
 (0)