Skip to content

Commit 520c832

Browse files
authored
Merge pull request #1705 from GitoxideLabs/merge
tree merge with tree-related auto-resolution
2 parents fadf106 + 960773e commit 520c832

File tree

61 files changed

+1989
-508
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+1989
-508
lines changed

Diff for: Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: crate-status.md

+3
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,11 @@ Check out the [performance discussion][gix-diff-performance] as well.
352352
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
353353
* [x] **tree**-diff-heuristics match Git for its test-cases
354354
- [x] a way to generate an index with stages, mostly conforming with Git.
355+
- [ ] resolve to be *ours* or the *ancestors* version of the tree.
355356
- [ ] submodule merges (*right now they count as conflicts if they differ*)
356357
- [ ] assure sparse indices are handled correctly during application - right now we refuse.
358+
- [ ] rewrite so that the whole logic can be proven to be correct - it's too insane now and probably has way
359+
more possible states than are tested, despite best attempts.
357360
* [x] **commits** - with handling of multiple merge bases by recursive merge-base merge
358361
* [x] API documentation
359362
* [ ] Examples

Diff for: gitoxide-core/src/query/engine/update.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,12 @@ pub fn update(
139139
});
140140

141141
let rewrites = {
142-
let mut r = gix::diff::new_rewrites(&repo.config_snapshot(), true)?.unwrap_or_default();
142+
// These are either configured, or we set them to the default. There is no turning them off.
143+
let (r, was_configured) = gix::diff::new_rewrites(&repo.config_snapshot(), true)?;
144+
if was_configured && r.is_none() {
145+
gix::trace::warn!("Rename tracking is disabled by configuration, but we enable it using the default");
146+
}
147+
let mut r = r.unwrap_or_default();
143148
r.copies = Some(gix::diff::rewrites::Copies {
144149
source: if find_copies_harder {
145150
CopySource::FromSetOfModifiedFilesAndAllSources

Diff for: gitoxide-core/src/repository/merge/commit.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ pub fn commit(
1717
Options {
1818
format,
1919
file_favor,
20+
tree_favor,
2021
in_memory,
2122
debug,
2223
}: Options,
@@ -31,7 +32,10 @@ pub fn commit(
3132
let (ours_ref, ours_id) = refname_and_commit(&repo, ours)?;
3233
let (theirs_ref, theirs_id) = refname_and_commit(&repo, theirs)?;
3334

34-
let options = repo.tree_merge_options()?.with_file_favor(file_favor);
35+
let options = repo
36+
.tree_merge_options()?
37+
.with_file_favor(file_favor)
38+
.with_tree_favor(tree_favor);
3539
let ours_id_str = ours_id.to_string();
3640
let theirs_id_str = theirs_id.to_string();
3741
let labels = gix::merge::blob::builtin_driver::text::Labels {
@@ -49,7 +53,7 @@ pub fn commit(
4953
.merge_commits(ours_id, theirs_id, labels, options.into())?
5054
.tree_merge;
5155
let has_conflicts = res.conflicts.is_empty();
52-
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::Renames);
56+
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::default());
5357
{
5458
let _span = gix::trace::detail!("Writing merged tree");
5559
let mut written = 0;

Diff for: gitoxide-core/src/repository/merge/tree.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use crate::OutputFormat;
33
pub struct Options {
44
pub format: OutputFormat,
55
pub file_favor: Option<gix::merge::tree::FileFavor>,
6+
pub tree_favor: Option<gix::merge::tree::TreeFavor>,
67
pub in_memory: bool,
78
pub debug: bool,
89
}
@@ -29,6 +30,7 @@ pub(super) mod function {
2930
Options {
3031
format,
3132
file_favor,
33+
tree_favor,
3234
in_memory,
3335
debug,
3436
}: Options,
@@ -44,7 +46,10 @@ pub(super) mod function {
4446
let (ours_ref, ours_id) = refname_and_tree(&repo, ours)?;
4547
let (theirs_ref, theirs_id) = refname_and_tree(&repo, theirs)?;
4648

47-
let options = repo.tree_merge_options()?.with_file_favor(file_favor);
49+
let options = repo
50+
.tree_merge_options()?
51+
.with_file_favor(file_favor)
52+
.with_tree_favor(tree_favor);
4853
let base_id_str = base_id.to_string();
4954
let ours_id_str = ours_id.to_string();
5055
let theirs_id_str = theirs_id.to_string();
@@ -64,7 +69,7 @@ pub(super) mod function {
6469
};
6570
let res = repo.merge_trees(base_id, ours_id, theirs_id, labels, options)?;
6671
let has_conflicts = res.conflicts.is_empty();
67-
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::Renames);
72+
let has_unresolved_conflicts = res.has_unresolved_conflicts(TreatAsUnresolved::default());
6873
{
6974
let _span = gix::trace::detail!("Writing merged tree");
7075
let mut written = 0;

Diff for: gitoxide-core/src/repository/status.rs

+1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ pub fn show(
8181
copies: None,
8282
percentage: Some(percentage),
8383
limit: 0,
84+
track_empty: false,
8485
});
8586
if opts.rewrites.is_some() {
8687
if let Some(opts) = opts.dirwalk_options.as_mut() {

Diff for: gix-diff/src/lib.rs

+5
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ pub struct Rewrites {
3737
/// If the limit would not be enough to test the entire set of combinations, the algorithm will trade in precision and not
3838
/// run the fuzzy version of identity tests at all. That way results are never partial.
3939
pub limit: usize,
40+
41+
/// If `true`, empty blobs will be tracked. If `false`, they do not participate in rename tracking.
42+
///
43+
/// Leaving this off usually leads to better results as empty files don't have a unique-enough identity.
44+
pub track_empty: bool,
4045
}
4146

4247
/// Contains a [Tracker](rewrites::Tracker) to detect rewrites.

Diff for: gix-diff/src/rewrites/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ impl Default for Rewrites {
7070
copies: None,
7171
percentage: Some(0.5),
7272
limit: 1000,
73+
track_empty: false,
7374
}
7475
}
7576
}

Diff for: gix-diff/src/rewrites/tracker.rs

+44-7
Original file line numberDiff line numberDiff line change
@@ -286,12 +286,10 @@ impl<T: Change> Tracker<T> {
286286
CopySource::FromSetOfModifiedFiles => {}
287287
CopySource::FromSetOfModifiedFilesAndAllSources => {
288288
push_source_tree(&mut |change, location| {
289-
assert!(
290-
self.try_push_change(change, location).is_none(),
291-
"we must accept every change"
292-
);
293-
// make sure these aren't viable to be emitted anymore.
294-
self.items.last_mut().expect("just pushed").emitted = true;
289+
if self.try_push_change(change, location).is_none() {
290+
// make sure these aren't viable to be emitted anymore.
291+
self.items.last_mut().expect("just pushed").emitted = true;
292+
}
295293
})
296294
.map_err(|err| emit::Error::GetItemsForExhaustiveCopyDetection(Box::new(err)))?;
297295
self.items.sort_by(by_id_and_location);
@@ -341,6 +339,10 @@ impl<T: Change> Tracker<T> {
341339
) -> Result<(), emit::Error> {
342340
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
343341
let needs_second_pass = !needs_exact_match(percentage);
342+
343+
// https://github.com/git/git/blob/cc01bad4a9f566cf4453c7edd6b433851b0835e2/diffcore-rename.c#L350-L369
344+
// We would need a hashmap to be OK to not use the limit here, otherwise the performance is too bad.
345+
// This also means we don't find all renames if we hit the rename limit.
344346
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects, filter)? == Action::Cancel {
345347
return Ok(());
346348
}
@@ -384,10 +386,35 @@ impl<T: Change> Tracker<T> {
384386
filter: Option<fn(&T) -> bool>,
385387
) -> Result<Action, emit::Error> {
386388
let mut dest_ofs = 0;
389+
let mut num_checks = 0;
390+
let max_checks = {
391+
let limit = self.rewrites.limit.saturating_pow(2);
392+
// There can be trees with a lot of entries and pathological search behaviour, as they can be repeated
393+
// and then have a lot of similar hashes. This also means we have to search a lot of candidates which
394+
// can be too slow despite best attempts. So play it save and detect such cases 'roughly' by amount of items.
395+
if self.items.len() < 100_000 {
396+
0
397+
} else {
398+
limit
399+
}
400+
};
401+
387402
while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
388403
(!item.emitted
389404
&& matches!(item.change.kind(), ChangeKind::Addition)
390-
&& filter.map_or(true, |f| f(&item.change)))
405+
&& filter.map_or_else(
406+
|| {
407+
self.rewrites.track_empty
408+
// We always want to keep track of entries that are involved of a directory rename.
409+
// Note that this may still match them up arbitrarily if empty, but empty is empty.
410+
|| matches!(item.change.relation(), Some(Relation::ChildOfParent(_)))
411+
|| {
412+
let id = item.change.id();
413+
id != gix_hash::ObjectId::empty_blob(id.kind())
414+
}
415+
},
416+
|f| f(&item.change),
417+
))
391418
.then_some((idx, item))
392419
}) {
393420
dest_idx += dest_ofs;
@@ -403,6 +430,7 @@ impl<T: Change> Tracker<T> {
403430
objects,
404431
diff_cache,
405432
&self.path_backing,
433+
&mut num_checks,
406434
)?
407435
.map(|(src_idx, src, diff)| {
408436
let (id, entry_mode) = src.change.id_and_entry_mode();
@@ -420,6 +448,12 @@ impl<T: Change> Tracker<T> {
420448
src_idx,
421449
)
422450
});
451+
if max_checks != 0 && num_checks > max_checks {
452+
gix_trace::warn!(
453+
"Cancelled rename matching as there were too many iterations ({num_checks} > {max_checks})"
454+
);
455+
return Ok(Action::Cancel);
456+
}
423457
let Some((src, src_idx)) = src else {
424458
continue;
425459
};
@@ -631,6 +665,7 @@ fn find_match<'a, T: Change>(
631665
objects: &impl gix_object::FindObjectOrHeader,
632666
diff_cache: &mut crate::blob::Platform,
633667
path_backing: &[u8],
668+
num_checks: &mut usize,
634669
) -> Result<Option<SourceTuple<'a, T>>, emit::Error> {
635670
let (item_id, item_mode) = item.change.id_and_entry_mode();
636671
if needs_exact_match(percentage) || item_mode.is_link() {
@@ -651,6 +686,7 @@ fn find_match<'a, T: Change>(
651686
}
652687
let res = items[range.clone()].iter().enumerate().find_map(|(mut src_idx, src)| {
653688
src_idx += range.start;
689+
*num_checks += 1;
654690
(src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)).then_some((src_idx, src, None))
655691
});
656692
if let Some(src) = res {
@@ -685,6 +721,7 @@ fn find_match<'a, T: Change>(
685721
)?;
686722
let prep = diff_cache.prepare_diff()?;
687723
stats.num_similarity_checks += 1;
724+
*num_checks += 1;
688725
match prep.operation {
689726
Operation::InternalDiff { algorithm } => {
690727
let tokens =

Diff for: gix-diff/tests/diff/rewrites/tracker.rs

+10
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ fn rename_by_id() -> crate::Result {
2727
copies: None,
2828
percentage: None,
2929
limit,
30+
track_empty: false,
3031
};
3132
let mut track = util::new_tracker(rewrites);
3233
assert!(
@@ -80,6 +81,7 @@ fn copy_by_similarity_reports_limit_if_encountered() -> crate::Result {
8081
}),
8182
percentage: None,
8283
limit: 1,
84+
track_empty: false,
8385
};
8486
let mut track = util::new_tracker(rewrites);
8587
let odb = util::add_retained_blobs(
@@ -132,6 +134,7 @@ fn copy_by_id() -> crate::Result {
132134
}),
133135
percentage: None,
134136
limit,
137+
track_empty: false,
135138
};
136139
let mut track = util::new_tracker(rewrites);
137140
let odb = util::add_retained_blobs(
@@ -206,6 +209,7 @@ fn copy_by_id_search_in_all_sources() -> crate::Result {
206209
}),
207210
percentage: None,
208211
limit,
212+
track_empty: false,
209213
};
210214
let mut track = util::new_tracker(rewrites);
211215
let odb = util::add_retained_blobs(
@@ -284,6 +288,7 @@ fn copy_by_50_percent_similarity() -> crate::Result {
284288
}),
285289
percentage: None,
286290
limit: 0,
291+
track_empty: false,
287292
};
288293
let mut track = util::new_tracker(rewrites);
289294
let odb = util::add_retained_blobs(
@@ -363,6 +368,7 @@ fn copy_by_id_in_additions_only() -> crate::Result {
363368
}),
364369
percentage: None,
365370
limit: 0,
371+
track_empty: false,
366372
};
367373
let mut track = util::new_tracker(rewrites);
368374
let odb = util::add_retained_blobs(
@@ -413,6 +419,7 @@ fn rename_by_similarity_reports_limit_if_encountered() -> crate::Result {
413419
copies: None,
414420
percentage: Some(0.5),
415421
limit: 1,
422+
track_empty: false,
416423
};
417424
let mut track = util::new_tracker(rewrites);
418425
let odb = util::add_retained_blobs(
@@ -458,6 +465,7 @@ fn rename_by_50_percent_similarity() -> crate::Result {
458465
copies: None,
459466
percentage: Some(0.5),
460467
limit: 0,
468+
track_empty: false,
461469
};
462470
let mut track = util::new_tracker(rewrites);
463471
let odb = util::add_retained_blobs(
@@ -547,6 +555,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result {
547555
copies: None,
548556
percentage: Some(0.5),
549557
limit: 0,
558+
track_empty: false,
550559
};
551560
let mut track = util::new_tracker(rename_by_similarity);
552561
let tree_dst_id = 1;
@@ -638,6 +647,7 @@ fn simple_directory_rename_by_id() -> crate::Result {
638647
copies: None,
639648
percentage: None,
640649
limit: 0,
650+
track_empty: false,
641651
};
642652
let mut track = util::new_tracker(renames_by_identity);
643653
let tree_dst_id = 1;

0 commit comments

Comments
 (0)