Skip to content

Commit c35784a

Browse files
committed
merge-ort, diffcore-rename: employ cached renames when possible
When there are many renames between the old base of a series of commits and the new base, the way sequencer.c, merge-recursive.c, and diffcore-rename.c have traditionally split the work resulted in redetecting the same renames with each and every commit being transplanted. To address this, the last several commits have been creating a cache of rename detection results, determining when it was safe to use such a cache in subsequent merge operations, adding helper functions, and so on. See the previous half dozen commit messages for additional discussion of this optimization, particularly the message a few commits ago entitled "add code to check for whether cached renames can be reused". This commit finally ties all of that work together, modifying the merge algorithm to make use of these cached renames. For the testcases mentioned in commit 557ac03 ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 5.553 s ± 0.078 s 5.624 s ± 0.077 s mega-renames: 11.308 s ± 0.062 s 10.213 s ± 0.032 s just-one-mega: 491.8 ms ± 7.6 ms 497.6 ms ± 5.3 ms Note that this optimization competes with the previous optimizations (namely full use of exact renames, basename-guided rename detection, and detecting when we can skip detecting renames for certain paths and still get the same result); if it were not for those previous optimizations, this optimization would have sped up the mega-renames testcase by a factor of nearly 35. (The factor of 35 comes from the fact that the mega-renames testcase involves rebasing 35 commits, and each one essentially involves detecting the same renames.) So, while the improvement seen here is modest at best, keep in mind that this optimization kicks in to help accelerate cases where the previous optimizations do not apply, so there may be other testcases where this optimization helps more. The changes made in this optimization also lay some important groundwork for a future optimization around having collect_merge_info() avoid recursing into subtrees in more cases. Signed-off-by: Elijah Newren <[email protected]>
1 parent 0db461c commit c35784a

File tree

3 files changed

+63
-10
lines changed

3 files changed

+63
-10
lines changed

diffcore-rename.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,8 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
562562
static void initialize_dir_rename_info(struct dir_rename_info *info,
563563
struct strintmap *relevant_sources,
564564
struct strintmap *dirs_removed,
565-
struct strmap *dir_rename_count)
565+
struct strmap *dir_rename_count,
566+
struct strmap *cached_pairs)
566567
{
567568
struct hashmap_iter iter;
568569
struct strmap_entry *entry;
@@ -626,6 +627,17 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
626627
rename_dst[i].p->two->path);
627628
}
628629

630+
/* Add cached_pairs to counts */
631+
strmap_for_each_entry(cached_pairs, &iter, entry) {
632+
const char *old_name = entry->key;
633+
const char *new_name = entry->value;
634+
if (!new_name)
635+
/* known delete; ignore it */
636+
continue;
637+
638+
update_dir_rename_counts(info, dirs_removed, old_name, new_name);
639+
}
640+
629641
/*
630642
* Now we collapse
631643
* dir_rename_count: old_directory -> {new_directory -> count}
@@ -1194,7 +1206,8 @@ static void handle_early_known_dir_renames(struct dir_rename_info *info,
11941206
void diffcore_rename_extended(struct diff_options *options,
11951207
struct strintmap *relevant_sources,
11961208
struct strintmap *dirs_removed,
1197-
struct strmap *dir_rename_count)
1209+
struct strmap *dir_rename_count,
1210+
struct strmap *cached_pairs)
11981211
{
11991212
int detect_rename = options->detect_rename;
12001213
int minimum_score = options->rename_score;
@@ -1310,7 +1323,8 @@ void diffcore_rename_extended(struct diff_options *options,
13101323
/* Preparation for basename-driven matching. */
13111324
trace2_region_enter("diff", "dir rename setup", options->repo);
13121325
initialize_dir_rename_info(&info, relevant_sources,
1313-
dirs_removed, dir_rename_count);
1326+
dirs_removed, dir_rename_count,
1327+
cached_pairs);
13141328
trace2_region_leave("diff", "dir rename setup", options->repo);
13151329

13161330
/* Utilize file basenames to quickly find renames. */
@@ -1509,5 +1523,5 @@ void diffcore_rename_extended(struct diff_options *options,
15091523

15101524
void diffcore_rename(struct diff_options *options)
15111525
{
1512-
diffcore_rename_extended(options, NULL, NULL, NULL);
1526+
diffcore_rename_extended(options, NULL, NULL, NULL, NULL);
15131527
}

diffcore.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,8 @@ void diffcore_rename(struct diff_options *);
180180
void diffcore_rename_extended(struct diff_options *options,
181181
struct strintmap *relevant_sources,
182182
struct strintmap *dirs_removed,
183-
struct strmap *dir_rename_count);
183+
struct strmap *dir_rename_count,
184+
struct strmap *cached_pairs);
184185
void diffcore_merge_broken(void);
185186
void diffcore_pickaxe(struct diff_options *);
186187
void diffcore_order(const char *orderfile);

merge-ort.c

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -739,15 +739,48 @@ static void add_pair(struct merge_options *opt,
739739
struct rename_info *renames = &opt->priv->renames;
740740
int names_idx = is_add ? side : 0;
741741

742-
if (!is_add) {
742+
if (is_add) {
743+
if (strset_contains(&renames->cached_target_names[side],
744+
pathname))
745+
return;
746+
} else {
743747
unsigned content_relevant = (match_mask == 0);
744748
unsigned location_relevant = (dir_rename_mask == 0x07);
745749

750+
/*
751+
* If pathname is found in cached_irrelevant[side] due to
752+
* previous pick but for this commit content is relevant,
753+
* then we need to remove it from cached_irrelevant.
754+
*/
755+
if (content_relevant)
756+
/* strset_remove is no-op if strset doesn't have key */
757+
strset_remove(&renames->cached_irrelevant[side],
758+
pathname);
759+
760+
/*
761+
* We do not need to re-detect renames for paths that we already
762+
* know the pairing, i.e. for cached_pairs (or
763+
* cached_irrelevant). However, handle_deferred_entries() needs
764+
* to loop over the union of keys from relevant_sources[side] and
765+
* cached_pairs[side], so for simplicity we set relevant_sources
766+
* for all the cached_pairs too and then strip them back out in
767+
* prune_cached_from_relevant() at the beginning of
768+
* detect_regular_renames().
769+
*/
746770
if (content_relevant || location_relevant) {
747771
/* content_relevant trumps location_relevant */
748772
strintmap_set(&renames->relevant_sources[side], pathname,
749773
content_relevant ? RELEVANT_CONTENT : RELEVANT_LOCATION);
750774
}
775+
776+
/*
777+
* Avoid creating pair if we've already cached rename results.
778+
* Note that we do this after setting relevant_sources[side]
779+
* as noted in the comment above.
780+
*/
781+
if (strmap_contains(&renames->cached_pairs[side], pathname) ||
782+
strset_contains(&renames->cached_irrelevant[side], pathname))
783+
return;
751784
}
752785

753786
one = alloc_filespec(pathname);
@@ -2274,7 +2307,9 @@ static inline int possible_side_renames(struct rename_info *renames,
22742307
static inline int possible_renames(struct rename_info *renames)
22752308
{
22762309
return possible_side_renames(renames, 1) ||
2277-
possible_side_renames(renames, 2);
2310+
possible_side_renames(renames, 2) ||
2311+
!strmap_empty(&renames->cached_pairs[1]) ||
2312+
!strmap_empty(&renames->cached_pairs[2]);
22782313
}
22792314

22802315
static void resolve_diffpair_statuses(struct diff_queue_struct *q)
@@ -2298,7 +2333,6 @@ static void resolve_diffpair_statuses(struct diff_queue_struct *q)
22982333
}
22992334
}
23002335

2301-
MAYBE_UNUSED
23022336
static void prune_cached_from_relevant(struct rename_info *renames,
23032337
unsigned side)
23042338
{
@@ -2318,7 +2352,6 @@ static void prune_cached_from_relevant(struct rename_info *renames,
23182352
}
23192353
}
23202354

2321-
MAYBE_UNUSED
23222355
static void use_cached_pairs(struct merge_options *opt,
23232356
struct strmap *cached_pairs,
23242357
struct diff_queue_struct *pairs)
@@ -2401,6 +2434,7 @@ static void detect_regular_renames(struct merge_options *opt,
24012434
struct diff_options diff_opts;
24022435
struct rename_info *renames = &opt->priv->renames;
24032436

2437+
prune_cached_from_relevant(renames, side_index);
24042438
if (!possible_side_renames(renames, side_index)) {
24052439
/*
24062440
* No rename detection needed for this side, but we still need
@@ -2411,6 +2445,7 @@ static void detect_regular_renames(struct merge_options *opt,
24112445
return;
24122446
}
24132447

2448+
partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]);
24142449
repo_diff_setup(opt->repo, &diff_opts);
24152450
diff_opts.flags.recursive = 1;
24162451
diff_opts.flags.rename_empty = 0;
@@ -2428,7 +2463,8 @@ static void detect_regular_renames(struct merge_options *opt,
24282463
diffcore_rename_extended(&diff_opts,
24292464
&renames->relevant_sources[side_index],
24302465
&renames->dirs_removed[side_index],
2431-
&renames->dir_rename_count[side_index]);
2466+
&renames->dir_rename_count[side_index],
2467+
&renames->cached_pairs[side_index]);
24322468
trace2_region_leave("diff", "diffcore_rename", opt->repo);
24332469
resolve_diffpair_statuses(&diff_queued_diff);
24342470

@@ -2535,6 +2571,8 @@ static int detect_and_process_renames(struct merge_options *opt,
25352571
trace2_region_enter("merge", "regular renames", opt->repo);
25362572
detect_regular_renames(opt, MERGE_SIDE1);
25372573
detect_regular_renames(opt, MERGE_SIDE2);
2574+
use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]);
2575+
use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]);
25382576
trace2_region_leave("merge", "regular renames", opt->repo);
25392577

25402578
trace2_region_enter("merge", "directory renames", opt->repo);

0 commit comments

Comments
 (0)