Skip to content

Commit 816821e

Browse files
committed
commit-reach: make can_all_from_reach... linear
The can_all_from_reach_with_flag() algorithm is currently quadratic in the worst case, because it calls the reachable() method for every 'from' commit without tracking which commits have already been walked or which can already reach a commit in 'to'.

Rewrite the algorithm to walk each commit a constant number of times.

We also add some optimizations that should work for the main consumer of this method: fetch negotiation (haves/wants).

The first step is a depth-first search (DFS) from each 'from' commit, with the 'from' commits sorted by ascending generation number. We do not walk beyond the minimum generation number or the minimum commit date. This DFS is likely to be faster than the existing reachable() method because we expect previous ref values to be along the first-parent history.

If we find a target commit, then we mark everything in the DFS stack as a RESULT. This expands the set of targets for the other 'from' commits. We also mark the visited commits using 'assign_flag' to prevent re-walking the same commits.

We still need to clear our flags at the end, which is why we will have a total of three visits to each commit.

Performance was measured on the Linux repository using 'test-tool reach can_all_from_reach'. The input included rows seeded by tag values. The "small" case included X-rows as v4.[0-9]* and Y-rows as v3.[0-9]*. This mimics a (very large) fetch that says "I have all major v3 releases and want all major v4 releases." The "large" case included X-rows as "v4.*" and Y-rows as "v3.*". This adds all release-candidate tags to the set, which does not greatly increase the number of objects that are considered, but does increase the number of 'from' commits, demonstrating the quadratic nature of the previous code.

Small Case
----------
Before: 1.52 s
 After: 0.26 s

Large Case
----------
Before: 3.50 s
 After: 0.27 s

Note how the time increases between the two cases in the two versions.
The running time of the new code grows with the number of commits that need to be walked, but not directly with the number of 'from' commits.

Signed-off-by: Derrick Stolee <[email protected]>
1 parent 1fd45ef commit 816821e

File tree

3 files changed

+85
-50
lines changed

3 files changed

+85
-50
lines changed

commit-reach.c

Lines changed: 77 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -513,65 +513,88 @@ int commit_contains(struct ref_filter *filter, struct commit *commit,
513513
return is_descendant_of(commit, list);
514514
}
515515

516-
int reachable(struct commit *from, int with_flag, int assign_flag,
517-
time_t min_commit_date)
516+
static int compare_commits_by_gen(const void *_a, const void *_b)
518517
{
519-
struct prio_queue work = { compare_commits_by_commit_date };
518+
const struct commit *a = (const struct commit *)_a;
519+
const struct commit *b = (const struct commit *)_b;
520520

521-
prio_queue_put(&work, from);
522-
while (work.nr) {
523-
struct commit_list *list;
524-
struct commit *commit = prio_queue_get(&work);
525-
526-
if (commit->object.flags & with_flag) {
527-
from->object.flags |= assign_flag;
528-
break;
529-
}
530-
if (!commit->object.parsed)
531-
parse_object(the_repository, &commit->object.oid);
532-
if (commit->object.flags & REACHABLE)
533-
continue;
534-
commit->object.flags |= REACHABLE;
535-
if (commit->date < min_commit_date)
536-
continue;
537-
for (list = commit->parents; list; list = list->next) {
538-
struct commit *parent = list->item;
539-
if (!(parent->object.flags & REACHABLE))
540-
prio_queue_put(&work, parent);
541-
}
542-
}
543-
from->object.flags |= REACHABLE;
544-
clear_commit_marks(from, REACHABLE);
545-
clear_prio_queue(&work);
546-
return (from->object.flags & assign_flag);
521+
if (a->generation < b->generation)
522+
return -1;
523+
if (a->generation > b->generation)
524+
return 1;
525+
return 0;
547526
}
548527

549528
int can_all_from_reach_with_flag(struct object_array *from,
550529
int with_flag, int assign_flag,
551-
time_t min_commit_date)
530+
time_t min_commit_date,
531+
uint32_t min_generation)
552532
{
533+
struct commit **list = NULL;
553534
int i;
535+
int result = 1;
554536

537+
ALLOC_ARRAY(list, from->nr);
555538
for (i = 0; i < from->nr; i++) {
556-
struct object *from_one = from->objects[i].item;
539+
list[i] = (struct commit *)from->objects[i].item;
557540

558-
if (from_one->flags & assign_flag)
559-
continue;
560-
from_one = deref_tag(the_repository, from_one, "a from object", 0);
561-
if (!from_one || from_one->type != OBJ_COMMIT) {
562-
/* no way to tell if this is reachable by
563-
* looking at the ancestry chain alone, so
564-
* leave a note to ourselves not to worry about
565-
* this object anymore.
566-
*/
567-
from->objects[i].item->flags |= assign_flag;
568-
continue;
569-
}
570-
if (!reachable((struct commit *)from_one, with_flag, assign_flag,
571-
min_commit_date))
541+
parse_commit(list[i]);
542+
543+
if (list[i]->generation < min_generation)
572544
return 0;
573545
}
574-
return 1;
546+
547+
QSORT(list, from->nr, compare_commits_by_gen);
548+
549+
for (i = 0; i < from->nr; i++) {
550+
/* DFS from list[i] */
551+
struct commit_list *stack = NULL;
552+
553+
list[i]->object.flags |= assign_flag;
554+
commit_list_insert(list[i], &stack);
555+
556+
while (stack) {
557+
struct commit_list *parent;
558+
559+
if (stack->item->object.flags & with_flag) {
560+
pop_commit(&stack);
561+
continue;
562+
}
563+
564+
for (parent = stack->item->parents; parent; parent = parent->next) {
565+
if (parent->item->object.flags & (with_flag | RESULT))
566+
stack->item->object.flags |= RESULT;
567+
568+
if (!(parent->item->object.flags & assign_flag)) {
569+
parent->item->object.flags |= assign_flag;
570+
571+
parse_commit(parent->item);
572+
573+
if (parent->item->date < min_commit_date ||
574+
parent->item->generation < min_generation)
575+
continue;
576+
577+
commit_list_insert(parent->item, &stack);
578+
break;
579+
}
580+
}
581+
582+
if (!parent)
583+
pop_commit(&stack);
584+
}
585+
586+
if (!(list[i]->object.flags & (with_flag | RESULT))) {
587+
result = 0;
588+
goto cleanup;
589+
}
590+
}
591+
592+
cleanup:
593+
for (i = 0; i < from->nr; i++) {
594+
clear_commit_marks(list[i], RESULT);
595+
clear_commit_marks(list[i], assign_flag);
596+
}
597+
return result;
575598
}
576599

577600
int can_all_from_reach(struct commit_list *from, struct commit_list *to,
@@ -581,13 +604,17 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
581604
time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0;
582605
struct commit_list *from_iter = from, *to_iter = to;
583606
int result;
607+
uint32_t min_generation = GENERATION_NUMBER_INFINITY;
584608

585609
while (from_iter) {
586610
add_object_array(&from_iter->item->object, NULL, &from_objs);
587611

588612
if (!parse_commit(from_iter->item)) {
589613
if (from_iter->item->date < min_commit_date)
590614
min_commit_date = from_iter->item->date;
615+
616+
if (from_iter->item->generation < min_generation)
617+
min_generation = from_iter->item->generation;
591618
}
592619

593620
from_iter = from_iter->next;
@@ -597,6 +624,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
597624
if (!parse_commit(to_iter->item)) {
598625
if (to_iter->item->date < min_commit_date)
599626
min_commit_date = to_iter->item->date;
627+
628+
if (to_iter->item->generation < min_generation)
629+
min_generation = to_iter->item->generation;
600630
}
601631

602632
to_iter->item->object.flags |= PARENT2;
@@ -605,7 +635,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to,
605635
}
606636

607637
result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1,
608-
min_commit_date);
638+
min_commit_date, min_generation);
609639

610640
while (from) {
611641
clear_commit_marks(from->item, PARENT1);

commit-reach.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,13 @@ int reachable(struct commit *from, int with_flag, int assign_flag,
6363
* Determine if every commit in 'from' can reach at least one commit
6464
* that is marked with 'with_flag'. As we traverse, use 'assign_flag'
6565
* as a marker for commits that are already visited. Do not walk
66-
* commits with date below 'min_commit_date'.
66+
* commits with date below 'min_commit_date' or generation below
67+
* 'min_generation'.
6768
*/
6869
int can_all_from_reach_with_flag(struct object_array *from,
6970
int with_flag, int assign_flag,
70-
time_t min_commit_date);
71+
time_t min_commit_date,
72+
uint32_t min_generation);
7173
int can_all_from_reach(struct commit_list *from, struct commit_list *to,
7274
int commit_date_cutoff);
7375

upload-pack.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,11 +338,14 @@ static int got_oid(const char *hex, struct object_id *oid)
338338

339339
static int ok_to_give_up(void)
340340
{
341+
uint32_t min_generation = GENERATION_NUMBER_ZERO;
342+
341343
if (!have_obj.nr)
342344
return 0;
343345

344346
return can_all_from_reach_with_flag(&want_obj, THEY_HAVE,
345-
COMMON_KNOWN, oldest_have);
347+
COMMON_KNOWN, oldest_have,
348+
min_generation);
346349
}
347350

348351
static int get_common_commits(void)

0 commit comments

Comments
 (0)