Skip to content

Commit 09a2f61

Browse files
committed
Merge branch 'jt/pack-objects-prefetch-in-batch' into next
While packing many objects in a repository with a promisor remote, lazily fetching missing objects from the promisor remote one by one may be inefficient---the code now attempts to fetch all the missing objects in a single batch (obviously this won't work for a lazy clone that lazily fetches tree objects, as you cannot even enumerate which blobs are missing until you learn which trees are missing). * jt/pack-objects-prefetch-in-batch: pack-objects: prefetch objects to be packed pack-objects: refactor to oid_object_info_extended
2 parents 807de33 + e00549a commit 09a2f61

File tree

2 files changed

+72
-4
lines changed

2 files changed

+72
-4
lines changed

builtin/pack-objects.c

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include "midx.h"
3636
#include "trace2.h"
3737
#include "shallow.h"
38+
#include "promisor-remote.h"
3839

3940
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
4041
#define SIZE(obj) oe_size(&to_pack, obj)
@@ -1704,9 +1705,30 @@ static int can_reuse_delta(const struct object_id *base_oid,
17041705
return 0;
17051706
}
17061707

1707-
static void check_object(struct object_entry *entry)
1708+
static void prefetch_to_pack(uint32_t object_index_start) {
1709+
struct oid_array to_fetch = OID_ARRAY_INIT;
1710+
uint32_t i;
1711+
1712+
for (i = object_index_start; i < to_pack.nr_objects; i++) {
1713+
struct object_entry *entry = to_pack.objects + i;
1714+
1715+
if (!oid_object_info_extended(the_repository,
1716+
&entry->idx.oid,
1717+
NULL,
1718+
OBJECT_INFO_FOR_PREFETCH))
1719+
continue;
1720+
oid_array_append(&to_fetch, &entry->idx.oid);
1721+
}
1722+
promisor_remote_get_direct(the_repository,
1723+
to_fetch.oid, to_fetch.nr);
1724+
oid_array_clear(&to_fetch);
1725+
}
1726+
1727+
static void check_object(struct object_entry *entry, uint32_t object_index)
17081728
{
17091729
unsigned long canonical_size;
1730+
enum object_type type;
1731+
struct object_info oi = {.typep = &type, .sizep = &canonical_size};
17101732

17111733
if (IN_PACK(entry)) {
17121734
struct packed_git *p = IN_PACK(entry);
@@ -1840,8 +1862,18 @@ static void check_object(struct object_entry *entry)
18401862
unuse_pack(&w_curs);
18411863
}
18421864

1843-
oe_set_type(entry,
1844-
oid_object_info(the_repository, &entry->idx.oid, &canonical_size));
1865+
if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
1866+
OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
1867+
if (has_promisor_remote()) {
1868+
prefetch_to_pack(object_index);
1869+
if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
1870+
OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
1871+
type = -1;
1872+
} else {
1873+
type = -1;
1874+
}
1875+
}
1876+
oe_set_type(entry, type);
18451877
if (entry->type_valid) {
18461878
SET_SIZE(entry, canonical_size);
18471879
} else {
@@ -2061,7 +2093,7 @@ static void get_object_details(void)
20612093

20622094
for (i = 0; i < to_pack.nr_objects; i++) {
20632095
struct object_entry *entry = sorted_by_offset[i];
2064-
check_object(entry);
2096+
check_object(entry, i);
20652097
if (entry->type_valid &&
20662098
oe_size_greater_than(&to_pack, entry, big_file_threshold))
20672099
entry->no_try_delta = 1;

t/t5300-pack-object.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,4 +497,40 @@ test_expect_success 'make sure index-pack detects the SHA1 collision (large blob
497497
)
498498
'
499499

500+
test_expect_success 'prefetch objects' '
501+
rm -rf server client &&
502+
503+
git init server &&
504+
test_config -C server uploadpack.allowanysha1inwant 1 &&
505+
test_config -C server uploadpack.allowfilter 1 &&
506+
test_config -C server protocol.version 2 &&
507+
508+
echo one >server/one &&
509+
git -C server add one &&
510+
git -C server commit -m one &&
511+
git -C server branch one_branch &&
512+
513+
echo two_a >server/two_a &&
514+
echo two_b >server/two_b &&
515+
git -C server add two_a two_b &&
516+
git -C server commit -m two &&
517+
518+
echo three >server/three &&
519+
git -C server add three &&
520+
git -C server commit -m three &&
521+
git -C server branch three_branch &&
522+
523+
# Clone, fetch "two" with blobs excluded, and re-push it. This requires
524+
# the client to have the blobs of "two" - verify that these are
525+
# prefetched in one batch.
526+
git clone --filter=blob:none --single-branch -b one_branch \
527+
"file://$(pwd)/server" client &&
528+
test_config -C client protocol.version 2 &&
529+
TWO=$(git -C server rev-parse three_branch^) &&
530+
git -C client fetch --filter=blob:none origin "$TWO" &&
531+
GIT_TRACE_PACKET=$(pwd)/trace git -C client push origin "$TWO":refs/heads/two_branch &&
532+
grep "git> done" trace >donelines &&
533+
test_line_count = 1 donelines
534+
'
535+
500536
test_done

0 commit comments

Comments
 (0)