Skip to content

Commit 6df56f4

Browse files
committed
path-walk: visit tags and cached objects
The rev_info that is specified for a path-walk traversal may specify visiting tag refs (both lightweight and annotated) and also may specify indexed objects (blobs and trees). Update the path-walk API to walk these objects as well. When walking tags, we need to peel the annotated objects until reaching a non-tag object. If we reach a commit, then we can add it to the pending objects to make sure we visit it in the commit walk portion. If we reach a tree, then we will assume that it is a root tree. If we reach a blob, then we have no good path name and so add it to a new list of "tagged blobs". When the rev_info includes the "--indexed-objects" flag, then the pending set includes blobs and trees found in the cache entries and cache-tree. The cache entries are usually blobs, though they could be trees in the case of a sparse index. The cache-tree stores previously-hashed tree objects but these are cleared out when staging objects below those paths. We add tests that demonstrate this. The indexed objects come with a non-NULL 'path' value in the pending item. This allows us to prepopulate the 'paths_to_lists' strmap with lists for these paths. The tricky thing about this walk is that we will want to combine the indexed objects walk with the commit walk, especially in the future case of walking objects during a command like 'git repack'. Whenever possible, we want the objects from the index to be grouped with similar objects in history. We don't want to miss any paths that appear only in the index and not in the commit history. Thus, we need to be careful to let the path stack be populated initially with only the root tree path (and possibly tags and tagged blobs) and go through the normal depth-first search. Afterwards, if there are other paths that are remaining in the 'paths_to_lists' strmap, we should then iterate through the stack and visit those objects recursively. Signed-off-by: Derrick Stolee <[email protected]>
1 parent db5c861 commit 6df56f4

File tree

5 files changed

+362
-27
lines changed

5 files changed

+362
-27
lines changed

Documentation/technical/api-path-walk.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ It is also important that you do not specify the `--objects` flag for the
3939
the objects will be walked in a separate way based on those starting
4040
commits.
4141

42-
`commits`, `blobs`, `trees`::
42+
`commits`, `blobs`, `trees`, `tags`::
4343
By default, these members are enabled and signal that the path-walk
4444
API should call the `path_fn` on objects of these types. Specialized
4545
applications could disable some options to make it simpler to walk

path-walk.c

+181-3
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,13 @@
1313
#include "revision.h"
1414
#include "string-list.h"
1515
#include "strmap.h"
16+
#include "tag.h"
1617
#include "trace2.h"
1718
#include "tree.h"
1819
#include "tree-walk.h"
1920

21+
static const char *root_path = "";
22+
2023
struct type_and_oid_list {
2124
enum object_type type;
2225
struct oid_array oids;
@@ -160,12 +163,16 @@ static int walk_path(struct path_walk_context *ctx,
160163

161164
list = strmap_get(&ctx->paths_to_lists, path);
162165

166+
if (!list)
167+
BUG("provided path '%s' that had no associated list", path);
168+
163169
if (!list->oids.nr)
164170
return 0;
165171

166172
/* Evaluate function pointer on this data, if requested. */
167173
if ((list->type == OBJ_TREE && ctx->info->trees) ||
168-
(list->type == OBJ_BLOB && ctx->info->blobs))
174+
(list->type == OBJ_BLOB && ctx->info->blobs) ||
175+
(list->type == OBJ_TAG && ctx->info->tags))
169176
ret = ctx->info->path_fn(path, &list->oids, list->type,
170177
ctx->info->path_fn_data);
171178

@@ -196,6 +203,139 @@ static void clear_paths_to_lists(struct strmap *map)
196203
strmap_init(map);
197204
}
198205

206+
static int setup_pending_objects(struct path_walk_info *info,
207+
struct path_walk_context *ctx)
208+
{
209+
struct type_and_oid_list *tags = NULL;
210+
struct type_and_oid_list *tagged_blobs = NULL;
211+
struct type_and_oid_list *root_tree_list = NULL;
212+
213+
if (info->tags)
214+
CALLOC_ARRAY(tags, 1);
215+
if (info->blobs)
216+
CALLOC_ARRAY(tagged_blobs, 1);
217+
if (info->trees)
218+
root_tree_list = strmap_get(&ctx->paths_to_lists, root_path);
219+
220+
/*
221+
* Pending objects include:
222+
* * Commits at branch tips.
223+
* * Annotated tags at tag tips.
224+
* * Any kind of object at lightweight tag tips.
225+
* * Trees and blobs in the index (with an associated path).
226+
*/
227+
for (size_t i = 0; i < info->revs->pending.nr; i++) {
228+
struct object_array_entry *pending = info->revs->pending.objects + i;
229+
struct object *obj = pending->item;
230+
231+
/* Commits will be picked up by revision walk. */
232+
if (obj->type == OBJ_COMMIT)
233+
continue;
234+
235+
/* Navigate annotated tag object chains. */
236+
while (obj->type == OBJ_TAG) {
237+
struct tag *tag = lookup_tag(info->revs->repo, &obj->oid);
238+
if (!tag) {
239+
error(_("failed to find tag %s"),
240+
oid_to_hex(&obj->oid));
241+
return -1;
242+
}
243+
if (tag->object.flags & SEEN)
244+
break;
245+
tag->object.flags |= SEEN;
246+
247+
if (tags)
248+
oid_array_append(&tags->oids, &obj->oid);
249+
obj = tag->tagged;
250+
}
251+
252+
if (obj->type == OBJ_TAG)
253+
continue;
254+
255+
/* We are now at a non-tag object. */
256+
if (obj->flags & SEEN)
257+
continue;
258+
obj->flags |= SEEN;
259+
260+
switch (obj->type) {
261+
case OBJ_TREE:
262+
if (!info->trees)
263+
continue;
264+
if (pending->path) {
265+
struct type_and_oid_list *list;
266+
char *path = *pending->path ? xstrfmt("%s/", pending->path)
267+
: xstrdup("");
268+
if (!(list = strmap_get(&ctx->paths_to_lists, path))) {
269+
CALLOC_ARRAY(list, 1);
270+
list->type = OBJ_TREE;
271+
strmap_put(&ctx->paths_to_lists, path, list);
272+
}
273+
oid_array_append(&list->oids, &obj->oid);
274+
free(path);
275+
} else {
276+
/* assume a root tree, such as a lightweight tag. */
277+
oid_array_append(&root_tree_list->oids, &obj->oid);
278+
}
279+
break;
280+
281+
case OBJ_BLOB:
282+
if (!info->blobs)
283+
continue;
284+
if (pending->path) {
285+
struct type_and_oid_list *list;
286+
char *path = pending->path;
287+
if (!(list = strmap_get(&ctx->paths_to_lists, path))) {
288+
CALLOC_ARRAY(list, 1);
289+
list->type = OBJ_BLOB;
290+
strmap_put(&ctx->paths_to_lists, path, list);
291+
}
292+
oid_array_append(&list->oids, &obj->oid);
293+
} else {
294+
/* assume a root tree, such as a lightweight tag. */
295+
oid_array_append(&tagged_blobs->oids, &obj->oid);
296+
}
297+
break;
298+
299+
case OBJ_COMMIT:
300+
/* Make sure it is in the object walk */
301+
if (obj != pending->item)
302+
add_pending_object(info->revs, obj, "");
303+
break;
304+
305+
default:
306+
BUG("should not see any other type here");
307+
}
308+
}
309+
310+
/*
311+
* Add tag objects and tagged blobs if they exist.
312+
*/
313+
if (tagged_blobs) {
314+
if (tagged_blobs->oids.nr) {
315+
const char *tagged_blob_path = "/tagged-blobs";
316+
tagged_blobs->type = OBJ_BLOB;
317+
push_to_stack(ctx, tagged_blob_path);
318+
strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs);
319+
} else {
320+
oid_array_clear(&tagged_blobs->oids);
321+
free(tagged_blobs);
322+
}
323+
}
324+
if (tags) {
325+
if (tags->oids.nr) {
326+
const char *tag_path = "/tags";
327+
tags->type = OBJ_TAG;
328+
push_to_stack(ctx, tag_path);
329+
strmap_put(&ctx->paths_to_lists, tag_path, tags);
330+
} else {
331+
oid_array_clear(&tags->oids);
332+
free(tags);
333+
}
334+
}
335+
336+
return 0;
337+
}
338+
199339
/**
200340
* Given the configuration of 'info', walk the commits based on 'info->revs' and
201341
* call 'info->path_fn' on each discovered path.
@@ -204,8 +344,7 @@ static void clear_paths_to_lists(struct strmap *map)
204344
*/
205345
int walk_objects_by_path(struct path_walk_info *info)
206346
{
207-
const char *root_path = "";
208-
int ret = 0;
347+
int ret;
209348
size_t commits_nr = 0, paths_nr = 0;
210349
struct commit *c;
211350
struct type_and_oid_list *root_tree_list;
@@ -224,15 +363,34 @@ int walk_objects_by_path(struct path_walk_info *info)
224363
CALLOC_ARRAY(commit_list, 1);
225364
commit_list->type = OBJ_COMMIT;
226365

366+
if (info->tags)
367+
info->revs->tag_objects = 1;
368+
227369
/* Insert a single list for the root tree into the paths. */
228370
CALLOC_ARRAY(root_tree_list, 1);
229371
root_tree_list->type = OBJ_TREE;
230372
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
231373
push_to_stack(&ctx, root_path);
232374

375+
/*
376+
* Set these values before preparing the walk to catch
377+
* lightweight tags pointing to non-commits and indexed objects.
378+
*/
379+
info->revs->blob_objects = info->blobs;
380+
info->revs->tree_objects = info->trees;
381+
233382
if (prepare_revision_walk(info->revs))
234383
die(_("failed to setup revision walk"));
235384

385+
info->revs->blob_objects = info->revs->tree_objects = 0;
386+
387+
trace2_region_enter("path-walk", "pending-walk", info->revs->repo);
388+
ret = setup_pending_objects(info, &ctx);
389+
trace2_region_leave("path-walk", "pending-walk", info->revs->repo);
390+
391+
if (ret)
392+
return ret;
393+
236394
while ((c = get_revision(info->revs))) {
237395
struct object_id *oid;
238396
struct tree *t;
@@ -280,6 +438,26 @@ int walk_objects_by_path(struct path_walk_info *info)
280438

281439
free(path);
282440
}
441+
442+
/* Are there paths remaining? Likely they are from indexed objects. */
443+
if (!strmap_empty(&ctx.paths_to_lists)) {
444+
struct hashmap_iter iter;
445+
struct strmap_entry *entry;
446+
447+
strmap_for_each_entry(&ctx.paths_to_lists, &iter, entry)
448+
push_to_stack(&ctx, entry->key);
449+
450+
while (!ret && ctx.path_stack.nr) {
451+
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
452+
ctx.path_stack.nr--;
453+
paths_nr++;
454+
455+
ret = walk_path(&ctx, path);
456+
457+
free(path);
458+
}
459+
}
460+
283461
trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr);
284462
trace2_region_leave("path-walk", "path-walk", info->revs->repo);
285463

path-walk.h

+2
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,14 @@ struct path_walk_info {
3939
int commits;
4040
int trees;
4141
int blobs;
42+
int tags;
4243
};
4344

4445
#define PATH_WALK_INFO_INIT { \
4546
.blobs = 1, \
4647
.trees = 1, \
4748
.commits = 1, \
49+
.tags = 1, \
4850
}
4951

5052
void path_walk_info_init(struct path_walk_info *info);

t/helper/test-path-walk.c

+13-2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ struct path_walk_test_data {
2323
uintmax_t commit_nr;
2424
uintmax_t tree_nr;
2525
uintmax_t blob_nr;
26+
uintmax_t tag_nr;
2627
};
2728

2829
static int emit_block(const char *path, struct oid_array *oids,
@@ -37,11 +38,18 @@ static int emit_block(const char *path, struct oid_array *oids,
3738
tdata->blob_nr += oids->nr;
3839
else if (type == OBJ_COMMIT)
3940
tdata->commit_nr += oids->nr;
41+
else if (type == OBJ_TAG)
42+
tdata->tag_nr += oids->nr;
4043
else
4144
BUG("we do not understand this type");
4245

4346
typestr = type_name(type);
4447

48+
/* This should never be output during tests. */
49+
if (!oids->nr)
50+
printf("%"PRIuMAX":%s:%s:EMPTY\n",
51+
tdata->batch_nr, typestr, path);
52+
4553
for (size_t i = 0; i < oids->nr; i++)
4654
printf("%"PRIuMAX":%s:%s:%s\n",
4755
tdata->batch_nr, typestr, path,
@@ -62,6 +70,8 @@ int cmd__path_walk(int argc, const char **argv)
6270
N_("toggle inclusion of blob objects")),
6371
OPT_BOOL(0, "commits", &info.commits,
6472
N_("toggle inclusion of commit objects")),
73+
OPT_BOOL(0, "tags", &info.tags,
74+
N_("toggle inclusion of tag objects")),
6575
OPT_BOOL(0, "trees", &info.trees,
6676
N_("toggle inclusion of tree objects")),
6777
OPT_END(),
@@ -87,8 +97,9 @@ int cmd__path_walk(int argc, const char **argv)
8797

8898
printf("commits:%" PRIuMAX "\n"
8999
"trees:%" PRIuMAX "\n"
90-
"blobs:%" PRIuMAX "\n",
91-
data.commit_nr, data.tree_nr, data.blob_nr);
100+
"blobs:%" PRIuMAX "\n"
101+
"tags:%" PRIuMAX "\n",
102+
data.commit_nr, data.tree_nr, data.blob_nr, data.tag_nr);
92103

93104
release_revisions(&revs);
94105
return res;

0 commit comments

Comments
 (0)