Skip to content

Commit

Permalink
path-walk: introduce an object walk by path
Browse files Browse the repository at this point in the history
TODO

Signed-off-by: Derrick Stolee <[email protected]>
  • Loading branch information
derrickstolee committed Aug 29, 2024
1 parent 17d4b10 commit e38d1b5
Show file tree
Hide file tree
Showing 3 changed files with 281 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,7 @@ LIB_OBJS += parse-options.o
LIB_OBJS += patch-delta.o
LIB_OBJS += patch-ids.o
LIB_OBJS += path.o
LIB_OBJS += path-walk.o
LIB_OBJS += pathspec.o
LIB_OBJS += pkt-line.o
LIB_OBJS += preload-index.o
Expand Down
237 changes: 237 additions & 0 deletions path-walk.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
/*
* path-walk.c: implementation for path-based walks of the object graph.
*/
#include "git-compat-util.h"
#include "path-walk.h"
#include "blob.h"
#include "commit.h"
#include "dir.h"
#include "hashmap.h"
#include "hex.h"
#include "object.h"
#include "oid-array.h"
#include "revision.h"
#include "string-list.h"
#include "strmap.h"
#include "trace2.h"
#include "tree.h"
#include "tree-walk.h"

/*
 * The set of objects discovered at one path during the walk.
 */
struct type_and_oid_list
{
/* Kind of object stored here; this file only assigns OBJ_TREE or OBJ_BLOB. */
enum object_type type;
/* Object IDs collected for the path, in the order they were appended. */
struct oid_array oids;
};

/*
 * Static initializer for 'struct type_and_oid_list': no object type chosen
 * yet (OBJ_NONE) and 'oids' set to OID_ARRAY_INIT.
 *
 * NOTE(review): the code visible in this file allocates its lists with
 * CALLOC_ARRAY() and does not use this macro.
 */
#define TYPE_AND_OID_LIST_INIT { \
.type = OBJ_NONE, \
.oids = OID_ARRAY_INIT \
}

/*
 * Working state for a single call to walk_objects_by_path().
 */
struct path_walk_context {
/**
 * Shortcuts to data reachable through 'info', kept here so the
 * walk can refer to them with fewer characters: 'repo' is
 * info->revs->repo and 'revs' is info->revs.
 */
struct repository *repo;
struct rev_info *revs;
struct path_walk_info *info;

/**
 * Map a path to a 'struct type_and_oid_list' containing the
 * objects discovered at that path. An entry is removed once
 * its path has been walked.
 */
struct strmap paths_to_lists;

/**
 * Store the paths that still need to be walked in a stack, to
 * facilitate depth-first-search without recursion.
 */
struct string_list path_stack;
};

/*
 * Expand the tree 'oid', found at 'base_path', by one level.
 *
 * Every entry that has not been SEEN yet is appended to the list stored
 * for its full path in ctx->paths_to_lists. A path that has no list yet
 * gets a new one and is pushed onto ctx->path_stack so that it is walked
 * later. Submodule (gitlink) entries are ignored.
 *
 * 'base_path' is used as a plain prefix: the entry name is appended
 * directly, so it must be "" or end with "/".
 *
 * Returns 0 on success and -1 if the tree cannot be looked up or parsed.
 */
static int add_children(struct path_walk_context *ctx,
			const char *base_path,
			struct object_id *oid)
{
	struct tree_desc desc;
	struct name_entry entry;
	struct strbuf path = STRBUF_INIT;
	size_t base_len;
	struct tree *tree = lookup_tree(ctx->repo, oid);

	if (!tree) {
		error(_("failed to walk children of tree %s: not found"),
		      oid_to_hex(oid));
		return -1;
	}

	/*
	 * Do not touch tree->buffer unless parsing succeeded; otherwise a
	 * missing or corrupt tree would silently look like an empty one.
	 */
	if (parse_tree(tree)) {
		error(_("failed to walk children of tree %s: could not parse"),
		      oid_to_hex(oid));
		return -1;
	}

	strbuf_addstr(&path, base_path);
	base_len = path.len;

	init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size);
	while (tree_entry(&desc, &entry)) {
		struct type_and_oid_list *list;
		struct object *o = NULL;
		enum object_type type;

		/* Skip submodules. */
		if (S_ISGITLINK(entry.mode))
			continue;

		/* With gitlinks gone, an entry is either a tree or a blob. */
		type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB;

		if (type == OBJ_TREE) {
			struct tree *child = lookup_tree(ctx->repo, &entry.oid);
			o = child ? &child->object : NULL;
		} else {
			struct blob *child = lookup_blob(ctx->repo, &entry.oid);
			o = child ? &child->object : NULL;
		}

		if (!o) /* report error?*/
			continue;

		/*
		 * Pass uninteresting flag, if necessary. This must be done
		 * before checking the SEEN flag, in case this object was added
		 * from an interesting object first.
		 */
		if (tree->object.flags & UNINTERESTING)
			o->flags |= UNINTERESTING;

		/* Skip this object if already seen. */
		if (o->flags & SEEN)
			continue;
		o->flags |= SEEN;

		strbuf_setlen(&path, base_len);
		strbuf_add(&path, entry.path, entry.pathlen);

		/*
		 * Trees will end with "/" for concatenation and distinction
		 * from blobs at the same path.
		 */
		if (type == OBJ_TREE)
			strbuf_addch(&path, '/');

		list = strmap_get(&ctx->paths_to_lists, path.buf);
		if (!list) {
			/* First object at this path: schedule the path. */
			CALLOC_ARRAY(list, 1);
			list->type = type;
			strmap_put(&ctx->paths_to_lists, path.buf, list);
			string_list_append(&ctx->path_stack, path.buf);
		}
		oid_array_append(&list->oids, &entry.oid);
	}

	free_tree_buffer(tree);
	strbuf_release(&path);
	return 0;
}

/*
* For each path in paths_to_explore, walk the trees another level
* and add any found blobs to the batch (but only if they don't
* exist and haven't been added yet).
*/
static int walk_path(struct path_walk_context *ctx,
const char *path)
{
struct type_and_oid_list *list;
int ret = 0;

list = strmap_get(&ctx->paths_to_lists, path);

/* Evaluate function pointer on this data. */
ret = ctx->info->path_fn(path, &list->oids, list->type,
ctx->info->path_fn_data);

/* Expand data for children. */
if (list->type == OBJ_TREE) {
for (size_t i = 0; i < list->oids.nr; i++) {
ret |= add_children(ctx,
path,
&list->oids.oid[i]);
}
}

oid_array_clear(&list->oids);
strmap_remove(&ctx->paths_to_lists, path, 1);
return ret;
}

static void clear_strmap(struct strmap *map)
{
struct hashmap_iter iter;
struct strmap_entry *e;

hashmap_for_each_entry(&map->map, &iter, e, ent) {
struct type_and_oid_list *list = e->value;
oid_array_clear(&list->oids);
}
strmap_clear(map, 1);
strmap_init(map);
}

/**
* Given the configuration of 'info', walk the commits based on 'info->revs' and
* call 'info->path_fn' on each discovered path.
*
* Returns nonzero on an error.
*/
int walk_objects_by_path(struct path_walk_info *info)
{
int ret = 0;
size_t commits_nr = 0, paths_nr = 0;
struct commit *c;
struct type_and_oid_list *list;
struct path_walk_context ctx = {
.repo = info->revs->repo,
.revs = info->revs,
.info = info,
.path_stack = STRING_LIST_INIT_DUP,
.paths_to_lists = STRMAP_INIT
};

trace2_region_enter("path-walk", "commit-walk", info->revs->repo);

/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(list, 1);
list->type = OBJ_TREE;
strmap_put(&ctx.paths_to_lists, "", list);

if (prepare_revision_walk(info->revs))
die(_("failed to setup revision walk"));

while ((c = get_revision(info->revs)))
{
struct object_id *oid = get_commit_tree_oid(c);
struct tree *t = lookup_tree(info->revs->repo, oid);

if (t && (c->object.flags & UNINTERESTING))
t->object.flags |= UNINTERESTING;

oid_array_append(&list->oids, oid);
}

trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
trace2_region_leave("path-walk", "commit-walk", info->revs->repo);

string_list_append(&ctx.path_stack, "");

while (!ret && ctx.path_stack.nr) {
char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
ctx.path_stack.nr--;
paths_nr++;

ret = walk_path(&ctx, path);

free(path);
}
trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr);

clear_strmap(&ctx.paths_to_lists);
string_list_clear(&ctx.path_stack, 0);
return ret;
}
43 changes: 43 additions & 0 deletions path-walk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* path-walk.h : Methods and structures for walking the object graph in batches
* by the paths that can reach those objects.
*/
#include "object.h" /* Required for 'enum object_type'. */

struct rev_info;
struct oid_array;

/**
 * The type of a function pointer for the method that is called on a list of
 * objects reachable at a given path.
 *
 * 'path' is the path shared by the objects: "" for root trees, and tree
 * paths end with "/".
 * 'oids' holds the object IDs discovered at that path.
 * 'type' is the type of those objects (OBJ_TREE or OBJ_BLOB).
 * 'data' is the 'path_fn_data' pointer from 'struct path_walk_info'.
 *
 * A nonzero return value stops the walk once the current path has been
 * expanded, and makes walk_objects_by_path() return nonzero.
 */
typedef int (*path_fn)(const char *path,
struct oid_array *oids,
enum object_type type,
void *data);

/**
 * Caller-supplied configuration for walk_objects_by_path().
 */
struct path_walk_info {
/**
 * revs provides the definitions for the commit walk, including
 * which commits are UNINTERESTING or not.
 */
struct rev_info *revs;

/**
 * The caller wishes to execute custom logic on objects reachable at a
 * given path. Every reachable object will be visited exactly once, and
 * the first path to see an object wins. This may not be a stable choice.
 *
 * 'path_fn_data' is passed unchanged as the last argument of every
 * 'path_fn' call.
 */
path_fn path_fn;
void *path_fn_data;
};

/**
 * Zero-initializer for 'struct path_walk_info'. The caller must still set
 * 'revs' and 'path_fn' before starting a walk, since both are dereferenced
 * unconditionally.
 */
#define PATH_WALK_INFO_INIT { 0 }

/**
 * Given the configuration of 'info', walk the commits based on 'info->revs' and
 * call 'info->path_fn' on each discovered path.
 *
 * The walk starts at the path "" with the root trees of the walked commits
 * and proceeds depth-first through their children.
 *
 * Dies if the revision walk cannot be prepared.
 *
 * Returns nonzero on an error, including a nonzero result from
 * 'info->path_fn' or a tree that cannot be walked.
 */
int walk_objects_by_path(struct path_walk_info *info);

0 comments on commit e38d1b5

Please sign in to comment.