From c7944050af45c7384f97c712cb4d126672c7cfa6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 9 May 2019 07:22:31 -0700 Subject: [PATCH 01/32] commit-graph: fix the_repository reference The parse_commit_buffer() method takes a repository pointer, so it should not refer to the_repository anymore. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit.c b/commit.c index a5333c7ac6c373..e4d1233226a1f6 100644 --- a/commit.c +++ b/commit.c @@ -443,7 +443,7 @@ int parse_commit_buffer(struct repository *r, struct commit *item, const void *b item->date = parse_commit_date(bufptr, tail); if (check_graph) - load_commit_graph_info(the_repository, item); + load_commit_graph_info(r, item); return 0; } From e103f7276f0d809c2935ebc1a3d68c6bbfaed23d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:37 -0700 Subject: [PATCH 02/32] commit-graph: return with errors during write The write_commit_graph() method uses die() to report failure and exit when confronted with an unexpected condition. This use of die() in a library function is incorrect and is now replaced by error() statements and an int return type. Return zero on success and a negative value on failure. Now that we use 'goto cleanup' to jump to the terminal condition on an error, we have new paths that could lead to uninitialized values. New initializers are added to correct for this. The builtins 'commit-graph', 'gc', and 'commit' call these methods, so update them to check the return value. Test that 'git commit-graph write' returns a proper error code when hitting a failure condition in write_commit_graph(). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 20 +++++++------- builtin/commit.c | 5 ++-- builtin/gc.c | 7 ++--- commit-graph.c | 60 ++++++++++++++++++++++++++++------------- commit-graph.h | 16 +++++++---- t/t5318-commit-graph.sh | 8 ++++++ 6 files changed, 77 insertions(+), 39 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 537fdfd0f0759e..2a1c4d701fcb50 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -141,6 +141,7 @@ static int graph_write(int argc, const char **argv) struct string_list *pack_indexes = NULL; struct string_list *commit_hex = NULL; struct string_list lines; + int result = 0; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -168,10 +169,8 @@ static int graph_write(int argc, const char **argv) read_replace_refs = 0; - if (opts.reachable) { - write_commit_graph_reachable(opts.obj_dir, opts.append, 1); - return 0; - } + if (opts.reachable) + return write_commit_graph_reachable(opts.obj_dir, opts.append, 1); string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { @@ -188,14 +187,15 @@ static int graph_write(int argc, const char **argv) UNLEAK(buf); } - write_commit_graph(opts.obj_dir, - pack_indexes, - commit_hex, - opts.append, - 1); + if (write_commit_graph(opts.obj_dir, + pack_indexes, + commit_hex, + opts.append, + 1)) + result = 1; UNLEAK(lines); - return 0; + return result; } int cmd_commit_graph(int argc, const char **argv, const char *prefix) diff --git a/builtin/commit.c b/builtin/commit.c index 2986553d5ffb97..b9ea7222fae4d6 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1669,8 +1669,9 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "new_index file. Check that disk is not full and quota is\n" "not exceeded, and then \"git reset HEAD\" to recover.")); - if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0)) - write_commit_graph_reachable(get_object_directory(), 0, 0); + if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && + write_commit_graph_reachable(get_object_directory(), 0, 0)) + return 1; repo_rerere(the_repository, 0); run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); diff --git a/builtin/gc.c b/builtin/gc.c index 020f725acc40f4..3984addf73c48a 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -664,9 +664,10 @@ int cmd_gc(int argc, const char **argv, const char *prefix) clean_pack_garbage(); } - if (gc_write_commit_graph) - write_commit_graph_reachable(get_object_directory(), 0, - !quiet && !daemonized); + if (gc_write_commit_graph && + write_commit_graph_reachable(get_object_directory(), 0, + !quiet && !daemonized)) + return 1; if (auto_gc && too_many_loose_objects()) warning(_("There are too many unreachable loose objects; " diff --git a/commit-graph.c b/commit-graph.c index 66865acbd7489d..1b58d1da149ae8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -851,27 +851,30 @@ static int add_ref_to_list(const char *refname, return 0; } -void write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress) +int write_commit_graph_reachable(const char *obj_dir, int append, + int report_progress) { struct string_list list = STRING_LIST_INIT_DUP; + int result; for_each_ref(add_ref_to_list, &list); - write_commit_graph(obj_dir, NULL, &list, append, report_progress); + result = write_commit_graph(obj_dir, NULL, &list, + append, report_progress); string_list_clear(&list, 0); + return result; } -void write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - int append, int report_progress) +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + int append, int report_progress) { struct packed_oid_list oids; struct packed_commit_list commits; struct hashfile *f; uint32_t i, count_distinct = 0; - char *graph_name; + char *graph_name = NULL; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; @@ -883,15 +886,17 @@ void write_commit_graph(const char *obj_dir, uint64_t progress_cnt = 0; struct strbuf progress_title = STRBUF_INIT; unsigned long approx_nr_objects; + int res = 0; if (!commit_graph_compatible(the_repository)) - return; + return 0; oids.nr = 0; approx_nr_objects = approximate_object_count(); oids.alloc = approx_nr_objects / 32; oids.progress = NULL; oids.progress_done = 0; + commits.list = NULL; if (append) { prepare_commit_graph_one(the_repository, obj_dir); @@ -932,10 +937,16 @@ void write_commit_graph(const char *obj_dir, strbuf_setlen(&packname, dirlen); strbuf_addstr(&packname, pack_indexes->items[i].string); p = add_packed_git(packname.buf, packname.len, 1); - if (!p) - die(_("error adding pack %s"), packname.buf); - if (open_pack_index(p)) - die(_("error opening index for %s"), packname.buf); + if (!p) { + error(_("error adding pack %s"), packname.buf); + res = -1; + goto cleanup; + } + if (open_pack_index(p)) { + error(_("error opening index for %s"), packname.buf); + res = -1; + goto cleanup; + } for_each_object_in_pack(p, add_packed_commits, &oids, FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); @@ -1006,8 +1017,11 @@ void write_commit_graph(const char *obj_dir, } stop_progress(&progress); - if (count_distinct >= GRAPH_EDGE_LAST_MASK) - die(_("the commit graph format cannot write %d commits"), count_distinct); + if (count_distinct >= GRAPH_EDGE_LAST_MASK) { + error(_("the commit graph format cannot write %d commits"), count_distinct); + res = -1; + goto cleanup; + } commits.nr = 0; commits.alloc = count_distinct; @@ -1039,16 +1053,21 @@ void write_commit_graph(const char *obj_dir, num_chunks = num_extra_edges ? 4 : 3; stop_progress(&progress); - if (commits.nr >= GRAPH_EDGE_LAST_MASK) - die(_("too many commits to write graph")); + if (commits.nr >= GRAPH_EDGE_LAST_MASK) { + error(_("too many commits to write graph")); + res = -1; + goto cleanup; + } compute_generation_numbers(&commits, report_progress); graph_name = get_commit_graph_filename(obj_dir); if (safe_create_leading_directories(graph_name)) { UNLEAK(graph_name); - die_errno(_("unable to create leading directories of %s"), - graph_name); + error(_("unable to create leading directories of %s"), + graph_name); + res = -1; + goto cleanup; } hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); @@ -1107,9 +1126,12 @@ void write_commit_graph(const char *obj_dir, finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); +cleanup: free(graph_name); free(commits.list); free(oids.list); + + return res; } #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2 diff --git a/commit-graph.h b/commit-graph.h index 7dfb8c896fc35f..869717ca19f192 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -65,12 +65,18 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, */ int generation_numbers_enabled(struct repository *r); -void write_commit_graph_reachable(const char *obj_dir, int append, +/* + * The write_commit_graph* methods return zero on success + * and a negative value on failure. Note that if the repository + * is not compatible with the commit-graph feature, then the + * methods will return 0 without writing a commit-graph. + */ +int write_commit_graph_reachable(const char *obj_dir, int append, int report_progress); -void write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - int append, int report_progress); +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + int append, int report_progress); int verify_commit_graph(struct repository *r, struct commit_graph *g); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index e80c1cac02b262..3b6fd0d72848ff 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -23,6 +23,14 @@ test_expect_success 'write graph with no packs' ' test_path_is_file info/commit-graph ' +test_expect_success 'close with correct error on bad input' ' + cd "$TRASH_DIRECTORY/full" && + echo doesnotexist >in && + { git commit-graph write --stdin-packs stderr; ret=$?; } && + test "$ret" = 1 && + test_i18ngrep "error adding pack" stderr +' + test_expect_success 'create commits and repack' ' cd "$TRASH_DIRECTORY/full" && for i in $(test_seq 3) From 5af803945212af875670582ff153ee05ec368b83 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:38 -0700 Subject: [PATCH 03/32] commit-graph: collapse parameters into flags The write_commit_graph() and write_commit_graph_reachable() methods currently take two boolean parameters: 'append' and 'report_progress'. As we update these methods, adding more parameters this way becomes cluttered and hard to maintain. Collapse these parameters into a 'flags' parameter, and adjust the callers to provide flags as necessary. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 8 +++++--- builtin/commit.c | 2 +- builtin/gc.c | 4 ++-- commit-graph.c | 9 +++++---- commit-graph.h | 8 +++++--- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 2a1c4d701fcb50..d8efa5bab276a8 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -142,6 +142,7 @@ static int graph_write(int argc, const char **argv) struct string_list *commit_hex = NULL; struct string_list lines; int result = 0; + unsigned int flags = COMMIT_GRAPH_PROGRESS; static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, @@ -166,11 +167,13 @@ static int graph_write(int argc, const char **argv) die(_("use at most one of --reachable, --stdin-commits, or --stdin-packs")); if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + if (opts.append) + flags |= COMMIT_GRAPH_APPEND; read_replace_refs = 0; if (opts.reachable) - return write_commit_graph_reachable(opts.obj_dir, opts.append, 1); + return write_commit_graph_reachable(opts.obj_dir, flags); string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { @@ -190,8 +193,7 @@ static int graph_write(int argc, const char **argv) if (write_commit_graph(opts.obj_dir, pack_indexes, commit_hex, - opts.append, - 1)) + flags)) result = 1; UNLEAK(lines); diff --git a/builtin/commit.c b/builtin/commit.c index b9ea7222fae4d6..b001ef565ddcb1 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1670,7 +1670,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "not exceeded, and then \"git reset HEAD\" to recover.")); if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && - write_commit_graph_reachable(get_object_directory(), 0, 0)) + write_commit_graph_reachable(get_object_directory(), 0)) return 1; repo_rerere(the_repository, 0); diff --git a/builtin/gc.c b/builtin/gc.c index 3984addf73c48a..df2573f124d5fd 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -665,8 +665,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } if (gc_write_commit_graph && - write_commit_graph_reachable(get_object_directory(), 0, - !quiet && !daemonized)) + write_commit_graph_reachable(get_object_directory(), + !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0)) return 1; if (auto_gc && too_many_loose_objects()) diff --git a/commit-graph.c b/commit-graph.c index 1b58d1da149ae8..fc40b531afc04a 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -851,15 +851,14 @@ static int add_ref_to_list(const char *refname, return 0; } -int write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress) +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags) { struct string_list list = STRING_LIST_INIT_DUP; int result; for_each_ref(add_ref_to_list, &list); result = write_commit_graph(obj_dir, NULL, &list, - append, report_progress); + flags); string_list_clear(&list, 0); return result; @@ -868,7 +867,7 @@ int write_commit_graph_reachable(const char *obj_dir, int append, int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - int append, int report_progress) + unsigned int flags) { struct packed_oid_list oids; struct packed_commit_list commits; @@ -887,6 +886,8 @@ int write_commit_graph(const char *obj_dir, struct strbuf progress_title = STRBUF_INIT; unsigned long approx_nr_objects; int res = 0; + int append = flags & COMMIT_GRAPH_APPEND; + int report_progress = flags & COMMIT_GRAPH_PROGRESS; if (!commit_graph_compatible(the_repository)) return 0; diff --git a/commit-graph.h b/commit-graph.h index 869717ca19f192..01538b5cf59986 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -65,18 +65,20 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, */ int generation_numbers_enabled(struct repository *r); +#define COMMIT_GRAPH_APPEND (1 << 0) +#define COMMIT_GRAPH_PROGRESS (1 << 1) + /* * The write_commit_graph* methods return zero on success * and a negative value on failure. Note that if the repository * is not compatible with the commit-graph feature, then the * methods will return 0 without writing a commit-graph. */ -int write_commit_graph_reachable(const char *obj_dir, int append, - int report_progress); +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags); int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - int append, int report_progress); + unsigned int flags); int verify_commit_graph(struct repository *r, struct commit_graph *g); From 10bd0be173b0ab9a9255fc15a768b2518d3eba57 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:39 -0700 Subject: [PATCH 04/32] commit-graph: remove Future Work section The commit-graph feature began with a long list of planned benefits, most of which are now complete. The future work section has only a few items left. As for making more algorithms aware of generation numbers, some are only waiting for generation number v2 to ensure the performance matches the existing behavior using commit date. It is unlikely that we will ever send a commit-graph file as part of the protocol, since we would need to verify the data, and that is expensive. If we want to start trusting remote content, then that item can be investigated again. While there is more work to be done on the feature, having a section of the docs devoted to a TODO list is wasteful and hard to keep up-to-date. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 7805b0968c828f..fb53341d5ee311 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -127,23 +127,6 @@ Design Details helpful for these clones, anyway. The commit-graph will not be read or written when shallow commits are present. -Future Work ------------ - -- After computing and storing generation numbers, we must make graph - walks aware of generation numbers to gain the performance benefits they - enable. This will mostly be accomplished by swapping a commit-date-ordered - priority queue with one ordered by generation number. The following - operations are important candidates: - - - 'log --topo-order' - - 'tag --merged' - -- A server could provide a commit-graph file as part of the network protocol - to avoid extra calculations by clients. This feature is only of benefit if - the user is willing to trust the file, because verifying the file is correct - is as hard as computing it from scratch. - Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 From c9905beade13efff6be9c15ebe03d07fe5278ccc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:40 -0700 Subject: [PATCH 05/32] commit-graph: create write_commit_graph_context The write_commit_graph() method is too large and complex. To simplify it, we should extract several helper functions. However, we will risk repeating a lot of declarations related to progress incidators and object id or commit lists. Create a new write_commit_graph_context struct that contains the core data structures used in this process. Replace the other local variables with the values inside the context object. Following this change, we will start to lift code segments wholesale out of the write_commit_graph() method and into helper functions. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 390 ++++++++++++++++++++++++------------------------- 1 file changed, 194 insertions(+), 196 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index fc40b531afc04a..6d7e83cfe86c66 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -518,14 +518,38 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c); } +struct packed_commit_list { + struct commit **list; + int nr; + int alloc; +}; + +struct packed_oid_list { + struct object_id *list; + int nr; + int alloc; +}; + +struct write_commit_graph_context { + struct repository *r; + const char *obj_dir; + char *graph_name; + struct packed_oid_list oids; + struct packed_commit_list commits; + int num_extra_edges; + unsigned long approx_nr_objects; + struct progress *progress; + int progress_done; + uint64_t progress_cnt; + unsigned append:1, + report_progress:1; +}; + static void write_graph_chunk_fanout(struct hashfile *f, - struct commit **commits, - int nr_commits, - struct progress *progress, - uint64_t *progress_cnt) + struct write_commit_graph_context *ctx) { int i, count = 0; - struct commit **list = commits; + struct commit **list = ctx->commits.list; /* * Write the first-level table (the list is sorted, @@ -533,10 +557,10 @@ static void write_graph_chunk_fanout(struct hashfile *f, * having to do eight extra binary search iterations). */ for (i = 0; i < 256; i++) { - while (count < nr_commits) { + while (count < ctx->commits.nr) { if ((*list)->object.oid.hash[0] != i) break; - display_progress(progress, ++*progress_cnt); + display_progress(ctx->progress, ++ctx->progress_cnt); count++; list++; } @@ -546,14 +570,12 @@ static void write_graph_chunk_fanout(struct hashfile *f, } static void write_graph_chunk_oids(struct hashfile *f, int hash_len, - struct commit **commits, int nr_commits, - struct progress *progress, - uint64_t *progress_cnt) + struct write_commit_graph_context *ctx) { - struct commit **list = commits; + struct commit **list = ctx->commits.list; int count; - for (count = 0; count < nr_commits; count++, list++) { - display_progress(progress, ++*progress_cnt); + for (count = 0; count < ctx->commits.nr; count++, list++) { + display_progress(ctx->progress, ++ctx->progress_cnt); hashwrite(f, (*list)->object.oid.hash, (int)hash_len); } } @@ -565,19 +587,17 @@ static const unsigned char *commit_to_sha1(size_t index, void *table) } static void write_graph_chunk_data(struct hashfile *f, int hash_len, - struct commit **commits, int nr_commits, - struct progress *progress, - uint64_t *progress_cnt) + struct write_commit_graph_context *ctx) { - struct commit **list = commits; - struct commit **last = commits + nr_commits; + struct commit **list = ctx->commits.list; + struct commit **last = ctx->commits.list + ctx->commits.nr; uint32_t num_extra_edges = 0; while (list < last) { struct commit_list *parent; int edge_value; uint32_t packedDate[2]; - display_progress(progress, ++*progress_cnt); + display_progress(ctx->progress, ++ctx->progress_cnt); parse_commit_no_graph(*list); hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len); @@ -588,8 +608,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, edge_value = GRAPH_PARENT_NONE; else { edge_value = sha1_pos(parent->item->object.oid.hash, - commits, - nr_commits, + ctx->commits.list, + ctx->commits.nr, commit_to_sha1); if (edge_value < 0) @@ -609,8 +629,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges; else { edge_value = sha1_pos(parent->item->object.oid.hash, - commits, - nr_commits, + ctx->commits.list, + ctx->commits.nr, commit_to_sha1); if (edge_value < 0) BUG("missing parent %s for commit %s", @@ -642,19 +662,16 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, } static void write_graph_chunk_extra_edges(struct hashfile *f, - struct commit **commits, - int nr_commits, - struct progress *progress, - uint64_t *progress_cnt) + struct write_commit_graph_context *ctx) { - struct commit **list = commits; - struct commit **last = commits + nr_commits; + struct commit **list = ctx->commits.list; + struct commit **last = ctx->commits.list + ctx->commits.nr; struct commit_list *parent; while (list < last) { int num_parents = 0; - display_progress(progress, ++*progress_cnt); + display_progress(ctx->progress, ++ctx->progress_cnt); for (parent = (*list)->parents; num_parents < 3 && parent; parent = parent->next) @@ -668,8 +685,8 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, /* Since num_parents > 2, this initializer is safe. */ for (parent = (*list)->parents->next; parent; parent = parent->next) { int edge_value = sha1_pos(parent->item->object.oid.hash, - commits, - nr_commits, + ctx->commits.list, + ctx->commits.nr, commit_to_sha1); if (edge_value < 0) @@ -693,125 +710,111 @@ static int commit_compare(const void *_a, const void *_b) return oidcmp(a, b); } -struct packed_commit_list { - struct commit **list; - int nr; - int alloc; -}; - -struct packed_oid_list { - struct object_id *list; - int nr; - int alloc; - struct progress *progress; - int progress_done; -}; - static int add_packed_commits(const struct object_id *oid, struct packed_git *pack, uint32_t pos, void *data) { - struct packed_oid_list *list = (struct packed_oid_list*)data; + struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data; enum object_type type; off_t offset = nth_packed_object_offset(pack, pos); struct object_info oi = OBJECT_INFO_INIT; - if (list->progress) - display_progress(list->progress, ++list->progress_done); + if (ctx->progress) + display_progress(ctx->progress, ++ctx->progress_done); oi.typep = &type; - if (packed_object_info(the_repository, pack, offset, &oi) < 0) + if (packed_object_info(ctx->r, pack, offset, &oi) < 0) die(_("unable to get type of object %s"), oid_to_hex(oid)); if (type != OBJ_COMMIT) return 0; - ALLOC_GROW(list->list, list->nr + 1, list->alloc); - oidcpy(&(list->list[list->nr]), oid); - list->nr++; + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&(ctx->oids.list[ctx->oids.nr]), oid); + ctx->oids.nr++; return 0; } -static void add_missing_parents(struct packed_oid_list *oids, struct commit *commit) +static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit) { struct commit_list *parent; for (parent = commit->parents; parent; parent = parent->next) { if (!(parent->item->object.flags & UNINTERESTING)) { - ALLOC_GROW(oids->list, oids->nr + 1, oids->alloc); - oidcpy(&oids->list[oids->nr], &(parent->item->object.oid)); - oids->nr++; + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&ctx->oids.list[ctx->oids.nr], &(parent->item->object.oid)); + ctx->oids.nr++; parent->item->object.flags |= UNINTERESTING; } } } -static void close_reachable(struct packed_oid_list *oids, int report_progress) +static void close_reachable(struct write_commit_graph_context *ctx) { int i; struct commit *commit; - struct progress *progress = NULL; - if (report_progress) - progress = start_delayed_progress( - _("Loading known commits in commit graph"), oids->nr); - for (i = 0; i < oids->nr; i++) { - display_progress(progress, i + 1); - commit = lookup_commit(the_repository, &oids->list[i]); + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Loading known commits in commit graph"), + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + commit = lookup_commit(ctx->r, &ctx->oids.list[i]); if (commit) commit->object.flags |= UNINTERESTING; } - stop_progress(&progress); + stop_progress(&ctx->progress); /* - * As this loop runs, oids->nr may grow, but not more + * As this loop runs, ctx->oids.nr may grow, but not more * than the number of missing commits in the reachable * closure. */ - if (report_progress) - progress = start_delayed_progress( - _("Expanding reachable commits in commit graph"), oids->nr); - for (i = 0; i < oids->nr; i++) { - display_progress(progress, i + 1); - commit = lookup_commit(the_repository, &oids->list[i]); + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Expanding reachable commits in commit graph"), + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + commit = lookup_commit(ctx->r, &ctx->oids.list[i]); if (commit && !parse_commit_no_graph(commit)) - add_missing_parents(oids, commit); + add_missing_parents(ctx, commit); } - stop_progress(&progress); + stop_progress(&ctx->progress); - if (report_progress) - progress = start_delayed_progress( - _("Clearing commit marks in commit graph"), oids->nr); - for (i = 0; i < oids->nr; i++) { - display_progress(progress, i + 1); - commit = lookup_commit(the_repository, &oids->list[i]); + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Clearing commit marks in commit graph"), + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + commit = lookup_commit(ctx->r, &ctx->oids.list[i]); if (commit) commit->object.flags &= ~UNINTERESTING; } - stop_progress(&progress); + stop_progress(&ctx->progress); } -static void compute_generation_numbers(struct packed_commit_list* commits, - int report_progress) +static void compute_generation_numbers(struct write_commit_graph_context *ctx) { int i; struct commit_list *list = NULL; - struct progress *progress = NULL; - if (report_progress) - progress = start_progress( - _("Computing commit graph generation numbers"), - commits->nr); - for (i = 0; i < commits->nr; i++) { - display_progress(progress, i + 1); - if (commits->list[i]->generation != GENERATION_NUMBER_INFINITY && - commits->list[i]->generation != GENERATION_NUMBER_ZERO) + if (ctx->report_progress) + ctx->progress = start_progress( + _("Computing commit graph generation numbers"), + ctx->commits.nr); + for (i = 0; i < ctx->commits.nr; i++) { + display_progress(ctx->progress, i + 1); + if (ctx->commits.list[i]->generation != GENERATION_NUMBER_INFINITY && + ctx->commits.list[i]->generation != GENERATION_NUMBER_ZERO) continue; - commit_list_insert(commits->list[i], &list); + commit_list_insert(ctx->commits.list[i], &list); while (list) { struct commit *current = list->item; struct commit_list *parent; @@ -838,7 +841,7 @@ static void compute_generation_numbers(struct packed_commit_list* commits, } } } - stop_progress(&progress); + stop_progress(&ctx->progress); } static int add_ref_to_list(const char *refname, @@ -869,8 +872,7 @@ int write_commit_graph(const char *obj_dir, struct string_list *commit_hex, unsigned int flags) { - struct packed_oid_list oids; - struct packed_commit_list commits; + struct write_commit_graph_context *ctx; struct hashfile *f; uint32_t i, count_distinct = 0; char *graph_name = NULL; @@ -878,44 +880,38 @@ int write_commit_graph(const char *obj_dir, uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; int num_chunks; - int num_extra_edges; struct commit_list *parent; - struct progress *progress = NULL; const unsigned hashsz = the_hash_algo->rawsz; - uint64_t progress_cnt = 0; struct strbuf progress_title = STRBUF_INIT; - unsigned long approx_nr_objects; int res = 0; - int append = flags & COMMIT_GRAPH_APPEND; - int report_progress = flags & COMMIT_GRAPH_PROGRESS; if (!commit_graph_compatible(the_repository)) return 0; - oids.nr = 0; - approx_nr_objects = approximate_object_count(); - oids.alloc = approx_nr_objects / 32; - oids.progress = NULL; - oids.progress_done = 0; - commits.list = NULL; - - if (append) { - prepare_commit_graph_one(the_repository, obj_dir); - if (the_repository->objects->commit_graph) - oids.alloc += the_repository->objects->commit_graph->num_commits; + ctx = xcalloc(1, sizeof(struct write_commit_graph_context)); + ctx->r = the_repository; + ctx->obj_dir = obj_dir; + ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; + ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; + + ctx->approx_nr_objects = approximate_object_count(); + ctx->oids.alloc = ctx->approx_nr_objects / 32; + + if (ctx->append) { + prepare_commit_graph_one(ctx->r, ctx->obj_dir); + if (ctx->r->objects->commit_graph) + ctx->oids.alloc += ctx->r->objects->commit_graph->num_commits; } - if (oids.alloc < 1024) - oids.alloc = 1024; - ALLOC_ARRAY(oids.list, oids.alloc); - - if (append && the_repository->objects->commit_graph) { - struct commit_graph *commit_graph = - the_repository->objects->commit_graph; - for (i = 0; i < commit_graph->num_commits; i++) { - const unsigned char *hash = commit_graph->chunk_oid_lookup + - commit_graph->hash_len * i; - hashcpy(oids.list[oids.nr++].hash, hash); + if (ctx->oids.alloc < 1024) + ctx->oids.alloc = 1024; + ALLOC_ARRAY(ctx->oids.list, ctx->oids.alloc); + + if (ctx->append && ctx->r->objects->commit_graph) { + struct commit_graph *g = ctx->r->objects->commit_graph; + for (i = 0; i < g->num_commits; i++) { + const unsigned char *hash = g->chunk_oid_lookup + g->hash_len * i; + hashcpy(ctx->oids.list[ctx->oids.nr++].hash, hash); } } @@ -924,14 +920,14 @@ int write_commit_graph(const char *obj_dir, int dirlen; strbuf_addf(&packname, "%s/pack/", obj_dir); dirlen = packname.len; - if (report_progress) { + if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Finding commits for commit graph in %d pack", "Finding commits for commit graph in %d packs", pack_indexes->nr), pack_indexes->nr); - oids.progress = start_delayed_progress(progress_title.buf, 0); - oids.progress_done = 0; + ctx->progress = start_delayed_progress(progress_title.buf, 0); + ctx->progress_done = 0; } for (i = 0; i < pack_indexes->nr; i++) { struct packed_git *p; @@ -948,75 +944,76 @@ int write_commit_graph(const char *obj_dir, res = -1; goto cleanup; } - for_each_object_in_pack(p, add_packed_commits, &oids, + for_each_object_in_pack(p, add_packed_commits, ctx, FOR_EACH_OBJECT_PACK_ORDER); close_pack(p); free(p); } - stop_progress(&oids.progress); + stop_progress(&ctx->progress); strbuf_reset(&progress_title); strbuf_release(&packname); } if (commit_hex) { - if (report_progress) { + if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Finding commits for commit graph from %d ref", "Finding commits for commit graph from %d refs", commit_hex->nr), commit_hex->nr); - progress = start_delayed_progress(progress_title.buf, - commit_hex->nr); + ctx->progress = start_delayed_progress( + progress_title.buf, + commit_hex->nr); } for (i = 0; i < commit_hex->nr; i++) { const char *end; struct object_id oid; struct commit *result; - display_progress(progress, i + 1); + display_progress(ctx->progress, i + 1); if (commit_hex->items[i].string && parse_oid_hex(commit_hex->items[i].string, &oid, &end)) continue; - result = lookup_commit_reference_gently(the_repository, &oid, 1); + result = lookup_commit_reference_gently(ctx->r, &oid, 1); if (result) { - ALLOC_GROW(oids.list, oids.nr + 1, oids.alloc); - oidcpy(&oids.list[oids.nr], &(result->object.oid)); - oids.nr++; + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); + ctx->oids.nr++; } } - stop_progress(&progress); + stop_progress(&ctx->progress); strbuf_reset(&progress_title); } if (!pack_indexes && !commit_hex) { - if (report_progress) - oids.progress = start_delayed_progress( + if (ctx->report_progress) + ctx->progress = start_delayed_progress( _("Finding commits for commit graph among packed objects"), - approx_nr_objects); - for_each_packed_object(add_packed_commits, &oids, + ctx->approx_nr_objects); + for_each_packed_object(add_packed_commits, ctx, FOR_EACH_OBJECT_PACK_ORDER); - if (oids.progress_done < approx_nr_objects) - display_progress(oids.progress, approx_nr_objects); - stop_progress(&oids.progress); + if (ctx->progress_done < ctx->approx_nr_objects) + display_progress(ctx->progress, ctx->approx_nr_objects); + stop_progress(&ctx->progress); } - close_reachable(&oids, report_progress); + close_reachable(ctx); - if (report_progress) - progress = start_delayed_progress( + if (ctx->report_progress) + ctx->progress = start_delayed_progress( _("Counting distinct commits in commit graph"), - oids.nr); - display_progress(progress, 0); /* TODO: Measure QSORT() progress */ - QSORT(oids.list, oids.nr, commit_compare); + ctx->oids.nr); + display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ + QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); count_distinct = 1; - for (i = 1; i < oids.nr; i++) { - display_progress(progress, i + 1); - if (!oideq(&oids.list[i - 1], &oids.list[i])) + for (i = 1; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) count_distinct++; } - stop_progress(&progress); + stop_progress(&ctx->progress); if (count_distinct >= GRAPH_EDGE_LAST_MASK) { error(_("the commit graph format cannot write %d commits"), count_distinct); @@ -1024,54 +1021,54 @@ int write_commit_graph(const char *obj_dir, goto cleanup; } - commits.nr = 0; - commits.alloc = count_distinct; - ALLOC_ARRAY(commits.list, commits.alloc); + ctx->commits.alloc = count_distinct; + ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc); - num_extra_edges = 0; - if (report_progress) - progress = start_delayed_progress( + ctx->num_extra_edges = 0; + if (ctx->report_progress) + ctx->progress = start_delayed_progress( _("Finding extra edges in commit graph"), - oids.nr); - for (i = 0; i < oids.nr; i++) { + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { int num_parents = 0; - display_progress(progress, i + 1); - if (i > 0 && oideq(&oids.list[i - 1], &oids.list[i])) + display_progress(ctx->progress, i + 1); + if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) continue; - commits.list[commits.nr] = lookup_commit(the_repository, &oids.list[i]); - parse_commit_no_graph(commits.list[commits.nr]); + ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); - for (parent = commits.list[commits.nr]->parents; + for (parent = ctx->commits.list[ctx->commits.nr]->parents; parent; parent = parent->next) num_parents++; if (num_parents > 2) - num_extra_edges += num_parents - 1; + ctx->num_extra_edges += num_parents - 1; - commits.nr++; + ctx->commits.nr++; } - num_chunks = num_extra_edges ? 4 : 3; - stop_progress(&progress); + stop_progress(&ctx->progress); - if (commits.nr >= GRAPH_EDGE_LAST_MASK) { + if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) { error(_("too many commits to write graph")); res = -1; goto cleanup; } - compute_generation_numbers(&commits, report_progress); + compute_generation_numbers(ctx); - graph_name = get_commit_graph_filename(obj_dir); - if (safe_create_leading_directories(graph_name)) { - UNLEAK(graph_name); + num_chunks = ctx->num_extra_edges ? 4 : 3; + + ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); + if (safe_create_leading_directories(ctx->graph_name)) { + UNLEAK(ctx->graph_name); error(_("unable to create leading directories of %s"), - graph_name); + ctx->graph_name); res = -1; goto cleanup; } - hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); + hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); hashwrite_be32(f, GRAPH_SIGNATURE); @@ -1084,7 +1081,7 @@ int write_commit_graph(const char *obj_dir, chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunk_ids[2] = GRAPH_CHUNKID_DATA; - if (num_extra_edges) + if (ctx->num_extra_edges) chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; else chunk_ids[3] = 0; @@ -1092,9 +1089,9 @@ int write_commit_graph(const char *obj_dir, chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; - chunk_offsets[2] = chunk_offsets[1] + hashsz * commits.nr; - chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * commits.nr; - chunk_offsets[4] = chunk_offsets[3] + 4 * num_extra_edges; + chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; + chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; + chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; @@ -1105,32 +1102,33 @@ int write_commit_graph(const char *obj_dir, hashwrite(f, chunk_write, 12); } - if (report_progress) { + if (ctx->report_progress) { strbuf_addf(&progress_title, Q_("Writing out commit graph in %d pass", "Writing out commit graph in %d passes", num_chunks), num_chunks); - progress = start_delayed_progress( + ctx->progress = start_delayed_progress( progress_title.buf, - num_chunks * commits.nr); + num_chunks * ctx->commits.nr); } - write_graph_chunk_fanout(f, commits.list, commits.nr, progress, &progress_cnt); - write_graph_chunk_oids(f, hashsz, commits.list, commits.nr, progress, &progress_cnt); - write_graph_chunk_data(f, hashsz, commits.list, commits.nr, progress, &progress_cnt); - if (num_extra_edges) - write_graph_chunk_extra_edges(f, commits.list, commits.nr, progress, &progress_cnt); - stop_progress(&progress); + write_graph_chunk_fanout(f, ctx); + write_graph_chunk_oids(f, hashsz, ctx); + write_graph_chunk_data(f, hashsz, ctx); + if (ctx->num_extra_edges) + write_graph_chunk_extra_edges(f, ctx); + stop_progress(&ctx->progress); strbuf_release(&progress_title); - close_commit_graph(the_repository); + close_commit_graph(ctx->r); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); cleanup: free(graph_name); - free(commits.list); - free(oids.list); + free(ctx->commits.list); + free(ctx->oids.list); + free(ctx); return res; } From ef5b83f2cfe11c2feb98b962881e9a8e6281e3ff Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:41 -0700 Subject: [PATCH 06/32] commit-graph: extract fill_oids_from_packs() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. This extracts fill_oids_from_packs() that reads the given pack-file list and fills the oid list in the context. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 83 ++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 6d7e83cfe86c66..02e5f8c65155ac 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -867,6 +867,51 @@ int write_commit_graph_reachable(const char *obj_dir, unsigned int flags) return result; } +static int fill_oids_from_packs(struct write_commit_graph_context *ctx, + struct string_list *pack_indexes) +{ + uint32_t i; + struct strbuf progress_title = STRBUF_INIT; + struct strbuf packname = STRBUF_INIT; + int dirlen; + + strbuf_addf(&packname, "%s/pack/", ctx->obj_dir); + dirlen = packname.len; + if (ctx->report_progress) { + strbuf_addf(&progress_title, + Q_("Finding commits for commit graph in %d pack", + "Finding commits for commit graph in %d packs", + pack_indexes->nr), + pack_indexes->nr); + ctx->progress = start_delayed_progress(progress_title.buf, 0); + ctx->progress_done = 0; + } + for (i = 0; i < pack_indexes->nr; i++) { + struct packed_git *p; + strbuf_setlen(&packname, dirlen); + strbuf_addstr(&packname, pack_indexes->items[i].string); + p = add_packed_git(packname.buf, packname.len, 1); + if (!p) { + error(_("error adding pack %s"), packname.buf); + return -1; + } + if (open_pack_index(p)) { + error(_("error opening index for %s"), packname.buf); + return -1; + } + for_each_object_in_pack(p, add_packed_commits, ctx, + FOR_EACH_OBJECT_PACK_ORDER); + close_pack(p); + free(p); + } + + stop_progress(&ctx->progress); + strbuf_reset(&progress_title); + strbuf_release(&packname); + + return 0; +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -916,42 +961,8 @@ int write_commit_graph(const char *obj_dir, } if (pack_indexes) { - struct strbuf packname = STRBUF_INIT; - int dirlen; - strbuf_addf(&packname, "%s/pack/", obj_dir); - dirlen = packname.len; - if (ctx->report_progress) { - strbuf_addf(&progress_title, - Q_("Finding commits for commit graph in %d pack", - "Finding commits for commit graph in %d packs", - pack_indexes->nr), - pack_indexes->nr); - ctx->progress = start_delayed_progress(progress_title.buf, 0); - ctx->progress_done = 0; - } - for (i = 0; i < pack_indexes->nr; i++) { - struct packed_git *p; - strbuf_setlen(&packname, dirlen); - strbuf_addstr(&packname, pack_indexes->items[i].string); - p = add_packed_git(packname.buf, packname.len, 1); - if (!p) { - error(_("error adding pack %s"), packname.buf); - res = -1; - goto cleanup; - } - if (open_pack_index(p)) { - error(_("error opening index for %s"), packname.buf); - res = -1; - goto cleanup; - } - for_each_object_in_pack(p, add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); - close_pack(p); - free(p); - } - stop_progress(&ctx->progress); - strbuf_reset(&progress_title); - strbuf_release(&packname); + if ((res = fill_oids_from_packs(ctx, pack_indexes))) + goto cleanup; } if (commit_hex) { From 4c9efe850d75115a6deedd57072f1d7383bc03da Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:42 -0700 Subject: [PATCH 07/32] commit-graph: extract fill_oids_from_commit_hex() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract fill_oids_from_commit_hex() that reads the given commit id list and fille the oid list in the context. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 72 ++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 02e5f8c65155ac..4fae1fcdb2f78a 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -912,6 +912,44 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, return 0; } +static void fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, + struct string_list *commit_hex) +{ + uint32_t i; + struct strbuf progress_title = STRBUF_INIT; + + if (ctx->report_progress) { + strbuf_addf(&progress_title, + Q_("Finding commits for commit graph from %d ref", + "Finding commits for commit graph from %d refs", + commit_hex->nr), + commit_hex->nr); + ctx->progress = start_delayed_progress( + progress_title.buf, + commit_hex->nr); + } + for (i = 0; i < commit_hex->nr; i++) { + const char *end; + struct object_id oid; + struct commit *result; + + display_progress(ctx->progress, i + 1); + if (commit_hex->items[i].string && + parse_oid_hex(commit_hex->items[i].string, &oid, &end)) + continue; + + result = lookup_commit_reference_gently(ctx->r, &oid, 1); + + if (result) { + ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); + oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); + ctx->oids.nr++; + } + } + stop_progress(&ctx->progress); + strbuf_release(&progress_title); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -965,38 +1003,8 @@ int write_commit_graph(const char *obj_dir, goto cleanup; } - if (commit_hex) { - if (ctx->report_progress) { - strbuf_addf(&progress_title, - Q_("Finding commits for commit graph from %d ref", - "Finding commits for commit graph from %d refs", - commit_hex->nr), - commit_hex->nr); - ctx->progress = start_delayed_progress( - progress_title.buf, - commit_hex->nr); - } - for (i = 0; i < commit_hex->nr; i++) { - const char *end; - struct object_id oid; - struct commit *result; - - display_progress(ctx->progress, i + 1); - if (commit_hex->items[i].string && - parse_oid_hex(commit_hex->items[i].string, &oid, &end)) - continue; - - result = lookup_commit_reference_gently(ctx->r, &oid, 1); - - if (result) { - ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc); - oidcpy(&ctx->oids.list[ctx->oids.nr], &(result->object.oid)); - ctx->oids.nr++; - } - } - stop_progress(&ctx->progress); - strbuf_reset(&progress_title); - } + if (commit_hex) + fill_oids_from_commit_hex(ctx, commit_hex); if (!pack_indexes && !commit_hex) { if (ctx->report_progress) From b2c83060525e69f628fb32836e311978e3899e6e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:42 -0700 Subject: [PATCH 08/32] commit-graph: extract fill_oids_from_all_packs() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract fill_oids_from_all_packs() that reads all pack-files for commits and fills the oid list in the context. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 4fae1fcdb2f78a..61cb43ddf8f4e3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -950,6 +950,19 @@ static void fill_oids_from_commit_hex(struct write_commit_graph_context *ctx, strbuf_release(&progress_title); } +static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) +{ + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Finding commits for commit graph among packed objects"), + ctx->approx_nr_objects); + for_each_packed_object(add_packed_commits, ctx, + FOR_EACH_OBJECT_PACK_ORDER); + if (ctx->progress_done < ctx->approx_nr_objects) + display_progress(ctx->progress, ctx->approx_nr_objects); + stop_progress(&ctx->progress); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1006,17 +1019,8 @@ int write_commit_graph(const char *obj_dir, if (commit_hex) fill_oids_from_commit_hex(ctx, commit_hex); - if (!pack_indexes && !commit_hex) { - if (ctx->report_progress) - ctx->progress = start_delayed_progress( - _("Finding commits for commit graph among packed objects"), - ctx->approx_nr_objects); - for_each_packed_object(add_packed_commits, ctx, - FOR_EACH_OBJECT_PACK_ORDER); - if (ctx->progress_done < ctx->approx_nr_objects) - display_progress(ctx->progress, ctx->approx_nr_objects); - stop_progress(&ctx->progress); - } + if (!pack_indexes && !commit_hex) + fill_oids_from_all_packs(ctx); close_reachable(ctx); From 014e3440f524a620fd8aba92f1efeac737d9c23d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:43 -0700 Subject: [PATCH 09/32] commit-graph: extract count_distinct_commits() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract count_distinct_commits(), which sorts the oids list, then iterates through to find duplicates. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 61cb43ddf8f4e3..1a0a875a7b2c36 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -963,6 +963,27 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) +{ + uint32_t i, count_distinct = 1; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Counting distinct commits in commit graph"), + ctx->oids.nr); + display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ + QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); + + for (i = 1; i < ctx->oids.nr; i++) { + display_progress(ctx->progress, i + 1); + if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) + count_distinct++; + } + stop_progress(&ctx->progress); + + return count_distinct; +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1024,19 +1045,7 @@ int write_commit_graph(const char *obj_dir, close_reachable(ctx); - if (ctx->report_progress) - ctx->progress = start_delayed_progress( - _("Counting distinct commits in commit graph"), - ctx->oids.nr); - display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ - QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); - count_distinct = 1; - for (i = 1; i < ctx->oids.nr; i++) { - display_progress(ctx->progress, i + 1); - if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) - count_distinct++; - } - stop_progress(&ctx->progress); + count_distinct = count_distinct_commits(ctx); if (count_distinct >= GRAPH_EDGE_LAST_MASK) { error(_("the commit graph format cannot write %d commits"), count_distinct); From f998d542260ec643875e7f0fa860dbb43c2388ca Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:44 -0700 Subject: [PATCH 10/32] commit-graph: extract copy_oids_to_commits() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract copy_oids_to_commits(), which fills the commits list with the distinct commits from the oids list. During this loop, it also counts the number of "extra" edges from octopus merges. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 57 ++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 1a0a875a7b2c36..72f9c5c7e20c8f 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -984,6 +984,37 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) return count_distinct; } +static void copy_oids_to_commits(struct write_commit_graph_context *ctx) +{ + uint32_t i; + struct commit_list *parent; + + ctx->num_extra_edges = 0; + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Finding extra edges in commit graph"), + ctx->oids.nr); + for (i = 0; i < ctx->oids.nr; i++) { + int num_parents = 0; + display_progress(ctx->progress, i + 1); + if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) + continue; + + ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); + + for (parent = ctx->commits.list[ctx->commits.nr]->parents; + parent; parent = parent->next) + num_parents++; + + if (num_parents > 2) + ctx->num_extra_edges += num_parents - 1; + + ctx->commits.nr++; + } + stop_progress(&ctx->progress); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -997,7 +1028,6 @@ int write_commit_graph(const char *obj_dir, uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; int num_chunks; - struct commit_list *parent; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int res = 0; @@ -1056,30 +1086,7 @@ int write_commit_graph(const char *obj_dir, ctx->commits.alloc = count_distinct; ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc); - ctx->num_extra_edges = 0; - if (ctx->report_progress) - ctx->progress = start_delayed_progress( - _("Finding extra edges in commit graph"), - ctx->oids.nr); - for (i = 0; i < ctx->oids.nr; i++) { - int num_parents = 0; - display_progress(ctx->progress, i + 1); - if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) - continue; - - ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); - parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); - - for (parent = ctx->commits.list[ctx->commits.nr]->parents; - parent; parent = parent->next) - num_parents++; - - if (num_parents > 2) - ctx->num_extra_edges += num_parents - 1; - - ctx->commits.nr++; - } - stop_progress(&ctx->progress); + copy_oids_to_commits(ctx); if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) { error(_("too many commits to write graph")); From 238def57fe7ba219c9e50094512fff1068ac5413 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 12 Jun 2019 06:29:45 -0700 Subject: [PATCH 11/32] commit-graph: extract write_commit_graph_file() The write_commit_graph() method is too complex, so we are extracting helper functions one by one. Extract write_commit_graph_file() that takes all of the information in the context struct and writes the data to a commit-graph file. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 155 +++++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 75 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 72f9c5c7e20c8f..9d2c72f5b4ac0c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1015,21 +1015,91 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } -int write_commit_graph(const char *obj_dir, - struct string_list *pack_indexes, - struct string_list *commit_hex, - unsigned int flags) +static int write_commit_graph_file(struct write_commit_graph_context *ctx) { - struct write_commit_graph_context *ctx; + uint32_t i; struct hashfile *f; - uint32_t i, count_distinct = 0; - char *graph_name = NULL; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; - int num_chunks; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; + int num_chunks = ctx->num_extra_edges ? 4 : 3; + + ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); + if (safe_create_leading_directories(ctx->graph_name)) { + UNLEAK(ctx->graph_name); + error(_("unable to create leading directories of %s"), + ctx->graph_name); + return -1; + } + + hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); + f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + + hashwrite_be32(f, GRAPH_SIGNATURE); + + hashwrite_u8(f, GRAPH_VERSION); + hashwrite_u8(f, oid_version()); + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 0); /* unused padding byte */ + + chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; + chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; + chunk_ids[2] = GRAPH_CHUNKID_DATA; + if (ctx->num_extra_edges) + chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; + else + chunk_ids[3] = 0; + chunk_ids[4] = 0; + + chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; + chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; + chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; + chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; + chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; + + for (i = 0; i <= num_chunks; i++) { + uint32_t chunk_write[3]; + + chunk_write[0] = htonl(chunk_ids[i]); + chunk_write[1] = htonl(chunk_offsets[i] >> 32); + chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); + hashwrite(f, chunk_write, 12); + } + + if (ctx->report_progress) { + strbuf_addf(&progress_title, + Q_("Writing out commit graph in %d pass", + "Writing out commit graph in %d passes", + num_chunks), + num_chunks); + ctx->progress = start_delayed_progress( + progress_title.buf, + num_chunks * ctx->commits.nr); + } + write_graph_chunk_fanout(f, ctx); + write_graph_chunk_oids(f, hashsz, ctx); + write_graph_chunk_data(f, hashsz, ctx); + if (ctx->num_extra_edges) + write_graph_chunk_extra_edges(f, ctx); + stop_progress(&ctx->progress); + strbuf_release(&progress_title); + + close_commit_graph(ctx->r); + finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + commit_lock_file(&lk); + + return 0; +} + +int write_commit_graph(const char *obj_dir, + struct string_list *pack_indexes, + struct string_list *commit_hex, + unsigned int flags) +{ + struct write_commit_graph_context *ctx; + uint32_t i, count_distinct = 0; int res = 0; if (!commit_graph_compatible(the_repository)) @@ -1096,75 +1166,10 @@ int write_commit_graph(const char *obj_dir, compute_generation_numbers(ctx); - num_chunks = ctx->num_extra_edges ? 4 : 3; - - ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); - if (safe_create_leading_directories(ctx->graph_name)) { - UNLEAK(ctx->graph_name); - error(_("unable to create leading directories of %s"), - ctx->graph_name); - res = -1; - goto cleanup; - } - - hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); - f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); - - hashwrite_be32(f, GRAPH_SIGNATURE); - - hashwrite_u8(f, GRAPH_VERSION); - hashwrite_u8(f, oid_version()); - hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); /* unused padding byte */ - - chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; - chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; - chunk_ids[2] = GRAPH_CHUNKID_DATA; - if (ctx->num_extra_edges) - chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; - else - chunk_ids[3] = 0; - chunk_ids[4] = 0; - - chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; - chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; - chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; - chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; - chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; - - for (i = 0; i <= num_chunks; i++) { - uint32_t chunk_write[3]; - - chunk_write[0] = htonl(chunk_ids[i]); - chunk_write[1] = htonl(chunk_offsets[i] >> 32); - chunk_write[2] = htonl(chunk_offsets[i] & 0xffffffff); - hashwrite(f, chunk_write, 12); - } - - if (ctx->report_progress) { - strbuf_addf(&progress_title, - Q_("Writing out commit graph in %d pass", - "Writing out commit graph in %d passes", - num_chunks), - num_chunks); - ctx->progress = start_delayed_progress( - progress_title.buf, - num_chunks * ctx->commits.nr); - } - write_graph_chunk_fanout(f, ctx); - write_graph_chunk_oids(f, hashsz, ctx); - write_graph_chunk_data(f, hashsz, ctx); - if (ctx->num_extra_edges) - write_graph_chunk_extra_edges(f, ctx); - stop_progress(&ctx->progress); - strbuf_release(&progress_title); - - close_commit_graph(ctx->r); - finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); - commit_lock_file(&lk); + res = write_commit_graph_file(ctx); cleanup: - free(graph_name); + free(ctx->graph_name); free(ctx->commits.list); free(ctx->oids.list); free(ctx); From c3a3a964b29221a9b5fa305a08037c90b9f74be0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 17 May 2019 11:41:47 -0700 Subject: [PATCH 12/32] commit-graph: use raw_object_store when closing The close_commit_graph() method took a repository struct, but then only uses the raw_object_store within. Change the function prototype to make the method more flexible. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 8 ++++---- commit-graph.h | 2 +- upload-pack.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 9d2c72f5b4ac0c..76d189de454161 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -359,10 +359,10 @@ int generation_numbers_enabled(struct repository *r) return !!first_generation; } -void close_commit_graph(struct repository *r) +void close_commit_graph(struct raw_object_store *o) { - free_commit_graph(r->objects->commit_graph); - r->objects->commit_graph = NULL; + free_commit_graph(o->commit_graph); + o->commit_graph = NULL; } static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos) @@ -1086,7 +1086,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); strbuf_release(&progress_title); - close_commit_graph(ctx->r); + close_commit_graph(ctx->r->objects); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); diff --git a/commit-graph.h b/commit-graph.h index 01538b5cf59986..390c7f696104fb 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -82,7 +82,7 @@ int write_commit_graph(const char *obj_dir, int verify_commit_graph(struct repository *r, struct commit_graph *g); -void close_commit_graph(struct repository *); +void close_commit_graph(struct raw_object_store *); void free_commit_graph(struct commit_graph *); #endif diff --git a/upload-pack.c b/upload-pack.c index d098ef598289d1..b51bed21e4059b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -717,7 +717,7 @@ static void deepen_by_rev_list(struct packet_writer *writer, int ac, { struct commit_list *result; - close_commit_graph(the_repository); + close_commit_graph(the_repository->objects); result = get_shallow_commits_by_rev_list(ac, av, SHALLOW, NOT_SHALLOW); send_shallow(writer, result); free_commit_list(result); From 5472c32c3724e1404c3d80368edc28910969c29a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 17 May 2019 11:41:48 -0700 Subject: [PATCH 13/32] packfile: close commit-graph in close_all_packs The close_all_packs() method is used to close all read handles to pack-files and the multi-pack-index before running 'git gc --auto'. This is particularly important on the Windows platform, where read handles block any writes to those files. Replacing one of these files with a rename() will fail in this situation. The commit-graph also performs a rename, so is susceptable to this problem. We are careful to close the commit-graph before writing, but that doesn't work when a 'git fetch' (or similar) process runs 'git gc --auto' which may write a commit-graph. Here, close the commit-graph as part of close_all_packs(). Reported-by: Johannes Schindelin Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- packfile.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packfile.c b/packfile.c index 16bcb75262d918..ce12bffe3e911c 100644 --- a/packfile.c +++ b/packfile.c @@ -16,6 +16,7 @@ #include "tree.h" #include "object-store.h" #include "midx.h" +#include "commit-graph.h" char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, @@ -350,6 +351,8 @@ void close_all_packs(struct raw_object_store *o) close_midx(o->multi_pack_index); o->multi_pack_index = NULL; } + + close_commit_graph(o); } /* From 2d511cfc0bfe1d2b98ba8b272ddd9ba83e84e5f8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 17 May 2019 11:41:49 -0700 Subject: [PATCH 14/32] packfile: rename close_all_packs to close_object_store The close_all_packs() method is now responsible for more than just pack-files. It also closes the commit-graph and the multi-pack-index. Rename the function to be more descriptive of its larger role. The name also fits because the input parameter is a raw_object_store. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/am.c | 2 +- builtin/clone.c | 2 +- builtin/fetch.c | 2 +- builtin/gc.c | 4 ++-- builtin/merge.c | 2 +- builtin/rebase.c | 2 +- builtin/receive-pack.c | 2 +- builtin/repack.c | 2 +- object.c | 2 +- packfile.c | 2 +- packfile.h | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/builtin/am.c b/builtin/am.c index 58a2aef28bbcd7..9315d32d2a0bee 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -1800,7 +1800,7 @@ static void am_run(struct am_state *state, int resume) */ if (!state->rebasing) { am_destroy(state); - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); } } diff --git a/builtin/clone.c b/builtin/clone.c index 50bde9961809b1..82ce682c8065a4 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1240,7 +1240,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport_disconnect(transport); if (option_dissociate) { - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); dissociate_from_references(); } diff --git a/builtin/fetch.c b/builtin/fetch.c index b620fd54b4e95c..3aec95608ffafd 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1670,7 +1670,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) string_list_clear(&list, 0); - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); argv_array_pushl(&argv_gc_auto, "gc", "--auto", NULL); if (verbosity < 0) diff --git a/builtin/gc.c b/builtin/gc.c index df2573f124d5fd..20c8f1bfe8024d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -632,7 +632,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) gc_before_repack(); if (!repository_format_precious_objects) { - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); if (run_command_v_opt(repack.argv, RUN_GIT_CMD)) die(FAILED_RUN, repack.argv[0]); @@ -660,7 +660,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) report_garbage = report_pack_garbage; reprepare_packed_git(the_repository); if (pack_garbage.nr > 0) { - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); clean_pack_garbage(); } diff --git a/builtin/merge.c b/builtin/merge.c index e47d77baeebe88..72d7a7c909080c 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -449,7 +449,7 @@ static void finish(struct commit *head_commit, * We ignore errors in 'gc --auto', since the * user should see them. */ - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); } } diff --git a/builtin/rebase.c b/builtin/rebase.c index 7c7bc13e912aa8..ed30fcd63334c1 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -328,7 +328,7 @@ static int finish_rebase(struct rebase_options *opts) delete_ref(NULL, "REBASE_HEAD", NULL, REF_NO_DEREF); apply_autostash(opts); - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); /* * We ignore errors in 'gc --auto', since the * user should see them. diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index d58b7750b6b465..92cd1f508c1979 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2032,7 +2032,7 @@ int cmd_receive_pack(int argc, const char **argv, const char *prefix) proc.git_cmd = 1; proc.argv = argv_gc_auto; - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); if (!start_command(&proc)) { if (use_sideband) copy_to_sideband(proc.err, -1, NULL); diff --git a/builtin/repack.c b/builtin/repack.c index 67f8978043a439..4de8b6600c4523 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -419,7 +419,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (!names.nr && !po_args.quiet) printf_ln(_("Nothing new to pack.")); - close_all_packs(the_repository->objects); + close_object_store(the_repository->objects); /* * Ok we have prepared all new packfiles. diff --git a/object.c b/object.c index e81d47a79cd6eb..cf1a2b708612a0 100644 --- a/object.c +++ b/object.c @@ -517,7 +517,7 @@ void raw_object_store_clear(struct raw_object_store *o) o->loaded_alternates = 0; INIT_LIST_HEAD(&o->packed_git_mru); - close_all_packs(o); + close_object_store(o); o->packed_git = NULL; } diff --git a/packfile.c b/packfile.c index ce12bffe3e911c..017046fcf956fd 100644 --- a/packfile.c +++ b/packfile.c @@ -337,7 +337,7 @@ void close_pack(struct packed_git *p) close_pack_index(p); } -void close_all_packs(struct raw_object_store *o) +void close_object_store(struct raw_object_store *o) { struct packed_git *p; diff --git a/packfile.h b/packfile.h index d70c6d9afb94c7..e95e389eb804e5 100644 --- a/packfile.h +++ b/packfile.h @@ -81,7 +81,7 @@ extern uint32_t get_pack_fanout(struct packed_git *p, uint32_t value); extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); extern void close_pack_windows(struct packed_git *); extern void close_pack(struct packed_git *); -extern void close_all_packs(struct raw_object_store *o); +extern void close_object_store(struct raw_object_store *o); extern void unuse_pack(struct pack_window **); extern void clear_delta_base_cache(void); extern struct packed_git *add_packed_git(const char *path, size_t path_len, int local); From 890345ac106f78dfcfcde002de39ebbb410e0b39 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:23 -0700 Subject: [PATCH 15/32] commit-graph: document commit-graph chains Add a basic description of commit-graph chains. More details about the feature will be added as we add functionality. This introduction gives a high-level overview to the goals of the feature and the basic layout of commit-graph chains. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 59 ++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index fb53341d5ee311..1dca3bd8fe90a1 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -127,6 +127,65 @@ Design Details helpful for these clones, anyway. The commit-graph will not be read or written when shallow commits are present. +Commit Graphs Chains +-------------------- + +Typically, repos grow with near-constant velocity (commits per day). Over time, +the number of commits added by a fetch operation is much smaller than the +number of commits in the full history. By creating a "chain" of commit-graphs, +we enable fast writes of new commit data without rewriting the entire commit +history -- at least, most of the time. + +## File Layout + +A commit-graph chain uses multiple files, and we use a fixed naming convention +to organize these files. Each commit-graph file has a name +`$OBJDIR/info/commit-graphs/graph-{hash}.graph` where `{hash}` is the hex- +valued hash stored in the footer of that file (which is a hash of the file's +contents before that hash). For a chain of commit-graph files, a plain-text +file at `$OBJDIR/info/commit-graphs/commit-graph-chain` contains the +hashes for the files in order from "lowest" to "highest". + +For example, if the `commit-graph-chain` file contains the lines + +``` + {hash0} + {hash1} + {hash2} +``` + +then the commit-graph chain looks like the following diagram: + + +-----------------------+ + | graph-{hash2}.graph | + +-----------------------+ + | + +-----------------------+ + | | + | graph-{hash1}.graph | + | | + +-----------------------+ + | + +-----------------------+ + | | + | | + | | + | graph-{hash0}.graph | + | | + | | + | | + +-----------------------+ + +Let X0 be the number of commits in `graph-{hash0}.graph`, X1 be the number of +commits in `graph-{hash1}.graph`, and X2 be the number of commits in +`graph-{hash2}.graph`. If a commit appears in position i in `graph-{hash2}.graph`, +then we interpret this as being the commit in position (X0 + X1 + i), and that +will be used as its "graph position". The commits in `graph-{hash2}.graph` use these +positions to refer to their parents, which may be in `graph-{hash1}.graph` or +`graph-{hash0}.graph`. We can navigate to an arbitrary commit in position j by checking +its containment in the intervals [0, X0), [X0, X0 + X1), [X0 + X1, X0 + X1 + +X2). + Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 From d4f4d60f6da8b93f768b2b4958c04cc1b3cea443 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:24 -0700 Subject: [PATCH 16/32] commit-graph: prepare for commit-graph chains To prepare for a chain of commit-graph files, augment the commit_graph struct to point to a base commit_graph. As we load commits from the graph, we may actually want to read from a base file according to the graph position. The "graph position" of a commit is given by concatenating the lexicographic commit orders from each of the commit-graph files in the chain. This means that we must distinguish two values: * lexicographic index : the position within the lexicographic order in a single commit-graph file. * graph position: the position within the concatenated order of multiple commit-graph files Given the lexicographic index of a commit in a graph, we can compute the graph position by adding the number of commits in the lower-level graphs. To find the lexicographic index of a commit, we subtract the number of commits in lower-level graphs. While here, change insert_parent_or_die() to take a uint32_t position, as that is the type used by its only caller and that makes more sense with the limits in the commit-graph format. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 89 +++++++++++++++++++++++++++++++++++++++++++------- commit-graph.h | 3 ++ 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 76d189de454161..8f5c09363c4a54 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -359,9 +359,18 @@ int generation_numbers_enabled(struct repository *r) return !!first_generation; } +static void close_commit_graph_one(struct commit_graph *g) +{ + if (!g) + return; + + close_commit_graph_one(g->base_graph); + free_commit_graph(g); +} + void close_commit_graph(struct raw_object_store *o) { - free_commit_graph(o->commit_graph); + close_commit_graph_one(o->commit_graph); o->commit_graph = NULL; } @@ -371,18 +380,38 @@ static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t g->chunk_oid_lookup, g->hash_len, pos); } +static void load_oid_from_graph(struct commit_graph *g, + uint32_t pos, + struct object_id *oid) +{ + uint32_t lex_index; + + while (g && pos < g->num_commits_in_base) + g = g->base_graph; + + if (!g) + BUG("NULL commit-graph"); + + if (pos >= g->num_commits + g->num_commits_in_base) + die(_("invalid commit position. commit-graph is likely corrupt")); + + lex_index = pos - g->num_commits_in_base; + + hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index); +} + static struct commit_list **insert_parent_or_die(struct repository *r, struct commit_graph *g, - uint64_t pos, + uint32_t pos, struct commit_list **pptr) { struct commit *c; struct object_id oid; - if (pos >= g->num_commits) - die("invalid parent position %"PRIu64, pos); + if (pos >= g->num_commits + g->num_commits_in_base) + die("invalid parent position %"PRIu32, pos); - hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos); + load_oid_from_graph(g, pos, &oid); c = lookup_commit(r, &oid); if (!c) die(_("could not find commit %s"), oid_to_hex(&oid)); @@ -392,7 +421,14 @@ static struct commit_list **insert_parent_or_die(struct repository *r, static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos) { - const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos; + const unsigned char *commit_data; + uint32_t lex_index; + + while (pos < g->num_commits_in_base) + g = g->base_graph; + + lex_index = pos - g->num_commits_in_base; + commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; item->graph_pos = pos; item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; } @@ -405,10 +441,25 @@ static int fill_commit_in_graph(struct repository *r, uint32_t *parent_data_ptr; uint64_t date_low, date_high; struct commit_list **pptr; - const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * pos; + const unsigned char *commit_data; + uint32_t lex_index; - item->object.parsed = 1; + while (pos < g->num_commits_in_base) + g = g->base_graph; + + if (pos >= g->num_commits + g->num_commits_in_base) + die(_("invalid commit position. commit-graph is likely corrupt")); + + /* + * Store the "full" position, but then use the + * "local" position for the rest of the calculation. + */ item->graph_pos = pos; + lex_index = pos - g->num_commits_in_base; + + commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index; + + item->object.parsed = 1; item->maybe_tree = NULL; @@ -452,7 +503,18 @@ static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uin *pos = item->graph_pos; return 1; } else { - return bsearch_graph(g, &(item->object.oid), pos); + struct commit_graph *cur_g = g; + uint32_t lex_index; + + while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index)) + cur_g = cur_g->base_graph; + + if (cur_g) { + *pos = lex_index + cur_g->num_commits_in_base; + return 1; + } + + return 0; } } @@ -492,8 +554,13 @@ static struct tree *load_tree_for_commit(struct repository *r, struct commit *c) { struct object_id oid; - const unsigned char *commit_data = g->chunk_commit_data + - GRAPH_DATA_WIDTH * (c->graph_pos); + const unsigned char *commit_data; + + while (c->graph_pos < g->num_commits_in_base) + g = g->base_graph; + + commit_data = g->chunk_commit_data + + GRAPH_DATA_WIDTH * (c->graph_pos - g->num_commits_in_base); hashcpy(oid.hash, commit_data); c->maybe_tree = lookup_tree(r, &oid); diff --git a/commit-graph.h b/commit-graph.h index 390c7f696104fb..5819910a5b22d9 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -48,6 +48,9 @@ struct commit_graph { uint32_t num_commits; struct object_id oid; + uint32_t num_commits_in_base; + struct commit_graph *base_graph; + const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_commit_data; From 3cbc6ed3ee02ab7c8254d51528ca75644c52d52d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:24 -0700 Subject: [PATCH 17/32] commit-graph: rename commit_compare to oid_compare The helper function commit_compare() actually compares object_id structs, not commits. A future change to commit-graph.c will need to sort commit structs, so rename this function in advance. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 8f5c09363c4a54..96b07a674e4d65 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -770,7 +770,7 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, } } -static int commit_compare(const void *_a, const void *_b) +static int oid_compare(const void *_a, const void *_b) { const struct object_id *a = (const struct object_id *)_a; const struct object_id *b = (const struct object_id *)_b; @@ -1039,7 +1039,7 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) _("Counting distinct commits in commit graph"), ctx->oids.nr); display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ - QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); + QSORT(ctx->oids.list, ctx->oids.nr, oid_compare); for (i = 1; i < ctx->oids.nr; i++) { display_progress(ctx->progress, i + 1); From 5c84b3396c73382eb49285b4e4bbb1bf82be3a9c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:25 -0700 Subject: [PATCH 18/32] commit-graph: load commit-graph chains Prepare the logic for reading a chain of commit-graphs. First, look for a file at $OBJDIR/info/commit-graph. If it exists, then use that file and stop. Next, look for the chain file at $OBJDIR/info/commit-graphs/commit-graph-chain. If this file exists, then load the hash values as line-separated values in that file and load $OBJDIR/info/commit-graphs/graph-{hash[i]}.graph for each hash[i] in that file. The file is given in order, so the first hash corresponds to the "base" file and the final hash corresponds to the "tip" file. This implementation assumes that all of the graph-{hash}.graph files are in the same object directory as the commit-graph-chain file. This will be updated in a future change. This change is purposefully simple so we can isolate the different concerns. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 106 insertions(+), 6 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 96b07a674e4d65..f7dfc6aecd4689 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -45,6 +45,19 @@ char *get_commit_graph_filename(const char *obj_dir) return xstrfmt("%s/info/commit-graph", obj_dir); } +static char *get_split_graph_filename(const char *obj_dir, + const char *oid_hex) +{ + return xstrfmt("%s/info/commit-graphs/graph-%s.graph", + obj_dir, + oid_hex); +} + +static char *get_chain_filename(const char *obj_dir) +{ + return xstrfmt("%s/info/commit-graphs/commit-graph-chain", obj_dir); +} + static uint8_t oid_version(void) { return 1; @@ -286,18 +299,105 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file) return load_commit_graph_one_fd_st(fd, &st); } +static struct commit_graph *load_commit_graph_v1(struct repository *r, const char *obj_dir) +{ + char *graph_name = get_commit_graph_filename(obj_dir); + struct commit_graph *g = load_commit_graph_one(graph_name); + free(graph_name); + + return g; +} + +static int add_graph_to_chain(struct commit_graph *g, + struct commit_graph *chain, + struct object_id *oids, + int n) +{ + struct commit_graph *cur_g = chain; + + while (n) { + n--; + cur_g = cur_g->base_graph; + } + + g->base_graph = chain; + + if (chain) + g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base; + + return 1; +} + +static struct commit_graph *load_commit_graph_chain(struct repository *r, const char *obj_dir) +{ + struct commit_graph *graph_chain = NULL; + struct strbuf line = STRBUF_INIT; + struct stat st; + struct object_id *oids; + int i = 0, valid = 1, count; + char *chain_name = get_chain_filename(obj_dir); + FILE *fp; + int stat_res; + + fp = fopen(chain_name, "r"); + stat_res = stat(chain_name, &st); + free(chain_name); + + if (!fp || + stat_res || + st.st_size <= the_hash_algo->hexsz) + return NULL; + + count = st.st_size / (the_hash_algo->hexsz + 1); + oids = xcalloc(count, sizeof(struct object_id)); + + for (i = 0; i < count && valid; i++) { + char *graph_name; + struct commit_graph *g; + + if (strbuf_getline_lf(&line, fp) == EOF) + break; + + if (get_oid_hex(line.buf, &oids[i])) { + warning(_("invalid commit-graph chain: line '%s' not a hash"), + line.buf); + valid = 0; + break; + } + + graph_name = get_split_graph_filename(obj_dir, line.buf); + g = load_commit_graph_one(graph_name); + free(graph_name); + + if (g && add_graph_to_chain(g, graph_chain, oids, i)) + graph_chain = g; + else + valid = 0; + } + + free(oids); + fclose(fp); + + return graph_chain; +} + +static struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) +{ + struct commit_graph *g = load_commit_graph_v1(r, obj_dir); + + if (!g) + g = load_commit_graph_chain(r, obj_dir); + + return g; +} + static void prepare_commit_graph_one(struct repository *r, const char *obj_dir) { - char *graph_name; if (r->objects->commit_graph) return; - graph_name = get_commit_graph_filename(obj_dir); - r->objects->commit_graph = - load_commit_graph_one(graph_name); - - FREE_AND_NULL(graph_name); + r->objects->commit_graph = read_commit_graph_one(r, obj_dir); } /* From 118bd570029f5610abdbf1a220e87d3a5c241f5f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:26 -0700 Subject: [PATCH 19/32] commit-graph: add base graphs chunk To quickly verify a commit-graph chain is valid on load, we will read from the new "Base Graphs Chunk" of each file in the chain. This will prevent accidentally loading incorrect data from manually editing the commit-graph-chain file or renaming graph-{hash}.graph files. The commit_graph struct already had an object_id struct "oid", but it was never initialized or used. Add a line to read the hash from the end of the commit-graph file and into the oid member. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- .../technical/commit-graph-format.txt | 11 ++++++++-- commit-graph.c | 22 +++++++++++++++++++ commit-graph.h | 1 + 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index 16452a0504c8fa..a4f17441aed30f 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -44,8 +44,9 @@ HEADER: 1-byte number (C) of "chunks" - 1-byte (reserved for later use) - Current clients should ignore this value. + 1-byte number (B) of base commit-graphs + We infer the length (H*B) of the Base Graphs chunk + from this value. CHUNK LOOKUP: @@ -92,6 +93,12 @@ CHUNK DATA: positions for the parents until reaching a value with the most-significant bit on. The other bits correspond to the position of the last parent. + Base Graphs List (ID: {'B', 'A', 'S', 'E'}) [Optional] + This list of H-byte hashes describe a set of B commit-graph files that + form a commit-graph chain. The graph position for the ith commit in this + file's OID Lookup chunk is equal to i plus the number of commits in all + base graphs. If B is non-zero, this chunk must exist. + TRAILER: H-byte HASH-checksum of all of the above. diff --git a/commit-graph.c b/commit-graph.c index f7dfc6aecd4689..6f7961d0716f2c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -22,6 +22,7 @@ #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */ #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */ +#define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */ #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) @@ -262,6 +263,12 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, else graph->chunk_extra_edges = data + chunk_offset; break; + + case GRAPH_CHUNKID_BASE: + if (graph->chunk_base_graphs) + chunk_repeated = 1; + else + graph->chunk_base_graphs = data + chunk_offset; } if (chunk_repeated) { @@ -280,6 +287,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, last_chunk_offset = chunk_offset; } + hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len); + if (verify_commit_graph_lite(graph)) return NULL; @@ -315,8 +324,21 @@ static int add_graph_to_chain(struct commit_graph *g, { struct commit_graph *cur_g = chain; + if (n && !g->chunk_base_graphs) { + warning(_("commit-graph has no base graphs chunk")); + return 0; + } + while (n) { n--; + + if (!cur_g || + !oideq(&oids[n], &cur_g->oid) || + !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) { + warning(_("commit-graph chain does not match")); + return 0; + } + cur_g = cur_g->base_graph; } diff --git a/commit-graph.h b/commit-graph.h index 5819910a5b22d9..6e7d42cf3207ad 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -55,6 +55,7 @@ struct commit_graph { const unsigned char *chunk_oid_lookup; const unsigned char *chunk_commit_data; const unsigned char *chunk_extra_edges; + const unsigned char *chunk_base_graphs; }; struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st); From 144354b054c99b2231be6bae517eed5f8da55268 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:27 -0700 Subject: [PATCH 20/32] commit-graph: rearrange chunk count logic The number of chunks in a commit-graph file can change depending on whether we need the Extra Edges Chunk. We are going to add more optional chunks, and it will be helpful to rearrange this logic around the chunk count before doing so. Specifically, we need to finalize the number of chunks before writing the commit-graph header. Further, we also need to fill out the chunk lookup table dynamically and using "num_chunks" as we add optional chunks is useful for adding optional chunks in the future. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 6f7961d0716f2c..f2163e109f593e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1213,7 +1213,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) uint64_t chunk_offsets[5]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; - int num_chunks = ctx->num_extra_edges ? 4 : 3; + int num_chunks = 3; ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); if (safe_create_leading_directories(ctx->graph_name)) { @@ -1226,27 +1226,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); - hashwrite_be32(f, GRAPH_SIGNATURE); - - hashwrite_u8(f, GRAPH_VERSION); - hashwrite_u8(f, oid_version()); - hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); /* unused padding byte */ - chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunk_ids[2] = GRAPH_CHUNKID_DATA; - if (ctx->num_extra_edges) - chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; - else - chunk_ids[3] = 0; - chunk_ids[4] = 0; + if (ctx->num_extra_edges) { + chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; + num_chunks++; + } + + chunk_ids[num_chunks] = 0; chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; - chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; + + num_chunks = 3; + if (ctx->num_extra_edges) { + chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + + 4 * ctx->num_extra_edges; + num_chunks++; + } + + hashwrite_be32(f, GRAPH_SIGNATURE); + + hashwrite_u8(f, GRAPH_VERSION); + hashwrite_u8(f, oid_version()); + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 0); for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; From 6c622f9f0bbb38a23341dc4294f56d0d909b3d50 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:27 -0700 Subject: [PATCH 21/32] commit-graph: write commit-graph chains Extend write_commit_graph() to write a commit-graph chain when given the COMMIT_GRAPH_SPLIT flag. This implementation is purposefully simplistic in how it creates a new chain. The commits not already in the chain are added to a new tip commit-graph file. Much of the logic around writing a graph-{hash}.graph file and updating the commit-graph-chain file is the same as the commit-graph file case. However, there are several places where we need to do some extra logic in the split case. Track the list of graph filenames before and after the planned write. This will be more important when we start merging graph files, but it also allows us to upgrade our commit-graph file to the appropriate graph-{hash}.graph file when we upgrade to a chain of commit-graphs. Note that we use the eighth byte of the commit-graph header to store the number of base graph files. This determines the length of the base graphs chunk. A subtle change of behavior with the new logic is that we do not write a commit-graph if we our commit list is empty. This extends to the typical case, which is reflected in t5318-commit-graph.sh. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 286 ++++++++++++++++++++++++++++++++++++++-- commit-graph.h | 2 + t/t5318-commit-graph.sh | 2 +- 3 files changed, 278 insertions(+), 12 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index f2163e109f593e..f0698b05996757 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -300,12 +300,18 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file) struct stat st; int fd; + struct commit_graph *g; int open_ok = open_commit_graph(graph_file, &fd, &st); if (!open_ok) return NULL; - return load_commit_graph_one_fd_st(fd, &st); + g = load_commit_graph_one_fd_st(fd, &st); + + if (g) + g->filename = xstrdup(graph_file); + + return g; } static struct commit_graph *load_commit_graph_v1(struct repository *r, const char *obj_dir) @@ -730,8 +736,19 @@ struct write_commit_graph_context { struct progress *progress; int progress_done; uint64_t progress_cnt; + + char *base_graph_name; + int num_commit_graphs_before; + int num_commit_graphs_after; + char **commit_graph_filenames_before; + char **commit_graph_filenames_after; + char **commit_graph_hash_after; + uint32_t new_num_commits_in_base; + struct commit_graph *new_base_graph; + unsigned append:1, - report_progress:1; + report_progress:1, + split:1; }; static void write_graph_chunk_fanout(struct hashfile *f, @@ -801,6 +818,16 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, ctx->commits.nr, commit_to_sha1); + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -821,6 +848,17 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, ctx->commits.list, ctx->commits.nr, commit_to_sha1); + + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -878,6 +916,16 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, ctx->commits.nr, commit_to_sha1); + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -969,7 +1017,13 @@ static void close_reachable(struct write_commit_graph_context *ctx) display_progress(ctx->progress, i + 1); commit = lookup_commit(ctx->r, &ctx->oids.list[i]); - if (commit && !parse_commit_no_graph(commit)) + if (!commit) + continue; + if (ctx->split) { + if (!parse_commit(commit) && + commit->graph_pos == COMMIT_NOT_FROM_GRAPH) + add_missing_parents(ctx, commit); + } else if (!parse_commit_no_graph(commit)) add_missing_parents(ctx, commit); } stop_progress(&ctx->progress); @@ -1165,8 +1219,16 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) for (i = 1; i < ctx->oids.nr; i++) { display_progress(ctx->progress, i + 1); - if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) + if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) { + if (ctx->split) { + struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]); + + if (!c || c->graph_pos != COMMIT_NOT_FROM_GRAPH) + continue; + } + count_distinct++; + } } stop_progress(&ctx->progress); @@ -1189,7 +1251,13 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) continue; + ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc); ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); + + if (ctx->split && + ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH) + continue; + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); for (parent = ctx->commits.list[ctx->commits.nr]->parents; @@ -1204,18 +1272,86 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static int write_graph_chunk_base_1(struct hashfile *f, + struct commit_graph *g) +{ + int num = 0; + + if (!g) + return 0; + + num = write_graph_chunk_base_1(f, g->base_graph); + hashwrite(f, g->oid.hash, the_hash_algo->rawsz); + return num + 1; +} + +static int write_graph_chunk_base(struct hashfile *f, + struct write_commit_graph_context *ctx) +{ + int num = write_graph_chunk_base_1(f, ctx->new_base_graph); + + if (num != ctx->num_commit_graphs_after - 1) { + error(_("failed to write correct number of base graph ids")); + return -1; + } + + return 0; +} + +static void init_commit_graph_chain(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t i; + + ctx->new_base_graph = g; + ctx->base_graph_name = xstrdup(g->filename); + ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; + + ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; + + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); + ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); + + for (i = 0; i < ctx->num_commit_graphs_before - 1; i++) + ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); + + if (ctx->num_commit_graphs_before) + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_before - 1] = + get_split_graph_filename(ctx->obj_dir, oid_to_hex(&g->oid)); + + i = ctx->num_commit_graphs_before - 1; + + while (g) { + ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); + i--; + g = g->base_graph; + } +} + static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; + int fd; struct hashfile *f; struct lock_file lk = LOCK_INIT; - uint32_t chunk_ids[5]; - uint64_t chunk_offsets[5]; + uint32_t chunk_ids[6]; + uint64_t chunk_offsets[6]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int num_chunks = 3; + struct object_id file_hash; + + if (ctx->split) { + struct strbuf tmp_file = STRBUF_INIT; + + strbuf_addf(&tmp_file, + "%s/info/commit-graphs/tmp_graph_XXXXXX", + ctx->obj_dir); + ctx->graph_name = strbuf_detach(&tmp_file, NULL); + } else { + ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); + } - ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); if (safe_create_leading_directories(ctx->graph_name)) { UNLEAK(ctx->graph_name); error(_("unable to create leading directories of %s"), @@ -1223,8 +1359,23 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return -1; } - hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); - f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + if (ctx->split) { + char *lock_name = get_chain_filename(ctx->obj_dir); + + hold_lock_file_for_update(&lk, lock_name, LOCK_DIE_ON_ERROR); + + fd = git_mkstemp_mode(ctx->graph_name, 0444); + if (fd < 0) { + error(_("unable to create '%s'"), ctx->graph_name); + return -1; + } + + f = hashfd(fd, ctx->graph_name); + } else { + hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); + fd = lk.tempfile->fd; + f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + } chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; @@ -1233,6 +1384,10 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; num_chunks++; } + if (ctx->num_commit_graphs_after > 1) { + chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; + num_chunks++; + } chunk_ids[num_chunks] = 0; @@ -1247,13 +1402,18 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) 4 * ctx->num_extra_edges; num_chunks++; } + if (ctx->num_commit_graphs_after > 1) { + chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + + hashsz * (ctx->num_commit_graphs_after - 1); + num_chunks++; + } hashwrite_be32(f, GRAPH_SIGNATURE); hashwrite_u8(f, GRAPH_VERSION); hashwrite_u8(f, oid_version()); hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); + hashwrite_u8(f, ctx->num_commit_graphs_after - 1); for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; @@ -1279,11 +1439,67 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) write_graph_chunk_data(f, hashsz, ctx); if (ctx->num_extra_edges) write_graph_chunk_extra_edges(f, ctx); + if (ctx->num_commit_graphs_after > 1 && + write_graph_chunk_base(f, ctx)) { + return -1; + } stop_progress(&ctx->progress); strbuf_release(&progress_title); + if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) { + char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid)); + char *new_base_name = get_split_graph_filename(ctx->obj_dir, new_base_hash); + + free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]); + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2] = new_base_name; + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2] = new_base_hash; + } + close_commit_graph(ctx->r->objects); - finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + + if (ctx->split) { + FILE *chainf = fdopen_lock_file(&lk, "w"); + char *final_graph_name; + int result; + + close(fd); + + if (!chainf) { + error(_("unable to open commit-graph chain file")); + return -1; + } + + if (ctx->base_graph_name) { + result = rename(ctx->base_graph_name, + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + + if (result) { + error(_("failed to rename base commit-graph file")); + return -1; + } + } else { + char *graph_name = get_commit_graph_filename(ctx->obj_dir); + unlink(graph_name); + } + + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash)); + final_graph_name = get_split_graph_filename(ctx->obj_dir, + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]); + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name; + + result = rename(ctx->graph_name, final_graph_name); + + for (i = 0; i < ctx->num_commit_graphs_after; i++) + fprintf(lk.tempfile->fp, "%s\n", ctx->commit_graph_hash_after[i]); + + if (result) { + error(_("failed to rename temporary commit-graph file")); + return -1; + } + } + commit_lock_file(&lk); return 0; @@ -1306,6 +1522,30 @@ int write_commit_graph(const char *obj_dir, ctx->obj_dir = obj_dir; ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; + ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; + + if (ctx->split) { + struct commit_graph *g; + prepare_commit_graph(ctx->r); + + g = ctx->r->objects->commit_graph; + + while (g) { + ctx->num_commit_graphs_before++; + g = g->base_graph; + } + + if (ctx->num_commit_graphs_before) { + ALLOC_ARRAY(ctx->commit_graph_filenames_before, ctx->num_commit_graphs_before); + i = ctx->num_commit_graphs_before; + g = ctx->r->objects->commit_graph; + + while (g) { + ctx->commit_graph_filenames_before[--i] = xstrdup(g->filename); + g = g->base_graph; + } + } + } ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; @@ -1360,6 +1600,14 @@ int write_commit_graph(const char *obj_dir, goto cleanup; } + if (!ctx->commits.nr) + goto cleanup; + + if (ctx->split) + init_commit_graph_chain(ctx); + else + ctx->num_commit_graphs_after = 1; + compute_generation_numbers(ctx); res = write_commit_graph_file(ctx); @@ -1368,6 +1616,21 @@ int write_commit_graph(const char *obj_dir, free(ctx->graph_name); free(ctx->commits.list); free(ctx->oids.list); + + if (ctx->commit_graph_filenames_after) { + for (i = 0; i < ctx->num_commit_graphs_after; i++) { + free(ctx->commit_graph_filenames_after[i]); + free(ctx->commit_graph_hash_after[i]); + } + + for (i = 0; i < ctx->num_commit_graphs_before; i++) + free(ctx->commit_graph_filenames_before[i]); + + free(ctx->commit_graph_filenames_after); + free(ctx->commit_graph_filenames_before); + free(ctx->commit_graph_hash_after); + } + free(ctx); return res; @@ -1555,5 +1818,6 @@ void free_commit_graph(struct commit_graph *g) g->data = NULL; close(g->graph_fd); } + free(g->filename); free(g); } diff --git a/commit-graph.h b/commit-graph.h index 6e7d42cf3207ad..c321834533eb52 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -47,6 +47,7 @@ struct commit_graph { unsigned char num_chunks; uint32_t num_commits; struct object_id oid; + char *filename; uint32_t num_commits_in_base; struct commit_graph *base_graph; @@ -71,6 +72,7 @@ int generation_numbers_enabled(struct repository *r); #define COMMIT_GRAPH_APPEND (1 << 0) #define COMMIT_GRAPH_PROGRESS (1 << 1) +#define COMMIT_GRAPH_SPLIT (1 << 2) /* * The write_commit_graph* methods return zero on success diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 3b6fd0d72848ff..063f906b3eb0ed 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -20,7 +20,7 @@ test_expect_success 'verify graph with no graph file' ' test_expect_success 'write graph with no packs' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write --object-dir . && - test_path_is_file info/commit-graph + test_path_is_missing info/commit-graph ' test_expect_success 'close with correct error on bad input' ' From 135a7123755bfdde05da18012bebd2776f82b26c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:28 -0700 Subject: [PATCH 22/32] commit-graph: add --split option to builtin Add a new "--split" option to the 'git commit-graph write' subcommand. This option allows the optional behavior of writing a commit-graph chain. The current behavior will add a tip commit-graph containing any commits that are not in the existing commit-graph or commit-graph chain. Later changes will allow merging the chain and expiring out-dated files. Add a new test script (t5324-split-commit-graph.sh) that demonstrates this behavior. Helped-by: Johannes Schindelin Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/commit-graph.c | 10 ++- commit-graph.c | 14 ++-- t/t5324-split-commit-graph.sh | 122 ++++++++++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 8 deletions(-) create mode 100755 t/t5324-split-commit-graph.sh diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index d8efa5bab276a8..ff8bc3c8b9e017 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), NULL }; @@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), NULL }; @@ -35,9 +35,9 @@ static struct opts_commit_graph { int stdin_packs; int stdin_commits; int append; + int split; } opts; - static int graph_verify(int argc, const char **argv) { struct commit_graph *graph = NULL; @@ -156,6 +156,8 @@ static int graph_write(int argc, const char **argv) N_("start walk at commits listed by stdin")), OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), + OPT_BOOL(0, "split", &opts.split, + N_("allow writing an incremental commit-graph file")), OPT_END(), }; @@ -169,6 +171,8 @@ static int graph_write(int argc, const char **argv) opts.obj_dir = get_object_directory(); if (opts.append) flags |= COMMIT_GRAPH_APPEND; + if (opts.split) + flags |= COMMIT_GRAPH_SPLIT; read_replace_refs = 0; diff --git a/commit-graph.c b/commit-graph.c index f0698b05996757..1224309e5feb48 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1472,12 +1472,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } if (ctx->base_graph_name) { - result = rename(ctx->base_graph_name, - ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + const char *dest = ctx->commit_graph_filenames_after[ + ctx->num_commit_graphs_after - 2]; - if (result) { - error(_("failed to rename base commit-graph file")); - return -1; + if (strcmp(ctx->base_graph_name, dest)) { + result = rename(ctx->base_graph_name, dest); + + if (result) { + error(_("failed to rename base commit-graph file")); + return -1; + } } } else { char *graph_name = get_commit_graph_filename(ctx->obj_dir); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh new file mode 100755 index 00000000000000..ccd24bd22bf665 --- /dev/null +++ b/t/t5324-split-commit-graph.sh @@ -0,0 +1,122 @@ +#!/bin/sh + +test_description='split commit graph' +. ./test-lib.sh + +GIT_TEST_COMMIT_GRAPH=0 + +test_expect_success 'setup repo' ' + git init && + git config core.commitGraph true && + infodir=".git/objects/info" && + graphdir="$infodir/commit-graphs" && + test_oid_init +' + +graph_read_expect() { + NUM_BASE=0 + if test ! -z $2 + then + NUM_BASE=$2 + fi + cat >expect <<- EOF + header: 43475048 1 1 3 $NUM_BASE + num_commits: $1 + chunks: oid_fanout oid_lookup commit_metadata + EOF + git commit-graph read >output && + test_cmp expect output +} + +test_expect_success 'create commits and write commit-graph' ' + for i in $(test_seq 3) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git commit-graph write --reachable && + test_path_is_file $infodir/commit-graph && + graph_read_expect 3 +' + +graph_git_two_modes() { + git -c core.commitGraph=true $1 >output + git -c core.commitGraph=false $1 >expect + test_cmp expect output +} + +graph_git_behavior() { + MSG=$1 + BRANCH=$2 + COMPARE=$3 + test_expect_success "check normal git operations: $MSG" ' + graph_git_two_modes "log --oneline $BRANCH" && + graph_git_two_modes "log --topo-order $BRANCH" && + graph_git_two_modes "log --graph $COMPARE..$BRANCH" && + graph_git_two_modes "branch -vv" && + graph_git_two_modes "merge-base -a $BRANCH $COMPARE" + ' +} + +graph_git_behavior 'graph exists' commits/3 commits/1 + +verify_chain_files_exist() { + for hash in $(cat $1/commit-graph-chain) + do + test_path_is_file $1/graph-$hash.graph || return 1 + done +} + +test_expect_success 'add more commits, and write a new base graph' ' + git reset --hard commits/1 && + for i in $(test_seq 4 5) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git reset --hard commits/2 && + for i in $(test_seq 6 10) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git reset --hard commits/2 && + git merge commits/4 && + git branch merge/1 && + git reset --hard commits/4 && + git merge commits/6 && + git branch merge/2 && + git commit-graph write --reachable && + graph_read_expect 12 +' + +test_expect_success 'add three more commits, write a tip graph' ' + git reset --hard commits/3 && + git merge merge/1 && + git merge commits/5 && + git merge merge/2 && + git branch merge/3 && + git commit-graph write --reachable --split && + test_path_is_missing $infodir/commit-graph && + test_path_is_file $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 2 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'split commit-graph: merge 3 vs 2' merge/3 merge/2 + +test_expect_success 'add one commit, write a tip graph' ' + test_commit 11 && + git branch commits/11 && + git commit-graph write --reachable --split && + test_path_is_missing $infodir/commit-graph && + test_path_is_file $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 3 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6 + +test_done From 1771be90c8e4797c2466296d1d570dbfa39d9743 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:29 -0700 Subject: [PATCH 23/32] commit-graph: merge commit-graph chains When searching for a commit in a commit-graph chain of G graphs with N commits, the search takes O(G log N) time. If we always add a new tip graph with every write, the linear G term will start to dominate and slow the lookup process. To keep lookups fast, but also keep most incremental writes fast, create a strategy for merging levels of the commit-graph chain. The strategy is detailed in the commit-graph design document, but is summarized by these two conditions: 1. If the number of commits we are adding is more than half the number of commits in the graph below, then merge with that graph. 2. If we are writing more than 64,000 commits into a single graph, then merge with all lower graphs. The numeric values in the conditions above are currently constant, but can become config options in a future update. As we merge levels of the commit-graph chain, check that the commits still exist in the repository. A garbage-collection operation may have removed those commits from the object store and we do not want to persist them in the commit-graph chain. This is a non-issue if the 'git gc' process wrote a new, single-level commit-graph file. After we merge levels, the old graph-{hash}.graph files are no longer referenced by the commit-graph-chain file. We will expire these files in a future change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 80 ++++++++++ commit-graph.c | 180 ++++++++++++++++++----- t/t5324-split-commit-graph.sh | 13 ++ 3 files changed, 240 insertions(+), 33 deletions(-) diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 1dca3bd8fe90a1..d9c6253b0a4368 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -186,6 +186,86 @@ positions to refer to their parents, which may be in `graph-{hash1}.graph` or its containment in the intervals [0, X0), [X0, X0 + X1), [X0 + X1, X0 + X1 + X2). +Each commit-graph file (except the base, `graph-{hash0}.graph`) contains data +specifying the hashes of all files in the lower layers. In the above example, +`graph-{hash1}.graph` contains `{hash0}` while `graph-{hash2}.graph` contains +`{hash0}` and `{hash1}`. + +## Merging commit-graph files + +If we only added a new commit-graph file on every write, we would run into a +linear search problem through many commit-graph files. Instead, we use a merge +strategy to decide when the stack should collapse some number of levels. + +The diagram below shows such a collapse. As a set of new commits are added, it +is determined by the merge strategy that the files should collapse to +`graph-{hash1}`. Thus, the new commits, the commits in `graph-{hash2}` and +the commits in `graph-{hash1}` should be combined into a new `graph-{hash3}` +file. + + +---------------------+ + | | + | (new commits) | + | | + +---------------------+ + | | + +-----------------------+ +---------------------+ + | graph-{hash2} |->| | + +-----------------------+ +---------------------+ + | | | + +-----------------------+ +---------------------+ + | | | | + | graph-{hash1} |->| | + | | | | + +-----------------------+ +---------------------+ + | tmp_graphXXX + +-----------------------+ + | | + | | + | | + | graph-{hash0} | + | | + | | + | | + +-----------------------+ + +During this process, the commits to write are combined, sorted and we write the +contents to a temporary file, all while holding a `commit-graph-chain.lock` +lock-file. When the file is flushed, we rename it to `graph-{hash3}` +according to the computed `{hash3}`. Finally, we write the new chain data to +`commit-graph-chain.lock`: + +``` + {hash3} + {hash0} +``` + +We then close the lock-file. + +## Merge Strategy + +When writing a set of commits that do not exist in the commit-graph stack of +height N, we default to creating a new file at level N + 1. We then decide to +merge with the Nth level if one of two conditions hold: + + 1. The expected file size for level N + 1 is at least half the file size for + level N. + + 2. Level N + 1 contains more than 64,0000 commits. + +This decision cascades down the levels: when we merge a level we create a new +set of commits that then compares to the next level. + +The first condition bounds the number of levels to be logarithmic in the total +number of commits. The second condition bounds the total number of commits in +a `graph-{hashN}` file and not in the `commit-graph` file, preventing +significant performance issues when the stack merges and another process only +partially reads the previous stack. + +The merge strategy values (2 for the size multiple, 64,000 for the maximum +number of commits) could be extracted into config settings for full +flexibility. + Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 diff --git a/commit-graph.c b/commit-graph.c index 1224309e5feb48..fb3100921cdcd4 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1298,36 +1298,6 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } -static void init_commit_graph_chain(struct write_commit_graph_context *ctx) -{ - struct commit_graph *g = ctx->r->objects->commit_graph; - uint32_t i; - - ctx->new_base_graph = g; - ctx->base_graph_name = xstrdup(g->filename); - ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; - - ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - - ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); - ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); - - for (i = 0; i < ctx->num_commit_graphs_before - 1; i++) - ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); - - if (ctx->num_commit_graphs_before) - ctx->commit_graph_filenames_after[ctx->num_commit_graphs_before - 1] = - get_split_graph_filename(ctx->obj_dir, oid_to_hex(&g->oid)); - - i = ctx->num_commit_graphs_before - 1; - - while (g) { - ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); - i--; - g = g->base_graph; - } -} - static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; @@ -1509,6 +1479,145 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return 0; } +static int split_strategy_max_commits = 64000; +static float split_strategy_size_mult = 2.0f; + +static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t num_commits = ctx->commits.nr; + uint32_t i; + + g = ctx->r->objects->commit_graph; + ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; + + while (g && (g->num_commits <= split_strategy_size_mult * num_commits || + num_commits > split_strategy_max_commits)) { + num_commits += g->num_commits; + g = g->base_graph; + + ctx->num_commit_graphs_after--; + } + + ctx->new_base_graph = g; + + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); + ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); + + for (i = 0; i < ctx->num_commit_graphs_after && + i < ctx->num_commit_graphs_before; i++) + ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); + + i = ctx->num_commit_graphs_before - 1; + g = ctx->r->objects->commit_graph; + + while (g) { + if (i < ctx->num_commit_graphs_after) + ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); + + i--; + g = g->base_graph; + } +} + +static void merge_commit_graph(struct write_commit_graph_context *ctx, + struct commit_graph *g) +{ + uint32_t i; + uint32_t offset = g->num_commits_in_base; + + ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc); + + for (i = 0; i < g->num_commits; i++) { + struct object_id oid; + struct commit *result; + + display_progress(ctx->progress, i + 1); + + load_oid_from_graph(g, i + offset, &oid); + + /* only add commits if they still exist in the repo */ + result = lookup_commit_reference_gently(ctx->r, &oid, 1); + + if (result) { + ctx->commits.list[ctx->commits.nr] = result; + ctx->commits.nr++; + } + } +} + +static int commit_compare(const void *_a, const void *_b) +{ + const struct commit *a = *(const struct commit **)_a; + const struct commit *b = *(const struct commit **)_b; + return oidcmp(&a->object.oid, &b->object.oid); +} + +static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx) +{ + uint32_t i, num_parents; + struct commit_list *parent; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Scanning merged commits"), + ctx->commits.nr); + + QSORT(ctx->commits.list, ctx->commits.nr, commit_compare); + + ctx->num_extra_edges = 0; + for (i = 0; i < ctx->commits.nr; i++) { + display_progress(ctx->progress, i); + + if (i && oideq(&ctx->commits.list[i - 1]->object.oid, + &ctx->commits.list[i]->object.oid)) { + die(_("unexpected duplicate commit id %s"), + oid_to_hex(&ctx->commits.list[i]->object.oid)); + } else { + num_parents = 0; + for (parent = ctx->commits.list[i]->parents; parent; parent = parent->next) + num_parents++; + + if (num_parents > 2) + ctx->num_extra_edges += num_parents - 2; + } + } + + stop_progress(&ctx->progress); +} + +static void merge_commit_graphs(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t current_graph_number = ctx->num_commit_graphs_before; + struct strbuf progress_title = STRBUF_INIT; + + while (g && current_graph_number >= ctx->num_commit_graphs_after) { + current_graph_number--; + + if (ctx->report_progress) { + strbuf_addstr(&progress_title, _("Merging commit-graph")); + ctx->progress = start_delayed_progress(progress_title.buf, 0); + } + + merge_commit_graph(ctx, g); + stop_progress(&ctx->progress); + strbuf_release(&progress_title); + + g = g->base_graph; + } + + if (g) { + ctx->new_base_graph = g; + ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; + } + + if (ctx->new_base_graph) + ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename); + + sort_and_scan_merged_commits(ctx); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1554,6 +1663,9 @@ int write_commit_graph(const char *obj_dir, ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; + if (ctx->split && ctx->oids.alloc > split_strategy_max_commits) + ctx->oids.alloc = split_strategy_max_commits; + if (ctx->append) { prepare_commit_graph_one(ctx->r, ctx->obj_dir); if (ctx->r->objects->commit_graph) @@ -1607,9 +1719,11 @@ int write_commit_graph(const char *obj_dir, if (!ctx->commits.nr) goto cleanup; - if (ctx->split) - init_commit_graph_chain(ctx); - else + if (ctx->split) { + split_graph_merge_strategy(ctx); + + merge_commit_graphs(ctx); + } else ctx->num_commit_graphs_after = 1; compute_generation_numbers(ctx); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index ccd24bd22bf665..5cb5663a30967d 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -119,4 +119,17 @@ test_expect_success 'add one commit, write a tip graph' ' graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6 +test_expect_success 'add one commit, write a merged graph' ' + test_commit 12 && + git branch commits/12 && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 2 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 4 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6 + test_done From c523035cbd8515d095dc15e9661fd896733bedbc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:30 -0700 Subject: [PATCH 24/32] commit-graph: allow cross-alternate chains In an environment like a fork network, it is helpful to have a commit-graph chain that spans both the base repo and the fork repo. The fork is usually a small set of data on top of the large repo, but sometimes the fork is much larger. For example, git-for-windows/git has almost double the number of commits as git/git because it rebases its commits on every major version update. To allow cross-alternate commit-graph chains, we need a few pieces: 1. When looking for a graph-{hash}.graph file, check all alternates. 2. When merging commit-graph chains, do not merge across alternates. 3. When writing a new commit-graph chain based on a commit-graph file in another object directory, do not allow success if the base file has of the name "commit-graph" instead of "commit-graphs/graph-{hash}.graph". Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 40 +++++++++++++++++ commit-graph.c | 56 +++++++++++++++++++----- commit-graph.h | 1 + t/t5324-split-commit-graph.sh | 37 ++++++++++++++++ 4 files changed, 123 insertions(+), 11 deletions(-) diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index d9c6253b0a4368..473032e47696ec 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -266,6 +266,42 @@ The merge strategy values (2 for the size multiple, 64,000 for the maximum number of commits) could be extracted into config settings for full flexibility. +## Chains across multiple object directories + +In a repo with alternates, we look for the `commit-graph-chain` file starting +in the local object directory and then in each alternate. The first file that +exists defines our chain. As we look for the `graph-{hash}` files for +each `{hash}` in the chain file, we follow the same pattern for the host +directories. + +This allows commit-graphs to be split across multiple forks in a fork network. +The typical case is a large "base" repo with many smaller forks. + +As the base repo advances, it will likely update and merge its commit-graph +chain more frequently than the forks. If a fork updates their commit-graph after +the base repo, then it should "reparent" the commit-graph chain onto the new +chain in the base repo. When reading each `graph-{hash}` file, we track +the object directory containing it. During a write of a new commit-graph file, +we check for any changes in the source object directory and read the +`commit-graph-chain` file for that source and create a new file based on those +files. During this "reparent" operation, we necessarily need to collapse all +levels in the fork, as all of the files are invalid against the new base file. + +It is crucial to be careful when cleaning up "unreferenced" `graph-{hash}.graph` +files in this scenario. It falls to the user to define the proper settings for +their custom environment: + + 1. When merging levels in the base repo, the unreferenced files may still be + referenced by chains from fork repos. + + 2. The expiry time should be set to a length of time such that every fork has + time to recompute their commit-graph chain to "reparent" onto the new base + file(s). + + 3. If the commit-graph chain is updated in the base, the fork will not have + access to the new chain until its chain is updated to reference those files. + (This may change in the future [5].) + Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 @@ -292,3 +328,7 @@ Related Links [4] https://public-inbox.org/git/20180108154822.54829-1-git@jeffhostetler.com/T/#u A patch to remove the ahead-behind calculation from 'status'. + +[5] https://public-inbox.org/git/f27db281-abad-5043-6d71-cbb083b1c877@gmail.com/ + A discussion of a "two-dimensional graph position" that can allow reading + multiple commit-graph chains at the same time. diff --git a/commit-graph.c b/commit-graph.c index fb3100921cdcd4..fba705bc51e507 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -320,6 +320,9 @@ static struct commit_graph *load_commit_graph_v1(struct repository *r, const cha struct commit_graph *g = load_commit_graph_one(graph_name); free(graph_name); + if (g) + g->obj_dir = obj_dir; + return g; } @@ -379,9 +382,10 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const count = st.st_size / (the_hash_algo->hexsz + 1); oids = xcalloc(count, sizeof(struct object_id)); - for (i = 0; i < count && valid; i++) { - char *graph_name; - struct commit_graph *g; + prepare_alt_odb(r); + + for (i = 0; i < count; i++) { + struct object_directory *odb; if (strbuf_getline_lf(&line, fp) == EOF) break; @@ -393,14 +397,29 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const break; } - graph_name = get_split_graph_filename(obj_dir, line.buf); - g = load_commit_graph_one(graph_name); - free(graph_name); + valid = 0; + for (odb = r->objects->odb; odb; odb = odb->next) { + char *graph_name = get_split_graph_filename(odb->path, line.buf); + struct commit_graph *g = load_commit_graph_one(graph_name); - if (g && add_graph_to_chain(g, graph_chain, oids, i)) - graph_chain = g; - else - valid = 0; + free(graph_name); + + if (g) { + g->obj_dir = odb->path; + + if (add_graph_to_chain(g, graph_chain, oids, i)) { + graph_chain = g; + valid = 1; + } + + break; + } + } + + if (!valid) { + warning(_("unable to find all commit-graph files")); + break; + } } free(oids); @@ -1418,7 +1437,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) { char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid)); - char *new_base_name = get_split_graph_filename(ctx->obj_dir, new_base_hash); + char *new_base_name = get_split_graph_filename(ctx->new_base_graph->obj_dir, new_base_hash); free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]); @@ -1493,6 +1512,9 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) while (g && (g->num_commits <= split_strategy_size_mult * num_commits || num_commits > split_strategy_max_commits)) { + if (strcmp(g->obj_dir, ctx->obj_dir)) + break; + num_commits += g->num_commits; g = g->base_graph; @@ -1501,6 +1523,18 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) ctx->new_base_graph = g; + if (ctx->num_commit_graphs_after == 2) { + char *old_graph_name = get_commit_graph_filename(g->obj_dir); + + if (!strcmp(g->filename, old_graph_name) && + strcmp(g->obj_dir, ctx->obj_dir)) { + ctx->num_commit_graphs_after = 1; + ctx->new_base_graph = NULL; + } + + free(old_graph_name); + } + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); diff --git a/commit-graph.h b/commit-graph.h index c321834533eb52..802d35254f3cc8 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -48,6 +48,7 @@ struct commit_graph { uint32_t num_commits; struct object_id oid; char *filename; + const char *obj_dir; uint32_t num_commits_in_base; struct commit_graph *base_graph; diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 5cb5663a30967d..46f0832f680100 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -90,6 +90,21 @@ test_expect_success 'add more commits, and write a new base graph' ' graph_read_expect 12 ' +test_expect_success 'fork and fail to base a chain on a commit-graph file' ' + test_when_finished rm -rf fork && + git clone . fork && + ( + cd fork && + rm .git/objects/info/commit-graph && + echo "$(pwd)/../.git/objects" >.git/objects/info/alternates && + test_commit new-commit && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 1 $graphdir/commit-graph-chain && + verify_chain_files_exist $graphdir + ) +' + test_expect_success 'add three more commits, write a tip graph' ' git reset --hard commits/3 && git merge merge/1 && @@ -132,4 +147,26 @@ test_expect_success 'add one commit, write a merged graph' ' graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6 +test_expect_success 'create fork and chain across alternate' ' + git clone . fork && + ( + cd fork && + git config core.commitGraph true && + rm -rf $graphdir && + echo "$(pwd)/../.git/objects" >.git/objects/info/alternates && + test_commit 13 && + git branch commits/13 && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 3 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files && + git -c core.commitGraph=true rev-list HEAD >expect && + git -c core.commitGraph=false rev-list HEAD >actual && + test_cmp expect actual + ) +' + +graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6 + test_done From 8d84097f9655a954ea7e94ba08d93e5940ccb2ae Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:31 -0700 Subject: [PATCH 25/32] commit-graph: expire commit-graph files As we merge commit-graph files in a commit-graph chain, we should clean up the files that are no longer used. This change introduces an 'expiry_window' value to the context, which is always zero (for now). We then check the modified time of each graph-{hash}.graph file in the $OBJDIR/info/commit-graphs folder and unlink the files that are older than the expiry_window. Since this is always zero, this immediately clears all unused graph files. We will update the value to match a config setting in a future change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/commit-graph.txt | 15 ++++++ commit-graph.c | 69 ++++++++++++++++++++++++ t/t5324-split-commit-graph.sh | 2 +- 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 473032e47696ec..aed4350a596228 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -266,6 +266,21 @@ The merge strategy values (2 for the size multiple, 64,000 for the maximum number of commits) could be extracted into config settings for full flexibility. +## Deleting graph-{hash} files + +After a new tip file is written, some `graph-{hash}` files may no longer +be part of a chain. It is important to remove these files from disk, eventually. +The main reason to delay removal is that another process could read the +`commit-graph-chain` file before it is rewritten, but then look for the +`graph-{hash}` files after they are deleted. + +To allow holding old split commit-graphs for a while after they are unreferenced, +we update the modified times of the files when they become unreferenced. Then, +we scan the `$OBJDIR/info/commit-graphs/` directory for `graph-{hash}` +files whose modified times are older than a given expiry window. This window +defaults to zero, but can be changed using command-line arguments or a config +setting. + ## Chains across multiple object directories In a repo with alternates, we look for the `commit-graph-chain` file starting diff --git a/commit-graph.c b/commit-graph.c index fba705bc51e507..0cc2ceb349a2de 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1652,6 +1652,70 @@ static void merge_commit_graphs(struct write_commit_graph_context *ctx) sort_and_scan_merged_commits(ctx); } +static void mark_commit_graphs(struct write_commit_graph_context *ctx) +{ + uint32_t i; + time_t now = time(NULL); + + for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) { + struct stat st; + struct utimbuf updated_time; + + stat(ctx->commit_graph_filenames_before[i], &st); + + updated_time.actime = st.st_atime; + updated_time.modtime = now; + utime(ctx->commit_graph_filenames_before[i], &updated_time); + } +} + +static void expire_commit_graphs(struct write_commit_graph_context *ctx) +{ + struct strbuf path = STRBUF_INIT; + DIR *dir; + struct dirent *de; + size_t dirnamelen; + time_t expire_time = time(NULL); + + strbuf_addstr(&path, ctx->obj_dir); + strbuf_addstr(&path, "/info/commit-graphs"); + dir = opendir(path.buf); + + if (!dir) { + strbuf_release(&path); + return; + } + + strbuf_addch(&path, '/'); + dirnamelen = path.len; + while ((de = readdir(dir)) != NULL) { + struct stat st; + uint32_t i, found = 0; + + strbuf_setlen(&path, dirnamelen); + strbuf_addstr(&path, de->d_name); + + stat(path.buf, &st); + + if (st.st_mtime > expire_time) + continue; + if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph")) + continue; + + for (i = 0; i < ctx->num_commit_graphs_after; i++) { + if (!strcmp(ctx->commit_graph_filenames_after[i], + path.buf)) { + found = 1; + break; + } + } + + if (!found) + unlink(path.buf); + + } +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1764,6 +1828,11 @@ int write_commit_graph(const char *obj_dir, res = write_commit_graph_file(ctx); + if (ctx->split) { + mark_commit_graphs(ctx); + expire_commit_graphs(ctx); + } + cleanup: free(ctx->graph_name); free(ctx->commits.list); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 46f0832f680100..76068ee4077e6c 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -141,7 +141,7 @@ test_expect_success 'add one commit, write a merged graph' ' test_path_is_file $graphdir/commit-graph-chain && test_line_count = 2 $graphdir/commit-graph-chain && ls $graphdir/graph-*.graph >graph-files && - test_line_count = 4 graph-files && + test_line_count = 2 graph-files && verify_chain_files_exist $graphdir ' From c2bc6e6ab0ade70c475a73d06326e677e70840d2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:32 -0700 Subject: [PATCH 26/32] commit-graph: create options for split files The split commit-graph feature is now fully implemented, but needs some more run-time configurability. Allow direct callers to 'git commit-graph write --split' to specify the values used in the merge strategy and the expire time. Update the documentation to specify these values. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 21 ++++++++++- Documentation/technical/commit-graph.txt | 7 ++-- builtin/commit-graph.c | 25 ++++++++++--- builtin/commit.c | 2 +- builtin/gc.c | 3 +- commit-graph.c | 35 ++++++++++++------ commit-graph.h | 12 +++++- t/t5324-split-commit-graph.sh | 47 ++++++++++++++++++++++++ 8 files changed, 128 insertions(+), 24 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 624470e198202a..365e145e82b414 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -26,7 +26,7 @@ OPTIONS Use given directory for the location of packfiles and commit-graph file. This parameter exists to specify the location of an alternate that only has the objects directory, not a full `.git` directory. The - commit-graph file is expected to be at `/info/commit-graph` and + commit-graph file is expected to be in the `/info` directory and the packfiles are expected to be in `/pack`. @@ -51,6 +51,25 @@ or `--stdin-packs`.) + With the `--append` option, include all commits that are present in the existing commit-graph file. ++ +With the `--split` option, write the commit-graph as a chain of multiple +commit-graph files stored in `/info/commit-graphs`. The new commits +not already in the commit-graph are added in a new "tip" file. This file +is merged with the existing file if the following merge conditions are +met: ++ +* If `--size-multiple=` is not specified, let `X` equal 2. If the new +tip file would have `N` commits and the previous tip has `M` commits and +`X` times `N` is greater than `M`, instead merge the two files into a +single file. ++ +* If `--max-commits=` is specified with `M` a positive integer, and the +new tip file would have more than `M` commits, then instead merge the new +tip with the previous tip. ++ +Finally, if `--expire-time=` is not specified, let `datetime` +be the current time. After writing the split commit-graph, delete all +unused commit-graph whose modified times are older than `datetime`. 'read':: diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index aed4350a596228..729fbcb32f8793 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -248,10 +248,11 @@ When writing a set of commits that do not exist in the commit-graph stack of height N, we default to creating a new file at level N + 1. We then decide to merge with the Nth level if one of two conditions hold: - 1. The expected file size for level N + 1 is at least half the file size for - level N. + 1. `--size-multiple=` is specified or X = 2, and the number of commits in + level N is less than X times the number of commits in level N + 1. - 2. Level N + 1 contains more than 64,0000 commits. + 2. `--max-commits=` is specified with non-zero C and the number of commits + in level N + 1 is more than C commits. This decision cascades down the levels: when we merge a level we create a new set of commits that then compares to the next level. diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index ff8bc3c8b9e017..6c0b5b17e00cde 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; @@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; @@ -135,6 +135,7 @@ static int graph_read(int argc, const char **argv) } extern int read_replace_refs; +static struct split_commit_graph_opts split_opts; static int graph_write(int argc, const char **argv) { @@ -158,9 +159,19 @@ static int graph_write(int argc, const char **argv) N_("include all commits already in the commit-graph file")), OPT_BOOL(0, "split", &opts.split, N_("allow writing an incremental commit-graph file")), + OPT_INTEGER(0, "max-commits", &split_opts.max_commits, + N_("maximum number of commits in a non-base split commit-graph")), + OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, + N_("maximum ratio between two levels of a split commit-graph")), + OPT_EXPIRY_DATE(0, "expire-time", &split_opts.expire_time, + N_("maximum number of commits in a non-base split commit-graph")), OPT_END(), }; + split_opts.size_multiple = 2; + split_opts.max_commits = 0; + split_opts.expire_time = 0; + argc = parse_options(argc, argv, NULL, builtin_commit_graph_write_options, builtin_commit_graph_write_usage, 0); @@ -176,8 +187,11 @@ static int graph_write(int argc, const char **argv) read_replace_refs = 0; - if (opts.reachable) - return write_commit_graph_reachable(opts.obj_dir, flags); + if (opts.reachable) { + if (write_commit_graph_reachable(opts.obj_dir, flags, &split_opts)) + return 1; + return 0; + } string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { @@ -197,7 +211,8 @@ static int graph_write(int argc, const char **argv) if (write_commit_graph(opts.obj_dir, pack_indexes, commit_hex, - flags)) + flags, + &split_opts)) result = 1; UNLEAK(lines); diff --git a/builtin/commit.c b/builtin/commit.c index b001ef565ddcb1..9216e9c043d156 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1670,7 +1670,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "not exceeded, and then \"git reset HEAD\" to recover.")); if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && - write_commit_graph_reachable(get_object_directory(), 0)) + write_commit_graph_reachable(get_object_directory(), 0, NULL)) return 1; repo_rerere(the_repository, 0); diff --git a/builtin/gc.c b/builtin/gc.c index 20c8f1bfe8024d..0aa8eac7476ff8 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -666,7 +666,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (gc_write_commit_graph && write_commit_graph_reachable(get_object_directory(), - !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0)) + !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0, + NULL)) return 1; if (auto_gc && too_many_loose_objects()) diff --git a/commit-graph.c b/commit-graph.c index 0cc2ceb349a2de..315088d20531d4 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -768,6 +768,8 @@ struct write_commit_graph_context { unsigned append:1, report_progress:1, split:1; + + const struct split_commit_graph_opts *split_opts; }; static void write_graph_chunk_fanout(struct hashfile *f, @@ -1116,14 +1118,15 @@ static int add_ref_to_list(const char *refname, return 0; } -int write_commit_graph_reachable(const char *obj_dir, unsigned int flags) +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags, + const struct split_commit_graph_opts *split_opts) { struct string_list list = STRING_LIST_INIT_DUP; int result; for_each_ref(add_ref_to_list, &list); result = write_commit_graph(obj_dir, NULL, &list, - flags); + flags, split_opts); string_list_clear(&list, 0); return result; @@ -1498,20 +1501,25 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return 0; } -static int split_strategy_max_commits = 64000; -static float split_strategy_size_mult = 2.0f; - static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) { struct commit_graph *g = ctx->r->objects->commit_graph; uint32_t num_commits = ctx->commits.nr; uint32_t i; + int max_commits = 0; + int size_mult = 2; + + if (ctx->split_opts) { + max_commits = ctx->split_opts->max_commits; + size_mult = ctx->split_opts->size_multiple; + } + g = ctx->r->objects->commit_graph; ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - while (g && (g->num_commits <= split_strategy_size_mult * num_commits || - num_commits > split_strategy_max_commits)) { + while (g && (g->num_commits <= size_mult * num_commits || + (max_commits && num_commits > max_commits))) { if (strcmp(g->obj_dir, ctx->obj_dir)) break; @@ -1675,7 +1683,10 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) DIR *dir; struct dirent *de; size_t dirnamelen; - time_t expire_time = time(NULL); + timestamp_t expire_time = time(NULL); + + if (ctx->split_opts && ctx->split_opts->expire_time) + expire_time -= ctx->split_opts->expire_time; strbuf_addstr(&path, ctx->obj_dir); strbuf_addstr(&path, "/info/commit-graphs"); @@ -1719,7 +1730,8 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - unsigned int flags) + unsigned int flags, + const struct split_commit_graph_opts *split_opts) { struct write_commit_graph_context *ctx; uint32_t i, count_distinct = 0; @@ -1734,6 +1746,7 @@ int write_commit_graph(const char *obj_dir, ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; + ctx->split_opts = split_opts; if (ctx->split) { struct commit_graph *g; @@ -1761,8 +1774,8 @@ int write_commit_graph(const char *obj_dir, ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; - if (ctx->split && ctx->oids.alloc > split_strategy_max_commits) - ctx->oids.alloc = split_strategy_max_commits; + if (ctx->split && split_opts && ctx->oids.alloc > split_opts->max_commits) + ctx->oids.alloc = split_opts->max_commits; if (ctx->append) { prepare_commit_graph_one(ctx->r, ctx->obj_dir); diff --git a/commit-graph.h b/commit-graph.h index 802d35254f3cc8..a84c22a5609a40 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -75,17 +75,25 @@ int generation_numbers_enabled(struct repository *r); #define COMMIT_GRAPH_PROGRESS (1 << 1) #define COMMIT_GRAPH_SPLIT (1 << 2) +struct split_commit_graph_opts { + int size_multiple; + int max_commits; + timestamp_t expire_time; +}; + /* * The write_commit_graph* methods return zero on success * and a negative value on failure. Note that if the repository * is not compatible with the commit-graph feature, then the * methods will return 0 without writing a commit-graph. */ -int write_commit_graph_reachable(const char *obj_dir, unsigned int flags); +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags, + const struct split_commit_graph_opts *split_opts); int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - unsigned int flags); + unsigned int flags, + const struct split_commit_graph_opts *split_opts); int verify_commit_graph(struct repository *r, struct commit_graph *g); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 76068ee4077e6c..1b699a543c9beb 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -169,4 +169,51 @@ test_expect_success 'create fork and chain across alternate' ' graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6 +test_expect_success 'test merge stragety constants' ' + git clone . merge-2 && + ( + cd merge-2 && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 14 && + git commit-graph write --reachable --split --size-multiple=2 && + test_line_count = 3 $graphdir/commit-graph-chain + + ) && + git clone . merge-10 && + ( + cd merge-10 && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 14 && + git commit-graph write --reachable --split --size-multiple=10 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files + ) && + git clone . merge-10-expire && + ( + cd merge-10-expire && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 15 && + git commit-graph write --reachable --split --size-multiple=10 --expire-time=1980-01-01 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 3 graph-files + ) && + git clone --no-hardlinks . max-commits && + ( + cd max-commits && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 16 && + test_commit 17 && + git commit-graph write --reachable --split --max-commits=1 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files + ) +' + test_done From 3da4b609bb14b13672f64af908706462617f53cb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:32 -0700 Subject: [PATCH 27/32] commit-graph: verify chains with --shallow mode If we wrote a commit-graph chain, we only modified the tip file in the chain. It is valuable to verify what we wrote, but not waste time checking files we did not write. Add a '--shallow' option to the 'git commit-graph verify' subcommand and check that it does not read the base graph in a two-file chain. Making the verify subcommand read from a chain of commit-graphs takes some rearranging of the builtin code. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 5 ++- builtin/commit-graph.c | 27 +++++++++---- commit-graph.c | 15 ++++++-- commit-graph.h | 6 ++- t/t5324-split-commit-graph.sh | 62 ++++++++++++++++++++++++++++++ 5 files changed, 101 insertions(+), 14 deletions(-) diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 365e145e82b414..eb5e7865f0ef78 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git commit-graph read' [--object-dir ] -'git commit-graph verify' [--object-dir ] +'git commit-graph verify' [--object-dir ] [--shallow] 'git commit-graph write' [--object-dir ] @@ -80,6 +80,9 @@ Used for debugging purposes. Read the commit-graph file and verify its contents against the object database. Used to check for corrupted data. ++ +With the `--shallow` option, only check the tip commit-graph file in +a chain of split commit-graphs. EXAMPLES diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 6c0b5b17e00cde..38027b83d9d832 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -5,17 +5,18 @@ #include "parse-options.h" #include "repository.h" #include "commit-graph.h" +#include "object-store.h" static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), - N_("git commit-graph verify [--object-dir ]"), + N_("git commit-graph verify [--object-dir ] [--shallow]"), N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; static const char * const builtin_commit_graph_verify_usage[] = { - N_("git commit-graph verify [--object-dir ]"), + N_("git commit-graph verify [--object-dir ] [--shallow]"), NULL }; @@ -36,6 +37,7 @@ static struct opts_commit_graph { int stdin_commits; int append; int split; + int shallow; } opts; static int graph_verify(int argc, const char **argv) @@ -45,11 +47,14 @@ static int graph_verify(int argc, const char **argv) int open_ok; int fd; struct stat st; + int flags = 0; static struct option builtin_commit_graph_verify_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, N_("dir"), N_("The object directory to store the graph")), + OPT_BOOL(0, "shallow", &opts.shallow, + N_("if the commit-graph is split, only verify the tip file")), OPT_END(), }; @@ -59,21 +64,27 @@ static int graph_verify(int argc, const char **argv) if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + if (opts.shallow) + flags |= COMMIT_GRAPH_VERIFY_SHALLOW; graph_name = get_commit_graph_filename(opts.obj_dir); open_ok = open_commit_graph(graph_name, &fd, &st); - if (!open_ok && errno == ENOENT) - return 0; - if (!open_ok) + if (!open_ok && errno != ENOENT) die_errno(_("Could not open commit-graph '%s'"), graph_name); - graph = load_commit_graph_one_fd_st(fd, &st); + FREE_AND_NULL(graph_name); + if (open_ok) + graph = load_commit_graph_one_fd_st(fd, &st); + else + graph = read_commit_graph_one(the_repository, opts.obj_dir); + + /* Return failure if open_ok predicted success */ if (!graph) - return 1; + return !!open_ok; UNLEAK(graph); - return verify_commit_graph(the_repository, graph); + return verify_commit_graph(the_repository, graph, flags); } static int graph_read(int argc, const char **argv) diff --git a/commit-graph.c b/commit-graph.c index 315088d20531d4..f33f4fe0091bf8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -428,7 +428,7 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const return graph_chain; } -static struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) +struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) { struct commit_graph *g = load_commit_graph_v1(r, obj_dir); @@ -1887,7 +1887,7 @@ static void graph_report(const char *fmt, ...) #define GENERATION_ZERO_EXISTS 1 #define GENERATION_NUMBER_EXISTS 2 -int verify_commit_graph(struct repository *r, struct commit_graph *g) +int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) { uint32_t i, cur_fanout_pos = 0; struct object_id prev_oid, cur_oid, checksum; @@ -1895,6 +1895,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) struct hashfile *f; int devnull; struct progress *progress = NULL; + int local_error = 0; if (!g) { graph_report("no commit-graph file loaded"); @@ -1989,6 +1990,9 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) break; } + /* parse parent in case it is in a base graph */ + parse_commit_in_graph_one(r, g, graph_parents->item); + if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid)) graph_report(_("commit-graph parent for %s is %s != %s"), oid_to_hex(&cur_oid), @@ -2040,7 +2044,12 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) } stop_progress(&progress); - return verify_commit_graph_error; + local_error = verify_commit_graph_error; + + if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW) && g->base_graph) + local_error |= verify_commit_graph(r, g->base_graph, flags); + + return local_error; } void free_commit_graph(struct commit_graph *g) diff --git a/commit-graph.h b/commit-graph.h index a84c22a5609a40..df9a3b20e4abc7 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -61,7 +61,7 @@ struct commit_graph { }; struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st); - +struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir); struct commit_graph *parse_commit_graph(void *graph_map, int fd, size_t graph_size); @@ -95,7 +95,9 @@ int write_commit_graph(const char *obj_dir, unsigned int flags, const struct split_commit_graph_opts *split_opts); -int verify_commit_graph(struct repository *r, struct commit_graph *g); +#define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0) + +int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags); void close_commit_graph(struct raw_object_store *); void free_commit_graph(struct commit_graph *); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 1b699a543c9beb..3df90ae58f8bee 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -216,4 +216,66 @@ test_expect_success 'test merge stragety constants' ' ) ' +corrupt_file() { + file=$1 + pos=$2 + data="${3:-\0}" + printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc +} + +test_expect_success 'verify hashes along chain, even in shallow' ' + git clone --no-hardlinks . verify && + ( + cd verify && + git commit-graph verify && + base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1760 "\01" && + test_must_fail git commit-graph verify --shallow 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "incorrect checksum" err + ) +' + +test_expect_success 'verify --shallow does not check base contents' ' + git clone --no-hardlinks . verify-shallow && + ( + cd verify-shallow && + git commit-graph verify && + base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1000 "\01" && + git commit-graph verify --shallow && + test_must_fail git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "incorrect checksum" err + ) +' + +test_expect_success 'warn on base graph chunk incorrect' ' + git clone --no-hardlinks . base-chunk && + ( + cd base-chunk && + git commit-graph verify && + base_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1376 "\01" && + git commit-graph verify --shallow 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "commit-graph chain does not match" err + ) +' + +test_expect_success 'verify after commit-graph-chain corruption' ' + git clone --no-hardlinks . verify-chain && + ( + cd verify-chain && + corrupt_file "$graphdir/commit-graph-chain" 60 "G" && + git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "invalid commit-graph chain" err && + corrupt_file "$graphdir/commit-graph-chain" 60 "A" && + git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "unable to find all commit-graph files" err + ) +' + test_done From ba41112a6331e8b454cac6cfd3ee67ae93957a2c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:33 -0700 Subject: [PATCH 28/32] commit-graph: clean up chains after flattened write If we write a commit-graph file without the split option, then we write to $OBJDIR/info/commit-graph and start to ignore the chains in $OBJDIR/info/commit-graphs/. Unlink the commit-graph-chain file and expire the graph-{hash}.graph files in $OBJDIR/info/commit-graphs/ during every write. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 12 +++++++++--- t/t5324-split-commit-graph.sh | 12 ++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index f33f4fe0091bf8..3599ae664d70ea 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1687,6 +1687,12 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) if (ctx->split_opts && ctx->split_opts->expire_time) expire_time -= ctx->split_opts->expire_time; + if (!ctx->split) { + char *chain_file_name = get_chain_filename(ctx->obj_dir); + unlink(chain_file_name); + free(chain_file_name); + ctx->num_commit_graphs_after = 0; + } strbuf_addstr(&path, ctx->obj_dir); strbuf_addstr(&path, "/info/commit-graphs"); @@ -1841,10 +1847,10 @@ int write_commit_graph(const char *obj_dir, res = write_commit_graph_file(ctx); - if (ctx->split) { + if (ctx->split) mark_commit_graphs(ctx); - expire_commit_graphs(ctx); - } + + expire_commit_graphs(ctx); cleanup: free(ctx->graph_name); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 3df90ae58f8bee..e8df35c30b8dd0 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -216,6 +216,18 @@ test_expect_success 'test merge stragety constants' ' ) ' +test_expect_success 'remove commit-graph-chain file after flattening' ' + git clone . flatten && + ( + cd flatten && + test_line_count = 2 $graphdir/commit-graph-chain && + git commit-graph write --reachable && + test_path_is_missing $graphdir/commit-graph-chain && + ls $graphdir >graph-files && + test_line_count = 0 graph-files + ) +' + corrupt_file() { file=$1 pos=$2 From e2017c48fe95d44f95bc4e9f36e2210dff698c40 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:34 -0700 Subject: [PATCH 29/32] commit-graph: test octopus merges with --split Octopus merges require an extra chunk of data in the commit-graph file format. Create a test that ensures the new --split option continues to work with an octopus merge. Specifically, ensure that the octopus merge has parents across layers to truly check that our graph position logic holds up correctly. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5324-split-commit-graph.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index e8df35c30b8dd0..704def70bbce3a 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -290,4 +290,15 @@ test_expect_success 'verify after commit-graph-chain corruption' ' ) ' +test_expect_success 'add octopus merge' ' + git reset --hard commits/10 && + git merge commits/3 commits/4 && + git branch merge/octopus && + git commit-graph write --reachable --split && + git commit-graph verify && + test_line_count = 3 $graphdir/commit-graph-chain +' + +graph_git_behavior 'graph exists' merge/octopus commits/12 + test_done From a09c1301ce5986a374ab008b2125fa26a9bdd2fe Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:35 -0700 Subject: [PATCH 30/32] commit-graph: test --split across alternate without --split We allow sharing commit-graph files across alternates. When we are writing a split commit-graph, we allow adding tip graph files that are not in the alternate, but include commits from our local repo. However, if our alternate is not using the split commit-graph format, its file is at .git/objects/info/commit-graph and we are trying to write files in .git/objects/info/commit-graphs/graph-{hash}.graph. We already have logic to ensure we do not merge across alternate boundaries, but we also cannot have a commit-graph chain to our alternate if uses the old filename structure. Create a test that verifies we create a new split commit-graph with only one level and we do not modify the existing commit-graph in the alternate. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5324-split-commit-graph.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 704def70bbce3a..130f2baf44d7a3 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -301,4 +301,19 @@ test_expect_success 'add octopus merge' ' graph_git_behavior 'graph exists' merge/octopus commits/12 +test_expect_success 'split across alternate where alternate is not split' ' + git commit-graph write --reachable && + test_path_is_file .git/objects/info/commit-graph && + cp .git/objects/info/commit-graph . && + git clone --no-hardlinks . alt-split && + ( + cd alt-split && + echo "$(pwd)"/../.git/objects >.git/objects/info/alternates && + test_commit 18 && + git commit-graph write --reachable --split && + test_line_count = 1 $graphdir/commit-graph-chain + ) && + test_cmp commit-graph .git/objects/info/commit-graph +' + test_done From 16110c9348fc2022e78f2581968cf428c056dc5b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:36 -0700 Subject: [PATCH 31/32] commit-graph: normalize commit-graph filenames When writing commit-graph files, we append path data to an object directory, which may be specified by the user via the '--object-dir' option. If the user supplies a trailing slash, or some other alternative path format, the resulting path may be usable for writing to the correct location. However, when expiring graph files from the /info/commit-graphs directory during a write, we need to compare paths with exact string matches. Normalize the commit-graph filenames to avoid ambiguity. This creates extra allocations, but this is a constant multiple of the number of commit-graph files, which should be a number in the single digits. Further normalize the object directory in the context. Due to a comparison between g->obj_dir and ctx->obj_dir in split_graph_merge_strategy(), a trailing slash would prevent any merging of layers within the same object directory. The check is there to ensure we do not merge across alternates. Update the tests to include a case with this trailing slash problem. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.c | 30 +++++++++++++++++++++++------- t/t5324-split-commit-graph.sh | 7 ++++++- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 3599ae664d70ea..c91e6f0fb82158 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -43,15 +43,23 @@ char *get_commit_graph_filename(const char *obj_dir) { - return xstrfmt("%s/info/commit-graph", obj_dir); + char *filename = xstrfmt("%s/info/commit-graph", obj_dir); + char *normalized = xmalloc(strlen(filename) + 1); + normalize_path_copy(normalized, filename); + free(filename); + return normalized; } static char *get_split_graph_filename(const char *obj_dir, const char *oid_hex) { - return xstrfmt("%s/info/commit-graphs/graph-%s.graph", - obj_dir, - oid_hex); + char *filename = xstrfmt("%s/info/commit-graphs/graph-%s.graph", + obj_dir, + oid_hex); + char *normalized = xmalloc(strlen(filename) + 1); + normalize_path_copy(normalized, filename); + free(filename); + return normalized; } static char *get_chain_filename(const char *obj_dir) @@ -746,7 +754,7 @@ struct packed_oid_list { struct write_commit_graph_context { struct repository *r; - const char *obj_dir; + char *obj_dir; char *graph_name; struct packed_oid_list oids; struct packed_commit_list commits; @@ -1729,7 +1737,6 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) if (!found) unlink(path.buf); - } } @@ -1741,6 +1748,7 @@ int write_commit_graph(const char *obj_dir, { struct write_commit_graph_context *ctx; uint32_t i, count_distinct = 0; + size_t len; int res = 0; if (!commit_graph_compatible(the_repository)) @@ -1748,7 +1756,14 @@ int write_commit_graph(const char *obj_dir, ctx = xcalloc(1, sizeof(struct write_commit_graph_context)); ctx->r = the_repository; - ctx->obj_dir = obj_dir; + + /* normalize object dir with no trailing slash */ + ctx->obj_dir = xmallocz(strlen(obj_dir) + 1); + normalize_path_copy(ctx->obj_dir, obj_dir); + len = strlen(ctx->obj_dir); + if (len && ctx->obj_dir[len - 1] == '/') + ctx->obj_dir[len - 1] = 0; + ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; @@ -1856,6 +1871,7 @@ int write_commit_graph(const char *obj_dir, free(ctx->graph_name); free(ctx->commits.list); free(ctx->oids.list); + free(ctx->obj_dir); if (ctx->commit_graph_filenames_after) { for (i = 0; i < ctx->num_commit_graphs_after; i++) { diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 130f2baf44d7a3..fc0d00751c301b 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -163,7 +163,12 @@ test_expect_success 'create fork and chain across alternate' ' test_line_count = 1 graph-files && git -c core.commitGraph=true rev-list HEAD >expect && git -c core.commitGraph=false rev-list HEAD >actual && - test_cmp expect actual + test_cmp expect actual && + test_commit 14 && + git commit-graph write --reachable --split --object-dir=.git/objects/ && + test_line_count = 3 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files ) ' From 5b15eb397d176c557a82e872d6b4043aa7589874 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:36 -0700 Subject: [PATCH 32/32] commit-graph: test verify across alternates The 'git commit-graph verify' subcommand loads a commit-graph from a given object directory instead of using the standard method prepare_commit_graph(). During development of load_commit_graph_chain(), a version did not include prepare_alt_odb() as it was previously run by prepare_commit_graph() in most cases. Add a test that prevents that mistake from happening again. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5324-split-commit-graph.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index fc0d00751c301b..03f45a1ed9023c 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -237,6 +237,7 @@ corrupt_file() { file=$1 pos=$2 data="${3:-\0}" + chmod a+w "$file" && printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc } @@ -295,6 +296,24 @@ test_expect_success 'verify after commit-graph-chain corruption' ' ) ' +test_expect_success 'verify across alternates' ' + git clone --no-hardlinks . verify-alt && + ( + cd verify-alt && + rm -rf $graphdir && + altdir="$(pwd)/../.git/objects" && + echo "$altdir" >.git/objects/info/alternates && + git commit-graph verify --object-dir="$altdir/" && + test_commit extra && + git commit-graph write --reachable --split && + tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$tip_file" 100 "\01" && + test_must_fail git commit-graph verify --shallow 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "commit-graph has incorrect fanout value" err + ) +' + test_expect_success 'add octopus merge' ' git reset --hard commits/10 && git merge commits/3 commits/4 &&