From b71687ca03fe5909eae54cd6f2e044822ca335de Mon Sep 17 00:00:00 2001 From: Roy Eldar Date: Wed, 11 Dec 2024 08:32:28 +0200 Subject: [PATCH 001/535] git-submodule.sh: improve parsing of some long options Some command-line options have a long form which takes an argument. In this case, the argument can be given right after `='; for example, "--depth" takes a numerical argument, which can be given as "--depth=X". Support the case where the argument is given right after `=' for all long options, in order to improve consistency throughout the script. Signed-off-by: Roy Eldar Signed-off-by: Junio C Hamano --- git-submodule.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/git-submodule.sh b/git-submodule.sh index 03c5a220a26ded..d3e3669fded31f 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -77,6 +77,9 @@ cmd_add() branch=$2 shift ;; + --branch=*) + branch="${1#--branch=}" + ;; -f | --force) force=$1 ;; @@ -110,6 +113,9 @@ cmd_add() custom_name=$2 shift ;; + --name=*) + custom_name="${1#--name=}" + ;; --depth) case "$2" in '') usage ;; esac depth="--depth=$2" @@ -425,6 +431,9 @@ cmd_set_branch() { branch=$2 shift ;; + --branch=*) + branch="${1#--branch=}" + ;; --) shift break From e6c3e349458ec2ad36addc6004cffcfa6d663c38 Mon Sep 17 00:00:00 2001 From: Roy Eldar Date: Wed, 11 Dec 2024 08:32:29 +0200 Subject: [PATCH 002/535] git-submodule.sh: improve parsing of short options Some command-line options have a short form which takes an argument; for example, "--jobs" has the form "-j", and it takes a numerical argument. When parsing short options, support the case where there is no space between the flag and the option argument, in order to improve consistency with the rest of the builtin git commands. Signed-off-by: Roy Eldar Signed-off-by: Junio C Hamano --- git-submodule.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/git-submodule.sh b/git-submodule.sh index d3e3669fded31f..fd54cb8fa6a7ef 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -77,6 +77,9 @@ cmd_add() branch=$2 shift ;; + -b*) + branch="${1#-b}" + ;; --branch=*) branch="${1#--branch=}" ;; @@ -352,6 +355,9 @@ cmd_update() jobs="--jobs=$2" shift ;; + -j*) + jobs="--jobs=${1#-j}" + ;; --jobs=*) jobs=$1 ;; @@ -431,6 +437,9 @@ cmd_set_branch() { branch=$2 shift ;; + -b*) + branch="${1#-b}" + ;; --branch=*) branch="${1#--branch=}" ;; @@ -519,6 +528,10 @@ cmd_summary() { isnumber "$summary_limit" || usage shift ;; + -n*) + summary_limit="${1#-n}" + isnumber "$summary_limit" || usage + ;; --summary-limit=*) summary_limit="${1#--summary-limit=}" isnumber "$summary_limit" || usage From 006f546bc30bd42de6ba569ab70ec80441f54430 Mon Sep 17 00:00:00 2001 From: Roy Eldar Date: Wed, 11 Dec 2024 08:32:30 +0200 Subject: [PATCH 003/535] git-submodule.sh: get rid of isnumber It's entirely unnecessary to check whether the argument given to an option (i.e. --summary-limit) is valid in the shell wrapper, since it's already done when parsing the various options in git-submodule--helper. Remove this check from the script; this both improves consistency throughout the script, and the error message shown to the user in case some invalid non-numeric argument was passed to "--summary-limit" is more informative as well. Signed-off-by: Roy Eldar Signed-off-by: Junio C Hamano --- git-submodule.sh | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/git-submodule.sh b/git-submodule.sh index fd54cb8fa6a7ef..3adaa8d9a3fe09 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -53,11 +53,6 @@ jobs= recommend_shallow= filter= -isnumber() -{ - n=$(($1 + 0)) 2>/dev/null && test "$n" = "$1" -} - # # Add a new submodule to the working tree, .gitmodules and the index # @@ -524,17 +519,15 @@ cmd_summary() { for_status="$1" ;; -n|--summary-limit) + case "$2" in '') usage ;; esac summary_limit="$2" - isnumber "$summary_limit" || usage shift ;; -n*) summary_limit="${1#-n}" - isnumber "$summary_limit" || usage ;; --summary-limit=*) summary_limit="${1#--summary-limit=}" - isnumber "$summary_limit" || usage ;; --) shift @@ -554,7 +547,7 @@ cmd_summary() { ${files:+--files} \ ${cached:+--cached} \ ${for_status:+--for-status} \ - ${summary_limit:+-n $summary_limit} \ + ${summary_limit:+-n "$summary_limit"} \ -- \ "$@" } From 402e46daf5ebf96f2cb31bf37e3d1637247688e5 Mon Sep 17 00:00:00 2001 From: Roy Eldar Date: Wed, 11 Dec 2024 08:32:31 +0200 Subject: [PATCH 004/535] git-submodule.sh: get rid of unused variable Remove the variable "$diff_cmd" which is no longer used. Signed-off-by: Roy Eldar Signed-off-by: Junio C Hamano --- git-submodule.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/git-submodule.sh b/git-submodule.sh index 3adaa8d9a3fe09..ba3bef88214797 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -503,7 +503,6 @@ cmd_set_url() { cmd_summary() { summary_limit=-1 for_status= - diff_cmd=diff-index # parse $args after "submodule ... summary". while test $# -ne 0 From 57f9b30fcd7707b3917d96e98dc109fb541cd30b Mon Sep 17 00:00:00 2001 From: Roy Eldar Date: Wed, 11 Dec 2024 08:32:32 +0200 Subject: [PATCH 005/535] git-submodule.sh: add some comments Add a couple of comments in a few functions where they were missing. Signed-off-by: Roy Eldar Signed-off-by: Junio C Hamano --- git-submodule.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/git-submodule.sh b/git-submodule.sh index ba3bef88214797..ee86e4de46f692 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -418,6 +418,7 @@ cmd_set_branch() { default= branch= + # parse $args after "submodule ... set-branch". while test $# -ne 0 do case "$1" in @@ -466,6 +467,7 @@ cmd_set_branch() { # $@ = requested path, requested url # cmd_set_url() { + # parse $args after "submodule ... set-url". while test $# -ne 0 do case "$1" in @@ -604,6 +606,7 @@ cmd_status() # cmd_sync() { + # parse $args after "submodule ... sync". while test $# -ne 0 do case "$1" in From 3ad0ba72274436f4e1eef6bed392e3e875484e2b Mon Sep 17 00:00:00 2001 From: Roy Eldar Date: Wed, 11 Dec 2024 08:32:33 +0200 Subject: [PATCH 006/535] git-submodule.sh: improve variables readability When git-submodule.sh parses various options and switches, it sets some variables to values; the variables in turn affect the options given to git-submodule--helper. Currently, variables which correspond to switches have boolean values (for example, whenever "--force" is passed, force=1), while variables which correspond to options which take arguments have string values that sometimes contain the option name and sometimes only the option value. Set all of the variables to strings which contain the option name (e.g. force="--force" rather than force=1); this has a couple of advantages: it improves consistency, readability and debuggability. Suggested-by: Junio C Hamano Signed-off-by: Roy Eldar Signed-off-by: Junio C Hamano --- git-submodule.sh | 213 +++++++++++++++++++++-------------------------- 1 file changed, 95 insertions(+), 118 deletions(-) diff --git a/git-submodule.sh b/git-submodule.sh index ee86e4de46f692..6df25efc480401 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -52,6 +52,10 @@ single_branch= jobs= recommend_shallow= filter= +deinit_all= +default= +summary_limit= +for_status= # # Add a new submodule to the working tree, .gitmodules and the index @@ -63,37 +67,33 @@ filter= cmd_add() { # parse $args after "submodule ... add". - reference_path= while test $# -ne 0 do case "$1" in -b | --branch) case "$2" in '') usage ;; esac - branch=$2 + branch="--branch=$2" shift ;; - -b*) - branch="${1#-b}" - ;; - --branch=*) - branch="${1#--branch=}" + -b* | --branch=*) + branch="$1" ;; -f | --force) force=$1 ;; -q|--quiet) - quiet=1 + quiet=$1 ;; --progress) - progress=1 + progress=$1 ;; --reference) case "$2" in '') usage ;; esac - reference_path=$2 + reference="--reference=$2" shift ;; --reference=*) - reference_path="${1#--reference=}" + reference="$1" ;; --ref-format) case "$2" in '') usage ;; esac @@ -104,15 +104,15 @@ cmd_add() ref_format="$1" ;; --dissociate) - dissociate=1 + dissociate=$1 ;; --name) case "$2" in '') usage ;; esac - custom_name=$2 + custom_name="--name=$2" shift ;; --name=*) - custom_name="${1#--name=}" + custom_name="$1" ;; --depth) case "$2" in '') usage ;; esac @@ -120,7 +120,7 @@ cmd_add() shift ;; --depth=*) - depth=$1 + depth="$1" ;; --) shift @@ -142,14 +142,14 @@ cmd_add() fi git ${wt_prefix:+-C "$wt_prefix"} submodule--helper add \ - ${quiet:+--quiet} \ - ${force:+--force} \ - ${progress:+"--progress"} \ - ${branch:+--branch "$branch"} \ - ${reference_path:+--reference "$reference_path"} \ + $quiet \ + $force \ + $progress \ + ${branch:+"$branch"} \ + ${reference:+"$reference"} \ ${ref_format:+"$ref_format"} \ - ${dissociate:+--dissociate} \ - ${custom_name:+--name "$custom_name"} \ + $dissociate \ + ${custom_name:+"$custom_name"} \ ${depth:+"$depth"} \ -- \ "$@" @@ -168,10 +168,10 @@ cmd_foreach() do case "$1" in -q|--quiet) - quiet=1 + quiet=$1 ;; --recursive) - recursive=1 + recursive=$1 ;; -*) usage @@ -184,8 +184,8 @@ cmd_foreach() done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper foreach \ - ${quiet:+--quiet} \ - ${recursive:+--recursive} \ + $quiet \ + $recursive \ -- \ "$@" } @@ -202,7 +202,7 @@ cmd_init() do case "$1" in -q|--quiet) - quiet=1 + quiet=$1 ;; --) shift @@ -219,7 +219,7 @@ cmd_init() done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper init \ - ${quiet:+--quiet} \ + $quiet \ -- \ "$@" } @@ -230,7 +230,6 @@ cmd_init() cmd_deinit() { # parse $args after "submodule ... deinit". - deinit_all= while test $# -ne 0 do case "$1" in @@ -238,10 +237,10 @@ cmd_deinit() force=$1 ;; -q|--quiet) - quiet=1 + quiet=$1 ;; --all) - deinit_all=t + deinit_all=$1 ;; --) shift @@ -258,9 +257,9 @@ cmd_deinit() done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper deinit \ - ${quiet:+--quiet} \ - ${force:+--force} \ - ${deinit_all:+--all} \ + $quiet \ + $force \ + $deinit_all \ -- \ "$@" } @@ -277,31 +276,31 @@ cmd_update() do case "$1" in -q|--quiet) - quiet=1 + quiet=$1 ;; -v|--verbose) - quiet=0 + quiet= ;; --progress) - progress=1 + progress=$1 ;; -i|--init) - init=1 + init=$1 ;; --require-init) - require_init=1 + require_init=$1 ;; --remote) - remote=1 + remote=$1 ;; -N|--no-fetch) - nofetch=1 + nofetch=$1 ;; -f|--force) force=$1 ;; -r|--rebase) - rebase=1 + rebase=$1 ;; --ref-format) case "$2" in '') usage ;; esac @@ -320,22 +319,19 @@ cmd_update() reference="$1" ;; --dissociate) - dissociate=1 + dissociate=$1 ;; -m|--merge) - merge=1 + merge=$1 ;; --recursive) - recursive=1 + recursive=$1 ;; --checkout) - checkout=1 - ;; - --recommend-shallow) - recommend_shallow="--recommend-shallow" + checkout=$1 ;; - --no-recommend-shallow) - recommend_shallow="--no-recommend-shallow" + --recommend-shallow|--no-recommend-shallow) + recommend_shallow=$1 ;; --depth) case "$2" in '') usage ;; esac @@ -343,24 +339,18 @@ cmd_update() shift ;; --depth=*) - depth=$1 + depth="$1" ;; -j|--jobs) case "$2" in '') usage ;; esac jobs="--jobs=$2" shift ;; - -j*) - jobs="--jobs=${1#-j}" - ;; - --jobs=*) - jobs=$1 + -j*|--jobs=*) + jobs="$1" ;; - --single-branch) - single_branch="--single-branch" - ;; - --no-single-branch) - single_branch="--no-single-branch" + --single-branch|--no-single-branch) + single_branch=$1 ;; --filter) case "$2" in '') usage ;; esac @@ -385,22 +375,21 @@ cmd_update() done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper update \ - ${quiet:+--quiet} \ - ${force:+--force} \ - ${progress:+"--progress"} \ - ${remote:+--remote} \ - ${recursive:+--recursive} \ - ${init:+--init} \ - ${nofetch:+--no-fetch} \ - ${rebase:+--rebase} \ - ${merge:+--merge} \ - ${checkout:+--checkout} \ + $quiet \ + $force \ + $progress \ + $remote \ + $recursive \ + $init \ + $nofetch \ + $rebase \ + $merge \ + $checkout \ ${ref_format:+"$ref_format"} \ ${reference:+"$reference"} \ - ${dissociate:+"--dissociate"} \ + $dissociate \ ${depth:+"$depth"} \ - ${require_init:+--require-init} \ - ${dissociate:+"--dissociate"} \ + $require_init \ $single_branch \ $recommend_shallow \ $jobs \ @@ -415,9 +404,6 @@ cmd_update() # $@ = requested path # cmd_set_branch() { - default= - branch= - # parse $args after "submodule ... set-branch". while test $# -ne 0 do @@ -426,18 +412,15 @@ cmd_set_branch() { # we don't do anything with this but we need to accept it ;; -d|--default) - default=1 + default=$1 ;; -b|--branch) case "$2" in '') usage ;; esac - branch=$2 + branch="--branch=$2" shift ;; - -b*) - branch="${1#-b}" - ;; - --branch=*) - branch="${1#--branch=}" + -b*|--branch=*) + branch="$1" ;; --) shift @@ -454,9 +437,9 @@ cmd_set_branch() { done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper set-branch \ - ${quiet:+--quiet} \ - ${branch:+--branch "$branch"} \ - ${default:+--default} \ + $quiet \ + ${branch:+"$branch"} \ + $default \ -- \ "$@" } @@ -472,7 +455,7 @@ cmd_set_url() { do case "$1" in -q|--quiet) - quiet=1 + quiet=$1 ;; --) shift @@ -489,7 +472,7 @@ cmd_set_url() { done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper set-url \ - ${quiet:+--quiet} \ + $quiet \ -- \ "$@" } @@ -503,32 +486,26 @@ cmd_set_url() { # $@ = [commit (default 'HEAD'),] requested paths (default all) # cmd_summary() { - summary_limit=-1 - for_status= - # parse $args after "submodule ... summary". while test $# -ne 0 do case "$1" in --cached) - cached=1 + cached=$1 ;; --files) - files="$1" + files=$1 ;; --for-status) - for_status="$1" + for_status=$1 ;; -n|--summary-limit) case "$2" in '') usage ;; esac - summary_limit="$2" + summary_limit="--summary-limit=$2" shift ;; - -n*) - summary_limit="${1#-n}" - ;; - --summary-limit=*) - summary_limit="${1#--summary-limit=}" + -n*|--summary-limit=*) + summary_limit="$1" ;; --) shift @@ -545,10 +522,10 @@ cmd_summary() { done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper summary \ - ${files:+--files} \ - ${cached:+--cached} \ - ${for_status:+--for-status} \ - ${summary_limit:+-n "$summary_limit"} \ + $files \ + $cached \ + $for_status \ + ${summary_limit:+"$summary_limit"} \ -- \ "$@" } @@ -569,13 +546,13 @@ cmd_status() do case "$1" in -q|--quiet) - quiet=1 + quiet=$1 ;; --cached) - cached=1 + cached=$1 ;; --recursive) - recursive=1 + recursive=$1 ;; --) shift @@ -592,9 +569,9 @@ cmd_status() done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper status \ - ${quiet:+--quiet} \ - ${cached:+--cached} \ - ${recursive:+--recursive} \ + $quiet \ + $cached \ + $recursive \ -- \ "$@" } @@ -611,11 +588,11 @@ cmd_sync() do case "$1" in -q|--quiet) - quiet=1 + quiet=$1 shift ;; --recursive) - recursive=1 + recursive=$1 shift ;; --) @@ -632,8 +609,8 @@ cmd_sync() done git ${wt_prefix:+-C "$wt_prefix"} submodule--helper sync \ - ${quiet:+--quiet} \ - ${recursive:+--recursive} \ + $quiet \ + $recursive \ -- \ "$@" } @@ -656,10 +633,10 @@ do command=$1 ;; -q|--quiet) - quiet=1 + quiet=$1 ;; --cached) - cached=1 + cached=$1 ;; --) break From b86f0f9071dd881a14cb9a71d94a66ae3186a2b9 Mon Sep 17 00:00:00 2001 From: Roy Eldar Date: Wed, 11 Dec 2024 08:32:34 +0200 Subject: [PATCH 007/535] git-submodule.sh: rename some variables Every switch and option which is passed to git-submodule.sh has a corresponding variable which is set accordingly; by convention, the name of the variable is the option name (for example, "--jobs" and "$jobs"). Rename "$custom_name", "$deinit_all" and "$nofetch", for consistency. Signed-off-by: Roy Eldar Signed-off-by: Junio C Hamano --- git-submodule.sh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/git-submodule.sh b/git-submodule.sh index 6df25efc480401..2999b31fad3e97 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -40,11 +40,11 @@ init= require_init= files= remote= -nofetch= +no_fetch= rebase= merge= checkout= -custom_name= +name= depth= progress= dissociate= @@ -52,7 +52,7 @@ single_branch= jobs= recommend_shallow= filter= -deinit_all= +all= default= summary_limit= for_status= @@ -108,11 +108,11 @@ cmd_add() ;; --name) case "$2" in '') usage ;; esac - custom_name="--name=$2" + name="--name=$2" shift ;; --name=*) - custom_name="$1" + name="$1" ;; --depth) case "$2" in '') usage ;; esac @@ -149,7 +149,7 @@ cmd_add() ${reference:+"$reference"} \ ${ref_format:+"$ref_format"} \ $dissociate \ - ${custom_name:+"$custom_name"} \ + ${name:+"$name"} \ ${depth:+"$depth"} \ -- \ "$@" @@ -240,7 +240,7 @@ cmd_deinit() quiet=$1 ;; --all) - deinit_all=$1 + all=$1 ;; --) shift @@ -259,7 +259,7 @@ cmd_deinit() git ${wt_prefix:+-C "$wt_prefix"} submodule--helper deinit \ $quiet \ $force \ - $deinit_all \ + $all \ -- \ "$@" } @@ -294,7 +294,7 @@ cmd_update() remote=$1 ;; -N|--no-fetch) - nofetch=$1 + no_fetch=$1 ;; -f|--force) force=$1 @@ -381,7 +381,7 @@ cmd_update() $remote \ $recursive \ $init \ - $nofetch \ + $no_fetch \ $rebase \ $merge \ $checkout \ From 1f7e6478dcd9e7462c70a5784ae0d41ab25ced11 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:48 +0100 Subject: [PATCH 008/535] progress: stop using `the_repository` Stop using `the_repository` in the "progress" subsystem by passing in a repository when initializing `struct progress`. Furthermore, store a pointer to the repository in that struct so that we can pass it to the trace2 API when logging information. Adjust callers accordingly by using `the_repository`. While there may be some callers that have a repository available in their context, this trivial conversion allows for easier verification and bubbles up the use of `the_repository` by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/blame.c | 4 +++- builtin/commit-graph.c | 1 + builtin/fsck.c | 12 ++++++++---- builtin/index-pack.c | 7 +++++-- builtin/log.c | 3 ++- builtin/pack-objects.c | 21 ++++++++++++++------- builtin/prune.c | 3 ++- builtin/remote.c | 3 ++- builtin/rev-list.c | 3 ++- builtin/unpack-objects.c | 3 ++- commit-graph.c | 20 +++++++++++++++++--- delta-islands.c | 3 ++- diffcore-rename.c | 1 + entry.c | 4 +++- midx-write.c | 11 ++++++++--- midx.c | 13 +++++++++---- pack-bitmap-write.c | 6 ++++-- pack-bitmap.c | 4 +++- preload-index.c | 4 +++- progress.c | 34 ++++++++++++++++++++-------------- progress.h | 13 +++++++++---- prune-packed.c | 3 ++- pseudo-merge.c | 3 ++- read-cache.c | 3 ++- t/helper/test-progress.c | 6 +++++- unpack-trees.c | 4 +++- walker.c | 3 ++- 27 files changed, 136 insertions(+), 59 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index 867032e4c16878..dd78288530f06e 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1193,7 +1193,9 @@ int cmd_blame(int argc, sb.found_guilty_entry = &found_guilty_entry; sb.found_guilty_entry_data = π if (show_progress) - pi.progress = start_delayed_progress(_("Blaming lines"), num_lines); + pi.progress = start_delayed_progress(the_repository, + _("Blaming lines"), + num_lines); assign_blame(&sb, opt); diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index bd70d052e706b6..8ca75262c59c48 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -305,6 +305,7 @@ static int graph_write(int argc, const char **argv, const char *prefix, oidset_init(&commits, 0); if (opts.progress) progress = start_delayed_progress( + the_repository, _("Collecting commits from input"), 0); while (strbuf_getline(&buf, stdin) != EOF) { diff --git a/builtin/fsck.c b/builtin/fsck.c index 0196c54eb68ee5..7a4dcb0716052f 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -197,7 +197,8 @@ static int traverse_reachable(void) unsigned int nr = 0; int result = 0; if (show_progress) - progress = start_delayed_progress(_("Checking connectivity"), 0); + progress = start_delayed_progress(the_repository, + _("Checking connectivity"), 0); while (pending.nr) { result |= traverse_one_object(object_array_pop(&pending)); display_progress(progress, ++nr); @@ -703,7 +704,8 @@ static void fsck_object_dir(const char *path) fprintf_ln(stderr, _("Checking object directory")); if (show_progress) - progress = start_progress(_("Checking object directories"), 256); + progress = start_progress(the_repository, + _("Checking object directories"), 256); for_each_loose_file_in_objdir(path, fsck_loose, fsck_cruft, fsck_subdir, &cb_data); @@ -879,7 +881,8 @@ static int check_pack_rev_indexes(struct repository *r, int show_progress) if (show_progress) { for (struct packed_git *p = get_all_packs(r); p; p = p->next) pack_count++; - progress = start_delayed_progress("Verifying reverse pack-indexes", pack_count); + progress = start_delayed_progress(the_repository, + "Verifying reverse pack-indexes", pack_count); pack_count = 0; } @@ -989,7 +992,8 @@ int cmd_fsck(int argc, total += p->num_objects; } - progress = start_progress(_("Checking objects"), total); + progress = start_progress(the_repository, + _("Checking objects"), total); } for (p = get_all_packs(the_repository); p; p = p->next) { diff --git a/builtin/index-pack.c b/builtin/index-pack.c index d773809c4c9660..05691104c36b4f 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -282,7 +282,8 @@ static unsigned check_objects(void) max = get_max_object_index(); if (verbose) - progress = start_delayed_progress(_("Checking objects"), max); + progress = start_delayed_progress(the_repository, + _("Checking objects"), max); for (i = 0; i < max; i++) { foreign_nr += check_object(get_indexed_object(i)); @@ -1249,6 +1250,7 @@ static void parse_pack_objects(unsigned char *hash) if (verbose) progress = start_progress( + the_repository, progress_title ? progress_title : from_stdin ? _("Receiving objects") : _("Indexing objects"), nr_objects); @@ -1329,7 +1331,8 @@ static void resolve_deltas(struct pack_idx_option *opts) QSORT(ref_deltas, nr_ref_deltas, compare_ref_delta_entry); if (verbose || show_resolving_progress) - progress = start_progress(_("Resolving deltas"), + progress = start_progress(the_repository, + _("Resolving deltas"), nr_ref_deltas + nr_ofs_deltas); nr_dispatched = 0; diff --git a/builtin/log.c b/builtin/log.c index cb41a035c6ead5..317335e60d4501 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -2495,7 +2495,8 @@ int cmd_format_patch(int argc, rev.add_signoff = cfg.do_signoff; if (show_progress) - progress = start_delayed_progress(_("Generating patches"), total); + progress = start_delayed_progress(the_repository, + _("Generating patches"), total); while (0 <= --nr) { int shown; display_progress(progress, total - nr); diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1c3b8426515c42..d51c021d99d9f4 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1264,7 +1264,8 @@ static void write_pack_file(void) struct object_entry **write_order; if (progress > pack_to_stdout) - progress_state = start_progress(_("Writing objects"), nr_result); + progress_state = start_progress(the_repository, + _("Writing objects"), nr_result); ALLOC_ARRAY(written_list, to_pack.nr_objects); write_order = compute_write_order(); @@ -2400,7 +2401,8 @@ static void get_object_details(void) struct object_entry **sorted_by_offset; if (progress) - progress_state = start_progress(_("Counting objects"), + progress_state = start_progress(the_repository, + _("Counting objects"), to_pack.nr_objects); CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects); @@ -3220,7 +3222,8 @@ static void prepare_pack(int window, int depth) unsigned nr_done = 0; if (progress) - progress_state = start_progress(_("Compressing objects"), + progress_state = start_progress(the_repository, + _("Compressing objects"), nr_deltas); QSORT(delta_list, n, type_size_sort); ll_find_deltas(delta_list, n, window+1, depth, &nr_done); @@ -3648,7 +3651,8 @@ static void add_objects_in_unpacked_packs(void); static void enumerate_cruft_objects(void) { if (progress) - progress_state = start_progress(_("Enumerating cruft objects"), 0); + progress_state = start_progress(the_repository, + _("Enumerating cruft objects"), 0); add_objects_in_unpacked_packs(); add_unreachable_loose_objects(); @@ -3674,7 +3678,8 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs revs.ignore_missing_links = 1; if (progress) - progress_state = start_progress(_("Enumerating cruft objects"), 0); + progress_state = start_progress(the_repository, + _("Enumerating cruft objects"), 0); ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration, set_cruft_mtime, 1); stop_progress(&progress_state); @@ -3693,7 +3698,8 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); if (progress) - progress_state = start_progress(_("Traversing cruft objects"), 0); + progress_state = start_progress(the_repository, + _("Traversing cruft objects"), 0); nr_seen = 0; traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL); @@ -4625,7 +4631,8 @@ int cmd_pack_objects(int argc, prepare_packing_data(the_repository, &to_pack); if (progress && !cruft) - progress_state = start_progress(_("Enumerating objects"), 0); + progress_state = start_progress(the_repository, + _("Enumerating objects"), 0); if (stdin_packs) { /* avoids adding objects in excluded packs */ ignore_packed_keep_in_core = 1; diff --git a/builtin/prune.c b/builtin/prune.c index aeff9ca1b35c3f..1c357fffd8cde6 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -64,7 +64,8 @@ static void perform_reachability_traversal(struct rev_info *revs) return; if (show_progress) - progress = start_delayed_progress(_("Checking connectivity"), 0); + progress = start_delayed_progress(the_repository, + _("Checking connectivity"), 0); mark_reachable_objects(revs, 1, expire, progress); stop_progress(&progress); initialized = 1; diff --git a/builtin/remote.c b/builtin/remote.c index b2b13a7dd2acd4..95440bc9ff284e 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -820,7 +820,8 @@ static int mv(int argc, const char **argv, const char *prefix, * Count symrefs twice, since "renaming" them is done by * deleting and recreating them in two separate passes. */ - progress = start_progress(_("Renaming remote references"), + progress = start_progress(the_repository, + _("Renaming remote references"), rename.remote_branches->nr + rename.symrefs_nr); } for (i = 0; i < remote_branches.nr; i++) { diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 3196da7b2d23e3..8a7db9b546196c 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -735,7 +735,8 @@ int cmd_rev_list(int argc, revs.limited = 1; if (show_progress) - progress = start_delayed_progress(show_progress, 0); + progress = start_delayed_progress(the_repository, + show_progress, 0); if (use_bitmap_index) { if (!try_bitmap_count(&revs, filter_provided_objects)) diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 2197d6d9332117..842a90353a50b6 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -590,7 +590,8 @@ static void unpack_all(void) use(sizeof(struct pack_header)); if (!quiet) - progress = start_progress(_("Unpacking objects"), nr_objects); + progress = start_progress(the_repository, + _("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); begin_odb_transaction(); for (i = 0; i < nr_objects; i++) { diff --git a/commit-graph.c b/commit-graph.c index 0df66e5a243390..2a2999a6b88690 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1534,6 +1534,7 @@ static void close_reachable(struct write_commit_graph_context *ctx) if (ctx->report_progress) ctx->progress = start_delayed_progress( + the_repository, _("Loading known commits in commit graph"), ctx->oids.nr); for (i = 0; i < ctx->oids.nr; i++) { @@ -1551,6 +1552,7 @@ static void close_reachable(struct write_commit_graph_context *ctx) */ if (ctx->report_progress) ctx->progress = start_delayed_progress( + the_repository, _("Expanding reachable commits in commit graph"), 0); for (i = 0; i < ctx->oids.nr; i++) { @@ -1571,6 +1573,7 @@ static void close_reachable(struct write_commit_graph_context *ctx) if (ctx->report_progress) ctx->progress = start_delayed_progress( + the_repository, _("Clearing commit marks in commit graph"), ctx->oids.nr); for (i = 0; i < ctx->oids.nr; i++) { @@ -1688,6 +1691,7 @@ static void compute_topological_levels(struct write_commit_graph_context *ctx) if (ctx->report_progress) info.progress = ctx->progress = start_delayed_progress( + the_repository, _("Computing commit graph topological levels"), ctx->commits.nr); @@ -1722,6 +1726,7 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx) if (ctx->report_progress) info.progress = ctx->progress = start_delayed_progress( + the_repository, _("Computing commit graph generation numbers"), ctx->commits.nr); @@ -1798,6 +1803,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx) if (ctx->report_progress) progress = start_delayed_progress( + the_repository, _("Computing commit changed paths Bloom filters"), ctx->commits.nr); @@ -1877,6 +1883,7 @@ int write_commit_graph_reachable(struct object_directory *odb, data.commits = &commits; if (flags & COMMIT_GRAPH_WRITE_PROGRESS) data.progress = start_delayed_progress( + the_repository, _("Collecting referenced commits"), 0); refs_for_each_ref(get_main_ref_store(the_repository), add_ref_to_set, @@ -1908,7 +1915,8 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx, "Finding commits for commit graph in %"PRIuMAX" packs", pack_indexes->nr), (uintmax_t)pack_indexes->nr); - ctx->progress = start_delayed_progress(progress_title.buf, 0); + ctx->progress = start_delayed_progress(the_repository, + progress_title.buf, 0); ctx->progress_done = 0; } for (i = 0; i < pack_indexes->nr; i++) { @@ -1959,6 +1967,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx) { if (ctx->report_progress) ctx->progress = start_delayed_progress( + the_repository, _("Finding commits for commit graph among packed objects"), ctx->approx_nr_objects); for_each_packed_object(ctx->r, add_packed_commits, ctx, @@ -1977,6 +1986,7 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) ctx->num_extra_edges = 0; if (ctx->report_progress) ctx->progress = start_delayed_progress( + the_repository, _("Finding extra edges in commit graph"), ctx->oids.nr); oid_array_sort(&ctx->oids); @@ -2136,6 +2146,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) get_num_chunks(cf)), get_num_chunks(cf)); ctx->progress = start_delayed_progress( + the_repository, progress_title.buf, st_mult(get_num_chunks(cf), ctx->commits.nr)); } @@ -2348,6 +2359,7 @@ static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx) if (ctx->report_progress) ctx->progress = start_delayed_progress( + the_repository, _("Scanning merged commits"), ctx->commits.nr); @@ -2392,7 +2404,8 @@ static void merge_commit_graphs(struct write_commit_graph_context *ctx) current_graph_number--; if (ctx->report_progress) - ctx->progress = start_delayed_progress(_("Merging commit-graph"), 0); + ctx->progress = start_delayed_progress(the_repository, + _("Merging commit-graph"), 0); merge_commit_graph(ctx, g); stop_progress(&ctx->progress); @@ -2874,7 +2887,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW)) total += g->num_commits_in_base; - progress = start_progress(_("Verifying commits in commit graph"), + progress = start_progress(the_repository, + _("Verifying commits in commit graph"), total); } diff --git a/delta-islands.c b/delta-islands.c index 1c465a60419145..3aec43fada36f7 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -267,7 +267,8 @@ void resolve_tree_islands(struct repository *r, QSORT(todo, nr, tree_depth_compare); if (progress) - progress_state = start_progress(_("Propagating island marks"), nr); + progress_state = start_progress(the_repository, + _("Propagating island marks"), nr); for (i = 0; i < nr; i++) { struct object_entry *ent = todo[i].entry; diff --git a/diffcore-rename.c b/diffcore-rename.c index 10bb0321b10d58..91b77993c7827f 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -1567,6 +1567,7 @@ void diffcore_rename_extended(struct diff_options *options, trace2_region_enter("diff", "inexact renames", options->repo); if (options->show_rename_progress) { progress = start_delayed_progress( + the_repository, _("Performing inexact rename detection"), (uint64_t)num_destinations * (uint64_t)num_sources); } diff --git a/entry.c b/entry.c index 53a1c393582b2e..81b321e53d1b96 100644 --- a/entry.c +++ b/entry.c @@ -188,7 +188,9 @@ int finish_delayed_checkout(struct checkout *state, int show_progress) dco->state = CE_RETRY; if (show_progress) - progress = start_delayed_progress(_("Filtering content"), dco->paths.nr); + progress = start_delayed_progress(the_repository, + _("Filtering content"), + dco->paths.nr); while (dco->filters.nr > 0) { for_each_string_list_item(filter, &dco->filters) { struct string_list available_paths = STRING_LIST_INIT_DUP; diff --git a/midx-write.c b/midx-write.c index 0066594fa6310b..b3827b936bdb1d 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1131,7 +1131,8 @@ static int write_midx_internal(struct repository *r, const char *object_dir, ctx.pack_paths_checked = 0; if (flags & MIDX_PROGRESS) - ctx.progress = start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0); + ctx.progress = start_delayed_progress(r, + _("Adding packfiles to multi-pack-index"), 0); else ctx.progress = NULL; @@ -1539,7 +1540,9 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla CALLOC_ARRAY(count, m->num_packs); if (flags & MIDX_PROGRESS) - progress = start_delayed_progress(_("Counting referenced objects"), + progress = start_delayed_progress( + r, + _("Counting referenced objects"), m->num_objects); for (i = 0; i < m->num_objects; i++) { int pack_int_id = nth_midxed_pack_int_id(m, i); @@ -1549,7 +1552,9 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla stop_progress(&progress); if (flags & MIDX_PROGRESS) - progress = start_delayed_progress(_("Finding and deleting unreferenced packfiles"), + progress = start_delayed_progress( + r, + _("Finding and deleting unreferenced packfiles"), m->num_packs); for (i = 0; i < m->num_packs; i++) { char *pack_name; diff --git a/midx.c b/midx.c index f8a75cafd4efea..d91088efb87ca0 100644 --- a/midx.c +++ b/midx.c @@ -907,7 +907,8 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag midx_report(_("incorrect checksum")); if (flags & MIDX_PROGRESS) - progress = start_delayed_progress(_("Looking for referenced packfiles"), + progress = start_delayed_progress(r, + _("Looking for referenced packfiles"), m->num_packs + m->num_packs_in_base); for (i = 0; i < m->num_packs + m->num_packs_in_base; i++) { if (prepare_midx_pack(r, m, i)) @@ -927,7 +928,8 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag } if (flags & MIDX_PROGRESS) - progress = start_sparse_progress(_("Verifying OID order in multi-pack-index"), + progress = start_sparse_progress(r, + _("Verifying OID order in multi-pack-index"), m->num_objects - 1); for (curr = m; curr; curr = curr->base_midx) { @@ -959,14 +961,17 @@ int verify_midx_file(struct repository *r, const char *object_dir, unsigned flag } if (flags & MIDX_PROGRESS) - progress = start_sparse_progress(_("Sorting objects by packfile"), + progress = start_sparse_progress(r, + _("Sorting objects by packfile"), m->num_objects); display_progress(progress, 0); /* TODO: Measure QSORT() progress */ QSORT(pairs, m->num_objects, compare_pair_pos_vs_id); stop_progress(&progress); if (flags & MIDX_PROGRESS) - progress = start_sparse_progress(_("Verifying object offsets"), m->num_objects); + progress = start_sparse_progress(r, + _("Verifying object offsets"), + m->num_objects); for (i = 0; i < m->num_objects + m->num_objects_in_base; i++) { struct object_id oid; struct pack_entry e; diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 4f8be53c2bd75f..a06a1f35c619b3 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -590,7 +590,8 @@ int bitmap_writer_build(struct bitmap_writer *writer) int closed = 1; /* until proven otherwise */ if (writer->show_progress) - writer->progress = start_progress("Building bitmaps", + writer->progress = start_progress(the_repository, + "Building bitmaps", writer->selected_nr); trace2_region_enter("pack-bitmap-write", "building_bitmaps_total", the_repository); @@ -710,7 +711,8 @@ void bitmap_writer_select_commits(struct bitmap_writer *writer, } if (writer->show_progress) - writer->progress = start_progress("Selecting bitmap commits", 0); + writer->progress = start_progress(the_repository, + "Selecting bitmap commits", 0); for (;;) { struct commit *chosen = NULL; diff --git a/pack-bitmap.c b/pack-bitmap.c index bceb6da042772d..48e3b99efb2f55 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -2578,7 +2578,9 @@ void test_bitmap_walk(struct rev_info *revs) tdata.trees = ewah_to_bitmap(bitmap_git->trees); tdata.blobs = ewah_to_bitmap(bitmap_git->blobs); tdata.tags = ewah_to_bitmap(bitmap_git->tags); - tdata.prg = start_progress("Verifying bitmap entries", result_popcnt); + tdata.prg = start_progress(revs->repo, + "Verifying bitmap entries", + result_popcnt); tdata.seen = 0; traverse_commit_list(revs, &test_show_commit, &test_show_object, &tdata); diff --git a/preload-index.c b/preload-index.c index ab94d6f39967ea..40ab2abafb8de5 100644 --- a/preload-index.c +++ b/preload-index.c @@ -132,7 +132,9 @@ void preload_index(struct index_state *index, memset(&pd, 0, sizeof(pd)); if (refresh_flags & REFRESH_PROGRESS && isatty(2)) { - pd.progress = start_delayed_progress(_("Refreshing index"), index->cache_nr); + pd.progress = start_delayed_progress(the_repository, + _("Refreshing index"), + index->cache_nr); pthread_mutex_init(&pd.mutex, NULL); } diff --git a/progress.c b/progress.c index a8fdfceb5cd128..8d5ae70f3a9ec7 100644 --- a/progress.c +++ b/progress.c @@ -9,7 +9,6 @@ */ #define GIT_TEST_PROGRESS_ONLY -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -37,6 +36,7 @@ struct throughput { }; struct progress { + struct repository *repo; const char *title; uint64_t last_value; uint64_t total; @@ -254,10 +254,12 @@ void display_progress(struct progress *progress, uint64_t n) display(progress, n, NULL); } -static struct progress *start_progress_delay(const char *title, uint64_t total, +static struct progress *start_progress_delay(struct repository *r, + const char *title, uint64_t total, unsigned delay, unsigned sparse) { struct progress *progress = xmalloc(sizeof(*progress)); + progress->repo = r; progress->title = title; progress->total = total; progress->last_value = -1; @@ -270,7 +272,7 @@ static struct progress *start_progress_delay(const char *title, uint64_t total, progress->title_len = utf8_strwidth(title); progress->split = 0; set_progress_signal(); - trace2_region_enter("progress", title, the_repository); + trace2_region_enter("progress", title, r); return progress; } @@ -284,14 +286,16 @@ static int get_default_delay(void) return delay_in_secs; } -struct progress *start_delayed_progress(const char *title, uint64_t total) +struct progress *start_delayed_progress(struct repository *r, + const char *title, uint64_t total) { - return start_progress_delay(title, total, get_default_delay(), 0); + return start_progress_delay(r, title, total, get_default_delay(), 0); } -struct progress *start_progress(const char *title, uint64_t total) +struct progress *start_progress(struct repository *r, + const char *title, uint64_t total) { - return start_progress_delay(title, total, 0, 0); + return start_progress_delay(r, title, total, 0, 0); } /* @@ -303,15 +307,17 @@ struct progress *start_progress(const char *title, uint64_t total) * When "sparse" is set, stop_progress() will automatically force the done * message to show 100%. */ -struct progress *start_sparse_progress(const char *title, uint64_t total) +struct progress *start_sparse_progress(struct repository *r, + const char *title, uint64_t total) { - return start_progress_delay(title, total, 0, 1); + return start_progress_delay(r, title, total, 0, 1); } -struct progress *start_delayed_sparse_progress(const char *title, +struct progress *start_delayed_sparse_progress(struct repository *r, + const char *title, uint64_t total) { - return start_progress_delay(title, total, get_default_delay(), 1); + return start_progress_delay(r, title, total, get_default_delay(), 1); } static void finish_if_sparse(struct progress *progress) @@ -341,14 +347,14 @@ static void force_last_update(struct progress *progress, const char *msg) static void log_trace2(struct progress *progress) { - trace2_data_intmax("progress", the_repository, "total_objects", + trace2_data_intmax("progress", progress->repo, "total_objects", progress->total); if (progress->throughput) - trace2_data_intmax("progress", the_repository, "total_bytes", + trace2_data_intmax("progress", progress->repo, "total_bytes", progress->throughput->curr_total); - trace2_region_leave("progress", progress->title, the_repository); + trace2_region_leave("progress", progress->title, progress->repo); } void stop_progress_msg(struct progress **p_progress, const char *msg) diff --git a/progress.h b/progress.h index 3a945637c81c22..ed068c7bab845b 100644 --- a/progress.h +++ b/progress.h @@ -3,6 +3,7 @@ #include "gettext.h" struct progress; +struct repository; #ifdef GIT_TEST_PROGRESS_ONLY @@ -14,10 +15,14 @@ void progress_test_force_update(void); void display_throughput(struct progress *progress, uint64_t total); void display_progress(struct progress *progress, uint64_t n); -struct progress *start_progress(const char *title, uint64_t total); -struct progress *start_sparse_progress(const char *title, uint64_t total); -struct progress *start_delayed_progress(const char *title, uint64_t total); -struct progress *start_delayed_sparse_progress(const char *title, +struct progress *start_progress(struct repository *r, + const char *title, uint64_t total); +struct progress *start_sparse_progress(struct repository *r, + const char *title, uint64_t total); +struct progress *start_delayed_progress(struct repository *r, + const char *title, uint64_t total); +struct progress *start_delayed_sparse_progress(struct repository *r, + const char *title, uint64_t total); void stop_progress_msg(struct progress **p_progress, const char *msg); static inline void stop_progress(struct progress **p_progress) diff --git a/prune-packed.c b/prune-packed.c index d1c65ab10ed600..7dad2fc0c169cf 100644 --- a/prune-packed.c +++ b/prune-packed.c @@ -37,7 +37,8 @@ static int prune_object(const struct object_id *oid, const char *path, void prune_packed_objects(int opts) { if (opts & PRUNE_PACKED_VERBOSE) - progress = start_delayed_progress(_("Removing duplicate objects"), 256); + progress = start_delayed_progress(the_repository, + _("Removing duplicate objects"), 256); for_each_loose_file_in_objdir(repo_get_object_directory(the_repository), prune_object, NULL, prune_subdir, &opts); diff --git a/pseudo-merge.c b/pseudo-merge.c index 971f54cfe1a895..893b763fe45490 100644 --- a/pseudo-merge.c +++ b/pseudo-merge.c @@ -459,7 +459,8 @@ void select_pseudo_merges(struct bitmap_writer *writer) return; if (writer->show_progress) - progress = start_progress("Selecting pseudo-merge commits", + progress = start_progress(the_repository, + "Selecting pseudo-merge commits", writer->pseudo_merge_groups.nr); refs_for_each_ref(get_main_ref_store(the_repository), diff --git a/read-cache.c b/read-cache.c index 15d79839c20517..38c36caa7fef4d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1523,7 +1523,8 @@ int refresh_index(struct index_state *istate, unsigned int flags, int t2_sum_scan = 0; if (flags & REFRESH_PROGRESS && isatty(2)) - progress = start_delayed_progress(_("Refresh index"), + progress = start_delayed_progress(the_repository, + _("Refresh index"), istate->cache_nr); trace_performance_enter(); diff --git a/t/helper/test-progress.c b/t/helper/test-progress.c index 44be2645e9c7cb..1f75b7bd199aff 100644 --- a/t/helper/test-progress.c +++ b/t/helper/test-progress.c @@ -17,10 +17,14 @@ * * See 't0500-progress-display.sh' for examples. */ + +#define USE_THE_REPOSITORY_VARIABLE #define GIT_TEST_PROGRESS_ONLY + #include "test-tool.h" #include "parse-options.h" #include "progress.h" +#include "repository.h" #include "strbuf.h" #include "string-list.h" @@ -64,7 +68,7 @@ int cmd__progress(int argc, const char **argv) else die("invalid input: '%s'", line.buf); - progress = start_progress(title, total); + progress = start_progress(the_repository, title, total); } else if (skip_prefix(line.buf, "progress ", (const char **) &end)) { uint64_t item_count = strtoull(end, &end, 10); if (*end != '\0') diff --git a/unpack-trees.c b/unpack-trees.c index b3be5d542f5fc5..334cb84f6531b5 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -372,7 +372,8 @@ static struct progress *get_progress(struct unpack_trees_options *o, total++; } - return start_delayed_progress(_("Updating files"), total); + return start_delayed_progress(the_repository, + _("Updating files"), total); } static void setup_collided_checkout_detection(struct checkout *state, @@ -1773,6 +1774,7 @@ static int clear_ce_flags(struct index_state *istate, strbuf_reset(&prefix); if (show_progress) istate->progress = start_delayed_progress( + the_repository, _("Updating index flags"), istate->cache_nr); diff --git a/walker.c b/walker.c index 7cc9dbea46d64d..1cf3da02193531 100644 --- a/walker.c +++ b/walker.c @@ -172,7 +172,8 @@ static int loop(struct walker *walker) uint64_t nr = 0; if (walker->get_progress) - progress = start_delayed_progress(_("Fetching objects"), 0); + progress = start_delayed_progress(the_repository, + _("Fetching objects"), 0); while (process_queue) { struct object *obj = process_queue->item; From 59b6131a677b0b45fb7fd54ba60dbf689b0a1797 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:49 +0100 Subject: [PATCH 009/535] pager: stop using `the_repository` Stop using `the_repository` in the "pager" subsystem by passing in a repository when setting up the pager and when configuring it. Adjust callers accordingly by using `the_repository`. While there may be some callers that have a repository available in their context, this trivial conversion allows for easier verification and bubbles up the use of `the_repository` by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- add-patch.c | 2 +- builtin/am.c | 4 ++-- builtin/blame.c | 2 +- builtin/grep.c | 4 ++-- builtin/help.c | 4 ++-- builtin/log.c | 4 ++-- builtin/var.c | 2 +- diff.c | 4 ++-- git.c | 8 ++++---- pager.c | 14 ++++++-------- pager.h | 7 ++++--- 11 files changed, 27 insertions(+), 28 deletions(-) diff --git a/add-patch.c b/add-patch.c index 7b598e14df0164..95c67d8c80c4f3 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1464,7 +1464,7 @@ static int patch_update_file(struct add_p_state *s, if (file_diff->hunk_nr) { if (rendered_hunk_index != hunk_index) { if (use_pager) { - setup_pager(); + setup_pager(the_repository); sigchain_push(SIGPIPE, SIG_IGN); } render_hunk(s, hunk, 0, colored, &s->buf); diff --git a/builtin/am.c b/builtin/am.c index 1338b606febdde..27ccca8341feef 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -1786,7 +1786,7 @@ static int do_interactive(struct am_state *state) } strbuf_release(&msg); } else if (*reply == 'v' || *reply == 'V') { - const char *pager = git_pager(1); + const char *pager = git_pager(the_repository, 1); struct child_process cp = CHILD_PROCESS_INIT; if (!pager) @@ -2246,7 +2246,7 @@ static int show_patch(struct am_state *state, enum resume_type resume_mode) if (len < 0) die_errno(_("failed to read '%s'"), patch_path); - setup_pager(); + setup_pager(the_repository); write_in_full(1, sb.buf, sb.len); strbuf_release(&sb); return 0; diff --git a/builtin/blame.c b/builtin/blame.c index dd78288530f06e..1f44c341c50ecd 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1202,7 +1202,7 @@ int cmd_blame(int argc, stop_progress(&pi.progress); if (!incremental) - setup_pager(); + setup_pager(the_repository); else goto cleanup; diff --git a/builtin/grep.c b/builtin/grep.c index d00ee76f24cfde..d1427290f773b6 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1084,7 +1084,7 @@ int cmd_grep(int argc, } if (show_in_pager == default_pager) - show_in_pager = git_pager(1); + show_in_pager = git_pager(the_repository, 1); if (show_in_pager) { opt.color = 0; opt.name_only = 1; @@ -1246,7 +1246,7 @@ int cmd_grep(int argc, } if (!show_in_pager && !opt.status_only) - setup_pager(); + setup_pager(the_repository); die_for_incompatible_opt3(!use_index, "--no-index", untracked, "--untracked", diff --git a/builtin/help.c b/builtin/help.c index 05136279cf7b10..c257079cebc3c0 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -658,7 +658,7 @@ int cmd_help(int argc, case HELP_ACTION_ALL: opt_mode_usage(argc, "--all", help_format); if (verbose) { - setup_pager(); + setup_pager(the_repository); list_all_cmds_help(show_external_commands, show_aliases); return 0; @@ -692,7 +692,7 @@ int cmd_help(int argc, return 0; case HELP_ACTION_CONFIG: opt_mode_usage(argc, "--config", help_format); - setup_pager(); + setup_pager(the_repository); list_config_help(SHOW_CONFIG_HUMAN); printf("\n%s\n", _("'git help config' for more information")); return 0; diff --git a/builtin/log.c b/builtin/log.c index 317335e60d4501..3a6a3362f3c36e 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -369,7 +369,7 @@ static void cmd_log_init_finish(int argc, const char **argv, const char *prefix, if (rev->line_level_traverse) line_log_init(rev, line_cb.prefix, &line_cb.args); - setup_pager(); + setup_pager(the_repository); } static void cmd_log_init(int argc, const char **argv, const char *prefix, @@ -2292,7 +2292,7 @@ int cmd_format_patch(int argc, rev.commit_format = CMIT_FMT_MBOXRD; if (use_stdout) { - setup_pager(); + setup_pager(the_repository); } else if (!rev.diffopt.close_file) { int saved; diff --git a/builtin/var.c b/builtin/var.c index 1449656cc924f8..50d024de66604a 100644 --- a/builtin/var.c +++ b/builtin/var.c @@ -42,7 +42,7 @@ static char *sequence_editor(int ident_flag UNUSED) static char *pager(int ident_flag UNUSED) { - const char *pgm = git_pager(1); + const char *pgm = git_pager(the_repository, 1); if (!pgm) pgm = "cat"; diff --git a/diff.c b/diff.c index d28b4114c8dffb..0822ae443361f8 100644 --- a/diff.c +++ b/diff.c @@ -7386,6 +7386,6 @@ void setup_diff_pager(struct diff_options *opt) * --exit-code" in hooks and other scripts, we do not do so. */ if (!opt->flags.exit_with_status && - check_pager_config("diff") != 0) - setup_pager(); + check_pager_config(the_repository, "diff") != 0) + setup_pager(the_repository); } diff --git a/git.c b/git.c index 71d644dc1c5990..d977ebc84cfba6 100644 --- a/git.c +++ b/git.c @@ -125,7 +125,7 @@ static void commit_pager_choice(void) setenv("GIT_PAGER", "cat", 1); break; case 1: - setup_pager(); + setup_pager(the_repository); break; default: break; @@ -136,7 +136,7 @@ void setup_auto_pager(const char *cmd, int def) { if (use_pager != -1 || pager_in_use()) return; - use_pager = check_pager_config(cmd); + use_pager = check_pager_config(the_repository, cmd); if (use_pager == -1) use_pager = def; commit_pager_choice(); @@ -462,7 +462,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv, struct precompose_argv_prefix(argc, argv, NULL); if (use_pager == -1 && run_setup && !(p->option & DELAY_PAGER_CONFIG)) - use_pager = check_pager_config(p->cmd); + use_pager = check_pager_config(the_repository, p->cmd); if (use_pager == -1 && p->option & USE_PAGER) use_pager = 1; if (run_setup && startup_info->have_repository) @@ -750,7 +750,7 @@ static void execv_dashed_external(const char **argv) int status; if (use_pager == -1 && !is_builtin(argv[0])) - use_pager = check_pager_config(argv[0]); + use_pager = check_pager_config(the_repository, argv[0]); commit_pager_choice(); strvec_pushf(&cmd.args, "git-%s", argv[0]); diff --git a/pager.c b/pager.c index 40b664f893c8ec..5531fff50eb73f 100644 --- a/pager.c +++ b/pager.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "config.h" #include "editor.h" @@ -84,7 +82,7 @@ static int core_pager_config(const char *var, const char *value, return 0; } -const char *git_pager(int stdout_is_tty) +const char *git_pager(struct repository *r, int stdout_is_tty) { const char *pager; @@ -94,7 +92,7 @@ const char *git_pager(int stdout_is_tty) pager = getenv("GIT_PAGER"); if (!pager) { if (!pager_program) - read_early_config(the_repository, + read_early_config(r, core_pager_config, NULL); pager = pager_program; } @@ -143,10 +141,10 @@ void prepare_pager_args(struct child_process *pager_process, const char *pager) pager_process->trace2_child_class = "pager"; } -void setup_pager(void) +void setup_pager(struct repository *r) { static int once = 0; - const char *pager = git_pager(isatty(1)); + const char *pager = git_pager(r, isatty(1)); if (!pager) return; @@ -293,7 +291,7 @@ static int pager_command_config(const char *var, const char *value, } /* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ -int check_pager_config(const char *cmd) +int check_pager_config(struct repository *r, const char *cmd) { struct pager_command_config_data data; @@ -301,7 +299,7 @@ int check_pager_config(const char *cmd) data.want = -1; data.value = NULL; - read_early_config(the_repository, pager_command_config, &data); + read_early_config(r, pager_command_config, &data); if (data.value) pager_program = data.value; diff --git a/pager.h b/pager.h index 103ecac476fdd6..d070be63489716 100644 --- a/pager.h +++ b/pager.h @@ -2,15 +2,16 @@ #define PAGER_H struct child_process; +struct repository; -const char *git_pager(int stdout_is_tty); -void setup_pager(void); +const char *git_pager(struct repository *r, int stdout_is_tty); +void setup_pager(struct repository *r); void wait_for_pager(void); int pager_in_use(void); int term_columns(void); void term_clear_line(void); int decimal_width(uintmax_t); -int check_pager_config(const char *cmd); +int check_pager_config(struct repository *r, const char *cmd); void prepare_pager_args(struct child_process *, const char *pager); extern int pager_use_color; From bd0c0fb790dcb6cd32a585e1d9fcb0a4a4f7379b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:50 +0100 Subject: [PATCH 010/535] trace: stop using `the_repository` Stop using `the_repository` in the "trace" subsystem by passing in a repository when setting up tracing. Adjust the only caller accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- git.c | 2 +- trace.c | 9 ++++----- trace.h | 4 +++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/git.c b/git.c index d977ebc84cfba6..a94dab3770251f 100644 --- a/git.c +++ b/git.c @@ -467,7 +467,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv, struct use_pager = 1; if (run_setup && startup_info->have_repository) /* get_git_dir() may set up repo, avoid that */ - trace_repo_setup(); + trace_repo_setup(the_repository); commit_pager_choice(); if (!help && p->option & NEED_WORK_TREE) diff --git a/trace.c b/trace.c index 2cfd25942ee725..9b99460db82a28 100644 --- a/trace.c +++ b/trace.c @@ -21,7 +21,6 @@ * along with this program; if not, see . */ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -298,7 +297,7 @@ static const char *quote_crnl(const char *path) return new_path.buf; } -void trace_repo_setup(void) +void trace_repo_setup(struct repository *r) { const char *git_work_tree, *prefix = startup_info->prefix; char *cwd; @@ -308,14 +307,14 @@ void trace_repo_setup(void) cwd = xgetcwd(); - if (!(git_work_tree = repo_get_work_tree(the_repository))) + if (!(git_work_tree = repo_get_work_tree(r))) git_work_tree = "(null)"; if (!startup_info->prefix) prefix = "(null)"; - trace_printf_key(&trace_setup_key, "setup: git_dir: %s\n", quote_crnl(repo_get_git_dir(the_repository))); - trace_printf_key(&trace_setup_key, "setup: git_common_dir: %s\n", quote_crnl(repo_get_common_dir(the_repository))); + trace_printf_key(&trace_setup_key, "setup: git_dir: %s\n", quote_crnl(repo_get_git_dir(r))); + trace_printf_key(&trace_setup_key, "setup: git_common_dir: %s\n", quote_crnl(repo_get_common_dir(r))); trace_printf_key(&trace_setup_key, "setup: worktree: %s\n", quote_crnl(git_work_tree)); trace_printf_key(&trace_setup_key, "setup: cwd: %s\n", quote_crnl(cwd)); trace_printf_key(&trace_setup_key, "setup: prefix: %s\n", quote_crnl(prefix)); diff --git a/trace.h b/trace.h index d304d55aa1d706..9152fe9b3e565e 100644 --- a/trace.h +++ b/trace.h @@ -92,7 +92,9 @@ extern struct trace_key trace_default_key; extern struct trace_key trace_perf_key; extern struct trace_key trace_setup_key; -void trace_repo_setup(void); +struct repository; + +void trace_repo_setup(struct repository *r); /** * Checks whether the trace key is enabled. Used to prevent expensive From 395b584b5751b009d657d8c3aed371f2a233d919 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:51 +0100 Subject: [PATCH 011/535] serve: stop using `the_repository` Stop using `the_repository` in the "serve" subsystem by passing in a repository when advertising capabilities or serving requests. Adjust callers accordingly by using `the_repository`. While there may be some callers that have a repository available in their context, this trivial conversion allows for easier verification and bubbles up the use of `the_repository` by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/upload-pack.c | 6 ++++-- serve.c | 36 +++++++++++++++++------------------- serve.h | 6 ++++-- t/helper/test-serve-v2.c | 7 +++++-- 4 files changed, 30 insertions(+), 25 deletions(-) diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index dd63d6eadfe002..c2bbc035ab0c91 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "builtin.h" #include "exec-cmd.h" #include "gettext.h" @@ -63,9 +65,9 @@ int cmd_upload_pack(int argc, switch (determine_protocol_version_server()) { case protocol_v2: if (advertise_refs) - protocol_v2_advertise_capabilities(); + protocol_v2_advertise_capabilities(the_repository); else - protocol_v2_serve_loop(stateless_rpc); + protocol_v2_serve_loop(the_repository, stateless_rpc); break; case protocol_v1: /* diff --git a/serve.c b/serve.c index c8694e375159ca..f6dfe34a2bee6b 100644 --- a/serve.c +++ b/serve.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "repository.h" #include "config.h" @@ -159,7 +157,7 @@ static struct protocol_capability capabilities[] = { }, }; -void protocol_v2_advertise_capabilities(void) +void protocol_v2_advertise_capabilities(struct repository *r) { struct strbuf capability = STRBUF_INIT; struct strbuf value = STRBUF_INIT; @@ -170,7 +168,7 @@ void protocol_v2_advertise_capabilities(void) for (size_t i = 0; i < ARRAY_SIZE(capabilities); i++) { struct protocol_capability *c = &capabilities[i]; - if (c->advertise(the_repository, &value)) { + if (c->advertise(r, &value)) { strbuf_addstr(&capability, c->name); if (value.len) { @@ -214,20 +212,20 @@ static struct protocol_capability *get_capability(const char *key, const char ** return NULL; } -static int receive_client_capability(const char *key) +static int receive_client_capability(struct repository *r, const char *key) { const char *value; const struct protocol_capability *c = get_capability(key, &value); - if (!c || c->command || !c->advertise(the_repository, NULL)) + if (!c || c->command || !c->advertise(r, NULL)) return 0; if (c->receive) - c->receive(the_repository, value); + c->receive(r, value); return 1; } -static int parse_command(const char *key, struct protocol_capability **command) +static int parse_command(struct repository *r, const char *key, struct protocol_capability **command) { const char *out; @@ -238,7 +236,7 @@ static int parse_command(const char *key, struct protocol_capability **command) if (*command) die("command '%s' requested after already requesting command '%s'", out, (*command)->name); - if (!cmd || !cmd->advertise(the_repository, NULL) || !cmd->command || value) + if (!cmd || !cmd->advertise(r, NULL) || !cmd->command || value) die("invalid command '%s'", out); *command = cmd; @@ -253,7 +251,7 @@ enum request_state { PROCESS_REQUEST_DONE, }; -static int process_request(void) +static int process_request(struct repository *r) { enum request_state state = PROCESS_REQUEST_KEYS; struct packet_reader reader; @@ -278,8 +276,8 @@ static int process_request(void) case PACKET_READ_EOF: BUG("Should have already died when seeing EOF"); case PACKET_READ_NORMAL: - if (parse_command(reader.line, &command) || - receive_client_capability(reader.line)) + if (parse_command(r, reader.line, &command) || + receive_client_capability(r, reader.line)) seen_capability_or_command = 1; else die("unknown capability '%s'", reader.line); @@ -319,30 +317,30 @@ static int process_request(void) if (!command) die("no command requested"); - if (client_hash_algo != hash_algo_by_ptr(the_repository->hash_algo)) + if (client_hash_algo != hash_algo_by_ptr(r->hash_algo)) die("mismatched object format: server %s; client %s", - the_repository->hash_algo->name, + r->hash_algo->name, hash_algos[client_hash_algo].name); - command->command(the_repository, &reader); + command->command(r, &reader); return 0; } -void protocol_v2_serve_loop(int stateless_rpc) +void protocol_v2_serve_loop(struct repository *r, int stateless_rpc) { if (!stateless_rpc) - protocol_v2_advertise_capabilities(); + protocol_v2_advertise_capabilities(r); /* * If stateless-rpc was requested then exit after * a single request/response exchange */ if (stateless_rpc) { - process_request(); + process_request(r); } else { for (;;) - if (process_request()) + if (process_request(r)) break; } } diff --git a/serve.h b/serve.h index f946cf904a242d..85bf73cfe53cb9 100644 --- a/serve.h +++ b/serve.h @@ -1,7 +1,9 @@ #ifndef SERVE_H #define SERVE_H -void protocol_v2_advertise_capabilities(void); -void protocol_v2_serve_loop(int stateless_rpc); +struct repository; + +void protocol_v2_advertise_capabilities(struct repository *r); +void protocol_v2_serve_loop(struct repository *r, int stateless_rpc); #endif /* SERVE_H */ diff --git a/t/helper/test-serve-v2.c b/t/helper/test-serve-v2.c index 054cbcf5d83946..63a200b8d46f68 100644 --- a/t/helper/test-serve-v2.c +++ b/t/helper/test-serve-v2.c @@ -1,6 +1,9 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "test-tool.h" #include "gettext.h" #include "parse-options.h" +#include "repository.h" #include "serve.h" #include "setup.h" @@ -28,9 +31,9 @@ int cmd__serve_v2(int argc, const char **argv) PARSE_OPT_KEEP_UNKNOWN_OPT); if (advertise_capabilities) - protocol_v2_advertise_capabilities(); + protocol_v2_advertise_capabilities(the_repository); else - protocol_v2_serve_loop(stateless_rpc); + protocol_v2_serve_loop(the_repository, stateless_rpc); return 0; } From 5ee907bb3f12c630f78554ea9b4c605f4e1a5e92 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:52 +0100 Subject: [PATCH 012/535] send-pack: stop using `the_repository` Stop using `the_repository` in the "send-pack" subsystem by passing in a repository when sending a packfile. Adjust callers accordingly by using `the_repository`. While there may be some callers that have a repository available in their context, this trivial conversion allows for easier verification and bubbles up the use of `the_repository` by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/send-pack.c | 2 +- send-pack.c | 77 +++++++++++++++++++++++---------------------- send-pack.h | 3 +- transport.c | 2 +- 4 files changed, 44 insertions(+), 40 deletions(-) diff --git a/builtin/send-pack.c b/builtin/send-pack.c index 59b626aae8cd82..8d461008e2e860 100644 --- a/builtin/send-pack.c +++ b/builtin/send-pack.c @@ -317,7 +317,7 @@ int cmd_send_pack(int argc, set_ref_status_for_push(remote_refs, args.send_mirror, args.force_update); - ret = send_pack(&args, fd, conn, remote_refs, &extra_have); + ret = send_pack(the_repository, &args, fd, conn, remote_refs, &extra_have); if (helper_status) print_helper_status(remote_refs); diff --git a/send-pack.c b/send-pack.c index 7e8321368379ef..772c7683a01570 100644 --- a/send-pack.c +++ b/send-pack.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "config.h" #include "commit.h" @@ -44,10 +42,11 @@ int option_parse_push_signed(const struct option *opt, die("bad %s argument: %s", opt->long_name, arg); } -static void feed_object(const struct object_id *oid, FILE *fh, int negative) +static void feed_object(struct repository *r, + const struct object_id *oid, FILE *fh, int negative) { if (negative && - !repo_has_object_file_with_flags(the_repository, oid, + !repo_has_object_file_with_flags(r, oid, OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK)) return; @@ -61,7 +60,8 @@ static void feed_object(const struct object_id *oid, FILE *fh, int negative) /* * Make a pack stream and spit it out into file descriptor fd */ -static int pack_objects(int fd, struct ref *refs, struct oid_array *advertised, +static int pack_objects(struct repository *r, + int fd, struct ref *refs, struct oid_array *advertised, struct oid_array *negotiated, struct send_pack_args *args) { @@ -74,7 +74,7 @@ static int pack_objects(int fd, struct ref *refs, struct oid_array *advertised, FILE *po_in; int rc; - trace2_region_enter("send_pack", "pack_objects", the_repository); + trace2_region_enter("send_pack", "pack_objects", r); strvec_push(&po.args, "pack-objects"); strvec_push(&po.args, "--all-progress-implied"); strvec_push(&po.args, "--revs"); @@ -87,7 +87,7 @@ static int pack_objects(int fd, struct ref *refs, struct oid_array *advertised, strvec_push(&po.args, "-q"); if (args->progress) strvec_push(&po.args, "--progress"); - if (is_repository_shallow(the_repository)) + if (is_repository_shallow(r)) strvec_push(&po.args, "--shallow"); if (args->disable_bitmaps) strvec_push(&po.args, "--no-use-bitmap-index"); @@ -104,15 +104,15 @@ static int pack_objects(int fd, struct ref *refs, struct oid_array *advertised, */ po_in = xfdopen(po.in, "w"); for (size_t i = 0; i < advertised->nr; i++) - feed_object(&advertised->oid[i], po_in, 1); + feed_object(r, &advertised->oid[i], po_in, 1); for (size_t i = 0; i < negotiated->nr; i++) - feed_object(&negotiated->oid[i], po_in, 1); + feed_object(r, &negotiated->oid[i], po_in, 1); while (refs) { if (!is_null_oid(&refs->old_oid)) - feed_object(&refs->old_oid, po_in, 1); + feed_object(r, &refs->old_oid, po_in, 1); if (!is_null_oid(&refs->new_oid)) - feed_object(&refs->new_oid, po_in, 0); + feed_object(r, &refs->new_oid, po_in, 0); refs = refs->next; } @@ -146,10 +146,10 @@ static int pack_objects(int fd, struct ref *refs, struct oid_array *advertised, */ if (rc > 128 && rc != 141) error("pack-objects died of signal %d", rc - 128); - trace2_region_leave("send_pack", "pack_objects", the_repository); + trace2_region_leave("send_pack", "pack_objects", r); return -1; } - trace2_region_leave("send_pack", "pack_objects", the_repository); + trace2_region_leave("send_pack", "pack_objects", r); return 0; } @@ -164,7 +164,8 @@ static int receive_unpack_status(struct packet_reader *reader) return 0; } -static int receive_status(struct packet_reader *reader, struct ref *refs) +static int receive_status(struct repository *r, + struct packet_reader *reader, struct ref *refs) { struct ref *hint; int ret; @@ -172,7 +173,7 @@ static int receive_status(struct packet_reader *reader, struct ref *refs) int new_report = 0; int once = 0; - trace2_region_enter("send_pack", "receive_status", the_repository); + trace2_region_enter("send_pack", "receive_status", r); hint = NULL; ret = receive_unpack_status(reader); while (1) { @@ -221,10 +222,10 @@ static int receive_status(struct packet_reader *reader, struct ref *refs) if (!strcmp(key, "refname")) report->ref_name = xstrdup_or_null(val); else if (!strcmp(key, "old-oid") && val && - !parse_oid_hex(val, &old_oid, &val)) + !parse_oid_hex_algop(val, &old_oid, &val, r->hash_algo)) report->old_oid = oiddup(&old_oid); else if (!strcmp(key, "new-oid") && val && - !parse_oid_hex(val, &new_oid, &val)) + !parse_oid_hex_algop(val, &new_oid, &val, r->hash_algo)) report->new_oid = oiddup(&new_oid); else if (!strcmp(key, "forced-update")) report->forced_update = 1; @@ -271,7 +272,7 @@ static int receive_status(struct packet_reader *reader, struct ref *refs) new_report = 1; } } - trace2_region_leave("send_pack", "receive_status", the_repository); + trace2_region_leave("send_pack", "receive_status", r); return ret; } @@ -293,9 +294,9 @@ static int advertise_shallow_grafts_cb(const struct commit_graft *graft, void *c return 0; } -static void advertise_shallow_grafts_buf(struct strbuf *sb) +static void advertise_shallow_grafts_buf(struct repository *r, struct strbuf *sb) { - if (!is_repository_shallow(the_repository)) + if (!is_repository_shallow(r)) return; for_each_commit_graft(advertise_shallow_grafts_cb, sb); } @@ -426,13 +427,14 @@ static void reject_invalid_nonce(const char *nonce, int len) } } -static void get_commons_through_negotiation(const char *url, +static void get_commons_through_negotiation(struct repository *r, + const char *url, const struct ref *remote_refs, struct oid_array *commons) { struct child_process child = CHILD_PROCESS_INIT; const struct ref *ref; - int len = the_hash_algo->hexsz + 1; /* hash + NL */ + int len = r->hash_algo->hexsz + 1; /* hash + NL */ int nr_negotiation_tip = 0; child.git_cmd = 1; @@ -466,7 +468,7 @@ static void get_commons_through_negotiation(const char *url, break; if (read_len != len) die("invalid length read %d", read_len); - if (parse_oid_hex(hex_hash, &oid, &end) || *end != '\n') + if (parse_oid_hex_algop(hex_hash, &oid, &end, r->hash_algo) || *end != '\n') die("invalid hash"); oid_array_append(commons, &oid); } while (1); @@ -480,7 +482,8 @@ static void get_commons_through_negotiation(const char *url, } } -int send_pack(struct send_pack_args *args, +int send_pack(struct repository *r, + struct send_pack_args *args, int fd[], struct child_process *conn, struct ref *remote_refs, struct oid_array *extra_have) @@ -518,17 +521,17 @@ int send_pack(struct send_pack_args *args, goto out; } - git_config_get_bool("push.negotiate", &push_negotiate); + repo_config_get_bool(r, "push.negotiate", &push_negotiate); if (push_negotiate) { - trace2_region_enter("send_pack", "push_negotiate", the_repository); - get_commons_through_negotiation(args->url, remote_refs, &commons); - trace2_region_leave("send_pack", "push_negotiate", the_repository); + trace2_region_enter("send_pack", "push_negotiate", r); + get_commons_through_negotiation(r, args->url, remote_refs, &commons); + trace2_region_leave("send_pack", "push_negotiate", r); } - if (!git_config_get_bool("push.usebitmaps", &use_bitmaps)) + if (!repo_config_get_bool(r, "push.usebitmaps", &use_bitmaps)) args->disable_bitmaps = !use_bitmaps; - git_config_get_bool("transfer.advertisesid", &advertise_sid); + repo_config_get_bool(r, "transfer.advertisesid", &advertise_sid); /* Does the other end support the reporting? */ if (server_supports("report-status-v2")) @@ -554,7 +557,7 @@ int send_pack(struct send_pack_args *args, if (server_supports("push-options")) push_options_supported = 1; - if (!server_supports_hash(the_hash_algo->name, &object_format_supported)) + if (!server_supports_hash(r->hash_algo->name, &object_format_supported)) die(_("the receiving end does not support this repository's hash algorithm")); if (args->push_cert != SEND_PACK_PUSH_CERT_NEVER) { @@ -596,7 +599,7 @@ int send_pack(struct send_pack_args *args, if (use_push_options) strbuf_addstr(&cap_buf, " push-options"); if (object_format_supported) - strbuf_addf(&cap_buf, " object-format=%s", the_hash_algo->name); + strbuf_addf(&cap_buf, " object-format=%s", r->hash_algo->name); if (agent_supported) strbuf_addf(&cap_buf, " agent=%s", git_user_agent_sanitized()); if (advertise_sid) @@ -646,7 +649,7 @@ int send_pack(struct send_pack_args *args, } if (!args->dry_run) - advertise_shallow_grafts_buf(&req_buf); + advertise_shallow_grafts_buf(r, &req_buf); /* * Finally, tell the other end! @@ -686,7 +689,7 @@ int send_pack(struct send_pack_args *args, } if (args->stateless_rpc) { - if (!args->dry_run && (cmds_sent || is_repository_shallow(the_repository))) { + if (!args->dry_run && (cmds_sent || is_repository_shallow(r))) { packet_buf_flush(&req_buf); send_sideband(out, -1, req_buf.buf, req_buf.len, LARGE_PACKET_MAX); } @@ -711,7 +714,7 @@ int send_pack(struct send_pack_args *args, PACKET_READ_DIE_ON_ERR_PACKET); if (need_pack_data && cmds_sent) { - if (pack_objects(out, remote_refs, extra_have, &commons, args) < 0) { + if (pack_objects(r, out, remote_refs, extra_have, &commons, args) < 0) { if (args->stateless_rpc) close(out); if (git_connection_is_socket(conn)) @@ -724,7 +727,7 @@ int send_pack(struct send_pack_args *args, * we get one). */ if (status_report) - receive_status(&reader, remote_refs); + receive_status(r, &reader, remote_refs); if (use_sideband) { close(demux.out); @@ -743,7 +746,7 @@ int send_pack(struct send_pack_args *args, packet_flush(out); if (status_report && cmds_sent) - ret = receive_status(&reader, remote_refs); + ret = receive_status(r, &reader, remote_refs); else ret = 0; if (args->stateless_rpc) diff --git a/send-pack.h b/send-pack.h index 7edb80596c7b0e..d256715681b363 100644 --- a/send-pack.h +++ b/send-pack.h @@ -6,6 +6,7 @@ struct child_process; struct oid_array; struct ref; +struct repository; /* Possible values for push_cert field in send_pack_args. */ #define SEND_PACK_PUSH_CERT_NEVER 0 @@ -35,7 +36,7 @@ struct option; int option_parse_push_signed(const struct option *opt, const char *arg, int unset); -int send_pack(struct send_pack_args *args, +int send_pack(struct repository *r, struct send_pack_args *args, int fd[], struct child_process *conn, struct ref *remote_refs, struct oid_array *extra_have); diff --git a/transport.c b/transport.c index 10d820c33353f6..81ae8243b9a32a 100644 --- a/transport.c +++ b/transport.c @@ -932,7 +932,7 @@ static int git_transport_push(struct transport *transport, struct ref *remote_re break; case protocol_v1: case protocol_v0: - ret = send_pack(&args, data->fd, data->conn, remote_refs, + ret = send_pack(the_repository, &args, data->fd, data->conn, remote_refs, &data->extra_have); break; case protocol_unknown_version: From c365dbb44ec81fc60e9b7666a4384f8649d80b2a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:53 +0100 Subject: [PATCH 013/535] server-info: stop using `the_repository` Stop using `the_repository` in the "server-info" subsystem by passing in a repository when updating server info and storing the repository in the `update_info_ctx` structure to make it accessible to other functions. Adjust callers accordingly by using `the_repository`. While there may be some callers that have a repository available in their context, this trivial conversion allows for easier verification and bubbles up the use of `the_repository` by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- builtin/repack.c | 2 +- builtin/update-server-info.c | 2 +- server-info.c | 40 ++++++++++++++++++++---------------- server-info.h | 4 +++- 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c2e9103f112c1e..191b5eeb34e679 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2628,7 +2628,7 @@ int cmd_receive_pack(int argc, } } if (auto_update_server_info) - update_server_info(0); + update_server_info(the_repository, 0); clear_shallow_info(&si); } if (use_sideband) diff --git a/builtin/repack.c b/builtin/repack.c index 0c6dad7df47a16..81d13630ea41f8 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -1565,7 +1565,7 @@ int cmd_repack(int argc, } if (run_update_server_info) - update_server_info(0); + update_server_info(the_repository, 0); if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { unsigned flags = 0; diff --git a/builtin/update-server-info.c b/builtin/update-server-info.c index 6769611a025d0d..47a3f0bdd9c349 100644 --- a/builtin/update-server-info.c +++ b/builtin/update-server-info.c @@ -27,5 +27,5 @@ int cmd_update_server_info(int argc, if (argc > 0) usage_with_options(update_server_info_usage, options); - return !!update_server_info(force); + return !!update_server_info(the_repository, force); } diff --git a/server-info.c b/server-info.c index ef2f3f4b5c7b04..31c3fdc118447d 100644 --- a/server-info.c +++ b/server-info.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -18,6 +17,7 @@ #include "tempfile.h" struct update_info_ctx { + struct repository *repo; FILE *cur_fp; FILE *old_fp; /* becomes NULL if it differs from cur_fp */ struct strbuf cur_sb; @@ -73,7 +73,7 @@ static int uic_printf(struct update_info_ctx *uic, const char *fmt, ...) * it into place. The contents of the file come from "generate", which * should return non-zero if it encounters an error. */ -static int update_info_file(char *path, +static int update_info_file(struct repository *r, char *path, int (*generate)(struct update_info_ctx *), int force) { @@ -81,6 +81,7 @@ static int update_info_file(char *path, struct tempfile *f = NULL; int ret = -1; struct update_info_ctx uic = { + .repo = r, .cur_fp = NULL, .old_fp = NULL, .cur_sb = STRBUF_INIT, @@ -152,7 +153,7 @@ static int add_info_ref(const char *path, const char *referent UNUSED, const str void *cb_data) { struct update_info_ctx *uic = cb_data; - struct object *o = parse_object(the_repository, oid); + struct object *o = parse_object(uic->repo, oid); if (!o) return -1; @@ -160,7 +161,7 @@ static int add_info_ref(const char *path, const char *referent UNUSED, const str return -1; if (o->type == OBJ_TAG) { - o = deref_tag(the_repository, o, path, 0); + o = deref_tag(uic->repo, o, path, 0); if (o) if (uic_printf(uic, "%s %s^{}\n", oid_to_hex(&o->oid), path) < 0) @@ -171,14 +172,14 @@ static int add_info_ref(const char *path, const char *referent UNUSED, const str static int generate_info_refs(struct update_info_ctx *uic) { - return refs_for_each_ref(get_main_ref_store(the_repository), + return refs_for_each_ref(get_main_ref_store(uic->repo), add_info_ref, uic); } -static int update_info_refs(int force) +static int update_info_refs(struct repository *r, int force) { - char *path = git_pathdup("info/refs"); - int ret = update_info_file(path, generate_info_refs, force); + char *path = repo_git_path(r, "info/refs"); + int ret = update_info_file(r, path, generate_info_refs, force); free(path); return ret; } @@ -284,14 +285,14 @@ static int compare_info(const void *a_, const void *b_) return 1; } -static void init_pack_info(const char *infofile, int force) +static void init_pack_info(struct repository *r, const char *infofile, int force) { struct packed_git *p; int stale; int i; size_t alloc = 0; - for (p = get_all_packs(the_repository); p; p = p->next) { + for (p = get_all_packs(r); p; p = p->next) { /* we ignore things on alternate path since they are * not available to the pullers in general. */ @@ -340,33 +341,36 @@ static int write_pack_info_file(struct update_info_ctx *uic) return 0; } -static int update_info_packs(int force) +static int update_info_packs(struct repository *r, int force) { char *infofile = mkpathdup("%s/info/packs", - repo_get_object_directory(the_repository)); + repo_get_object_directory(r)); int ret; - init_pack_info(infofile, force); - ret = update_info_file(infofile, write_pack_info_file, force); + init_pack_info(r, infofile, force); + ret = update_info_file(r, infofile, write_pack_info_file, force); free_pack_info(); free(infofile); return ret; } /* public */ -int update_server_info(int force) +int update_server_info(struct repository *r, int force) { /* We would add more dumb-server support files later, * including index of available pack files and their * intended audiences. */ int errs = 0; + char *path; - errs = errs | update_info_refs(force); - errs = errs | update_info_packs(force); + errs = errs | update_info_refs(r, force); + errs = errs | update_info_packs(r, force); /* remove leftover rev-cache file if there is any */ - unlink_or_warn(git_path("info/rev-cache")); + path = repo_git_path(r, "info/rev-cache"); + unlink_or_warn(path); + free(path); return errs; } diff --git a/server-info.h b/server-info.h index 13bbde2c55fafe..e634d1722bdfaa 100644 --- a/server-info.h +++ b/server-info.h @@ -1,7 +1,9 @@ #ifndef SERVER_INFO_H #define SERVER_INFO_H +struct repository; + /* Dumb servers support */ -int update_server_info(int); +int update_server_info(struct repository *r, int force); #endif /* SERVER_INFO_H */ From b4c476c43a1125300614d2e9ec9dedfa44355515 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:54 +0100 Subject: [PATCH 014/535] diagnose: stop using `the_repository` Stop using `the_repository` in the "diagnose" subsystem by passing in a repository when generating a diagnostics archive. Adjust callers accordingly by using `the_repository`. While there may be some callers that have a repository available in their context, this trivial conversion allows for easier verification and bubbles up the use of `the_repository` by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/bugreport.c | 2 +- builtin/diagnose.c | 4 +++- diagnose.c | 15 ++++++++------- diagnose.h | 5 ++++- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/builtin/bugreport.c b/builtin/bugreport.c index 7c2df035c9cff0..0ac59cc8dc5824 100644 --- a/builtin/bugreport.c +++ b/builtin/bugreport.c @@ -167,7 +167,7 @@ int cmd_bugreport(int argc, strbuf_addftime(&zip_path, option_suffix, localtime_r(&now, &tm), 0, 0); strbuf_addstr(&zip_path, ".zip"); - if (create_diagnostics_archive(&zip_path, diagnose)) + if (create_diagnostics_archive(the_repository, &zip_path, diagnose)) die_errno(_("unable to create diagnostics archive %s"), zip_path.buf); strbuf_release(&zip_path); diff --git a/builtin/diagnose.c b/builtin/diagnose.c index 66a22d918e68a4..33c39bd5981f22 100644 --- a/builtin/diagnose.c +++ b/builtin/diagnose.c @@ -1,3 +1,5 @@ +#define USE_THE_REPOSITORY_VARIABLE + #include "builtin.h" #include "abspath.h" #include "gettext.h" @@ -58,7 +60,7 @@ int cmd_diagnose(int argc, } /* Prepare diagnostics */ - if (create_diagnostics_archive(&zip_path, mode)) + if (create_diagnostics_archive(the_repository, &zip_path, mode)) die_errno(_("unable to create diagnostics archive %s"), zip_path.buf); diff --git a/diagnose.c b/diagnose.c index b11931df86c4ba..bd485effea22ce 100644 --- a/diagnose.c +++ b/diagnose.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "diagnose.h" #include "compat/disk.h" @@ -12,6 +10,7 @@ #include "object-store-ll.h" #include "packfile.h" #include "parse-options.h" +#include "repository.h" #include "write-or-die.h" struct archive_dir { @@ -179,7 +178,9 @@ static int add_directory_to_archiver(struct strvec *archiver_args, return res; } -int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) +int create_diagnostics_archive(struct repository *r, + struct strbuf *zip_path, + enum diagnose_mode mode) { struct strvec archiver_args = STRVEC_INIT; char **argv_copy = NULL; @@ -218,7 +219,7 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) strbuf_addstr(&buf, "Collecting diagnostic info\n\n"); get_version_info(&buf, 1); - strbuf_addf(&buf, "Repository root: %s\n", the_repository->worktree); + strbuf_addf(&buf, "Repository root: %s\n", r->worktree); get_disk_info(&buf); write_or_die(stdout_fd, buf.buf, buf.len); strvec_pushf(&archiver_args, @@ -227,7 +228,7 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) strbuf_reset(&buf); strbuf_addstr(&buf, "--add-virtual-file=packs-local.txt:"); - dir_file_stats(the_repository->objects->odb, &buf); + dir_file_stats(r->objects->odb, &buf); foreach_alt_odb(dir_file_stats, &buf); strvec_push(&archiver_args, buf.buf); @@ -250,13 +251,13 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) } strvec_pushl(&archiver_args, "--prefix=", - oid_to_hex(the_hash_algo->empty_tree), "--", NULL); + oid_to_hex(r->hash_algo->empty_tree), "--", NULL); /* `write_archive()` modifies the `argv` passed to it. Let it. */ argv_copy = xmemdupz(archiver_args.v, sizeof(char *) * archiver_args.nr); res = write_archive(archiver_args.nr, (const char **)argv_copy, NULL, - the_repository, NULL, 0); + r, NULL, 0); if (res) { error(_("failed to write archive")); goto diagnose_cleanup; diff --git a/diagnose.h b/diagnose.h index f525219ab0cf9b..f7b38f49f5271a 100644 --- a/diagnose.h +++ b/diagnose.h @@ -4,6 +4,7 @@ #include "strbuf.h" struct option; +struct repository; enum diagnose_mode { DIAGNOSE_NONE, @@ -13,6 +14,8 @@ enum diagnose_mode { int option_parse_diagnose(const struct option *opt, const char *arg, int unset); -int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode); +int create_diagnostics_archive(struct repository *r, + struct strbuf *zip_path, + enum diagnose_mode mode); #endif /* DIAGNOSE_H */ From 71e5afee8be62c9602838f859592539d8728cc56 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:55 +0100 Subject: [PATCH 015/535] mailinfo: stop using `the_repository` Stop using `the_repository` in the "mailinfo" subsystem by passing in a repository when setting up the mailinfo structure. Adjust callers accordingly by using `the_repository`. While there may be some callers that have a repository available in their context, this trivial conversion allows for easier verification and bubbles up the use of `the_repository` by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/am.c | 2 +- builtin/mailinfo.c | 2 +- mailinfo.c | 5 ++--- mailinfo.h | 4 +++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/builtin/am.c b/builtin/am.c index 27ccca8341feef..e94d08e04b2387 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -1211,7 +1211,7 @@ static int parse_mail(struct am_state *state, const char *mail) int ret = 0; struct mailinfo mi; - setup_mailinfo(&mi); + setup_mailinfo(the_repository, &mi); if (state->utf8) mi.metainfo_charset = get_commit_output_encoding(); diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index e17dec27b1df69..8de7ba7de1d6aa 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -83,7 +83,7 @@ int cmd_mailinfo(int argc, OPT_END() }; - setup_mailinfo(&mi); + setup_mailinfo(the_repository, &mi); meta_charset.policy = CHARSET_DEFAULT; argc = parse_options(argc, argv, prefix, options, mailinfo_usage, 0); diff --git a/mailinfo.c b/mailinfo.c index aa263bf490881d..7b001fa5dbd685 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -1269,7 +1268,7 @@ static int git_mailinfo_config(const char *var, const char *value, return 0; } -void setup_mailinfo(struct mailinfo *mi) +void setup_mailinfo(struct repository *r, struct mailinfo *mi) { memset(mi, 0, sizeof(*mi)); strbuf_init(&mi->name, 0); @@ -1281,7 +1280,7 @@ void setup_mailinfo(struct mailinfo *mi) mi->header_stage = 1; mi->use_inbody_headers = 1; mi->content_top = mi->content; - git_config(git_mailinfo_config, mi); + repo_config(r, git_mailinfo_config, mi); } void clear_mailinfo(struct mailinfo *mi) diff --git a/mailinfo.h b/mailinfo.h index f2ffd0349e8007..1f2066416578ac 100644 --- a/mailinfo.h +++ b/mailinfo.h @@ -5,6 +5,8 @@ #define MAX_BOUNDARIES 5 +struct repository; + enum quoted_cr_action { quoted_cr_unset = -1, quoted_cr_nowarn, @@ -49,7 +51,7 @@ struct mailinfo { }; int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action); -void setup_mailinfo(struct mailinfo *); +void setup_mailinfo(struct repository *r, struct mailinfo *); int mailinfo(struct mailinfo *, const char *msg, const char *patch); void clear_mailinfo(struct mailinfo *); From 6c27d22276b754e2214242de7a200b372aa611f6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:56 +0100 Subject: [PATCH 016/535] credential: stop using `the_repository` Stop using `the_repository` in the "credential" subsystem by passing in a repository when filling, approving or rejecting credentials. Adjust callers accordingly by using `the_repository`. While there may be some callers that have a repository available in their context, this trivial conversion allows for easier verification and bubbles up the use of `the_repository` by one level. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/credential.c | 6 +++--- credential.c | 34 +++++++++++++++++----------------- credential.h | 11 +++++++---- http.c | 24 ++++++++++++------------ imap-send.c | 10 +++++----- remote-curl.c | 4 ++-- 6 files changed, 46 insertions(+), 43 deletions(-) diff --git a/builtin/credential.c b/builtin/credential.c index 14c8c6608b2fbd..614b195b753ed8 100644 --- a/builtin/credential.c +++ b/builtin/credential.c @@ -32,15 +32,15 @@ int cmd_credential(int argc, die("unable to read credential from stdin"); if (!strcmp(op, "fill")) { - credential_fill(&c, 0); + credential_fill(the_repository, &c, 0); credential_next_state(&c); credential_write(&c, stdout, CREDENTIAL_OP_RESPONSE); } else if (!strcmp(op, "approve")) { credential_set_all_capabilities(&c, CREDENTIAL_OP_HELPER); - credential_approve(&c); + credential_approve(the_repository, &c); } else if (!strcmp(op, "reject")) { credential_set_all_capabilities(&c, CREDENTIAL_OP_HELPER); - credential_reject(&c); + credential_reject(the_repository, &c); } else { usage(usage_msg); } diff --git a/credential.c b/credential.c index a995031c5f5d84..b3f87b5b2f16c5 100644 --- a/credential.c +++ b/credential.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -166,7 +165,7 @@ static int match_partial_url(const char *url, void *cb) return matches; } -static void credential_apply_config(struct credential *c) +static void credential_apply_config(struct repository *r, struct credential *c) { char *normalized_url; struct urlmatch_config config = URLMATCH_CONFIG_INIT; @@ -191,7 +190,7 @@ static void credential_apply_config(struct credential *c) credential_format(c, &url); normalized_url = url_normalize(url.buf, &config.url); - git_config(urlmatch_config_entry, &config); + repo_config(r, urlmatch_config_entry, &config); string_list_clear(&config.vars, 1); free(normalized_url); urlmatch_config_release(&config); @@ -254,34 +253,34 @@ static char *credential_ask_one(const char *what, struct credential *c, return xstrdup(r); } -static int credential_getpass(struct credential *c) +static int credential_getpass(struct repository *r, struct credential *c) { int interactive; char *value; - if (!git_config_get_maybe_bool("credential.interactive", &interactive) && + if (!repo_config_get_maybe_bool(r, "credential.interactive", &interactive) && !interactive) { - trace2_data_intmax("credential", the_repository, + trace2_data_intmax("credential", r, "interactive/skipped", 1); return -1; } - if (!git_config_get_string("credential.interactive", &value)) { + if (!repo_config_get_string(r, "credential.interactive", &value)) { int same = !strcmp(value, "never"); free(value); if (same) { - trace2_data_intmax("credential", the_repository, + trace2_data_intmax("credential", r, "interactive/skipped", 1); return -1; } } - trace2_region_enter("credential", "interactive", the_repository); + trace2_region_enter("credential", "interactive", r); if (!c->username) c->username = credential_ask_one("Username", c, PROMPT_ASKPASS|PROMPT_ECHO); if (!c->password) c->password = credential_ask_one("Password", c, PROMPT_ASKPASS); - trace2_region_leave("credential", "interactive", the_repository); + trace2_region_leave("credential", "interactive", r); return 0; } @@ -489,7 +488,8 @@ static int credential_do(struct credential *c, const char *helper, return r; } -void credential_fill(struct credential *c, int all_capabilities) +void credential_fill(struct repository *r, + struct credential *c, int all_capabilities) { int i; @@ -499,7 +499,7 @@ void credential_fill(struct credential *c, int all_capabilities) credential_next_state(c); c->multistage = 0; - credential_apply_config(c); + credential_apply_config(r, c); if (all_capabilities) credential_set_all_capabilities(c, CREDENTIAL_OP_INITIAL); @@ -526,12 +526,12 @@ void credential_fill(struct credential *c, int all_capabilities) c->helpers.items[i].string); } - if (credential_getpass(c) || + if (credential_getpass(r, c) || (!c->username && !c->password && !c->credential)) die("unable to get password from user"); } -void credential_approve(struct credential *c) +void credential_approve(struct repository *r, struct credential *c) { int i; @@ -542,20 +542,20 @@ void credential_approve(struct credential *c) credential_next_state(c); - credential_apply_config(c); + credential_apply_config(r, c); for (i = 0; i < c->helpers.nr; i++) credential_do(c, c->helpers.items[i].string, "store"); c->approved = 1; } -void credential_reject(struct credential *c) +void credential_reject(struct repository *r, struct credential *c) { int i; credential_next_state(c); - credential_apply_config(c); + credential_apply_config(r, c); for (i = 0; i < c->helpers.nr; i++) credential_do(c, c->helpers.items[i].string, "erase"); diff --git a/credential.h b/credential.h index 5f9e6ff2efef55..1e6b0dc5b0f465 100644 --- a/credential.h +++ b/credential.h @@ -4,6 +4,8 @@ #include "string-list.h" #include "strvec.h" +struct repository; + /** * The credentials API provides an abstracted way of gathering * authentication credentials from the user. @@ -65,7 +67,7 @@ * // Fill in the username and password fields by contacting * // helpers and/or asking the user. The function will die if it * // fails. - * credential_fill(&c); + * credential_fill(repo, &c); * * // Otherwise, we have a username and password. Try to use it. * @@ -218,7 +220,8 @@ void credential_clear(struct credential *); * If all_capabilities is set, this is an internal user that is prepared * to deal with all known capabilities, and we should advertise that fact. */ -void credential_fill(struct credential *, int all_capabilities); +void credential_fill(struct repository *, struct credential *, + int all_capabilities); /** * Inform the credential subsystem that the provided credentials @@ -227,7 +230,7 @@ void credential_fill(struct credential *, int all_capabilities); * that they may store the result to be used again. Any errors * from helpers are ignored. */ -void credential_approve(struct credential *); +void credential_approve(struct repository *, struct credential *); /** * Inform the credential subsystem that the provided credentials @@ -239,7 +242,7 @@ void credential_approve(struct credential *); * for another call to `credential_fill`). Any errors from helpers * are ignored. */ -void credential_reject(struct credential *); +void credential_reject(struct repository *, struct credential *); /** * Enable all of the supported credential flags in this credential. diff --git a/http.c b/http.c index c8fc15aa118d3b..f08b2ae4746533 100644 --- a/http.c +++ b/http.c @@ -609,7 +609,7 @@ static void init_curl_http_auth(CURL *result) } } - credential_fill(&http_auth, 1); + credential_fill(the_repository, &http_auth, 1); if (http_auth.password) { if (always_auth_proactively()) { @@ -652,7 +652,7 @@ static void init_curl_proxy_auth(CURL *result) { if (proxy_auth.username) { if (!proxy_auth.password && !proxy_auth.credential) - credential_fill(&proxy_auth, 1); + credential_fill(the_repository, &proxy_auth, 1); set_proxyauth_name_password(result); } @@ -686,7 +686,7 @@ static int has_cert_password(void) cert_auth.host = xstrdup(""); cert_auth.username = xstrdup(""); cert_auth.path = xstrdup(ssl_cert); - credential_fill(&cert_auth, 0); + credential_fill(the_repository, &cert_auth, 0); } return 1; } @@ -700,7 +700,7 @@ static int has_proxy_cert_password(void) proxy_cert_auth.host = xstrdup(""); proxy_cert_auth.username = xstrdup(""); proxy_cert_auth.path = xstrdup(http_proxy_ssl_cert); - credential_fill(&proxy_cert_auth, 0); + credential_fill(the_repository, &proxy_cert_auth, 0); } return 1; } @@ -1784,9 +1784,9 @@ static int handle_curl_result(struct slot_results *results) curl_errorstr, sizeof(curl_errorstr)); if (results->curl_result == CURLE_OK) { - credential_approve(&http_auth); - credential_approve(&proxy_auth); - credential_approve(&cert_auth); + credential_approve(the_repository, &http_auth); + credential_approve(the_repository, &proxy_auth); + credential_approve(the_repository, &cert_auth); return HTTP_OK; } else if (results->curl_result == CURLE_SSL_CERTPROBLEM) { /* @@ -1795,7 +1795,7 @@ static int handle_curl_result(struct slot_results *results) * with the certificate. So we reject the credential to * avoid caching or saving a bad password. */ - credential_reject(&cert_auth); + credential_reject(the_repository, &cert_auth); return HTTP_NOAUTH; } else if (results->curl_result == CURLE_SSL_PINNEDPUBKEYNOTMATCH) { return HTTP_NOMATCHPUBLICKEY; @@ -1808,7 +1808,7 @@ static int handle_curl_result(struct slot_results *results) credential_clear_secrets(&http_auth); return HTTP_REAUTH; } - credential_reject(&http_auth); + credential_reject(the_repository, &http_auth); if (always_auth_proactively()) http_proactive_auth = PROACTIVE_AUTH_NONE; return HTTP_NOAUTH; @@ -1822,7 +1822,7 @@ static int handle_curl_result(struct slot_results *results) } } else { if (results->http_connectcode == 407) - credential_reject(&proxy_auth); + credential_reject(the_repository, &proxy_auth); if (!curl_errorstr[0]) strlcpy(curl_errorstr, curl_easy_strerror(results->curl_result), @@ -2210,7 +2210,7 @@ static int http_request_reauth(const char *url, int ret; if (always_auth_proactively()) - credential_fill(&http_auth, 1); + credential_fill(the_repository, &http_auth, 1); ret = http_request(url, result, target, options); @@ -2251,7 +2251,7 @@ static int http_request_reauth(const char *url, BUG("Unknown http_request target"); } - credential_fill(&http_auth, 1); + credential_fill(the_repository, &http_auth, 1); ret = http_request(url, result, target, options); } diff --git a/imap-send.c b/imap-send.c index 68ab1aea837c35..6c8f84e836bb40 100644 --- a/imap-send.c +++ b/imap-send.c @@ -922,7 +922,7 @@ static void server_fill_credential(struct imap_server_conf *srvc, struct credent cred->username = xstrdup_or_null(srvc->user); cred->password = xstrdup_or_null(srvc->pass); - credential_fill(cred, 1); + credential_fill(the_repository, cred, 1); if (!srvc->user) srvc->user = xstrdup(cred->username); @@ -1123,7 +1123,7 @@ static struct imap_store *imap_open_store(struct imap_server_conf *srvc, const c } /* !preauth */ if (cred.username) - credential_approve(&cred); + credential_approve(the_repository, &cred); credential_clear(&cred); /* check the target mailbox exists */ @@ -1150,7 +1150,7 @@ static struct imap_store *imap_open_store(struct imap_server_conf *srvc, const c bail: if (cred.username) - credential_reject(&cred); + credential_reject(the_repository, &cred); credential_clear(&cred); out: @@ -1492,9 +1492,9 @@ static int curl_append_msgs_to_imap(struct imap_server_conf *server, if (cred.username) { if (res == CURLE_OK) - credential_approve(&cred); + credential_approve(the_repository, &cred); else if (res == CURLE_LOGIN_DENIED) - credential_reject(&cred); + credential_reject(the_repository, &cred); } credential_clear(&cred); diff --git a/remote-curl.c b/remote-curl.c index a24e3a8b9abcc9..1273507a96cae9 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -942,7 +942,7 @@ static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_rece do { err = probe_rpc(rpc, &results); if (err == HTTP_REAUTH) - credential_fill(&http_auth, 0); + credential_fill(the_repository, &http_auth, 0); } while (err == HTTP_REAUTH); if (err != HTTP_OK) return -1; @@ -1064,7 +1064,7 @@ static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_rece rpc->any_written = 0; err = run_slot(slot, NULL); if (err == HTTP_REAUTH && !large_request) { - credential_fill(&http_auth, 0); + credential_fill(the_repository, &http_auth, 0); curl_slist_free_all(headers); goto retry; } From b81093aeae557d56277773dee710eb363c720b6e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:57 +0100 Subject: [PATCH 017/535] resolve-undo: stop using `the_repository` Stop using `the_repository` in the "resolve-undo" subsystem by passing in the hash algorithm when reading or writing resolve-undo information. While we could trivially update the caller to pass in the hash algorithm used by the index itself, we instead pass in `the_hash_algo`. This is mostly done to stay consistent with the rest of the code in that file, which isn't prepared to handle arbitrary repositories, either. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- read-cache.c | 4 ++-- resolve-undo.c | 14 +++++++------- resolve-undo.h | 6 ++++-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/read-cache.c b/read-cache.c index 38c36caa7fef4d..d54be2c1726872 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1754,7 +1754,7 @@ static int read_index_extension(struct index_state *istate, istate->cache_tree = cache_tree_read(data, sz); break; case CACHE_EXT_RESOLVE_UNDO: - istate->resolve_undo = resolve_undo_read(data, sz); + istate->resolve_undo = resolve_undo_read(data, sz, the_hash_algo); break; case CACHE_EXT_LINK: if (read_link_extension(istate, data, sz)) @@ -3033,7 +3033,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, istate->resolve_undo) { strbuf_reset(&sb); - resolve_undo_write(&sb, istate->resolve_undo); + resolve_undo_write(&sb, istate->resolve_undo, the_hash_algo); err = write_index_ext_header(f, eoie_c, CACHE_EXT_RESOLVE_UNDO, sb.len) < 0; hashwrite(f, sb.buf, sb.len); diff --git a/resolve-undo.c b/resolve-undo.c index b5a9dfb4acc511..52c45e5a494636 100644 --- a/resolve-undo.c +++ b/resolve-undo.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -34,7 +33,8 @@ void record_resolve_undo(struct index_state *istate, struct cache_entry *ce) ui->mode[stage - 1] = ce->ce_mode; } -void resolve_undo_write(struct strbuf *sb, struct string_list *resolve_undo) +void resolve_undo_write(struct strbuf *sb, struct string_list *resolve_undo, + const struct git_hash_algo *algop) { struct string_list_item *item; for_each_string_list_item(item, resolve_undo) { @@ -50,18 +50,19 @@ void resolve_undo_write(struct strbuf *sb, struct string_list *resolve_undo) for (i = 0; i < 3; i++) { if (!ui->mode[i]) continue; - strbuf_add(sb, ui->oid[i].hash, the_hash_algo->rawsz); + strbuf_add(sb, ui->oid[i].hash, algop->rawsz); } } } -struct string_list *resolve_undo_read(const char *data, unsigned long size) +struct string_list *resolve_undo_read(const char *data, unsigned long size, + const struct git_hash_algo *algop) { struct string_list *resolve_undo; size_t len; char *endptr; int i; - const unsigned rawsz = the_hash_algo->rawsz; + const unsigned rawsz = algop->rawsz; CALLOC_ARRAY(resolve_undo, 1); resolve_undo->strdup_strings = 1; @@ -96,8 +97,7 @@ struct string_list *resolve_undo_read(const char *data, unsigned long size) continue; if (size < rawsz) goto error; - oidread(&ui->oid[i], (const unsigned char *)data, - the_repository->hash_algo); + oidread(&ui->oid[i], (const unsigned char *)data, algop); size -= rawsz; data += rawsz; } diff --git a/resolve-undo.h b/resolve-undo.h index 89a32272620f2b..7ed11a1c59b819 100644 --- a/resolve-undo.h +++ b/resolve-undo.h @@ -14,8 +14,10 @@ struct resolve_undo_info { }; void record_resolve_undo(struct index_state *, struct cache_entry *); -void resolve_undo_write(struct strbuf *, struct string_list *); -struct string_list *resolve_undo_read(const char *, unsigned long); +void resolve_undo_write(struct strbuf *, struct string_list *, + const struct git_hash_algo *algop); +struct string_list *resolve_undo_read(const char *, unsigned long, + const struct git_hash_algo *algop); void resolve_undo_clear_index(struct index_state *); int unmerge_index_entry(struct index_state *, const char *, struct resolve_undo_info *, unsigned); void unmerge_index(struct index_state *, const struct pathspec *, unsigned); From 727c71a1121c2067223aad8d187409c9822a3f8d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:58 +0100 Subject: [PATCH 018/535] tmp-objdir: stop using `the_repository` Stop using `the_repository` in the "tmp-objdir" subsystem by passing in the repostiroy when creating a new temporary object directory. While we could trivially update the caller to pass in the hash algorithm used by the index itself, we instead pass in `the_hash_algo`. This is mostly done to stay consistent with the rest of the code in that file, which isn't prepared to handle arbitrary repositories, either. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- bulk-checkin.c | 2 +- log-tree.c | 2 +- tmp-objdir.c | 15 ++++++++------- tmp-objdir.h | 5 +++-- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 191b5eeb34e679..56347a79633505 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -2239,7 +2239,7 @@ static const char *unpack(int err_fd, struct shallow_info *si) strvec_push(&child.args, alt_shallow_file); } - tmp_objdir = tmp_objdir_create("incoming"); + tmp_objdir = tmp_objdir_create(the_repository, "incoming"); if (!tmp_objdir) { if (err_fd > 0) close(err_fd); diff --git a/bulk-checkin.c b/bulk-checkin.c index 4a70a70a951cfd..f7b15e3999f2c7 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -333,7 +333,7 @@ void prepare_loose_object_bulk_checkin(void) if (!odb_transaction_nesting || bulk_fsync_objdir) return; - bulk_fsync_objdir = tmp_objdir_create("bulk-fsync"); + bulk_fsync_objdir = tmp_objdir_create(the_repository, "bulk-fsync"); if (bulk_fsync_objdir) tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0); } diff --git a/log-tree.c b/log-tree.c index d08eb672a93390..8b184d6776344b 100644 --- a/log-tree.c +++ b/log-tree.c @@ -1042,7 +1042,7 @@ static int do_remerge_diff(struct rev_info *opt, * into the alternative object store list as the primary. */ if (opt->remerge_diff && !opt->remerge_objdir) { - opt->remerge_objdir = tmp_objdir_create("remerge-diff"); + opt->remerge_objdir = tmp_objdir_create(the_repository, "remerge-diff"); if (!opt->remerge_objdir) return error(_("unable to create temporary object directory")); tmp_objdir_replace_primary_odb(opt->remerge_objdir, 1); diff --git a/tmp-objdir.c b/tmp-objdir.c index 659fcdcc2954ed..0ea078a5c5f4eb 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -1,5 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE - #include "git-compat-util.h" #include "tmp-objdir.h" #include "abspath.h" @@ -16,6 +14,7 @@ #include "repository.h" struct tmp_objdir { + struct repository *repo; struct strbuf path; struct strvec env; struct object_directory *prev_odb; @@ -116,7 +115,8 @@ static int setup_tmp_objdir(const char *root) return ret; } -struct tmp_objdir *tmp_objdir_create(const char *prefix) +struct tmp_objdir *tmp_objdir_create(struct repository *r, + const char *prefix) { static int installed_handlers; struct tmp_objdir *t; @@ -125,6 +125,7 @@ struct tmp_objdir *tmp_objdir_create(const char *prefix) BUG("only one tmp_objdir can be used at a time"); t = xcalloc(1, sizeof(*t)); + t->repo = r; strbuf_init(&t->path, 0); strvec_init(&t->env); @@ -134,7 +135,7 @@ struct tmp_objdir *tmp_objdir_create(const char *prefix) * them. */ strbuf_addf(&t->path, "%s/tmp_objdir-%s-XXXXXX", - repo_get_object_directory(the_repository), prefix); + repo_get_object_directory(r), prefix); if (!mkdtemp(t->path.buf)) { /* free, not destroy, as we never touched the filesystem */ @@ -154,7 +155,7 @@ struct tmp_objdir *tmp_objdir_create(const char *prefix) } env_append(&t->env, ALTERNATE_DB_ENVIRONMENT, - absolute_path(repo_get_object_directory(the_repository))); + absolute_path(repo_get_object_directory(r))); env_replace(&t->env, DB_ENVIRONMENT, absolute_path(t->path.buf)); env_replace(&t->env, GIT_QUARANTINE_ENVIRONMENT, absolute_path(t->path.buf)); @@ -273,14 +274,14 @@ int tmp_objdir_migrate(struct tmp_objdir *t) return 0; if (t->prev_odb) { - if (the_repository->objects->odb->will_destroy) + if (t->repo->objects->odb->will_destroy) BUG("migrating an ODB that was marked for destruction"); restore_primary_odb(t->prev_odb, t->path.buf); t->prev_odb = NULL; } strbuf_addbuf(&src, &t->path); - strbuf_addstr(&dst, repo_get_object_directory(the_repository)); + strbuf_addstr(&dst, repo_get_object_directory(t->repo)); ret = migrate_paths(&src, &dst, 0); diff --git a/tmp-objdir.h b/tmp-objdir.h index 237d96b66050c8..fceda14979648f 100644 --- a/tmp-objdir.h +++ b/tmp-objdir.h @@ -11,7 +11,7 @@ * Example: * * struct child_process child = CHILD_PROCESS_INIT; - * struct tmp_objdir *t = tmp_objdir_create("incoming"); + * struct tmp_objdir *t = tmp_objdir_create(repo, "incoming"); * strvec_push(&child.args, cmd); * strvec_pushv(&child.env, tmp_objdir_env(t)); * if (!run_command(&child)) && !tmp_objdir_migrate(t)) @@ -21,13 +21,14 @@ * */ +struct repository; struct tmp_objdir; /* * Create a new temporary object directory with the specified prefix; * returns NULL on failure. */ -struct tmp_objdir *tmp_objdir_create(const char *prefix); +struct tmp_objdir *tmp_objdir_create(struct repository *r, const char *prefix); /* * Return a list of environment strings, suitable for use with From 1b374ad71f702cd13f7395ccff54d5c4ee604b98 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:43:59 +0100 Subject: [PATCH 019/535] add-interactive: stop using `the_repository` Stop using `the_repository` in the "add-interactive" subsystem by reusing the repository we already have available via parameters or in the `add_i_state` structure. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- add-interactive.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/add-interactive.c b/add-interactive.c index d0f8c10e6fd460..97ff35b6f12a32 100644 --- a/add-interactive.c +++ b/add-interactive.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -72,14 +71,14 @@ void init_add_i_state(struct add_i_state *s, struct repository *r) s->use_color ? GIT_COLOR_RESET : "", COLOR_MAXLEN); FREE_AND_NULL(s->interactive_diff_filter); - git_config_get_string("interactive.difffilter", - &s->interactive_diff_filter); + repo_config_get_string(r, "interactive.difffilter", + &s->interactive_diff_filter); FREE_AND_NULL(s->interactive_diff_algorithm); - git_config_get_string("diff.algorithm", - &s->interactive_diff_algorithm); + repo_config_get_string(r, "diff.algorithm", + &s->interactive_diff_algorithm); - git_config_get_bool("interactive.singlekey", &s->use_single_key); + repo_config_get_bool(r, "interactive.singlekey", &s->use_single_key); if (s->use_single_key) setbuf(stdin, NULL); } @@ -535,7 +534,7 @@ static int get_modified_files(struct repository *r, size_t *binary_count) { struct object_id head_oid; - int is_initial = !refs_resolve_ref_unsafe(get_main_ref_store(the_repository), + int is_initial = !refs_resolve_ref_unsafe(get_main_ref_store(r), "HEAD", RESOLVE_REF_READING, &head_oid, NULL); struct collection_status s = { 0 }; @@ -560,7 +559,7 @@ static int get_modified_files(struct repository *r, s.skip_unseen = filter && i; opt.def = is_initial ? - empty_tree_oid_hex(the_repository->hash_algo) : oid_to_hex(&head_oid); + empty_tree_oid_hex(r->hash_algo) : oid_to_hex(&head_oid); repo_init_revisions(r, &rev, NULL); setup_revisions(0, NULL, &rev, &opt); @@ -765,7 +764,7 @@ static int run_revert(struct add_i_state *s, const struct pathspec *ps, size_t count, i, j; struct object_id oid; - int is_initial = !refs_resolve_ref_unsafe(get_main_ref_store(the_repository), + int is_initial = !refs_resolve_ref_unsafe(get_main_ref_store(s->r), "HEAD", RESOLVE_REF_READING, &oid, NULL); @@ -996,7 +995,7 @@ static int run_diff(struct add_i_state *s, const struct pathspec *ps, ssize_t count, i; struct object_id oid; - int is_initial = !refs_resolve_ref_unsafe(get_main_ref_store(the_repository), + int is_initial = !refs_resolve_ref_unsafe(get_main_ref_store(s->r), "HEAD", RESOLVE_REF_READING, &oid, NULL); From e1335a940748d8e525b3a5e095f352c85644accf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:44:00 +0100 Subject: [PATCH 020/535] graph: stop using `the_repository` Stop using `the_repository` in the "graph" subsystem by reusing the repository we already have available via `struct rev_info`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- graph.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/graph.c b/graph.c index 52205f75c37cf3..26f6fbf000aef5 100644 --- a/graph.c +++ b/graph.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -351,7 +350,7 @@ struct git_graph *graph_init(struct rev_info *opt) if (!column_colors) { char *string; - if (git_config_get_string("log.graphcolors", &string)) { + if (repo_config_get_string(opt->repo, "log.graphcolors", &string)) { /* not configured -- use default */ graph_set_column_colors(column_colors_ansi, column_colors_ansi_max); From d4cd757051d1c779f1d95557b7ac523a6e1803fc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 17 Dec 2024 07:44:01 +0100 Subject: [PATCH 021/535] match-trees: stop using `the_repository` Stop using `the_repository` in the "match-trees" subsystem by passing down the already-available repository parameters to internal functions as required. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- match-trees.c | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/match-trees.c b/match-trees.c index a1c8b91eaef8fa..ef14ceb594c72a 100644 --- a/match-trees.c +++ b/match-trees.c @@ -1,4 +1,3 @@ -#define USE_THE_REPOSITORY_VARIABLE #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" @@ -8,6 +7,7 @@ #include "tree.h" #include "tree-walk.h" #include "object-store-ll.h" +#include "repository.h" static int score_missing(unsigned mode) { @@ -54,14 +54,15 @@ static int score_matches(unsigned mode1, unsigned mode2) return score; } -static void *fill_tree_desc_strict(struct tree_desc *desc, +static void *fill_tree_desc_strict(struct repository *r, + struct tree_desc *desc, const struct object_id *hash) { void *buffer; enum object_type type; unsigned long size; - buffer = repo_read_object_file(the_repository, hash, &type, &size); + buffer = repo_read_object_file(r, hash, &type, &size); if (!buffer) die("unable to read tree (%s)", oid_to_hex(hash)); if (type != OBJ_TREE) @@ -80,12 +81,13 @@ static int base_name_entries_compare(const struct name_entry *a, /* * Inspect two trees, and give a score that tells how similar they are. */ -static int score_trees(const struct object_id *hash1, const struct object_id *hash2) +static int score_trees(struct repository *r, + const struct object_id *hash1, const struct object_id *hash2) { struct tree_desc one; struct tree_desc two; - void *one_buf = fill_tree_desc_strict(&one, hash1); - void *two_buf = fill_tree_desc_strict(&two, hash2); + void *one_buf = fill_tree_desc_strict(r, &one, hash1); + void *two_buf = fill_tree_desc_strict(r, &two, hash2); int score = 0; for (;;) { @@ -133,7 +135,8 @@ static int score_trees(const struct object_id *hash1, const struct object_id *ha /* * Match one itself and its subtrees with two and pick the best match. */ -static void match_trees(const struct object_id *hash1, +static void match_trees(struct repository *r, + const struct object_id *hash1, const struct object_id *hash2, int *best_score, char **best_match, @@ -141,7 +144,7 @@ static void match_trees(const struct object_id *hash1, int recurse_limit) { struct tree_desc one; - void *one_buf = fill_tree_desc_strict(&one, hash1); + void *one_buf = fill_tree_desc_strict(r, &one, hash1); while (one.size) { const char *path; @@ -152,7 +155,7 @@ static void match_trees(const struct object_id *hash1, elem = tree_entry_extract(&one, &path, &mode); if (!S_ISDIR(mode)) goto next; - score = score_trees(elem, hash2); + score = score_trees(r, elem, hash2); if (*best_score < score) { free(*best_match); *best_match = xstrfmt("%s%s", base, path); @@ -160,7 +163,7 @@ static void match_trees(const struct object_id *hash1, } if (recurse_limit) { char *newbase = xstrfmt("%s%s/", base, path); - match_trees(elem, hash2, best_score, best_match, + match_trees(r, elem, hash2, best_score, best_match, newbase, recurse_limit - 1); free(newbase); } @@ -175,7 +178,8 @@ static void match_trees(const struct object_id *hash1, * A tree "oid1" has a subdirectory at "prefix". Come up with a tree object by * replacing it with another tree "oid2". */ -static int splice_tree(const struct object_id *oid1, const char *prefix, +static int splice_tree(struct repository *r, + const struct object_id *oid1, const char *prefix, const struct object_id *oid2, struct object_id *result) { char *subpath; @@ -194,7 +198,7 @@ static int splice_tree(const struct object_id *oid1, const char *prefix, if (*subpath) subpath++; - buf = repo_read_object_file(the_repository, oid1, &type, &sz); + buf = repo_read_object_file(r, oid1, &type, &sz); if (!buf) die("cannot read tree %s", oid_to_hex(oid1)); init_tree_desc(&desc, oid1, buf, sz); @@ -232,15 +236,15 @@ static int splice_tree(const struct object_id *oid1, const char *prefix, oid_to_hex(oid1)); if (*subpath) { struct object_id tree_oid; - oidread(&tree_oid, rewrite_here, the_repository->hash_algo); - status = splice_tree(&tree_oid, subpath, oid2, &subtree); + oidread(&tree_oid, rewrite_here, r->hash_algo); + status = splice_tree(r, &tree_oid, subpath, oid2, &subtree); if (status) return status; rewrite_with = &subtree; } else { rewrite_with = oid2; } - hashcpy(rewrite_here, rewrite_with->hash, the_repository->hash_algo); + hashcpy(rewrite_here, rewrite_with->hash, r->hash_algo); status = write_object_file(buf, sz, OBJ_TREE, result); free(buf); return status; @@ -271,7 +275,7 @@ void shift_tree(struct repository *r, if (!depth_limit) depth_limit = 2; - add_score = del_score = score_trees(hash1, hash2); + add_score = del_score = score_trees(r, hash1, hash2); add_prefix = xcalloc(1, 1); del_prefix = xcalloc(1, 1); @@ -279,13 +283,13 @@ void shift_tree(struct repository *r, * See if one's subtree resembles two; if so we need to prefix * two with a few fake trees to match the prefix. */ - match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit); + match_trees(r, hash1, hash2, &add_score, &add_prefix, "", depth_limit); /* * See if two's subtree resembles one; if so we need to * pick only subtree of two. */ - match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit); + match_trees(r, hash2, hash1, &del_score, &del_prefix, "", depth_limit); /* Assume we do not have to do any shifting */ oidcpy(shifted, hash2); @@ -306,7 +310,7 @@ void shift_tree(struct repository *r, if (!*add_prefix) goto out; - splice_tree(hash1, add_prefix, hash2, shifted); + splice_tree(r, hash1, add_prefix, hash2, shifted); out: free(add_prefix); @@ -340,16 +344,16 @@ void shift_tree_by(struct repository *r, if (candidate == 3) { /* Both are plausible -- we need to evaluate the score */ - int best_score = score_trees(hash1, hash2); + int best_score = score_trees(r, hash1, hash2); int score; candidate = 0; - score = score_trees(&sub1, hash2); + score = score_trees(r, &sub1, hash2); if (score > best_score) { candidate = 1; best_score = score; } - score = score_trees(&sub2, hash1); + score = score_trees(r, &sub2, hash1); if (score > best_score) candidate = 2; } @@ -365,7 +369,7 @@ void shift_tree_by(struct repository *r, * shift tree2 down by adding shift_prefix above it * to match tree1. */ - splice_tree(hash1, shift_prefix, hash2, shifted); + splice_tree(r, hash1, shift_prefix, hash2, shifted); else /* * shift tree2 up by removing shift_prefix from it From 9d46bc791b77e059deb740fde3439a0417a91b5e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Dec 2024 16:21:09 +0000 Subject: [PATCH 022/535] path-walk: introduce an object walk by path In anticipation of a few planned applications, introduce the most basic form of a path-walk API. It currently assumes that there are no UNINTERESTING objects, and does not include any complicated filters. It calls a function pointer on groups of tree and blob objects as grouped by path. This only includes objects the first time they are discovered, so an object that appears at multiple paths will not be included in two batches. These batches are collected in 'struct type_and_oid_list' objects, which store an object type and an oid_array of objects. The data structures are documented in 'struct path_walk_context', but in summary the most important are: * 'paths_to_lists' is a strmap that connects a path to a type_and_oid_list for that path. To avoid conflicts in path names, we make sure that tree paths end in "/" (except the root path with is an empty string) and blob paths do not end in "/". * 'path_stack' is a string list that is added to in an append-only way. This stores the stack of our depth-first search on the heap instead of using recursion. * 'path_stack_pushed' is a strmap that stores path names that were already added to 'path_stack', to avoid repeating paths in the stack. Mostly, this saves us from quadratic lookups from doing unsorted checks into the string_list. The coupling of 'path_stack' and 'path_stack_pushed' is protected by the push_to_stack() method. Call this instead of inserting into these structures directly. The walk_objects_by_path() method initializes these structures and starts walking commits from the given rev_info struct. The commits are used to find the list of root trees which populate the start of our depth-first search. The core of our depth-first search is in a while loop that continues while we have not indicated an early exit and our 'path_stack' still has entries in it. The loop body pops a path off of the stack and "visits" the path via the walk_path() method. The walk_path() method gets the list of OIDs from the 'path_to_lists' strmap and executes the callback method on that list with the given path and type. If the OIDs correspond to tree objects, then iterate over all trees in the list and run add_children() to add the child objects to their own lists, adding new entries to the stack if necessary. In testing, this depth-first search approach was the one that used the least memory while iterating over the object lists. There is still a chance that repositories with too-wide path patterns could cause memory pressure issues. Limiting the stack size could be done in the future by limiting how many objects are being considered in-progress, or by visiting blob paths earlier than trees. There are many future adaptations that could be made, but they are left for future updates when consumers are ready to take advantage of those features. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/api-path-walk.txt | 45 ++++ Makefile | 1 + path-walk.c | 279 ++++++++++++++++++++++ path-walk.h | 47 ++++ 4 files changed, 372 insertions(+) create mode 100644 Documentation/technical/api-path-walk.txt create mode 100644 path-walk.c create mode 100644 path-walk.h diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt new file mode 100644 index 00000000000000..c550c77ca30754 --- /dev/null +++ b/Documentation/technical/api-path-walk.txt @@ -0,0 +1,45 @@ +Path-Walk API +============= + +The path-walk API is used to walk reachable objects, but to visit objects +in batches based on a common path they appear in, or by type. + +For example, all reachable commits are visited in a group. All tags are +visited in a group. Then, all root trees are visited. At some point, all +blobs reachable via a path `my/dir/to/A` are visited. When there are +multiple paths possible to reach the same object, then only one of those +paths is used to visit the object. + +Basics +------ + +To use the path-walk API, include `path-walk.h` and call +`walk_objects_by_path()` with a customized `path_walk_info` struct. The +struct is used to set all of the options for how the walk should proceed. +Let's dig into the different options and their use. + +`path_fn` and `path_fn_data`:: + The most important option is the `path_fn` option, which is a + function pointer to the callback that can execute logic on the + object IDs for objects grouped by type and path. This function + also receives a `data` value that corresponds to the + `path_fn_data` member, for providing custom data structures to + this callback function. + +`revs`:: + To configure the exact details of the reachable set of objects, + use the `revs` member and initialize it using the revision + machinery in `revision.h`. Initialize `revs` using calls such as + `setup_revisions()` or `parse_revision_opt()`. Do not call + `prepare_revision_walk()`, as that will be called within + `walk_objects_by_path()`. ++ +It is also important that you do not specify the `--objects` flag for the +`revs` struct. The revision walk should only be used to walk commits, and +the objects will be walked in a separate way based on those starting +commits. + +Examples +-------- + +See example usages in future changes. diff --git a/Makefile b/Makefile index 8d8cc6ab90928b..1b2abad3b4a9ee 100644 --- a/Makefile +++ b/Makefile @@ -1098,6 +1098,7 @@ LIB_OBJS += parse-options.o LIB_OBJS += patch-delta.o LIB_OBJS += patch-ids.o LIB_OBJS += path.o +LIB_OBJS += path-walk.o LIB_OBJS += pathspec.o LIB_OBJS += pkt-line.o LIB_OBJS += preload-index.o diff --git a/path-walk.c b/path-walk.c new file mode 100644 index 00000000000000..021840ab41b56a --- /dev/null +++ b/path-walk.c @@ -0,0 +1,279 @@ +/* + * path-walk.c: implementation for path-based walks of the object graph. + */ +#include "git-compat-util.h" +#include "path-walk.h" +#include "blob.h" +#include "commit.h" +#include "dir.h" +#include "hashmap.h" +#include "hex.h" +#include "object.h" +#include "oid-array.h" +#include "revision.h" +#include "string-list.h" +#include "strmap.h" +#include "trace2.h" +#include "tree.h" +#include "tree-walk.h" + +struct type_and_oid_list { + enum object_type type; + struct oid_array oids; +}; + +#define TYPE_AND_OID_LIST_INIT { \ + .type = OBJ_NONE, \ + .oids = OID_ARRAY_INIT \ +} + +struct path_walk_context { + /** + * Repeats of data in 'struct path_walk_info' for + * access with fewer characters. + */ + struct repository *repo; + struct rev_info *revs; + struct path_walk_info *info; + + /** + * Map a path to a 'struct type_and_oid_list' + * containing the objects discovered at that + * path. + */ + struct strmap paths_to_lists; + + /** + * Store the current list of paths in a stack, to + * facilitate depth-first-search without recursion. + * + * Use path_stack_pushed to indicate whether a path + * was previously added to path_stack. + */ + struct string_list path_stack; + struct strset path_stack_pushed; +}; + +static void push_to_stack(struct path_walk_context *ctx, + const char *path) +{ + if (strset_contains(&ctx->path_stack_pushed, path)) + return; + + strset_add(&ctx->path_stack_pushed, path); + string_list_append(&ctx->path_stack, path); +} + +static int add_tree_entries(struct path_walk_context *ctx, + const char *base_path, + struct object_id *oid) +{ + struct tree_desc desc; + struct name_entry entry; + struct strbuf path = STRBUF_INIT; + size_t base_len; + struct tree *tree = lookup_tree(ctx->repo, oid); + + if (!tree) { + error(_("failed to walk children of tree %s: not found"), + oid_to_hex(oid)); + return -1; + } else if (parse_tree_gently(tree, 1)) { + error("bad tree object %s", oid_to_hex(oid)); + return -1; + } + + strbuf_addstr(&path, base_path); + base_len = path.len; + + parse_tree(tree); + init_tree_desc(&desc, &tree->object.oid, tree->buffer, tree->size); + while (tree_entry(&desc, &entry)) { + struct type_and_oid_list *list; + struct object *o; + /* Not actually true, but we will ignore submodules later. */ + enum object_type type = S_ISDIR(entry.mode) ? OBJ_TREE : OBJ_BLOB; + + /* Skip submodules. */ + if (S_ISGITLINK(entry.mode)) + continue; + + if (type == OBJ_TREE) { + struct tree *child = lookup_tree(ctx->repo, &entry.oid); + o = child ? &child->object : NULL; + } else if (type == OBJ_BLOB) { + struct blob *child = lookup_blob(ctx->repo, &entry.oid); + o = child ? &child->object : NULL; + } else { + BUG("invalid type for tree entry: %d", type); + } + + if (!o) { + error(_("failed to find object %s"), + oid_to_hex(&o->oid)); + return -1; + } + + /* Skip this object if already seen. */ + if (o->flags & SEEN) + continue; + o->flags |= SEEN; + + strbuf_setlen(&path, base_len); + strbuf_add(&path, entry.path, entry.pathlen); + + /* + * Trees will end with "/" for concatenation and distinction + * from blobs at the same path. + */ + if (type == OBJ_TREE) + strbuf_addch(&path, '/'); + + if (!(list = strmap_get(&ctx->paths_to_lists, path.buf))) { + CALLOC_ARRAY(list, 1); + list->type = type; + strmap_put(&ctx->paths_to_lists, path.buf, list); + } + push_to_stack(ctx, path.buf); + oid_array_append(&list->oids, &entry.oid); + } + + free_tree_buffer(tree); + strbuf_release(&path); + return 0; +} + +/* + * For each path in paths_to_explore, walk the trees another level + * and add any found blobs to the batch (but only if they exist and + * haven't been added yet). + */ +static int walk_path(struct path_walk_context *ctx, + const char *path) +{ + struct type_and_oid_list *list; + int ret = 0; + + list = strmap_get(&ctx->paths_to_lists, path); + + if (!list->oids.nr) + return 0; + + /* Evaluate function pointer on this data. */ + ret = ctx->info->path_fn(path, &list->oids, list->type, + ctx->info->path_fn_data); + + /* Expand data for children. */ + if (list->type == OBJ_TREE) { + for (size_t i = 0; i < list->oids.nr; i++) { + ret |= add_tree_entries(ctx, + path, + &list->oids.oid[i]); + } + } + + oid_array_clear(&list->oids); + strmap_remove(&ctx->paths_to_lists, path, 1); + return ret; +} + +static void clear_paths_to_lists(struct strmap *map) +{ + struct hashmap_iter iter; + struct strmap_entry *e; + + hashmap_for_each_entry(&map->map, &iter, e, ent) { + struct type_and_oid_list *list = e->value; + oid_array_clear(&list->oids); + } + strmap_clear(map, 1); + strmap_init(map); +} + +/** + * Given the configuration of 'info', walk the commits based on 'info->revs' and + * call 'info->path_fn' on each discovered path. + * + * Returns nonzero on an error. + */ +int walk_objects_by_path(struct path_walk_info *info) +{ + const char *root_path = ""; + int ret = 0; + size_t commits_nr = 0, paths_nr = 0; + struct commit *c; + struct type_and_oid_list *root_tree_list; + struct path_walk_context ctx = { + .repo = info->revs->repo, + .revs = info->revs, + .info = info, + .path_stack = STRING_LIST_INIT_DUP, + .path_stack_pushed = STRSET_INIT, + .paths_to_lists = STRMAP_INIT + }; + + trace2_region_enter("path-walk", "commit-walk", info->revs->repo); + + /* Insert a single list for the root tree into the paths. */ + CALLOC_ARRAY(root_tree_list, 1); + root_tree_list->type = OBJ_TREE; + strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); + push_to_stack(&ctx, root_path); + + if (prepare_revision_walk(info->revs)) + die(_("failed to setup revision walk")); + + while ((c = get_revision(info->revs))) { + struct object_id *oid = get_commit_tree_oid(c); + struct tree *t; + commits_nr++; + + oid = get_commit_tree_oid(c); + t = lookup_tree(info->revs->repo, oid); + + if (!t) { + error("could not find tree %s", oid_to_hex(oid)); + return -1; + } + + if (t->object.flags & SEEN) + continue; + t->object.flags |= SEEN; + oid_array_append(&root_tree_list->oids, oid); + } + + trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); + trace2_region_leave("path-walk", "commit-walk", info->revs->repo); + + trace2_region_enter("path-walk", "path-walk", info->revs->repo); + while (!ret && ctx.path_stack.nr) { + char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; + ctx.path_stack.nr--; + paths_nr++; + + ret = walk_path(&ctx, path); + + free(path); + } + trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); + trace2_region_leave("path-walk", "path-walk", info->revs->repo); + + clear_paths_to_lists(&ctx.paths_to_lists); + strset_clear(&ctx.path_stack_pushed); + string_list_clear(&ctx.path_stack, 0); + return ret; +} + +void path_walk_info_init(struct path_walk_info *info) +{ + struct path_walk_info empty = PATH_WALK_INFO_INIT; + memcpy(info, &empty, sizeof(empty)); +} + +void path_walk_info_clear(struct path_walk_info *info UNUSED) +{ + /* + * This destructor is empty for now, as info->revs + * is not owned by 'struct path_walk_info'. + */ +} diff --git a/path-walk.h b/path-walk.h new file mode 100644 index 00000000000000..7cb3538cd8f4af --- /dev/null +++ b/path-walk.h @@ -0,0 +1,47 @@ +/* + * path-walk.h : Methods and structures for walking the object graph in batches + * by the paths that can reach those objects. + */ +#include "object.h" /* Required for 'enum object_type'. */ + +struct rev_info; +struct oid_array; + +/** + * The type of a function pointer for the method that is called on a list of + * objects reachable at a given path. + */ +typedef int (*path_fn)(const char *path, + struct oid_array *oids, + enum object_type type, + void *data); + +struct path_walk_info { + /** + * revs provides the definitions for the commit walk, including + * which commits are UNINTERESTING or not. This structure is + * expected to be owned by the caller. + */ + struct rev_info *revs; + + /** + * The caller wishes to execute custom logic on objects reachable at a + * given path. Every reachable object will be visited exactly once, and + * the first path to see an object wins. This may not be a stable choice. + */ + path_fn path_fn; + void *path_fn_data; +}; + +#define PATH_WALK_INFO_INIT { 0 } + +void path_walk_info_init(struct path_walk_info *info); +void path_walk_info_clear(struct path_walk_info *info); + +/** + * Given the configuration of 'info', walk the commits based on 'info->revs' and + * call 'info->path_fn' on each discovered path. + * + * Returns nonzero on an error. + */ +int walk_objects_by_path(struct path_walk_info *info); From cef003d4532d4dff26007c0cd50afcfef84ad33b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Dec 2024 16:21:10 +0000 Subject: [PATCH 023/535] test-lib-functions: add test_cmp_sorted This test helper will be helpful to reduce repeated logic in t6601-path-walk.sh, but may be helpful elsewhere, too. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/test-lib-functions.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 78e054ab503a65..c17f6b05c9b4af 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1268,6 +1268,16 @@ test_cmp () { eval "$GIT_TEST_CMP" '"$@"' } +# test_cmp_sorted runs test_cmp on sorted versions of the two +# input files. Uses "$1.sorted" and "$2.sorted" as temp files. + +test_cmp_sorted () { + sort <"$1" >"$1.sorted" && + sort <"$2" >"$2.sorted" && + test_cmp "$1.sorted" "$2.sorted" && + rm "$1.sorted" "$2.sorted" +} + # Check that the given config key has the expected value. # # test_cmp_config [-C ] From d190124f277952bd828f932d87e76deabedf0c83 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Dec 2024 16:21:11 +0000 Subject: [PATCH 024/535] t6601: add helper for testing path-walk API Add some tests based on the current behavior, doing interesting checks for different sets of branches, ranges, and the --boundary option. This sets a baseline for the behavior and we can extend it as new options are introduced. Store and output a 'batch_nr' value so we can demonstrate that the paths are grouped together in a batch and not following some other ordering. This allows us to test the depth-first behavior of the path-walk API. However, we purposefully do not test the order of the objects in the batch, so the output is compared to the expected output through a sort. It is important to mention that the behavior of the API will change soon as we start to handle UNINTERESTING objects differently, but these tests will demonstrate the change in behavior. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/api-path-walk.txt | 3 +- Makefile | 1 + t/helper/test-path-walk.c | 84 +++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/t6601-path-walk.sh | 120 ++++++++++++++++++++++ 6 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 t/helper/test-path-walk.c create mode 100755 t/t6601-path-walk.sh diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt index c550c77ca30754..662162ec70b38b 100644 --- a/Documentation/technical/api-path-walk.txt +++ b/Documentation/technical/api-path-walk.txt @@ -42,4 +42,5 @@ commits. Examples -------- -See example usages in future changes. +See example usages in: + `t/helper/test-path-walk.c` diff --git a/Makefile b/Makefile index 1b2abad3b4a9ee..e0b9e14a683832 100644 --- a/Makefile +++ b/Makefile @@ -822,6 +822,7 @@ TEST_BUILTINS_OBJS += test-parse-options.o TEST_BUILTINS_OBJS += test-parse-pathspec-file.o TEST_BUILTINS_OBJS += test-partial-clone.o TEST_BUILTINS_OBJS += test-path-utils.o +TEST_BUILTINS_OBJS += test-path-walk.o TEST_BUILTINS_OBJS += test-pcre2-config.o TEST_BUILTINS_OBJS += test-pkt-line.o TEST_BUILTINS_OBJS += test-proc-receive.o diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c new file mode 100644 index 00000000000000..def7c81ac4fac2 --- /dev/null +++ b/t/helper/test-path-walk.c @@ -0,0 +1,84 @@ +#define USE_THE_REPOSITORY_VARIABLE + +#include "test-tool.h" +#include "environment.h" +#include "hex.h" +#include "object-name.h" +#include "object.h" +#include "pretty.h" +#include "revision.h" +#include "setup.h" +#include "parse-options.h" +#include "path-walk.h" +#include "oid-array.h" + +static const char * const path_walk_usage[] = { + N_("test-tool path-walk -- "), + NULL +}; + +struct path_walk_test_data { + uintmax_t batch_nr; + uintmax_t tree_nr; + uintmax_t blob_nr; +}; + +static int emit_block(const char *path, struct oid_array *oids, + enum object_type type, void *data) +{ + struct path_walk_test_data *tdata = data; + const char *typestr; + + if (type == OBJ_TREE) + tdata->tree_nr += oids->nr; + else if (type == OBJ_BLOB) + tdata->blob_nr += oids->nr; + else + BUG("we do not understand this type"); + + typestr = type_name(type); + + for (size_t i = 0; i < oids->nr; i++) + printf("%"PRIuMAX":%s:%s:%s\n", + tdata->batch_nr, typestr, path, + oid_to_hex(&oids->oid[i])); + + tdata->batch_nr++; + return 0; +} + +int cmd__path_walk(int argc, const char **argv) +{ + int res; + struct rev_info revs = REV_INFO_INIT; + struct path_walk_info info = PATH_WALK_INFO_INIT; + struct path_walk_test_data data = { 0 }; + struct option options[] = { + OPT_END(), + }; + + setup_git_directory(); + revs.repo = the_repository; + + argc = parse_options(argc, argv, NULL, + options, path_walk_usage, + PARSE_OPT_KEEP_UNKNOWN_OPT | PARSE_OPT_KEEP_ARGV0); + + if (argc > 1) + setup_revisions(argc, argv, &revs, NULL); + else + usage(path_walk_usage[0]); + + info.revs = &revs; + info.path_fn = emit_block; + info.path_fn_data = &data; + + res = walk_objects_by_path(&info); + + printf("trees:%" PRIuMAX "\n" + "blobs:%" PRIuMAX "\n", + data.tree_nr, data.blob_nr); + + release_revisions(&revs); + return res; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 1ebb69a5dc4c17..43676e7b93a43f 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -52,6 +52,7 @@ static struct test_cmd cmds[] = { { "parse-subcommand", cmd__parse_subcommand }, { "partial-clone", cmd__partial_clone }, { "path-utils", cmd__path_utils }, + { "path-walk", cmd__path_walk }, { "pcre2-config", cmd__pcre2_config }, { "pkt-line", cmd__pkt_line }, { "proc-receive", cmd__proc_receive }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 21802ac27da37f..9cfc5da6e57b00 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -45,6 +45,7 @@ int cmd__parse_pathspec_file(int argc, const char** argv); int cmd__parse_subcommand(int argc, const char **argv); int cmd__partial_clone(int argc, const char **argv); int cmd__path_utils(int argc, const char **argv); +int cmd__path_walk(int argc, const char **argv); int cmd__pcre2_config(int argc, const char **argv); int cmd__pkt_line(int argc, const char **argv); int cmd__proc_receive(int argc, const char **argv); diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh new file mode 100755 index 00000000000000..4e052c093095ab --- /dev/null +++ b/t/t6601-path-walk.sh @@ -0,0 +1,120 @@ +#!/bin/sh + +TEST_PASSES_SANITIZE_LEAK=true + +test_description='direct path-walk API tests' + +. ./test-lib.sh + +test_expect_success 'setup test repository' ' + git checkout -b base && + + mkdir left && + mkdir right && + echo a >a && + echo b >left/b && + echo c >right/c && + git add . && + git commit -m "first" && + + echo d >right/d && + git add right && + git commit -m "second" && + + echo bb >left/b && + git commit -a -m "third" && + + git checkout -b topic HEAD~1 && + echo cc >right/c && + git commit -a -m "topic" +' + +test_expect_success 'all' ' + test-tool path-walk -- --all >out && + + cat >expect <<-EOF && + 0:tree::$(git rev-parse topic^{tree}) + 0:tree::$(git rev-parse base^{tree}) + 0:tree::$(git rev-parse base~1^{tree}) + 0:tree::$(git rev-parse base~2^{tree}) + 1:tree:right/:$(git rev-parse topic:right) + 1:tree:right/:$(git rev-parse base~1:right) + 1:tree:right/:$(git rev-parse base~2:right) + 2:blob:right/d:$(git rev-parse base~1:right/d) + 3:blob:right/c:$(git rev-parse base~2:right/c) + 3:blob:right/c:$(git rev-parse topic:right/c) + 4:tree:left/:$(git rev-parse base:left) + 4:tree:left/:$(git rev-parse base~2:left) + 5:blob:left/b:$(git rev-parse base~2:left/b) + 5:blob:left/b:$(git rev-parse base:left/b) + 6:blob:a:$(git rev-parse base~2:a) + blobs:6 + trees:9 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic only' ' + test-tool path-walk -- topic >out && + + cat >expect <<-EOF && + 0:tree::$(git rev-parse topic^{tree}) + 0:tree::$(git rev-parse base~1^{tree}) + 0:tree::$(git rev-parse base~2^{tree}) + 1:tree:right/:$(git rev-parse topic:right) + 1:tree:right/:$(git rev-parse base~1:right) + 1:tree:right/:$(git rev-parse base~2:right) + 2:blob:right/d:$(git rev-parse base~1:right/d) + 3:blob:right/c:$(git rev-parse base~2:right/c) + 3:blob:right/c:$(git rev-parse topic:right/c) + 4:tree:left/:$(git rev-parse base~2:left) + 5:blob:left/b:$(git rev-parse base~2:left/b) + 6:blob:a:$(git rev-parse base~2:a) + blobs:5 + trees:7 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic, not base' ' + test-tool path-walk -- topic --not base >out && + + cat >expect <<-EOF && + 0:tree::$(git rev-parse topic^{tree}) + 1:tree:right/:$(git rev-parse topic:right) + 2:blob:right/d:$(git rev-parse topic:right/d) + 3:blob:right/c:$(git rev-parse topic:right/c) + 4:tree:left/:$(git rev-parse topic:left) + 5:blob:left/b:$(git rev-parse topic:left/b) + 6:blob:a:$(git rev-parse topic:a) + blobs:4 + trees:3 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic, not base, boundary' ' + test-tool path-walk -- --boundary topic --not base >out && + + cat >expect <<-EOF && + 0:tree::$(git rev-parse topic^{tree}) + 0:tree::$(git rev-parse base~1^{tree}) + 1:tree:right/:$(git rev-parse topic:right) + 1:tree:right/:$(git rev-parse base~1:right) + 2:blob:right/d:$(git rev-parse base~1:right/d) + 3:blob:right/c:$(git rev-parse base~1:right/c) + 3:blob:right/c:$(git rev-parse topic:right/c) + 4:tree:left/:$(git rev-parse base~1:left) + 5:blob:left/b:$(git rev-parse base~1:left/b) + 6:blob:a:$(git rev-parse base~1:a) + blobs:5 + trees:5 + EOF + + test_cmp_sorted expect out +' + +test_done From c8dba310d734962c0bcadd8cad1ebf7cfe734c8c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Dec 2024 16:21:12 +0000 Subject: [PATCH 025/535] path-walk: allow consumer to specify object types We add the ability to filter the object types in the path-walk API so the callback function is called fewer times. This adds the ability to ask for the commits in a list, as well. We re-use the empty string for this set of objects because these are passed directly to the callback function instead of being part of the 'path_stack'. Future changes will add the ability to visit annotated tags. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/api-path-walk.txt | 9 ++ path-walk.c | 33 ++++- path-walk.h | 14 +- t/helper/test-path-walk.c | 15 ++- t/t6601-path-walk.sh | 149 +++++++++++++++------- 5 files changed, 170 insertions(+), 50 deletions(-) diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt index 662162ec70b38b..dce553b6114e1c 100644 --- a/Documentation/technical/api-path-walk.txt +++ b/Documentation/technical/api-path-walk.txt @@ -39,6 +39,15 @@ It is also important that you do not specify the `--objects` flag for the the objects will be walked in a separate way based on those starting commits. +`commits`, `blobs`, `trees`:: + By default, these members are enabled and signal that the path-walk + API should call the `path_fn` on objects of these types. Specialized + applications could disable some options to make it simpler to walk + the objects or to have fewer calls to `path_fn`. ++ +While it is possible to walk only commits in this way, consumers would be +better off using the revision walk API instead. + Examples -------- diff --git a/path-walk.c b/path-walk.c index 021840ab41b56a..05ca7b2442a813 100644 --- a/path-walk.c +++ b/path-walk.c @@ -98,6 +98,10 @@ static int add_tree_entries(struct path_walk_context *ctx, if (S_ISGITLINK(entry.mode)) continue; + /* If the caller doesn't want blobs, then don't bother. */ + if (!ctx->info->blobs && type == OBJ_BLOB) + continue; + if (type == OBJ_TREE) { struct tree *child = lookup_tree(ctx->repo, &entry.oid); o = child ? &child->object : NULL; @@ -159,9 +163,11 @@ static int walk_path(struct path_walk_context *ctx, if (!list->oids.nr) return 0; - /* Evaluate function pointer on this data. */ - ret = ctx->info->path_fn(path, &list->oids, list->type, - ctx->info->path_fn_data); + /* Evaluate function pointer on this data, if requested. */ + if ((list->type == OBJ_TREE && ctx->info->trees) || + (list->type == OBJ_BLOB && ctx->info->blobs)) + ret = ctx->info->path_fn(path, &list->oids, list->type, + ctx->info->path_fn_data); /* Expand data for children. */ if (list->type == OBJ_TREE) { @@ -203,6 +209,7 @@ int walk_objects_by_path(struct path_walk_info *info) size_t commits_nr = 0, paths_nr = 0; struct commit *c; struct type_and_oid_list *root_tree_list; + struct type_and_oid_list *commit_list; struct path_walk_context ctx = { .repo = info->revs->repo, .revs = info->revs, @@ -214,6 +221,9 @@ int walk_objects_by_path(struct path_walk_info *info) trace2_region_enter("path-walk", "commit-walk", info->revs->repo); + CALLOC_ARRAY(commit_list, 1); + commit_list->type = OBJ_COMMIT; + /* Insert a single list for the root tree into the paths. */ CALLOC_ARRAY(root_tree_list, 1); root_tree_list->type = OBJ_TREE; @@ -224,10 +234,18 @@ int walk_objects_by_path(struct path_walk_info *info) die(_("failed to setup revision walk")); while ((c = get_revision(info->revs))) { - struct object_id *oid = get_commit_tree_oid(c); + struct object_id *oid; struct tree *t; commits_nr++; + if (info->commits) + oid_array_append(&commit_list->oids, + &c->object.oid); + + /* If we only care about commits, then skip trees. */ + if (!info->trees && !info->blobs) + continue; + oid = get_commit_tree_oid(c); t = lookup_tree(info->revs->repo, oid); @@ -245,6 +263,13 @@ int walk_objects_by_path(struct path_walk_info *info) trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr); trace2_region_leave("path-walk", "commit-walk", info->revs->repo); + /* Track all commits. */ + if (info->commits && commit_list->oids.nr) + ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT, + info->path_fn_data); + oid_array_clear(&commit_list->oids); + free(commit_list); + trace2_region_enter("path-walk", "path-walk", info->revs->repo); while (!ret && ctx.path_stack.nr) { char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; diff --git a/path-walk.h b/path-walk.h index 7cb3538cd8f4af..2cafc71e153c19 100644 --- a/path-walk.h +++ b/path-walk.h @@ -31,9 +31,21 @@ struct path_walk_info { */ path_fn path_fn; void *path_fn_data; + + /** + * Initialize which object types the path_fn should be called on. This + * could also limit the walk to skip blobs if not set. + */ + int commits; + int trees; + int blobs; }; -#define PATH_WALK_INFO_INIT { 0 } +#define PATH_WALK_INFO_INIT { \ + .blobs = 1, \ + .trees = 1, \ + .commits = 1, \ +} void path_walk_info_init(struct path_walk_info *info); void path_walk_info_clear(struct path_walk_info *info); diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index def7c81ac4fac2..a57a05a6391fcb 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -19,6 +19,8 @@ static const char * const path_walk_usage[] = { struct path_walk_test_data { uintmax_t batch_nr; + + uintmax_t commit_nr; uintmax_t tree_nr; uintmax_t blob_nr; }; @@ -33,6 +35,8 @@ static int emit_block(const char *path, struct oid_array *oids, tdata->tree_nr += oids->nr; else if (type == OBJ_BLOB) tdata->blob_nr += oids->nr; + else if (type == OBJ_COMMIT) + tdata->commit_nr += oids->nr; else BUG("we do not understand this type"); @@ -54,6 +58,12 @@ int cmd__path_walk(int argc, const char **argv) struct path_walk_info info = PATH_WALK_INFO_INIT; struct path_walk_test_data data = { 0 }; struct option options[] = { + OPT_BOOL(0, "blobs", &info.blobs, + N_("toggle inclusion of blob objects")), + OPT_BOOL(0, "commits", &info.commits, + N_("toggle inclusion of commit objects")), + OPT_BOOL(0, "trees", &info.trees, + N_("toggle inclusion of tree objects")), OPT_END(), }; @@ -75,9 +85,10 @@ int cmd__path_walk(int argc, const char **argv) res = walk_objects_by_path(&info); - printf("trees:%" PRIuMAX "\n" + printf("commits:%" PRIuMAX "\n" + "trees:%" PRIuMAX "\n" "blobs:%" PRIuMAX "\n", - data.tree_nr, data.blob_nr); + data.commit_nr, data.tree_nr, data.blob_nr); release_revisions(&revs); return res; diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 4e052c093095ab..4a4939a1b02bb1 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -33,22 +33,27 @@ test_expect_success 'all' ' test-tool path-walk -- --all >out && cat >expect <<-EOF && - 0:tree::$(git rev-parse topic^{tree}) - 0:tree::$(git rev-parse base^{tree}) - 0:tree::$(git rev-parse base~1^{tree}) - 0:tree::$(git rev-parse base~2^{tree}) - 1:tree:right/:$(git rev-parse topic:right) - 1:tree:right/:$(git rev-parse base~1:right) - 1:tree:right/:$(git rev-parse base~2:right) - 2:blob:right/d:$(git rev-parse base~1:right/d) - 3:blob:right/c:$(git rev-parse base~2:right/c) - 3:blob:right/c:$(git rev-parse topic:right/c) - 4:tree:left/:$(git rev-parse base:left) - 4:tree:left/:$(git rev-parse base~2:left) - 5:blob:left/b:$(git rev-parse base~2:left/b) - 5:blob:left/b:$(git rev-parse base:left/b) - 6:blob:a:$(git rev-parse base~2:a) + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:tree:right/:$(git rev-parse topic:right) + 2:tree:right/:$(git rev-parse base~1:right) + 2:tree:right/:$(git rev-parse base~2:right) + 3:blob:right/d:$(git rev-parse base~1:right/d) + 4:blob:right/c:$(git rev-parse base~2:right/c) + 4:blob:right/c:$(git rev-parse topic:right/c) + 5:tree:left/:$(git rev-parse base:left) + 5:tree:left/:$(git rev-parse base~2:left) + 6:blob:left/b:$(git rev-parse base~2:left/b) + 6:blob:left/b:$(git rev-parse base:left/b) + 7:blob:a:$(git rev-parse base~2:a) blobs:6 + commits:4 trees:9 EOF @@ -59,19 +64,23 @@ test_expect_success 'topic only' ' test-tool path-walk -- topic >out && cat >expect <<-EOF && - 0:tree::$(git rev-parse topic^{tree}) - 0:tree::$(git rev-parse base~1^{tree}) - 0:tree::$(git rev-parse base~2^{tree}) - 1:tree:right/:$(git rev-parse topic:right) - 1:tree:right/:$(git rev-parse base~1:right) - 1:tree:right/:$(git rev-parse base~2:right) - 2:blob:right/d:$(git rev-parse base~1:right/d) - 3:blob:right/c:$(git rev-parse base~2:right/c) - 3:blob:right/c:$(git rev-parse topic:right/c) - 4:tree:left/:$(git rev-parse base~2:left) - 5:blob:left/b:$(git rev-parse base~2:left/b) - 6:blob:a:$(git rev-parse base~2:a) + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~2^{tree}) + 2:tree:right/:$(git rev-parse topic:right) + 2:tree:right/:$(git rev-parse base~1:right) + 2:tree:right/:$(git rev-parse base~2:right) + 3:blob:right/d:$(git rev-parse base~1:right/d) + 4:blob:right/c:$(git rev-parse base~2:right/c) + 4:blob:right/c:$(git rev-parse topic:right/c) + 5:tree:left/:$(git rev-parse base~2:left) + 6:blob:left/b:$(git rev-parse base~2:left/b) + 7:blob:a:$(git rev-parse base~2:a) blobs:5 + commits:3 trees:7 EOF @@ -82,15 +91,66 @@ test_expect_success 'topic, not base' ' test-tool path-walk -- topic --not base >out && cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 1:tree::$(git rev-parse topic^{tree}) + 2:tree:right/:$(git rev-parse topic:right) + 3:blob:right/d:$(git rev-parse topic:right/d) + 4:blob:right/c:$(git rev-parse topic:right/c) + 5:tree:left/:$(git rev-parse topic:left) + 6:blob:left/b:$(git rev-parse topic:left/b) + 7:blob:a:$(git rev-parse topic:a) + blobs:4 + commits:1 + trees:3 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic, not base, only blobs' ' + test-tool path-walk --no-trees --no-commits \ + -- topic --not base >out && + + cat >expect <<-EOF && + commits:0 + trees:0 + 0:blob:right/d:$(git rev-parse topic:right/d) + 1:blob:right/c:$(git rev-parse topic:right/c) + 2:blob:left/b:$(git rev-parse topic:left/b) + 3:blob:a:$(git rev-parse topic:a) + blobs:4 + EOF + + test_cmp_sorted expect out +' + +# No, this doesn't make a lot of sense for the path-walk API, +# but it is possible to do. +test_expect_success 'topic, not base, only commits' ' + test-tool path-walk --no-blobs --no-trees \ + -- topic --not base >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + commits:1 + trees:0 + blobs:0 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'topic, not base, only trees' ' + test-tool path-walk --no-blobs --no-commits \ + -- topic --not base >out && + + cat >expect <<-EOF && + commits:0 0:tree::$(git rev-parse topic^{tree}) 1:tree:right/:$(git rev-parse topic:right) - 2:blob:right/d:$(git rev-parse topic:right/d) - 3:blob:right/c:$(git rev-parse topic:right/c) - 4:tree:left/:$(git rev-parse topic:left) - 5:blob:left/b:$(git rev-parse topic:left/b) - 6:blob:a:$(git rev-parse topic:a) - blobs:4 + 2:tree:left/:$(git rev-parse topic:left) trees:3 + blobs:0 EOF test_cmp_sorted expect out @@ -100,17 +160,20 @@ test_expect_success 'topic, not base, boundary' ' test-tool path-walk -- --boundary topic --not base >out && cat >expect <<-EOF && - 0:tree::$(git rev-parse topic^{tree}) - 0:tree::$(git rev-parse base~1^{tree}) - 1:tree:right/:$(git rev-parse topic:right) - 1:tree:right/:$(git rev-parse base~1:right) - 2:blob:right/d:$(git rev-parse base~1:right/d) - 3:blob:right/c:$(git rev-parse base~1:right/c) - 3:blob:right/c:$(git rev-parse topic:right/c) - 4:tree:left/:$(git rev-parse base~1:left) - 5:blob:left/b:$(git rev-parse base~1:left/b) - 6:blob:a:$(git rev-parse base~1:a) + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1) + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}) + 2:tree:right/:$(git rev-parse topic:right) + 2:tree:right/:$(git rev-parse base~1:right) + 3:blob:right/d:$(git rev-parse base~1:right/d) + 4:blob:right/c:$(git rev-parse base~1:right/c) + 4:blob:right/c:$(git rev-parse topic:right/c) + 5:tree:left/:$(git rev-parse base~1:left) + 6:blob:left/b:$(git rev-parse base~1:left/b) + 7:blob:a:$(git rev-parse base~1:a) blobs:5 + commits:2 trees:5 EOF From 9145660979d699733e05d7336eabb086b5266370 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Dec 2024 16:21:13 +0000 Subject: [PATCH 026/535] path-walk: visit tags and cached objects The rev_info that is specified for a path-walk traversal may specify visiting tag refs (both lightweight and annotated) and also may specify indexed objects (blobs and trees). Update the path-walk API to walk these objects as well. When walking tags, we need to peel the annotated objects until reaching a non-tag object. If we reach a commit, then we can add it to the pending objects to make sure we visit in the commit walk portion. If we reach a tree, then we will assume that it is a root tree. If we reach a blob, then we have no good path name and so add it to a new list of "tagged blobs". When the rev_info includes the "--indexed-objects" flag, then the pending set includes blobs and trees found in the cache entries and cache-tree. The cache entries are usually blobs, though they could be trees in the case of a sparse index. The cache-tree stores previously-hashed tree objects but these are cleared out when staging objects below those paths. We add tests that demonstrate this. The indexed objects come with a non-NULL 'path' value in the pending item. This allows us to prepopulate the 'path_to_lists' strmap with lists for these paths. The tricky thing about this walk is that we will want to combine the indexed objects walk with the commit walk, especially in the future case of walking objects during a command like 'git repack'. Whenever possible, we want the objects from the index to be grouped with similar objects in history. We don't want to miss any paths that appear only in the index and not in the commit history. Thus, we need to be careful to let the path stack be populated initially with only the root tree path (and possibly tags and tagged blobs) and go through the normal depth-first search. Afterwards, if there are other paths that are remaining in the paths_to_lists strmap, we should then iterate through the stack and visit those objects recursively. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/api-path-walk.txt | 2 +- path-walk.c | 184 ++++++++++++++++++++- path-walk.h | 2 + t/helper/test-path-walk.c | 15 +- t/t6601-path-walk.sh | 186 +++++++++++++++++++--- 5 files changed, 362 insertions(+), 27 deletions(-) diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt index dce553b6114e1c..6022c381b7c3f0 100644 --- a/Documentation/technical/api-path-walk.txt +++ b/Documentation/technical/api-path-walk.txt @@ -39,7 +39,7 @@ It is also important that you do not specify the `--objects` flag for the the objects will be walked in a separate way based on those starting commits. -`commits`, `blobs`, `trees`:: +`commits`, `blobs`, `trees`, `tags`:: By default, these members are enabled and signal that the path-walk API should call the `path_fn` on objects of these types. Specialized applications could disable some options to make it simpler to walk diff --git a/path-walk.c b/path-walk.c index 05ca7b2442a813..f34dbf61de0a0f 100644 --- a/path-walk.c +++ b/path-walk.c @@ -13,10 +13,13 @@ #include "revision.h" #include "string-list.h" #include "strmap.h" +#include "tag.h" #include "trace2.h" #include "tree.h" #include "tree-walk.h" +static const char *root_path = ""; + struct type_and_oid_list { enum object_type type; struct oid_array oids; @@ -160,12 +163,16 @@ static int walk_path(struct path_walk_context *ctx, list = strmap_get(&ctx->paths_to_lists, path); + if (!list) + BUG("provided path '%s' that had no associated list", path); + if (!list->oids.nr) return 0; /* Evaluate function pointer on this data, if requested. */ if ((list->type == OBJ_TREE && ctx->info->trees) || - (list->type == OBJ_BLOB && ctx->info->blobs)) + (list->type == OBJ_BLOB && ctx->info->blobs) || + (list->type == OBJ_TAG && ctx->info->tags)) ret = ctx->info->path_fn(path, &list->oids, list->type, ctx->info->path_fn_data); @@ -196,6 +203,139 @@ static void clear_paths_to_lists(struct strmap *map) strmap_init(map); } +static int setup_pending_objects(struct path_walk_info *info, + struct path_walk_context *ctx) +{ + struct type_and_oid_list *tags = NULL; + struct type_and_oid_list *tagged_blobs = NULL; + struct type_and_oid_list *root_tree_list = NULL; + + if (info->tags) + CALLOC_ARRAY(tags, 1); + if (info->blobs) + CALLOC_ARRAY(tagged_blobs, 1); + if (info->trees) + root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); + + /* + * Pending objects include: + * * Commits at branch tips. + * * Annotated tags at tag tips. + * * Any kind of object at lightweight tag tips. + * * Trees and blobs in the index (with an associated path). + */ + for (size_t i = 0; i < info->revs->pending.nr; i++) { + struct object_array_entry *pending = info->revs->pending.objects + i; + struct object *obj = pending->item; + + /* Commits will be picked up by revision walk. */ + if (obj->type == OBJ_COMMIT) + continue; + + /* Navigate annotated tag object chains. */ + while (obj->type == OBJ_TAG) { + struct tag *tag = lookup_tag(info->revs->repo, &obj->oid); + if (!tag) { + error(_("failed to find tag %s"), + oid_to_hex(&obj->oid)); + return -1; + } + if (tag->object.flags & SEEN) + break; + tag->object.flags |= SEEN; + + if (tags) + oid_array_append(&tags->oids, &obj->oid); + obj = tag->tagged; + } + + if (obj->type == OBJ_TAG) + continue; + + /* We are now at a non-tag object. */ + if (obj->flags & SEEN) + continue; + obj->flags |= SEEN; + + switch (obj->type) { + case OBJ_TREE: + if (!info->trees) + continue; + if (pending->path) { + struct type_and_oid_list *list; + char *path = *pending->path ? xstrfmt("%s/", pending->path) + : xstrdup(""); + if (!(list = strmap_get(&ctx->paths_to_lists, path))) { + CALLOC_ARRAY(list, 1); + list->type = OBJ_TREE; + strmap_put(&ctx->paths_to_lists, path, list); + } + oid_array_append(&list->oids, &obj->oid); + free(path); + } else { + /* assume a root tree, such as a lightweight tag. */ + oid_array_append(&root_tree_list->oids, &obj->oid); + } + break; + + case OBJ_BLOB: + if (!info->blobs) + continue; + if (pending->path) { + struct type_and_oid_list *list; + char *path = pending->path; + if (!(list = strmap_get(&ctx->paths_to_lists, path))) { + CALLOC_ARRAY(list, 1); + list->type = OBJ_BLOB; + strmap_put(&ctx->paths_to_lists, path, list); + } + oid_array_append(&list->oids, &obj->oid); + } else { + /* assume a root tree, such as a lightweight tag. */ + oid_array_append(&tagged_blobs->oids, &obj->oid); + } + break; + + case OBJ_COMMIT: + /* Make sure it is in the object walk */ + if (obj != pending->item) + add_pending_object(info->revs, obj, ""); + break; + + default: + BUG("should not see any other type here"); + } + } + + /* + * Add tag objects and tagged blobs if they exist. + */ + if (tagged_blobs) { + if (tagged_blobs->oids.nr) { + const char *tagged_blob_path = "/tagged-blobs"; + tagged_blobs->type = OBJ_BLOB; + push_to_stack(ctx, tagged_blob_path); + strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs); + } else { + oid_array_clear(&tagged_blobs->oids); + free(tagged_blobs); + } + } + if (tags) { + if (tags->oids.nr) { + const char *tag_path = "/tags"; + tags->type = OBJ_TAG; + push_to_stack(ctx, tag_path); + strmap_put(&ctx->paths_to_lists, tag_path, tags); + } else { + oid_array_clear(&tags->oids); + free(tags); + } + } + + return 0; +} + /** * Given the configuration of 'info', walk the commits based on 'info->revs' and * call 'info->path_fn' on each discovered path. @@ -204,8 +344,7 @@ static void clear_paths_to_lists(struct strmap *map) */ int walk_objects_by_path(struct path_walk_info *info) { - const char *root_path = ""; - int ret = 0; + int ret; size_t commits_nr = 0, paths_nr = 0; struct commit *c; struct type_and_oid_list *root_tree_list; @@ -224,15 +363,34 @@ int walk_objects_by_path(struct path_walk_info *info) CALLOC_ARRAY(commit_list, 1); commit_list->type = OBJ_COMMIT; + if (info->tags) + info->revs->tag_objects = 1; + /* Insert a single list for the root tree into the paths. */ CALLOC_ARRAY(root_tree_list, 1); root_tree_list->type = OBJ_TREE; strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); push_to_stack(&ctx, root_path); + /* + * Set these values before preparing the walk to catch + * lightweight tags pointing to non-commits and indexed objects. + */ + info->revs->blob_objects = info->blobs; + info->revs->tree_objects = info->trees; + if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); + info->revs->blob_objects = info->revs->tree_objects = 0; + + trace2_region_enter("path-walk", "pending-walk", info->revs->repo); + ret = setup_pending_objects(info, &ctx); + trace2_region_leave("path-walk", "pending-walk", info->revs->repo); + + if (ret) + return ret; + while ((c = get_revision(info->revs))) { struct object_id *oid; struct tree *t; @@ -280,6 +438,26 @@ int walk_objects_by_path(struct path_walk_info *info) free(path); } + + /* Are there paths remaining? Likely they are from indexed objects. */ + if (!strmap_empty(&ctx.paths_to_lists)) { + struct hashmap_iter iter; + struct strmap_entry *entry; + + strmap_for_each_entry(&ctx.paths_to_lists, &iter, entry) + push_to_stack(&ctx, entry->key); + + while (!ret && ctx.path_stack.nr) { + char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; + ctx.path_stack.nr--; + paths_nr++; + + ret = walk_path(&ctx, path); + + free(path); + } + } + trace2_data_intmax("path-walk", ctx.repo, "paths", paths_nr); trace2_region_leave("path-walk", "path-walk", info->revs->repo); diff --git a/path-walk.h b/path-walk.h index 2cafc71e153c19..3679fa7a859f67 100644 --- a/path-walk.h +++ b/path-walk.h @@ -39,12 +39,14 @@ struct path_walk_info { int commits; int trees; int blobs; + int tags; }; #define PATH_WALK_INFO_INIT { \ .blobs = 1, \ .trees = 1, \ .commits = 1, \ + .tags = 1, \ } void path_walk_info_init(struct path_walk_info *info); diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index a57a05a6391fcb..56289859e69f8d 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -23,6 +23,7 @@ struct path_walk_test_data { uintmax_t commit_nr; uintmax_t tree_nr; uintmax_t blob_nr; + uintmax_t tag_nr; }; static int emit_block(const char *path, struct oid_array *oids, @@ -37,11 +38,18 @@ static int emit_block(const char *path, struct oid_array *oids, tdata->blob_nr += oids->nr; else if (type == OBJ_COMMIT) tdata->commit_nr += oids->nr; + else if (type == OBJ_TAG) + tdata->tag_nr += oids->nr; else BUG("we do not understand this type"); typestr = type_name(type); + /* This should never be output during tests. */ + if (!oids->nr) + printf("%"PRIuMAX":%s:%s:EMPTY\n", + tdata->batch_nr, typestr, path); + for (size_t i = 0; i < oids->nr; i++) printf("%"PRIuMAX":%s:%s:%s\n", tdata->batch_nr, typestr, path, @@ -62,6 +70,8 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle inclusion of blob objects")), OPT_BOOL(0, "commits", &info.commits, N_("toggle inclusion of commit objects")), + OPT_BOOL(0, "tags", &info.tags, + N_("toggle inclusion of tag objects")), OPT_BOOL(0, "trees", &info.trees, N_("toggle inclusion of tree objects")), OPT_END(), @@ -87,8 +97,9 @@ int cmd__path_walk(int argc, const char **argv) printf("commits:%" PRIuMAX "\n" "trees:%" PRIuMAX "\n" - "blobs:%" PRIuMAX "\n", - data.commit_nr, data.tree_nr, data.blob_nr); + "blobs:%" PRIuMAX "\n" + "tags:%" PRIuMAX "\n", + data.commit_nr, data.tree_nr, data.blob_nr, data.tag_nr); release_revisions(&revs); return res; diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 4a4939a1b02bb1..1f3d2e0cb76a76 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -9,29 +9,142 @@ test_description='direct path-walk API tests' test_expect_success 'setup test repository' ' git checkout -b base && + # Make some objects that will only be reachable + # via non-commit tags. + mkdir child && + echo file >child/file && + git add child && + git commit -m "will abandon" && + git tag -a -m "tree" tree-tag HEAD^{tree} && + echo file2 >file2 && + git add file2 && + git commit --amend -m "will abandon" && + git tag tree-tag2 HEAD^{tree} && + + echo blob >file && + blob_oid=$(git hash-object -t blob -w --stdin file2 && + blob2_oid=$(git hash-object -t blob -w --stdin a && echo b >left/b && echo c >right/c && git add . && - git commit -m "first" && + git commit --amend -m "first" && + git tag -m "first" first HEAD && echo d >right/d && git add right && git commit -m "second" && + git tag -a -m "second (under)" second.1 HEAD && + git tag -a -m "second (top)" second.2 second.1 && + # Set up file/dir collision in history. + rm a && + mkdir a && + echo a >a/a && echo bb >left/b && - git commit -a -m "third" && + git add a left && + git commit -m "third" && + git tag -a -m "third" third && git checkout -b topic HEAD~1 && echo cc >right/c && - git commit -a -m "topic" + git commit -a -m "topic" && + git tag -a -m "fourth" fourth ' test_expect_success 'all' ' test-tool path-walk -- --all >out && + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base) + 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~2) + 1:tag:/tags:$(git rev-parse refs/tags/first) + 1:tag:/tags:$(git rev-parse refs/tags/second.1) + 1:tag:/tags:$(git rev-parse refs/tags/second.2) + 1:tag:/tags:$(git rev-parse refs/tags/third) + 1:tag:/tags:$(git rev-parse refs/tags/fourth) + 1:tag:/tags:$(git rev-parse refs/tags/tree-tag) + 1:tag:/tags:$(git rev-parse refs/tags/blob-tag) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{}) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 3:tree::$(git rev-parse base^{tree}) + 3:tree::$(git rev-parse base~1^{tree}) + 3:tree::$(git rev-parse base~2^{tree}) + 3:tree::$(git rev-parse refs/tags/tree-tag^{}) + 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) + 4:blob:a:$(git rev-parse base~2:a) + 5:tree:right/:$(git rev-parse topic:right) + 5:tree:right/:$(git rev-parse base~1:right) + 5:tree:right/:$(git rev-parse base~2:right) + 6:blob:right/d:$(git rev-parse base~1:right/d) + 7:blob:right/c:$(git rev-parse base~2:right/c) + 7:blob:right/c:$(git rev-parse topic:right/c) + 8:tree:left/:$(git rev-parse base:left) + 8:tree:left/:$(git rev-parse base~2:left) + 9:blob:left/b:$(git rev-parse base~2:left/b) + 9:blob:left/b:$(git rev-parse base:left/b) + 10:tree:a/:$(git rev-parse base:a) + 11:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) + 12:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 13:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file) + blobs:10 + commits:4 + tags:7 + trees:13 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'indexed objects' ' + test_when_finished git reset --hard && + + # stage change into index, adding a blob but + # also invalidating the cache-tree for the root + # and the "left" directory. + echo bogus >left/c && + git add left && + + test-tool path-walk -- --indexed-objects >out && + + cat >expect <<-EOF && + 0:blob:a:$(git rev-parse HEAD:a) + 1:blob:left/b:$(git rev-parse HEAD:left/b) + 2:blob:left/c:$(git rev-parse :left/c) + 3:blob:right/c:$(git rev-parse HEAD:right/c) + 4:blob:right/d:$(git rev-parse HEAD:right/d) + 5:tree:right/:$(git rev-parse topic:right) + blobs:5 + commits:0 + tags:0 + trees:1 + EOF + + test_cmp_sorted expect out +' + +test_expect_success 'branches and indexed objects mix well' ' + test_when_finished git reset --hard && + + # stage change into index, adding a blob but + # also invalidating the cache-tree for the root + # and the "right" directory. + echo fake >right/d && + git add right && + + test-tool path-walk -- --indexed-objects --branches >out && + cat >expect <<-EOF && 0:commit::$(git rev-parse topic) 0:commit::$(git rev-parse base) @@ -41,20 +154,23 @@ test_expect_success 'all' ' 1:tree::$(git rev-parse base^{tree}) 1:tree::$(git rev-parse base~1^{tree}) 1:tree::$(git rev-parse base~2^{tree}) - 2:tree:right/:$(git rev-parse topic:right) - 2:tree:right/:$(git rev-parse base~1:right) - 2:tree:right/:$(git rev-parse base~2:right) - 3:blob:right/d:$(git rev-parse base~1:right/d) - 4:blob:right/c:$(git rev-parse base~2:right/c) - 4:blob:right/c:$(git rev-parse topic:right/c) - 5:tree:left/:$(git rev-parse base:left) - 5:tree:left/:$(git rev-parse base~2:left) - 6:blob:left/b:$(git rev-parse base~2:left/b) - 6:blob:left/b:$(git rev-parse base:left/b) - 7:blob:a:$(git rev-parse base~2:a) - blobs:6 + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:right/:$(git rev-parse topic:right) + 3:tree:right/:$(git rev-parse base~1:right) + 3:tree:right/:$(git rev-parse base~2:right) + 4:blob:right/d:$(git rev-parse base~1:right/d) + 4:blob:right/d:$(git rev-parse :right/d) + 5:blob:right/c:$(git rev-parse base~2:right/c) + 5:blob:right/c:$(git rev-parse topic:right/c) + 6:tree:left/:$(git rev-parse base:left) + 6:tree:left/:$(git rev-parse base~2:left) + 7:blob:left/b:$(git rev-parse base:left/b) + 7:blob:left/b:$(git rev-parse base~2:left/b) + 8:tree:a/:$(git rev-parse refs/tags/third:a) + blobs:7 commits:4 - trees:9 + tags:0 + trees:10 EOF test_cmp_sorted expect out @@ -81,6 +197,7 @@ test_expect_success 'topic only' ' 7:blob:a:$(git rev-parse base~2:a) blobs:5 commits:3 + tags:0 trees:7 EOF @@ -101,6 +218,7 @@ test_expect_success 'topic, not base' ' 7:blob:a:$(git rev-parse topic:a) blobs:4 commits:1 + tags:0 trees:3 EOF @@ -112,13 +230,14 @@ test_expect_success 'topic, not base, only blobs' ' -- topic --not base >out && cat >expect <<-EOF && - commits:0 - trees:0 0:blob:right/d:$(git rev-parse topic:right/d) 1:blob:right/c:$(git rev-parse topic:right/c) 2:blob:left/b:$(git rev-parse topic:left/b) 3:blob:a:$(git rev-parse topic:a) blobs:4 + commits:0 + tags:0 + trees:0 EOF test_cmp_sorted expect out @@ -133,8 +252,9 @@ test_expect_success 'topic, not base, only commits' ' cat >expect <<-EOF && 0:commit::$(git rev-parse topic) commits:1 - trees:0 blobs:0 + tags:0 + trees:0 EOF test_cmp_sorted expect out @@ -145,12 +265,13 @@ test_expect_success 'topic, not base, only trees' ' -- topic --not base >out && cat >expect <<-EOF && - commits:0 0:tree::$(git rev-parse topic^{tree}) 1:tree:right/:$(git rev-parse topic:right) 2:tree:left/:$(git rev-parse topic:left) - trees:3 + commits:0 blobs:0 + tags:0 + trees:3 EOF test_cmp_sorted expect out @@ -174,10 +295,33 @@ test_expect_success 'topic, not base, boundary' ' 7:blob:a:$(git rev-parse base~1:a) blobs:5 commits:2 + tags:0 trees:5 EOF test_cmp_sorted expect out ' +test_expect_success 'trees are reported exactly once' ' + test_when_finished "rm -rf unique-trees" && + test_create_repo unique-trees && + ( + cd unique-trees && + mkdir initial && + test_commit initial/file && + + git switch -c move-to-top && + git mv initial/file.t ./ && + test_tick && + git commit -m moved && + + git update-ref refs/heads/other HEAD + ) && + + test-tool -C unique-trees path-walk -- --all >out && + tree=$(git -C unique-trees rev-parse HEAD:) && + grep "$tree" out >out-filtered && + test_line_count = 1 out-filtered +' + test_done From 6333e7ae0bb1027b3299d482be5a4a0937116ab0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Dec 2024 16:21:14 +0000 Subject: [PATCH 027/535] path-walk: mark trees and blobs as UNINTERESTING When the input rev_info has UNINTERESTING starting points, we want to be sure that the UNINTERESTING flag is passed appropriately through the objects. To match how this is done in places such as 'git pack-objects', we use the mark_edges_uninteresting() method. This method has an option for using the "sparse" walk, which is similar in spirit to the path-walk API's walk. To be sure to keep it independent, add a new 'prune_all_uninteresting' option to the path_walk_info struct. To check how the UNINTERSTING flag is spread through our objects, extend the 'test-tool path-walk' command to output whether or not an object has that flag. This changes our tests significantly, including the removal of some objects that were previously visited due to the incomplete implementation. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/technical/api-path-walk.txt | 8 +++ path-walk.c | 74 +++++++++++++++++++++ path-walk.h | 8 +++ t/helper/test-path-walk.c | 12 +++- t/t6601-path-walk.sh | 79 +++++++++++++++++------ 5 files changed, 159 insertions(+), 22 deletions(-) diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt index 6022c381b7c3f0..7075d0d5ab50fd 100644 --- a/Documentation/technical/api-path-walk.txt +++ b/Documentation/technical/api-path-walk.txt @@ -48,6 +48,14 @@ commits. While it is possible to walk only commits in this way, consumers would be better off using the revision walk API instead. +`prune_all_uninteresting`:: + By default, all reachable paths are emitted by the path-walk API. + This option allows consumers to declare that they are not + interested in paths where all included objects are marked with the + `UNINTERESTING` flag. This requires using the `boundary` option in + the revision walk so that the walk emits commits marked with the + `UNINTERESTING` flag. + Examples -------- diff --git a/path-walk.c b/path-walk.c index f34dbf61de0a0f..4013569e9e4cbe 100644 --- a/path-walk.c +++ b/path-walk.c @@ -8,6 +8,7 @@ #include "dir.h" #include "hashmap.h" #include "hex.h" +#include "list-objects.h" #include "object.h" #include "oid-array.h" #include "revision.h" @@ -23,6 +24,7 @@ static const char *root_path = ""; struct type_and_oid_list { enum object_type type; struct oid_array oids; + int maybe_interesting; }; #define TYPE_AND_OID_LIST_INIT { \ @@ -142,6 +144,10 @@ static int add_tree_entries(struct path_walk_context *ctx, strmap_put(&ctx->paths_to_lists, path.buf, list); } push_to_stack(ctx, path.buf); + + if (!(o->flags & UNINTERESTING)) + list->maybe_interesting = 1; + oid_array_append(&list->oids, &entry.oid); } @@ -169,6 +175,43 @@ static int walk_path(struct path_walk_context *ctx, if (!list->oids.nr) return 0; + if (ctx->info->prune_all_uninteresting) { + /* + * This is true if all objects were UNINTERESTING + * when added to the list. + */ + if (!list->maybe_interesting) + return 0; + + /* + * But it's still possible that the objects were set + * as UNINTERESTING after being added. Do a quick check. + */ + list->maybe_interesting = 0; + for (size_t i = 0; + !list->maybe_interesting && i < list->oids.nr; + i++) { + if (list->type == OBJ_TREE) { + struct tree *t = lookup_tree(ctx->repo, + &list->oids.oid[i]); + if (t && !(t->object.flags & UNINTERESTING)) + list->maybe_interesting = 1; + } else if (list->type == OBJ_BLOB) { + struct blob *b = lookup_blob(ctx->repo, + &list->oids.oid[i]); + if (b && !(b->object.flags & UNINTERESTING)) + list->maybe_interesting = 1; + } else { + /* Tags are always interesting if visited. */ + list->maybe_interesting = 1; + } + } + + /* We have confirmed that all objects are UNINTERESTING. */ + if (!list->maybe_interesting) + return 0; + } + /* Evaluate function pointer on this data, if requested. */ if ((list->type == OBJ_TREE && ctx->info->trees) || (list->type == OBJ_BLOB && ctx->info->blobs) || @@ -203,6 +246,26 @@ static void clear_paths_to_lists(struct strmap *map) strmap_init(map); } +static struct repository *edge_repo; +static struct type_and_oid_list *edge_tree_list; + +static void show_edge(struct commit *commit) +{ + struct tree *t = repo_get_commit_tree(edge_repo, commit); + + if (!t) + return; + + if (commit->object.flags & UNINTERESTING) + t->object.flags |= UNINTERESTING; + + if (t->object.flags & SEEN) + return; + t->object.flags |= SEEN; + + oid_array_append(&edge_tree_list->oids, &t->object.oid); +} + static int setup_pending_objects(struct path_walk_info *info, struct path_walk_context *ctx) { @@ -314,6 +377,7 @@ static int setup_pending_objects(struct path_walk_info *info, if (tagged_blobs->oids.nr) { const char *tagged_blob_path = "/tagged-blobs"; tagged_blobs->type = OBJ_BLOB; + tagged_blobs->maybe_interesting = 1; push_to_stack(ctx, tagged_blob_path); strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs); } else { @@ -325,6 +389,7 @@ static int setup_pending_objects(struct path_walk_info *info, if (tags->oids.nr) { const char *tag_path = "/tags"; tags->type = OBJ_TAG; + tags->maybe_interesting = 1; push_to_stack(ctx, tag_path); strmap_put(&ctx->paths_to_lists, tag_path, tags); } else { @@ -369,6 +434,7 @@ int walk_objects_by_path(struct path_walk_info *info) /* Insert a single list for the root tree into the paths. */ CALLOC_ARRAY(root_tree_list, 1); root_tree_list->type = OBJ_TREE; + root_tree_list->maybe_interesting = 1; strmap_put(&ctx.paths_to_lists, root_path, root_tree_list); push_to_stack(&ctx, root_path); @@ -382,6 +448,14 @@ int walk_objects_by_path(struct path_walk_info *info) if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); + /* Walk trees to mark them as UNINTERESTING. */ + edge_repo = info->revs->repo; + edge_tree_list = root_tree_list; + mark_edges_uninteresting(info->revs, show_edge, + info->prune_all_uninteresting); + edge_repo = NULL; + edge_tree_list = NULL; + info->revs->blob_objects = info->revs->tree_objects = 0; trace2_region_enter("path-walk", "pending-walk", info->revs->repo); diff --git a/path-walk.h b/path-walk.h index 3679fa7a859f67..414d6db23c23b6 100644 --- a/path-walk.h +++ b/path-walk.h @@ -40,6 +40,14 @@ struct path_walk_info { int trees; int blobs; int tags; + + /** + * When 'prune_all_uninteresting' is set and a path has all objects + * marked as UNINTERESTING, then the path-walk will not visit those + * objects. It will not call path_fn on those objects and will not + * walk the children of such trees. + */ + int prune_all_uninteresting; }; #define PATH_WALK_INFO_INIT { \ diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c index 56289859e69f8d..7f2d409c5bca63 100644 --- a/t/helper/test-path-walk.c +++ b/t/helper/test-path-walk.c @@ -50,10 +50,14 @@ static int emit_block(const char *path, struct oid_array *oids, printf("%"PRIuMAX":%s:%s:EMPTY\n", tdata->batch_nr, typestr, path); - for (size_t i = 0; i < oids->nr; i++) - printf("%"PRIuMAX":%s:%s:%s\n", + for (size_t i = 0; i < oids->nr; i++) { + struct object *o = lookup_unknown_object(the_repository, + &oids->oid[i]); + printf("%"PRIuMAX":%s:%s:%s%s\n", tdata->batch_nr, typestr, path, - oid_to_hex(&oids->oid[i])); + oid_to_hex(&oids->oid[i]), + o->flags & UNINTERESTING ? ":UNINTERESTING" : ""); + } tdata->batch_nr++; return 0; @@ -74,6 +78,8 @@ int cmd__path_walk(int argc, const char **argv) N_("toggle inclusion of tag objects")), OPT_BOOL(0, "trees", &info.trees, N_("toggle inclusion of tree objects")), + OPT_BOOL(0, "prune", &info.prune_all_uninteresting, + N_("toggle pruning of uninteresting paths")), OPT_END(), }; diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index 1f3d2e0cb76a76..a317cdf289e278 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -211,11 +211,11 @@ test_expect_success 'topic, not base' ' 0:commit::$(git rev-parse topic) 1:tree::$(git rev-parse topic^{tree}) 2:tree:right/:$(git rev-parse topic:right) - 3:blob:right/d:$(git rev-parse topic:right/d) + 3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING 4:blob:right/c:$(git rev-parse topic:right/c) - 5:tree:left/:$(git rev-parse topic:left) - 6:blob:left/b:$(git rev-parse topic:left/b) - 7:blob:a:$(git rev-parse topic:a) + 5:tree:left/:$(git rev-parse topic:left):UNINTERESTING + 6:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING + 7:blob:a:$(git rev-parse topic:a):UNINTERESTING blobs:4 commits:1 tags:0 @@ -225,15 +225,38 @@ test_expect_success 'topic, not base' ' test_cmp_sorted expect out ' +test_expect_success 'fourth, blob-tag2, not base' ' + test-tool path-walk -- fourth blob-tag2 --not base >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 1:tag:/tags:$(git rev-parse fourth) + 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) + 3:tree::$(git rev-parse topic^{tree}) + 4:tree:right/:$(git rev-parse topic:right) + 5:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING + 6:blob:right/c:$(git rev-parse topic:right/c) + 7:tree:left/:$(git rev-parse base~1:left):UNINTERESTING + 8:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING + 9:blob:a:$(git rev-parse base~1:a):UNINTERESTING + blobs:5 + commits:1 + tags:1 + trees:3 + EOF + + test_cmp_sorted expect out +' + test_expect_success 'topic, not base, only blobs' ' test-tool path-walk --no-trees --no-commits \ -- topic --not base >out && cat >expect <<-EOF && - 0:blob:right/d:$(git rev-parse topic:right/d) + 0:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING 1:blob:right/c:$(git rev-parse topic:right/c) - 2:blob:left/b:$(git rev-parse topic:left/b) - 3:blob:a:$(git rev-parse topic:a) + 2:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING + 3:blob:a:$(git rev-parse topic:a):UNINTERESTING blobs:4 commits:0 tags:0 @@ -267,7 +290,7 @@ test_expect_success 'topic, not base, only trees' ' cat >expect <<-EOF && 0:tree::$(git rev-parse topic^{tree}) 1:tree:right/:$(git rev-parse topic:right) - 2:tree:left/:$(git rev-parse topic:left) + 2:tree:left/:$(git rev-parse topic:left):UNINTERESTING commits:0 blobs:0 tags:0 @@ -282,17 +305,17 @@ test_expect_success 'topic, not base, boundary' ' cat >expect <<-EOF && 0:commit::$(git rev-parse topic) - 0:commit::$(git rev-parse base~1) + 0:commit::$(git rev-parse base~1):UNINTERESTING 1:tree::$(git rev-parse topic^{tree}) - 1:tree::$(git rev-parse base~1^{tree}) + 1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING 2:tree:right/:$(git rev-parse topic:right) - 2:tree:right/:$(git rev-parse base~1:right) - 3:blob:right/d:$(git rev-parse base~1:right/d) - 4:blob:right/c:$(git rev-parse base~1:right/c) + 2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING + 3:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING + 4:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING 4:blob:right/c:$(git rev-parse topic:right/c) - 5:tree:left/:$(git rev-parse base~1:left) - 6:blob:left/b:$(git rev-parse base~1:left/b) - 7:blob:a:$(git rev-parse base~1:a) + 5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING + 6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING + 7:blob:a:$(git rev-parse base~1:a):UNINTERESTING blobs:5 commits:2 tags:0 @@ -302,6 +325,27 @@ test_expect_success 'topic, not base, boundary' ' test_cmp_sorted expect out ' +test_expect_success 'topic, not base, boundary with pruning' ' + test-tool path-walk --prune -- --boundary topic --not base >out && + + cat >expect <<-EOF && + 0:commit::$(git rev-parse topic) + 0:commit::$(git rev-parse base~1):UNINTERESTING + 1:tree::$(git rev-parse topic^{tree}) + 1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING + 2:tree:right/:$(git rev-parse topic:right) + 2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING + 3:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING + 3:blob:right/c:$(git rev-parse topic:right/c) + blobs:2 + commits:2 + tags:0 + trees:4 + EOF + + test_cmp_sorted expect out +' + test_expect_success 'trees are reported exactly once' ' test_when_finished "rm -rf unique-trees" && test_create_repo unique-trees && @@ -309,15 +353,12 @@ test_expect_success 'trees are reported exactly once' ' cd unique-trees && mkdir initial && test_commit initial/file && - git switch -c move-to-top && git mv initial/file.t ./ && test_tick && git commit -m moved && - git update-ref refs/heads/other HEAD ) && - test-tool -C unique-trees path-walk -- --all >out && tree=$(git -C unique-trees rev-parse HEAD:) && grep "$tree" out >out-filtered && From 71edf6c3c8161a2f102b69ca318ca1784ba29a58 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 20 Dec 2024 16:21:15 +0000 Subject: [PATCH 028/535] path-walk: reorder object visits The path-walk API currently uses a stack-based approach to recursing through the list of paths within the repository. This guarantees that after a tree path is explored, all paths contained within that tree path will be explored before continuing to explore siblings of that tree path. The initial motivation of this depth-first approach was to minimize memory pressure while exploring the repository. A breadth-first approach would have too many "active" paths being stored in the paths_to_lists map. We can take this approach one step further by making sure that blob paths are visited before tree paths. This allows the API to free the memory for these blob objects before continuing to perform the depth-first search. This modifies the order in which we visit siblings, but does not change the fact that we are performing depth-first search. To achieve this goal, use a priority queue with a custom sorting method. The sort needs to handle tags, blobs, and trees (commits are handled slightly differently). When objects share a type, we can sort by path name. This will keep children of the latest path to leave the stack be preferred over the rest of the paths in the stack, since they agree in prefix up to and including a directory separator. When the types are different, we can prefer tags over other types and blobs over trees. This causes significant adjustments to t6601-path-walk.sh to rearrange the order of the visited paths. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- path-walk.c | 60 ++++++++++++++++----- t/t6601-path-walk.sh | 124 +++++++++++++++++++++---------------------- 2 files changed, 110 insertions(+), 74 deletions(-) diff --git a/path-walk.c b/path-walk.c index 4013569e9e4cbe..136ec08fb0ed72 100644 --- a/path-walk.c +++ b/path-walk.c @@ -11,6 +11,7 @@ #include "list-objects.h" #include "object.h" #include "oid-array.h" +#include "prio-queue.h" #include "revision.h" #include "string-list.h" #include "strmap.h" @@ -49,16 +50,50 @@ struct path_walk_context { struct strmap paths_to_lists; /** - * Store the current list of paths in a stack, to - * facilitate depth-first-search without recursion. + * Store the current list of paths in a priority queue, + * using object type as a sorting mechanism, mostly to + * make sure blobs are popped off the stack first. No + * other sort is made, so within each object type it acts + * like a stack and performs a DFS within the trees. * * Use path_stack_pushed to indicate whether a path * was previously added to path_stack. */ - struct string_list path_stack; + struct prio_queue path_stack; struct strset path_stack_pushed; }; +static int compare_by_type(const void *one, const void *two, void *cb_data) +{ + struct type_and_oid_list *list1, *list2; + const char *str1 = one; + const char *str2 = two; + struct path_walk_context *ctx = cb_data; + + list1 = strmap_get(&ctx->paths_to_lists, str1); + list2 = strmap_get(&ctx->paths_to_lists, str2); + + /* + * If object types are equal, then use path comparison. + */ + if (!list1 || !list2 || list1->type == list2->type) + return strcmp(str1, str2); + + /* Prefer tags to be popped off first. */ + if (list1->type == OBJ_TAG) + return -1; + if (list2->type == OBJ_TAG) + return 1; + + /* Prefer blobs to be popped off second. */ + if (list1->type == OBJ_BLOB) + return -1; + if (list2->type == OBJ_BLOB) + return 1; + + return 0; +} + static void push_to_stack(struct path_walk_context *ctx, const char *path) { @@ -66,7 +101,7 @@ static void push_to_stack(struct path_walk_context *ctx, return; strset_add(&ctx->path_stack_pushed, path); - string_list_append(&ctx->path_stack, path); + prio_queue_put(&ctx->path_stack, xstrdup(path)); } static int add_tree_entries(struct path_walk_context *ctx, @@ -378,8 +413,8 @@ static int setup_pending_objects(struct path_walk_info *info, const char *tagged_blob_path = "/tagged-blobs"; tagged_blobs->type = OBJ_BLOB; tagged_blobs->maybe_interesting = 1; - push_to_stack(ctx, tagged_blob_path); strmap_put(&ctx->paths_to_lists, tagged_blob_path, tagged_blobs); + push_to_stack(ctx, tagged_blob_path); } else { oid_array_clear(&tagged_blobs->oids); free(tagged_blobs); @@ -390,8 +425,8 @@ static int setup_pending_objects(struct path_walk_info *info, const char *tag_path = "/tags"; tags->type = OBJ_TAG; tags->maybe_interesting = 1; - push_to_stack(ctx, tag_path); strmap_put(&ctx->paths_to_lists, tag_path, tags); + push_to_stack(ctx, tag_path); } else { oid_array_clear(&tags->oids); free(tags); @@ -418,7 +453,10 @@ int walk_objects_by_path(struct path_walk_info *info) .repo = info->revs->repo, .revs = info->revs, .info = info, - .path_stack = STRING_LIST_INIT_DUP, + .path_stack = { + .compare = compare_by_type, + .cb_data = &ctx + }, .path_stack_pushed = STRSET_INIT, .paths_to_lists = STRMAP_INIT }; @@ -504,8 +542,7 @@ int walk_objects_by_path(struct path_walk_info *info) trace2_region_enter("path-walk", "path-walk", info->revs->repo); while (!ret && ctx.path_stack.nr) { - char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; - ctx.path_stack.nr--; + char *path = prio_queue_get(&ctx.path_stack); paths_nr++; ret = walk_path(&ctx, path); @@ -522,8 +559,7 @@ int walk_objects_by_path(struct path_walk_info *info) push_to_stack(&ctx, entry->key); while (!ret && ctx.path_stack.nr) { - char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string; - ctx.path_stack.nr--; + char *path = prio_queue_get(&ctx.path_stack); paths_nr++; ret = walk_path(&ctx, path); @@ -537,7 +573,7 @@ int walk_objects_by_path(struct path_walk_info *info) clear_paths_to_lists(&ctx.paths_to_lists); strset_clear(&ctx.path_stack_pushed); - string_list_clear(&ctx.path_stack, 0); + clear_prio_queue(&ctx.path_stack); return ret; } diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh index a317cdf289e278..5f04acb8a2f79a 100755 --- a/t/t6601-path-walk.sh +++ b/t/t6601-path-walk.sh @@ -84,20 +84,20 @@ test_expect_success 'all' ' 3:tree::$(git rev-parse refs/tags/tree-tag^{}) 3:tree::$(git rev-parse refs/tags/tree-tag2^{}) 4:blob:a:$(git rev-parse base~2:a) - 5:tree:right/:$(git rev-parse topic:right) - 5:tree:right/:$(git rev-parse base~1:right) - 5:tree:right/:$(git rev-parse base~2:right) - 6:blob:right/d:$(git rev-parse base~1:right/d) - 7:blob:right/c:$(git rev-parse base~2:right/c) - 7:blob:right/c:$(git rev-parse topic:right/c) - 8:tree:left/:$(git rev-parse base:left) - 8:tree:left/:$(git rev-parse base~2:left) - 9:blob:left/b:$(git rev-parse base~2:left/b) - 9:blob:left/b:$(git rev-parse base:left/b) - 10:tree:a/:$(git rev-parse base:a) - 11:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) - 12:tree:child/:$(git rev-parse refs/tags/tree-tag:child) - 13:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file) + 5:blob:file2:$(git rev-parse refs/tags/tree-tag2^{}:file2) + 6:tree:a/:$(git rev-parse base:a) + 7:tree:child/:$(git rev-parse refs/tags/tree-tag:child) + 8:blob:child/file:$(git rev-parse refs/tags/tree-tag:child/file) + 9:tree:left/:$(git rev-parse base:left) + 9:tree:left/:$(git rev-parse base~2:left) + 10:blob:left/b:$(git rev-parse base~2:left/b) + 10:blob:left/b:$(git rev-parse base:left/b) + 11:tree:right/:$(git rev-parse topic:right) + 11:tree:right/:$(git rev-parse base~1:right) + 11:tree:right/:$(git rev-parse base~2:right) + 12:blob:right/c:$(git rev-parse base~2:right/c) + 12:blob:right/c:$(git rev-parse topic:right/c) + 13:blob:right/d:$(git rev-parse base~1:right/d) blobs:10 commits:4 tags:7 @@ -154,19 +154,19 @@ test_expect_success 'branches and indexed objects mix well' ' 1:tree::$(git rev-parse base^{tree}) 1:tree::$(git rev-parse base~1^{tree}) 1:tree::$(git rev-parse base~2^{tree}) - 2:blob:a:$(git rev-parse base~2:a) - 3:tree:right/:$(git rev-parse topic:right) - 3:tree:right/:$(git rev-parse base~1:right) - 3:tree:right/:$(git rev-parse base~2:right) - 4:blob:right/d:$(git rev-parse base~1:right/d) - 4:blob:right/d:$(git rev-parse :right/d) - 5:blob:right/c:$(git rev-parse base~2:right/c) - 5:blob:right/c:$(git rev-parse topic:right/c) - 6:tree:left/:$(git rev-parse base:left) - 6:tree:left/:$(git rev-parse base~2:left) - 7:blob:left/b:$(git rev-parse base:left/b) - 7:blob:left/b:$(git rev-parse base~2:left/b) - 8:tree:a/:$(git rev-parse refs/tags/third:a) + 2:tree:a/:$(git rev-parse refs/tags/third:a) + 3:tree:left/:$(git rev-parse base:left) + 3:tree:left/:$(git rev-parse base~2:left) + 4:blob:left/b:$(git rev-parse base:left/b) + 4:blob:left/b:$(git rev-parse base~2:left/b) + 5:tree:right/:$(git rev-parse topic:right) + 5:tree:right/:$(git rev-parse base~1:right) + 5:tree:right/:$(git rev-parse base~2:right) + 6:blob:right/c:$(git rev-parse base~2:right/c) + 6:blob:right/c:$(git rev-parse topic:right/c) + 7:blob:right/d:$(git rev-parse base~1:right/d) + 7:blob:right/d:$(git rev-parse :right/d) + 8:blob:a:$(git rev-parse base~2:a) blobs:7 commits:4 tags:0 @@ -186,15 +186,15 @@ test_expect_success 'topic only' ' 1:tree::$(git rev-parse topic^{tree}) 1:tree::$(git rev-parse base~1^{tree}) 1:tree::$(git rev-parse base~2^{tree}) - 2:tree:right/:$(git rev-parse topic:right) - 2:tree:right/:$(git rev-parse base~1:right) - 2:tree:right/:$(git rev-parse base~2:right) - 3:blob:right/d:$(git rev-parse base~1:right/d) - 4:blob:right/c:$(git rev-parse base~2:right/c) - 4:blob:right/c:$(git rev-parse topic:right/c) - 5:tree:left/:$(git rev-parse base~2:left) - 6:blob:left/b:$(git rev-parse base~2:left/b) - 7:blob:a:$(git rev-parse base~2:a) + 2:blob:a:$(git rev-parse base~2:a) + 3:tree:left/:$(git rev-parse base~2:left) + 4:blob:left/b:$(git rev-parse base~2:left/b) + 5:tree:right/:$(git rev-parse topic:right) + 5:tree:right/:$(git rev-parse base~1:right) + 5:tree:right/:$(git rev-parse base~2:right) + 6:blob:right/c:$(git rev-parse base~2:right/c) + 6:blob:right/c:$(git rev-parse topic:right/c) + 7:blob:right/d:$(git rev-parse base~1:right/d) blobs:5 commits:3 tags:0 @@ -210,12 +210,12 @@ test_expect_success 'topic, not base' ' cat >expect <<-EOF && 0:commit::$(git rev-parse topic) 1:tree::$(git rev-parse topic^{tree}) - 2:tree:right/:$(git rev-parse topic:right) - 3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING - 4:blob:right/c:$(git rev-parse topic:right/c) - 5:tree:left/:$(git rev-parse topic:left):UNINTERESTING - 6:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING - 7:blob:a:$(git rev-parse topic:a):UNINTERESTING + 2:blob:a:$(git rev-parse topic:a):UNINTERESTING + 3:tree:left/:$(git rev-parse topic:left):UNINTERESTING + 4:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING + 5:tree:right/:$(git rev-parse topic:right) + 6:blob:right/c:$(git rev-parse topic:right/c) + 7:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING blobs:4 commits:1 tags:0 @@ -233,12 +233,12 @@ test_expect_success 'fourth, blob-tag2, not base' ' 1:tag:/tags:$(git rev-parse fourth) 2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{}) 3:tree::$(git rev-parse topic^{tree}) - 4:tree:right/:$(git rev-parse topic:right) - 5:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING - 6:blob:right/c:$(git rev-parse topic:right/c) - 7:tree:left/:$(git rev-parse base~1:left):UNINTERESTING - 8:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING - 9:blob:a:$(git rev-parse base~1:a):UNINTERESTING + 4:blob:a:$(git rev-parse base~1:a):UNINTERESTING + 5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING + 6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING + 7:tree:right/:$(git rev-parse topic:right) + 8:blob:right/c:$(git rev-parse topic:right/c) + 9:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING blobs:5 commits:1 tags:1 @@ -253,10 +253,10 @@ test_expect_success 'topic, not base, only blobs' ' -- topic --not base >out && cat >expect <<-EOF && - 0:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING - 1:blob:right/c:$(git rev-parse topic:right/c) - 2:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING - 3:blob:a:$(git rev-parse topic:a):UNINTERESTING + 0:blob:a:$(git rev-parse topic:a):UNINTERESTING + 1:blob:left/b:$(git rev-parse topic:left/b):UNINTERESTING + 2:blob:right/c:$(git rev-parse topic:right/c) + 3:blob:right/d:$(git rev-parse topic:right/d):UNINTERESTING blobs:4 commits:0 tags:0 @@ -289,8 +289,8 @@ test_expect_success 'topic, not base, only trees' ' cat >expect <<-EOF && 0:tree::$(git rev-parse topic^{tree}) - 1:tree:right/:$(git rev-parse topic:right) - 2:tree:left/:$(git rev-parse topic:left):UNINTERESTING + 1:tree:left/:$(git rev-parse topic:left):UNINTERESTING + 2:tree:right/:$(git rev-parse topic:right) commits:0 blobs:0 tags:0 @@ -308,14 +308,14 @@ test_expect_success 'topic, not base, boundary' ' 0:commit::$(git rev-parse base~1):UNINTERESTING 1:tree::$(git rev-parse topic^{tree}) 1:tree::$(git rev-parse base~1^{tree}):UNINTERESTING - 2:tree:right/:$(git rev-parse topic:right) - 2:tree:right/:$(git rev-parse base~1:right):UNINTERESTING - 3:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING - 4:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING - 4:blob:right/c:$(git rev-parse topic:right/c) - 5:tree:left/:$(git rev-parse base~1:left):UNINTERESTING - 6:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING - 7:blob:a:$(git rev-parse base~1:a):UNINTERESTING + 2:blob:a:$(git rev-parse base~1:a):UNINTERESTING + 3:tree:left/:$(git rev-parse base~1:left):UNINTERESTING + 4:blob:left/b:$(git rev-parse base~1:left/b):UNINTERESTING + 5:tree:right/:$(git rev-parse topic:right) + 5:tree:right/:$(git rev-parse base~1:right):UNINTERESTING + 6:blob:right/c:$(git rev-parse base~1:right/c):UNINTERESTING + 6:blob:right/c:$(git rev-parse topic:right/c) + 7:blob:right/d:$(git rev-parse base~1:right/d):UNINTERESTING blobs:5 commits:2 tags:0 From d11d003ba5e98c036fb94204df6dcef28aafe2f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6ren=20Krecker?= Date: Mon, 23 Dec 2024 12:04:05 +0100 Subject: [PATCH 029/535] date.c: Fix type missmatch warings from msvc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix compiler warings from msvc in date.c for value truncation from 64 bit to 32 bit integers. Also switch from int to size_t for all variables with result of strlen() which cannot become negative. Signed-off-by: Sören Krecker Signed-off-by: Junio C Hamano --- date.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/date.c b/date.c index a1b26a8dce697c..17a95077cf5450 100644 --- a/date.c +++ b/date.c @@ -1244,7 +1244,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm } for (s = special; s->name; s++) { - int len = strlen(s->name); + size_t len = strlen(s->name); if (match_string(date, s->name) == len) { s->fn(tm, now, num); *touched = 1; @@ -1254,7 +1254,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm if (!*num) { for (i = 1; i < 11; i++) { - int len = strlen(number_name[i]); + size_t len = strlen(number_name[i]); if (match_string(date, number_name[i]) == len) { *num = i; *touched = 1; @@ -1270,7 +1270,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm tl = typelen; while (tl->type) { - int len = strlen(tl->type); + size_t len = strlen(tl->type); if (match_string(date, tl->type) >= len-1) { update_tm(tm, now, tl->length * *num); *num = 0; From 44945dfe867e56aab1685a0f371665273291a2af Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:21 +0100 Subject: [PATCH 030/535] prio-queue: fix type of `insertion_ctr` In 62e745ced2 (prio-queue: use size_t rather than int for size, 2024-12-20), we have converted `struct prio_queue` to use `size_t` to track the number of entries in the queue as well as the allocated size of the underlying array. There is one more counter though, namely the insertion counter, that is still using an `unsigned` instead of a `size_t`. This is unlikely to ever be a problem, but it makes one wonder why some indices use `size_t` while others use `unsigned`. Furthermore, the mentioned commit stated the intent to also adapt these variables, but seemingly forgot to do so. Fix the issue by converting those counters to use `size_t`, as well. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- prio-queue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prio-queue.h b/prio-queue.h index 36f370625f0802..38d032636d4cf9 100644 --- a/prio-queue.h +++ b/prio-queue.h @@ -22,13 +22,13 @@ typedef int (*prio_queue_compare_fn)(const void *one, const void *two, void *cb_data); struct prio_queue_entry { - unsigned ctr; + size_t ctr; void *data; }; struct prio_queue { prio_queue_compare_fn compare; - unsigned insertion_ctr; + size_t insertion_ctr; void *cb_data; size_t alloc, nr; struct prio_queue_entry *array; From 95c09e4d07492fa9e4ad951a268b4ea6bae69038 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:22 +0100 Subject: [PATCH 031/535] commit-reach: fix index used to loop through unsigned integer In 62e745ced2 (prio-queue: use size_t rather than int for size, 2024-12-20), we refactored `struct prio_queue` to track the number of contained entries via a `size_t`. While the refactoring adapted one of the users of that variable, it forgot to also adapt "commit-reach.c" accordingly. This was missed because that file has -Wsign-conversion disabled. Fix the issue by using a `size_t` to iterate through entries. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit-reach.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index e3edd119952979..e65872617003d0 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -42,8 +42,7 @@ static int compare_commits_by_gen(const void *_a, const void *_b) static int queue_has_nonstale(struct prio_queue *queue) { - int i; - for (i = 0; i < queue->nr; i++) { + for (size_t i = 0; i < queue->nr; i++) { struct commit *commit = queue->array[i].data; if (!(commit->object.flags & STALE)) return 1; From 04aeeeaab1f02213703c4e1997b2c2f1ca0f8f96 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:23 +0100 Subject: [PATCH 032/535] commit-reach: fix type of `min_commit_date` The `can_all_from_reach_with_flag()` function accepts a parameter that allows callers to cut off traversal at a specified commit date. This parameter is of type `time_t`, which is a signed type, while we end up comparing it to a commit's `date` field, which is of the unsigned type `timestamp_t`. Fix the parameter to be of type `timestamp_t`. There is only a single caller in "upload-pack.c" that sets this parameter, and that caller knows to pass in a `timestamp_t` already. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit-reach.c | 4 ++-- commit-reach.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index e65872617003d0..9f8b2457bcc12b 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -780,7 +780,7 @@ int commit_contains(struct ref_filter *filter, struct commit *commit, int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, - time_t min_commit_date, + timestamp_t min_commit_date, timestamp_t min_generation) { struct commit **list = NULL; @@ -883,9 +883,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, int cutoff_by_min_date) { struct object_array from_objs = OBJECT_ARRAY_INIT; - time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0; struct commit_list *from_iter = from, *to_iter = to; int result; + timestamp_t min_commit_date = cutoff_by_min_date ? from->item->date : 0; timestamp_t min_generation = GENERATION_NUMBER_INFINITY; while (from_iter) { diff --git a/commit-reach.h b/commit-reach.h index 9a745b7e176685..d5f3347376b631 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -81,7 +81,7 @@ int commit_contains(struct ref_filter *filter, struct commit *commit, int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, - time_t min_commit_date, + timestamp_t min_commit_date, timestamp_t min_generation); int can_all_from_reach(struct commit_list *from, struct commit_list *to, int commit_date_cutoff); From 45843d8f4eb2bbfc73cc361ba9d612d088dc8a4f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:24 +0100 Subject: [PATCH 033/535] commit-reach: use `size_t` to track indices in `remove_redundant()` The function `remove_redundant()` gets as input an array of commits as well as the size of that array and then drops redundant commits from that array. It then returns either `-1` in case an error occurred, or the new number of items in the array. The function receives and returns these sizes with a signed integer, which causes several warnings with -Wsign-compare. Fix this issue by consistently using `size_t` to track array indices and splitting up the returned value into a returned error code and a separate out pointer for the new computed size. Note that `get_merge_bases_many()` and related functions still track array sizes as a signed integer. This will be fixed in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- commit-reach.c | 53 ++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index 9f8b2457bcc12b..d7f6f1be75e95c 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -212,12 +212,13 @@ int get_octopus_merge_bases(struct commit_list *in, struct commit_list **result) } static int remove_redundant_no_gen(struct repository *r, - struct commit **array, int cnt) + struct commit **array, + size_t cnt, size_t *dedup_cnt) { struct commit **work; unsigned char *redundant; - int *filled_index; - int i, j, filled; + size_t *filled_index; + size_t i, j, filled; CALLOC_ARRAY(work, cnt); redundant = xcalloc(cnt, 1); @@ -267,20 +268,22 @@ static int remove_redundant_no_gen(struct repository *r, for (i = filled = 0; i < cnt; i++) if (!redundant[i]) array[filled++] = work[i]; + *dedup_cnt = filled; free(work); free(redundant); free(filled_index); - return filled; + return 0; } static int remove_redundant_with_gen(struct repository *r, - struct commit **array, int cnt) + struct commit **array, size_t cnt, + size_t *dedup_cnt) { - int i, count_non_stale = 0, count_still_independent = cnt; + size_t i, count_non_stale = 0, count_still_independent = cnt; timestamp_t min_generation = GENERATION_NUMBER_INFINITY; struct commit **walk_start, **sorted; size_t walk_start_nr = 0, walk_start_alloc = cnt; - int min_gen_pos = 0; + size_t min_gen_pos = 0; /* * Sort the input by generation number, ascending. This allows @@ -326,12 +329,12 @@ static int remove_redundant_with_gen(struct repository *r, * terminate early. Otherwise, we will do the same amount of work * as before. */ - for (i = walk_start_nr - 1; i >= 0 && count_still_independent > 1; i--) { + for (i = walk_start_nr; i && count_still_independent > 1; i--) { /* push the STALE bits up to min generation */ struct commit_list *stack = NULL; - commit_list_insert(walk_start[i], &stack); - walk_start[i]->object.flags |= STALE; + commit_list_insert(walk_start[i - 1], &stack); + walk_start[i - 1]->object.flags |= STALE; while (stack) { struct commit_list *parents; @@ -388,10 +391,12 @@ static int remove_redundant_with_gen(struct repository *r, clear_commit_marks_many(walk_start_nr, walk_start, STALE); free(walk_start); - return count_non_stale; + *dedup_cnt = count_non_stale; + return 0; } -static int remove_redundant(struct repository *r, struct commit **array, int cnt) +static int remove_redundant(struct repository *r, struct commit **array, + size_t cnt, size_t *dedup_cnt) { /* * Some commit in the array may be an ancestor of @@ -401,19 +406,17 @@ static int remove_redundant(struct repository *r, struct commit **array, int cnt * that number. */ if (generation_numbers_enabled(r)) { - int i; - /* * If we have a single commit with finite generation * number, then the _with_gen algorithm is preferred. */ - for (i = 0; i < cnt; i++) { + for (size_t i = 0; i < cnt; i++) { if (commit_graph_generation(array[i]) < GENERATION_NUMBER_INFINITY) - return remove_redundant_with_gen(r, array, cnt); + return remove_redundant_with_gen(r, array, cnt, dedup_cnt); } } - return remove_redundant_no_gen(r, array, cnt); + return remove_redundant_no_gen(r, array, cnt, dedup_cnt); } static int get_merge_bases_many_0(struct repository *r, @@ -425,7 +428,8 @@ static int get_merge_bases_many_0(struct repository *r, { struct commit_list *list; struct commit **rslt; - int cnt, i; + size_t cnt, i; + int ret; if (merge_bases_many(r, one, n, twos, result) < 0) return -1; @@ -452,8 +456,8 @@ static int get_merge_bases_many_0(struct repository *r, clear_commit_marks(one, all_flags); clear_commit_marks_many(n, twos, all_flags); - cnt = remove_redundant(r, rslt, cnt); - if (cnt < 0) { + ret = remove_redundant(r, rslt, cnt, &cnt); + if (ret < 0) { free(rslt); return -1; } @@ -582,7 +586,8 @@ struct commit_list *reduce_heads(struct commit_list *heads) struct commit_list *p; struct commit_list *result = NULL, **tail = &result; struct commit **array; - int num_head, i; + size_t num_head, i; + int ret; if (!heads) return NULL; @@ -603,11 +608,13 @@ struct commit_list *reduce_heads(struct commit_list *heads) p->item->object.flags &= ~STALE; } } - num_head = remove_redundant(the_repository, array, num_head); - if (num_head < 0) { + + ret = remove_redundant(the_repository, array, num_head, &num_head); + if (ret < 0) { free(array); return NULL; } + for (i = 0; i < num_head; i++) tail = &commit_list_insert(array[i], tail)->next; free(array); From 85ee0680e2d5d667919e06394ca7622f09652310 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:25 +0100 Subject: [PATCH 034/535] commit-reach: use `size_t` to track indices in `get_reachable_subset()` Similar as with the preceding commit, adapt `get_reachable_subset()` so that it tracks array indices via `size_t` instead of using signed integers to fix a couple of -Wsign-compare warnings. Adapt callers accordingly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- bisect.c | 9 +++++---- commit-reach.c | 8 ++++---- commit-reach.h | 4 ++-- commit.c | 4 ++-- commit.h | 2 +- ref-filter.c | 2 +- remote.c | 4 ++-- 7 files changed, 17 insertions(+), 16 deletions(-) diff --git a/bisect.c b/bisect.c index 1a9069c9ad8a19..7a1afc46e5fc02 100644 --- a/bisect.c +++ b/bisect.c @@ -780,10 +780,10 @@ static struct commit *get_commit_reference(struct repository *r, } static struct commit **get_bad_and_good_commits(struct repository *r, - int *rev_nr) + size_t *rev_nr) { struct commit **rev; - int i, n = 0; + size_t i, n = 0; ALLOC_ARRAY(rev, 1 + good_revs.nr); rev[n++] = get_commit_reference(r, current_bad_oid); @@ -887,7 +887,7 @@ static enum bisect_error check_merge_bases(int rev_nr, struct commit **rev, int return res; } -static int check_ancestors(struct repository *r, int rev_nr, +static int check_ancestors(struct repository *r, size_t rev_nr, struct commit **rev, const char *prefix) { struct strvec rev_argv = STRVEC_INIT; @@ -922,7 +922,8 @@ static enum bisect_error check_good_are_ancestors_of_bad(struct repository *r, { char *filename; struct stat st; - int fd, rev_nr; + int fd; + size_t rev_nr; enum bisect_error res = BISECT_OK; struct commit **rev; diff --git a/commit-reach.c b/commit-reach.c index d7f6f1be75e95c..bab40f55758047 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -791,8 +791,8 @@ int can_all_from_reach_with_flag(struct object_array *from, timestamp_t min_generation) { struct commit **list = NULL; - int i; - int nr_commits; + size_t i; + size_t nr_commits; int result = 1; ALLOC_ARRAY(list, from->nr); @@ -944,8 +944,8 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, return result; } -struct commit_list *get_reachable_subset(struct commit **from, int nr_from, - struct commit **to, int nr_to, +struct commit_list *get_reachable_subset(struct commit **from, size_t nr_from, + struct commit **to, size_t nr_to, unsigned int reachable_flag) { struct commit **item; diff --git a/commit-reach.h b/commit-reach.h index d5f3347376b631..fa5408054ac013 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -95,8 +95,8 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, * This method uses the PARENT1 and PARENT2 flags during its operation, * so be sure these flags are not set before calling the method. */ -struct commit_list *get_reachable_subset(struct commit **from, int nr_from, - struct commit **to, int nr_to, +struct commit_list *get_reachable_subset(struct commit **from, size_t nr_from, + struct commit **to, size_t nr_to, unsigned int reachable_flag); struct ahead_behind_count { diff --git a/commit.c b/commit.c index a127fe60c5e83c..540660359d4190 100644 --- a/commit.c +++ b/commit.c @@ -778,11 +778,11 @@ static void clear_commit_marks_1(struct commit_list **plist, } } -void clear_commit_marks_many(int nr, struct commit **commit, unsigned int mark) +void clear_commit_marks_many(size_t nr, struct commit **commit, unsigned int mark) { struct commit_list *list = NULL; - while (nr--) { + for (size_t i = 0; i < nr; i++) { clear_commit_marks_1(&list, *commit, mark); commit++; } diff --git a/commit.h b/commit.h index 943e3d74b2ae02..70c870dae4d4b9 100644 --- a/commit.h +++ b/commit.h @@ -210,7 +210,7 @@ struct commit *pop_most_recent_commit(struct commit_list **list, struct commit *pop_commit(struct commit_list **stack); void clear_commit_marks(struct commit *commit, unsigned int mark); -void clear_commit_marks_many(int nr, struct commit **commit, unsigned int mark); +void clear_commit_marks_many(size_t nr, struct commit **commit, unsigned int mark); enum rev_sort_order { diff --git a/ref-filter.c b/ref-filter.c index 23054694c2c960..bf5534605e234d 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -3041,7 +3041,7 @@ static void reach_filter(struct ref_array *array, struct commit_list **check_reachable, int include_reached) { - int i, old_nr; + size_t i, old_nr; struct commit **to_clear; if (!*check_reachable) diff --git a/remote.c b/remote.c index 18e5ccf3918445..0f6fba85625b52 100644 --- a/remote.c +++ b/remote.c @@ -1535,7 +1535,7 @@ static struct ref **tail_ref(struct ref **head) struct tips { struct commit **tip; - int nr, alloc; + size_t nr, alloc; }; static void add_to_tips(struct tips *tips, const struct object_id *oid) @@ -1602,7 +1602,7 @@ static void add_missing_tags(struct ref *src, struct ref **dst, struct ref ***ds const int reachable_flag = 1; struct commit_list *found_commits; struct commit **src_commits; - int nr_src_commits = 0, alloc_src_commits = 16; + size_t nr_src_commits = 0, alloc_src_commits = 16; ALLOC_ARRAY(src_commits, alloc_src_commits); for_each_string_list_item(item, &src_tag) { From 0905ed201a87bc97dc4d47c0cb8fd65316f33269 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:26 +0100 Subject: [PATCH 035/535] builtin/log: use `size_t` to track indices Similar as with the preceding commit, adapt "builtin/log.c" so that it tracks array indices via `size_t` instead of using signed integers. This fixes a couple of -Wsign-compare warnings and prepares the code for a similar refactoring of `repo_get_merge_bases_many()` in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/log.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index 75e1b34123b348..805b2355d96491 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1746,11 +1746,12 @@ struct base_tree_info { static struct commit *get_base_commit(const struct format_config *cfg, struct commit **list, - int total) + size_t total) { struct commit *base = NULL; struct commit **rev; - int i = 0, rev_nr = 0, auto_select, die_on_failure, ret; + int auto_select, die_on_failure, ret; + size_t i = 0, rev_nr = 0; switch (cfg->auto_base) { case AUTO_BASE_NEVER: @@ -1885,13 +1886,12 @@ define_commit_slab(commit_base, int); static void prepare_bases(struct base_tree_info *bases, struct commit *base, struct commit **list, - int total) + size_t total) { struct commit *commit; struct rev_info revs; struct diff_options diffopt; struct commit_base commit_base; - int i; if (!base) return; @@ -1906,7 +1906,7 @@ static void prepare_bases(struct base_tree_info *bases, repo_init_revisions(the_repository, &revs, NULL); revs.max_parents = 1; revs.topo_order = 1; - for (i = 0; i < total; i++) { + for (size_t i = 0; i < total; i++) { list[i]->object.flags &= ~UNINTERESTING; add_pending_object(&revs, &list[i]->object, "rev_list"); *commit_base_at(&commit_base, list[i]) = 1; @@ -2007,7 +2007,7 @@ int cmd_format_patch(int argc, struct rev_info rev; char *to_free = NULL; struct setup_revision_opt s_r_opt; - int nr = 0, total, i; + size_t nr = 0, total, i; int use_stdout = 0; int start_number = -1; int just_numbers = 0; @@ -2500,11 +2500,14 @@ int cmd_format_patch(int argc, if (show_progress) progress = start_delayed_progress(_("Generating patches"), total); - while (0 <= --nr) { + for (i = 0; i < nr; i++) { + size_t idx = nr - i - 1; int shown; - display_progress(progress, total - nr); - commit = list[nr]; - rev.nr = total - nr + (start_number - 1); + + display_progress(progress, total - idx); + commit = list[idx]; + rev.nr = total - idx + (start_number - 1); + /* Make the second and subsequent mails replies to the first */ if (cfg.thread) { /* Have we already had a message ID? */ From 1ab5948141e62b52bcb812b04a901b3efaf1b578 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:27 +0100 Subject: [PATCH 036/535] builtin/log: fix remaining -Wsign-compare warnings Fix remaining -Wsign-compare warnings in "builtin/log.c" and mark the file as -Wsign-compare-clean. While most of the fixes are obvious, one fix requires us to use `cast_size_t_to_int()`, which will cause us to die in case the `size_t` cannot be represented as `int`. This should be fine though, as the data would typically be set either via a config key or via the command line, neither of which should ever exceed a couple of kilobytes of data. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/log.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index 805b2355d96491..a4f41aafcae069 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -6,7 +6,6 @@ */ #define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" #include "abspath.h" @@ -209,7 +208,6 @@ static void cmd_log_init_defaults(struct rev_info *rev, static void set_default_decoration_filter(struct decoration_filter *decoration_filter) { - int i; char *value = NULL; struct string_list *include = decoration_filter->include_ref_pattern; const struct string_list *config_exclude; @@ -243,7 +241,7 @@ static void set_default_decoration_filter(struct decoration_filter *decoration_f * No command-line or config options were given, so * populate with sensible defaults. */ - for (i = 0; i < ARRAY_SIZE(ref_namespace); i++) { + for (size_t i = 0; i < ARRAY_SIZE(ref_namespace); i++) { if (!ref_namespace[i].decoration) continue; @@ -717,14 +715,14 @@ static int show_tag_object(const struct object_id *oid, struct rev_info *rev) unsigned long size; enum object_type type; char *buf = repo_read_object_file(the_repository, oid, &type, &size); - int offset = 0; + unsigned long offset = 0; if (!buf) return error(_("could not read object %s"), oid_to_hex(oid)); assert(type == OBJ_TAG); while (offset < size && buf[offset] != '\n') { - int new_offset = offset + 1; + unsigned long new_offset = offset + 1; const char *ident; while (new_offset < size && buf[new_offset++] != '\n') ; /* do nothing */ @@ -1316,24 +1314,25 @@ static void print_signature(const char *signature, FILE *file) static char *find_branch_name(struct rev_info *rev) { - int i, positive = -1; struct object_id branch_oid; const struct object_id *tip_oid; const char *ref, *v; char *full_ref, *branch = NULL; + int interesting_found = 0; + size_t idx; - for (i = 0; i < rev->cmdline.nr; i++) { + for (size_t i = 0; i < rev->cmdline.nr; i++) { if (rev->cmdline.rev[i].flags & UNINTERESTING) continue; - if (positive < 0) - positive = i; - else + if (interesting_found) return NULL; + interesting_found = 1; + idx = i; } - if (positive < 0) + if (!interesting_found) return NULL; - ref = rev->cmdline.rev[positive].name; - tip_oid = &rev->cmdline.rev[positive].item->oid; + ref = rev->cmdline.rev[idx].name; + tip_oid = &rev->cmdline.rev[idx].item->oid; if (repo_dwim_ref(the_repository, ref, strlen(ref), &branch_oid, &full_ref, 0) && skip_prefix(full_ref, "refs/heads/", &v) && @@ -2183,7 +2182,7 @@ int cmd_format_patch(int argc, fmt_patch_suffix = cfg.fmt_patch_suffix; /* Make sure "0000-$sub.patch" gives non-negative length for $sub */ - if (cfg.log.fmt_patch_name_max <= strlen("0000-") + strlen(fmt_patch_suffix)) + if (cfg.log.fmt_patch_name_max <= cast_size_t_to_int(strlen("0000-") + strlen(fmt_patch_suffix))) cfg.log.fmt_patch_name_max = strlen("0000-") + strlen(fmt_patch_suffix); if (cover_from_description_arg) From 455ac07021d4feede4f5b7e39bf00dc186ce3c09 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:28 +0100 Subject: [PATCH 037/535] shallow: fix -Wsign-compare warnings Fix a couple of -Wsign-compare issues in "shallow.c" and mark the file as -Wsign-compare-clean. This change prepares the code for a refactoring of `repo_in_merge_bases_many()`, which will be adapted to accept the number of commits as `size_t` instead of `int`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- shallow.c | 38 ++++++++++++++++++-------------------- shallow.h | 6 +++--- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/shallow.c b/shallow.c index 82a8da3d730b2f..b8fcfbef0f9cdb 100644 --- a/shallow.c +++ b/shallow.c @@ -1,5 +1,4 @@ #define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" #include "hex.h" @@ -134,7 +133,8 @@ static void free_depth_in_slab(int **ptr) struct commit_list *get_shallow_commits(struct object_array *heads, int depth, int shallow_flag, int not_shallow_flag) { - int i = 0, cur_depth = 0; + size_t i = 0; + int cur_depth = 0; struct commit_list *result = NULL; struct object_array stack = OBJECT_ARRAY_INIT; struct commit *commit = NULL; @@ -335,16 +335,16 @@ static int write_shallow_commits_1(struct strbuf *out, int use_pack_protocol, const struct oid_array *extra, unsigned flags) { - struct write_shallow_data data; - int i; - data.out = out; - data.use_pack_protocol = use_pack_protocol; - data.count = 0; - data.flags = flags; + struct write_shallow_data data = { + .out = out, + .use_pack_protocol = use_pack_protocol, + .flags = flags, + }; + for_each_commit_graft(write_one_shallow, &data); if (!extra) return data.count; - for (i = 0; i < extra->nr; i++) { + for (size_t i = 0; i < extra->nr; i++) { strbuf_addstr(out, oid_to_hex(extra->oid + i)); strbuf_addch(out, '\n'); data.count++; @@ -466,7 +466,6 @@ struct trace_key trace_shallow = TRACE_KEY_INIT(SHALLOW); */ void prepare_shallow_info(struct shallow_info *info, struct oid_array *sa) { - int i; trace_printf_key(&trace_shallow, "shallow: prepare_shallow_info\n"); memset(info, 0, sizeof(*info)); info->shallow = sa; @@ -474,7 +473,7 @@ void prepare_shallow_info(struct shallow_info *info, struct oid_array *sa) return; ALLOC_ARRAY(info->ours, sa->nr); ALLOC_ARRAY(info->theirs, sa->nr); - for (i = 0; i < sa->nr; i++) { + for (size_t i = 0; i < sa->nr; i++) { if (repo_has_object_file(the_repository, sa->oid + i)) { struct commit_graft *graft; graft = lookup_commit_graft(the_repository, @@ -507,7 +506,7 @@ void clear_shallow_info(struct shallow_info *info) void remove_nonexistent_theirs_shallow(struct shallow_info *info) { struct object_id *oid = info->shallow->oid; - int i, dst; + size_t i, dst; trace_printf_key(&trace_shallow, "shallow: remove_nonexistent_theirs_shallow\n"); for (i = dst = 0; i < info->nr_theirs; i++) { if (i != dst) @@ -560,7 +559,7 @@ static void paint_down(struct paint_info *info, const struct object_id *oid, { unsigned int i, nr; struct commit_list *head = NULL; - int bitmap_nr = DIV_ROUND_UP(info->nr_bits, 32); + size_t bitmap_nr = DIV_ROUND_UP(info->nr_bits, 32); size_t bitmap_size = st_mult(sizeof(uint32_t), bitmap_nr); struct commit *c = lookup_commit_reference_gently(the_repository, oid, 1); @@ -660,7 +659,7 @@ void assign_shallow_commits_to_refs(struct shallow_info *info, struct object_id *oid = info->shallow->oid; struct oid_array *ref = info->ref; unsigned int i, nr; - int *shallow, nr_shallow = 0; + size_t *shallow, nr_shallow = 0; struct paint_info pi; trace_printf_key(&trace_shallow, "shallow: assign_shallow_commits_to_refs\n"); @@ -735,7 +734,7 @@ void assign_shallow_commits_to_refs(struct shallow_info *info, struct commit_array { struct commit **commits; - int nr, alloc; + size_t nr, alloc; }; static int add_ref(const char *refname UNUSED, @@ -753,12 +752,11 @@ static int add_ref(const char *refname UNUSED, return 0; } -static void update_refstatus(int *ref_status, int nr, uint32_t *bitmap) +static void update_refstatus(int *ref_status, size_t nr, uint32_t *bitmap) { - unsigned int i; if (!ref_status) return; - for (i = 0; i < nr; i++) + for (size_t i = 0; i < nr; i++) if (bitmap[i / 32] & (1U << (i % 32))) ref_status[i]++; } @@ -773,8 +771,8 @@ static void post_assign_shallow(struct shallow_info *info, struct object_id *oid = info->shallow->oid; struct commit *c; uint32_t **bitmap; - int dst, i, j; - int bitmap_nr = DIV_ROUND_UP(info->ref->nr, 32); + size_t dst, i, j; + size_t bitmap_nr = DIV_ROUND_UP(info->ref->nr, 32); struct commit_array ca; trace_printf_key(&trace_shallow, "shallow: post_assign_shallow\n"); diff --git a/shallow.h b/shallow.h index e9ca7e4bc80451..9bfeade93ead74 100644 --- a/shallow.h +++ b/shallow.h @@ -59,8 +59,8 @@ void prune_shallow(unsigned options); */ struct shallow_info { struct oid_array *shallow; - int *ours, nr_ours; - int *theirs, nr_theirs; + size_t *ours, nr_ours; + size_t *theirs, nr_theirs; struct oid_array *ref; /* for receive-pack */ @@ -69,7 +69,7 @@ struct shallow_info { int *reachable; int *shallow_ref; struct commit **commits; - int nr_commits; + size_t nr_commits; }; void prepare_shallow_info(struct shallow_info *, struct oid_array *); From 5e7fe8a7b89a07d8c3ab298ac69bc33f6ba88b47 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 27 Dec 2024 11:46:29 +0100 Subject: [PATCH 038/535] commit-reach: use `size_t` to track indices when computing merge bases The functions `repo_get_merge_bases_many()` and friends accepts an array of commits as well as a parameter that indicates how large that array is. This parameter is using a signed integer, which leads to a couple of warnings with -Wsign-compare. Refactor the code to use `size_t` to track indices instead and adapt callers accordingly. While most callers are trivial, there are two callers that require a bit more scrutiny: - builtin/merge-base.c:show_merge_base() subtracts `1` from the `rev_nr` before calling `repo_get_merge_bases_many_dirty()`, so if the variable was `0` it would wrap. This code is fine though because its only caller will execute that code only when `argc >= 2`, and it follows that `rev_nr >= 2`, as well. - bisect.ccheck_merge_bases() similarly subtracts `1` from `rev_nr`. Again, there is only a single caller that populates `rev_nr` with `good_revs.nr`. And because a bisection always requires at least one good revision it follws that `rev_nr >= 1`. Mark the file as -Wsign-compare-clean. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- bisect.c | 2 +- builtin/merge-base.c | 4 ++-- commit-reach.c | 7 +++---- commit-reach.h | 4 ++-- t/helper/test-reach.c | 6 +++--- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/bisect.c b/bisect.c index 7a1afc46e5fc02..7a3c77c6d84da0 100644 --- a/bisect.c +++ b/bisect.c @@ -855,7 +855,7 @@ static void handle_skipped_merge_base(const struct object_id *mb) * for early success, this will be converted back to 0 in * check_good_are_ancestors_of_bad(). */ -static enum bisect_error check_merge_bases(int rev_nr, struct commit **rev, int no_checkout) +static enum bisect_error check_merge_bases(size_t rev_nr, struct commit **rev, int no_checkout) { enum bisect_error res = BISECT_OK; struct commit_list *result = NULL; diff --git a/builtin/merge-base.c b/builtin/merge-base.c index a20c93b11aaa0b..123c81515e1f5f 100644 --- a/builtin/merge-base.c +++ b/builtin/merge-base.c @@ -8,7 +8,7 @@ #include "parse-options.h" #include "commit-reach.h" -static int show_merge_base(struct commit **rev, int rev_nr, int show_all) +static int show_merge_base(struct commit **rev, size_t rev_nr, int show_all) { struct commit_list *result = NULL, *r; @@ -149,7 +149,7 @@ int cmd_merge_base(int argc, struct repository *repo UNUSED) { struct commit **rev; - int rev_nr = 0; + size_t rev_nr = 0; int show_all = 0; int cmdmode = 0; int ret; diff --git a/commit-reach.c b/commit-reach.c index bab40f55758047..a339e41aa4ed1e 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -1,5 +1,4 @@ #define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" #include "commit.h" @@ -421,7 +420,7 @@ static int remove_redundant(struct repository *r, struct commit **array, static int get_merge_bases_many_0(struct repository *r, struct commit *one, - int n, + size_t n, struct commit **twos, int cleanup, struct commit_list **result) @@ -469,7 +468,7 @@ static int get_merge_bases_many_0(struct repository *r, int repo_get_merge_bases_many(struct repository *r, struct commit *one, - int n, + size_t n, struct commit **twos, struct commit_list **result) { @@ -478,7 +477,7 @@ int repo_get_merge_bases_many(struct repository *r, int repo_get_merge_bases_many_dirty(struct repository *r, struct commit *one, - int n, + size_t n, struct commit **twos, struct commit_list **result) { diff --git a/commit-reach.h b/commit-reach.h index fa5408054ac013..6012402dfcfe45 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -14,12 +14,12 @@ int repo_get_merge_bases(struct repository *r, struct commit *rev2, struct commit_list **result); int repo_get_merge_bases_many(struct repository *r, - struct commit *one, int n, + struct commit *one, size_t n, struct commit **twos, struct commit_list **result); /* To be used only when object flags after this call no longer matter */ int repo_get_merge_bases_many_dirty(struct repository *r, - struct commit *one, int n, + struct commit *one, size_t n, struct commit **twos, struct commit_list **result); diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 01cf77ae65b9a7..028ec003067828 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -35,7 +35,7 @@ int cmd__reach(int ac, const char **av) struct commit_list *X, *Y; struct object_array X_obj = OBJECT_ARRAY_INIT; struct commit **X_array, **Y_array; - int X_nr, X_alloc, Y_nr, Y_alloc; + size_t X_nr, X_alloc, Y_nr, Y_alloc; struct strbuf buf = STRBUF_INIT; struct repository *r = the_repository; @@ -157,7 +157,7 @@ int cmd__reach(int ac, const char **av) clear_contains_cache(&cache); } else if (!strcmp(av[1], "get_reachable_subset")) { const int reachable_flag = 1; - int i, count = 0; + int count = 0; struct commit_list *current; struct commit_list *list = get_reachable_subset(X_array, X_nr, Y_array, Y_nr, @@ -169,7 +169,7 @@ int cmd__reach(int ac, const char **av) oid_to_hex(&list->item->object.oid)); count++; } - for (i = 0; i < Y_nr; i++) { + for (size_t i = 0; i < Y_nr; i++) { if (Y_array[i]->object.flags & reachable_flag) count--; } From 24027256aa9614a445563707a72af7ce5ff49b5b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 27 Dec 2024 12:25:30 -0800 Subject: [PATCH 039/535] sign-compare: avoid comparing ptrdiff with an int/unsigned Instead, offset the base pointer with integer and compare it with the other pointer. Signed-off-by: Junio C Hamano --- shallow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shallow.c b/shallow.c index b8fcfbef0f9cdb..b54244ffa90d66 100644 --- a/shallow.c +++ b/shallow.c @@ -534,7 +534,7 @@ static uint32_t *paint_alloc(struct paint_info *info) unsigned nr = DIV_ROUND_UP(info->nr_bits, 32); unsigned size = nr * sizeof(uint32_t); void *p; - if (!info->pool_count || size > info->end - info->free) { + if (!info->pool_count || info->end < info->free + size) { if (size > POOL_SIZE) BUG("pool size too small for %d in paint_alloc()", size); From 8214e27d275915079ddf7c294c379515e34e8efb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 15:24:04 +0100 Subject: [PATCH 040/535] meson: consistenlty spell 'CommonCrypto' The 'CommonCrypto' backend can be specified as HTTPS and SHA1 backends, but the value that one needs to use is inconsistent across those two build options. Unify it to 'CommonCrypto'. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 2 +- meson_options.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index 0064eb64f546a6..9da58dafe0f102 100644 --- a/meson.build +++ b/meson.build @@ -1367,7 +1367,7 @@ if sha1_backend == 'sha1dc' 'sha1dc/sha1.c', 'sha1dc/ubc_check.c', ] -elif sha1_backend == 'common-crypto' +elif sha1_backend == 'CommonCrypto' libgit_c_args += '-DCOMMON_DIGEST_FOR_OPENSSL' libgit_c_args += '-DSHA1_APPLE' # Apple CommonCrypto requires chunking diff --git a/meson_options.txt b/meson_options.txt index 4be7eab3993917..a7f308d217f29e 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -49,7 +49,7 @@ option('regex', type: 'feature', value: 'auto', # Backends. option('https_backend', type: 'combo', value: 'auto', choices: ['auto', 'openssl', 'CommonCrypto', 'none'], description: 'The HTTPS backend to use when connecting to remotes.') -option('sha1_backend', type: 'combo', choices: ['openssl', 'block', 'sha1dc', 'common-crypto'], value: 'sha1dc', +option('sha1_backend', type: 'combo', choices: ['openssl', 'block', 'sha1dc', 'CommonCrypto'], value: 'sha1dc', description: 'The backend used for hashing objects with the SHA1 object format') option('sha256_backend', type: 'combo', choices: ['openssl', 'nettle', 'gcrypt', 'block'], value: 'block', description: 'The backend used for hashing objects with the SHA256 object format') From 31eb6d7cf09c3fa668c1839d8c5759ab7cdf280c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 15:24:05 +0100 Subject: [PATCH 041/535] meson: deduplicate access to SHA1/SHA256 backend options We've got a couple of repeated calls to `get_option()` for the SHA1 and SHA256 backend options. While not an issue, it makes the code needlessly verbose. Fix this by consistently using a local variable. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/meson.build b/meson.build index 9da58dafe0f102..6fa4d900ee02f0 100644 --- a/meson.build +++ b/meson.build @@ -1326,6 +1326,8 @@ if not meson.is_cross_build() and fs.exists('/dev/tty') endif https_backend = get_option('https_backend') +sha1_backend = get_option('sha1_backend') +sha256_backend = get_option('sha256_backend') security_framework = dependency('Security', required: https_backend == 'CommonCrypto') core_foundation_framework = dependency('CoreFoundation', required: security_framework.found()) @@ -1333,7 +1335,7 @@ if https_backend == 'auto' and security_framework.found() https_backend = 'CommonCrypto' endif -openssl_required = https_backend == 'openssl' or get_option('sha1_backend') == 'openssl' or get_option('sha256_backend') == 'openssl' +openssl_required = https_backend == 'openssl' or sha1_backend == 'openssl' or sha256_backend == 'openssl' openssl = dependency('openssl', required: openssl_required, default_options: ['default_library=static']) if https_backend == 'auto' and openssl.found() https_backend = 'openssl' @@ -1354,7 +1356,6 @@ if https_backend != 'openssl' libgit_c_args += '-DNO_OPENSSL' endif -sha1_backend = get_option('sha1_backend') if sha1_backend == 'sha1dc' libgit_c_args += '-DSHA1_DC' libgit_c_args += '-DSHA1DC_NO_STANDARD_INCLUDES=1' @@ -1382,7 +1383,6 @@ else error('Unhandled SHA1 backend ' + sha1_backend) endif -sha256_backend = get_option('sha256_backend') if sha256_backend == 'openssl' libgit_c_args += '-DSHA256_OPENSSL' libgit_dependencies += openssl From d6787d975147a74f1560fffc09dcb2a1f92460bb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 15:24:06 +0100 Subject: [PATCH 042/535] meson: require SecurityFramework when it's used as SHA1 backend The Security framework is required when we use CommonCrypto either as HTTPS or SHA1 backend, but we only require it in case it is set up as HTTPS backend. Fix this. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 6fa4d900ee02f0..bc75ad954a4949 100644 --- a/meson.build +++ b/meson.build @@ -1329,7 +1329,7 @@ https_backend = get_option('https_backend') sha1_backend = get_option('sha1_backend') sha256_backend = get_option('sha256_backend') -security_framework = dependency('Security', required: https_backend == 'CommonCrypto') +security_framework = dependency('Security', required: https_backend == 'CommonCrypto' or sha1_backend == 'CommonCrypto') core_foundation_framework = dependency('CoreFoundation', required: security_framework.found()) if https_backend == 'auto' and security_framework.found() https_backend = 'CommonCrypto' From 6d8aa2aec81abf4935c72745790bc5f9bf7541b9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 15:24:07 +0100 Subject: [PATCH 043/535] meson: simplify conditions for HTTPS and SHA1 dependencies The conditions used to figure out whteher the Security framework or OpenSSL library is required are a bit convoluted because they can be pulled in via the HTTPS, SHA1 or SHA256 backends. Refactor them to be easier to read. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index bc75ad954a4949..46f807899b7bae 100644 --- a/meson.build +++ b/meson.build @@ -1329,13 +1329,13 @@ https_backend = get_option('https_backend') sha1_backend = get_option('sha1_backend') sha256_backend = get_option('sha256_backend') -security_framework = dependency('Security', required: https_backend == 'CommonCrypto' or sha1_backend == 'CommonCrypto') +security_framework = dependency('Security', required: 'CommonCrypto' in [https_backend, sha1_backend]) core_foundation_framework = dependency('CoreFoundation', required: security_framework.found()) if https_backend == 'auto' and security_framework.found() https_backend = 'CommonCrypto' endif -openssl_required = https_backend == 'openssl' or sha1_backend == 'openssl' or sha256_backend == 'openssl' +openssl_required = 'openssl' in [https_backend, sha1_backend, sha256_backend] openssl = dependency('openssl', required: openssl_required, default_options: ['default_library=static']) if https_backend == 'auto' and openssl.found() https_backend = 'openssl' From 12068bd4de03c7769f50cd8321f792477692d0ea Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 15:24:08 +0100 Subject: [PATCH 044/535] meson: add missing dots for build options Most of our Meson build options end with a trailing dot, but those for our SHA1 and SHA256 backends don't. Add it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson_options.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meson_options.txt b/meson_options.txt index a7f308d217f29e..d8d283982bcdd0 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -50,9 +50,9 @@ option('regex', type: 'feature', value: 'auto', option('https_backend', type: 'combo', value: 'auto', choices: ['auto', 'openssl', 'CommonCrypto', 'none'], description: 'The HTTPS backend to use when connecting to remotes.') option('sha1_backend', type: 'combo', choices: ['openssl', 'block', 'sha1dc', 'CommonCrypto'], value: 'sha1dc', - description: 'The backend used for hashing objects with the SHA1 object format') + description: 'The backend used for hashing objects with the SHA1 object format.') option('sha256_backend', type: 'combo', choices: ['openssl', 'nettle', 'gcrypt', 'block'], value: 'block', - description: 'The backend used for hashing objects with the SHA256 object format') + description: 'The backend used for hashing objects with the SHA256 object format.') # Build tweaks. option('macos_use_homebrew_gettext', type: 'boolean', value: true, From d2c0b6a86cb0f1a73d9ad5fcffda45497cd7ad42 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 15:24:09 +0100 Subject: [PATCH 045/535] meson: wire up unsafe SHA1 backend In 06c92dafb8 (Makefile: allow specifying a SHA-1 for non-cryptographic uses, 2024-09-26), we have introduced a cryptographically-insecure backend for SHA1 that can optionally be used in some contexts where the processed data is not security relevant. This effort was in-flight with the effort to introduce Meson, so we don't have an equivalent here. Wire up a new build option that lets users pick an unsafe SHA1 backend. Note that for simplicity's sake we have to drop the error condition around an unhandled SHA1 backend. This should be fine though given that Meson verifies the value for combo-options for us. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 40 ++++++++++++++++++++++++++++++---------- meson_options.txt | 2 ++ 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/meson.build b/meson.build index 46f807899b7bae..dc82c23cb4f076 100644 --- a/meson.build +++ b/meson.build @@ -1327,15 +1327,16 @@ endif https_backend = get_option('https_backend') sha1_backend = get_option('sha1_backend') +sha1_unsafe_backend = get_option('sha1_unsafe_backend') sha256_backend = get_option('sha256_backend') -security_framework = dependency('Security', required: 'CommonCrypto' in [https_backend, sha1_backend]) +security_framework = dependency('Security', required: 'CommonCrypto' in [https_backend, sha1_backend, sha1_unsafe_backend]) core_foundation_framework = dependency('CoreFoundation', required: security_framework.found()) if https_backend == 'auto' and security_framework.found() https_backend = 'CommonCrypto' endif -openssl_required = 'openssl' in [https_backend, sha1_backend, sha256_backend] +openssl_required = 'openssl' in [https_backend, sha1_backend, sha1_unsafe_backend, sha256_backend] openssl = dependency('openssl', required: openssl_required, default_options: ['default_library=static']) if https_backend == 'auto' and openssl.found() https_backend = 'openssl' @@ -1368,19 +1369,38 @@ if sha1_backend == 'sha1dc' 'sha1dc/sha1.c', 'sha1dc/ubc_check.c', ] -elif sha1_backend == 'CommonCrypto' +endif +if sha1_backend == 'CommonCrypto' or sha1_unsafe_backend == 'CommonCrypto' + if sha1_backend == 'CommonCrypto' + libgit_c_args += '-DSHA1_APPLE' + endif + if sha1_unsafe_backend == 'CommonCrypto' + libgit_c_args += '-DSHA1_APPLE_UNSAFE' + endif + libgit_c_args += '-DCOMMON_DIGEST_FOR_OPENSSL' - libgit_c_args += '-DSHA1_APPLE' # Apple CommonCrypto requires chunking libgit_c_args += '-DSHA1_MAX_BLOCK_SIZE=1024L*1024L*1024L' -elif sha1_backend == 'openssl' - libgit_c_args += '-DSHA1_OPENSSL' +endif +if sha1_backend == 'openssl' or sha1_unsafe_backend == 'openssl' + if sha1_backend == 'openssl' + libgit_c_args += '-DSHA1_OPENSSL' + endif + if sha1_unsafe_backend == 'openssl' + libgit_c_args += '-DSHA1_OPENSSL_UNSAFE' + endif + libgit_dependencies += openssl -elif sha1_backend == 'block' - libgit_c_args += '-DSHA1_BLK' +endif +if sha1_backend == 'block' or sha1_unsafe_backend == 'block' + if sha1_backend == 'block' + libgit_c_args += '-DSHA1_BLK' + endif + if sha1_unsafe_backend == 'block' + libgit_c_args += '-DSHA1_BLK_UNSAFE' + endif + libgit_sources += 'block-sha1/sha1.c' -else - error('Unhandled SHA1 backend ' + sha1_backend) endif if sha256_backend == 'openssl' diff --git a/meson_options.txt b/meson_options.txt index d8d283982bcdd0..8282b1dea8e852 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -51,6 +51,8 @@ option('https_backend', type: 'combo', value: 'auto', choices: ['auto', 'openssl description: 'The HTTPS backend to use when connecting to remotes.') option('sha1_backend', type: 'combo', choices: ['openssl', 'block', 'sha1dc', 'CommonCrypto'], value: 'sha1dc', description: 'The backend used for hashing objects with the SHA1 object format.') +option('sha1_unsafe_backend', type: 'combo', choices: ['openssl', 'block', 'CommonCrypto', 'none'], value: 'none', + description: 'The backend used for hashing data with the SHA1 object format in case no cryptographic security is needed.') option('sha256_backend', type: 'combo', choices: ['openssl', 'nettle', 'gcrypt', 'block'], value: 'block', description: 'The backend used for hashing objects with the SHA256 object format.') From 6a0ee54f9a3ebf667e86f7110c36b2240df96166 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 15:24:10 +0100 Subject: [PATCH 046/535] meson: provide a summary of configured backends There are a couple of backends from which the user can choose for HTTPS, SHA1, its unsafe variant as well as SHA256. Provide a summary of the configured values to make these more discoverable. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- meson.build | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/meson.build b/meson.build index dc82c23cb4f076..7361eb2eaad422 100644 --- a/meson.build +++ b/meson.build @@ -1943,3 +1943,10 @@ summary({ 'perl': perl_features_enabled, 'python': python.found(), }, section: 'Auto-detected features') + +summary({ + 'https': https_backend, + 'sha1': sha1_backend, + 'sha1_unsafe': sha1_unsafe_backend, + 'sha256': sha256_backend, +}, section: 'Backends') From 5b34dd08d0ffc967b92abe187cc890d52ade5ac7 Mon Sep 17 00:00:00 2001 From: Alexander Shopov Date: Sat, 28 Dec 2024 12:42:18 +0100 Subject: [PATCH 047/535] parse-options: localize mark-up of placeholder text in the short help i18n: expose substitution hint chars in functions and macros to translators For example (based on builtin/commit.c and shortened): the "--author" option takes a name. In source this can be represented as: OPT_STRING(0, "author", &force_author, N_("author"), N_("override author")), When the command is run with "-h" (short help) option (git commit -h), the above definition is displayed as: --[no-]author override author Git does not use translated option names so the first part of the above, "--[no-]author", is given as-is (it is based on the 2nd argument of OPT_STRING). However the string "author" in the pair of "<>", and the explanation "override author for commit" may be translated into user's language. The user's language may use a convention to mark a replaceable part of the command line (called a "placeholder string") differently from enclosing it inside a pair of "<>", but the implementation in parse-options.c hardcodes "<%s>". Allow translators to specify the presentation of a placeholder string for their languages by overriding the "<%s>". In case the translator's writing system is sufficiently different than Latin the "<>" characters can be substituted by an empty string thus effectively skipping them in the output. For example languages with uppercase versions of characters can use that to deliniate replaceability. Alternatively a translator can decide to use characters that are visually close to "<>" but are not interpreted by the shell. Signed-off-by: Alexander Shopov Signed-off-by: Junio C Hamano --- parse-options.c | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/parse-options.c b/parse-options.c index 33bfba0ed4a0ea..3ff6a5d1fac125 100644 --- a/parse-options.c +++ b/parse-options.c @@ -1076,11 +1076,48 @@ static int usage_argh(const struct option *opts, FILE *outfile) !opts->argh || !!strpbrk(opts->argh, "()<>[]|"); if (opts->flags & PARSE_OPT_OPTARG) if (opts->long_name) - s = literal ? "[=%s]" : "[=<%s>]"; + /* + * TRANSLATORS: The "<%s>" part of this string + * stands for an optional value given to a command + * line option in the long form, and "<>" is there + * as a convention to signal that it is a + * placeholder (i.e. the user should substitute it + * with the real value). If your language uses a + * different convention, you can change "<%s>" part + * to match yours, e.g. it might use "|%s|" instead, + * or if the alphabet is different enough it may use + * "%s" without any placeholder signal. Most + * translations leave this message as is. + */ + s = literal ? "[=%s]" : _("[=<%s>]"); else - s = literal ? "[%s]" : "[<%s>]"; + /* + * TRANSLATORS: The "<%s>" part of this string + * stands for an optional value given to a command + * line option in the short form, and "<>" is there + * as a convention to signal that it is a + * placeholder (i.e. the user should substitute it + * with the real value). If your language uses a + * different convention, you can change "<%s>" part + * to match yours, e.g. it might use "|%s|" instead, + * or if the alphabet is different enough it may use + * "%s" without any placeholder signal. Most + * translations leave this message as is. + */ + s = literal ? "[%s]" : _("[<%s>]"); else - s = literal ? " %s" : " <%s>"; + /* + * TRANSLATORS: The "<%s>" part of this string stands for a + * value given to a command line option, and "<>" is there + * as a convention to signal that it is a placeholder + * (i.e. the user should substitute it with the real value). + * If your language uses a different convention, you can + * change "<%s>" part to match yours, e.g. it might use + * "|%s|" instead, or if the alphabet is different enough it + * may use "%s" without any placeholder signal. Most + * translations leave this message as is. + */ + s = literal ? " %s" : _(" <%s>"); return utf8_fprintf(outfile, s, opts->argh ? _(opts->argh) : _("...")); } From 866ea877036ce581e0d3c130f527631cd8b36bdf Mon Sep 17 00:00:00 2001 From: Matteo Bagnolini Date: Fri, 3 Jan 2025 14:00:35 +0100 Subject: [PATCH 048/535] t7110: replace `test -f` with `test_path_is_*` helpers `test -f` and `! test -f` do not provide clear error messages when they fail. To enhance debuggability, use `test_path_is_file` and `test_path_is_missing`, which instead provide more informative error messages. Note that `! test -f` checks if a path is not a file, while `test_path_is_missing` verifies that a path does not exist. In this specific case the tests are meant to check the absence of the path, making `test_path_is_missing` a valid replacement. Signed-off-by: Matteo Bagnolini Signed-off-by: Junio C Hamano --- t/t7110-reset-merge.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/t/t7110-reset-merge.sh b/t/t7110-reset-merge.sh index 61669a2d2102b8..9a335071af6c0d 100755 --- a/t/t7110-reset-merge.sh +++ b/t/t7110-reset-merge.sh @@ -270,13 +270,13 @@ test_expect_success '--merge is ok with added/deleted merge' ' git reset --hard third && rm -f file2 && test_must_fail git merge branch3 && - ! test -f file2 && - test -f file3 && + test_path_is_missing file2 && + test_path_is_file file3 && git diff --exit-code file3 && git diff --exit-code branch3 file3 && git reset --merge HEAD && - ! test -f file3 && - ! test -f file2 && + test_path_is_missing file3 && + test_path_is_missing file2 && git diff --exit-code --cached ' @@ -284,8 +284,8 @@ test_expect_success '--keep fails with added/deleted merge' ' git reset --hard third && rm -f file2 && test_must_fail git merge branch3 && - ! test -f file2 && - test -f file3 && + test_path_is_missing file2 && + test_path_is_file file3 && git diff --exit-code file3 && git diff --exit-code branch3 file3 && test_must_fail git reset --keep HEAD 2>err.log && From 8776470cf379f31d483d8512d28a0eaa47d2e3f2 Mon Sep 17 00:00:00 2001 From: "D. Ben Knoble" Date: Mon, 6 Jan 2025 21:47:06 +0000 Subject: [PATCH 049/535] completion: repair config completion for Zsh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1e0ee4087e (completion: add and use __git_compute_first_level_config_vars_for_section, 2024-02-10) uses an indirect variable syntax that is only valid for Bash, but the Zsh completion code relies on the Bash completion code to function. Zsh supports a different indirect variable expansion using ${(P)var}, but in `emulate ksh` mode does not support Bash's ${!var}. This manifests as completing strange config options like "__git_first_level_config_vars_for_section_remote" as a choice for the command line git config set remote. Using Zsh's C-x ? _complete_debug widget with the cursor at the end of that command line captures a trace, in which we see (some details elided): +__git_complete_config_variable_name:7> __git_compute_first_level_config_vars_for_section remote +__git_compute_first_level_config_vars_for_section:7> local section=remote +__git_compute_first_level_config_vars_for_section:7> __git_compute_config_vars +__git_compute_config_vars:7> test -n $'add.ignoreErrors\nadvice.addEmbeddedRepo\nadvice.addEmptyPathspec\nadvice.addIgnoredFile[…]' +__git_compute_first_level_config_vars_for_section:7> local this_section=__git_first_level_config_vars_for_section_remote +__git_compute_first_level_config_vars_for_section:7> test -n __git_first_level_config_vars_for_section_remote +__git_complete_config_variable_name:7> local this_section=__git_first_level_config_vars_for_section_remote +__git_complete_config_variable_name:7> __gitcomp_nl_append __git_first_level_config_vars_for_section_remote remote. '' ' ' +__gitcomp_nl_append:7> __gitcomp_nl __git_first_level_config_vars_for_section_remote remote. '' ' ' +__gitcomp_nl:7> emulate -L zsh +__gitcomp_nl:7> compset -P '*[=:]' +__gitcomp_nl:7> compadd -Q -S ' ' -p remote. -- __git_first_level_config_vars_for_section_remote We perform the test for __git_compute_config_vars correctly, but the ${!this_section} references are not expanded as expected. Instead, portably expand indirect references through the new __git_indirect. Contrary to some versions you might find online [1], this version avoids echo non-portabilities [2] [3] and correctly quotes the indirect expansion after eval (so that the result is not split or globbed before being handed to printf). [1]: https://unix.stackexchange.com/a/41409/301073 [2]: https://askubuntu.com/questions/715765/mysterious-behavior-of-echo-command#comment1056038_715769 [3]: https://mywiki.wooledge.org/CatEchoLs The following demo program demonstrates how this works: b=1 indirect() { eval printf '%s' "\"\$$1\"" } f() { # Comment this out to see that it works for globals, too. Or, use # a value with spaces like '2 3 4' to see how it handles those. local b=2 local a=b test -n "$(indirect $a)" && echo nice } f When placed in a file "demo", then both bash -x demo and zsh -xc 'emulate ksh -c ". ./demo"' |& tail provide traces showing that "$(indirect $a)" produces 2 (or 1, with the global, or "2 3 4" as a single string, etc.). Signed-off-by: D. Ben Knoble Acked-by: Philippe Blain Signed-off-by: Junio C Hamano --- contrib/completion/git-completion.bash | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 75193ded4bdeda..c0c980d8ae7add 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -2723,12 +2723,17 @@ __git_compute_config_vars_all () __git_config_vars_all="$(git --no-pager help --config)" } +__git_indirect() +{ + eval printf '%s' "\"\$$1\"" +} + __git_compute_first_level_config_vars_for_section () { local section="$1" __git_compute_config_vars local this_section="__git_first_level_config_vars_for_section_${section}" - test -n "${!this_section}" || + test -n "$(__git_indirect "${this_section}")" || printf -v "__git_first_level_config_vars_for_section_${section}" %s \ "$(echo "$__git_config_vars" | awk -F. "/^${section}\.[a-z]/ { print \$2 }")" } @@ -2738,7 +2743,7 @@ __git_compute_second_level_config_vars_for_section () local section="$1" __git_compute_config_vars_all local this_section="__git_second_level_config_vars_for_section_${section}" - test -n "${!this_section}" || + test -n "$(__git_indirect "${this_section}")" || printf -v "__git_second_level_config_vars_for_section_${section}" %s \ "$(echo "$__git_config_vars_all" | awk -F. "/^${section}\. Date: Tue, 7 Jan 2025 02:18:24 -0500 Subject: [PATCH 050/535] t7407: use test_grep There are a few grep calls here that can benefit from test_grep, which produces more user-friendly output when it fails. One of these calls also passes "-sq", which is curious. The "-q" option suppresses the matched output. But test output is either already redirected to /dev/null in non-verbose mode, and in verbose mode it's better to see the output. The "-s" option suppresses errors opening files, but we are just grepping in the "expected" file we just generated, so it should not be needed. Neither of these was really hurting anything, but they are not a style we'd like to see emulated. So get rid of them. (It is also curious to grep in the expected file in the first place, but that is because we are auto-generating the expectation from a Git command. So this is double-checking it did what we wanted). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t7407-submodule-foreach.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t7407-submodule-foreach.sh b/t/t7407-submodule-foreach.sh index 8d7b234beb8b09..77b6d0040e8a8f 100755 --- a/t/t7407-submodule-foreach.sh +++ b/t/t7407-submodule-foreach.sh @@ -426,14 +426,14 @@ test_expect_success 'option-like arguments passed to foreach commands are not lo git submodule foreach "echo be --quiet" > ../expected && git submodule foreach echo be --quiet > ../actual ) && - grep -sq -e "--quiet" expected && + test_grep -e "--quiet" expected && test_cmp expected actual ' test_expect_success 'option-like arguments passed to foreach recurse correctly' ' git -C clone2 submodule foreach --recursive "echo be --an-option" >expect && git -C clone2 submodule foreach --recursive echo be --an-option >actual && - grep -e "--an-option" expect && + test_grep -e "--an-option" expect && test_cmp expect actual ' From 1568d1562eecc31d2062b6d22e37ec03fc3d6747 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 7 Jan 2025 16:26:59 +0100 Subject: [PATCH 051/535] wrapper: allow generating insecure random bytes The `csprng_bytes()` function generates randomness and writes it into a caller-provided buffer. It abstracts over a couple of implementations, where the exact one that is used depends on the platform. These implementations have different guarantees: while some guarantee to never fail (arc4random(3)), others may fail. There are two significant failures to distinguish from one another: - Systemic failure, where e.g. opening "/dev/urandom" fails or when OpenSSL doesn't have a provider configured. - Entropy failure, where the entropy pool is exhausted, and thus the function cannot guarantee strong cryptographic randomness. While we cannot do anything about the former, the latter failure can be acceptable in some situations where we don't care whether or not the randomness can be predicted. Introduce a new `CSPRNG_BYTES_INSECURE` flag that allows callers to opt into weak cryptographic randomness. The exact behaviour of the flag depends on the underlying implementation: - `arc4random_buf()` never returns an error, so it doesn't change. - `getrandom()` pulls from "/dev/urandom" by default, which never blocks on modern systems even when the entropy pool is empty. - `getentropy()` seems to block when there is not enough randomness available, and there is no way of changing that behaviour. - `GtlGenRandom()` doesn't mention anything about its specific failure mode. - The fallback reads from "/dev/urandom", which also returns bytes in case the entropy pool is drained in modern Linux systems. That only leaves OpenSSL with `RAND_bytes()`, which returns an error in case the returned data wouldn't be cryptographically safe. This function is replaced with a call to `RAND_pseudo_bytes()`, which can indicate whether or not the returned data is cryptographically secure via its return value. If it is insecure, and if the `CSPRNG_BYTES_INSECURE` flag is set, then we ignore the insecurity and return the data regardless. It is somewhat questionable whether we really need the flag in the first place, or whether we wouldn't just ignore the potentially-insecure data. But the risk of doing that is that we might have or grow callsites that aren't aware of the potential insecureness of the data in places where it really matters. So using a flag to opt-in to that behaviour feels like the more secure choice. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 2 +- reftable/stack.c | 2 +- t/helper/test-csprng.c | 2 +- t/unit-tests/t-reftable-readwrite.c | 6 +++--- wrapper.c | 24 ++++++++++++++---------- wrapper.h | 16 ++++++++++++---- 6 files changed, 32 insertions(+), 20 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index a9b1c36de27da2..3e754f25bba2aa 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1909,7 +1909,7 @@ static int get_random_minute(void) if (getenv("GIT_TEST_MAINT_SCHEDULER")) return 13; - return git_rand() % 60; + return git_rand(0) % 60; } static int is_launchctl_available(void) diff --git a/reftable/stack.c b/reftable/stack.c index 531660a49f0948..6d0aa774e7e29d 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -659,7 +659,7 @@ int reftable_stack_add(struct reftable_stack *st, static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max) { char buf[100]; - uint32_t rnd = (uint32_t)git_rand(); + uint32_t rnd = (uint32_t)git_rand(0); snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", min, max, rnd); reftable_buf_reset(dest); diff --git a/t/helper/test-csprng.c b/t/helper/test-csprng.c index a4a0aca61773b0..c86dcc4870fed8 100644 --- a/t/helper/test-csprng.c +++ b/t/helper/test-csprng.c @@ -15,7 +15,7 @@ int cmd__csprng(int argc, const char **argv) while (count) { unsigned long chunk = count < sizeof(buf) ? count : sizeof(buf); - if (csprng_bytes(buf, chunk) < 0) { + if (csprng_bytes(buf, chunk, 0) < 0) { perror("failed to read"); return 5; } diff --git a/t/unit-tests/t-reftable-readwrite.c b/t/unit-tests/t-reftable-readwrite.c index 6b75a419b9d3bb..f22b9775639e3e 100644 --- a/t/unit-tests/t-reftable-readwrite.c +++ b/t/unit-tests/t-reftable-readwrite.c @@ -108,8 +108,8 @@ static void t_log_buffer_size(void) hash, to ensure that the compressed part is larger than the original. */ for (i = 0; i < REFTABLE_HASH_SIZE_SHA1; i++) { - log.value.update.old_hash[i] = (uint8_t)(git_rand() % 256); - log.value.update.new_hash[i] = (uint8_t)(git_rand() % 256); + log.value.update.old_hash[i] = (uint8_t)(git_rand(0) % 256); + log.value.update.new_hash[i] = (uint8_t)(git_rand(0) % 256); } reftable_writer_set_limits(w, update_index, update_index); err = reftable_writer_add_log(w, &log); @@ -325,7 +325,7 @@ static void t_log_zlib_corruption(void) }; for (i = 0; i < sizeof(message) - 1; i++) - message[i] = (uint8_t)(git_rand() % 64 + ' '); + message[i] = (uint8_t)(git_rand(0) % 64 + ' '); reftable_writer_set_limits(w, 1, 1); diff --git a/wrapper.c b/wrapper.c index fa79fd6ec9e144..8b985931490d62 100644 --- a/wrapper.c +++ b/wrapper.c @@ -479,7 +479,7 @@ int git_mkstemps_mode(char *pattern, int suffix_len, int mode) for (count = 0; count < TMP_MAX; ++count) { int i; uint64_t v; - if (csprng_bytes(&v, sizeof(v)) < 0) + if (csprng_bytes(&v, sizeof(v), 0) < 0) return error_errno("unable to get random bytes for temporary file"); /* Fill in the random bits. */ @@ -750,7 +750,7 @@ int open_nofollow(const char *path, int flags) #endif } -int csprng_bytes(void *buf, size_t len) +int csprng_bytes(void *buf, size_t len, MAYBE_UNUSED unsigned flags) { #if defined(HAVE_ARC4RANDOM) || defined(HAVE_ARC4RANDOM_LIBBSD) /* This function never returns an error. */ @@ -785,14 +785,18 @@ int csprng_bytes(void *buf, size_t len) return -1; return 0; #elif defined(HAVE_OPENSSL_CSPRNG) - int res = RAND_bytes(buf, len); - if (res == 1) + switch (RAND_pseudo_bytes(buf, len)) { + case 1: return 0; - if (res == -1) - errno = ENOTSUP; - else + case 0: + if (flags & CSPRNG_BYTES_INSECURE) + return 0; errno = EIO; - return -1; + return -1; + default: + errno = ENOTSUP; + return -1; + } #else ssize_t res; char *p = buf; @@ -816,11 +820,11 @@ int csprng_bytes(void *buf, size_t len) #endif } -uint32_t git_rand(void) +uint32_t git_rand(unsigned flags) { uint32_t result; - if (csprng_bytes(&result, sizeof(result)) < 0) + if (csprng_bytes(&result, sizeof(result), flags) < 0) die(_("unable to get random bytes")); return result; diff --git a/wrapper.h b/wrapper.h index a6b3e1f09ecdef..7df824e34a906e 100644 --- a/wrapper.h +++ b/wrapper.h @@ -127,18 +127,26 @@ int open_nofollow(const char *path, int flags); void sleep_millisec(int millisec); +enum { + /* + * Accept insecure bytes, which some CSPRNG implementations may return + * in case the entropy pool has been exhausted. + */ + CSPRNG_BYTES_INSECURE = (1 << 0), +}; + /* * Generate len bytes from the system cryptographically secure PRNG. * Returns 0 on success and -1 on error, setting errno. The inability to - * satisfy the full request is an error. + * satisfy the full request is an error. Accepts CSPRNG flags. */ -int csprng_bytes(void *buf, size_t len); +int csprng_bytes(void *buf, size_t len, unsigned flags); /* * Returns a random uint32_t, uniformly distributed across all possible - * values. + * values. Accepts CSPRNG flags. */ -uint32_t git_rand(void); +uint32_t git_rand(unsigned flags); /* Provide log2 of the given `size_t`. */ static inline unsigned log2u(uintmax_t sz) From 0b4f8afef6b744d5aa92883c5a6c1985be67cc7c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 7 Jan 2025 16:27:00 +0100 Subject: [PATCH 052/535] reftable/stack: accept insecure random bytes The reftable library uses randomness in two call paths: - When reading a stack in case some of the referenced tables disappears. The randomness is used to delay the next read by a couple of milliseconds. - When writing a new table, where the randomness gets appended to the table name (e.g. "0x000000000001-0x000000000002-0b1d8ddf.ref"). In neither of these cases do we need strong randomness. Unfortunately though, we have observed test failures caused by the former case. In t0610 we have a test that spawns a 100 processes at once, all of which try to write a new table to the stack. And given that all of the processes will require randomness, it can happen that these processes make the entropy pool run dry, which will then cause us to die: + test_seq 100 + printf %s commit\trefs/heads/branch-%s\n 68d032e9edd3481ac96382786ececc37ec28709e 1 + printf %s commit\trefs/heads/branch-%s\n 68d032e9edd3481ac96382786ececc37ec28709e 2 ... + git update-ref refs/heads/branch-98 HEAD + git update-ref refs/heads/branch-97 HEAD + git update-ref refs/heads/branch-99 HEAD + git update-ref refs/heads/branch-100 HEAD fatal: unable to get random bytes fatal: unable to get random bytes fatal: unable to get random bytes fatal: unable to get random bytes fatal: unable to get random bytes fatal: unable to get random bytes fatal: unable to get random bytes The report was for NonStop, which uses OpenSSL as the backend for randomness. In the preceding commit we have adapted that backend to also return randomness in case the entropy pool is empty and the caller passes the `CSPRNG_BYTES_INSECURE` flag. Do so to fix the issue. Reported-by: Randall S. Becker Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/reftable/stack.c b/reftable/stack.c index 6d0aa774e7e29d..572a74e00f9ed6 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -493,7 +493,7 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, close(fd); fd = -1; - delay = delay + (delay * rand()) / RAND_MAX + 1; + delay = delay + (delay * git_rand(CSPRNG_BYTES_INSECURE)) / UINT32_MAX + 1; sleep_millisec(delay); } @@ -659,7 +659,7 @@ int reftable_stack_add(struct reftable_stack *st, static int format_name(struct reftable_buf *dest, uint64_t min, uint64_t max) { char buf[100]; - uint32_t rnd = (uint32_t)git_rand(0); + uint32_t rnd = git_rand(CSPRNG_BYTES_INSECURE); snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", min, max, rnd); reftable_buf_reset(dest); From ca7158076f9f6e0ee1c84595aaf44194a9880a72 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 7 Jan 2025 10:29:15 -0600 Subject: [PATCH 053/535] fsck: reject misconfigured fsck.skipList In Git, fsck operations can ignore known broken objects via the `fsck.skipList` configuration. This option expects a path to a file with the list of object names. When the configuration is specified without a path, an error message is printed, but the command continues as if the configuration was not set. Configuring `fsck.skipList` without a value is a misconfiguration so config parsing should be more strict and reject it. Update `git_fsck_config()` to no longer ignore misconfiguration of `fsck.skipList`. The same behavior is also present for `fetch.fsck.skipList` and `receive.fsck.skipList` so the configuration parsers for these are updated to ensure the related operations remain consistent. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- fetch-pack.c | 2 +- fsck.c | 2 +- t/t5504-fetch-receive-strict.sh | 10 ++++++++++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c2e9103f112c1e..0158faf537b7a7 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -174,7 +174,7 @@ static int receive_pack_config(const char *var, const char *value, char *path; if (git_config_pathname(&path, var, value)) - return 1; + return -1; strbuf_addf(&fsck_msg_types, "%cskiplist=%s", fsck_msg_types.len ? ',' : '=', path); free(path); diff --git a/fetch-pack.c b/fetch-pack.c index 3a227721ed0935..055e8c3643015c 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1867,7 +1867,7 @@ int fetch_pack_fsck_config(const char *var, const char *value, char *path ; if (git_config_pathname(&path, var, value)) - return 0; + return -1; strbuf_addf(msg_types, "%cskiplist=%s", msg_types->len ? ',' : '=', path); free(path); diff --git a/fsck.c b/fsck.c index 87ce999a49c548..9fc4c25ffd59ba 100644 --- a/fsck.c +++ b/fsck.c @@ -1353,7 +1353,7 @@ int git_fsck_config(const char *var, const char *value, struct strbuf sb = STRBUF_INIT; if (git_config_pathname(&path, var, value)) - return 1; + return -1; strbuf_addf(&sb, "skiplist=%s", path); free(path); fsck_set_msg_types(options, sb.buf); diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index 8212a70be8ae39..e273ab29c7f88f 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -167,6 +167,8 @@ test_expect_success 'fsck with unsorted skipList' ' test_expect_success 'fsck with invalid or bogus skipList input' ' git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck && + test_must_fail git -c fsck.skipList -c fsck.missingEmail=ignore fsck 2>err && + test_grep "unable to parse '\'fsck.skiplist\'' from command-line config" err && test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err && test_grep "could not open.*: does-not-exist" err && test_must_fail git -c fsck.skipList=.git/config -c fsck.missingEmail=ignore fsck 2>err && @@ -213,6 +215,11 @@ test_expect_success 'fsck with exhaustive accepted skipList input (various types test_must_be_empty err ' +test_expect_success 'receive-pack with missing receive.fsck.skipList path' ' + test_must_fail git -c receive.fsck.skipList receive-pack dst 2>err && + test_grep "unable to parse '\'receive.fsck.skiplist\'' from command-line config" err +' + test_expect_success 'push with receive.fsck.skipList' ' git push . $commit:refs/heads/bogus && rm -rf dst && @@ -255,6 +262,9 @@ test_expect_success 'fetch with fetch.fsck.skipList' ' test_must_fail git --git-dir=dst/.git fetch "file://$(pwd)" $refspec && # Invalid and/or bogus skipList input + test_must_fail git --git-dir=dst/.git -c fetch.fsck.skipList fetch \ + "file://$(pwd)" $refspec 2>err && + test_grep "unable to parse '\'fetch.fsck.skiplist\'' from command-line config" err && git --git-dir=dst/.git config fetch.fsck.skipList /dev/null && test_must_fail git --git-dir=dst/.git fetch "file://$(pwd)" $refspec && git --git-dir=dst/.git config fetch.fsck.skipList does-not-exist && From 6a63995335e7a941d34bdafb93a56f789ebeed75 Mon Sep 17 00:00:00 2001 From: Matthew Hughes Date: Tue, 7 Jan 2025 21:24:21 +0000 Subject: [PATCH 054/535] docs: fix typesetting of merge driver placeholders Following the `CodingGuidlines`, since these placeholders are literal they should be typeset verbatim, so fix some that aren't. Signed-off-by: Matthew Hughes Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index e6150595af8210..5d12b785491fba 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -1166,7 +1166,7 @@ internal merge and the final merge. The merge driver can learn the pathname in which the merged result will be stored via placeholder `%P`. The conflict labels to be used for the common ancestor, local head and other head can be passed by -using '%S', '%X' and '%Y` respectively. +using `%S`, `%X` and `%Y` respectively. `conflict-marker-size` ^^^^^^^^^^^^^^^^^^^^^^ From 43850dcf9c4ca6407abdd167aa3acc098e0e0f7c Mon Sep 17 00:00:00 2001 From: Seyi Kuforiji Date: Thu, 9 Jan 2025 15:09:52 +0100 Subject: [PATCH 055/535] t/unit-tests: convert hash to use clar test framework Adapt the hash test functions to clar framework by using clar assertions where necessary. Following the consensus to convert the unit-tests scripts found in the t/unit-tests folder to clar driven by Patrick Steinhardt. Test functions are structured as a standalone to test individual hash string and literal case. Mentored-by: Patrick Steinhardt Signed-off-by: Seyi Kuforiji Signed-off-by: Junio C Hamano --- Makefile | 2 +- t/meson.build | 2 +- t/unit-tests/{t-hash.c => u-hash.c} | 71 +++++++++++++++++++---------- 3 files changed, 50 insertions(+), 25 deletions(-) rename t/unit-tests/{t-hash.c => u-hash.c} (80%) diff --git a/Makefile b/Makefile index 97e8385b6643b9..d3011e30f73266 100644 --- a/Makefile +++ b/Makefile @@ -1338,6 +1338,7 @@ THIRD_PARTY_SOURCES += $(UNIT_TEST_DIR)/clar/% THIRD_PARTY_SOURCES += $(UNIT_TEST_DIR)/clar/clar/% CLAR_TEST_SUITES += u-ctype +CLAR_TEST_SUITES += u-hash CLAR_TEST_SUITES += u-strvec CLAR_TEST_PROG = $(UNIT_TEST_BIN)/unit-tests$(X) CLAR_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(CLAR_TEST_SUITES)) @@ -1345,7 +1346,6 @@ CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/clar/clar.o CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/unit-test.o UNIT_TEST_PROGRAMS += t-example-decorate -UNIT_TEST_PROGRAMS += t-hash UNIT_TEST_PROGRAMS += t-hashmap UNIT_TEST_PROGRAMS += t-mem-pool UNIT_TEST_PROGRAMS += t-oid-array diff --git a/t/meson.build b/t/meson.build index 602ebfe6a26021..7b35eadbc8a99e 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1,5 +1,6 @@ clar_test_suites = [ 'unit-tests/u-ctype.c', + 'unit-tests/u-hash.c', 'unit-tests/u-strvec.c', ] @@ -41,7 +42,6 @@ test('unit-tests', clar_unit_tests) unit_test_programs = [ 'unit-tests/t-example-decorate.c', - 'unit-tests/t-hash.c', 'unit-tests/t-hashmap.c', 'unit-tests/t-mem-pool.c', 'unit-tests/t-oid-array.c', diff --git a/t/unit-tests/t-hash.c b/t/unit-tests/u-hash.c similarity index 80% rename from t/unit-tests/t-hash.c rename to t/unit-tests/u-hash.c index e62647019bcd47..a0320efe4b88e0 100644 --- a/t/unit-tests/t-hash.c +++ b/t/unit-tests/u-hash.c @@ -1,14 +1,11 @@ -#include "test-lib.h" +#include "unit-test.h" #include "hex.h" #include "strbuf.h" static void check_hash_data(const void *data, size_t data_length, const char *expected_hashes[]) { - if (!check(data != NULL)) { - test_msg("BUG: NULL data pointer provided"); - return; - } + cl_assert(data != NULL); for (size_t i = 1; i < ARRAY_SIZE(hash_algos); i++) { git_hash_ctx ctx; @@ -19,66 +16,94 @@ static void check_hash_data(const void *data, size_t data_length, algop->update_fn(&ctx, data, data_length); algop->final_fn(hash, &ctx); - if (!check_str(hash_to_hex_algop(hash, algop), expected_hashes[i - 1])) - test_msg("result does not match with the expected for %s\n", hash_algos[i].name); + cl_assert_equal_s(hash_to_hex_algop(hash,algop), expected_hashes[i - 1]); } } /* Works with a NUL terminated string. Doesn't work if it should contain a NUL character. */ #define TEST_HASH_STR(data, expected_sha1, expected_sha256) do { \ const char *expected_hashes[] = { expected_sha1, expected_sha256 }; \ - TEST(check_hash_data(data, strlen(data), expected_hashes), \ - "SHA1 and SHA256 (%s) works", #data); \ + check_hash_data(data, strlen(data), expected_hashes); \ } while (0) /* Only works with a literal string, useful when it contains a NUL character. */ #define TEST_HASH_LITERAL(literal, expected_sha1, expected_sha256) do { \ const char *expected_hashes[] = { expected_sha1, expected_sha256 }; \ - TEST(check_hash_data(literal, (sizeof(literal) - 1), expected_hashes), \ - "SHA1 and SHA256 (%s) works", #literal); \ + check_hash_data(literal, (sizeof(literal) - 1), expected_hashes); \ } while (0) -int cmd_main(int argc UNUSED, const char **argv UNUSED) +void test_hash__empty_string(void) { - struct strbuf aaaaaaaaaa_100000 = STRBUF_INIT; - struct strbuf alphabet_100000 = STRBUF_INIT; - - strbuf_addstrings(&aaaaaaaaaa_100000, "aaaaaaaaaa", 100000); - strbuf_addstrings(&alphabet_100000, "abcdefghijklmnopqrstuvwxyz", 100000); - TEST_HASH_STR("", "da39a3ee5e6b4b0d3255bfef95601890afd80709", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"); +} + +void test_hash__single_character(void) +{ TEST_HASH_STR("a", "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb"); +} + +void test_hash__multi_character(void) +{ TEST_HASH_STR("abc", "a9993e364706816aba3e25717850c26c9cd0d89d", "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"); +} + +void test_hash__message_digest(void) +{ TEST_HASH_STR("message digest", "c12252ceda8be8994d5fa0290a47231c1d16aae3", "f7846f55cf23e14eebeab5b4e1550cad5b509e3348fbc4efa3a1413d393cb650"); +} + +void test_hash__alphabet(void) +{ TEST_HASH_STR("abcdefghijklmnopqrstuvwxyz", "32d10c7b8cf96570ca04ce37f2a19d84240d3a89", "71c480df93d6ae2f1efad1447c66c9525e316218cf51fc8d9ed832f2daf18b73"); +} + +void test_hash__aaaaaaaaaa_100000(void) +{ + struct strbuf aaaaaaaaaa_100000 = STRBUF_INIT; + strbuf_addstrings(&aaaaaaaaaa_100000, "aaaaaaaaaa", 100000); TEST_HASH_STR(aaaaaaaaaa_100000.buf, "34aa973cd4c4daa4f61eeb2bdbad27316534016f", "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"); + strbuf_release(&aaaaaaaaaa_100000); +} + +void test_hash__alphabet_100000(void) +{ + struct strbuf alphabet_100000 = STRBUF_INIT; + strbuf_addstrings(&alphabet_100000, "abcdefghijklmnopqrstuvwxyz", 100000); TEST_HASH_STR(alphabet_100000.buf, "e7da7c55b3484fdf52aebec9cbe7b85a98f02fd4", "e406ba321ca712ad35a698bf0af8d61fc4dc40eca6bdcea4697962724ccbde35"); + strbuf_release(&alphabet_100000); +} + +void test_hash__zero_blob_literal(void) +{ TEST_HASH_LITERAL("blob 0\0", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813"); +} + +void test_hash__three_blob_literal(void) +{ TEST_HASH_LITERAL("blob 3\0abc", "f2ba8f84ab5c1bce84a7b441cb1959cfc7093b7f", "c1cf6e465077930e88dc5136641d402f72a229ddd996f627d60e9639eaba35a6"); +} + +void test_hash__zero_tree_literal(void) +{ TEST_HASH_LITERAL("tree 0\0", "4b825dc642cb6eb9a060e54bf8d69288fbee4904", "6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321"); - - strbuf_release(&aaaaaaaaaa_100000); - strbuf_release(&alphabet_100000); - - return test_done(); } From 21e1b4486586d3a15d2d7bf0479e77636359b816 Mon Sep 17 00:00:00 2001 From: Adam Johnson Date: Wed, 8 Jan 2025 23:35:23 +0000 Subject: [PATCH 056/535] difftool docs: restore correct position of tool list 2a9dfdf260 (difftool docs: de-duplicate configuration sections, 2022-09-07) moved the difftool documentation, but missed moving this "include" line that includes the generated list of diff tools, as referenced in the moved text. Restore the correct position of the included list. Signed-off-by: Adam Johnson Signed-off-by: Junio C Hamano --- Documentation/config/diff.txt | 2 -- Documentation/config/difftool.txt | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/config/diff.txt b/Documentation/config/diff.txt index fdae13a2122935..1135a62a0ad3de 100644 --- a/Documentation/config/diff.txt +++ b/Documentation/config/diff.txt @@ -218,8 +218,6 @@ endif::git-diff[] Set this option to `true` to make the diff driver cache the text conversion outputs. See linkgit:gitattributes[5] for details. -include::{build_dir}/mergetools-diff.txt[] - `diff.indentHeuristic`:: Set this option to `false` to disable the default heuristics that shift diff hunk boundaries to make patches easier to read. diff --git a/Documentation/config/difftool.txt b/Documentation/config/difftool.txt index 447c40d85a289d..6cd47331a91077 100644 --- a/Documentation/config/difftool.txt +++ b/Documentation/config/difftool.txt @@ -13,6 +13,8 @@ diff.guitool:: and requires that a corresponding difftool..cmd variable is defined. +include::{build_dir}/mergetools-diff.txt[] + difftool..cmd:: Specify the command to invoke the specified diff tool. The specified command is evaluated in shell with the following From 949bb8f74f6db7405d6ad8bbf02ebc42a947801d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Jan 2025 03:28:18 -0500 Subject: [PATCH 057/535] run_diff_files(): delay allocation of combine_diff_path While looping over the index entries, when we see a higher level stage the first thing we do is allocate a combine_diff_path struct for it. But this can leak; if check_removed() returns an error, we'll continue to the next iteration of the loop without cleaning up. We can fix this by just delaying the allocation by a few lines. I don't think this leak is triggered in the test suite, but it's pretty easy to see by inspection. My ulterior motive here is that the delayed allocation means we have all of the data needed to initialize "dpath" at the time of malloc, making it easier to factor out a constructor function. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff-lib.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/diff-lib.c b/diff-lib.c index c6d3bc4d37d557..85b8f1fa596ed3 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -156,18 +156,6 @@ void run_diff_files(struct rev_info *revs, unsigned int option) size_t path_len; struct stat st; - path_len = ce_namelen(ce); - - dpath = xmalloc(combine_diff_path_size(5, path_len)); - dpath->path = (char *) &(dpath->parent[5]); - - dpath->next = NULL; - memcpy(dpath->path, ce->name, path_len); - dpath->path[path_len] = '\0'; - oidclr(&dpath->oid, the_repository->hash_algo); - memset(&(dpath->parent[0]), 0, - sizeof(struct combine_diff_parent)*5); - changed = check_removed(ce, &st); if (!changed) wt_mode = ce_mode_from_stat(ce, st.st_mode); @@ -178,7 +166,19 @@ void run_diff_files(struct rev_info *revs, unsigned int option) } wt_mode = 0; } + + path_len = ce_namelen(ce); + + dpath = xmalloc(combine_diff_path_size(5, path_len)); + dpath->path = (char *) &(dpath->parent[5]); + + dpath->next = NULL; + memcpy(dpath->path, ce->name, path_len); + dpath->path[path_len] = '\0'; + oidclr(&dpath->oid, the_repository->hash_algo); dpath->mode = wt_mode; + memset(&(dpath->parent[0]), 0, + sizeof(struct combine_diff_parent)*5); while (i < entries) { struct cache_entry *nce = istate->cache[i]; From 706779344155823518745a19515601905877c41f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Jan 2025 03:32:36 -0500 Subject: [PATCH 058/535] combine-diff: add combine_diff_path_new() The combine_diff_path struct has variable size, since it embeds both the memory allocation for the path field as well as a variable-sized parent array. This makes allocating one a bit tricky. We have a helper to compute the required size, but it's up to individual sites to actually initialize all of the fields. Let's provide a constructor function to make that a little nicer. Besides being shorter, it also hides away tricky bits like the computation of the "path" pointer (which is right after the "parent" flex array). As a bonus, using the same constructor everywhere means that we'll consistently initialize all parts of the struct. A few code paths left the parent array unitialized. This didn't cause any bugs, but we'll be able to simplify some code in the next few patches knowing that the parent fields have all been zero'd. This also gets rid of some questionable uses of "int" to store buffer lengths. Though we do use them to allocate, I don't think there are any integer overflow vulnerabilities here (the allocation helper promotes them to size_t and checks arithmetic for overflow, and the actual memcpy of the bytes is done using the possibly-truncated "int" value). Sadly we can't use the FLEX_* macros to simplify the allocation here, because there are two variable-sized parts to the struct (and those macros only handle one). Nor can we get stop publicly declaring combine_diff_path_size(). This patch does not touch the code in path_appendnew() at all, which is not ready to be moved to our new constructor for a few reasons: - path_appendnew() has a memory-reuse optimization where it tries to reuse combine_diff_path structs rather than freeing and reallocating. - path_appendnew() does not create the struct from a single path string, but rather allocates and copies into the buffer from multiple sources. These can be addressed by some refactoring, but let's leave it as-is for now. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- combine-diff.c | 40 ++++++++++++++++++++++++++-------------- diff-lib.c | 29 ++++++----------------------- diff.h | 5 +++++ 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/combine-diff.c b/combine-diff.c index 641bc92dbdbc13..45548fd438321f 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -47,22 +47,13 @@ static struct combine_diff_path *intersect_paths( if (!n) { for (i = 0; i < q->nr; i++) { - int len; - const char *path; if (diff_unmodified_pair(q->queue[i])) continue; - path = q->queue[i]->two->path; - len = strlen(path); - p = xmalloc(combine_diff_path_size(num_parent, len)); - p->path = (char *) &(p->parent[num_parent]); - memcpy(p->path, path, len); - p->path[len] = 0; - p->next = NULL; - memset(p->parent, 0, - sizeof(p->parent[0]) * num_parent); - - oidcpy(&p->oid, &q->queue[i]->two->oid); - p->mode = q->queue[i]->two->mode; + p = combine_diff_path_new(q->queue[i]->two->path, + strlen(q->queue[i]->two->path), + q->queue[i]->two->mode, + &q->queue[i]->two->oid, + num_parent); oidcpy(&p->parent[n].oid, &q->queue[i]->one->oid); p->parent[n].mode = q->queue[i]->one->mode; p->parent[n].status = q->queue[i]->status; @@ -1667,3 +1658,24 @@ void diff_tree_combined_merge(const struct commit *commit, diff_tree_combined(&commit->object.oid, &parents, rev); oid_array_clear(&parents); } + +struct combine_diff_path *combine_diff_path_new(const char *path, + size_t path_len, + unsigned int mode, + const struct object_id *oid, + size_t num_parents) +{ + struct combine_diff_path *p; + + p = xmalloc(combine_diff_path_size(num_parents, path_len)); + p->path = (char *)&(p->parent[num_parents]); + memcpy(p->path, path, path_len); + p->path[path_len] = 0; + p->next = NULL; + p->mode = mode; + oidcpy(&p->oid, oid); + + memset(p->parent, 0, sizeof(p->parent[0]) * num_parents); + + return p; +} diff --git a/diff-lib.c b/diff-lib.c index 85b8f1fa596ed3..471ef996146bb8 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -153,7 +153,6 @@ void run_diff_files(struct rev_info *revs, unsigned int option) struct diff_filepair *pair; unsigned int wt_mode = 0; int num_compare_stages = 0; - size_t path_len; struct stat st; changed = check_removed(ce, &st); @@ -167,18 +166,8 @@ void run_diff_files(struct rev_info *revs, unsigned int option) wt_mode = 0; } - path_len = ce_namelen(ce); - - dpath = xmalloc(combine_diff_path_size(5, path_len)); - dpath->path = (char *) &(dpath->parent[5]); - - dpath->next = NULL; - memcpy(dpath->path, ce->name, path_len); - dpath->path[path_len] = '\0'; - oidclr(&dpath->oid, the_repository->hash_algo); - dpath->mode = wt_mode; - memset(&(dpath->parent[0]), 0, - sizeof(struct combine_diff_parent)*5); + dpath = combine_diff_path_new(ce->name, ce_namelen(ce), + wt_mode, null_oid(), 5); while (i < entries) { struct cache_entry *nce = istate->cache[i]; @@ -405,16 +394,10 @@ static int show_modified(struct rev_info *revs, if (revs->combine_merges && !cached && (!oideq(oid, &old_entry->oid) || !oideq(&old_entry->oid, &new_entry->oid))) { struct combine_diff_path *p; - int pathlen = ce_namelen(new_entry); - - p = xmalloc(combine_diff_path_size(2, pathlen)); - p->path = (char *) &p->parent[2]; - p->next = NULL; - memcpy(p->path, new_entry->name, pathlen); - p->path[pathlen] = 0; - p->mode = mode; - oidclr(&p->oid, the_repository->hash_algo); - memset(p->parent, 0, 2 * sizeof(struct combine_diff_parent)); + + p = combine_diff_path_new(new_entry->name, + ce_namelen(new_entry), + mode, null_oid(), 2); p->parent[0].status = DIFF_STATUS_MODIFIED; p->parent[0].mode = new_entry->ce_mode; oidcpy(&p->parent[0].oid, &new_entry->oid); diff --git a/diff.h b/diff.h index 6e6007c17b01ad..5cddd5a8706e40 100644 --- a/diff.h +++ b/diff.h @@ -486,6 +486,11 @@ struct combine_diff_path { #define combine_diff_path_size(n, l) \ st_add4(sizeof(struct combine_diff_path), (l), 1, \ st_mult(sizeof(struct combine_diff_parent), (n))) +struct combine_diff_path *combine_diff_path_new(const char *path, + size_t path_len, + unsigned int mode, + const struct object_id *oid, + size_t num_parents); void show_combined_diff(struct combine_diff_path *elem, int num_parent, struct rev_info *); From 5173099aae25bedf7a87225891d124569cba7076 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Jan 2025 03:33:10 -0500 Subject: [PATCH 059/535] tree-diff: clear parent array in path_appendnew() All of the other functions which allocate a combine_diff_path struct zero out the parent array, but this code path does not. There's no bug, since our caller will fill in most of the fields. But leaving the unused fields (like combine_diff_parent.path) uninitialized makes working with the struct more error-prone than it needs to be. Let's just zero the parent field to be consistent with the combine_diff_path_new() allocator. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- tree-diff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tree-diff.c b/tree-diff.c index d9237ffd9bf6fe..24f7b5912c91f7 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -151,8 +151,6 @@ static int emit_diff_first_parent_only(struct diff_options *opt, struct combine_ * process(p); * p = pprev; * ; don't forget to free tail->next in the end - * - * p->parent[] remains uninitialized. */ static struct combine_diff_path *path_appendnew(struct combine_diff_path *last, int nparent, const struct strbuf *base, const char *path, int pathlen, @@ -187,6 +185,8 @@ static struct combine_diff_path *path_appendnew(struct combine_diff_path *last, p->mode = mode; oidcpy(&p->oid, oid ? oid : null_oid()); + memset(p->parent, 0, sizeof(p->parent[0]) * nparent); + return p; } From 3a0599788fd38822dcd2f32de538afdd36a478aa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Jan 2025 03:42:29 -0500 Subject: [PATCH 060/535] combine-diff: use pointer for parent paths Commit d76ce4f734 (log,diff-tree: add --combined-all-paths option, 2019-02-07) added a "path" field to each combine_diff_parent struct. It's defined as a strbuf, but this is overkill. We never manipulate the buffer beyond inserting a single string into it. And in fact there's a small bug: we zero the parent structs, including the path strbufs. For the 0th parent, we strbuf_init() the strbuf before adding to it. But for subsequent parents, we never do the init. This is technically violating the strbuf API, though the code there is resilient enough to handle this zero'd state. This patch switches us to just store an allocated string pointer. Zeroing it is enough to properly initialize it there (modulo the usual assumption we make that a NULL pointer is all-zeroes). And as a bonus, we can just check for a non-NULL value to see if it is present, rather than repeating the combined_all_paths logic at each site. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- combine-diff.c | 30 +++++++++++------------------- diff.h | 2 +- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/combine-diff.c b/combine-diff.c index 45548fd438321f..ae3cbfc699ccc0 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -60,9 +60,7 @@ static struct combine_diff_path *intersect_paths( if (combined_all_paths && filename_changed(p->parent[n].status)) { - strbuf_init(&p->parent[n].path, 0); - strbuf_addstr(&p->parent[n].path, - q->queue[i]->one->path); + p->parent[n].path = xstrdup(q->queue[i]->one->path); } *tail = p; tail = &p->next; @@ -83,9 +81,7 @@ static struct combine_diff_path *intersect_paths( /* p->path not in q->queue[]; drop it */ *tail = p->next; for (j = 0; j < num_parent; j++) - if (combined_all_paths && - filename_changed(p->parent[j].status)) - strbuf_release(&p->parent[j].path); + free(p->parent[j].path); free(p); continue; } @@ -101,8 +97,7 @@ static struct combine_diff_path *intersect_paths( p->parent[n].status = q->queue[i]->status; if (combined_all_paths && filename_changed(p->parent[n].status)) - strbuf_addstr(&p->parent[n].path, - q->queue[i]->one->path); + p->parent[n].path = xstrdup(q->queue[i]->one->path); tail = &p->next; i++; @@ -987,8 +982,9 @@ static void show_combined_header(struct combine_diff_path *elem, if (rev->combined_all_paths) { for (i = 0; i < num_parent; i++) { - char *path = filename_changed(elem->parent[i].status) - ? elem->parent[i].path.buf : elem->path; + const char *path = elem->parent[i].path ? + elem->parent[i].path : + elem->path; if (elem->parent[i].status == DIFF_STATUS_ADDED) dump_quoted_path("--- ", "", "/dev/null", line_prefix, c_meta, c_reset); @@ -1269,12 +1265,10 @@ static void show_raw_diff(struct combine_diff_path *p, int num_parent, struct re for (i = 0; i < num_parent; i++) if (rev->combined_all_paths) { - if (filename_changed(p->parent[i].status)) - write_name_quoted(p->parent[i].path.buf, stdout, - inter_name_termination); - else - write_name_quoted(p->path, stdout, - inter_name_termination); + const char *path = p->parent[i].path ? + p->parent[i].path : + p->path; + write_name_quoted(path, stdout, inter_name_termination); } write_name_quoted(p->path, stdout, line_termination); } @@ -1636,9 +1630,7 @@ void diff_tree_combined(const struct object_id *oid, struct combine_diff_path *tmp = paths; paths = paths->next; for (i = 0; i < num_parent; i++) - if (rev->combined_all_paths && - filename_changed(tmp->parent[i].status)) - strbuf_release(&tmp->parent[i].path); + free(tmp->parent[i].path); free(tmp); } diff --git a/diff.h b/diff.h index 5cddd5a8706e40..f5f6ea00fb3770 100644 --- a/diff.h +++ b/diff.h @@ -480,7 +480,7 @@ struct combine_diff_path { char status; unsigned int mode; struct object_id oid; - struct strbuf path; + char *path; } parent[FLEX_ARRAY]; }; #define combine_diff_path_size(n, l) \ From 30f7414ca17bc675105d3d731a827778d7367b11 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Jan 2025 03:42:48 -0500 Subject: [PATCH 061/535] diff: add a comment about combine_diff_path.parent.path We only fill in the per-parent "path" field when it differs from what's in combine_diff_path.path (and even then only when the option is appropriate). Let's document that. Suggested-by: Wink Saville Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/diff.h b/diff.h index f5f6ea00fb3770..60e7db4ad6cc76 100644 --- a/diff.h +++ b/diff.h @@ -480,6 +480,12 @@ struct combine_diff_path { char status; unsigned int mode; struct object_id oid; + /* + * This per-parent path is filled only when doing a combined + * diff with revs.combined_all_paths set, and only if the path + * differs from the post-image (e.g., a rename or copy). + * Otherwise it is left NULL. + */ char *path; } parent[FLEX_ARRAY]; }; From ca3abe41d71c4789ca00cba0ca2b6c22d67f08a3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Jan 2025 03:44:21 -0500 Subject: [PATCH 062/535] run_diff_files(): de-mystify the size of combine_diff_path struct We allocate a combine_diff_path struct with space for 5 parents. Why 5? The history is not particularly enlightening. The allocation comes from b4b1550315 (Don't instantiate structures with FAMs., 2006-06-18), which just switched to xmalloc from a stack struct with 5 elements. That struct changed to 5 from 4 in 2454c962fb (combine-diff: show mode changes as well., 2006-02-06), when we also moved from storing raw sha1 bytes to the combine_diff_parent struct. But no explanation is given. That 4 comes from the earliest code in ea726d02e9 (diff-files: -c and --cc options., 2006-01-28). One might guess it is for the 4 stages we can store in the index. But this code path only ever diffs the current state against stages 2 and 3. So we only need two slots. And it's easy to see this is still the case. We fill the parent slots by subtracting 2 from the ce_stage() values, ignoring values below 2. And since ce_stage() is only 2 bits, there are 4 values, and thus we need 2 slots. Let's use the correct value (saving a tiny bit of memory) and add a comment explaining what's going on (saving a tiny bit of programmer brain power). Arguably we could use: 1 + (STAGEMASK >> STAGESHIFT) - 2 which lets the compiler enforce that we will not go out-of-bounds if we see an unexpected value from ce_stage(). But that is more confusing to explain, and the constant "2" is baked into other parts of the function. It is a fundamental constant, not something where somebody might bump a macro and forget to update this code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff-lib.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/diff-lib.c b/diff-lib.c index 471ef996146bb8..353b473ed52e41 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -166,8 +166,13 @@ void run_diff_files(struct rev_info *revs, unsigned int option) wt_mode = 0; } + /* + * Allocate space for two parents, which will come from + * index stages #2 and #3, if present. Below we'll fill + * these from (stage - 2). + */ dpath = combine_diff_path_new(ce->name, ce_namelen(ce), - wt_mode, null_oid(), 5); + wt_mode, null_oid(), 2); while (i < entries) { struct cache_entry *nce = istate->cache[i]; From a8dda1af6ab400d45b7524bc46b64e04d14fc912 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Jan 2025 03:46:49 -0500 Subject: [PATCH 063/535] tree-diff: drop path_appendnew() alloc optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we're diffing trees, we create a list of combine_diff_path structs that represent changed paths. We allocate each struct and add it to the list with path_appendnew(), which we then feed to opt->pathchange(). That function tells us whether the path is of interest or not; if not, then we can throw away the struct we allocated. So there's an optimization to avoid extra allocations: instead of throwing away the new entry, we try to reuse it. If it was large enough to store the next path we care about, we can do so. And if not, we fall back to freeing and re-allocating a new struct. This comes from 72441af7c4 (tree-diff: rework diff_tree() to generate diffs for multiparent cases as well, 2014-04-07), where the goal was to have even the 2-parent diff code use the combine-diff infrastructure, but without taking a performance hit. The implementation causes some complexities in the interface (as we store the allocation length inside the "next" pointer), and prevents us from using the regular combine_diff_path_new() constructor. The complexity is mostly contained inside two functions, but it's worth re-evaluating how much it's helping. That commit claims it helps ~1% on generating two-parent diffs in linux.git. Here are the timings I get on the same command today ("old" is the current tip of master, and "new" has this patch applied): Benchmark 1: ./git.old log --raw --no-abbrev --no-renames v3.10..v3.11 Time (mean ± σ): 532.9 ms ± 5.8 ms [User: 472.7 ms, System: 59.6 ms] Range (min … max): 525.9 ms … 543.3 ms 10 runs Benchmark 2: ./git.new log --raw --no-abbrev --no-renames v3.10..v3.11 Time (mean ± σ): 538.3 ms ± 5.7 ms [User: 478.0 ms, System: 59.7 ms] Range (min … max): 528.5 ms … 545.3 ms 10 runs Summary ./git.old log --raw --no-abbrev --no-renames v3.10..v3.11 ran 1.01 ± 0.02 times faster than ./git.new log --raw --no-abbrev --no-renames v3.10..v3.11 So we do end up on average 1% faster, but with 2% of noise. I tried to focus more on diff performance by running the commit traversal separately, like: git rev-list v3.10..v3.11 >in and then timing just the diffs: Benchmark 1: ./git.old diff-tree --stdin -r