From b2aaf4f467f4e1399bd7e2cd523945a266d4abea Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Wed, 22 Jan 2025 13:30:42 +0000 Subject: [PATCH 1/4] fixup! maintenance: care about gvfs.sharedCache config --- builtin/gc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 555f81cb2b24f0..e7ffd81955ad4b 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1141,7 +1141,7 @@ static int write_loose_object_to_stdin(const struct object_id *oid, return ++(d->count) > d->batch_size; } -static const char *object_dir = NULL; +static const char *shared_object_dir = NULL; static int pack_loose(struct maintenance_run_opts *opts) { @@ -1149,9 +1149,11 @@ static int pack_loose(struct maintenance_run_opts *opts) int result = 0; struct write_loose_object_data data; struct child_process pack_proc = CHILD_PROCESS_INIT; + const char *object_dir = r->objects->odb->path; - if (!object_dir) - object_dir = r->objects->odb->path; + /* If set, use the shared object directory. */ + if (shared_object_dir) + object_dir = shared_object_dir; /* * Do not start pack-objects process @@ -1620,8 +1622,8 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) */ if (!git_config_get_value("gvfs.sharedcache", &tmp_obj_dir) && tmp_obj_dir) { - object_dir = xstrdup(tmp_obj_dir); - setenv(DB_ENVIRONMENT, object_dir, 1); + shared_object_dir = xstrdup(tmp_obj_dir); + setenv(DB_ENVIRONMENT, shared_object_dir, 1); } ret = maintenance_run_tasks(&opts, &cfg); From 385e6205477bbc87ddec3ae72683d3b3f1208400 Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Wed, 22 Jan 2025 13:57:21 +0000 Subject: [PATCH 2/4] t7900-maintenance.sh: reset config between tests Tests in t7900 assume the state of the `maintenance.strategy` config setting; set/unset by previous tests. Correct this by explicitly unsetting and re-setting the config at the start of the tests. 
Signed-off-by: Matthew John Cheetham --- t/t7900-maintenance.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index c224c8450c85f5..fa6988a5f8af45 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -458,6 +458,7 @@ test_expect_success 'invalid --schedule value' ' ' test_expect_success '--schedule inheritance weekly -> daily -> hourly' ' + git config --unset maintenance.strategy && git config maintenance.loose-objects.enabled true && git config maintenance.loose-objects.schedule hourly && git config maintenance.commit-graph.enabled true && From 784e82b88e1ab9a2f767f3dd4021f6406f55b00d Mon Sep 17 00:00:00 2001 From: Matthew John Cheetham Date: Tue, 14 Jan 2025 17:28:31 +0000 Subject: [PATCH 3/4] maintenance: add cache-local-objects maintenance task Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Migration of packfiles involves the following steps for each pack: 1. Hardlink (or copy): a. the .pack file b. the .keep file c. the .rev file 2. Move (or copy + delete) the .idx file 3. Delete/unlink: a. the .pack file b. the .keep file c. the .rev file Moving the index file after the others ensures the pack is not read from the new cache directory until all associated files (rev, keep) exist in the cache directory also. Moving loose objects operates as a move, or copy + delete. 
Signed-off-by: Matthew John Cheetham --- Documentation/git-maintenance.txt | 8 ++ builtin/gc.c | 190 ++++++++++++++++++++++++++++++ t/t7900-maintenance.sh | 129 ++++++++++++++++++++ 3 files changed, 327 insertions(+) diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt index 6e6651309d3253..b020f4fe7f3c6a 100644 --- a/Documentation/git-maintenance.txt +++ b/Documentation/git-maintenance.txt @@ -69,6 +69,7 @@ task: * `prefetch`: hourly. * `loose-objects`: daily. * `incremental-repack`: daily. +* `cache-local-objects`: weekly. -- + `git maintenance register` will also disable foreground maintenance by @@ -158,6 +159,13 @@ pack-refs:: need to iterate across many references. See linkgit:git-pack-refs[1] for more information. +cache-local-objects:: + The `cache-local-objects` task only operates on Scalar or VFS for Git + repositories (cloned with either `scalar clone` or `gvfs clone`) that + have the `gvfs.sharedCache` configuration setting present. This task + migrates pack files and loose objects from the repository's object + directory into the shared volume cache. + OPTIONS ------- --auto:: diff --git a/builtin/gc.c b/builtin/gc.c index e7ffd81955ad4b..1d8980683bfce9 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -10,6 +10,7 @@ * Copyright (c) 2006 Shawn O. Pearce */ #define USE_THE_REPOSITORY_VARIABLE +#include "git-compat-util.h" #include "builtin.h" #include "abspath.h" #include "date.h" @@ -41,6 +42,8 @@ #include "hook.h" #include "setup.h" #include "trace2.h" +#include "copy.h" +#include "dir.h" #define FAILED_RUN "failed to run %s" @@ -1347,6 +1350,186 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts return 0; } +static void link_or_copy_or_die(const char *src, const char *dst) +{ + if (!link(src, dst)) + return; + + /* Use copy operation if src and dst are on different file systems. 
*/ + if (errno != EXDEV) + warning_errno(_("failed to link '%s' to '%s'"), src, dst); + + if (copy_file(dst, src, 0444)) + die_errno(_("failed to copy '%s' to '%s'"), src, dst); +} + +static void rename_or_copy_or_die(const char *src, const char *dst) +{ + if (!rename(src, dst)) + return; + + /* Use copy and delete if src and dst are on different file systems. */ + if (errno != EXDEV) + warning_errno(_("failed to move '%s' to '%s'"), src, dst); + + if (copy_file(dst, src, 0444)) + die_errno(_("failed to copy '%s' to '%s'"), src, dst); + + if (unlink(src)) + die_errno(_("failed to delete '%s'"), src); +} + +static void migrate_pack(const char *srcdir, const char *dstdir, + const char *pack_filename) +{ + size_t basenamelen, srclen, dstlen; + struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT; + struct { + const char *ext; + unsigned move:1; + } files[] = { + {".pack", 0}, + {".keep", 0}, + {".rev", 0}, + {".idx", 1}, /* The index file must be atomically moved last. */ + }; + + trace2_region_enter("maintenance", "migrate_pack", the_repository); + + basenamelen = strlen(pack_filename) - 5; /* .pack */ + strbuf_addstr(&src, srcdir); + strbuf_addch(&src, '/'); + strbuf_add(&src, pack_filename, basenamelen); + strbuf_addstr(&src, ".idx"); + + /* A pack without an index file is not yet ready to be migrated. */ + if (!file_exists(src.buf)) + goto cleanup; + + strbuf_setlen(&src, src.len - 4 /* .idx */); + strbuf_addstr(&dst, dstdir); + strbuf_addch(&dst, '/'); + strbuf_add(&dst, pack_filename, basenamelen); + + srclen = src.len; + dstlen = dst.len; + + /* Move or copy files from the source directory to the destination. 
*/ + for (size_t i = 0; i < ARRAY_SIZE(files); i++) { + strbuf_setlen(&src, srclen); + strbuf_addstr(&src, files[i].ext); + + if (!file_exists(src.buf)) + continue; + + strbuf_setlen(&dst, dstlen); + strbuf_addstr(&dst, files[i].ext); + + if (files[i].move) + rename_or_copy_or_die(src.buf, dst.buf); + else + link_or_copy_or_die(src.buf, dst.buf); + } + + /* + * Now the pack and all associated files exist at the destination we can + * now clean up the files in the source directory. + */ + for (size_t i = 0; i < ARRAY_SIZE(files); i++) { + /* Files that were moved rather than copied have no clean up. */ + if (files[i].move) + continue; + + strbuf_setlen(&src, srclen); + strbuf_addstr(&src, files[i].ext); + + /* Files that never existed in originally have no clean up.*/ + if (!file_exists(src.buf)) + continue; + + if (unlink(src.buf)) + warning_errno(_("failed to delete '%s'"), src.buf); + } + +cleanup: + strbuf_release(&src); + strbuf_release(&dst); + + trace2_region_leave("maintenance", "migrate_pack", the_repository); +} + +static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len, + const char *file_name, void *data) +{ + char *srcdir; + const char *dstdir = (const char *)data; + + /* We only care about the actual pack files here. + * The associated .idx, .keep, .rev files will be copied in tandem + * with the pack file, with the index file being moved last. + * The original locations of the non-index files will only deleted + * once all other files have been copied/moved. 
+ */ + if (!ends_with(file_name, ".pack")) + return; + + srcdir = xstrndup(full_path, full_path_len - strlen(file_name) - 1); + + migrate_pack(srcdir, dstdir, file_name); + + free(srcdir); +} + +static int move_loose_object_to_shared_cache(const struct object_id *oid, + const char *path, + UNUSED void *data) +{ + struct stat st; + struct strbuf dst = STRBUF_INIT; + char *hex = oid_to_hex(oid); + + strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex); + + if (stat(dst.buf, &st)) { + if (mkdir(dst.buf, 0777)) + die_errno(_("failed to create directory '%s'"), dst.buf); + } else if (!S_ISDIR(st.st_mode)) + die(_("expected '%s' to be a directory"), dst.buf); + + strbuf_addstr(&dst, hex+2); + rename_or_copy_or_die(path, dst.buf); + + strbuf_release(&dst); + return 0; +} + +static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts, + UNUSED struct gc_config *cfg) +{ + struct strbuf dstdir = STRBUF_INIT; + struct repository *r = the_repository; + + /* This task is only applicable with a VFS/Scalar shared cache. */ + if (!shared_object_dir) + return 0; + + /* If the dest is the same as the local odb path then we do nothing. 
*/ + if (!fspathcmp(r->objects->odb->path, shared_object_dir)) + goto cleanup; + + strbuf_addf(&dstdir, "%s/pack", shared_object_dir); + + for_each_file_in_pack_dir(r->objects->odb->path, move_pack_to_shared_cache, + dstdir.buf); + + for_each_loose_object(move_loose_object_to_shared_cache, NULL, + FOR_EACH_OBJECT_LOCAL_ONLY); + +cleanup: + strbuf_release(&dstdir); + return 0; +} + typedef int maintenance_task_fn(struct maintenance_run_opts *opts, struct gc_config *cfg); @@ -1376,6 +1559,7 @@ enum maintenance_task_label { TASK_GC, TASK_COMMIT_GRAPH, TASK_PACK_REFS, + TASK_CACHE_LOCAL_OBJS, /* Leave as final value */ TASK__COUNT @@ -1412,6 +1596,10 @@ static struct maintenance_task tasks[] = { maintenance_task_pack_refs, pack_refs_condition, }, + [TASK_CACHE_LOCAL_OBJS] = { + "cache-local-objects", + maintenance_task_cache_local_objs, + }, }; static int compare_tasks_by_selection(const void *a_, const void *b_) @@ -1506,6 +1694,8 @@ static void initialize_maintenance_strategy(void) tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY; tasks[TASK_PACK_REFS].enabled = 1; tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY; + tasks[TASK_CACHE_LOCAL_OBJS].enabled = 1; + tasks[TASK_CACHE_LOCAL_OBJS].schedule = SCHEDULE_WEEKLY; } } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index fa6988a5f8af45..ea523ba58577ad 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -32,6 +32,25 @@ test_systemd_analyze_verify () { fi } +test_import_packfile () { + printf "blob\ndata <actual && test_grep "usage: git maintenance " actual && @@ -1012,4 +1031,114 @@ test_expect_success 'repacking loose objects is quiet' ' ) ' +test_expect_success 'cache-local-objects task with no shared cache no op' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + test_commit something && + git config set maintenance.gc.enabled false && + git config set maintenance.cache-local-objects.enabled true && + git config set maintenance.cache-local-objects.auto 1 
&& + + test_import_packfile && + test_get_packdir_files "*.pack" "*.idx" "*.keep" "*.rev" \ + >files.txt && + test_get_loose_object_files >>files.txt && + + git maintenance run && + while IFS= read -r f; do + test_path_exists $f || exit 1 + done files.txt && + test_get_loose_object_files >>files.txt && + + git maintenance run && + while IFS= read -r f; do + test_path_exists $f || exit 1 + done src.txt && + test_get_loose_object_files >>src.txt && + + rm -f .git/objects/pack/*.rev .git/objects/pack/*.keep && + + sed "s/.git\\/objects\\//..\\/cache\\//" src.txt >dst.txt && + + git maintenance run && + while IFS= read -r f; do + test_path_is_missing $f || exit 1 + done src.txt && + test_get_loose_object_files >>src.txt && + + sed "s/.git\\/objects\\//..\\/cache\\//" src.txt >dst.txt && + + git maintenance run && + while IFS= read -r f; do + test_path_is_missing $f || exit 1 + done Date: Thu, 23 Jan 2025 09:16:19 +0000 Subject: [PATCH 4/4] scalar.c: add cache-local-objects task Add the `cache-local-objects` maintenance task to the list of tasks run by the `scalar run` command. It's often easier for users to run the shorter `scalar run` command than the equivalent `git maintenance` command. Signed-off-by: Matthew John Cheetham --- scalar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/scalar.c b/scalar.c index 92582890c7ceb7..91b3b43dc4a2e8 100644 --- a/scalar.c +++ b/scalar.c @@ -1163,6 +1163,7 @@ static int cmd_run(int argc, const char **argv) { "fetch", "prefetch" }, { "loose-objects", "loose-objects" }, { "pack-files", "incremental-repack" }, + { "cache-local-objects", "cache-local-objects" }, { NULL, NULL } }; struct strbuf buf = STRBUF_INIT;