Skip to content

Commit

Permalink
maintenance: add new cache-local-objects maintenance task (#720)
Browse files Browse the repository at this point in the history
Introduce a new maintenance task, `cache-local-objects`, that operates
on Scalar or VFS for Git repositories with a per-volume, shared object
cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose
objects from the repository object directory to the shared cache.

Older versions of `microsoft/git` incorrectly placed packfiles in the
repository object directory instead of the shared cache; this task will
help clean up existing clones impacted by that issue.

Fixes #716
  • Loading branch information
mjcheetham authored Jan 31, 2025
2 parents 5b0f528 + 03e984b commit b5b340d
Show file tree
Hide file tree
Showing 4 changed files with 336 additions and 5 deletions.
8 changes: 8 additions & 0 deletions Documentation/git-maintenance.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ task:
* `prefetch`: hourly.
* `loose-objects`: daily.
* `incremental-repack`: daily.
* `cache-local-objects`: weekly.
--
+
`git maintenance register` will also disable foreground maintenance by
Expand Down Expand Up @@ -158,6 +159,13 @@ pack-refs::
need to iterate across many references. See linkgit:git-pack-refs[1]
for more information.

cache-local-objects::
The `cache-local-objects` task only operates on Scalar or VFS for Git
repositories (cloned with either `scalar clone` or `gvfs clone`) that
have the `gvfs.sharedCache` configuration setting present. This task
migrates pack files and loose objects from the repository's object
directory into the shared volume cache.

OPTIONS
-------
--auto::
Expand Down
202 changes: 197 additions & 5 deletions builtin/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* Copyright (c) 2006 Shawn O. Pearce
*/
#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "builtin.h"
#include "abspath.h"
#include "date.h"
Expand Down Expand Up @@ -41,6 +42,8 @@
#include "hook.h"
#include "setup.h"
#include "trace2.h"
#include "copy.h"
#include "dir.h"

#define FAILED_RUN "failed to run %s"

Expand Down Expand Up @@ -1141,17 +1144,19 @@ static int write_loose_object_to_stdin(const struct object_id *oid,
return ++(d->count) > d->batch_size;
}

static const char *object_dir = NULL;
static const char *shared_object_dir = NULL;

static int pack_loose(struct maintenance_run_opts *opts)
{
struct repository *r = the_repository;
int result = 0;
struct write_loose_object_data data;
struct child_process pack_proc = CHILD_PROCESS_INIT;
const char *object_dir = r->objects->odb->path;

if (!object_dir)
object_dir = r->objects->odb->path;
/* If set, use the shared object directory. */
if (shared_object_dir)
object_dir = shared_object_dir;

/*
* Do not start pack-objects process
Expand Down Expand Up @@ -1345,6 +1350,186 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
return 0;
}

/*
 * Make the file at 'src' also available at 'dst'.
 *
 * A hard link is attempted first. When link() fails the file is copied
 * instead; a warning is emitted beforehand unless the failure was EXDEV
 * (src and dst are on different file systems), which is the expected
 * reason for needing a copy. Dies if the copy fails as well.
 *
 * NOTE(review): when 'dst' already exists link() fails with EEXIST and
 * the fallback copy is still attempted; whether copy_file() tolerates an
 * existing destination is not visible here -- confirm.
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		/* Only unexpected link failures are worth reporting. */
		if (errno != EXDEV)
			warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}

/*
 * Move the file at 'src' to 'dst'.
 *
 * rename() is attempted first. When it fails the move is emulated by
 * copying the file and then deleting the source; a warning is emitted
 * beforehand unless the failure was EXDEV (src and dst are on different
 * file systems). Dies if either the copy or the deletion of the source
 * fails.
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst)) {
		/* Only unexpected rename failures are worth reporting. */
		if (errno != EXDEV)
			warning_errno(_("failed to move '%s' to '%s'"), src, dst);

		/* Fall back to copy-then-delete. */
		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);

		if (unlink(src))
			die_errno(_("failed to delete '%s'"), src);
	}
}

static void migrate_pack(const char *srcdir, const char *dstdir,
const char *pack_filename)
{
size_t basenamelen, srclen, dstlen;
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
struct {
const char *ext;
unsigned move:1;
} files[] = {
{".pack", 0},
{".keep", 0},
{".rev", 0},
{".idx", 1}, /* The index file must be atomically moved last. */
};

trace2_region_enter("maintenance", "migrate_pack", the_repository);

basenamelen = strlen(pack_filename) - 5; /* .pack */
strbuf_addstr(&src, srcdir);
strbuf_addch(&src, '/');
strbuf_add(&src, pack_filename, basenamelen);
strbuf_addstr(&src, ".idx");

/* A pack without an index file is not yet ready to be migrated. */
if (!file_exists(src.buf))
goto cleanup;

strbuf_setlen(&src, src.len - 4 /* .idx */);
strbuf_addstr(&dst, dstdir);
strbuf_addch(&dst, '/');
strbuf_add(&dst, pack_filename, basenamelen);

srclen = src.len;
dstlen = dst.len;

/* Move or copy files from the source directory to the destination. */
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
strbuf_setlen(&src, srclen);
strbuf_addstr(&src, files[i].ext);

if (!file_exists(src.buf))
continue;

strbuf_setlen(&dst, dstlen);
strbuf_addstr(&dst, files[i].ext);

if (files[i].move)
rename_or_copy_or_die(src.buf, dst.buf);
else
link_or_copy_or_die(src.buf, dst.buf);
}

/*
* Now the pack and all associated files exist at the destination we can
* now clean up the files in the source directory.
*/
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
/* Files that were moved rather than copied have no clean up. */
if (files[i].move)
continue;

strbuf_setlen(&src, srclen);
strbuf_addstr(&src, files[i].ext);

/* Files that never existed in originally have no clean up.*/
if (!file_exists(src.buf))
continue;

if (unlink(src.buf))
warning_errno(_("failed to delete '%s'"), src.buf);
}

cleanup:
strbuf_release(&src);
strbuf_release(&dst);

trace2_region_leave("maintenance", "migrate_pack", the_repository);
}

/*
 * Callback passed to for_each_file_in_pack_dir(): migrate the pack named
 * by 'file_name' into the destination directory passed through 'data'
 * (a NUL-terminated path).
 *
 * Only ".pack" entries trigger any work. The associated .idx, .keep and
 * .rev files are handled in tandem with the pack file by migrate_pack(),
 * with the index file being moved last; the original non-index files
 * are only deleted once all files have been copied/moved.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	if (ends_with(file_name, ".pack")) {
		/*
		 * Directory part of the path: everything before the file
		 * name and the one character preceding it (assumed to be
		 * the path separator).
		 */
		size_t dir_len = full_path_len - strlen(file_name) - 1;
		char *pack_dir = xstrndup(full_path, dir_len);

		migrate_pack(pack_dir, (const char *)data, file_name);

		free(pack_dir);
	}
}

/*
 * Callback passed to for_each_loose_object(): move the loose object file
 * at 'path' to "<shared_object_dir>/<xx>/<rest>", where <xx> is the first
 * two characters of the object's hex name and <rest> the remainder.
 *
 * The "<xx>" directory is created when stat() cannot find it. Dies if the
 * directory cannot be created, if the path exists but is not a directory,
 * or if the move itself fails. 'data' is unused. Always returns 0.
 */
static int move_loose_object_to_shared_cache(const struct object_id *oid,
					     const char *path,
					     UNUSED void *data)
{
	struct strbuf target = STRBUF_INIT;
	struct stat sb;
	char *oid_hex = oid_to_hex(oid);

	strbuf_addf(&target, "%s/%.2s/", shared_object_dir, oid_hex);

	if (!stat(target.buf, &sb)) {
		if (!S_ISDIR(sb.st_mode))
			die(_("expected '%s' to be a directory"), target.buf);
	} else if (mkdir(target.buf, 0777)) {
		die_errno(_("failed to create directory '%s'"), target.buf);
	}

	/* Append the remainder of the hex name to form the file path. */
	strbuf_addstr(&target, oid_hex + 2);
	rename_or_copy_or_die(path, target.buf);

	strbuf_release(&target);
	return 0;
}

/*
 * The `cache-local-objects` maintenance task: migrate pack files and
 * loose objects from the repository's object directory into the shared
 * object cache.
 *
 * Does nothing when no shared object directory is set, or when it is the
 * same path as the repository's own object directory. Neither parameter
 * is used. Always returns 0; the migration helpers die on failure.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct repository *repo = the_repository;
	struct strbuf shared_pack_dir = STRBUF_INIT;
	const char *local_odb;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	/* Migrating is pointless when source and destination coincide. */
	local_odb = repo->objects->odb->path;
	if (fspathcmp(local_odb, shared_object_dir)) {
		strbuf_addf(&shared_pack_dir, "%s/pack", shared_object_dir);

		for_each_file_in_pack_dir(local_odb, move_pack_to_shared_cache,
					  shared_pack_dir.buf);

		for_each_loose_object(move_loose_object_to_shared_cache, NULL,
				      FOR_EACH_OBJECT_LOCAL_ONLY);
	}

	strbuf_release(&shared_pack_dir);
	return 0;
}

/*
 * Signature of a maintenance task implementation: it is handed the
 * options of the current maintenance run and the gc configuration, and
 * returns an int.
 * NOTE(review): presumably a non-zero return signals failure -- confirm
 * against the code that invokes the tasks.
 */
typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
struct gc_config *cfg);

Expand Down Expand Up @@ -1374,6 +1559,7 @@ enum maintenance_task_label {
TASK_GC,
TASK_COMMIT_GRAPH,
TASK_PACK_REFS,
TASK_CACHE_LOCAL_OBJS,

/* Leave as final value */
TASK__COUNT
Expand Down Expand Up @@ -1410,6 +1596,10 @@ static struct maintenance_task tasks[] = {
maintenance_task_pack_refs,
pack_refs_condition,
},
[TASK_CACHE_LOCAL_OBJS] = {
"cache-local-objects",
maintenance_task_cache_local_objs,
},
};

static int compare_tasks_by_selection(const void *a_, const void *b_)
Expand Down Expand Up @@ -1504,6 +1694,8 @@ static void initialize_maintenance_strategy(void)
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
tasks[TASK_PACK_REFS].enabled = 1;
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
tasks[TASK_CACHE_LOCAL_OBJS].enabled = 1;
tasks[TASK_CACHE_LOCAL_OBJS].schedule = SCHEDULE_WEEKLY;
}
}

Expand Down Expand Up @@ -1620,8 +1812,8 @@ static int maintenance_run(int argc, const char **argv, const char *prefix)
*/
if (!git_config_get_value("gvfs.sharedcache", &tmp_obj_dir) &&
tmp_obj_dir) {
object_dir = xstrdup(tmp_obj_dir);
setenv(DB_ENVIRONMENT, object_dir, 1);
shared_object_dir = xstrdup(tmp_obj_dir);
setenv(DB_ENVIRONMENT, shared_object_dir, 1);
}

ret = maintenance_run_tasks(&opts, &cfg);
Expand Down
1 change: 1 addition & 0 deletions scalar.c
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,7 @@ static int cmd_run(int argc, const char **argv)
{ "fetch", "prefetch" },
{ "loose-objects", "loose-objects" },
{ "pack-files", "incremental-repack" },
{ "cache-local-objects", "cache-local-objects" },
{ NULL, NULL }
};
struct strbuf buf = STRBUF_INIT;
Expand Down
Loading

0 comments on commit b5b340d

Please sign in to comment.