Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sparse index: integrate with read-tree #426

Merged
merged 9 commits into from
Sep 20, 2021
20 changes: 18 additions & 2 deletions builtin/read-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,15 @@ int cmd_read_tree(int argc, const char **argv, const char *cmd_prefix)
argc = parse_options(argc, argv, cmd_prefix, read_tree_options,
read_tree_usage, 0);

hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR);

prefix_set = opts.prefix ? 1 : 0;
if (1 < opts.merge + opts.reset + prefix_set)
die("Which one? -m, --reset, or --prefix?");

prepare_repo_settings(the_repository);
the_repository->settings.command_requires_full_index = 0;

hold_locked_index(&lock_file, LOCK_DIE_ON_ERROR);

/*
* NEEDSWORK
*
Expand Down Expand Up @@ -214,6 +217,9 @@ int cmd_read_tree(int argc, const char **argv, const char *cmd_prefix)
if (opts.merge && !opts.index_only)
setup_work_tree();

if (opts.skip_sparse_checkout)
ensure_full_index(&the_index);

if (opts.merge) {
switch (stage - 1) {
case 0:
Expand All @@ -223,11 +229,21 @@ int cmd_read_tree(int argc, const char **argv, const char *cmd_prefix)
opts.fn = opts.prefix ? bind_merge : oneway_merge;
break;
case 2:
/*
* TODO: update twoway_merge to handle edit/edit conflicts in
* sparse directories.
*/
ensure_full_index(&the_index);
opts.fn = twoway_merge;
opts.initial_checkout = is_cache_unborn();
break;
case 3:
default:
/*
* TODO: update threeway_merge to handle edit/edit conflicts in
* sparse directories.
*/
ensure_full_index(&the_index);
Comment on lines +242 to +246

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My guess is that this case is rare enough that we might not even bother with it. Let's see if git stash ever needs this case, for instance. This ensure_full_index() might be a long-term solution.

Copy link
Author

@vdye vdye Sep 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since git stash only needs oneway_diff (single-tree) cases to work, I'll leave the ensure_full_index() calls in for two-way and three-way merges.

opts.fn = threeway_merge;
break;
}
Expand Down
39 changes: 23 additions & 16 deletions cache-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,17 @@ int write_index_as_tree(struct object_id *oid, struct index_state *index_state,
return ret;
}

static void prime_cache_tree_sparse_dir(struct repository *r,
struct cache_tree *it,
struct tree *tree,
struct strbuf *tree_path)
{

oidcpy(&it->oid, &tree->object.oid);
it->entry_count = 1;
return;
}

static void prime_cache_tree_rec(struct repository *r,
struct cache_tree *it,
struct tree *tree,
Expand All @@ -812,21 +823,6 @@ static void prime_cache_tree_rec(struct repository *r,

oidcpy(&it->oid, &tree->object.oid);

/*
* If this entry is outside the sparse-checkout cone, then it might be
* a sparse directory entry. Check the index to ensure it is by looking
* for an entry with the exact same name as the tree. If no matching sparse
* entry is found, a staged or conflicted entry is preventing this
* directory from collapsing to a sparse directory entry, so the cache
* tree expansion should continue.
*/
if (r->index->sparse_index &&
!path_in_cone_modesparse_checkout(tree_path->buf, r->index) &&
index_name_pos(r->index, tree_path->buf, tree_path->len) >= 0) {
it->entry_count = 1;
return;
}

init_tree_desc(&desc, tree->buffer, tree->size);
cnt = 0;
while (tree_entry(&desc, &entry)) {
Expand All @@ -846,7 +842,18 @@ static void prime_cache_tree_rec(struct repository *r,
strbuf_add(&subtree_path, entry.path, entry.pathlen);
strbuf_addch(&subtree_path, '/');

prime_cache_tree_rec(r, sub->cache_tree, subtree, &subtree_path);
/*
* If a sparse index is in use, the directory being processed may be
* sparse. To confirm that, we can check whether an entry with that
* exact name exists in the index. If it does, the created subtree
* should be sparse. Otherwise, cache tree expansion should continue
* as normal.
*/
if (r->index->sparse_index &&
index_entry_exists(r->index, subtree_path.buf, subtree_path.len))
prime_cache_tree_sparse_dir(r, sub->cache_tree, subtree, &subtree_path);
else
prime_cache_tree_rec(r, sub->cache_tree, subtree, &subtree_path);
cnt += sub->cache_tree->entry_count;
}
}
Expand Down
10 changes: 10 additions & 0 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,16 @@ struct cache_entry *index_file_next_match(struct index_state *istate, struct cac
*/
int index_name_pos(struct index_state *, const char *name, int namelen);

/*
* Determines whether an entry with the given name exists within the
* given index. The return value is 1 if an exact match is found, otherwise
* it is 0. Note that, unlike index_name_pos, this function does not expand
* the index if it is sparse. If an item exists within the full index but it
* is contained within a sparse directory (and not in the sparse index), 0 is
* returned.
*/
int index_entry_exists(struct index_state *, const char *name, int namelen);

/*
* Some functions return the negative complement of an insert position when a
* precise match was not found but a position was found where the entry would
Expand Down
22 changes: 15 additions & 7 deletions read-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,10 @@ int cache_name_stage_compare(const char *name1, int len1, int stage1, const char
return 0;
}

static int index_name_stage_pos(struct index_state *istate, const char *name, int namelen, int stage)
static int index_name_stage_pos(struct index_state *istate,
const char *name, int namelen,
int stage,
int search_sparse)
{
int first, last;

Expand All @@ -583,7 +586,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in
first = next+1;
}

if (istate->sparse_index &&
if (search_sparse && istate->sparse_index &&
first > 0) {
/* Note: first <= istate->cache_nr */
struct cache_entry *ce = istate->cache[first - 1];
Expand All @@ -599,7 +602,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in
ce_namelen(ce) < namelen &&
!strncmp(name, ce->name, ce_namelen(ce))) {
ensure_full_index(istate);
return index_name_stage_pos(istate, name, namelen, stage);
return index_name_stage_pos(istate, name, namelen, stage, search_sparse);
}
}

Expand All @@ -608,7 +611,12 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in

int index_name_pos(struct index_state *istate, const char *name, int namelen)
{
return index_name_stage_pos(istate, name, namelen, 0);
return index_name_stage_pos(istate, name, namelen, 0, 1);
}

int index_entry_exists(struct index_state *istate, const char *name, int namelen)
{
return index_name_stage_pos(istate, name, namelen, 0, 0) >= 0;
}

int remove_index_entry_at(struct index_state *istate, int pos)
Expand Down Expand Up @@ -1235,7 +1243,7 @@ static int has_dir_name(struct index_state *istate,
*/
}

pos = index_name_stage_pos(istate, name, len, stage);
pos = index_name_stage_pos(istate, name, len, stage, 1);
if (pos >= 0) {
/*
* Found one, but not so fast. This could
Expand Down Expand Up @@ -1332,7 +1340,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
pos = index_pos_to_insert_pos(istate->cache_nr);
else
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), 1);

/*
* Cache tree path should be invalidated only after index_name_stage_pos,
Expand Down Expand Up @@ -1374,7 +1382,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
if (!ok_to_replace)
return error(_("'%s' appears as both a file and as a directory"),
ce->name);
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce), 1);
pos = -pos-1;
}
return pos + 1;
Expand Down
1 change: 1 addition & 0 deletions t/perf/p2000-sparse-operations.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ test_perf_on_all git commit -a -m A
test_perf_on_all git checkout -f -
test_perf_on_all git reset
test_perf_on_all git reset --hard
test_perf_on_all git read-tree -mu HEAD

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you reorder this to be early in the series, you can then advertise the results of this test in the commit message for the commit that improves the performance.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the commit, but haven't added performance numbers yet (like with the git status change, I'll add those once any outstanding bugs are fixed). In the meantime, I'll keep this open to remind myself.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance numbers added to commit message - ~90% reduction in execution time!

test_perf_on_all git checkout-index -f --all
test_perf_on_all git update-index --add --remove
test_perf_on_all git diff
Expand Down
144 changes: 138 additions & 6 deletions t/t1092-sparse-checkout-compatibility.sh
Original file line number Diff line number Diff line change
Expand Up @@ -390,22 +390,26 @@ test_expect_success 'diff --staged' '
test_expect_success 'diff partially-staged' '
init_repos &&

write_script edit-contents <<-\EOF &&
echo text >>$1
EOF

# Add file within cone
test_all_match git sparse-checkout set deep &&
run_on_all 'echo >deep/testfile' &&
run_on_all ../edit-contents deep/testfile &&
test_all_match git add deep/testfile &&
run_on_all 'echo a new line >>deep/testfile' &&
run_on_all ../edit-contents deep/testfile &&

test_all_match git diff &&
test_all_match git diff --staged &&

# Add file outside cone
test_all_match git reset --hard &&
run_on_all mkdir newdirectory &&
run_on_all 'echo >newdirectory/testfile' &&
run_on_all ../edit-contents newdirectory/testfile &&
test_all_match git sparse-checkout set newdirectory &&
test_all_match git add newdirectory/testfile &&
run_on_all 'echo a new line >>newdirectory/testfile' &&
run_on_all ../edit-contents newdirectory/testfile &&
test_all_match git sparse-checkout set &&

test_all_match git diff &&
Expand Down Expand Up @@ -771,6 +775,117 @@ test_expect_success 'update-index --cacheinfo' '
test_sparse_match git status --porcelain=v2
'

test_expect_success 'read-tree --merge with files outside sparse definition' '
init_repos &&

test_all_match git checkout -b test-branch update-folder1 &&
for MERGE_TREES in "base HEAD update-folder2" \
"update-folder1 update-folder2" \
"update-folder2"
do
# Clean up and remove on-disk files
test_all_match git reset --hard HEAD &&
test_sparse_match git sparse-checkout reapply &&

# Although the index matches, without --no-sparse-checkout, outside-of-
# definition files will not exist on disk for sparse checkouts
test_all_match git read-tree -mu $MERGE_TREES &&
test_all_match git status --porcelain=v2 &&
test_path_is_missing sparse-checkout/folder2 &&
test_path_is_missing sparse-index/folder2 &&

test_all_match git read-tree --reset -u HEAD &&
test_all_match git status --porcelain=v2 &&

test_all_match git read-tree -mu --no-sparse-checkout $MERGE_TREES &&
test_all_match git status --porcelain=v2 &&
test_cmp sparse-checkout/folder2/a sparse-index/folder2/a &&
test_cmp sparse-checkout/folder2/a full-checkout/folder2/a || return 1
done
'

test_expect_success 'read-tree --merge with edit/edit conflicts in sparse directories' '
init_repos &&

# Merge of multiple changes to same directory (but not same files) should
# succeed
test_all_match git read-tree -mu base rename-base update-folder1 &&
test_all_match git status --porcelain=v2 &&

test_all_match git reset --hard &&

test_all_match git read-tree -mu rename-base update-folder2 &&
test_all_match git status --porcelain=v2 &&

test_all_match git reset --hard &&

test_all_match test_must_fail git read-tree -mu base update-folder1 rename-out-to-in &&
test_all_match test_must_fail git read-tree -mu rename-out-to-in update-folder1
'

test_expect_success 'read-tree --merge with modified file outside definition' '
init_repos &&

write_script edit-contents <<-\EOF &&
echo text >>$1
EOF

test_all_match git checkout -b test-branch update-folder1 &&
run_on_sparse mkdir -p folder2 &&
run_on_all ../edit-contents folder2/a &&

# With manually-modified file, full-checkout cannot merge, but it is ignored
# in sparse checkouts
test_must_fail git -C full-checkout read-tree -mu update-folder2 &&
test_sparse_match git read-tree -mu update-folder2 &&
test_sparse_match git status --porcelain=v2 &&

# Reset only the sparse checkouts to "undo" the merge. All three checkouts
# now have matching indexes and matching folder2/a on disk.
test_sparse_match git read-tree --reset -u HEAD &&

# When --no-sparse-checkout is specified, sparse checkouts identify the file
# on disk and prevent the merge
test_all_match test_must_fail git read-tree -mu --no-sparse-checkout update-folder2
'

test_expect_success 'read-tree --prefix outside sparse definition' '
init_repos &&

# Cannot read-tree --prefix with a single argument when files exist within
# prefix
test_all_match test_must_fail git read-tree --prefix=folder1/ -u update-folder1 &&

test_all_match git read-tree --prefix=folder2/0 -u rename-base &&
test_path_is_missing sparse-checkout/folder2 &&
test_path_is_missing sparse-index/folder2 &&

test_all_match git read-tree --reset -u HEAD &&
test_all_match git read-tree --prefix=folder2/0 -u --no-sparse-checkout rename-base &&
test_cmp sparse-checkout/folder2/0/a sparse-index/folder2/0/a &&
test_cmp sparse-checkout/folder2/0/a full-checkout/folder2/0/a
'

test_expect_success 'read-tree --merge with directory-file conflicts' '
init_repos &&

test_all_match git checkout -b test-branch rename-base &&

# Although the index matches, without --no-sparse-checkout, outside-of-
# definition files will not exist on disk for sparse checkouts
test_sparse_match git read-tree -mu rename-out-to-out &&
test_sparse_match git status --porcelain=v2 &&
test_path_is_missing sparse-checkout/folder2 &&
test_path_is_missing sparse-index/folder2 &&

test_sparse_match git read-tree --reset -u HEAD &&
test_sparse_match git status --porcelain=v2 &&

test_sparse_match git read-tree -mu --no-sparse-checkout rename-out-to-out &&
test_sparse_match git status --porcelain=v2 &&
test_cmp sparse-checkout/folder2/0/1 sparse-index/folder2/0/1
'

test_expect_success 'merge, cherry-pick, and rebase' '
init_repos &&

Expand Down Expand Up @@ -995,7 +1110,7 @@ test_expect_success 'sparse-index is expanded and converted back' '
init_repos &&

GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \
git -C sparse-index -c core.fsmonitor="" read-tree -mu HEAD &&
git -C sparse-index -c core.fsmonitor="" mv a b &&
test_region index convert_to_sparse trace2.txt &&
test_region index ensure_full_index trace2.txt
'
Expand Down Expand Up @@ -1050,7 +1165,7 @@ test_expect_success 'sparse-index is not expanded' '
do
echo >>sparse-index/README.md &&
ensure_not_expanded reset --mixed $ref
ensure_not_expanded reset --hard $ref
ensure_not_expanded reset --hard $ref || return 1
done &&

ensure_not_expanded reset --hard update-deep &&
Expand Down Expand Up @@ -1135,6 +1250,23 @@ test_expect_success 'sparse index is not expanded: update-index' '
ensure_not_expanded update-index --add --remove --again
'

test_expect_success 'sparse index is not expanded: read-tree' '
init_repos &&

ensure_not_expanded checkout -b test-branch update-folder1 &&
for MERGE_TREES in "update-folder2"
do
ensure_not_expanded read-tree -mu $MERGE_TREES &&
ensure_not_expanded reset --hard HEAD || return 1
done &&

rm -rf sparse-index/deep/deeper2 &&
ensure_not_expanded add . &&
ensure_not_expanded commit -m "test" &&

ensure_not_expanded read-tree --prefix=deep/deeper2 -u deepest
'

# NEEDSWORK: a sparse-checkout behaves differently from a full checkout
# in this scenario, but it shouldn't.
test_expect_success 'reset mixed and checkout orphan' '
Expand Down
Loading