Skip to content

Commit

Permalink
For linear histories, avoid redoing path lookup work
Browse files Browse the repository at this point in the history
Also, set a fixed and higher pack-cache to double typical pack-decode
performance in mid-sized repositories.
Additionally, normalize the input path.
  • Loading branch information
Byron committed Dec 25, 2024
1 parent 667e626 commit 8196a43
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 20 deletions.
33 changes: 28 additions & 5 deletions gitoxide-core/src/repository/blame.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use gix::bstr::BStr;
use gix::bstr::ByteSlice;
use gix::config::tree;
use std::ffi::OsStr;

pub fn blame_file(
Expand All @@ -7,17 +8,39 @@ pub fn blame_file(
out: impl std::io::Write,
err: Option<&mut dyn std::io::Write>,
) -> anyhow::Result<()> {
repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?));
{
let mut config = repo.config_snapshot_mut();
if config.string(&tree::Core::DELTA_BASE_CACHE_LIMIT).is_none() {
config.set_value(&tree::Core::DELTA_BASE_CACHE_LIMIT, "100m")?;
}
}
let index = repo.index_or_empty()?;
repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&index));

let file = gix::path::os_str_into_bstr(file)?;
let specs = repo.pathspec(
false,
[file],
true,
&index,
gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping.adjust_for_bare(repo.is_bare()),
)?;
// TODO: there should be a way to normalize paths without going through patterns, at least in this case maybe?
// `Search` actually sorts patterns by excluding or not, all that can lead to strange results.
let file = specs
.search()
.patterns()
.map(|p| p.path().to_owned())
.next()
.expect("exactly one pattern");

let suspect = repo.head()?.peel_to_commit_in_place()?;
let traverse =
gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::<Vec<gix::ObjectId>>)
.with_commit_graph(repo.commit_graph_if_enabled()?)
.build()?;
let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?;
let file_path: &BStr = gix::path::os_str_into_bstr(file)?;

let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file_path)?;
let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file.as_bstr())?;
let statistics = outcome.statistics;
write_blame_entries(out, outcome)?;

Expand Down
41 changes: 26 additions & 15 deletions gix-blame/src/file/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ where

let mut stats = Statistics::default();
let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
let blamed_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?
let blamed_file_entry_id = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?
.ok_or_else(|| Error::FileMissing {
file_path: file_path.to_owned(),
commit_id: suspect,
})?;
let blamed_file_blob = odb.find_blob(&blamed_file_entry.oid, &mut buf)?.data.to_vec();
file_path: file_path.to_owned(),
commit_id: suspect,
})?;
let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec();
let num_lines_in_blamed = {
let mut interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100);
tokens_for_diffing(&blamed_file_blob)
Expand All @@ -98,6 +98,7 @@ where

let mut out = Vec::new();
let mut diff_state = gix_diff::tree::State::default();
let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
'outer: while let Some(item) = traverse.next() {
if hunks_to_blame.is_empty() {
break;
Expand All @@ -123,15 +124,27 @@ where
continue;
}

let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? else {
let mut entry = previous_entry
.take()
.filter(|(id, _)| *id == suspect)
.map(|(_, entry)| entry);
if entry.is_none() {
entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?;
}

let Some(entry_id) = entry else {
continue;
};

for parent_id in &parent_ids {
if let Some(parent_entry) =
for (pid, parent_id) in parent_ids.iter().enumerate() {
if let Some(parent_entry_id) =
find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)?
{
if entry.oid == parent_entry.oid {
let no_change_in_entry = entry_id == parent_entry_id;
if pid == 0 {
previous_entry = Some((*parent_id, parent_entry_id));
}
if no_change_in_entry {
pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame);
continue 'outer;
}
Expand Down Expand Up @@ -170,10 +183,8 @@ where
// Do nothing under the assumption that this always (or almost always)
// implies that the file comes from a different parent, compared to which
// it was modified, not added.
} else {
if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
break 'outer;
}
} else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
break 'outer;
}
}
gix_diff::tree::recorder::Change::Deletion { .. } => {
Expand Down Expand Up @@ -418,7 +429,7 @@ fn find_path_entry_in_commit(
buf: &mut Vec<u8>,
buf2: &mut Vec<u8>,
stats: &mut Statistics,
) -> Result<Option<gix_object::tree::Entry>, Error> {
) -> Result<Option<ObjectId>, Error> {
let commit_id = odb.find_commit(commit, buf)?.tree();
stats.commits_to_tree += 1;
let tree_iter = odb.find_tree_iter(&commit_id, buf)?;
Expand All @@ -430,7 +441,7 @@ fn find_path_entry_in_commit(
file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1),
)?;
stats.trees_decoded -= 1;
Ok(res)
Ok(res.map(|e| e.oid))
}

/// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them
Expand Down

0 comments on commit 8196a43

Please sign in to comment.