diff --git a/Cargo.lock b/Cargo.lock index 28796ddce5a..365fb927fa5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1365,6 +1365,7 @@ dependencies = [ "gix-actor 0.33.1", "gix-archive", "gix-attributes 0.23.1", + "gix-blame", "gix-command", "gix-commitgraph 0.25.1", "gix-config", @@ -1538,7 +1539,19 @@ dependencies = [ name = "gix-blame" version = "0.0.0" dependencies = [ + "gix-diff", + "gix-filter", + "gix-fs 0.12.1", + "gix-hash 0.15.1", + "gix-index 0.37.0", + "gix-object 0.46.1", + "gix-odb", + "gix-ref 0.49.1", "gix-testtools", + "gix-trace 0.1.11", + "gix-traverse 0.43.1", + "gix-worktree 0.38.0", + "thiserror 2.0.3", ] [[package]] diff --git a/README.md b/README.md index 99c26d1f361..a6b5b4db7fd 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,7 @@ is usable to some extent. * [gix-shallow](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-shallow) * `gitoxide-core` * **very early** _(possibly without any documentation and many rough edges)_ + * [gix-blame](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-blame) * **idea** _(just a name placeholder)_ * [gix-note](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-note) * [gix-fetchhead](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-fetchhead) diff --git a/crate-status.md b/crate-status.md index e42d8402ebf..3274407dd48 100644 --- a/crate-status.md +++ b/crate-status.md @@ -293,7 +293,7 @@ The top-level crate that acts as hub to all functionality provided by the `gix-* * [x] safe with cycles and recursive configurations * [x] multi-line with comments and quotes * **promisor** - * It's vague, but these seems to be like index files allowing to fetch objects from a server on demand. + * It's vague, but these seem to be like index files allowing to fetch objects from a server on demand. * [x] API documentation * [ ] Some examples @@ -361,6 +361,25 @@ Check out the [performance discussion][gix-diff-performance] as well. * [x] API documentation * [ ] Examples +### gix-blame + +* [x] commit-annotations for a single file + - [ ] progress + - [ ] interruptibility + - [ ] streaming +- [ ] support for worktree changes (creates virtual commit on top of `HEAD`) +- [ ] shallow-history support +- [ ] rename tracking (track different paths through history) +- [ ] commits to ignore +- [ ] pass all blame-cornercases (from Git) +* **Performance-Improvements** + * Without the following the performance isn't competitive with Git. + 1. Implement custom graph walk which won't run down parents that don't have the path in question. + 2. Implement access of trees from commit-graph and fill that information into the traversal info by default. + 3. commit-graph with bloom filter, used to quickly check if a commit has a path. +* [x] API documentation + * [ ] Examples + ### gix-traverse Check out the [performance discussion][gix-traverse-performance] as well. diff --git a/gitoxide-core/Cargo.toml b/gitoxide-core/Cargo.toml index 64f2642e7c8..2291c5c4223 100644 --- a/gitoxide-core/Cargo.toml +++ b/gitoxide-core/Cargo.toml @@ -49,7 +49,7 @@ serde = ["gix/serde", "dep:serde_json", "dep:serde", "bytesize/serde"] [dependencies] # deselect everything else (like "performance") as this should be controllable by the parent application. 
-gix = { version = "^0.69.1", path = "../gix", default-features = false, features = ["merge", "blob-diff", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status", "dirwalk"] } +gix = { version = "^0.69.1", path = "../gix", default-features = false, features = ["merge", "blob-diff", "blame", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status", "dirwalk"] } gix-pack-for-configuration-only = { package = "gix-pack", version = "^0.56.0", path = "../gix-pack", default-features = false, features = ["pack-cache-lru-dynamic", "pack-cache-lru-static", "generate", "streaming-input"] } gix-transport-configuration-only = { package = "gix-transport", version = "^0.44.0", path = "../gix-transport", default-features = false } gix-archive-for-configuration-only = { package = "gix-archive", version = "^0.18.0", path = "../gix-archive", optional = true, features = ["tar", "tar_gz"] } diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs new file mode 100644 index 00000000000..fea525035fa --- /dev/null +++ b/gitoxide-core/src/repository/blame.rs @@ -0,0 +1,71 @@ +use gix::bstr::ByteSlice; +use gix::config::tree; +use std::ffi::OsStr; + +pub fn blame_file( + mut repo: gix::Repository, + file: &OsStr, + out: impl std::io::Write, + err: Option<&mut dyn std::io::Write>, +) -> anyhow::Result<()> { + { + let mut config = repo.config_snapshot_mut(); + if config.string(&tree::Core::DELTA_BASE_CACHE_LIMIT).is_none() { + config.set_value(&tree::Core::DELTA_BASE_CACHE_LIMIT, "100m")?; + } + } + let index = repo.index_or_empty()?; + repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&index)); + + let file = gix::path::os_str_into_bstr(file)?; + let specs = repo.pathspec( + false, + [file], + true, + &index, + gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping.adjust_for_bare(repo.is_bare()), + )?; + // TODO: there should be a way to normalize paths without going through patterns, at least in this case maybe? + // `Search` actually sorts patterns by excluding or not, all that can lead to strange results. + let file = specs + .search() + .patterns() + .map(|p| p.path().to_owned()) + .next() + .expect("exactly one pattern"); + + let suspect = repo.head()?.peel_to_commit_in_place()?; + let traverse = + gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::>) + .with_commit_graph(repo.commit_graph_if_enabled()?) 
+ .build()?; + let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; + let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file.as_bstr())?; + let statistics = outcome.statistics; + write_blame_entries(out, outcome)?; + + if let Some(err) = err { + writeln!(err, "{statistics:#?}")?; + } + Ok(()) +} + +fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcome) -> Result<(), std::io::Error> { + for (entry, lines_in_hunk) in outcome.entries_with_lines() { + for ((actual_lno, source_lno), line) in entry + .range_in_blamed_file() + .zip(entry.range_in_source_file()) + .zip(lines_in_hunk) + { + write!( + out, + "{short_id} {line_no} {src_line_no} {line}", + line_no = actual_lno + 1, + src_line_no = source_lno + 1, + short_id = entry.commit_id.to_hex_with_len(8), + )?; + } + } + + Ok(()) +} diff --git a/gitoxide-core/src/repository/mod.rs b/gitoxide-core/src/repository/mod.rs index c9044f99cd9..5b51e5c1ac3 100644 --- a/gitoxide-core/src/repository/mod.rs +++ b/gitoxide-core/src/repository/mod.rs @@ -21,6 +21,7 @@ pub enum PathsOrPatterns { pub mod archive; pub mod cat; pub use cat::function::cat; +pub mod blame; pub mod commit; pub mod config; mod credential; diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index de8b8fa22b9..fc4baf3fe48 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -5,7 +5,7 @@ name = "gix-blame" version = "0.0.0" repository = "https://github.com/GitoxideLabs/gitoxide" license = "MIT OR Apache-2.0" -description = "A crate of the gitoxide project dedicated implementing a 'blame' algorithm" +description = "A crate of the gitoxide project dedicated to implementing a 'blame' algorithm" authors = ["Christoph Rüßler ", "Sebastian Thiel "] edition = "2021" rust-version = "1.65" @@ -14,6 +14,19 @@ rust-version = "1.65" doctest = false [dependencies] +gix-trace = { version = "^0.1.11", path = "../gix-trace" } +gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] } +gix-object = { version = "^0.46.0", path = "../gix-object" } +gix-hash = { version = "^0.15.0", path = "../gix-hash" } +gix-worktree = { version = "^0.38.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } +gix-traverse = { version = "^0.43.0", path = "../gix-traverse" } + +thiserror = "2.0.0" [dev-dependencies] +gix-ref = { version = "^0.49.0", path = "../gix-ref" } +gix-filter = { version = "^0.16.0", path = "../gix-filter" } +gix-fs = { version = "^0.12.0", path = "../gix-fs" } +gix-index = { version = "^0.37.0", path = "../gix-index" } +gix-odb = { version = "^0.66.0", path = "../gix-odb" } gix-testtools = { path = "../tests/tools" } diff --git a/gix-blame/src/error.rs b/gix-blame/src/error.rs new file mode 100644 index 00000000000..daedf0aecd7 --- /dev/null +++ b/gix-blame/src/error.rs @@ -0,0 +1,30 @@ +use gix_object::bstr::BString; + +/// The error returned by [file()](crate::file()). +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("No commit was given")] + EmptyTraversal, + #[error(transparent)] + BlobDiffSetResource(#[from] gix_diff::blob::platform::set_resource::Error), + #[error(transparent)] + BlobDiffPrepare(#[from] gix_diff::blob::platform::prepare_diff::Error), + #[error("The file to blame at '{file_path}' wasn't found in the first commit at {commit_id}")] + FileMissing { + /// The file-path to the object to blame. + file_path: BString, + /// The commit whose tree didn't contain `file_path`. 
+        commit_id: gix_hash::ObjectId,
+    },
+    #[error("Couldn't find commit or tree in the object database")]
+    FindObject(#[from] gix_object::find::Error),
+    #[error("Could not find existing blob or commit")]
+    FindExistingObject(#[from] gix_object::find::existing_object::Error),
+    #[error("Could not find existing iterator over a tree")]
+    FindExistingIter(#[from] gix_object::find::existing_iter::Error),
+    #[error("Failed to obtain the next commit in the commit-graph traversal")]
+    Traverse(#[source] Box<dyn std::error::Error + Send + Sync>),
+    #[error(transparent)]
+    DiffTree(#[from] gix_diff::tree::Error),
+}
diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs
new file mode 100644
index 00000000000..16384638e36
--- /dev/null
+++ b/gix-blame/src/file/function.rs
@@ -0,0 +1,451 @@
+use super::{process_changes, Change, UnblamedHunk};
+use crate::{BlameEntry, Error, Outcome, Statistics};
+use gix_diff::blob::intern::TokenSource;
+use gix_hash::ObjectId;
+use gix_object::{bstr::BStr, FindExt};
+use std::num::NonZeroU32;
+use std::ops::Range;
+
+/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
+/// at `traverse[0]:<file_path>` originated in.
+///
+/// ## Parameters
+///
+/// * `odb`
+///    - Access to database objects, also used for diffing.
+///    - Should have an object cache for good diff performance.
+/// * `traverse`
+///    - The list of commits from the most recent to prior ones, following all parents sorted
+///      by time.
+///    - It's paramount that older commits are returned after newer ones.
+///    - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`.
+/// * `file_path`
+///    - A *slash-separated* worktree-relative path to the file to blame.
+/// * `resource_cache`
+///    - Used for diffing trees.
+///
+/// ## The algorithm
+///
+/// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that
+/// represent the worktree state.
+/// We begin with a single *Unblamed Hunk* and a single suspect, usually the `HEAD` commit as the commit containing the
+/// *Blamed File*, so that it contains the entire file, with the first commit being a candidate for the entire *Blamed File*.
+/// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`.
+/// If so, we have found a *Source File* and a *Suspect* commit, and have hunks that represent these changes.
+/// Now the *Unblamed Hunk* is split at the boundaries of each matching change, creating a new *Unblamed Hunk* on each side,
+/// along with a [`BlameEntry`] to represent the match.
+/// This is repeated until there are no non-empty *Unblamed Hunk*s left.
+///
+/// At a high level, what we want to do is the following:
+///
+/// - get the commit
+/// - walk through its parents
+/// - for each parent, do a diff and mark lines that don’t have a suspect yet (this is the term
+///   used in `libgit2`), but that have been changed in this commit
+///
+/// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame
+/// suspects.
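To make the hunk bookkeeping described above concrete, here is a minimal, self-contained sketch of the splitting step. The type and function names are simplified placeholders for illustration only, not the definitions `gix-blame` actually uses:

```rust
// Illustrative sketch only: simplified stand-ins for `gix-blame`'s internal types.
use std::ops::Range;

#[derive(Debug)]
struct BlamedRange {
    commit: &'static str,
    range: Range<u32>,
}

/// Split `unblamed` at the boundaries of `changed`, a range the suspect commit introduced.
/// Returns the now-blamed overlap plus the still-unblamed left/right remainders.
fn split_at_change(
    unblamed: Range<u32>,
    changed: Range<u32>,
    suspect: &'static str,
) -> (Option<BlamedRange>, Vec<Range<u32>>) {
    let start = changed.start.max(unblamed.start);
    let end = changed.end.min(unblamed.end);
    if start >= end {
        // No overlap: the whole range stays unblamed.
        return (None, vec![unblamed]);
    }
    let mut rest = Vec::new();
    if unblamed.start < start {
        rest.push(unblamed.start..start); // part before the change, still unblamed
    }
    if end < unblamed.end {
        rest.push(end..unblamed.end); // part after the change, still unblamed
    }
    (Some(BlamedRange { commit: suspect, range: start..end }), rest)
}

fn main() {
    // One unblamed hunk covering lines 0..10; the suspect changed lines 3..6.
    let (blamed, rest) = split_at_change(0..10, 3..6, "1234abcd");
    println!("{blamed:?} {rest:?}"); // blames 3..6, keeps 0..3 and 6..10 unblamed
}
```

The leftover ranges keep their remaining suspects until a later commit claims them or the traversal ends.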
It can be visualized as follows: +// +// <----------------------------------------> +// <---------------><-----------------------> +// <---><----------><-----------------------> +// <---><----------><-------><-----><-------> +// <---><---><-----><-------><-----><-------> +// <---><---><-----><-------><-----><-><-><-> +pub fn file( + odb: impl gix_object::Find + gix_object::FindHeader, + traverse: impl IntoIterator>, + resource_cache: &mut gix_diff::blob::Platform, + file_path: &BStr, +) -> Result +where + E: Into>, +{ + let mut traverse = traverse.into_iter().peekable(); + let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { + return Err(Error::EmptyTraversal); + }; + let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect); + + let mut stats = Statistics::default(); + let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new()); + let blamed_file_entry_id = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? + .ok_or_else(|| Error::FileMissing { + file_path: file_path.to_owned(), + commit_id: suspect, + })?; + let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec(); + let num_lines_in_blamed = { + let mut interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100); + tokens_for_diffing(&blamed_file_blob) + .tokenize() + .map(|token| interner.intern(token)) + .count() + }; + + // Binary or otherwise empty? + if num_lines_in_blamed == 0 { + return Ok(Outcome::default()); + } + + let mut hunks_to_blame = vec![{ + let range_in_blamed_file = 0..num_lines_in_blamed as u32; + UnblamedHunk { + range_in_blamed_file: range_in_blamed_file.clone(), + suspects: [(suspect, range_in_blamed_file)].into(), + } + }]; + + let mut out = Vec::new(); + let mut diff_state = gix_diff::tree::State::default(); + let mut previous_entry: Option<(ObjectId, ObjectId)> = None; + 'outer: while let Some(item) = traverse.next() { + if hunks_to_blame.is_empty() { + break; + } + let commit = item.map_err(|err| Error::Traverse(err.into()))?; + let suspect = commit.id; + stats.commits_traversed += 1; + + let parent_ids = commit.parent_ids; + if parent_ids.is_empty() { + if traverse.peek().is_none() { + // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of + // the last `item` that was yielded by `traverse`, so it makes sense to assign the + // remaining lines to it, even though we don’t explicitly check whether that is true + // here. We could perhaps use diff-tree-to-tree to compare `suspect` + // against an empty tree to validate this assumption. + if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { + break 'outer; + } + } + + // There is more, keep looking. + continue; + } + + let mut entry = previous_entry + .take() + .filter(|(id, _)| *id == suspect) + .map(|(_, entry)| entry); + if entry.is_none() { + entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?; + } + + let Some(entry_id) = entry else { + continue; + }; + + for (pid, parent_id) in parent_ids.iter().enumerate() { + if let Some(parent_entry_id) = + find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)? 
+ { + let no_change_in_entry = entry_id == parent_entry_id; + if pid == 0 { + previous_entry = Some((*parent_id, parent_entry_id)); + } + if no_change_in_entry { + pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame); + continue 'outer; + } + } + } + + let more_than_one_parent = parent_ids.len() > 1; + for parent_id in parent_ids { + let changes_for_file_path = tree_diff_at_file_path( + &odb, + file_path, + commit.id, + parent_id, + &mut stats, + &mut diff_state, + &mut buf, + &mut buf2, + &mut buf3, + )?; + let Some(modification) = changes_for_file_path else { + if more_than_one_parent { + // None of the changes affected the file we’re currently blaming. + // Copy blame to parent. + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.clone_blame(suspect, parent_id); + } + } else { + pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame); + } + continue; + }; + + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + if more_than_one_parent { + // Do nothing under the assumption that this always (or almost always) + // implies that the file comes from a different parent, compared to which + // it was modified, not added. + } else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { + break 'outer; + } + } + gix_diff::tree::recorder::Change::Deletion { .. } => { + unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted") + } + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; + hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); + pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame); + } + } + } + if more_than_one_parent { + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.remove_blame(suspect); + } + } + } + + debug_assert_eq!( + hunks_to_blame, + vec![], + "only if there is no portion of the file left we have completed the blame" + ); + + // I don’t know yet whether it would make sense to use a data structure instead that preserves + // order on insertion. + out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file)); + Ok(Outcome { + entries: coalesce_blame_entries(out), + blob: blamed_file_blob, + statistics: stats, + }) +} + +/// Pass ownership of each unblamed hunk of `from` to `to`. +/// +/// This happens when `from` didn't actually change anything in the blamed file. +fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec) { + for unblamed_hunk in hunks_to_blame { + unblamed_hunk.pass_blame(from, to); + } +} + +/// Convert each of the unblamed hunk in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process. +/// +/// Return `true` if we are done because `hunks_to_blame` is empty. +fn unblamed_to_out_is_done( + hunks_to_blame: &mut Vec, + out: &mut Vec, + suspect: ObjectId, +) -> bool { + let mut without_suspect = Vec::new(); + out.extend(hunks_to_blame.drain(..).filter_map(|hunk| { + BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| { + without_suspect.push(hunk); + None + }) + })); + *hunks_to_blame = without_suspect; + hunks_to_blame.is_empty() +} + +/// This function merges adjacent blame entries. It merges entries that are adjacent both in the +/// blamed file and in the source file that introduced them. This follows `git`’s +/// behaviour. 
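To illustrate the merge rule just described, here is a stand-alone sketch of such a coalescing pass, using a simplified entry type rather than the crate's `BlameEntry`:

```rust
// Illustrative sketch only: a simplified stand-in for `BlameEntry`.
#[derive(Debug)]
struct Entry {
    commit: &'static str,
    start_in_blamed_file: u32,
    start_in_source_file: u32,
    len: u32,
}

fn coalesce(entries: Vec<Entry>) -> Vec<Entry> {
    let mut out: Vec<Entry> = Vec::with_capacity(entries.len());
    for entry in entries {
        let mergeable = out.last().map_or(false, |prev| {
            prev.commit == entry.commit
                // adjacent in the blamed file…
                && prev.start_in_blamed_file + prev.len == entry.start_in_blamed_file
                // …and, like `git` (but unlike `libgit2`), also adjacent in the source file
                && prev.start_in_source_file + prev.len == entry.start_in_source_file
        });
        if mergeable {
            out.last_mut().expect("just checked").len += entry.len;
        } else {
            out.push(entry);
        }
    }
    out
}

fn main() {
    let merged = coalesce(vec![
        Entry { commit: "c1", start_in_blamed_file: 0, start_in_source_file: 0, len: 2 },
        Entry { commit: "c1", start_in_blamed_file: 2, start_in_source_file: 2, len: 3 },
    ]);
    assert_eq!(merged.len(), 1);
    assert_eq!(merged[0].len, 5);
}
```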
`libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the +/// blamed file which can result in different blames in certain edge cases. See [the commit][1] +/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test +/// for this behaviour in `git`. +/// +/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a +/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131 +fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { + let len = lines_blamed.len(); + lines_blamed + .into_iter() + .fold(Vec::with_capacity(len), |mut acc, entry| { + let previous_entry = acc.last(); + + if let Some(previous_entry) = previous_entry { + let previous_blamed_range = previous_entry.range_in_blamed_file(); + let current_blamed_range = entry.range_in_blamed_file(); + let previous_source_range = previous_entry.range_in_source_file(); + let current_source_range = entry.range_in_source_file(); + if previous_entry.commit_id == entry.commit_id + && previous_blamed_range.end == current_blamed_range.start + // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`. + && previous_source_range.end == current_source_range.start + { + // let combined_range = + let coalesced_entry = BlameEntry { + start_in_blamed_file: previous_blamed_range.start as u32, + start_in_source_file: previous_source_range.start as u32, + len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32) + .expect("BUG: hunks are never zero-sized"), + commit_id: previous_entry.commit_id, + }; + + acc.pop(); + acc.push(coalesced_entry); + } else { + acc.push(entry); + } + + acc + } else { + acc.push(entry); + + acc + } + }) +} + +#[allow(clippy::too_many_arguments)] +fn tree_diff_at_file_path( + odb: impl gix_object::Find + gix_object::FindHeader, + file_path: &BStr, + id: ObjectId, + parent_id: ObjectId, + stats: &mut Statistics, + state: &mut gix_diff::tree::State, + commit_buf: &mut Vec, + lhs_tree_buf: &mut Vec, + rhs_tree_buf: &mut Vec, +) -> Result, Error> { + let parent_tree = odb.find_commit(&parent_id, commit_buf)?.tree(); + stats.commits_to_tree += 1; + + let parent_tree_iter = odb.find_tree_iter(&parent_tree, lhs_tree_buf)?; + stats.trees_decoded += 1; + + let tree_id = odb.find_commit(&id, commit_buf)?.tree(); + stats.commits_to_tree += 1; + + let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?; + stats.trees_decoded += 1; + + let mut recorder = gix_diff::tree::Recorder::default(); + gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder)?; + stats.trees_diffed += 1; + + Ok(recorder.records.into_iter().find(|change| match change { + gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Addition { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Deletion { path, .. } => path == file_path, + })) +} + +fn blob_changes( + odb: impl gix_object::Find + gix_object::FindHeader, + resource_cache: &mut gix_diff::blob::Platform, + oid: ObjectId, + previous_oid: ObjectId, + file_path: &BStr, + stats: &mut Statistics, +) -> Result, Error> { + /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*. 
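For orientation while reading the recorder below: judging from how they are used throughout this file, the recorded `Change` values take roughly the following shape (the actual definition lives in the crate's types module and is not part of this diff):

```rust
// Reconstructed from usage in this file; line numbers refer to the *new* (after) side of the
// blob diff, counts to the *old* (before) side. Not the crate's actual definition.
use std::ops::Range;

#[derive(Debug, PartialEq)]
enum Change {
    /// A stretch of lines in the new blob that was not touched.
    Unchanged(Range<u32>),
    /// Lines added to (or replacing old lines in) the new blob, plus how many old lines they replaced.
    AddedOrReplaced(Range<u32>, u32),
    /// A deletion at this line of the new blob, removing this many old lines.
    Deleted(u32, u32),
}

fn main() {
    // A 10-line new blob in which old line 3 was replaced by new lines 3..5 would be recorded,
    // with the unchanged stretches filled in, as:
    let changes = vec![
        Change::Unchanged(0..3),
        Change::AddedOrReplaced(3..5, 1),
        Change::Unchanged(5..10),
    ];
    assert_eq!(changes.len(), 3);
}
```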
+ struct ChangeRecorder { + last_seen_after_end: u32, + hunks: Vec, + total_number_of_lines: u32, + } + + impl ChangeRecorder { + /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed + /// so that the entire file is represented by [`Change`]. + fn new(total_number_of_lines: u32) -> Self { + ChangeRecorder { + last_seen_after_end: 0, + hunks: Vec::new(), + total_number_of_lines, + } + } + } + + impl gix_diff::blob::Sink for ChangeRecorder { + type Out = Vec; + + fn process_change(&mut self, before: Range, after: Range) { + // This checks for unchanged hunks. + if after.start > self.last_seen_after_end { + self.hunks + .push(Change::Unchanged(self.last_seen_after_end..after.start)); + } + + match (!before.is_empty(), !after.is_empty()) { + (_, true) => { + self.hunks.push(Change::AddedOrReplaced( + after.start..after.end, + before.end - before.start, + )); + } + (true, false) => { + self.hunks.push(Change::Deleted(after.start, before.end - before.start)); + } + (false, false) => unreachable!("BUG: imara-diff provided a non-change"), + } + self.last_seen_after_end = after.end; + } + + fn finish(mut self) -> Self::Out { + if self.total_number_of_lines > self.last_seen_after_end { + self.hunks + .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines)); + } + self.hunks + } + } + + resource_cache.set_resource( + previous_oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::OldOrSource, + &odb, + )?; + resource_cache.set_resource( + oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::NewOrDestination, + &odb, + )?; + + let outcome = resource_cache.prepare_diff()?; + let input = gix_diff::blob::intern::InternedInput::new( + tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()), + tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()), + ); + let number_of_lines_in_destination = input.after.len(); + let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32); + + let res = gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder); + stats.blobs_diffed += 1; + Ok(res) +} + +fn find_path_entry_in_commit( + odb: &impl gix_object::Find, + commit: &gix_hash::oid, + file_path: &BStr, + buf: &mut Vec, + buf2: &mut Vec, + stats: &mut Statistics, +) -> Result, Error> { + let commit_id = odb.find_commit(commit, buf)?.tree(); + stats.commits_to_tree += 1; + let tree_iter = odb.find_tree_iter(&commit_id, buf)?; + stats.trees_decoded += 1; + + let res = tree_iter.lookup_entry( + odb, + buf2, + file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1), + )?; + stats.trees_decoded -= 1; + Ok(res.map(|e| e.oid)) +} + +/// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them +/// so the later access shows the right thing. +pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { + gix_diff::blob::sources::byte_lines_with_terminator(data) +} diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs new file mode 100644 index 00000000000..1afa77723e0 --- /dev/null +++ b/gix-blame/src/file/mod.rs @@ -0,0 +1,490 @@ +//! A module with low-level types and functions. 
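Much of the module below shifts line ranges between the *Blamed File* and a suspect's version of the file by means of an `Offset` (its definition lives in the crate's types module and is not part of this diff). A self-contained sketch of that idea, reconstructed from how it is used here and in the tests further down:

```rust
// Illustrative sketch only: reconstructed from usage, not the crate's actual `Offset`.
use std::ops::Range;

#[derive(Clone, Copy, Debug)]
enum Offset {
    /// This many lines were added on the way to the blamed file.
    Added(u32),
    /// This many lines were deleted on the way to the blamed file.
    Deleted(u32),
}

/// Map a range given in blamed-file coordinates to the corresponding range in the suspect's file.
fn shifted_range(offset: Offset, range_in_blamed_file: &Range<u32>) -> Range<u32> {
    match offset {
        // Lines were added before this point, so the suspect's line numbers are smaller.
        Offset::Added(n) => range_in_blamed_file.start - n..range_in_blamed_file.end - n,
        // Lines were deleted before this point, so the suspect's line numbers are larger.
        Offset::Deleted(n) => range_in_blamed_file.start + n..range_in_blamed_file.end + n,
    }
}

fn main() {
    // Mirrors the `added_hunk_4` test below: blamed-file lines 12..17 with `Offset::Added(5)`
    // correspond to lines 7..12 in the suspect's version of the file.
    assert_eq!(shifted_range(Offset::Added(5), &(12..17)), 7..12);
}
```

In the real code the shift is tracked per suspect, since one unblamed hunk can map to different ranges in different suspects.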
+ +use std::num::NonZeroU32; +use std::ops::Range; + +use gix_hash::ObjectId; + +use crate::types::{BlameEntry, Either, LineRange}; +use crate::types::{Change, Offset, UnblamedHunk}; + +pub(super) mod function; + +/// Compare a section from the *Blamed File* (`hunk`) with a change from a diff and see if there +/// is an intersection with `change`. Based on that intersection, we may generate a [`BlameEntry`] for `out` +/// and/or split the `hunk` into multiple. +/// +/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*. +fn process_change( + out: &mut Vec, + new_hunks_to_blame: &mut Vec, + offset: &mut Offset, + suspect: ObjectId, + hunk: Option, + change: Option, +) -> (Option, Option) { + /// Since `range_with_end` is a range that is not inclusive at the end, + /// `range_with_end.end` is not part of `range_with_end`. + /// The first line that is `range_with_end.end - 1`. + fn actual_end_in_range(test: &Range, containing_range: &Range) -> bool { + (test.end - 1) >= containing_range.start && test.end <= containing_range.end + } + + // # General Rules + // 1. If there is no suspect, immediately reschedule `hunk` and redo processing of `change`. + // + // # Detailed Rules + // 1. whenever we do *not* return `hunk`, it must be added to `new_hunks_to_blame`, shifted with `offset` + // 2. return `hunk` if it is not fully covered by changes yet. + // 3. `change` *must* be returned if it is not fully included in `hunk`. + match (hunk, change) { + (Some(hunk), Some(Change::Unchanged(unchanged))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + return (None, Some(Change::Unchanged(unchanged))); + }; + + match ( + range_in_suspect.contains(&unchanged.start), + actual_end_in_range(&unchanged, range_in_suspect), + ) { + (_, true) => { + // <------> (hunk) + // <-------> (unchanged) + // + // <----------> (hunk) + // <---> (unchanged) + + // skip over unchanged - there will be changes right after. + (Some(hunk), None) + } + (true, false) => { + // <--------> (hunk) + // <-------> (unchanged) + + // Nothing to do with `hunk` except shifting it, + // but `unchanged` needs to be checked against the next hunk to catch up. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); + (None, Some(Change::Unchanged(unchanged))) + } + (false, false) => { + // Any of the following cases are handled by this branch: + // <---> (hunk) + // <----------> (unchanged) + // + // <----> (hunk) + // <--> (unchanged) + // + // <--> (hunk) + // <----> (unchanged) + + if unchanged.end <= range_in_suspect.start { + // <----> (hunk) + // <--> (unchanged) + + // Let changes catch up with us. + (Some(hunk), None) + } else { + // <--> (hunk) + // <----> (unchanged) + // + // <---> (hunk) + // <----------> (unchanged) + + // Nothing to do with `hunk` except shifting it, + // but `unchanged` needs to be checked against the next hunk to catch up. 
+ new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); + (None, Some(Change::Unchanged(unchanged))) + } + } + } + } + (Some(hunk), Some(Change::AddedOrReplaced(added, number_of_lines_deleted))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect).cloned() else { + new_hunks_to_blame.push(hunk); + return (None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted))); + }; + + let suspect_contains_added_start = range_in_suspect.contains(&added.start); + let suspect_contains_added_end = actual_end_in_range(&added, &range_in_suspect); + match (suspect_contains_added_start, suspect_contains_added_end) { + (true, true) => { + // A perfect match of lines to take out of the unblamed portion. + // <----------> (hunk) + // <---> (added) + // <---> (blamed) + // <--> <-> (new hunk) + + // Split hunk at the start of added. + let hunk_starting_at_added = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => { + // `added` starts with `hunk`, nothing to split. + hunk + } + Either::Right((before, after)) => { + // requeue the left side `before` after offsetting it… + new_hunks_to_blame.push(before.shift_by(suspect, *offset)); + // …and treat `after` as `new_hunk`, which contains the `added` range. + after + } + }; + + *offset += added.end - added.start; + *offset -= number_of_lines_deleted; + + // The overlapping `added` section was successfully located. + out.push(BlameEntry::with_offset( + added.clone(), + suspect, + hunk_starting_at_added.offset_for(suspect), + )); + + // Re-split at the end of `added` to continue with what's after. + match hunk_starting_at_added.split_at(suspect, added.end) { + Either::Left(_) => { + // Nothing to split, so we are done with this hunk. + (None, None) + } + Either::Right((_, after)) => { + // Keep processing the unblamed range after `added` + (Some(after), None) + } + } + } + (true, false) => { + // Added overlaps towards the end of `hunk`. + // <--------> (hunk) + // <-------> (added) + // <----> (blamed) + // <--> (new hunk) + + let hunk_starting_at_added = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + // Keep looking for the left side of the unblamed portion. + new_hunks_to_blame.push(before.shift_by(suspect, *offset)); + after + } + }; + + // We can 'blame' the overlapping area of `added` and `hunk`. + out.push(BlameEntry::with_offset( + added.start..range_in_suspect.end, + suspect, + hunk_starting_at_added.offset_for(suspect), + )); + // Keep processing `added`, it's portion past `hunk` may still contribute. + (None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted))) + } + (false, true) => { + // Added reaches into the hunk, so we blame only the overlapping portion of it. 
+ // <-------> (hunk) + // <------> (added) + // <---> (blamed) + // <--> (new hunk) + + out.push(BlameEntry::with_offset( + range_in_suspect.start..added.end, + suspect, + hunk.offset_for(suspect), + )); + + *offset += added.end - added.start; + *offset -= number_of_lines_deleted; + + match hunk.split_at(suspect, added.end) { + Either::Left(_) => (None, None), + Either::Right((_, after)) => (Some(after), None), + } + } + (false, false) => { + // Any of the following cases are handled by this branch: + // <---> (hunk) + // <----------> (added) + // + // <----> (hunk) + // <--> (added) + // + // <--> (hunk) + // <----> (added) + + if added.end <= range_in_suspect.start { + // <----> (hunk) + // <--> (added) + + *offset += added.end - added.start; + *offset -= number_of_lines_deleted; + + // Let changes catchup with `hunk` after letting `added` contribute to the offset. + (Some(hunk), None) + } else if range_in_suspect.end <= added.start { + // <--> (hunk) + // <----> (added) + + // Retry `hunk` once there is overlapping changes to process. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); + + // Let hunks catchup with this change. + ( + None, + Some(Change::AddedOrReplaced(added.clone(), number_of_lines_deleted)), + ) + } else { + // Discard the left side of `added`, keep track of `blamed`, and continue with the + // right side of added that is going past `hunk`. + // <---> (hunk) + // <----------> (added) + // <---> (blamed) + + // Successfully blame the whole range. + out.push(BlameEntry::with_offset( + range_in_suspect.clone(), + suspect, + hunk.offset_for(suspect), + )); + + // And keep processing `added` with future `hunks` that might be affected by it. + ( + None, + Some(Change::AddedOrReplaced(added.clone(), number_of_lines_deleted)), + ) + } + } + } + } + (Some(hunk), Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + return ( + None, + Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)), + ); + }; + + if line_number_in_destination < range_in_suspect.start { + // <---> (hunk) + // | (line_number_in_destination) + + // Track the shift to `hunk` as it affects us, and keep catching up with changes. + *offset -= number_of_lines_deleted; + (Some(hunk), None) + } else if line_number_in_destination < range_in_suspect.end { + // <-----> (hunk) + // | (line_number_in_destination) + + let new_hunk = match hunk.split_at(suspect, line_number_in_destination) { + Either::Left(hunk) => { + // Nothing to split as `line_number_in_destination` is directly at start of `hunk` + hunk + } + Either::Right((before, after)) => { + // `before` isn't affected by deletion, so keep it for later. + new_hunks_to_blame.push(before.shift_by(suspect, *offset)); + // after will be affected by offset, and we will see if there are more changes affecting it. + after + } + }; + *offset -= number_of_lines_deleted; + (Some(new_hunk), None) + } else { + // <---> (hunk) + // | (line_number_in_destination) + + // Catchup with changes. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); + ( + None, + Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)), + ) + } + } + (Some(hunk), None) => { + // nothing to do - changes are exhausted, re-evaluate `hunk`. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); + (None, None) + } + (None, Some(Change::Unchanged(_))) => { + // Nothing changed past the blamed range - do nothing. 
+ (None, None) + } + (None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted))) => { + // Keep track of the shift to apply to hunks in the future. + *offset += added.len() as u32; + *offset -= number_of_lines_deleted; + (None, None) + } + (None, Some(Change::Deleted(_, number_of_lines_deleted))) => { + // Keep track of the shift to apply to hunks in the future. + *offset -= number_of_lines_deleted; + (None, None) + } + (None, None) => { + // Noop, caller shouldn't do that, but not our problem. + (None, None) + } + } +} + +/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other. +/// Once a match is found, it's pushed onto `out`. +fn process_changes( + out: &mut Vec, + hunks_to_blame: Vec, + changes: Vec, + suspect: ObjectId, +) -> Vec { + let mut hunks_iter = hunks_to_blame.into_iter(); + let mut changes_iter = changes.into_iter(); + + let mut hunk = hunks_iter.next(); + let mut change = changes_iter.next(); + + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination = Offset::Added(0); + + loop { + (hunk, change) = process_change( + out, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + hunk, + change, + ); + + hunk = hunk.or_else(|| hunks_iter.next()); + change = change.or_else(|| changes_iter.next()); + + if hunk.is_none() && change.is_none() { + break; + } + } + new_hunks_to_blame +} + +impl UnblamedHunk { + fn shift_by(mut self, suspect: ObjectId, offset: Offset) -> Self { + self.suspects.entry(suspect).and_modify(|e| *e = e.shift_by(offset)); + self + } + + fn split_at(self, suspect: ObjectId, line_number_in_destination: u32) -> Either { + match self.suspects.get(&suspect) { + None => Either::Left(self), + Some(range_in_suspect) => { + if !range_in_suspect.contains(&line_number_in_destination) { + return Either::Left(self); + } + + let split_at_from_start = line_number_in_destination - range_in_suspect.start; + if split_at_from_start > 0 { + let new_suspects_before = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, range.start..(range.start + split_at_from_start))); + + let new_suspects_after = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, (range.start + split_at_from_start)..range.end)); + + let new_hunk_before = Self { + range_in_blamed_file: self.range_in_blamed_file.start + ..(self.range_in_blamed_file.start + split_at_from_start), + suspects: new_suspects_before.collect(), + }; + let new_hunk_after = Self { + range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) + ..(self.range_in_blamed_file.end), + suspects: new_suspects_after.collect(), + }; + + Either::Right((new_hunk_before, new_hunk_after)) + } else { + Either::Left(self) + } + } + } + } + + fn offset_for(&self, suspect: ObjectId) -> Offset { + let range_in_suspect = self + .suspects + .get(&suspect) + .expect("Internal and we know suspect is present"); + + if self.range_in_blamed_file.start > range_in_suspect.start { + Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start) + } else { + Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start) + } + } + + /// Transfer all ranges from the commit at `from` to the commit at `to`. 
+ fn pass_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.remove(&from) { + self.suspects.insert(to, range_in_suspect); + } + } + + fn clone_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.get(&from) { + self.suspects.insert(to, range_in_suspect.clone()); + } + } + + fn remove_blame(&mut self, suspect: ObjectId) { + self.suspects.remove(&suspect); + } +} + +impl BlameEntry { + /// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_source_file`. + fn with_offset(range_in_source_file: Range, commit_id: ObjectId, offset: Offset) -> Self { + debug_assert!( + range_in_source_file.end > range_in_source_file.start, + "{range_in_source_file:?}" + ); + + match offset { + Offset::Added(added) => Self { + start_in_blamed_file: range_in_source_file.start + added, + start_in_source_file: range_in_source_file.start, + len: force_non_zero(range_in_source_file.len() as u32), + commit_id, + }, + Offset::Deleted(deleted) => { + debug_assert!( + range_in_source_file.start >= deleted, + "{range_in_source_file:?} {offset:?}" + ); + + Self { + start_in_blamed_file: range_in_source_file.start - deleted, + start_in_source_file: range_in_source_file.start, + len: force_non_zero(range_in_source_file.len() as u32), + commit_id, + } + } + } + } + + /// Create an offset from a portion of the *Blamed File*. + fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Option { + let range_in_source_file = unblamed_hunk.suspects.get(&commit_id)?; + + Some(Self { + start_in_blamed_file: unblamed_hunk.range_in_blamed_file.start, + start_in_source_file: range_in_source_file.start, + len: force_non_zero(range_in_source_file.len() as u32), + commit_id, + }) + } +} + +fn force_non_zero(n: u32) -> NonZeroU32 { + NonZeroU32::new(n).expect("BUG: hunks are never empty") +} + +#[cfg(test)] +mod tests; diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs new file mode 100644 index 00000000000..c6ed47b29c3 --- /dev/null +++ b/gix-blame/src/file/tests.rs @@ -0,0 +1,1366 @@ +use crate::file::{Offset, UnblamedHunk}; +use gix_hash::ObjectId; +use std::ops::Range; + +fn new_unblamed_hunk(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> UnblamedHunk { + assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + + let range_in_destination = offset.shifted_range(&range_in_blamed_file); + UnblamedHunk { + range_in_blamed_file, + suspects: [(suspect, range_in_destination)].into(), + } +} + +mod process_change { + use super::*; + use crate::file::{force_non_zero, process_change, Change, Offset, UnblamedHunk}; + use crate::BlameEntry; + use gix_hash::ObjectId; + + #[test] + fn nothing() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + None, + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn added_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut 
lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::AddedOrReplaced(0..3, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(3), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn added_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::AddedOrReplaced(2..3, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 2, + start_in_source_file: 2, + len: force_non_zero(1), + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..2, + suspects: [(suspect, 0..2)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn added_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(5); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(10..15, suspect, Offset::Added(0))), + Some(Change::AddedOrReplaced(12..13, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 13..15, + suspects: [(suspect, 13..15)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 12, + start_in_source_file: 12, + len: force_non_zero(1), + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 10..12, + suspects: [(suspect, 5..7)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(6)); + } + + #[test] + fn added_hunk_4() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 7..12 + Some(new_unblamed_hunk(12..17, suspect, Offset::Added(5))), + Some(Change::AddedOrReplaced(9..10, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 15..17, + suspects: [(suspect, 10..12)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 14, + start_in_source_file: 9, + len: force_non_zero(1), + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 12..14, + suspects: [(suspect, 7..9)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn 
added_hunk_5() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::AddedOrReplaced(0..3, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(3), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(2)); + } + + #[test] + fn added_hunk_6() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 0..4 + Some(new_unblamed_hunk(1..5, suspect, Offset::Added(1))), + Some(Change::AddedOrReplaced(0..3, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 4..5, + suspects: [(suspect, 3..4)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 1, + start_in_source_file: 0, + len: force_non_zero(3), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(2)); + } + + #[test] + fn added_hunk_7() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(2); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 2..6 + Some(new_unblamed_hunk(3..7, suspect, Offset::Added(1))), + Some(Change::AddedOrReplaced(3..5, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 6..7, + suspects: [(suspect, 5..6)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 4, + start_in_source_file: 3, + len: force_non_zero(2), + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 3..4, + suspects: [(suspect, 0..1)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn added_hunk_8() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 25..26 + Some(new_unblamed_hunk(23..24, suspect, Offset::Deleted(2))), + Some(Change::AddedOrReplaced(25..27, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::AddedOrReplaced(25..27, 1))); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 23, + start_in_source_file: 25, + len: force_non_zero(1), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + 
assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn added_hunk_9() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 21..22 + Some(new_unblamed_hunk(23..24, suspect, Offset::Added(2))), + Some(Change::AddedOrReplaced(18..22, 3)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 23, + start_in_source_file: 21, + len: force_non_zero(1), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn added_hunk_10() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 70..108 + Some(new_unblamed_hunk(71..109, suspect, Offset::Added(1))), + Some(Change::AddedOrReplaced(106..109, 0)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::AddedOrReplaced(106..109, 0))); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 107, + start_in_source_file: 106, + len: force_non_zero(2), + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 71..107, + suspects: [(suspect, 70..106)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn added_hunk_11() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 137..144 + Some(new_unblamed_hunk(149..156, suspect, Offset::Added(12))), + Some(Change::AddedOrReplaced(143..146, 0)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::AddedOrReplaced(143..146, 0))); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 155, + start_in_source_file: 143, + len: force_non_zero(1), + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 149..155, + suspects: [(suspect, 137..143)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn no_overlap() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Deleted(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 2..5 + Some(new_unblamed_hunk(3..6, suspect, Offset::Added(1))), + Some(Change::AddedOrReplaced(7..10, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::AddedOrReplaced(7..10, 1))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 3..6, + suspects: [(suspect, 5..8)].into() + }] + ); + 
assert_eq!(offset_in_destination, Offset::Deleted(3)); + } + + #[test] + fn no_overlap_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 6..8 + Some(new_unblamed_hunk(9..11, suspect, Offset::Added(3))), + Some(Change::AddedOrReplaced(2..5, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 9..11, + suspects: [(suspect, 6..8)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn no_overlap_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 5..15 + Some(new_unblamed_hunk(4..15, suspect, Offset::Deleted(1))), + Some(Change::AddedOrReplaced(4..5, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 4..15, + suspects: [(suspect, 5..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn no_overlap_4() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 25..27 + Some(new_unblamed_hunk(23..25, suspect, Offset::Deleted(2))), + Some(Change::Unchanged(21..22)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 23..25, + suspects: [(suspect, 25..27)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn no_overlap_5() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 17..18 + Some(new_unblamed_hunk(15..16, suspect, Offset::Deleted(2))), + Some(Change::Deleted(20, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Deleted(20, 1))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 15..16, + suspects: [(suspect, 16..17)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn no_overlap_6() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + 
suspect, + // range_in_destination: 22..24 + Some(new_unblamed_hunk(23..25, suspect, Offset::Added(1))), + Some(Change::Deleted(20, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 23..25, + suspects: [(suspect, 22..24)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(1)); + } + + #[test] + fn enclosing_addition() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 5..8 + Some(new_unblamed_hunk(2..5, suspect, Offset::Deleted(3))), + Some(Change::AddedOrReplaced(3..12, 2)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::AddedOrReplaced(3..12, 2))); + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 2, + start_in_source_file: 5, + len: force_non_zero(3), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn enclosing_deletion() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 13..20 + Some(new_unblamed_hunk(12..19, suspect, Offset::Deleted(1))), + Some(Change::Deleted(15, 2)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 14..19, + suspects: [(suspect, 15..20)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 12..14, + suspects: [(suspect, 10..12)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn enclosing_unchanged_lines() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 109..113 + Some(new_unblamed_hunk(110..114, suspect, Offset::Added(1))), + Some(Change::Unchanged(109..172)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Unchanged(109..172))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 110..114, + suspects: [(suspect, 106..110)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn unchanged_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Unchanged(0..3)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 
0..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn unchanged_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Unchanged(0..7)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Unchanged(0..7))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 0..5)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn unchanged_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Deleted(2); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk { + range_in_blamed_file: 22..30, + suspects: [(suspect, 21..29)].into(), + }), + Some(Change::Unchanged(21..23)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 22..30, + suspects: [(suspect, 21..29)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(2)); + } + + #[test] + fn deleted_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Deleted(5, 3)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Deleted(5, 3))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 0..5)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn deleted_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(2..16, suspect, Offset::Added(0))), + Some(Change::Deleted(0, 4)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 2..16, + suspects: [(suspect, 2..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(4)); + } + + #[test] + fn deleted_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + 
&mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(2..16, suspect, Offset::Added(0))), + Some(Change::Deleted(14, 4)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 14..16, + suspects: [(suspect, 14..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [new_unblamed_hunk(2..14, suspect, Offset::Added(0))] + ); + assert_eq!(offset_in_destination, Offset::Deleted(4)); + } + + #[test] + fn addition_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::AddedOrReplaced(22..25, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn deletion_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::Deleted(11, 5)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(4)); + } + + #[test] + fn unchanged_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::Unchanged(11..13)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); + } +} +mod process_changes { + use crate::file::tests::new_unblamed_hunk; + use crate::file::{force_non_zero, process_changes, Change, Offset, UnblamedHunk}; + use crate::BlameEntry; + use gix_hash::ObjectId; + + #[test] + fn nothing() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let new_hunks_to_blame = process_changes(&mut lines_blamed, vec![], vec![], suspect); + + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + } + + #[test] + fn added_hunk() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; + let changes = vec![Change::AddedOrReplaced(0..4, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(4), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + } + + #[test] + fn added_hunk_2() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = 
vec![Change::AddedOrReplaced(0..4, 0), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(4), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); + } + + #[test] + fn added_hunk_3() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![ + Change::Unchanged(0..2), + Change::AddedOrReplaced(2..4, 0), + Change::Unchanged(4..6), + ]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 2, + start_in_source_file: 2, + len: force_non_zero(2), + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [ + new_unblamed_hunk(0..2, suspect, Offset::Added(0)), + new_unblamed_hunk(4..6, suspect, Offset::Added(2)) + ] + ); + } + + #[test] + fn added_hunk_4_0() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![ + Change::AddedOrReplaced(0..1, 0), + Change::AddedOrReplaced(1..4, 0), + Change::Unchanged(4..6), + ]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(1), + commit_id: suspect + }, + BlameEntry { + start_in_blamed_file: 1, + start_in_source_file: 1, + len: force_non_zero(3), + commit_id: suspect + } + ] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); + } + + #[test] + fn added_hunk_4_1() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::AddedOrReplaced(0..1, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(1), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(1..6, suspect, Offset::Added(1))]); + } + + #[test] + fn added_hunk_4_2() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); + let mut lines_blamed: Vec = vec![BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(2), + commit_id: suspect, + }]; + let hunks_to_blame = vec![new_unblamed_hunk(2..6, suspect_2, Offset::Added(2))]; + let changes = vec![Change::AddedOrReplaced(0..1, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(2), + commit_id: suspect + }, + BlameEntry { + start_in_blamed_file: 2, + start_in_source_file: 0, + len: force_non_zero(1), + commit_id: suspect_2 + } + ] + ); + assert_eq!( + new_hunks_to_blame, + [new_unblamed_hunk(3..6, suspect_2, Offset::Added(3))] + ); + } + + #[test] + 
fn added_hunk_5() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::AddedOrReplaced(0..4, 3), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(4), + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(1))]); + } + + #[test] + fn added_hunk_6() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(4..6, suspect, Offset::Added(1))]; + let changes = vec![Change::AddedOrReplaced(0..3, 0), Change::Unchanged(3..5)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); + } + + #[test] + fn added_hunk_7() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); + let mut lines_blamed: Vec = vec![BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(1), + commit_id: suspect, + }]; + let hunks_to_blame = vec![new_unblamed_hunk(1..3, suspect_2, Offset::Added(1))]; + let changes = vec![Change::AddedOrReplaced(0..1, 2)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(1), + commit_id: suspect + }, + BlameEntry { + start_in_blamed_file: 1, + start_in_source_file: 0, + len: force_non_zero(1), + commit_id: suspect_2 + } + ] + ); + assert_eq!( + new_hunks_to_blame, + [new_unblamed_hunk(2..3, suspect_2, Offset::Added(0))] + ); + } + + #[test] + fn added_hunk_8() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let mut lines_blamed = Vec::new(); + let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; + let changes = vec![ + Change::AddedOrReplaced(0..2, 0), + Change::Unchanged(2..3), + Change::AddedOrReplaced(3..4, 0), + ]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(2), + commit_id: suspect + }, + BlameEntry { + start_in_blamed_file: 3, + start_in_source_file: 3, + len: force_non_zero(1), + commit_id: suspect + } + ] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(2..3, suspect, Offset::Added(2))]); + } + + #[test] + fn added_hunk_9() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let mut lines_blamed: Vec = vec![BlameEntry { + start_in_blamed_file: 30, + start_in_source_file: 30, + len: force_non_zero(1), + commit_id: suspect, + }]; + let hunks_to_blame = vec![ + UnblamedHunk { + range_in_blamed_file: 0..30, + suspects: [(suspect, 0..30)].into(), + }, + UnblamedHunk { + range_in_blamed_file: 31..37, + suspects: [(suspect, 31..37)].into(), + }, + ]; + let changes = vec![ + Change::Unchanged(0..16), + Change::AddedOrReplaced(16..17, 0), + Change::Unchanged(17..37), + ]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, 
hunks_to_blame, changes, suspect);
+
+        lines_blamed.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
+
+        assert_eq!(
+            lines_blamed,
+            [
+                BlameEntry {
+                    start_in_blamed_file: 16,
+                    start_in_source_file: 16,
+                    len: force_non_zero(1),
+                    commit_id: suspect
+                },
+                BlameEntry {
+                    start_in_blamed_file: 30,
+                    start_in_source_file: 30,
+                    len: force_non_zero(1),
+                    commit_id: suspect
+                }
+            ]
+        );
+        assert_eq!(
+            new_hunks_to_blame,
+            [
+                UnblamedHunk {
+                    range_in_blamed_file: 0..16,
+                    suspects: [(suspect, 0..16)].into()
+                },
+                UnblamedHunk {
+                    range_in_blamed_file: 17..30,
+                    suspects: [(suspect, 16..29)].into()
+                },
+                UnblamedHunk {
+                    range_in_blamed_file: 31..37,
+                    suspects: [(suspect, 30..36)].into()
+                }
+            ]
+        );
+    }
+
+    #[test]
+    fn deleted_hunk() {
+        let mut lines_blamed = Vec::new();
+        let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+        let hunks_to_blame = vec![
+            new_unblamed_hunk(0..4, suspect, Offset::Added(0)),
+            new_unblamed_hunk(4..7, suspect, Offset::Added(0)),
+        ];
+        let changes = vec![Change::Deleted(0, 3), Change::AddedOrReplaced(0..4, 0)];
+        let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+        assert_eq!(
+            lines_blamed,
+            [BlameEntry {
+                start_in_blamed_file: 0,
+                start_in_source_file: 0,
+                len: force_non_zero(4),
+                commit_id: suspect
+            }]
+        );
+        assert_eq!(
+            new_hunks_to_blame,
+            [UnblamedHunk {
+                range_in_blamed_file: 4..7,
+                suspects: [(suspect, 3..6)].into()
+            }]
+        );
+    }
+}
diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs
index d13db17bbbb..489434b5b3d 100644
--- a/gix-blame/src/lib.rs
+++ b/gix-blame/src/lib.rs
@@ -1,10 +1,23 @@
 //! A crate to implement an algorithm to annotate lines in tracked files with the commits that changed them.
-#![deny(rust_2018_idioms)]
+//!
+//! ### Terminology
+//!
+//! * **Blamed File**
+//!    - The file as it exists in `HEAD`.
+//!    - the initial state with all lines that we need to associate with a *Source File*.
+//! * **Source File**
+//!    - A file at a version (i.e. commit) that introduces hunks into the final 'image'.
+//! * **Suspects**
+//!    - The versions of the files that can contain hunks that we could use in the final 'image'
+//!    - there can be multiple at the same time as the commit-graph may split up.
+//!    - a suspect turns into the *Source File* once we have found an association into the *Blamed File*.
+#![deny(rust_2018_idioms, missing_docs)]
 #![forbid(unsafe_code)]
 
-#[cfg(test)]
-mod tests {
-    #[test]
-    #[ignore]
-    fn it_works() {}
-}
+mod error;
+pub use error::Error;
+mod types;
+pub use types::{BlameEntry, Outcome, Statistics};
+
+mod file;
+pub use file::function::file;
diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs
new file mode 100644
index 00000000000..e0c8843b4cd
--- /dev/null
+++ b/gix-blame/src/types.rs
@@ -0,0 +1,207 @@
+use crate::file::function::tokens_for_diffing;
+use gix_hash::ObjectId;
+use gix_object::bstr::BString;
+use std::num::NonZeroU32;
+use std::{
+    collections::BTreeMap,
+    ops::{AddAssign, Range, SubAssign},
+};
+
+/// The outcome of [`file()`](crate::file()).
+#[derive(Debug, Default, Clone)]
+pub struct Outcome {
+    /// One entry in sequential order, to associate a hunk in the blamed file with the source commit (and its lines)
+    /// that introduced it.
+    pub entries: Vec<BlameEntry>,
+    /// A buffer with the file content of the *Blamed File*, ready for tokenization.
+    pub blob: Vec<u8>,
+    /// Additional information about the amount of work performed to produce the blame.
+    pub statistics: Statistics,
+}
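As an aside, and not part of the patch: a minimal sketch of how a caller could render a blame from an already computed `gix_blame::Outcome`, relying only on the `entries_with_lines()` iterator added further down in this file and on gix-hash's hex display. The `print_blame` helper name is made up for illustration.

// Illustrative sketch only: print a simple blame listing from a previously
// computed `outcome`. `print_blame` is a hypothetical helper, not crate API.
use gix_blame::Outcome;

fn print_blame(outcome: &Outcome) {
    // `entries_with_lines()` pairs each `BlameEntry` with the lines it covers,
    // tokenized in the same way as the diffs that produced the blame.
    for (entry, lines) in outcome.entries_with_lines() {
        for (offset, line) in lines.iter().enumerate() {
            // 1-based line numbers in the blamed file, as `git blame` prints them.
            let line_number = entry.start_in_blamed_file + offset as u32 + 1;
            println!("{} {:>4} {}", entry.commit_id.to_hex(), line_number, line);
        }
    }
}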
+
+/// Additional information about the performed operations.
+#[derive(Debug, Default, Copy, Clone)]
+pub struct Statistics {
+    /// The number of commits traversed until the blame was complete.
+    pub commits_traversed: usize,
+    /// The number of commits whose trees were extracted.
+    pub commits_to_tree: usize,
+    /// The number of trees that were decoded to find the entry of the file to blame.
+    pub trees_decoded: usize,
+    /// The number of fully-fledged tree-diffs performed to see if the filepath was added, deleted or modified.
+    pub trees_diffed: usize,
+    /// The number of blobs that were compared to each other to learn what changed between commits.
+    /// Note that in order to diff a blob, one needs to load both versions from the database.
+    pub blobs_diffed: usize,
+}
+
+impl Outcome {
+    /// Return an iterator over each entry in [`Self::entries`], along with its lines, line by line.
+    ///
+    /// Note that [`Self::blob`] must be tokenized in exactly the same way as the tokenizer that was used
+    /// to perform the diffs, which is what this method ensures.
+    pub fn entries_with_lines(&self) -> impl Iterator<Item = (BlameEntry, Vec<BString>)> + '_ {
+        use gix_diff::blob::intern::TokenSource;
+        let mut interner = gix_diff::blob::intern::Interner::new(self.blob.len() / 100);
+        let lines_as_tokens: Vec<_> = tokens_for_diffing(&self.blob)
+            .tokenize()
+            .map(|token| interner.intern(token))
+            .collect();
+        self.entries.iter().map(move |e| {
+            (
+                e.clone(),
+                lines_as_tokens[e.range_in_blamed_file()]
+                    .iter()
+                    .map(|token| BString::new(interner[*token].into()))
+                    .collect(),
+            )
+        })
+    }
+}
+
+/// Describes the offset of a particular hunk relative to the *Blamed File*.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum Offset {
+    /// The number of lines to add.
+    Added(u32),
+    /// The number of lines to remove.
+    Deleted(u32),
+}
+
+impl Offset {
+    /// Shift the given `range` according to our offset.
+    pub fn shifted_range(&self, range: &Range<u32>) -> Range<u32> {
+        match self {
+            Offset::Added(added) => {
+                debug_assert!(range.start >= *added, "{self:?} {range:?}");
+                Range {
+                    start: range.start - added,
+                    end: range.end - added,
+                }
+            }
+            Offset::Deleted(deleted) => Range {
+                start: range.start + deleted,
+                end: range.end + deleted,
+            },
+        }
+    }
+}
+
+impl AddAssign<u32> for Offset {
+    fn add_assign(&mut self, rhs: u32) {
+        match self {
+            Self::Added(added) => *self = Self::Added(*added + rhs),
+            Self::Deleted(deleted) => {
+                if rhs > *deleted {
+                    *self = Self::Added(rhs - *deleted);
+                } else {
+                    *self = Self::Deleted(*deleted - rhs);
+                }
+            }
+        }
+    }
+}
+
+impl SubAssign<u32> for Offset {
+    fn sub_assign(&mut self, rhs: u32) {
+        match self {
+            Self::Added(added) => {
+                if rhs > *added {
+                    *self = Self::Deleted(rhs - *added);
+                } else {
+                    *self = Self::Added(*added - rhs);
+                }
+            }
+            Self::Deleted(deleted) => *self = Self::Deleted(*deleted + rhs),
+        }
+    }
+}
+
+/// A mapping of a section of the *Blamed File* to the section in a *Source File* that introduced it.
+///
+/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally,
+/// they have the same content, which is why they are both part of what is returned by [`file()`](crate::file()).
+#[derive(Clone, Debug, PartialEq)]
+pub struct BlameEntry {
+    /// The index of the token in the *Blamed File* (typically lines) where this entry begins.
+    pub start_in_blamed_file: u32,
+    /// The index of the token in the *Source File* (typically lines) where this entry begins.
+    ///
+    /// This is possibly offset compared to `start_in_blamed_file`.
+    pub start_in_source_file: u32,
+    /// The number of lines this hunk spans.
+    pub len: NonZeroU32,
+    /// The commit that introduced the section into the *Source File*.
+    pub commit_id: ObjectId,
+}
+
+impl BlameEntry {
+    /// Create a new instance.
+    pub fn new(range_in_blamed_file: Range<u32>, range_in_source_file: Range<u32>, commit_id: ObjectId) -> Self {
+        debug_assert!(
+            range_in_blamed_file.end > range_in_blamed_file.start,
+            "{range_in_blamed_file:?}"
+        );
+        debug_assert!(
+            range_in_source_file.end > range_in_source_file.start,
+            "{range_in_source_file:?}"
+        );
+        debug_assert_eq!(range_in_source_file.len(), range_in_blamed_file.len());
+
+        Self {
+            start_in_blamed_file: range_in_blamed_file.start,
+            start_in_source_file: range_in_source_file.start,
+            len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"),
+            commit_id,
+        }
+    }
+}
+
+impl BlameEntry {
+    /// Return the range of tokens this entry spans in the *Blamed File*.
+    pub fn range_in_blamed_file(&self) -> Range<usize> {
+        let start = self.start_in_blamed_file as usize;
+        start..start + self.len.get() as usize
+    }
+    /// Return the range of tokens this entry spans in the *Source File*.
+    pub fn range_in_source_file(&self) -> Range<usize> {
+        let start = self.start_in_source_file as usize;
+        start..start + self.len.get() as usize
+    }
+}
+
+pub(crate) trait LineRange {
+    fn shift_by(&self, offset: Offset) -> Self;
+}
+
+impl LineRange for Range<u32> {
+    fn shift_by(&self, offset: Offset) -> Self {
+        offset.shifted_range(self)
+    }
+}
+
+/// Tracks the hunks in the *Blamed File* that are not yet associated with the commit that introduced them.
+#[derive(Debug, PartialEq)]
+pub struct UnblamedHunk {
+    /// The range in the *Blamed File* that this hunk represents.
+    pub range_in_blamed_file: Range<u32>,
+    /// Maps a commit to the range in a source file (i.e. the *Blamed File* at a revision) that is equal to `range_in_blamed_file`.
+    pub suspects: BTreeMap<ObjectId, Range<u32>>,
+}
+
+#[derive(Debug)]
+pub(crate) enum Either<T, U> {
+    Left(T),
+    Right(U),
+}
+
+/// A single change between two blobs, or an unchanged region.
+#[derive(Debug, PartialEq)]
+pub enum Change {
+    /// A range of tokens that wasn't changed.
+    Unchanged(Range<u32>),
+    /// `(added_line_range, num_deleted_in_before)`
+    AddedOrReplaced(Range<u32>, u32),
+    /// `(line_to_start_deletion_at, num_deleted_in_before)`
+    Deleted(u32, u32),
+}
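A short usage sketch, not part of the patch, of the public `BlameEntry` API introduced in this file; it assumes `gix-blame` and `gix-hash` are available as dependencies.

// Illustrative sketch: how the public `BlameEntry` type above maps between
// the blamed file and a source file.
use gix_blame::BlameEntry;
use gix_hash::ObjectId;

fn main() {
    let commit = ObjectId::null(gix_hash::Kind::Sha1);
    // Lines 2..5 of the blamed file were introduced as lines 4..7 of the source file.
    let entry = BlameEntry::new(2..5, 4..7, commit);

    assert_eq!(entry.len.get(), 3);
    // Both ranges cover the same number of tokens, only their starting points differ.
    assert_eq!(entry.range_in_blamed_file(), 2..5);
    assert_eq!(entry.range_in_source_file(), 4..7);
}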
diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs
index 4cb22417ece..258a3457c4a 100644
--- a/gix-blame/tests/blame.rs
+++ b/gix-blame/tests/blame.rs
@@ -1,4 +1,271 @@
+use std::path::PathBuf;
+
+use gix_hash::ObjectId;
+use gix_object::bstr;
+
+struct Baseline<'a> {
+    lines: bstr::Lines<'a>,
+}
+
+mod baseline {
+    use std::path::Path;
+
+    use gix_blame::BlameEntry;
+    use gix_hash::ObjectId;
+    use gix_ref::bstr::ByteSlice;
+
+    use super::Baseline;
+
+    // These fields are used by `git` in its porcelain output.
+    const HEADER_FIELDS: [&str; 12] = [
+        // https://github.com/git/git/blob/6258f68c3c1092c901337895c864073dcdea9213/builtin/blame.c#L256-L280
+        "author",
+        "author-mail",
+        "author-time",
+        "author-tz",
+        "committer",
+        "committer-mail",
+        "committer-time",
+        "committer-tz",
+        "summary",
+        "boundary",
+        // https://github.com/git/git/blob/6258f68c3c1092c901337895c864073dcdea9213/builtin/blame.c#L239-L248
+        "previous",
+        "filename",
+    ];
+
+    fn is_known_header_field(field: &&str) -> bool {
+        HEADER_FIELDS.contains(field)
+    }
+
+    impl Baseline<'_> {
+        pub fn collect(baseline_path: impl AsRef<Path>) -> std::io::Result<Vec<BlameEntry>> {
+            let content = std::fs::read(baseline_path)?;
+
+            Ok(Baseline { lines: content.lines() }.collect())
+        }
+    }
+
+    impl Iterator for Baseline<'_> {
+        type Item = BlameEntry;
+
+        fn next(&mut self) -> Option<Self::Item> {
+            let mut ranges = None;
+            let mut commit_id = gix_hash::Kind::Sha1.null();
+            let mut skip_lines: u32 = 0;
+
+            for line in self.lines.by_ref() {
+                if line.starts_with(b"\t") {
+                    // Each group consists of a header and one or more lines. We break from the
+                    // loop, thus returning a `BlameEntry` from `next`, once we have seen the number
+                    // of lines starting with "\t" as indicated in the group’s header.
+                    skip_lines -= 1;
+
+                    if skip_lines == 0 {
+                        break;
+                    } else {
+                        continue;
+                    }
+                }
+
+                let fields: Vec<&str> = line.to_str().unwrap().split(' ').collect();
+                if fields.len() == 4 {
+                    // We’re possibly dealing with a group header.
+                    // If we can’t parse the first field as an `ObjectId`, we know this is not a
+                    // group header, so we continue. This can yield false positives, but for
+                    // testing purposes, we don’t bother.
+                    commit_id = match ObjectId::from_hex(fields[0].as_bytes()) {
+                        Ok(id) => id,
+                        Err(_) => continue,
+                    };
+
+                    let line_number_in_source_file = fields[1].parse::<u32>().unwrap();
+                    let line_number_in_final_file = fields[2].parse::<u32>().unwrap();
+                    // The last field indicates the number of lines this group contains info for
+                    // (this is not equal to the number of lines in git blame’s porcelain output).
+                    let number_of_lines_in_group = fields[3].parse::<u32>().unwrap();
+
+                    skip_lines = number_of_lines_in_group;
+
+                    let source_range =
+                        (line_number_in_source_file - 1)..(line_number_in_source_file + number_of_lines_in_group - 1);
+                    let blame_range =
+                        (line_number_in_final_file - 1)..(line_number_in_final_file + number_of_lines_in_group - 1);
+                    assert!(ranges.is_none(), "should not overwrite existing ranges");
+                    ranges = Some((blame_range, source_range));
+                } else if !is_known_header_field(&fields[0]) && ObjectId::from_hex(fields[0].as_bytes()).is_err() {
+                    panic!("unexpected line: '{:?}'", line.as_bstr());
+                }
+            }
+
+            let Some((range_in_blamed_file, range_in_source_file)) = ranges else {
+                // No new lines were parsed, so we assume the iterator is finished.
+                return None;
+            };
+            Some(BlameEntry::new(range_in_blamed_file, range_in_source_file, commit_id))
+        }
+    }
+}
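For reference, the group headers consumed by the baseline parser above follow the `<commit> <line-in-source> <line-in-final> <number-of-lines>` shape of `git blame --porcelain` output. The standalone sketch below, not part of the patch and using a made-up `parse_group_header` helper, shows just that split.

// Standalone sketch of the porcelain group-header shape handled above:
// "<commit> <line in source file> <line in final file> <number of lines in group>".
fn parse_group_header(line: &str) -> Option<(String, u32, u32, u32)> {
    let fields: Vec<&str> = line.split(' ').collect();
    if fields.len() != 4 {
        return None;
    }
    Some((
        fields[0].to_string(),
        fields[1].parse().ok()?, // 1-based line number in the source file
        fields[2].parse().ok()?, // 1-based line number in the final (blamed) file
        fields[3].parse().ok()?, // number of lines covered by this group
    ))
}

fn main() {
    let header = "0000000000000000000000000000000000000000 4 7 2";
    let (commit, in_source, in_final, len) = parse_group_header(header).expect("well-formed header");
    assert_eq!(commit.len(), 40);
    // The baseline parser above converts these 1-based numbers into 0-based, half-open ranges.
    assert_eq!((in_source, in_final, len), (4, 7, 2));
}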
+
+struct Fixture {
+    odb: gix_odb::Handle,
+    resource_cache: gix_diff::blob::Platform,
+    commits: Vec<Result<gix_traverse::commit::Info, gix_traverse::commit::topo::Error>>,
+}
+
+impl Fixture {
+    fn new() -> gix_testtools::Result<Fixture> {
+        Self::for_worktree_path(fixture_path())
+    }
+
+    fn for_worktree_path(worktree_path: PathBuf) -> gix_testtools::Result<Fixture> {
+        use gix_ref::store::WriteReflog;
+
+        let store = gix_ref::file::Store::at(
+            worktree_path.join(".git"),
+            gix_ref::store::init::Options {
+                write_reflog: WriteReflog::Disable,
+                ..Default::default()
+            },
+        );
+        let odb = gix_odb::at(worktree_path.join(".git/objects"))?;
+
+        let mut reference = gix_ref::file::Store::find(&store, "HEAD")?;
+
+        // Needed for `peel_to_id_in_place`.
+        use gix_ref::file::ReferenceExt;
+
+        let head_id = reference.peel_to_id_in_place(&store, &odb)?;
+
+        let commits: Vec<_> = gix_traverse::commit::topo::Builder::from_iters(&odb, [head_id], None::<Vec<ObjectId>>)
+            .build()?
+            .collect();
+
+        let git_dir = worktree_path.join(".git");
+        let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default())?;
+        let stack = gix_worktree::Stack::from_state_and_ignore_case(
+            worktree_path.clone(),
+            false,
+            gix_worktree::stack::State::AttributesAndIgnoreStack {
+                attributes: Default::default(),
+                ignore: Default::default(),
+            },
+            &index,
+            index.path_backing(),
+        );
+        let capabilities = gix_fs::Capabilities::probe(&git_dir);
+        let resource_cache = gix_diff::blob::Platform::new(
+            Default::default(),
+            gix_diff::blob::Pipeline::new(
+                gix_diff::blob::pipeline::WorktreeRoots {
+                    old_root: None,
+                    new_root: None,
+                },
+                gix_filter::Pipeline::new(Default::default(), Default::default()),
+                vec![],
+                gix_diff::blob::pipeline::Options {
+                    large_file_threshold_bytes: 0,
+                    fs: capabilities,
+                },
+            ),
+            gix_diff::blob::pipeline::Mode::ToGit,
+            stack,
+        );
+        Ok(Fixture {
+            odb,
+            resource_cache,
+            commits,
+        })
+    }
+}
+
+macro_rules! mktest {
+    ($name:ident, $case:expr, $number_of_lines:literal) => {
+        #[test]
+        fn $name() -> gix_testtools::Result<()> {
+            let Fixture {
+                odb,
+                mut resource_cache,
+                commits,
+            } = Fixture::new()?;
+
+            let lines_blamed = gix_blame::file(
+                &odb,
+                commits,
+                &mut resource_cache,
+                format!("{}.txt", $case).as_str().into(),
+            )?
+ .entries; + + assert_eq!(lines_blamed.len(), $number_of_lines); + + let git_dir = fixture_path().join(".git"); + let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case)))?; + + assert_eq!(baseline.len(), $number_of_lines); + assert_eq!(lines_blamed, baseline); + Ok(()) + } + }; +} + +mktest!(simple_case, "simple", 4); +mktest!(multiline_hunks, "multiline-hunks", 3); +mktest!(deleted_lines, "deleted-lines", 1); +mktest!(deleted_lines_multiple_hunks, "deleted-lines-multiple-hunks", 2); +mktest!(changed_lines, "changed-lines", 1); +mktest!( + changed_line_between_unchanged_lines, + "changed-line-between-unchanged-lines", + 3 +); +mktest!(added_lines, "added-lines", 2); +mktest!(added_lines_around, "added-lines-around", 3); +mktest!(switched_lines, "switched-lines", 4); +mktest!(added_line_before_changed_line, "added-line-before-changed-line", 3); +mktest!(same_line_changed_twice, "same-line-changed-twice", 2); +mktest!(coalesce_adjacent_hunks, "coalesce-adjacent-hunks", 1); + +mktest!(resolved_conflict, "resolved-conflict", 2); +mktest!(file_in_one_chain_of_ancestors, "file-in-one-chain-of-ancestors", 1); +mktest!( + different_file_in_another_chain_of_ancestors, + "different-file-in-another-chain-of-ancestors", + 1 +); +mktest!(file_only_changed_in_branch, "file-only-changed-in-branch", 2); + +/// As of 2024-09-24, these tests are expected to fail. +/// +/// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 #[test] -fn it_works() { - let _worktree = gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap(); +#[should_panic = "empty-lines-myers"] +fn diff_disparity() { + for case in ["empty-lines-myers", "empty-lines-histogram"] { + let Fixture { + odb, + mut resource_cache, + commits, + } = Fixture::new().unwrap(); + + let lines_blamed = gix_blame::file( + &odb, + commits, + &mut resource_cache, + format!("{case}.txt").as_str().into(), + ) + .unwrap() + .entries; + + assert_eq!(lines_blamed.len(), 5); + + let git_dir = fixture_path().join(".git"); + let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline"))).unwrap(); + + assert_eq!(lines_blamed, baseline, "{case}"); + } +} + +fn fixture_path() -> PathBuf { + gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap() } diff --git a/gix-blame/tests/fixtures/make_blame_repo.sh b/gix-blame/tests/fixtures/make_blame_repo.sh index 279cb3fe9d5..31e30c42e4d 100755 --- a/gix-blame/tests/fixtures/make_blame_repo.sh +++ b/gix-blame/tests/fixtures/make_blame_repo.sh @@ -1,23 +1,206 @@ #!/usr/bin/env bash set -eu -o pipefail - git init -q +git config --local diff.algorithm histogram + git config merge.ff false git checkout -q -b main -git commit -q --allow-empty -m c1 -git tag at-c1 -git commit -q --allow-empty -m c2 -git commit -q --allow-empty -m c3 -git commit -q --allow-empty -m c4 - -git checkout -q -b branch1 -git commit -q --allow-empty -m b1c1 -git tag at-b1c1 -git commit -q --allow-empty -m b1c2 - -git checkout -q main -git commit -q --allow-empty -m c5 -git tag at-c5 -git merge branch1 -m m1b1 + +echo "line 1" >> simple.txt +git add simple.txt +git commit -q -m c1 + +echo -e "line 1\nline 2\nline 3" >> multiline-hunks.txt +git add multiline-hunks.txt +git commit -q -m c1.1 + +echo -e "line 1\nline 2" > changed-lines.txt +echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> changed-line-between-unchanged-lines.txt +git add changed-lines.txt +git add changed-line-between-unchanged-lines.txt +git commit -q -m c1.2 + +echo "line 2" >> 
added-lines.txt +echo "line 2" >> added-lines-around.txt +echo -e "line 1\nline 2" > coalesce-adjacent-hunks.txt +git add added-lines.txt +git add added-lines-around.txt +git add coalesce-adjacent-hunks.txt +git commit -q -m c1.3 + +echo "line 2" >> simple.txt +git add simple.txt +git commit -q -m c2 + +echo -e "line 4\nline 5\nline 6" >> multiline-hunks.txt +git add multiline-hunks.txt +git commit -q -m c2.1 + +echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> deleted-lines.txt +echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> deleted-lines-multiple-hunks.txt +git add deleted-lines.txt +git add deleted-lines-multiple-hunks.txt +git commit -q -m c2.2 + +echo -e "line 1\nline 2\nline 3" > added-line-before-changed-line.txt +git add added-line-before-changed-line.txt +git commit -q -m c2.3 + +echo -e "line 1\nline 2" > same-line-changed-twice.txt +echo -e "line 1\nline in between\nline 2" > coalesce-adjacent-hunks.txt +git add same-line-changed-twice.txt +git add coalesce-adjacent-hunks.txt +git commit -q -m c2.4 + +echo "line 3" >> simple.txt +git add simple.txt +git commit -q -m c3 + +echo -e "line 3\nline 4" > deleted-lines.txt +echo -e "line 2\nline 4" > deleted-lines-multiple-hunks.txt +git add deleted-lines.txt +git add deleted-lines-multiple-hunks.txt +git commit -q -m c3.1 + +echo -e "line 3\nline 4" > changed-lines.txt +echo -e "line 1\nline 2\nline 3 changed\nline 4\nline 5\nline 6" > changed-line-between-unchanged-lines.txt +git add changed-lines.txt +git add changed-line-between-unchanged-lines.txt +git commit -q -m c3.2 + +echo -e "line 2\nline 3" > added-line-before-changed-line.txt +echo -e "line 1\nline 2" > coalesce-adjacent-hunks.txt +git add added-line-before-changed-line.txt +git add coalesce-adjacent-hunks.txt +git commit -q -m c3.3 + +echo -e "line 1\nline 2 changed" > same-line-changed-twice.txt +git add same-line-changed-twice.txt +git commit -q -m c3.4 + +echo "line 4" >> simple.txt +git add simple.txt +git commit -q -m c4 + +echo -e "line 7\nline 8\nline 9" >> multiline-hunks.txt +git add multiline-hunks.txt +git commit -q -m c4.1 + +echo -e "line 1\nline 3\nline 2\nline 4" > switched-lines.txt +git add switched-lines.txt +git commit -q -m c4.2 + +echo -e "line 2 changed\nline 3" > added-line-before-changed-line.txt +git add added-line-before-changed-line.txt +git commit -q -m c4.3 + +echo -e "line 1\nline 2 changed a second time" > same-line-changed-twice.txt +git add same-line-changed-twice.txt +git commit -q -m c4.4 + +echo -e " line 1\n\n line 2\n\n line 3" > empty-lines-histogram.txt +cp empty-lines-histogram.txt empty-lines-myers.txt +git add empty-lines-histogram.txt empty-lines-myers.txt +git commit -q -m c4.5 + +echo -e "line 0\nline 1\nline 2" > added-lines.txt +echo -e "line 0\nline 1\nline 2\nline 3" > added-lines-around.txt +git add added-lines.txt +git add added-lines-around.txt +git commit -q -m c5 + +echo -e "line 4" > deleted-lines.txt +git add deleted-lines.txt +git commit -q -m c5.1 + +echo -e "line 1\nline 2\nline 3\nline 4" > switched-lines.txt +git add switched-lines.txt +git commit -q -m c5.2 + +echo -e "line 1\nline 2 changed\nline 3" > added-line-before-changed-line.txt +git add added-line-before-changed-line.txt +git commit -q -m c5.3 + +echo -e " line 1\n\n line in between\n\n line 2\n\n line in between\n\n line 3" > empty-lines-histogram.txt +cp empty-lines-histogram.txt empty-lines-myers.txt +git add empty-lines-histogram.txt empty-lines-myers.txt +git commit -q -m c5.4 + +# The commit history created by the commits 
above this line is linear, it only +# contains commits that have exactly one parent. +# Below this line, there’s also commits that have more than one parent. + +echo -e "line 1 original\nline 2\n line 3" > resolved-conflict.txt +git add resolved-conflict.txt +git commit -q -m c6 + +echo -e "line 1 changed\nline 2\n line 3" > resolved-conflict.txt +git add resolved-conflict.txt +git commit -q -m c7 + +git checkout -b different-branch-to-create-a-conflict +git reset --hard HEAD~1 + +echo -e "line 1 changed in a different way\nline 2\n line 3" > resolved-conflict.txt +git add resolved-conflict.txt +git commit -q -m c8 + +git checkout main +git merge different-branch-to-create-a-conflict || true + +echo -e "line 1 conflict resolved\nline 2\n line 3" > resolved-conflict.txt +git add resolved-conflict.txt +git commit -q -m c9 + +echo -e "line 1\nline 2\n line 3" > file-in-one-chain-of-ancestors.txt +git add file-in-one-chain-of-ancestors.txt +git commit -q -m c10 + +git checkout -b different-branch-that-does-not-contain-file +git reset --hard HEAD~1 + +echo -e "line 4\nline 5\n line 6" > different-file-in-another-chain-of-ancestors.txt +git add different-file-in-another-chain-of-ancestors.txt +git commit -q -m c11 + +git checkout main +git merge different-branch-that-does-not-contain-file || true + +echo -e "line 1\nline 2\n line 3" > file-only-changed-in-branch.txt +git add file-only-changed-in-branch.txt +git commit -q -m c12 + +git checkout -b branch-that-has-one-commit + +echo -e "line 1 changed\nline 2\n line 3" > file-only-changed-in-branch.txt +git add file-only-changed-in-branch.txt +git commit -q -m c13 + +git checkout main +git merge branch-that-has-one-commit || true + +git blame --porcelain simple.txt > .git/simple.baseline +git blame --porcelain multiline-hunks.txt > .git/multiline-hunks.baseline +git blame --porcelain deleted-lines.txt > .git/deleted-lines.baseline +git blame --porcelain deleted-lines-multiple-hunks.txt > .git/deleted-lines-multiple-hunks.baseline +git blame --porcelain changed-lines.txt > .git/changed-lines.baseline +git blame --porcelain changed-line-between-unchanged-lines.txt > .git/changed-line-between-unchanged-lines.baseline +git blame --porcelain added-lines.txt > .git/added-lines.baseline +git blame --porcelain added-lines-around.txt > .git/added-lines-around.baseline +git blame --porcelain switched-lines.txt > .git/switched-lines.baseline +git blame --porcelain added-line-before-changed-line.txt > .git/added-line-before-changed-line.baseline +git blame --porcelain same-line-changed-twice.txt > .git/same-line-changed-twice.baseline +git blame --porcelain coalesce-adjacent-hunks.txt > .git/coalesce-adjacent-hunks.baseline + +git blame --porcelain resolved-conflict.txt > .git/resolved-conflict.baseline +git blame --porcelain file-in-one-chain-of-ancestors.txt > .git/file-in-one-chain-of-ancestors.baseline +git blame --porcelain different-file-in-another-chain-of-ancestors.txt > .git/different-file-in-another-chain-of-ancestors.baseline +git blame --porcelain file-only-changed-in-branch.txt > .git/file-only-changed-in-branch.baseline + +git blame --porcelain empty-lines-histogram.txt > .git/empty-lines-histogram.baseline + +git config --local diff.algorithm myers + +git blame --porcelain empty-lines-myers.txt > .git/empty-lines-myers.baseline diff --git a/gix/Cargo.toml b/gix/Cargo.toml index fcfc29706a3..c0578080ff9 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -141,6 +141,9 @@ blob-diff = ["gix-diff/blob", "attributes"] ## Add functions to 
specifically merge files, using the standard three-way merge that git offers. merge = ["tree-editor", "blob-diff", "dep:gix-merge", "attributes"] +## Add blame command similar to `git blame`. +blame = ["dep:gix-blame"] + ## Make it possible to turn a tree into a stream of bytes, which can be decoded to entries and turned into various other formats. worktree-stream = ["gix-worktree-stream", "attributes"] @@ -371,6 +374,7 @@ gix-command = { version = "^0.4.0", path = "../gix-command", optional = true } gix-worktree-stream = { version = "^0.18.0", path = "../gix-worktree-stream", optional = true } gix-archive = { version = "^0.18.0", path = "../gix-archive", default-features = false, optional = true } +gix-blame = { version= "^0.0.0", path ="../gix-blame", optional = true } # For communication with remotes gix-protocol = { version = "^0.47.0", path = "../gix-protocol" } diff --git a/gix/src/lib.rs b/gix/src/lib.rs index 906db6bb3e8..6c8d06f91dd 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -95,6 +95,8 @@ pub use gix_actor as actor; #[cfg(feature = "attributes")] pub use gix_attributes as attrs; +#[cfg(feature = "blame")] +pub use gix_blame as blame; #[cfg(feature = "command")] pub use gix_command as command; pub use gix_commitgraph as commitgraph; diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 2391dd14cd3..625f9733268 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1533,6 +1533,17 @@ pub fn main() -> Result<()> { }, ), }, + Subcommands::Blame { statistics, file } => prepare_and_run( + "blame", + trace, + verbose, + progress, + progress_keep_open, + None, + move |_progress, out, err| { + core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out, statistics.then_some(err)) + }, + ), Subcommands::Completions { shell, out_dir } => { let mut app = Args::command(); diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index b0928c0d426..a1f37b08e13 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -151,6 +151,14 @@ pub enum Subcommands { /// Subcommands that need no git repository to run. #[clap(subcommand)] Free(free::Subcommands), + /// Blame lines in a file + Blame { + /// Print additional statistics to help understanding performance. + #[clap(long, short = 's')] + statistics: bool, + /// The file to create the blame information for. + file: std::ffi::OsString, + }, /// Generate shell completions to stdout or a directory. #[clap(visible_alias = "generate-completions", visible_alias = "shell-completions")] Completions {