From d27adf70b4e2f57d8431a0a553119322d7158f4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Thu, 19 Dec 2024 20:11:11 +0100 Subject: [PATCH 01/16] Add initial implementation and tests for `gix-blame`. Note that it's still very early, and this is more of a proof-of-concept that may still have some shortcomings compared to `git blame`. --- Cargo.lock | 10 + gix-blame/Cargo.toml | 12 +- gix-blame/src/lib.rs | 978 +++++++++++- gix-blame/tests/blame.rs | 1576 ++++++++++++++++++- gix-blame/tests/fixtures/make_blame_repo.sh | 215 ++- 5 files changed, 2767 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 28796ddce5a..f09c7f8a822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1538,7 +1538,17 @@ dependencies = [ name = "gix-blame" version = "0.0.0" dependencies = [ + "gix-diff", + "gix-filter", + "gix-fs 0.12.0", + "gix-hash 0.15.1", + "gix-index 0.37.0", + "gix-object 0.46.0", + "gix-odb", + "gix-ref 0.49.0", "gix-testtools", + "gix-traverse 0.43.0", + "gix-worktree 0.38.0", ] [[package]] diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index de8b8fa22b9..747f2bf17b6 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -5,7 +5,7 @@ name = "gix-blame" version = "0.0.0" repository = "https://github.com/GitoxideLabs/gitoxide" license = "MIT OR Apache-2.0" -description = "A crate of the gitoxide project dedicated implementing a 'blame' algorithm" +description = "A crate of the gitoxide project dedicated to implementing a 'blame' algorithm" authors = ["Christoph Rüßler ", "Sebastian Thiel "] edition = "2021" rust-version = "1.65" @@ -14,6 +14,16 @@ rust-version = "1.65" doctest = false [dependencies] +gix-diff = { version = "^0.48.0", path = "../gix-diff", default-features = false, features = ["blob"] } +gix-object = { version = "^0.46.0", path = "../gix-object" } +gix-hash = { version = "^0.15.0", path = "../gix-hash" } +gix-worktree = { version = "^0.38.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } +gix-traverse = { version = "^0.43.0", path = "../gix-traverse" } [dev-dependencies] +gix-ref = { version = "^0.49.0", path = "../gix-ref" } +gix-filter = { version = "^0.15.0", path = "../gix-filter" } +gix-fs = { version = "^0.12.0", path = "../gix-fs" } +gix-index = { version = "^0.37.0", path = "../gix-index" } +gix-odb = { version = "^0.65.0", path = "../gix-odb" } gix-testtools = { path = "../tests/tools" } diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index d13db17bbbb..25970cf05bf 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -2,9 +2,977 @@ #![deny(rust_2018_idioms)] #![forbid(unsafe_code)] -#[cfg(test)] -mod tests { - #[test] - #[ignore] - fn it_works() {} +use std::{ + collections::BTreeMap, + ops::{Add, AddAssign, Range, SubAssign}, + path::PathBuf, +}; + +use gix_hash::ObjectId; +use gix_object::bstr::BStr; +use gix_object::FindExt; + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Offset { + Added(u32), + Deleted(u32), +} + +impl Add for Offset { + type Output = Offset; + + fn add(self, rhs: u32) -> Self::Output { + let Self::Added(added) = self else { todo!() }; + + Self::Added(added + rhs) + } +} + +impl Add for Offset { + type Output = Offset; + + fn add(self, rhs: Offset) -> Self::Output { + match (self, rhs) { + (Self::Added(added), Offset::Added(added_rhs)) => Self::Added(added + added_rhs), + (Self::Added(added), Offset::Deleted(deleted_rhs)) => { + if deleted_rhs > added { + Self::Deleted(deleted_rhs - added) + } else { + Self::Added(added - 
deleted_rhs) + } + } + (Self::Deleted(deleted), Offset::Added(added_rhs)) => { + if added_rhs > deleted { + Self::Added(added_rhs - deleted) + } else { + Self::Deleted(deleted - added_rhs) + } + } + (Self::Deleted(deleted), Offset::Deleted(deleted_rhs)) => Self::Deleted(deleted + deleted_rhs), + } + } +} + +impl AddAssign for Offset { + fn add_assign(&mut self, rhs: u32) { + match self { + Self::Added(added) => *self = Self::Added(*added + rhs), + Self::Deleted(deleted) => { + if rhs > *deleted { + *self = Self::Added(rhs - *deleted); + } else { + *self = Self::Deleted(*deleted - rhs); + } + } + } + } +} + +impl SubAssign for Offset { + fn sub_assign(&mut self, rhs: u32) { + match self { + Self::Added(added) => { + if rhs > *added { + *self = Self::Deleted(rhs - *added); + } else { + *self = Self::Added(*added - rhs); + } + } + Self::Deleted(deleted) => *self = Self::Deleted(*deleted + rhs), + } + } +} + +#[derive(Debug, PartialEq)] +pub struct BlameEntry { + pub range_in_blamed_file: Range, + pub range_in_original_file: Range, + pub commit_id: ObjectId, +} + +impl BlameEntry { + pub fn new(range_in_blamed_file: Range, range_in_original_file: Range, commit_id: ObjectId) -> Self { + assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + assert!( + range_in_original_file.end > range_in_original_file.start, + "{range_in_original_file:?}" + ); + + Self { + range_in_blamed_file: range_in_blamed_file.clone(), + range_in_original_file: range_in_original_file.clone(), + commit_id, + } + } + + fn with_offset(range_in_original_file: Range, commit_id: ObjectId, offset: Offset) -> Self { + assert!( + range_in_original_file.end > range_in_original_file.start, + "{range_in_original_file:?}" + ); + + match offset { + Offset::Added(added) => Self { + range_in_blamed_file: (range_in_original_file.start + added)..(range_in_original_file.end + added), + range_in_original_file, + commit_id, + }, + Offset::Deleted(deleted) => { + assert!( + range_in_original_file.start >= deleted, + "{range_in_original_file:?} {offset:?}" + ); + + Self { + range_in_blamed_file: (range_in_original_file.start - deleted) + ..(range_in_original_file.end - deleted), + range_in_original_file, + commit_id, + } + } + } + } + + fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self { + let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).expect("TODO"); + + Self { + range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(), + range_in_original_file: range_in_original_file.clone(), + commit_id, + } + } +} + +trait LineRange { + fn shift_by(&self, offset: Offset) -> Self; +} + +impl LineRange for Range { + fn shift_by(&self, offset: Offset) -> Self { + match offset { + Offset::Added(added) => { + assert!(self.start >= added, "{self:?} {offset:?}"); + + Self { + start: self.start - added, + end: self.end - added, + } + } + Offset::Deleted(deleted) => Self { + start: self.start + deleted, + end: self.end + deleted, + }, + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct UnblamedHunk { + pub range_in_blamed_file: Range, + pub suspects: BTreeMap>, +} + +#[derive(Debug)] +enum Either { + Left(T), + Right(U), +} + +impl UnblamedHunk { + pub fn new(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> Self { + assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + + let range_in_destination = range_in_blamed_file.shift_by(offset); + + Self { + range_in_blamed_file, + suspects: 
[(suspect, range_in_destination)].into(), + } + } + + fn shift_by(mut self, suspect: ObjectId, offset: Offset) -> Self { + self.suspects.entry(suspect).and_modify(|e| *e = e.shift_by(offset)); + + self + } + + fn split_at(self, suspect: ObjectId, line_number_in_destination: u32) -> Either { + match self.suspects.get(&suspect) { + None => Either::Left(self), + Some(range_in_suspect) => { + if line_number_in_destination > range_in_suspect.start + && line_number_in_destination < range_in_suspect.end + { + let split_at_from_start = line_number_in_destination - range_in_suspect.start; + + if split_at_from_start > 0 { + let new_suspects_before = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, range.start..(range.start + split_at_from_start))) + .collect(); + + let new_suspects_after = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, (range.start + split_at_from_start)..range.end)) + .collect(); + + let new_hunk_before = Self { + range_in_blamed_file: self.range_in_blamed_file.start + ..(self.range_in_blamed_file.start + split_at_from_start), + suspects: new_suspects_before, + }; + let new_hunk_after = Self { + range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) + ..(self.range_in_blamed_file.end), + suspects: new_suspects_after, + }; + + Either::Right((new_hunk_before, new_hunk_after)) + } else { + Either::Left(self) + } + } else { + Either::Left(self) + } + } + } + } + + fn offset_for(&self, suspect: ObjectId) -> Offset { + let range_in_suspect = self.suspects.get(&suspect).expect("TODO"); + + if self.range_in_blamed_file.start > range_in_suspect.start { + Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start) + } else { + Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start) + } + } + + fn pass_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.remove(&from) { + self.suspects.insert(to, range_in_suspect); + } + } + + fn clone_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.get(&from) { + self.suspects.insert(to, range_in_suspect.clone()); + } + } + + fn remove_blame(&mut self, suspect: ObjectId) { + let _ = self.suspects.remove(&suspect); + } +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Change { + Unchanged(Range), + Added(Range, u32), + Deleted(u32, u32), +} + +struct ChangeRecorder { + previous_after_end: u32, + changes: Vec, + total_number_of_lines: u32, +} + +impl ChangeRecorder { + fn new(total_number_of_lines: u32) -> Self { + ChangeRecorder { + previous_after_end: 0, + changes: vec![], + total_number_of_lines, + } + } +} + +impl gix_diff::blob::Sink for ChangeRecorder { + type Out = Vec; + + // “imara-diff will compute a line diff by default”, so each `start` and `end` represents a + // line in a file. + fn process_change(&mut self, before: Range, after: Range) { + // This checks for unchanged hunks. 
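+        // A note on why this gap-filling is needed (assumption based on the linked `Sink` notes):
+        // imara-diff only hands `process_change` ranges that actually changed, so any gap between
+        // the end of the previous hunk (`self.previous_after_end`) and the start of the current
+        // one is identical on both sides and is recorded explicitly as `Change::Unchanged`.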
+ // + // https://docs.rs/imara-diff/latest/imara_diff/sink/trait.Sink.html#notes + if after.start > self.previous_after_end { + self.changes + .push(Change::Unchanged(self.previous_after_end..after.start)); + } + + match (before.end > before.start, after.end > after.start) { + (_, true) => { + self.changes + .push(Change::Added(after.start..after.end, before.end - before.start)); + } + (true, false) => { + self.changes + .push(Change::Deleted(after.start, before.end - before.start)); + } + (false, false) => unimplemented!(), + } + + self.previous_after_end = after.end; + } + + fn finish(mut self) -> Self::Out { + if self.total_number_of_lines > self.previous_after_end { + self.changes + .push(Change::Unchanged(self.previous_after_end..self.total_number_of_lines)); + } + + self.changes + } +} + +pub fn process_change( + out: &mut Vec, + new_hunks_to_blame: &mut Vec, + offset_in_destination: &mut Offset, + suspect: ObjectId, + hunk: Option, + change: Option, +) -> (Option, Option) { + match (hunk, change) { + (Some(hunk), Some(Change::Unchanged(unchanged))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + + return (None, Some(Change::Unchanged(unchanged))); + }; + + match ( + // Since `unchanged` is a range that is not inclusive at the end, + // `unchanged.end` is not part of `unchanged`. The first line that is + // `unchanged.end - 1`. + range_in_suspect.contains(&unchanged.start), + (unchanged.end - 1) >= range_in_suspect.start && unchanged.end <= range_in_suspect.end, + ) { + (_, true) => { + // <------> (hunk) + // <-------> (unchanged) + // + // <----------> (hunk) + // <---> (unchanged) + + (Some(hunk), None) + } + (true, false) => { + // <--------> (hunk) + // <-------> (unchanged) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Unchanged(unchanged))) + } + (false, false) => { + // Any of the following cases are handled by this branch: + // <---> (hunk) + // <----------> (unchanged) + // + // <----> (hunk) + // <--> (unchanged) + // + // <--> (hunk) + // <----> (unchanged) + + if unchanged.end <= range_in_suspect.start { + // <----> (hunk) + // <--> (unchanged) + + (Some(hunk.clone()), None) + } else { + // <--> (hunk) + // <----> (unchanged) + // + // <---> (hunk) + // <----------> (unchanged) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Unchanged(unchanged.clone()))) + } + } + } + } + (Some(hunk), Some(Change::Added(added, number_of_lines_deleted))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + + return (None, Some(Change::Added(added, number_of_lines_deleted))); + }; + + let range_in_suspect = range_in_suspect.clone(); + + match ( + range_in_suspect.contains(&added.start), + // Since `added` is a range that is not inclusive at the end, `added.end` is + // not part of `added`. The first line that is is `added.end - 1`. 
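+            // Put differently, the check below holds when the last line of `added`
+            // (line `added.end - 1`) lies within `range_in_suspect`, that is when `added` ends
+            // inside the hunk or exactly at its end.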
+ (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end, + ) { + (true, true) => { + // <----------> (hunk) + // <---> (added) + // <---> (blamed) + // <--> <-> (new hunk) + + let new_hunk = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + out.push(BlameEntry::with_offset( + added.clone(), + suspect, + new_hunk.offset_for(suspect), + )); + + match new_hunk.split_at(suspect, added.end) { + Either::Left(_) => (None, None), + Either::Right((_, after)) => (Some(after), None), + } + } + (true, false) => { + // <--------> (hunk) + // <-------> (added) + // <----> (blamed) + // <--> (new hunk) + + let new_hunk = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + out.push(BlameEntry::with_offset( + added.start..range_in_suspect.end, + suspect, + new_hunk.offset_for(suspect), + )); + + if added.end > range_in_suspect.end { + (None, Some(Change::Added(added, number_of_lines_deleted))) + } else { + todo!(); + } + } + (false, true) => { + // <-------> (hunk) + // <------> (added) + // <---> (blamed) + // <--> (new hunk) + + out.push(BlameEntry::with_offset( + range_in_suspect.start..added.end, + suspect, + hunk.offset_for(suspect), + )); + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + match hunk.split_at(suspect, added.end) { + Either::Left(_) => (None, None), + Either::Right((_, after)) => (Some(after), None), + } + } + (false, false) => { + // Any of the following cases are handled by this branch: + // <---> (hunk) + // <----------> (added) + // + // <----> (hunk) + // <--> (added) + // + // <--> (hunk) + // <----> (added) + + if added.end <= range_in_suspect.start { + // <----> (hunk) + // <--> (added) + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + (Some(hunk.clone()), None) + } else if range_in_suspect.end <= added.start { + // <--> (hunk) + // <----> (added) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) + } else { + // <---> (hunk) + // <----------> (added) + // <---> (blamed) + + out.push(BlameEntry::with_offset( + range_in_suspect.clone(), + suspect, + hunk.offset_for(suspect), + )); + + (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) + } + } + } + } + (Some(hunk), Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted))) => { + let range_in_suspect = hunk.suspects.get(&suspect).expect("TODO"); + + if line_number_in_destination < range_in_suspect.start { + // <---> (hunk) + // | (line_number_in_destination) + + *offset_in_destination -= number_of_lines_deleted; + + (Some(hunk), None) + } else if line_number_in_destination < range_in_suspect.end { + // <-----> (hunk) + // | (line_number_in_destination) + + let new_hunk = match hunk.split_at(suspect, line_number_in_destination) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + *offset_in_destination -= 
number_of_lines_deleted; + + (Some(new_hunk), None) + } else { + // <---> (hunk) + // | (line_number_in_destination) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + ( + None, + Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)), + ) + } + } + (Some(hunk), None) => { + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, None) + } + (None, Some(Change::Unchanged(_))) => (None, None), + (None, Some(Change::Added(added, number_of_lines_deleted))) => { + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + (None, None) + } + (None, Some(Change::Deleted(_, number_of_lines_deleted))) => { + *offset_in_destination -= number_of_lines_deleted; + + (None, None) + } + (None, None) => (None, None), + } +} + +pub fn process_changes( + out: &mut Vec, + hunks_to_blame: &[UnblamedHunk], + changes: &[Change], + suspect: ObjectId, +) -> Vec { + let mut hunks_iter = hunks_to_blame.iter().cloned(); + let mut changes_iter = changes.iter().cloned(); + + let mut hunk: Option = hunks_iter.next(); + let mut change: Option = changes_iter.next(); + + let mut new_hunks_to_blame: Vec = vec![]; + let mut offset_in_destination: Offset = Offset::Added(0); + + loop { + (hunk, change) = process_change( + out, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + hunk, + change, + ); + + hunk = hunk.or_else(|| hunks_iter.next()); + change = change.or_else(|| changes_iter.next()); + + if hunk.is_none() && change.is_none() { + break; + } + } + + new_hunks_to_blame +} + +fn get_changes_for_file_path( + odb: impl gix_object::Find + gix_object::FindHeader, + file_path: &BStr, + id: ObjectId, + parent_id: ObjectId, +) -> Vec { + let mut buffer = Vec::new(); + + let parent = odb.find_commit(&parent_id, &mut buffer).unwrap(); + + let mut buffer = Vec::new(); + let parent_tree_iter = odb + .find(&parent.tree(), &mut buffer) + .unwrap() + .try_into_tree_iter() + .unwrap(); + + let mut buffer = Vec::new(); + let commit = odb.find_commit(&id, &mut buffer).unwrap(); + + let mut buffer = Vec::new(); + let tree_iter = odb + .find(&commit.tree(), &mut buffer) + .unwrap() + .try_into_tree_iter() + .unwrap(); + + let mut recorder = gix_diff::tree::Recorder::default(); + gix_diff::tree( + parent_tree_iter, + tree_iter, + gix_diff::tree::State::default(), + &odb, + &mut recorder, + ) + .unwrap(); + + recorder + .records + .iter() + .filter(|change| match change { + gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Addition { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Deletion { path, .. 
} => path == file_path, + }) + .cloned() + .collect() +} + +fn get_changes( + odb: impl gix_object::Find + gix_object::FindHeader, + resource_cache: &mut gix_diff::blob::Platform, + oid: ObjectId, + previous_oid: ObjectId, + file_path: &BStr, +) -> Vec { + resource_cache + .set_resource( + previous_oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::OldOrSource, + &odb, + ) + .unwrap(); + resource_cache + .set_resource( + oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::NewOrDestination, + &odb, + ) + .unwrap(); + + let outcome = resource_cache.prepare_diff().unwrap(); + let input = outcome.interned_input(); + let number_of_lines_in_destination = input.after.len(); + let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap()); + + gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) +} + +/// This function merges adjacent blame entries. It merges entries that are adjacent both in the +/// blamed file and in the original file that introduced them. This follows `git`’s +/// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the +/// blamed file which can result in different blames in certain edge cases. See [the commit][1] +/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test +/// for this behaviour in `git`. +/// +/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a +/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131 +fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { + // TODO + // It’s possible this could better be done on insertion into `lines_blamed`. + lines_blamed.into_iter().fold(vec![], |mut acc, entry| { + let previous_entry = acc.last(); + + if let Some(previous_entry) = previous_entry { + if previous_entry.commit_id == entry.commit_id + && previous_entry.range_in_blamed_file.end == entry.range_in_blamed_file.start + // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`. + && previous_entry.range_in_original_file.end == entry.range_in_original_file.start + { + let coalesced_entry = BlameEntry { + range_in_blamed_file: previous_entry.range_in_blamed_file.start..entry.range_in_blamed_file.end, + range_in_original_file: previous_entry.range_in_original_file.start + ..entry.range_in_original_file.end, + commit_id: previous_entry.commit_id, + }; + + acc.pop(); + acc.push(coalesced_entry); + } else { + acc.push(entry); + } + + acc + } else { + acc.push(entry); + + acc + } + }) +} + +// TODO: do not instantiate anything, get everything passed as argument. +pub fn blame_file( + odb: impl gix_object::Find + gix_object::FindHeader, + traverse: impl IntoIterator>, + resource_cache: &mut gix_diff::blob::Platform, + suspect: ObjectId, + worktree_path: PathBuf, + file_path: &BStr, +) -> Result, E> { + // TODO + // At a high level, what we want to do is the following: + // + // - get the commit that belongs to a commit id + // - walk through parents + // - for each parent, do a diff and mark lines that don’t have a suspect (this is the term + // used in `libgit2`) yet, but that have been changed in this commit + // + // The algorithm in `libgit2` works by going through parents and keeping a linked list of blame + // suspects. 
It can be visualized as follows: + // + // <----------------------------------------> + // <---------------><-----------------------> + // <---><----------><-----------------------> + // <---><----------><-------><-----><-------> + // <---><---><-----><-------><-----><-------> + // <---><---><-----><-------><-----><-><-><-> + + // Needed for `to_str`. + use gix_object::bstr::ByteSlice; + + let absolute_path = worktree_path.join(file_path.to_str().unwrap()); + + // TODO Verify that `imara-diff` tokenizes lines the same way `lines` does. + let number_of_lines = std::fs::read_to_string(absolute_path).unwrap().lines().count(); + + let mut hunks_to_blame: Vec = vec![UnblamedHunk::new( + 0..number_of_lines.try_into().unwrap(), + suspect, + Offset::Added(0), + )]; + let mut out: Vec = vec![]; + + 'outer: for item in traverse { + let item = item?; + let suspect = item.id; + + let parent_ids = item.parent_ids; + if parent_ids.is_empty() { + // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of + // the last `item` that was yielded by `traverse`, so it makes sense to assign the + // remaining lines to it, even though we don’t explicitly check whether that is true + // here. We could perhaps use `needed_to_obtain` to compare `suspect` against an empty + // tree to validate this assumption. + out.extend( + hunks_to_blame + .iter() + .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), + ); + + hunks_to_blame = vec![]; + + break; + } + + let mut buffer = Vec::new(); + let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); + let tree_iter = odb.find_tree_iter(&commit_id, &mut buffer).unwrap(); + + let mut entry_buffer = Vec::new(); + let Some(entry) = tree_iter + .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) + .unwrap() + else { + continue; + }; + + if parent_ids.len() == 1 { + let parent_id: ObjectId = *parent_ids.last().unwrap(); + + let mut buffer = Vec::new(); + let parent_commit_id = odb.find_commit(&parent_id, &mut buffer).unwrap().tree(); + let parent_tree_iter = odb.find_tree_iter(&parent_commit_id, &mut buffer).unwrap(); + + let mut entry_buffer = Vec::new(); + if let Some(parent_entry) = parent_tree_iter + .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) + .unwrap() + { + if entry.oid == parent_entry.oid { + // The blobs storing the blamed file in `entry` and `parent_entry` are identical + // which is why we can pass blame to the parent without further checks. + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + + continue; + } + } + + let changes_for_file_path = get_changes_for_file_path(&odb, file_path, item.id, parent_id); + + let [ref modification]: [gix_diff::tree::recorder::Change] = changes_for_file_path[..] else { + // None of the changes affected the file we’re currently blaming. Pass blame to parent. + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + + continue; + }; + + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + // Every line that has not been blamed yet on a commit, is expected to have been + // added when the file was added to the repository. + out.extend( + hunks_to_blame + .iter() + .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), + ); + + hunks_to_blame = vec![]; + + break; + } + gix_diff::tree::recorder::Change::Deletion { .. 
} => todo!(), + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + let changes = get_changes(&odb, resource_cache, *oid, *previous_oid, file_path); + + hunks_to_blame = process_changes(&mut out, &hunks_to_blame, &changes, suspect); + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + } + } + } else { + let mut buffer = Vec::new(); + let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); + let tree_iter = odb.find_tree_iter(&commit_id, &mut buffer).unwrap(); + + let mut entry_buffer = Vec::new(); + let entry = tree_iter + .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) + .unwrap() + .unwrap(); + + for parent_id in &parent_ids { + let mut buffer = Vec::new(); + let parent_commit_id = odb.find_commit(parent_id, &mut buffer).unwrap().tree(); + let parent_tree_iter = odb.find_tree_iter(&parent_commit_id, &mut buffer).unwrap(); + + let mut entry_buffer = Vec::new(); + if let Some(parent_entry) = parent_tree_iter + .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) + .unwrap() + { + if entry.oid == parent_entry.oid { + // The blobs storing the blamed file in `entry` and `parent_entry` are + // identical which is why we can pass blame to the parent without further + // checks. + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, *parent_id)); + + continue 'outer; + } + } + } + + for parent_id in parent_ids { + let changes_for_file_path = get_changes_for_file_path(&odb, file_path, item.id, parent_id); + + let [ref modification]: [gix_diff::tree::recorder::Change] = changes_for_file_path[..] else { + // None of the changes affected the file we’re currently blaming. Pass blame + // to parent. + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.clone_blame(suspect, parent_id)); + + continue; + }; + + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + // Do nothing under the assumption that this always (or almost always) + // implies that the file comes from a different parent, compared to which + // it was modified, not added. + // + // TODO: I still have to figure out whether this is correct in all cases. + } + gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + let changes = get_changes(&odb, resource_cache, *oid, *previous_oid, file_path); + + hunks_to_blame = process_changes(&mut out, &hunks_to_blame, &changes, suspect); + + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + } + } + } + + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.remove_blame(suspect)); + } + } + + assert_eq!(hunks_to_blame, vec![]); + + // I don’t know yet whether it would make sense to use a data structure instead that preserves + // order on insertion. 
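+    // Entries are pushed to `out` in the order in which hunks get blamed, which follows the
+    // commit traversal rather than the order of lines in the blamed file, so they are sorted by
+    // their position in the blamed file before adjacent entries are coalesced.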
+ out.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); + + Ok(coalesce_blame_entries(out)) } diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 4cb22417ece..5d85590713a 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -1,4 +1,1576 @@ +use gix_blame::{blame_file, process_change, process_changes, BlameEntry, Change, Offset, UnblamedHunk}; +use gix_hash::ObjectId; +use gix_object::bstr; +use std::path::PathBuf; + +struct Baseline<'a> { + lines: bstr::Lines<'a>, +} + +mod baseline { + use std::path::Path; + + use gix_hash::ObjectId; + use gix_ref::bstr::ByteSlice; + + use super::Baseline; + use gix_blame::BlameEntry; + + // These fields are used by `git` in its porcelain output. + const HEADER_FIELDS: [&str; 12] = [ + // https://github.com/git/git/blob/6258f68c3c1092c901337895c864073dcdea9213/builtin/blame.c#L256-L280 + "author", + "author-mail", + "author-time", + "author-tz", + "committer", + "committer-mail", + "committer-time", + "committer-tz", + "summary", + "boundary", + // https://github.com/git/git/blob/6258f68c3c1092c901337895c864073dcdea9213/builtin/blame.c#L239-L248 + "previous", + "filename", + ]; + + fn is_known_header_field(field: &&str) -> bool { + HEADER_FIELDS.contains(field) + } + + impl Baseline<'_> { + pub fn collect(baseline_path: impl AsRef) -> std::io::Result> { + let content = std::fs::read(baseline_path)?; + + Ok(Baseline { lines: content.lines() }.collect()) + } + } + + impl Iterator for Baseline<'_> { + type Item = BlameEntry; + + fn next(&mut self) -> Option { + let mut ranges = None; + let mut commit_id = gix_hash::Kind::Sha1.null(); + let mut skip_lines: u32 = 0; + + for line in self.lines.by_ref() { + if line.starts_with(b"\t") { + // Each group consists of a header and one or more lines. We break from the + // loop, thus returning a `BlameEntry` from `next` once we have seen the number + // of lines starting with "\t" as indicated in the group’s header. + skip_lines -= 1; + + if skip_lines == 0 { + break; + } else { + continue; + } + } + + let fields: Vec<&str> = line.to_str().unwrap().split(' ').collect(); + if fields.len() == 4 { + // We’re possibly dealing with a group header. + // If we can’t parse the first field as an `ObjectId`, we know this is not a + // group header, so we continue. This can yield false positives, but for + // testing purposes, we don’t bother. + commit_id = match ObjectId::from_hex(fields[0].as_bytes()) { + Ok(id) => id, + Err(_) => continue, + }; + + let line_number_in_original_file = fields[1].parse::().unwrap(); + let line_number_in_final_file = fields[2].parse::().unwrap(); + // The last field indicates the number of lines this group contains info for + // (this is not equal to the number of lines in git blame’s porcelain output). 
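+                    // A porcelain group header has the shape
+                    // `<sha1> <line in original file> <line in final file> <number of lines>`,
+                    // where the last field only appears on the first line of each group.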
+ let number_of_lines_in_group = fields[3].parse::().unwrap(); + + skip_lines = number_of_lines_in_group; + + let original_range = (line_number_in_original_file - 1) + ..(line_number_in_original_file + number_of_lines_in_group - 1); + let blame_range = + (line_number_in_final_file - 1)..(line_number_in_final_file + number_of_lines_in_group - 1); + assert!(ranges.is_none(), "should not overwrite existing ranges"); + ranges = Some((blame_range, original_range)); + } else if !is_known_header_field(&fields[0]) && ObjectId::from_hex(fields[0].as_bytes()).is_err() { + panic!("unexpected line: '{:?}'", line.as_bstr()); + } + } + + let Some((range_in_blamed_file, range_in_original_file)) = ranges else { + // No new lines were parsed, so we assume the iterator is finished. + return None; + }; + Some(BlameEntry::new(range_in_blamed_file, range_in_original_file, commit_id)) + } + } +} + +struct Fixture { + worktree_path: PathBuf, + odb: gix_odb::Handle, + resource_cache: gix_diff::blob::Platform, + suspect: ObjectId, + commits: Vec>, +} + +impl Fixture { + fn new() -> gix_testtools::Result { + Self::for_worktree_path(fixture_path()) + } + + fn for_worktree_path(worktree_path: PathBuf) -> gix_testtools::Result { + use gix_ref::store::WriteReflog; + + let store = gix_ref::file::Store::at( + worktree_path.join(".git"), + gix_ref::store::init::Options { + write_reflog: WriteReflog::Disable, + ..Default::default() + }, + ); + let odb = gix_odb::at(worktree_path.join(".git/objects"))?; + + let mut reference = gix_ref::file::Store::find(&store, "HEAD")?; + + // Needed for `peel_to_id_in_place`. + use gix_ref::file::ReferenceExt; + + let head_id = reference.peel_to_id_in_place(&store, &odb)?; + + let commits: Vec<_> = gix_traverse::commit::topo::Builder::from_iters(&odb, [head_id], None::>) + .build()? + .collect(); + + let git_dir = worktree_path.join(".git"); + let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default())?; + let stack = gix_worktree::Stack::from_state_and_ignore_case( + worktree_path.clone(), + false, + gix_worktree::stack::State::AttributesAndIgnoreStack { + attributes: Default::default(), + ignore: Default::default(), + }, + &index, + index.path_backing(), + ); + let capabilities = gix_fs::Capabilities::probe(&git_dir); + let resource_cache = gix_diff::blob::Platform::new( + Default::default(), + gix_diff::blob::Pipeline::new( + gix_diff::blob::pipeline::WorktreeRoots { + old_root: None, + new_root: None, + }, + gix_filter::Pipeline::new(Default::default(), Default::default()), + vec![], + gix_diff::blob::pipeline::Options { + large_file_threshold_bytes: 0, + fs: capabilities, + }, + ), + gix_diff::blob::pipeline::Mode::ToGit, + stack, + ); + Ok(Fixture { + odb, + worktree_path, + resource_cache, + suspect: head_id, + commits, + }) + } +} + +macro_rules! 
mktest { + ($name:ident, $case:expr, $number_of_lines:literal) => { + #[test] + fn $name() { + let Fixture { + worktree_path, + odb, + mut resource_cache, + suspect, + commits, + } = Fixture::new().unwrap(); + + let lines_blamed = blame_file( + &odb, + commits, + &mut resource_cache, + suspect, + worktree_path, + format!("{}.txt", $case).as_str().into(), + ) + .unwrap(); + + assert_eq!(lines_blamed.len(), $number_of_lines); + + let git_dir = fixture_path().join(".git"); + let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case))).unwrap(); + + assert_eq!(baseline.len(), $number_of_lines); + assert_eq!(lines_blamed, baseline); + } + }; +} + +mktest!(simple_case, "simple", 4); +mktest!(multiline_hunks, "multiline-hunks", 3); +mktest!(deleted_lines, "deleted-lines", 1); +mktest!(deleted_lines_multiple_hunks, "deleted-lines-multiple-hunks", 2); +mktest!(changed_lines, "changed-lines", 1); +mktest!( + changed_line_between_unchanged_lines, + "changed-line-between-unchanged-lines", + 3 +); +mktest!(added_lines, "added-lines", 2); +mktest!(added_lines_around, "added-lines-around", 3); +mktest!(switched_lines, "switched-lines", 4); +mktest!(added_line_before_changed_line, "added-line-before-changed-line", 3); +mktest!(same_line_changed_twice, "same-line-changed-twice", 2); +mktest!(coalesce_adjacent_hunks, "coalesce-adjacent-hunks", 1); + +mktest!(resolved_conflict, "resolved-conflict", 2); +mktest!(file_in_one_chain_of_ancestors, "file-in-one-chain-of-ancestors", 1); +mktest!( + different_file_in_another_chain_of_ancestors, + "different-file-in-another-chain-of-ancestors", + 1 +); +mktest!(file_only_changed_in_branch, "file-only-changed-in-branch", 2); + +#[test] +#[ignore = "TBD: figure out what the problem is"] +// As of 2024-09-24, these tests are expected to fail. 
+// +// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 +fn diff_disparity() { + for case in ["empty-lines-myers", "empty-lines-histogram"] { + let Fixture { + worktree_path, + odb, + mut resource_cache, + suspect, + commits, + } = Fixture::new().unwrap(); + + let lines_blamed = blame_file( + &odb, + commits, + &mut resource_cache, + suspect, + worktree_path, + format!("{case}.txt").as_str().into(), + ) + .unwrap(); + + assert_eq!(lines_blamed.len(), 5); + + let git_dir = fixture_path().join(".git"); + let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline"))).unwrap(); + + assert_eq!(lines_blamed, baseline, "{case}"); + } +} + +#[test] +fn process_change_works() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + None, + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_added_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Added(0..3, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..3, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_added_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Added(2..3, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 2..3, + range_in_original_file: 2..3, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..2, + suspects: [(suspect, 0..2)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_added_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(5); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(10..15, suspect, Offset::Added(0))), + Some(Change::Added(12..13, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 13..15, + 
suspects: [(suspect, 13..15)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 12..13, + range_in_original_file: 12..13, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 10..12, + suspects: [(suspect, 5..7)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(6)); +} + +#[test] +fn process_change_works_added_hunk_4() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 7..12 + Some(UnblamedHunk::new(12..17, suspect, Offset::Added(5))), + Some(Change::Added(9..10, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 15..17, + suspects: [(suspect, 10..12)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 14..15, + range_in_original_file: 9..10, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 12..14, + suspects: [(suspect, 7..9)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_added_hunk_5() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Added(0..3, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..3, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(2)); +} + +#[test] +fn process_change_works_added_hunk_6() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 0..4 + Some(UnblamedHunk::new(1..5, suspect, Offset::Added(1))), + Some(Change::Added(0..3, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 4..5, + suspects: [(suspect, 3..4)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 1..4, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(2)); +} + +#[test] +fn process_change_works_added_hunk_7() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(2); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // 
range_in_destination: 2..6 + Some(UnblamedHunk::new(3..7, suspect, Offset::Added(1))), + Some(Change::Added(3..5, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 6..7, + suspects: [(suspect, 5..6)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 4..6, + range_in_original_file: 3..5, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 3..4, + suspects: [(suspect, 0..1)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_added_hunk_8() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 25..26 + Some(UnblamedHunk::new(23..24, suspect, Offset::Deleted(2))), + Some(Change::Added(25..27, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(25..27, 1))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 23..24, + range_in_original_file: 25..26, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_added_hunk_9() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 21..22 + Some(UnblamedHunk::new(23..24, suspect, Offset::Added(2))), + Some(Change::Added(18..22, 3)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 23..24, + range_in_original_file: 21..22, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_added_hunk_10() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 70..108 + Some(UnblamedHunk::new(71..109, suspect, Offset::Added(1))), + Some(Change::Added(106..109, 0)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(106..109, 0))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 107..109, + range_in_original_file: 106..108, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 71..107, + suspects: [(suspect, 70..106)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_added_hunk_11() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, 
+ suspect, + // range_in_destination: 137..144 + Some(UnblamedHunk::new(149..156, suspect, Offset::Added(12))), + Some(Change::Added(143..146, 0)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(143..146, 0))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 155..156, + range_in_original_file: 143..144, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 149..155, + suspects: [(suspect, 137..143)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_no_overlap() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Deleted(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 2..5 + Some(UnblamedHunk::new(3..6, suspect, Offset::Added(1))), + Some(Change::Added(7..10, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(7..10, 1))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 3..6, + suspects: [(suspect, 5..8)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Deleted(3)); +} + +#[test] +fn process_change_works_no_overlap_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 6..8 + Some(UnblamedHunk::new(9..11, suspect, Offset::Added(3))), + Some(Change::Added(2..5, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 9..11, + suspects: [(suspect, 6..8)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_no_overlap_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 5..15 + Some(UnblamedHunk::new(4..15, suspect, Offset::Deleted(1))), + Some(Change::Added(4..5, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 4..15, + suspects: [(suspect, 5..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_no_overlap_4() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 25..27 + Some(UnblamedHunk::new(23..25, suspect, Offset::Deleted(2))), + Some(Change::Unchanged(21..22)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 
23..25, + suspects: [(suspect, 25..27)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_no_overlap_5() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 17..18 + Some(UnblamedHunk::new(15..16, suspect, Offset::Deleted(2))), + Some(Change::Deleted(20, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Deleted(20, 1))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 15..16, + suspects: [(suspect, 16..17)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_no_overlap_6() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 22..24 + Some(UnblamedHunk::new(23..25, suspect, Offset::Added(1))), + Some(Change::Deleted(20, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 23..25, + suspects: [(suspect, 22..24)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(1)); +} + +#[test] +fn process_change_works_enclosing_addition() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 5..8 + Some(UnblamedHunk::new(2..5, suspect, Offset::Deleted(3))), + Some(Change::Added(3..12, 2)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(3..12, 2))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 2..5, + range_in_original_file: 5..8, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_enclosing_deletion() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 13..20 + Some(UnblamedHunk::new(12..19, suspect, Offset::Deleted(1))), + Some(Change::Deleted(15, 2)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 14..19, + suspects: [(suspect, 15..20)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 12..14, + suspects: [(suspect, 10..12)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); +} 
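+
+#[test]
+fn offset_arithmetic_examples() {
+    // A minimal sketch of the `Offset` arithmetic defined in `gix-blame/src/lib.rs`; the test
+    // name and the concrete values are chosen purely for illustration. `Add<u32>` is only
+    // implemented for the `Added` variant, while adding two offsets or using the assign
+    // operators can flip the variant once an offset crosses zero.
+    assert_eq!(Offset::Added(3) + 2_u32, Offset::Added(5));
+    assert_eq!(Offset::Added(1) + Offset::Deleted(3), Offset::Deleted(2));
+    assert_eq!(Offset::Deleted(5) + Offset::Added(2), Offset::Deleted(3));
+
+    let mut offset = Offset::Deleted(2);
+    offset += 5;
+    assert_eq!(offset, Offset::Added(3));
+
+    let mut offset = Offset::Added(2);
+    offset -= 5;
+    assert_eq!(offset, Offset::Deleted(3));
+}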
+ +#[test] +fn process_change_works_enclosing_unchanged_lines() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 109..113 + Some(UnblamedHunk::new(110..114, suspect, Offset::Added(1))), + Some(Change::Unchanged(109..172)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Unchanged(109..172))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 110..114, + suspects: [(suspect, 106..110)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_unchanged_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Unchanged(0..3)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 0..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_unchanged_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Unchanged(0..7)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Unchanged(0..7))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 0..5)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_unchanged_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Deleted(2); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk { + range_in_blamed_file: 22..30, + suspects: [(suspect, 21..29)].into(), + }), + Some(Change::Unchanged(21..23)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 22..30, + suspects: [(suspect, 21..29)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(2)); +} + +#[test] +fn process_change_works_deleted_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut 
offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Deleted(5, 3)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Deleted(5, 3))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 0..5)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_deleted_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(2..16, suspect, Offset::Added(0))), + Some(Change::Deleted(0, 4)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 2..16, + suspects: [(suspect, 2..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(4)); +} + +#[test] +fn process_change_works_deleted_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(2..16, suspect, Offset::Added(0))), + Some(Change::Deleted(14, 4)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 14..16, + suspects: [(suspect, 14..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk::new(2..14, suspect, Offset::Added(0))] + ); + assert_eq!(offset_in_destination, Offset::Deleted(4)); +} + +#[test] +fn process_change_works_addition_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::Added(22..25, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_deletion_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::Deleted(11, 5)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(4)); +} + +#[test] +fn process_change_works_unchanged_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut 
new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::Unchanged(11..13)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); +} +#[test] +fn process_changes_works() { + let mut lines_blamed = Vec::new(); + let hunks_to_blame = &[]; + let changes = &[]; + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); +} + #[test] -fn it_works() { - let _worktree = gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap(); +fn process_changes_works_added_hunk() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = &[UnblamedHunk::new(0..4, suspect, Offset::Added(0))]; + let changes = &[Change::Added(0..4, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..4, + range_in_original_file: 0..4, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); +} + +#[test] +fn process_changes_works_added_hunk_2() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; + let changes = &[Change::Added(0..4, 0), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..4, + range_in_original_file: 0..4, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]); +} + +#[test] +fn process_changes_works_added_hunk_3() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; + let changes = &[Change::Unchanged(0..2), Change::Added(2..4, 0), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 2..4, + range_in_original_file: 2..4, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [ + UnblamedHunk::new(0..2, suspect, Offset::Added(0)), + UnblamedHunk::new(4..6, suspect, Offset::Added(2)) + ] + ); +} + +#[test] +fn process_changes_works_added_hunk_4_0() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; + let changes = &[Change::Added(0..1, 0), Change::Added(1..4, 0), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 0..1, + range_in_original_file: 0..1, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 1..4, + range_in_original_file: 1..4, + commit_id: suspect + } + ] + ); + assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]); +} + +#[test] +fn process_changes_works_added_hunk_4_1() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = 
&[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; + let changes = &[Change::Added(0..1, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..1, + range_in_original_file: 0..1, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(1..6, suspect, Offset::Added(1))]); +} + +#[test] +fn process_changes_works_added_hunk_4_2() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); + let mut lines_blamed: Vec = vec![BlameEntry { + range_in_blamed_file: 0..2, + range_in_original_file: 0..2, + commit_id: suspect, + }]; + let hunks_to_blame = &[UnblamedHunk::new(2..6, suspect_2, Offset::Added(2))]; + let changes = &[Change::Added(0..1, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 0..2, + range_in_original_file: 0..2, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 2..3, + range_in_original_file: 0..1, + commit_id: suspect_2 + } + ] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk::new(3..6, suspect_2, Offset::Added(3))] + ); +} + +#[test] +fn process_changes_works_added_hunk_5() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; + let changes = &[Change::Added(0..4, 3), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..4, + range_in_original_file: 0..4, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(1))]); +} + +#[test] +fn process_changes_works_added_hunk_6() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = &[UnblamedHunk::new(4..6, suspect, Offset::Added(1))]; + let changes = &[Change::Added(0..3, 0), Change::Unchanged(3..5)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]); +} + +#[test] +fn process_changes_works_added_hunk_7() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); + let mut lines_blamed: Vec = vec![BlameEntry { + range_in_blamed_file: 0..1, + range_in_original_file: 0..1, + commit_id: suspect, + }]; + let hunks_to_blame = &[UnblamedHunk::new(1..3, suspect_2, Offset::Added(1))]; + let changes = &[Change::Added(0..1, 2)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 0..1, + range_in_original_file: 0..1, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 1..2, + range_in_original_file: 0..1, + commit_id: suspect_2 + } + ] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk::new(2..3, suspect_2, Offset::Added(0))] + ); +} + +#[test] +fn process_changes_works_added_hunk_8() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let mut lines_blamed = Vec::new(); + let hunks_to_blame 
= &[UnblamedHunk::new(0..4, suspect, Offset::Added(0))]; + let changes = &[Change::Added(0..2, 0), Change::Unchanged(2..3), Change::Added(3..4, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 0..2, + range_in_original_file: 0..2, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 3..4, + range_in_original_file: 3..4, + commit_id: suspect + } + ] + ); + assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(2..3, suspect, Offset::Added(2))]); +} + +#[test] +fn process_changes_works_added_hunk_9() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let mut lines_blamed: Vec = vec![BlameEntry { + range_in_blamed_file: 30..31, + range_in_original_file: 30..31, + commit_id: suspect, + }]; + let hunks_to_blame = &[ + UnblamedHunk { + range_in_blamed_file: 0..30, + suspects: [(suspect, 0..30)].into(), + }, + UnblamedHunk { + range_in_blamed_file: 31..37, + suspects: [(suspect, 31..37)].into(), + }, + ]; + let changes = &[ + Change::Unchanged(0..16), + Change::Added(16..17, 0), + Change::Unchanged(17..37), + ]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + lines_blamed.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 16..17, + range_in_original_file: 16..17, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 30..31, + range_in_original_file: 30..31, + commit_id: suspect + } + ] + ); + assert_eq!( + new_hunks_to_blame, + [ + UnblamedHunk { + range_in_blamed_file: 0..16, + suspects: [(suspect, 0..16)].into() + }, + UnblamedHunk { + range_in_blamed_file: 17..30, + suspects: [(suspect, 16..29)].into() + }, + UnblamedHunk { + range_in_blamed_file: 31..37, + suspects: [(suspect, 30..36)].into() + } + ] + ); +} + +#[test] +fn process_changes_works_deleted_hunk() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = &[ + UnblamedHunk::new(0..4, suspect, Offset::Added(0)), + UnblamedHunk::new(4..7, suspect, Offset::Added(0)), + ]; + let changes = &[Change::Deleted(0, 3), Change::Added(0..4, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..4, + range_in_original_file: 0..4, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 4..7, + suspects: [(suspect, 3..6)].into() + }] + ); +} + +fn fixture_path() -> PathBuf { + gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap() } diff --git a/gix-blame/tests/fixtures/make_blame_repo.sh b/gix-blame/tests/fixtures/make_blame_repo.sh index 279cb3fe9d5..31e30c42e4d 100755 --- a/gix-blame/tests/fixtures/make_blame_repo.sh +++ b/gix-blame/tests/fixtures/make_blame_repo.sh @@ -1,23 +1,206 @@ #!/usr/bin/env bash set -eu -o pipefail - git init -q +git config --local diff.algorithm histogram + git config merge.ff false git checkout -q -b main -git commit -q --allow-empty -m c1 -git tag at-c1 -git commit -q --allow-empty -m c2 -git commit -q --allow-empty -m c3 -git commit -q --allow-empty -m c4 - -git checkout -q -b branch1 -git commit -q --allow-empty -m b1c1 -git tag at-b1c1 -git commit -q --allow-empty -m b1c2 - -git checkout -q main -git commit -q --allow-empty -m c5 -git tag at-c5 -git merge 
branch1 -m m1b1 + +echo "line 1" >> simple.txt +git add simple.txt +git commit -q -m c1 + +echo -e "line 1\nline 2\nline 3" >> multiline-hunks.txt +git add multiline-hunks.txt +git commit -q -m c1.1 + +echo -e "line 1\nline 2" > changed-lines.txt +echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> changed-line-between-unchanged-lines.txt +git add changed-lines.txt +git add changed-line-between-unchanged-lines.txt +git commit -q -m c1.2 + +echo "line 2" >> added-lines.txt +echo "line 2" >> added-lines-around.txt +echo -e "line 1\nline 2" > coalesce-adjacent-hunks.txt +git add added-lines.txt +git add added-lines-around.txt +git add coalesce-adjacent-hunks.txt +git commit -q -m c1.3 + +echo "line 2" >> simple.txt +git add simple.txt +git commit -q -m c2 + +echo -e "line 4\nline 5\nline 6" >> multiline-hunks.txt +git add multiline-hunks.txt +git commit -q -m c2.1 + +echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> deleted-lines.txt +echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> deleted-lines-multiple-hunks.txt +git add deleted-lines.txt +git add deleted-lines-multiple-hunks.txt +git commit -q -m c2.2 + +echo -e "line 1\nline 2\nline 3" > added-line-before-changed-line.txt +git add added-line-before-changed-line.txt +git commit -q -m c2.3 + +echo -e "line 1\nline 2" > same-line-changed-twice.txt +echo -e "line 1\nline in between\nline 2" > coalesce-adjacent-hunks.txt +git add same-line-changed-twice.txt +git add coalesce-adjacent-hunks.txt +git commit -q -m c2.4 + +echo "line 3" >> simple.txt +git add simple.txt +git commit -q -m c3 + +echo -e "line 3\nline 4" > deleted-lines.txt +echo -e "line 2\nline 4" > deleted-lines-multiple-hunks.txt +git add deleted-lines.txt +git add deleted-lines-multiple-hunks.txt +git commit -q -m c3.1 + +echo -e "line 3\nline 4" > changed-lines.txt +echo -e "line 1\nline 2\nline 3 changed\nline 4\nline 5\nline 6" > changed-line-between-unchanged-lines.txt +git add changed-lines.txt +git add changed-line-between-unchanged-lines.txt +git commit -q -m c3.2 + +echo -e "line 2\nline 3" > added-line-before-changed-line.txt +echo -e "line 1\nline 2" > coalesce-adjacent-hunks.txt +git add added-line-before-changed-line.txt +git add coalesce-adjacent-hunks.txt +git commit -q -m c3.3 + +echo -e "line 1\nline 2 changed" > same-line-changed-twice.txt +git add same-line-changed-twice.txt +git commit -q -m c3.4 + +echo "line 4" >> simple.txt +git add simple.txt +git commit -q -m c4 + +echo -e "line 7\nline 8\nline 9" >> multiline-hunks.txt +git add multiline-hunks.txt +git commit -q -m c4.1 + +echo -e "line 1\nline 3\nline 2\nline 4" > switched-lines.txt +git add switched-lines.txt +git commit -q -m c4.2 + +echo -e "line 2 changed\nline 3" > added-line-before-changed-line.txt +git add added-line-before-changed-line.txt +git commit -q -m c4.3 + +echo -e "line 1\nline 2 changed a second time" > same-line-changed-twice.txt +git add same-line-changed-twice.txt +git commit -q -m c4.4 + +echo -e " line 1\n\n line 2\n\n line 3" > empty-lines-histogram.txt +cp empty-lines-histogram.txt empty-lines-myers.txt +git add empty-lines-histogram.txt empty-lines-myers.txt +git commit -q -m c4.5 + +echo -e "line 0\nline 1\nline 2" > added-lines.txt +echo -e "line 0\nline 1\nline 2\nline 3" > added-lines-around.txt +git add added-lines.txt +git add added-lines-around.txt +git commit -q -m c5 + +echo -e "line 4" > deleted-lines.txt +git add deleted-lines.txt +git commit -q -m c5.1 + +echo -e "line 1\nline 2\nline 3\nline 4" > switched-lines.txt +git add 
switched-lines.txt +git commit -q -m c5.2 + +echo -e "line 1\nline 2 changed\nline 3" > added-line-before-changed-line.txt +git add added-line-before-changed-line.txt +git commit -q -m c5.3 + +echo -e " line 1\n\n line in between\n\n line 2\n\n line in between\n\n line 3" > empty-lines-histogram.txt +cp empty-lines-histogram.txt empty-lines-myers.txt +git add empty-lines-histogram.txt empty-lines-myers.txt +git commit -q -m c5.4 + +# The commit history created by the commits above this line is linear, it only +# contains commits that have exactly one parent. +# Below this line, there’s also commits that have more than one parent. + +echo -e "line 1 original\nline 2\n line 3" > resolved-conflict.txt +git add resolved-conflict.txt +git commit -q -m c6 + +echo -e "line 1 changed\nline 2\n line 3" > resolved-conflict.txt +git add resolved-conflict.txt +git commit -q -m c7 + +git checkout -b different-branch-to-create-a-conflict +git reset --hard HEAD~1 + +echo -e "line 1 changed in a different way\nline 2\n line 3" > resolved-conflict.txt +git add resolved-conflict.txt +git commit -q -m c8 + +git checkout main +git merge different-branch-to-create-a-conflict || true + +echo -e "line 1 conflict resolved\nline 2\n line 3" > resolved-conflict.txt +git add resolved-conflict.txt +git commit -q -m c9 + +echo -e "line 1\nline 2\n line 3" > file-in-one-chain-of-ancestors.txt +git add file-in-one-chain-of-ancestors.txt +git commit -q -m c10 + +git checkout -b different-branch-that-does-not-contain-file +git reset --hard HEAD~1 + +echo -e "line 4\nline 5\n line 6" > different-file-in-another-chain-of-ancestors.txt +git add different-file-in-another-chain-of-ancestors.txt +git commit -q -m c11 + +git checkout main +git merge different-branch-that-does-not-contain-file || true + +echo -e "line 1\nline 2\n line 3" > file-only-changed-in-branch.txt +git add file-only-changed-in-branch.txt +git commit -q -m c12 + +git checkout -b branch-that-has-one-commit + +echo -e "line 1 changed\nline 2\n line 3" > file-only-changed-in-branch.txt +git add file-only-changed-in-branch.txt +git commit -q -m c13 + +git checkout main +git merge branch-that-has-one-commit || true + +git blame --porcelain simple.txt > .git/simple.baseline +git blame --porcelain multiline-hunks.txt > .git/multiline-hunks.baseline +git blame --porcelain deleted-lines.txt > .git/deleted-lines.baseline +git blame --porcelain deleted-lines-multiple-hunks.txt > .git/deleted-lines-multiple-hunks.baseline +git blame --porcelain changed-lines.txt > .git/changed-lines.baseline +git blame --porcelain changed-line-between-unchanged-lines.txt > .git/changed-line-between-unchanged-lines.baseline +git blame --porcelain added-lines.txt > .git/added-lines.baseline +git blame --porcelain added-lines-around.txt > .git/added-lines-around.baseline +git blame --porcelain switched-lines.txt > .git/switched-lines.baseline +git blame --porcelain added-line-before-changed-line.txt > .git/added-line-before-changed-line.baseline +git blame --porcelain same-line-changed-twice.txt > .git/same-line-changed-twice.baseline +git blame --porcelain coalesce-adjacent-hunks.txt > .git/coalesce-adjacent-hunks.baseline + +git blame --porcelain resolved-conflict.txt > .git/resolved-conflict.baseline +git blame --porcelain file-in-one-chain-of-ancestors.txt > .git/file-in-one-chain-of-ancestors.baseline +git blame --porcelain different-file-in-another-chain-of-ancestors.txt > .git/different-file-in-another-chain-of-ancestors.baseline +git blame --porcelain 
file-only-changed-in-branch.txt > .git/file-only-changed-in-branch.baseline + +git blame --porcelain empty-lines-histogram.txt > .git/empty-lines-histogram.baseline + +git config --local diff.algorithm myers + +git blame --porcelain empty-lines-myers.txt > .git/empty-lines-myers.baseline From 25efbfb72e5a043ce8f7d196c1f7104ef93394df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Thu, 19 Dec 2024 20:14:50 +0100 Subject: [PATCH 02/16] feat: Add `blame` plumbing crate to the top-level. For now, it doesn't come with a simplified `gix` API though. --- Cargo.lock | 9 +++++---- gix-blame/Cargo.toml | 6 +++--- gix/Cargo.toml | 4 ++++ gix/src/lib.rs | 2 ++ 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f09c7f8a822..ca292869281 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1365,6 +1365,7 @@ dependencies = [ "gix-actor 0.33.1", "gix-archive", "gix-attributes 0.23.1", + "gix-blame", "gix-command", "gix-commitgraph 0.25.1", "gix-config", @@ -1540,14 +1541,14 @@ version = "0.0.0" dependencies = [ "gix-diff", "gix-filter", - "gix-fs 0.12.0", + "gix-fs 0.12.1", "gix-hash 0.15.1", "gix-index 0.37.0", - "gix-object 0.46.0", + "gix-object 0.46.1", "gix-odb", - "gix-ref 0.49.0", + "gix-ref 0.49.1", "gix-testtools", - "gix-traverse 0.43.0", + "gix-traverse 0.43.1", "gix-worktree 0.38.0", ] diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index 747f2bf17b6..ed0018bc9ee 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -14,7 +14,7 @@ rust-version = "1.65" doctest = false [dependencies] -gix-diff = { version = "^0.48.0", path = "../gix-diff", default-features = false, features = ["blob"] } +gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] } gix-object = { version = "^0.46.0", path = "../gix-object" } gix-hash = { version = "^0.15.0", path = "../gix-hash" } gix-worktree = { version = "^0.38.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } @@ -22,8 +22,8 @@ gix-traverse = { version = "^0.43.0", path = "../gix-traverse" } [dev-dependencies] gix-ref = { version = "^0.49.0", path = "../gix-ref" } -gix-filter = { version = "^0.15.0", path = "../gix-filter" } +gix-filter = { version = "^0.16.0", path = "../gix-filter" } gix-fs = { version = "^0.12.0", path = "../gix-fs" } gix-index = { version = "^0.37.0", path = "../gix-index" } -gix-odb = { version = "^0.65.0", path = "../gix-odb" } +gix-odb = { version = "^0.66.0", path = "../gix-odb" } gix-testtools = { path = "../tests/tools" } diff --git a/gix/Cargo.toml b/gix/Cargo.toml index fcfc29706a3..c0578080ff9 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -141,6 +141,9 @@ blob-diff = ["gix-diff/blob", "attributes"] ## Add functions to specifically merge files, using the standard three-way merge that git offers. merge = ["tree-editor", "blob-diff", "dep:gix-merge", "attributes"] +## Add blame command similar to `git blame`. +blame = ["dep:gix-blame"] + ## Make it possible to turn a tree into a stream of bytes, which can be decoded to entries and turned into various other formats. 
worktree-stream = ["gix-worktree-stream", "attributes"] @@ -371,6 +374,7 @@ gix-command = { version = "^0.4.0", path = "../gix-command", optional = true } gix-worktree-stream = { version = "^0.18.0", path = "../gix-worktree-stream", optional = true } gix-archive = { version = "^0.18.0", path = "../gix-archive", default-features = false, optional = true } +gix-blame = { version= "^0.0.0", path ="../gix-blame", optional = true } # For communication with remotes gix-protocol = { version = "^0.47.0", path = "../gix-protocol" } diff --git a/gix/src/lib.rs b/gix/src/lib.rs index 906db6bb3e8..6c8d06f91dd 100644 --- a/gix/src/lib.rs +++ b/gix/src/lib.rs @@ -95,6 +95,8 @@ pub use gix_actor as actor; #[cfg(feature = "attributes")] pub use gix_attributes as attrs; +#[cfg(feature = "blame")] +pub use gix_blame as blame; #[cfg(feature = "command")] pub use gix_command as command; pub use gix_commitgraph as commitgraph; From 80e5804dea9c1090efdcddbfc97ed1d573c28091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?= Date: Thu, 19 Dec 2024 20:16:36 +0100 Subject: [PATCH 03/16] feat: add `gix blame` to the CLI That way it's possible to see the `blame` result of any file in the repository. Co-authored-by: Sebastian Thiel --- README.md | 1 + crate-status.md | 9 ++++ gitoxide-core/Cargo.toml | 2 +- gitoxide-core/src/repository/blame.rs | 62 +++++++++++++++++++++++++++ gitoxide-core/src/repository/mod.rs | 1 + src/plumbing/main.rs | 9 ++++ src/plumbing/options/mod.rs | 4 ++ 7 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 gitoxide-core/src/repository/blame.rs diff --git a/README.md b/README.md index 99c26d1f361..a6b5b4db7fd 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,7 @@ is usable to some extent. * [gix-shallow](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-shallow) * `gitoxide-core` * **very early** _(possibly without any documentation and many rough edges)_ + * [gix-blame](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-blame) * **idea** _(just a name placeholder)_ * [gix-note](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-note) * [gix-fetchhead](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-fetchhead) diff --git a/crate-status.md b/crate-status.md index e42d8402ebf..4040bf2144e 100644 --- a/crate-status.md +++ b/crate-status.md @@ -361,6 +361,15 @@ Check out the [performance discussion][gix-diff-performance] as well. * [x] API documentation * [ ] Examples +### gix-blame + +* [ ] commit-annotations for a single file + - [ ] progress + - [ ] interruptability + - [ ] streaming +* [x] API documentation + * [ ] Examples + ### gix-traverse Check out the [performance discussion][gix-traverse-performance] as well. diff --git a/gitoxide-core/Cargo.toml b/gitoxide-core/Cargo.toml index 64f2642e7c8..2291c5c4223 100644 --- a/gitoxide-core/Cargo.toml +++ b/gitoxide-core/Cargo.toml @@ -49,7 +49,7 @@ serde = ["gix/serde", "dep:serde_json", "dep:serde", "bytesize/serde"] [dependencies] # deselect everything else (like "performance") as this should be controllable by the parent application. 
-gix = { version = "^0.69.1", path = "../gix", default-features = false, features = ["merge", "blob-diff", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status", "dirwalk"] } +gix = { version = "^0.69.1", path = "../gix", default-features = false, features = ["merge", "blob-diff", "blame", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status", "dirwalk"] } gix-pack-for-configuration-only = { package = "gix-pack", version = "^0.56.0", path = "../gix-pack", default-features = false, features = ["pack-cache-lru-dynamic", "pack-cache-lru-static", "generate", "streaming-input"] } gix-transport-configuration-only = { package = "gix-transport", version = "^0.44.0", path = "../gix-transport", default-features = false } gix-archive-for-configuration-only = { package = "gix-archive", version = "^0.18.0", path = "../gix-archive", optional = true, features = ["tar", "tar_gz"] } diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs new file mode 100644 index 00000000000..0129e83b20e --- /dev/null +++ b/gitoxide-core/src/repository/blame.rs @@ -0,0 +1,62 @@ +use std::{ffi::OsStr, path::PathBuf, str::Lines}; + +use anyhow::anyhow; +use gix::bstr::BStr; + +pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Write) -> anyhow::Result<()> { + repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); + + let suspect = repo.head()?.peel_to_commit_in_place()?; + let traverse: Vec<_> = + gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::>) + .build()? + .collect(); + let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; + + let work_dir: PathBuf = repo + .work_dir() + .ok_or_else(|| anyhow!("blame needs a workdir, but there is none"))? + .into(); + let file_path: &BStr = gix::path::os_str_into_bstr(file)?; + + let blame_entries = gix::blame::blame_file( + &repo.objects, + traverse, + &mut resource_cache, + suspect.id, + work_dir.clone(), + file_path, + )?; + + let absolute_path = work_dir.join(file); + let file_content = std::fs::read_to_string(absolute_path)?; + let lines = file_content.lines(); + + write_blame_entries(out, lines, blame_entries)?; + + Ok(()) +} + +fn write_blame_entries( + mut out: impl std::io::Write, + mut lines: Lines<'_>, + blame_entries: Vec, +) -> Result<(), std::io::Error> { + for blame_entry in blame_entries { + for line_number in blame_entry.range_in_blamed_file { + let line = lines.next().unwrap(); + + writeln!( + out, + "{} {} {}", + blame_entry.commit_id.to_hex_with_len(8), + // `line_number` is 0-based, but we want to show 1-based line numbers (as `git` + // does). 
+ line_number + 1, + line + )?; + } + } + + Ok(()) +} diff --git a/gitoxide-core/src/repository/mod.rs b/gitoxide-core/src/repository/mod.rs index c9044f99cd9..5b51e5c1ac3 100644 --- a/gitoxide-core/src/repository/mod.rs +++ b/gitoxide-core/src/repository/mod.rs @@ -21,6 +21,7 @@ pub enum PathsOrPatterns { pub mod archive; pub mod cat; pub use cat::function::cat; +pub mod blame; pub mod commit; pub mod config; mod credential; diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 2391dd14cd3..20c0db777cc 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1533,6 +1533,15 @@ pub fn main() -> Result<()> { }, ), }, + Subcommands::Blame { file } => prepare_and_run( + "blame", + trace, + verbose, + progress, + progress_keep_open, + None, + move |_progress, out, _err| core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out), + ), Subcommands::Completions { shell, out_dir } => { let mut app = Args::command(); diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index b0928c0d426..99d66a39861 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -151,6 +151,10 @@ pub enum Subcommands { /// Subcommands that need no git repository to run. #[clap(subcommand)] Free(free::Subcommands), + /// Blame lines in a file + Blame { + file: std::ffi::OsString, + }, /// Generate shell completions to stdout or a directory. #[clap(visible_alias = "generate-completions", visible_alias = "shell-completions")] Completions { From 983ec7d776b459898b90927242582fc03a0e9056 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 19 Dec 2024 20:27:11 +0100 Subject: [PATCH 04/16] first review round * document as much as possible * simplify signatures * add TODOs for other opportunities --- Cargo.lock | 1 + crate-status.md | 13 +- gitoxide-core/src/repository/blame.rs | 8 +- gix-blame/Cargo.toml | 1 + gix-blame/src/lib.rs | 416 ++++++++++++++------------ gix-blame/tests/blame.rs | 154 +++++----- 6 files changed, 319 insertions(+), 274 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca292869281..f7b2469af2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1546,6 +1546,7 @@ dependencies = [ "gix-index 0.37.0", "gix-object 0.46.1", "gix-odb", + "gix-path 0.10.13", "gix-ref 0.49.1", "gix-testtools", "gix-traverse 0.43.1", diff --git a/crate-status.md b/crate-status.md index 4040bf2144e..44fb8d7c4d1 100644 --- a/crate-status.md +++ b/crate-status.md @@ -293,7 +293,7 @@ The top-level crate that acts as hub to all functionality provided by the `gix-* * [x] safe with cycles and recursive configurations * [x] multi-line with comments and quotes * **promisor** - * It's vague, but these seems to be like index files allowing to fetch objects from a server on demand. + * It's vague, but these seem to be like index files allowing to fetch objects from a server on demand. * [x] API documentation * [ ] Some examples @@ -363,10 +363,17 @@ Check out the [performance discussion][gix-diff-performance] as well. 
### gix-blame -* [ ] commit-annotations for a single file +* [x] commit-annotations for a single file - [ ] progress - - [ ] interruptability + - [ ] interruptibility - [ ] streaming +- [ ] support for worktree changes (creates virtual commit on top of `HEAD`) +- [ ] shallow-history support +- [ ] rename tracking (track different paths through history) +- [ ] commits to ignore +* **Performance-Improvements** + - [ ] use commit-graph bloom filter for performance + - [ ] traverse input-commits in correct order without `compute_indegrees_to_depth()` * [x] API documentation * [ ] Examples diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index 0129e83b20e..1b5ff20096b 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -7,10 +7,9 @@ pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Wr repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); let suspect = repo.head()?.peel_to_commit_in_place()?; - let traverse: Vec<_> = + let traverse = gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::>) - .build()? - .collect(); + .build()?; let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; let work_dir: PathBuf = repo @@ -19,11 +18,10 @@ pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Wr .into(); let file_path: &BStr = gix::path::os_str_into_bstr(file)?; - let blame_entries = gix::blame::blame_file( + let blame_entries = gix::blame::file( &repo.objects, traverse, &mut resource_cache, - suspect.id, work_dir.clone(), file_path, )?; diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index ed0018bc9ee..ce07a774386 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -14,6 +14,7 @@ rust-version = "1.65" doctest = false [dependencies] +gix-path = { version = "^0.10.13", path = "../gix-path" } gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] } gix-object = { version = "^0.46.0", path = "../gix-object" } gix-hash = { version = "^0.15.0", path = "../gix-hash" } diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index 25970cf05bf..139c4445425 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -1,10 +1,23 @@ //! A crate to implement an algorithm to annotate lines in tracked files with the commits that changed them. -#![deny(rust_2018_idioms)] +//! +//! ### Terminology +//! +//! * **Original File** +//! - The file as it exists in `HEAD`. +//! - the initial state with all lines that we need to associate with a *Blamed File*. +//! * **Blamed File** +//! - A file at a version (i.e. commit) that introduces hunks into the final 'image'. +//! * **Suspects** +//! - The versions of the files that can contain hunks that we could use in the final 'image' +//! - multiple at the same time as the commit-graph may split up. +//! - turns into *Blamed File* once we have found an association into the *Original File*. +//! - every [`UnblamedHunk`] can have multiple suspects of which we find the best match. +#![deny(rust_2018_idioms, missing_docs)] #![forbid(unsafe_code)] use std::{ collections::BTreeMap, - ops::{Add, AddAssign, Range, SubAssign}, + ops::{AddAssign, Range, SubAssign}, path::PathBuf, }; @@ -12,43 +25,30 @@ use gix_hash::ObjectId; use gix_object::bstr::BStr; use gix_object::FindExt; +/// Describes the offset of a particular hunk relative to the *Original File*. 
#[derive(Clone, Copy, Debug, PartialEq)] pub enum Offset { + /// The amount of lines to add. Added(u32), + /// The amount of lines to remove. Deleted(u32), } -impl Add for Offset { - type Output = Offset; - - fn add(self, rhs: u32) -> Self::Output { - let Self::Added(added) = self else { todo!() }; - - Self::Added(added + rhs) - } -} - -impl Add for Offset { - type Output = Offset; - - fn add(self, rhs: Offset) -> Self::Output { - match (self, rhs) { - (Self::Added(added), Offset::Added(added_rhs)) => Self::Added(added + added_rhs), - (Self::Added(added), Offset::Deleted(deleted_rhs)) => { - if deleted_rhs > added { - Self::Deleted(deleted_rhs - added) - } else { - Self::Added(added - deleted_rhs) - } - } - (Self::Deleted(deleted), Offset::Added(added_rhs)) => { - if added_rhs > deleted { - Self::Added(added_rhs - deleted) - } else { - Self::Deleted(deleted - added_rhs) +impl Offset { + /// Shift the given `range` according to our offset. + pub fn shifted_range(&self, range: &Range) -> Range { + match self { + Offset::Added(added) => { + debug_assert!(range.start >= *added, "{self:?} {range:?}"); + Range { + start: range.start - added, + end: range.end - added, } } - (Self::Deleted(deleted), Offset::Deleted(deleted_rhs)) => Self::Deleted(deleted + deleted_rhs), + Offset::Deleted(deleted) => Range { + start: range.start + deleted, + end: range.end + deleted, + }, } } } @@ -83,23 +83,33 @@ impl SubAssign for Offset { } } +/// A mapping of a section of the *Original File* to the section in a *Blamed File* that introduced it. +/// +/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally, +/// they have the same content, which is the reason they are in what is returned by [`file()`]. +// TODO: see if this can be encoded as `start_in_original_file` and `start_in_blamed_file` and a single `len`. #[derive(Debug, PartialEq)] pub struct BlameEntry { + /// The section of tokens in the tokenized version of the *Blamed File* (typically lines). pub range_in_blamed_file: Range, + /// The section of tokens in the tokenized version of the *Original File* (typically lines). pub range_in_original_file: Range, + /// The commit that introduced the section into the *Blamed File*. pub commit_id: ObjectId, } impl BlameEntry { + /// Create a new instance. pub fn new(range_in_blamed_file: Range, range_in_original_file: Range, commit_id: ObjectId) -> Self { - assert!( + debug_assert!( range_in_blamed_file.end > range_in_blamed_file.start, "{range_in_blamed_file:?}" ); - assert!( + debug_assert!( range_in_original_file.end > range_in_original_file.start, "{range_in_original_file:?}" ); + debug_assert_eq!(range_in_original_file.len(), range_in_blamed_file.len()); Self { range_in_blamed_file: range_in_blamed_file.clone(), @@ -108,8 +118,9 @@ impl BlameEntry { } } + /// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_original_file`. fn with_offset(range_in_original_file: Range, commit_id: ObjectId, offset: Offset) -> Self { - assert!( + debug_assert!( range_in_original_file.end > range_in_original_file.start, "{range_in_original_file:?}" ); @@ -121,7 +132,7 @@ impl BlameEntry { commit_id, }, Offset::Deleted(deleted) => { - assert!( + debug_assert!( range_in_original_file.start >= deleted, "{range_in_original_file:?} {offset:?}" ); @@ -136,8 +147,9 @@ impl BlameEntry { } } + /// Create an offset from a portion of the *Original File*. 
fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self { - let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).expect("TODO"); + let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).unwrap(); Self { range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(), @@ -153,26 +165,17 @@ trait LineRange { impl LineRange for Range { fn shift_by(&self, offset: Offset) -> Self { - match offset { - Offset::Added(added) => { - assert!(self.start >= added, "{self:?} {offset:?}"); - - Self { - start: self.start - added, - end: self.end - added, - } - } - Offset::Deleted(deleted) => Self { - start: self.start + deleted, - end: self.end + deleted, - }, - } + offset.shifted_range(self) } } +/// TODO: docs - what is it? +// TODO: is `Clone` really needed. #[derive(Clone, Debug, PartialEq)] pub struct UnblamedHunk { + /// TODO: figure out how this works. pub range_in_blamed_file: Range, + /// Maps a commit to the range in the *Original File* that `range_in_blamed_file` refers to. pub suspects: BTreeMap>, } @@ -183,7 +186,7 @@ enum Either { } impl UnblamedHunk { - pub fn new(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> Self { + fn new(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> Self { assert!( range_in_blamed_file.end > range_in_blamed_file.start, "{range_in_blamed_file:?}" @@ -257,6 +260,7 @@ impl UnblamedHunk { } } + /// Transfer all ranges from the commit at `from` to the commit at `to`. fn pass_blame(&mut self, from: ObjectId, to: ObjectId) { if let Some(range_in_suspect) = self.suspects.remove(&from) { self.suspects.insert(to, range_in_suspect); @@ -270,28 +274,36 @@ impl UnblamedHunk { } fn remove_blame(&mut self, suspect: ObjectId) { - let _ = self.suspects.remove(&suspect); + // TODO: figure out why it can try to remove suspects that don't exist. + self.suspects.remove(&suspect); } } +/// A single change between two blobs, or an unchanged region. #[derive(Clone, Debug, PartialEq)] pub enum Change { + /// A range of tokens that wasn't changed. Unchanged(Range), + /// `(added_line_range, num_deleted_in_before)` Added(Range, u32), + /// `(line_to_start_deletion_at, num_deleted_in_before)` Deleted(u32, u32), } +/// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*. struct ChangeRecorder { - previous_after_end: u32, - changes: Vec, + last_seen_after_end: u32, + hunks: Vec, total_number_of_lines: u32, } impl ChangeRecorder { + /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed + /// so that the entire file is represented by [`Change`]. fn new(total_number_of_lines: u32) -> Self { ChangeRecorder { - previous_after_end: 0, - changes: vec![], + last_seen_after_end: 0, + hunks: Vec::new(), total_number_of_lines, } } @@ -300,42 +312,40 @@ impl ChangeRecorder { impl gix_diff::blob::Sink for ChangeRecorder { type Out = Vec; - // “imara-diff will compute a line diff by default”, so each `start` and `end` represents a - // line in a file. fn process_change(&mut self, before: Range, after: Range) { // This checks for unchanged hunks. 
- // - // https://docs.rs/imara-diff/latest/imara_diff/sink/trait.Sink.html#notes - if after.start > self.previous_after_end { - self.changes - .push(Change::Unchanged(self.previous_after_end..after.start)); + if after.start > self.last_seen_after_end { + self.hunks + .push(Change::Unchanged(self.last_seen_after_end..after.start)); } - match (before.end > before.start, after.end > after.start) { + match (!before.is_empty(), !after.is_empty()) { (_, true) => { - self.changes + self.hunks .push(Change::Added(after.start..after.end, before.end - before.start)); } (true, false) => { - self.changes - .push(Change::Deleted(after.start, before.end - before.start)); + self.hunks.push(Change::Deleted(after.start, before.end - before.start)); } - (false, false) => unimplemented!(), + (false, false) => unreachable!("BUG: imara-diff provided a non-change"), } - - self.previous_after_end = after.end; + self.last_seen_after_end = after.end; } fn finish(mut self) -> Self::Out { - if self.total_number_of_lines > self.previous_after_end { - self.changes - .push(Change::Unchanged(self.previous_after_end..self.total_number_of_lines)); + if self.total_number_of_lines > self.last_seen_after_end { + self.hunks + .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines)); } - - self.changes + self.hunks } } +/// Compare a section from the *Original File* (`hunk`) with a change from a diff and see if there +/// is an intersection with `change`. Based on that intersection, we may generate a [`BlameEntry`] for `out` +/// and/or split the `hunk` into multiple. +/// +/// This is the core of the blame implementation as it matches regions in *Blamed Files* to the *Original File*. pub fn process_change( out: &mut Vec, new_hunks_to_blame: &mut Vec, @@ -348,7 +358,6 @@ pub fn process_change( (Some(hunk), Some(Change::Unchanged(unchanged))) => { let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { new_hunks_to_blame.push(hunk); - return (None, Some(Change::Unchanged(unchanged))); }; @@ -418,7 +427,7 @@ pub fn process_change( match ( range_in_suspect.contains(&added.start), // Since `added` is a range that is not inclusive at the end, `added.end` is - // not part of `added`. The first line that is is `added.end - 1`. + // not part of `added`. The first line that is `added.end - 1`. (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end, ) { (true, true) => { @@ -598,20 +607,22 @@ pub fn process_change( } } +/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other. +/// Once a match is found, it's pushed onto `out`. 
pub fn process_changes( out: &mut Vec, - hunks_to_blame: &[UnblamedHunk], - changes: &[Change], + hunks_to_blame: Vec, + changes: Vec, suspect: ObjectId, ) -> Vec { - let mut hunks_iter = hunks_to_blame.iter().cloned(); - let mut changes_iter = changes.iter().cloned(); + let mut hunks_iter = hunks_to_blame.into_iter(); + let mut changes_iter = changes.into_iter(); - let mut hunk: Option = hunks_iter.next(); - let mut change: Option = changes_iter.next(); + let mut hunk = hunks_iter.next(); + let mut change = changes_iter.next(); - let mut new_hunks_to_blame: Vec = vec![]; - let mut offset_in_destination: Offset = Offset::Added(0); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination = Offset::Added(0); loop { (hunk, change) = process_change( @@ -630,16 +641,15 @@ pub fn process_changes( break; } } - new_hunks_to_blame } -fn get_changes_for_file_path( +fn tree_diff_at_file_path( odb: impl gix_object::Find + gix_object::FindHeader, file_path: &BStr, id: ObjectId, parent_id: ObjectId, -) -> Vec { +) -> Option { let mut buffer = Vec::new(); let parent = odb.find_commit(&parent_id, &mut buffer).unwrap(); @@ -671,19 +681,14 @@ fn get_changes_for_file_path( ) .unwrap(); - recorder - .records - .iter() - .filter(|change| match change { - gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, - gix_diff::tree::recorder::Change::Addition { path, .. } => path == file_path, - gix_diff::tree::recorder::Change::Deletion { path, .. } => path == file_path, - }) - .cloned() - .collect() + recorder.records.into_iter().find(|change| match change { + gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Addition { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Deletion { path, .. } => path == file_path, + }) } -fn get_changes( +fn blob_changes( odb: impl gix_object::Find + gix_object::FindHeader, resource_cache: &mut gix_diff::blob::Platform, oid: ObjectId, @@ -727,81 +732,115 @@ fn get_changes( /// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a /// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131 fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { - // TODO - // It’s possible this could better be done on insertion into `lines_blamed`. - lines_blamed.into_iter().fold(vec![], |mut acc, entry| { - let previous_entry = acc.last(); - - if let Some(previous_entry) = previous_entry { - if previous_entry.commit_id == entry.commit_id + let len = lines_blamed.len(); + lines_blamed + .into_iter() + .fold(Vec::with_capacity(len), |mut acc, entry| { + let previous_entry = acc.last(); + + if let Some(previous_entry) = previous_entry { + if previous_entry.commit_id == entry.commit_id && previous_entry.range_in_blamed_file.end == entry.range_in_blamed_file.start // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`. 
&& previous_entry.range_in_original_file.end == entry.range_in_original_file.start - { - let coalesced_entry = BlameEntry { - range_in_blamed_file: previous_entry.range_in_blamed_file.start..entry.range_in_blamed_file.end, - range_in_original_file: previous_entry.range_in_original_file.start - ..entry.range_in_original_file.end, - commit_id: previous_entry.commit_id, - }; + { + let coalesced_entry = BlameEntry { + range_in_blamed_file: previous_entry.range_in_blamed_file.start..entry.range_in_blamed_file.end, + range_in_original_file: previous_entry.range_in_original_file.start + ..entry.range_in_original_file.end, + commit_id: previous_entry.commit_id, + }; - acc.pop(); - acc.push(coalesced_entry); + acc.pop(); + acc.push(coalesced_entry); + } else { + acc.push(entry); + } + + acc } else { acc.push(entry); - } - - acc - } else { - acc.push(entry); - acc - } - }) + acc + } + }) } // TODO: do not instantiate anything, get everything passed as argument. -pub fn blame_file( +/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file +/// at `traverse[0]:` originated in. +/// +/// ## Paramters +/// +/// * `odb` +/// - Access to database objects, also for used for diffing. +/// - Should have an object cache for good diff performance. +/// * `traverse` +/// - The list of commits from the most recent to prior ones, following all parents sorted +/// by time. +/// - It's paramount that older commits are returned after newer ones. +/// - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`. +/// * `file_path` +/// - A *slash-separated* worktree-relative path to the file to blame. +/// * `resource_cache` +/// - Used for diffing trees. +/// +/// ## The algorithm +/// +/// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that +/// represent the worktree state. +/// We begin with a single [`UnblamedHunk`] and a single suspect, usually `HEAD` as the commit containing the *Original File*. +/// We traverse the commit graph starting at `HEAD`, and see if there have been changes to `file_path`. If so, we have found +/// a *Blamed File* and a *Suspect* commit, and have hunks that represent these changes. Now the [`UnblamedHunk`]s is split at +/// the boundaries of each matching hunk, creating a new [`UnblamedHunk`] on each side, along with a [`BlameEntry`] to represent +/// the match. +/// This is repeated until there are no non-empty [`UnblamedHunk`]s left. +/// +/// At a high level, what we want to do is the following: +/// +/// - get the commit that belongs to a commit id +/// - walk through parents +/// - for each parent, do a diff and mark lines that don’t have a suspect (this is the term +/// used in `libgit2`) yet, but that have been changed in this commit +/// +/// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame +/// suspects. 
It can be visualized as follows: +// +// <----------------------------------------> +// <---------------><-----------------------> +// <---><----------><-----------------------> +// <---><----------><-------><-----><-------> +// <---><---><-----><-------><-----><-------> +// <---><---><-----><-------><-----><-><-><-> +pub fn file( odb: impl gix_object::Find + gix_object::FindHeader, traverse: impl IntoIterator>, resource_cache: &mut gix_diff::blob::Platform, - suspect: ObjectId, - worktree_path: PathBuf, + // TODO: remove + worktree_root: PathBuf, file_path: &BStr, ) -> Result, E> { - // TODO - // At a high level, what we want to do is the following: - // - // - get the commit that belongs to a commit id - // - walk through parents - // - for each parent, do a diff and mark lines that don’t have a suspect (this is the term - // used in `libgit2`) yet, but that have been changed in this commit - // - // The algorithm in `libgit2` works by going through parents and keeping a linked list of blame - // suspects. It can be visualized as follows: - // - // <----------------------------------------> - // <---------------><-----------------------> - // <---><----------><-----------------------> - // <---><----------><-------><-----><-------> - // <---><---><-----><-------><-----><-------> - // <---><---><-----><-------><-----><-><-><-> - - // Needed for `to_str`. + // TODO: `worktree_root` should be removed - read everything from Commit. + // Worktree changes should be placed into a temporary commit. + // TODO: remove this and deduplicate the respective code. use gix_object::bstr::ByteSlice; + let absolute_path = worktree_root.join(gix_path::from_bstr(file_path)); - let absolute_path = worktree_path.join(file_path.to_str().unwrap()); - - // TODO Verify that `imara-diff` tokenizes lines the same way `lines` does. + // TODO use `imara-diff` to tokenize this just like it will be tokenized when diffing. let number_of_lines = std::fs::read_to_string(absolute_path).unwrap().lines().count(); - let mut hunks_to_blame: Vec = vec![UnblamedHunk::new( + let mut traverse = traverse.into_iter().peekable(); + let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { + todo!("return actual error"); + }; + + let mut hunks_to_blame = vec![UnblamedHunk::new( 0..number_of_lines.try_into().unwrap(), suspect, Offset::Added(0), )]; - let mut out: Vec = vec![]; + let mut out = Vec::new(); 'outer: for item in traverse { let item = item?; let suspect = item.id; @@ -819,8 +858,7 @@ pub fn blame_file( .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), ); - hunks_to_blame = vec![]; - + hunks_to_blame.clear(); break; } @@ -851,22 +889,18 @@ pub fn blame_file( if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are identical // which is why we can pass blame to the parent without further checks. - hunks_to_blame - .iter_mut() - .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); - + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, parent_id); + } continue; } } - let changes_for_file_path = get_changes_for_file_path(&odb, file_path, item.id, parent_id); - - let [ref modification]: [gix_diff::tree::recorder::Change] = changes_for_file_path[..] else { + let Some(modification) = tree_diff_at_file_path(&odb, file_path, item.id, parent_id) else { // None of the changes affected the file we’re currently blaming. Pass blame to parent. 
- hunks_to_blame - .iter_mut() - .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); - + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, parent_id); + } continue; }; @@ -880,18 +914,17 @@ pub fn blame_file( .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), ); - hunks_to_blame = vec![]; - + hunks_to_blame.clear(); break; } gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = get_changes(&odb, resource_cache, *oid, *previous_oid, file_path); + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); - hunks_to_blame = process_changes(&mut out, &hunks_to_blame, &changes, suspect); - hunks_to_blame - .iter_mut() - .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, parent_id); + } } } } else { @@ -919,24 +952,22 @@ pub fn blame_file( // The blobs storing the blamed file in `entry` and `parent_entry` are // identical which is why we can pass blame to the parent without further // checks. - hunks_to_blame - .iter_mut() - .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, *parent_id)); - + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, *parent_id); + } continue 'outer; } } } for parent_id in parent_ids { - let changes_for_file_path = get_changes_for_file_path(&odb, file_path, item.id, parent_id); - - let [ref modification]: [gix_diff::tree::recorder::Change] = changes_for_file_path[..] else { + let changes_for_file_path = tree_diff_at_file_path(&odb, file_path, item.id, parent_id); + let Some(modification) = changes_for_file_path else { // None of the changes affected the file we’re currently blaming. Pass blame // to parent. - hunks_to_blame - .iter_mut() - .for_each(|unblamed_hunk| unblamed_hunk.clone_blame(suspect, parent_id)); + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.clone_blame(suspect, parent_id); + } continue; }; @@ -951,28 +982,29 @@ pub fn blame_file( } gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = get_changes(&odb, resource_cache, *oid, *previous_oid, file_path); + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); - hunks_to_blame = process_changes(&mut out, &hunks_to_blame, &changes, suspect); - - hunks_to_blame - .iter_mut() - .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, parent_id); + } } } } - - hunks_to_blame - .iter_mut() - .for_each(|unblamed_hunk| unblamed_hunk.remove_blame(suspect)); + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.remove_blame(suspect); + } } } - assert_eq!(hunks_to_blame, vec![]); + debug_assert_eq!( + hunks_to_blame, + vec![], + "only if there is no portion of the file left we have completed the blame" + ); // I don’t know yet whether it would make sense to use a data structure instead that preserves // order on insertion. 
out.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); - Ok(coalesce_blame_entries(out)) } diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 5d85590713a..3634491ea56 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -1,6 +1,7 @@ -use gix_blame::{blame_file, process_change, process_changes, BlameEntry, Change, Offset, UnblamedHunk}; +use gix_blame::{file, process_change, process_changes, BlameEntry, Change, Offset, UnblamedHunk}; use gix_hash::ObjectId; use gix_object::bstr; +use std::ops::Range; use std::path::PathBuf; struct Baseline<'a> { @@ -111,7 +112,6 @@ struct Fixture { worktree_path: PathBuf, odb: gix_odb::Handle, resource_cache: gix_diff::blob::Platform, - suspect: ObjectId, commits: Vec>, } @@ -177,7 +177,6 @@ impl Fixture { odb, worktree_path, resource_cache, - suspect: head_id, commits, }) } @@ -191,15 +190,13 @@ macro_rules! mktest { worktree_path, odb, mut resource_cache, - suspect, commits, } = Fixture::new().unwrap(); - let lines_blamed = blame_file( + let lines_blamed = file( &odb, commits, &mut resource_cache, - suspect, worktree_path, format!("{}.txt", $case).as_str().into(), ) @@ -253,15 +250,13 @@ fn diff_disparity() { worktree_path, odb, mut resource_cache, - suspect, commits, } = Fixture::new().unwrap(); - let lines_blamed = blame_file( + let lines_blamed = file( &odb, commits, &mut resource_cache, - suspect, worktree_path, format!("{case}.txt").as_str().into(), ) @@ -309,7 +304,7 @@ fn process_change_works_added_hunk() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), Some(Change::Added(0..3, 0)), ); @@ -345,7 +340,7 @@ fn process_change_works_added_hunk_2() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), Some(Change::Added(2..3, 0)), ); @@ -387,7 +382,7 @@ fn process_change_works_added_hunk_3() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(10..15, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(10..15, suspect, Offset::Added(0))), Some(Change::Added(12..13, 0)), ); @@ -430,7 +425,7 @@ fn process_change_works_added_hunk_4() { &mut offset_in_destination, suspect, // range_in_destination: 7..12 - Some(UnblamedHunk::new(12..17, suspect, Offset::Added(5))), + Some(new_unblamed_hunk(12..17, suspect, Offset::Added(5))), Some(Change::Added(9..10, 0)), ); @@ -472,7 +467,7 @@ fn process_change_works_added_hunk_5() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), Some(Change::Added(0..3, 1)), ); @@ -509,7 +504,7 @@ fn process_change_works_added_hunk_6() { &mut offset_in_destination, suspect, // range_in_destination: 0..4 - Some(UnblamedHunk::new(1..5, suspect, Offset::Added(1))), + Some(new_unblamed_hunk(1..5, suspect, Offset::Added(1))), Some(Change::Added(0..3, 1)), ); @@ -546,7 +541,7 @@ fn process_change_works_added_hunk_7() { &mut offset_in_destination, suspect, // range_in_destination: 2..6 - Some(UnblamedHunk::new(3..7, suspect, Offset::Added(1))), + Some(new_unblamed_hunk(3..7, suspect, Offset::Added(1))), Some(Change::Added(3..5, 1)), ); @@ -589,7 +584,7 @@ fn process_change_works_added_hunk_8() { &mut offset_in_destination, suspect, // 
range_in_destination: 25..26 - Some(UnblamedHunk::new(23..24, suspect, Offset::Deleted(2))), + Some(new_unblamed_hunk(23..24, suspect, Offset::Deleted(2))), Some(Change::Added(25..27, 1)), ); @@ -620,7 +615,7 @@ fn process_change_works_added_hunk_9() { &mut offset_in_destination, suspect, // range_in_destination: 21..22 - Some(UnblamedHunk::new(23..24, suspect, Offset::Added(2))), + Some(new_unblamed_hunk(23..24, suspect, Offset::Added(2))), Some(Change::Added(18..22, 3)), ); @@ -651,7 +646,7 @@ fn process_change_works_added_hunk_10() { &mut offset_in_destination, suspect, // range_in_destination: 70..108 - Some(UnblamedHunk::new(71..109, suspect, Offset::Added(1))), + Some(new_unblamed_hunk(71..109, suspect, Offset::Added(1))), Some(Change::Added(106..109, 0)), ); @@ -688,7 +683,7 @@ fn process_change_works_added_hunk_11() { &mut offset_in_destination, suspect, // range_in_destination: 137..144 - Some(UnblamedHunk::new(149..156, suspect, Offset::Added(12))), + Some(new_unblamed_hunk(149..156, suspect, Offset::Added(12))), Some(Change::Added(143..146, 0)), ); @@ -725,7 +720,7 @@ fn process_change_works_no_overlap() { &mut offset_in_destination, suspect, // range_in_destination: 2..5 - Some(UnblamedHunk::new(3..6, suspect, Offset::Added(1))), + Some(new_unblamed_hunk(3..6, suspect, Offset::Added(1))), Some(Change::Added(7..10, 1)), ); @@ -755,7 +750,7 @@ fn process_change_works_no_overlap_2() { &mut offset_in_destination, suspect, // range_in_destination: 6..8 - Some(UnblamedHunk::new(9..11, suspect, Offset::Added(3))), + Some(new_unblamed_hunk(9..11, suspect, Offset::Added(3))), Some(Change::Added(2..5, 0)), ); @@ -785,7 +780,7 @@ fn process_change_works_no_overlap_3() { &mut offset_in_destination, suspect, // range_in_destination: 5..15 - Some(UnblamedHunk::new(4..15, suspect, Offset::Deleted(1))), + Some(new_unblamed_hunk(4..15, suspect, Offset::Deleted(1))), Some(Change::Added(4..5, 1)), ); @@ -815,7 +810,7 @@ fn process_change_works_no_overlap_4() { &mut offset_in_destination, suspect, // range_in_destination: 25..27 - Some(UnblamedHunk::new(23..25, suspect, Offset::Deleted(2))), + Some(new_unblamed_hunk(23..25, suspect, Offset::Deleted(2))), Some(Change::Unchanged(21..22)), ); @@ -845,7 +840,7 @@ fn process_change_works_no_overlap_5() { &mut offset_in_destination, suspect, // range_in_destination: 17..18 - Some(UnblamedHunk::new(15..16, suspect, Offset::Deleted(2))), + Some(new_unblamed_hunk(15..16, suspect, Offset::Deleted(2))), Some(Change::Deleted(20, 1)), ); @@ -875,7 +870,7 @@ fn process_change_works_no_overlap_6() { &mut offset_in_destination, suspect, // range_in_destination: 22..24 - Some(UnblamedHunk::new(23..25, suspect, Offset::Added(1))), + Some(new_unblamed_hunk(23..25, suspect, Offset::Added(1))), Some(Change::Deleted(20, 1)), ); @@ -905,7 +900,7 @@ fn process_change_works_enclosing_addition() { &mut offset_in_destination, suspect, // range_in_destination: 5..8 - Some(UnblamedHunk::new(2..5, suspect, Offset::Deleted(3))), + Some(new_unblamed_hunk(2..5, suspect, Offset::Deleted(3))), Some(Change::Added(3..12, 2)), ); @@ -936,7 +931,7 @@ fn process_change_works_enclosing_deletion() { &mut offset_in_destination, suspect, // range_in_destination: 13..20 - Some(UnblamedHunk::new(12..19, suspect, Offset::Deleted(1))), + Some(new_unblamed_hunk(12..19, suspect, Offset::Deleted(1))), Some(Change::Deleted(15, 2)), ); @@ -972,7 +967,7 @@ fn process_change_works_enclosing_unchanged_lines() { &mut offset_in_destination, suspect, // range_in_destination: 109..113 - 
Some(UnblamedHunk::new(110..114, suspect, Offset::Added(1))), + Some(new_unblamed_hunk(110..114, suspect, Offset::Added(1))), Some(Change::Unchanged(109..172)), ); @@ -1001,7 +996,7 @@ fn process_change_works_unchanged_hunk() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), Some(Change::Unchanged(0..3)), ); @@ -1030,7 +1025,7 @@ fn process_change_works_unchanged_hunk_2() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), Some(Change::Unchanged(0..7)), ); @@ -1091,7 +1086,7 @@ fn process_change_works_deleted_hunk() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), Some(Change::Deleted(5, 3)), ); @@ -1120,7 +1115,7 @@ fn process_change_works_deleted_hunk_2() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(2..16, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(2..16, suspect, Offset::Added(0))), Some(Change::Deleted(0, 4)), ); @@ -1149,7 +1144,7 @@ fn process_change_works_deleted_hunk_3() { &mut new_hunks_to_blame, &mut offset_in_destination, suspect, - Some(UnblamedHunk::new(2..16, suspect, Offset::Added(0))), + Some(new_unblamed_hunk(2..16, suspect, Offset::Added(0))), Some(Change::Deleted(14, 4)), ); @@ -1164,7 +1159,7 @@ fn process_change_works_deleted_hunk_3() { assert_eq!(lines_blamed, []); assert_eq!( new_hunks_to_blame, - [UnblamedHunk::new(2..14, suspect, Offset::Added(0))] + [new_unblamed_hunk(2..14, suspect, Offset::Added(0))] ); assert_eq!(offset_in_destination, Offset::Deleted(4)); } @@ -1240,10 +1235,8 @@ fn process_change_works_unchanged_only() { #[test] fn process_changes_works() { let mut lines_blamed = Vec::new(); - let hunks_to_blame = &[]; - let changes = &[]; let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + let new_hunks_to_blame = process_changes(&mut lines_blamed, vec![], vec![], suspect); assert_eq!(lines_blamed, []); assert_eq!(new_hunks_to_blame, []); @@ -1253,8 +1246,8 @@ fn process_changes_works() { fn process_changes_works_added_hunk() { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = &[UnblamedHunk::new(0..4, suspect, Offset::Added(0))]; - let changes = &[Change::Added(0..4, 0)]; + let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..4, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1272,8 +1265,8 @@ fn process_changes_works_added_hunk() { fn process_changes_works_added_hunk_2() { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; - let changes = &[Change::Added(0..4, 0), Change::Unchanged(4..6)]; + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..4, 0), Change::Unchanged(4..6)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1284,15 +1277,15 @@ fn process_changes_works_added_hunk_2() { 
commit_id: suspect }] ); - assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); } #[test] fn process_changes_works_added_hunk_3() { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; - let changes = &[Change::Unchanged(0..2), Change::Added(2..4, 0), Change::Unchanged(4..6)]; + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Unchanged(0..2), Change::Added(2..4, 0), Change::Unchanged(4..6)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1306,8 +1299,8 @@ fn process_changes_works_added_hunk_3() { assert_eq!( new_hunks_to_blame, [ - UnblamedHunk::new(0..2, suspect, Offset::Added(0)), - UnblamedHunk::new(4..6, suspect, Offset::Added(2)) + new_unblamed_hunk(0..2, suspect, Offset::Added(0)), + new_unblamed_hunk(4..6, suspect, Offset::Added(2)) ] ); } @@ -1316,8 +1309,8 @@ fn process_changes_works_added_hunk_3() { fn process_changes_works_added_hunk_4_0() { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; - let changes = &[Change::Added(0..1, 0), Change::Added(1..4, 0), Change::Unchanged(4..6)]; + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..1, 0), Change::Added(1..4, 0), Change::Unchanged(4..6)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1335,15 +1328,15 @@ fn process_changes_works_added_hunk_4_0() { } ] ); - assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); } #[test] fn process_changes_works_added_hunk_4_1() { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; - let changes = &[Change::Added(0..1, 0)]; + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..1, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1354,7 +1347,7 @@ fn process_changes_works_added_hunk_4_1() { commit_id: suspect }] ); - assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(1..6, suspect, Offset::Added(1))]); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(1..6, suspect, Offset::Added(1))]); } #[test] @@ -1366,8 +1359,8 @@ fn process_changes_works_added_hunk_4_2() { range_in_original_file: 0..2, commit_id: suspect, }]; - let hunks_to_blame = &[UnblamedHunk::new(2..6, suspect_2, Offset::Added(2))]; - let changes = &[Change::Added(0..1, 0)]; + let hunks_to_blame = vec![new_unblamed_hunk(2..6, suspect_2, Offset::Added(2))]; + let changes = vec![Change::Added(0..1, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); assert_eq!( @@ -1387,7 +1380,7 @@ fn process_changes_works_added_hunk_4_2() { ); assert_eq!( new_hunks_to_blame, - [UnblamedHunk::new(3..6, suspect_2, Offset::Added(3))] + [new_unblamed_hunk(3..6, suspect_2, Offset::Added(3))] ); } @@ -1395,8 +1388,8 @@ fn process_changes_works_added_hunk_4_2() { 
fn process_changes_works_added_hunk_5() { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))]; - let changes = &[Change::Added(0..4, 3), Change::Unchanged(4..6)]; + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..4, 3), Change::Unchanged(4..6)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1407,19 +1400,19 @@ fn process_changes_works_added_hunk_5() { commit_id: suspect }] ); - assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(1))]); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(1))]); } #[test] fn process_changes_works_added_hunk_6() { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = &[UnblamedHunk::new(4..6, suspect, Offset::Added(1))]; - let changes = &[Change::Added(0..3, 0), Change::Unchanged(3..5)]; + let hunks_to_blame = vec![new_unblamed_hunk(4..6, suspect, Offset::Added(1))]; + let changes = vec![Change::Added(0..3, 0), Change::Unchanged(3..5)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); } #[test] @@ -1431,8 +1424,8 @@ fn process_changes_works_added_hunk_7() { range_in_original_file: 0..1, commit_id: suspect, }]; - let hunks_to_blame = &[UnblamedHunk::new(1..3, suspect_2, Offset::Added(1))]; - let changes = &[Change::Added(0..1, 2)]; + let hunks_to_blame = vec![new_unblamed_hunk(1..3, suspect_2, Offset::Added(1))]; + let changes = vec![Change::Added(0..1, 2)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); assert_eq!( @@ -1452,7 +1445,7 @@ fn process_changes_works_added_hunk_7() { ); assert_eq!( new_hunks_to_blame, - [UnblamedHunk::new(2..3, suspect_2, Offset::Added(0))] + [new_unblamed_hunk(2..3, suspect_2, Offset::Added(0))] ); } @@ -1460,8 +1453,8 @@ fn process_changes_works_added_hunk_7() { fn process_changes_works_added_hunk_8() { let suspect = ObjectId::null(gix_hash::Kind::Sha1); let mut lines_blamed = Vec::new(); - let hunks_to_blame = &[UnblamedHunk::new(0..4, suspect, Offset::Added(0))]; - let changes = &[Change::Added(0..2, 0), Change::Unchanged(2..3), Change::Added(3..4, 0)]; + let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..2, 0), Change::Unchanged(2..3), Change::Added(3..4, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1479,7 +1472,7 @@ fn process_changes_works_added_hunk_8() { } ] ); - assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(2..3, suspect, Offset::Added(2))]); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(2..3, suspect, Offset::Added(2))]); } #[test] @@ -1490,7 +1483,7 @@ fn process_changes_works_added_hunk_9() { range_in_original_file: 30..31, commit_id: suspect, }]; - let hunks_to_blame = &[ + let hunks_to_blame = vec![ UnblamedHunk { range_in_blamed_file: 0..30, suspects: [(suspect, 0..30)].into(), @@ -1500,7 +1493,7 @@ fn process_changes_works_added_hunk_9() { suspects: [(suspect, 31..37)].into(), }, ]; - let changes = &[ + let changes = vec![ 
Change::Unchanged(0..16), Change::Added(16..17, 0), Change::Unchanged(17..37), @@ -1547,11 +1540,11 @@ fn process_changes_works_added_hunk_9() { fn process_changes_works_deleted_hunk() { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = &[ - UnblamedHunk::new(0..4, suspect, Offset::Added(0)), - UnblamedHunk::new(4..7, suspect, Offset::Added(0)), + let hunks_to_blame = vec![ + new_unblamed_hunk(0..4, suspect, Offset::Added(0)), + new_unblamed_hunk(4..7, suspect, Offset::Added(0)), ]; - let changes = &[Change::Deleted(0, 3), Change::Added(0..4, 0)]; + let changes = vec![Change::Deleted(0, 3), Change::Added(0..4, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1574,3 +1567,16 @@ fn process_changes_works_deleted_hunk() { fn fixture_path() -> PathBuf { gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap() } + +fn new_unblamed_hunk(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> UnblamedHunk { + assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + + let range_in_destination = offset.shifted_range(&range_in_blamed_file); + UnblamedHunk { + range_in_blamed_file, + suspects: [(suspect, range_in_destination)].into(), + } +} From 26bfd2d73374e134aff24410fac44857b8128244 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 23 Dec 2024 17:29:28 +0100 Subject: [PATCH 05/16] modularlize `gix-blame/lib.rs` --- gix-blame/src/file/function.rs | 424 ++++++++++ gix-blame/src/file/mod.rs | 451 +++++++++++ gix-blame/src/file/tests.rs | 1323 ++++++++++++++++++++++++++++++++ gix-blame/src/lib.rs | 997 +----------------------- gix-blame/src/types.rs | 137 ++++ gix-blame/tests/blame.rs | 1317 +------------------------------ 6 files changed, 2344 insertions(+), 2305 deletions(-) create mode 100644 gix-blame/src/file/function.rs create mode 100644 gix-blame/src/file/mod.rs create mode 100644 gix-blame/src/file/tests.rs create mode 100644 gix-blame/src/types.rs diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs new file mode 100644 index 00000000000..bc00f54a0c3 --- /dev/null +++ b/gix-blame/src/file/function.rs @@ -0,0 +1,424 @@ +use std::{ops::Range, path::PathBuf}; + +use gix_hash::ObjectId; +use gix_object::{bstr::BStr, FindExt}; + +use super::{process_changes, Change, Offset, UnblamedHunk}; +use crate::BlameEntry; + +// TODO: do not instantiate anything, get everything passed as argument. +/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file +/// at `traverse[0]:` originated in. +/// +/// ## Paramters +/// +/// * `odb` +/// - Access to database objects, also for used for diffing. +/// - Should have an object cache for good diff performance. +/// * `traverse` +/// - The list of commits from the most recent to prior ones, following all parents sorted +/// by time. +/// - It's paramount that older commits are returned after newer ones. +/// - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`. +/// * `file_path` +/// - A *slash-separated* worktree-relative path to the file to blame. +/// * `resource_cache` +/// - Used for diffing trees. +/// +/// ## The algorithm +/// +/// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that +/// represent the worktree state. 
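As a minimal usage sketch only, mirroring the calls in `gix-blame/tests/blame.rs` in this patch series: it assumes the caller already has an object database handle (`odb`), the commits yielded by `gix-traverse` starting at `HEAD` (`commits`, newest first), a `gix_diff::blob::Platform` (`resource_cache`) and the worktree root, exactly as the test `Fixture` provides them; the file name below is hypothetical.

let lines_blamed = file(
    &odb,
    commits,             // newest commit first, then its ancestors
    &mut resource_cache,
    worktree_path,       // see the TODO on `worktree_root` below
    "file.txt".into(),   // slash-separated, relative to the worktree
)
.unwrap();

Each returned `BlameEntry` ties a range in the blamed file to the range in the original file it stems from, plus the commit that introduced it.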
+/// We begin with a single *Unblamed Hunk* and a single suspect, usually the `HEAD` commit as the commit containing the +/// *Original File*, so that it contains the entire file, with the first commit being a candidate for the entire *Original File*. +/// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`. +/// If so, we have found a *Blamed File* and a *Suspect* commit, and have hunks that represent these changes. +/// Now the *Unblamed Hunk* is split at the boundaries of each matching change, creating a new *Unblamed Hunk* on each side, +/// along with a [`BlameEntry`] to represent the match. +/// This is repeated until there are no non-empty *Unblamed Hunk*s left. +/// +/// At a high level, what we want to do is the following: +/// +/// - get the commit +/// - walk through its parents +/// - for each parent, do a diff and mark lines that don’t have a suspect yet (this is the term +/// used in `libgit2`), but that have been changed in this commit +/// +/// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame +/// suspects. It can be visualized as follows: +// +// <----------------------------------------> +// <---------------><-----------------------> +// <---><----------><-----------------------> +// <---><----------><-------><-----><-------> +// <---><---><-----><-------><-----><-------> +// <---><---><-----><-------><-----><-><-><-> +pub fn file( + odb: impl gix_object::Find + gix_object::FindHeader, + traverse: impl IntoIterator>, + resource_cache: &mut gix_diff::blob::Platform, + // TODO: remove + worktree_root: PathBuf, + file_path: &BStr, +) -> Result, E> { + // TODO: `worktree_root` should be removed - read everything from Commit. + // Worktree changes should be placed into a temporary commit. + // TODO: remove this and deduplicate the respective code. + use gix_object::bstr::ByteSlice; + let absolute_path = worktree_root.join(gix_path::from_bstr(file_path)); + + // TODO use `imara-diff` to tokenize this just like it will be tokenized when diffing. + let number_of_lines = std::fs::read_to_string(absolute_path).unwrap().lines().count(); + + let mut traverse = traverse.into_iter().peekable(); + let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { + todo!("return actual error"); + }; + + let mut hunks_to_blame = vec![UnblamedHunk::new( + 0..number_of_lines.try_into().unwrap(), + suspect, + Offset::Added(0), + )]; + + let mut out = Vec::new(); + 'outer: for item in traverse { + let item = item?; + let suspect = item.id; + + let parent_ids = item.parent_ids; + if parent_ids.is_empty() { + // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of + // the last `item` that was yielded by `traverse`, so it makes sense to assign the + // remaining lines to it, even though we don’t explicitly check whether that is true + // here. We could perhaps use `needed_to_obtain` to compare `suspect` against an empty + // tree to validate this assumption. 
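// A sketch of what `from_unblamed_hunk` (see `file/mod.rs` below) does at this point:
// the hunk keeps its range in the blamed file and adopts, as the range in the original
// file, the range it currently tracks for `suspect`. With hypothetical values:
//
//   UnblamedHunk { range_in_blamed_file: 4..7, suspects: {suspect: 2..5} }
//     -> BlameEntry { range_in_blamed_file: 4..7, range_in_original_file: 2..5, commit_id: suspect }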
+ out.extend( + hunks_to_blame + .iter() + .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), + ); + + hunks_to_blame.clear(); + break; + } + + let mut buffer = Vec::new(); + let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); + let tree_iter = odb.find_tree_iter(&commit_id, &mut buffer).unwrap(); + + let mut entry_buffer = Vec::new(); + let Some(entry) = tree_iter + .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) + .unwrap() + else { + continue; + }; + + if parent_ids.len() == 1 { + let parent_id: ObjectId = *parent_ids.last().unwrap(); + + let mut buffer = Vec::new(); + let parent_commit_id = odb.find_commit(&parent_id, &mut buffer).unwrap().tree(); + let parent_tree_iter = odb.find_tree_iter(&parent_commit_id, &mut buffer).unwrap(); + + let mut entry_buffer = Vec::new(); + if let Some(parent_entry) = parent_tree_iter + .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) + .unwrap() + { + if entry.oid == parent_entry.oid { + // The blobs storing the blamed file in `entry` and `parent_entry` are identical + // which is why we can pass blame to the parent without further checks. + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, parent_id); + } + continue; + } + } + + let Some(modification) = tree_diff_at_file_path(&odb, file_path, item.id, parent_id) else { + // None of the changes affected the file we’re currently blaming. Pass blame to parent. + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, parent_id); + } + continue; + }; + + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + // Every line that has not been blamed yet on a commit, is expected to have been + // added when the file was added to the repository. + out.extend( + hunks_to_blame + .iter() + .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), + ); + + hunks_to_blame.clear(); + break; + } + gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); + + hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, parent_id); + } + } + } + } else { + let mut buffer = Vec::new(); + let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); + let tree_iter = odb.find_tree_iter(&commit_id, &mut buffer).unwrap(); + + let mut entry_buffer = Vec::new(); + let entry = tree_iter + .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) + .unwrap() + .unwrap(); + + for parent_id in &parent_ids { + let mut buffer = Vec::new(); + let parent_commit_id = odb.find_commit(parent_id, &mut buffer).unwrap().tree(); + let parent_tree_iter = odb.find_tree_iter(&parent_commit_id, &mut buffer).unwrap(); + + let mut entry_buffer = Vec::new(); + if let Some(parent_entry) = parent_tree_iter + .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) + .unwrap() + { + if entry.oid == parent_entry.oid { + // The blobs storing the blamed file in `entry` and `parent_entry` are + // identical which is why we can pass blame to the parent without further + // checks. 
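// What `pass_blame` (see `file/mod.rs` below) does in the loop that follows: it moves
// the range recorded for `suspect` over to `parent_id`, so the same lines are looked
// for in that parent next. With hypothetical values:
//
//   suspects: {suspect: 10..15}  --pass_blame(suspect, parent_id)-->  suspects: {parent_id: 10..15}
//
// `clone_blame`, used further down when the diff against one parent of a merge commit
// does not touch the file, keeps the entry for `suspect` and additionally inserts one
// for the parent.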
+ for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, *parent_id); + } + continue 'outer; + } + } + } + + for parent_id in parent_ids { + let changes_for_file_path = tree_diff_at_file_path(&odb, file_path, item.id, parent_id); + let Some(modification) = changes_for_file_path else { + // None of the changes affected the file we’re currently blaming. Pass blame + // to parent. + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.clone_blame(suspect, parent_id); + } + + continue; + }; + + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + // Do nothing under the assumption that this always (or almost always) + // implies that the file comes from a different parent, compared to which + // it was modified, not added. + // + // TODO: I still have to figure out whether this is correct in all cases. + } + gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); + + hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.pass_blame(suspect, parent_id); + } + } + } + } + for unblamed_hunk in &mut hunks_to_blame { + unblamed_hunk.remove_blame(suspect); + } + } + } + + debug_assert_eq!( + hunks_to_blame, + vec![], + "only if there is no portion of the file left we have completed the blame" + ); + + // I don’t know yet whether it would make sense to use a data structure instead that preserves + // order on insertion. + out.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); + Ok(coalesce_blame_entries(out)) +} + +/// This function merges adjacent blame entries. It merges entries that are adjacent both in the +/// blamed file and in the original file that introduced them. This follows `git`’s +/// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the +/// blamed file which can result in different blames in certain edge cases. See [the commit][1] +/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test +/// for this behaviour in `git`. +/// +/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a +/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131 +fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { + let len = lines_blamed.len(); + lines_blamed + .into_iter() + .fold(Vec::with_capacity(len), |mut acc, entry| { + let previous_entry = acc.last(); + + if let Some(previous_entry) = previous_entry { + if previous_entry.commit_id == entry.commit_id + && previous_entry.range_in_blamed_file.end == entry.range_in_blamed_file.start + // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`. 
+ && previous_entry.range_in_original_file.end == entry.range_in_original_file.start + { + let coalesced_entry = BlameEntry { + range_in_blamed_file: previous_entry.range_in_blamed_file.start..entry.range_in_blamed_file.end, + range_in_original_file: previous_entry.range_in_original_file.start + ..entry.range_in_original_file.end, + commit_id: previous_entry.commit_id, + }; + + acc.pop(); + acc.push(coalesced_entry); + } else { + acc.push(entry); + } + + acc + } else { + acc.push(entry); + + acc + } + }) +} + +fn tree_diff_at_file_path( + odb: impl gix_object::Find + gix_object::FindHeader, + file_path: &BStr, + id: ObjectId, + parent_id: ObjectId, +) -> Option { + let mut buffer = Vec::new(); + + let parent = odb.find_commit(&parent_id, &mut buffer).unwrap(); + + let mut buffer = Vec::new(); + let parent_tree_iter = odb + .find(&parent.tree(), &mut buffer) + .unwrap() + .try_into_tree_iter() + .unwrap(); + + let mut buffer = Vec::new(); + let commit = odb.find_commit(&id, &mut buffer).unwrap(); + + let mut buffer = Vec::new(); + let tree_iter = odb + .find(&commit.tree(), &mut buffer) + .unwrap() + .try_into_tree_iter() + .unwrap(); + + let mut recorder = gix_diff::tree::Recorder::default(); + gix_diff::tree( + parent_tree_iter, + tree_iter, + gix_diff::tree::State::default(), + &odb, + &mut recorder, + ) + .unwrap(); + + recorder.records.into_iter().find(|change| match change { + gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Addition { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Deletion { path, .. } => path == file_path, + }) +} + +fn blob_changes( + odb: impl gix_object::Find + gix_object::FindHeader, + resource_cache: &mut gix_diff::blob::Platform, + oid: ObjectId, + previous_oid: ObjectId, + file_path: &BStr, +) -> Vec { + /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*. + struct ChangeRecorder { + last_seen_after_end: u32, + hunks: Vec, + total_number_of_lines: u32, + } + + impl ChangeRecorder { + /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed + /// so that the entire file is represented by [`Change`]. + fn new(total_number_of_lines: u32) -> Self { + ChangeRecorder { + last_seen_after_end: 0, + hunks: Vec::new(), + total_number_of_lines, + } + } + } + + impl gix_diff::blob::Sink for ChangeRecorder { + type Out = Vec; + + fn process_change(&mut self, before: Range, after: Range) { + // This checks for unchanged hunks. 
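// `before` and `after` are the line ranges of one diff hunk in the old and the new blob
// respectively (this is the `gix_diff::blob::Sink` callback, not the blame-level
// `process_change` in `file/mod.rs`). The gap since the previous hunk becomes
// `Change::Unchanged`, a non-empty `after` becomes `Change::Added(after, number_of_deleted_lines)`,
// and a pure deletion becomes `Change::Deleted(position, number_of_deleted_lines)`.
// As a hypothetical example, replacing the third line (index 2) of a 4-line file yields
// [Unchanged(0..2), Added(2..3, 1), Unchanged(3..4)] once `finish` has filled in the tail.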
+ if after.start > self.last_seen_after_end { + self.hunks + .push(Change::Unchanged(self.last_seen_after_end..after.start)); + } + + match (!before.is_empty(), !after.is_empty()) { + (_, true) => { + self.hunks + .push(Change::Added(after.start..after.end, before.end - before.start)); + } + (true, false) => { + self.hunks.push(Change::Deleted(after.start, before.end - before.start)); + } + (false, false) => unreachable!("BUG: imara-diff provided a non-change"), + } + self.last_seen_after_end = after.end; + } + + fn finish(mut self) -> Self::Out { + if self.total_number_of_lines > self.last_seen_after_end { + self.hunks + .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines)); + } + self.hunks + } + } + + resource_cache + .set_resource( + previous_oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::OldOrSource, + &odb, + ) + .unwrap(); + resource_cache + .set_resource( + oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::NewOrDestination, + &odb, + ) + .unwrap(); + + let outcome = resource_cache.prepare_diff().unwrap(); + let input = outcome.interned_input(); + let number_of_lines_in_destination = input.after.len(); + let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap()); + + gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) +} diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs new file mode 100644 index 00000000000..cdc270bf752 --- /dev/null +++ b/gix-blame/src/file/mod.rs @@ -0,0 +1,451 @@ +//! A module with low-level types and functions. +use std::ops::Range; + +use gix_hash::ObjectId; + +use crate::types::{BlameEntry, Either, LineRange}; +use crate::types::{Change, Offset, UnblamedHunk}; + +pub(super) mod function; + +/// Compare a section from the *Original File* (`hunk`) with a change from a diff and see if there +/// is an intersection with `change`. Based on that intersection, we may generate a [`BlameEntry`] for `out` +/// and/or split the `hunk` into multiple. +/// +/// This is the core of the blame implementation as it matches regions in *Blamed Files* to the *Original File*. +fn process_change( + out: &mut Vec, + new_hunks_to_blame: &mut Vec, + offset_in_destination: &mut Offset, + suspect: ObjectId, + hunk: Option, + change: Option, +) -> (Option, Option) { + match (hunk, change) { + (Some(hunk), Some(Change::Unchanged(unchanged))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + return (None, Some(Change::Unchanged(unchanged))); + }; + + match ( + // Since `unchanged` is a range that is not inclusive at the end, + // `unchanged.end` is not part of `unchanged`. The first line that is + // `unchanged.end - 1`. 
+ range_in_suspect.contains(&unchanged.start), + (unchanged.end - 1) >= range_in_suspect.start && unchanged.end <= range_in_suspect.end, + ) { + (_, true) => { + // <------> (hunk) + // <-------> (unchanged) + // + // <----------> (hunk) + // <---> (unchanged) + + (Some(hunk), None) + } + (true, false) => { + // <--------> (hunk) + // <-------> (unchanged) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Unchanged(unchanged))) + } + (false, false) => { + // Any of the following cases are handled by this branch: + // <---> (hunk) + // <----------> (unchanged) + // + // <----> (hunk) + // <--> (unchanged) + // + // <--> (hunk) + // <----> (unchanged) + + if unchanged.end <= range_in_suspect.start { + // <----> (hunk) + // <--> (unchanged) + + (Some(hunk.clone()), None) + } else { + // <--> (hunk) + // <----> (unchanged) + // + // <---> (hunk) + // <----------> (unchanged) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Unchanged(unchanged.clone()))) + } + } + } + } + (Some(hunk), Some(Change::Added(added, number_of_lines_deleted))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + + return (None, Some(Change::Added(added, number_of_lines_deleted))); + }; + + let range_in_suspect = range_in_suspect.clone(); + + match ( + range_in_suspect.contains(&added.start), + // Since `added` is a range that is not inclusive at the end, `added.end` is + // not part of `added`. The first line that is `added.end - 1`. + (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end, + ) { + (true, true) => { + // <----------> (hunk) + // <---> (added) + // <---> (blamed) + // <--> <-> (new hunk) + + let new_hunk = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + out.push(BlameEntry::with_offset( + added.clone(), + suspect, + new_hunk.offset_for(suspect), + )); + + match new_hunk.split_at(suspect, added.end) { + Either::Left(_) => (None, None), + Either::Right((_, after)) => (Some(after), None), + } + } + (true, false) => { + // <--------> (hunk) + // <-------> (added) + // <----> (blamed) + // <--> (new hunk) + + let new_hunk = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + out.push(BlameEntry::with_offset( + added.start..range_in_suspect.end, + suspect, + new_hunk.offset_for(suspect), + )); + + if added.end > range_in_suspect.end { + (None, Some(Change::Added(added, number_of_lines_deleted))) + } else { + todo!(); + } + } + (false, true) => { + // <-------> (hunk) + // <------> (added) + // <---> (blamed) + // <--> (new hunk) + + out.push(BlameEntry::with_offset( + range_in_suspect.start..added.end, + suspect, + hunk.offset_for(suspect), + )); + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + match hunk.split_at(suspect, added.end) { + Either::Left(_) => (None, None), + Either::Right((_, after)) => (Some(after), None), + } + } + (false, false) => { + // Any of the following cases are handled by this branch: + // <---> (hunk) + // <----------> 
(added) + // + // <----> (hunk) + // <--> (added) + // + // <--> (hunk) + // <----> (added) + + if added.end <= range_in_suspect.start { + // <----> (hunk) + // <--> (added) + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + (Some(hunk.clone()), None) + } else if range_in_suspect.end <= added.start { + // <--> (hunk) + // <----> (added) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) + } else { + // <---> (hunk) + // <----------> (added) + // <---> (blamed) + + out.push(BlameEntry::with_offset( + range_in_suspect.clone(), + suspect, + hunk.offset_for(suspect), + )); + + (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) + } + } + } + } + (Some(hunk), Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted))) => { + let range_in_suspect = hunk.suspects.get(&suspect).expect("TODO"); + + if line_number_in_destination < range_in_suspect.start { + // <---> (hunk) + // | (line_number_in_destination) + + *offset_in_destination -= number_of_lines_deleted; + + (Some(hunk), None) + } else if line_number_in_destination < range_in_suspect.end { + // <-----> (hunk) + // | (line_number_in_destination) + + let new_hunk = match hunk.split_at(suspect, line_number_in_destination) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + *offset_in_destination -= number_of_lines_deleted; + + (Some(new_hunk), None) + } else { + // <---> (hunk) + // | (line_number_in_destination) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + ( + None, + Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)), + ) + } + } + (Some(hunk), None) => { + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, None) + } + (None, Some(Change::Unchanged(_))) => (None, None), + (None, Some(Change::Added(added, number_of_lines_deleted))) => { + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + (None, None) + } + (None, Some(Change::Deleted(_, number_of_lines_deleted))) => { + *offset_in_destination -= number_of_lines_deleted; + + (None, None) + } + (None, None) => (None, None), + } +} + +/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other. +/// Once a match is found, it's pushed onto `out`. 
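A small worked example, mirroring `process_changes_works_added_hunk` from the tests: a single unblamed hunk covering lines 0..4 and one `Change::Added(0..4, 0)` attribute the whole hunk to `suspect` and leave nothing to be passed on to a parent (`new_unblamed_hunk` is the helper defined alongside the tests).

let suspect = ObjectId::null(gix_hash::Kind::Sha1);
let mut lines_blamed = Vec::new();
let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))];
let changes = vec![Change::Added(0..4, 0)];
let rest = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
// lines_blamed == [BlameEntry { range_in_blamed_file: 0..4, range_in_original_file: 0..4, commit_id: suspect }]
// rest == [], i.e. every line has found the commit responsible for it.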
+fn process_changes( + out: &mut Vec, + hunks_to_blame: Vec, + changes: Vec, + suspect: ObjectId, +) -> Vec { + let mut hunks_iter = hunks_to_blame.into_iter(); + let mut changes_iter = changes.into_iter(); + + let mut hunk = hunks_iter.next(); + let mut change = changes_iter.next(); + + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination = Offset::Added(0); + + loop { + (hunk, change) = process_change( + out, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + hunk, + change, + ); + + hunk = hunk.or_else(|| hunks_iter.next()); + change = change.or_else(|| changes_iter.next()); + + if hunk.is_none() && change.is_none() { + break; + } + } + new_hunks_to_blame +} + +impl UnblamedHunk { + fn new(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> Self { + assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + + let range_in_destination = range_in_blamed_file.shift_by(offset); + + Self { + range_in_blamed_file, + suspects: [(suspect, range_in_destination)].into(), + } + } + + fn shift_by(mut self, suspect: ObjectId, offset: Offset) -> Self { + self.suspects.entry(suspect).and_modify(|e| *e = e.shift_by(offset)); + + self + } + + fn split_at(self, suspect: ObjectId, line_number_in_destination: u32) -> Either { + match self.suspects.get(&suspect) { + None => Either::Left(self), + Some(range_in_suspect) => { + if line_number_in_destination > range_in_suspect.start + && line_number_in_destination < range_in_suspect.end + { + let split_at_from_start = line_number_in_destination - range_in_suspect.start; + + if split_at_from_start > 0 { + let new_suspects_before = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, range.start..(range.start + split_at_from_start))) + .collect(); + + let new_suspects_after = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, (range.start + split_at_from_start)..range.end)) + .collect(); + + let new_hunk_before = Self { + range_in_blamed_file: self.range_in_blamed_file.start + ..(self.range_in_blamed_file.start + split_at_from_start), + suspects: new_suspects_before, + }; + let new_hunk_after = Self { + range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) + ..(self.range_in_blamed_file.end), + suspects: new_suspects_after, + }; + + Either::Right((new_hunk_before, new_hunk_after)) + } else { + Either::Left(self) + } + } else { + Either::Left(self) + } + } + } + } + + fn offset_for(&self, suspect: ObjectId) -> Offset { + let range_in_suspect = self.suspects.get(&suspect).expect("TODO"); + + if self.range_in_blamed_file.start > range_in_suspect.start { + Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start) + } else { + Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start) + } + } + + /// Transfer all ranges from the commit at `from` to the commit at `to`. + fn pass_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.remove(&from) { + self.suspects.insert(to, range_in_suspect); + } + } + + fn clone_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.get(&from) { + self.suspects.insert(to, range_in_suspect.clone()); + } + } + + fn remove_blame(&mut self, suspect: ObjectId) { + // TODO: figure out why it can try to remove suspects that don't exist. 
+ self.suspects.remove(&suspect); + } +} + +impl BlameEntry { + /// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_original_file`. + fn with_offset(range_in_original_file: Range, commit_id: ObjectId, offset: Offset) -> Self { + debug_assert!( + range_in_original_file.end > range_in_original_file.start, + "{range_in_original_file:?}" + ); + + match offset { + Offset::Added(added) => Self { + range_in_blamed_file: (range_in_original_file.start + added)..(range_in_original_file.end + added), + range_in_original_file, + commit_id, + }, + Offset::Deleted(deleted) => { + debug_assert!( + range_in_original_file.start >= deleted, + "{range_in_original_file:?} {offset:?}" + ); + + Self { + range_in_blamed_file: (range_in_original_file.start - deleted) + ..(range_in_original_file.end - deleted), + range_in_original_file, + commit_id, + } + } + } + } + + /// Create an offset from a portion of the *Original File*. + fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self { + let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).unwrap(); + + Self { + range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(), + range_in_original_file: range_in_original_file.clone(), + commit_id, + } + } +} + +#[cfg(test)] +mod tests; diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs new file mode 100644 index 00000000000..35e63d6edd4 --- /dev/null +++ b/gix-blame/src/file/tests.rs @@ -0,0 +1,1323 @@ +use crate::file::{Offset, UnblamedHunk}; +use gix_hash::ObjectId; +use std::ops::Range; + +fn new_unblamed_hunk(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> UnblamedHunk { + assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + + let range_in_destination = offset.shifted_range(&range_in_blamed_file); + UnblamedHunk { + range_in_blamed_file, + suspects: [(suspect, range_in_destination)].into(), + } +} + +mod process_change { + use super::*; + use crate::file::{process_change, Change, Offset, UnblamedHunk}; + use crate::BlameEntry; + use gix_hash::ObjectId; + + #[test] + fn nothing() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + None, + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn added_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Added(0..3, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..3, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn added_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut 
new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Added(2..3, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 2..3, + range_in_original_file: 2..3, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..2, + suspects: [(suspect, 0..2)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn added_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(5); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(10..15, suspect, Offset::Added(0))), + Some(Change::Added(12..13, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 13..15, + suspects: [(suspect, 13..15)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 12..13, + range_in_original_file: 12..13, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 10..12, + suspects: [(suspect, 5..7)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(6)); + } + + #[test] + fn added_hunk_4() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 7..12 + Some(new_unblamed_hunk(12..17, suspect, Offset::Added(5))), + Some(Change::Added(9..10, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 15..17, + suspects: [(suspect, 10..12)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 14..15, + range_in_original_file: 9..10, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 12..14, + suspects: [(suspect, 7..9)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn added_hunk_5() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Added(0..3, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..3, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + 
assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(2)); + } + + #[test] + fn added_hunk_6() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 0..4 + Some(new_unblamed_hunk(1..5, suspect, Offset::Added(1))), + Some(Change::Added(0..3, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 4..5, + suspects: [(suspect, 3..4)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 1..4, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(2)); + } + + #[test] + fn added_hunk_7() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(2); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 2..6 + Some(new_unblamed_hunk(3..7, suspect, Offset::Added(1))), + Some(Change::Added(3..5, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 6..7, + suspects: [(suspect, 5..6)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 4..6, + range_in_original_file: 3..5, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 3..4, + suspects: [(suspect, 0..1)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn added_hunk_8() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 25..26 + Some(new_unblamed_hunk(23..24, suspect, Offset::Deleted(2))), + Some(Change::Added(25..27, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(25..27, 1))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 23..24, + range_in_original_file: 25..26, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn added_hunk_9() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 21..22 + Some(new_unblamed_hunk(23..24, suspect, Offset::Added(2))), + Some(Change::Added(18..22, 3)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 23..24, + range_in_original_file: 21..22, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] 
+ fn added_hunk_10() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 70..108 + Some(new_unblamed_hunk(71..109, suspect, Offset::Added(1))), + Some(Change::Added(106..109, 0)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(106..109, 0))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 107..109, + range_in_original_file: 106..108, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 71..107, + suspects: [(suspect, 70..106)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn added_hunk_11() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 137..144 + Some(new_unblamed_hunk(149..156, suspect, Offset::Added(12))), + Some(Change::Added(143..146, 0)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(143..146, 0))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 155..156, + range_in_original_file: 143..144, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 149..155, + suspects: [(suspect, 137..143)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn no_overlap() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Deleted(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 2..5 + Some(new_unblamed_hunk(3..6, suspect, Offset::Added(1))), + Some(Change::Added(7..10, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(7..10, 1))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 3..6, + suspects: [(suspect, 5..8)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Deleted(3)); + } + + #[test] + fn no_overlap_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 6..8 + Some(new_unblamed_hunk(9..11, suspect, Offset::Added(3))), + Some(Change::Added(2..5, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 9..11, + suspects: [(suspect, 6..8)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn no_overlap_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: 
Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 5..15 + Some(new_unblamed_hunk(4..15, suspect, Offset::Deleted(1))), + Some(Change::Added(4..5, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 4..15, + suspects: [(suspect, 5..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn no_overlap_4() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 25..27 + Some(new_unblamed_hunk(23..25, suspect, Offset::Deleted(2))), + Some(Change::Unchanged(21..22)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 23..25, + suspects: [(suspect, 25..27)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn no_overlap_5() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 17..18 + Some(new_unblamed_hunk(15..16, suspect, Offset::Deleted(2))), + Some(Change::Deleted(20, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Deleted(20, 1))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 15..16, + suspects: [(suspect, 16..17)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn no_overlap_6() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 22..24 + Some(new_unblamed_hunk(23..25, suspect, Offset::Added(1))), + Some(Change::Deleted(20, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 23..25, + suspects: [(suspect, 22..24)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(1)); + } + + #[test] + fn enclosing_addition() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 5..8 + Some(new_unblamed_hunk(2..5, suspect, Offset::Deleted(3))), + Some(Change::Added(3..12, 2)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(3..12, 
2))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 2..5, + range_in_original_file: 5..8, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn enclosing_deletion() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 13..20 + Some(new_unblamed_hunk(12..19, suspect, Offset::Deleted(1))), + Some(Change::Deleted(15, 2)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 14..19, + suspects: [(suspect, 15..20)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 12..14, + suspects: [(suspect, 10..12)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); + } + + #[test] + fn enclosing_unchanged_lines() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 109..113 + Some(new_unblamed_hunk(110..114, suspect, Offset::Added(1))), + Some(Change::Unchanged(109..172)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Unchanged(109..172))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 110..114, + suspects: [(suspect, 106..110)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(3)); + } + + #[test] + fn unchanged_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Unchanged(0..3)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 0..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn unchanged_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Unchanged(0..7)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Unchanged(0..7))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 0..5)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn unchanged_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame 
= Vec::new(); + let mut offset_in_destination: Offset = Offset::Deleted(2); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk { + range_in_blamed_file: 22..30, + suspects: [(suspect, 21..29)].into(), + }), + Some(Change::Unchanged(21..23)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 22..30, + suspects: [(suspect, 21..29)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(2)); + } + + #[test] + fn deleted_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), + Some(Change::Deleted(5, 3)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Deleted(5, 3))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..5, + suspects: [(suspect, 0..5)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); + } + + #[test] + fn deleted_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(2..16, suspect, Offset::Added(0))), + Some(Change::Deleted(0, 4)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 2..16, + suspects: [(suspect, 2..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(4)); + } + + #[test] + fn deleted_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(new_unblamed_hunk(2..16, suspect, Offset::Added(0))), + Some(Change::Deleted(14, 4)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 14..16, + suspects: [(suspect, 14..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [new_unblamed_hunk(2..14, suspect, Offset::Added(0))] + ); + assert_eq!(offset_in_destination, Offset::Deleted(4)); + } + + #[test] + fn addition_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::Added(22..25, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, 
Offset::Added(3)); + } + + #[test] + fn deletion_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::Deleted(11, 5)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(4)); + } + + #[test] + fn unchanged_only() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + Some(Change::Unchanged(11..13)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); + } +} +mod process_changes { + use crate::file::tests::new_unblamed_hunk; + use crate::file::{process_changes, Change, Offset, UnblamedHunk}; + use crate::BlameEntry; + use gix_hash::ObjectId; + + #[test] + fn nothing() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let new_hunks_to_blame = process_changes(&mut lines_blamed, vec![], vec![], suspect); + + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + } + + #[test] + fn added_hunk() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..4, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..4, + range_in_original_file: 0..4, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + } + + #[test] + fn added_hunk_2() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..4, 0), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..4, + range_in_original_file: 0..4, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); + } + + #[test] + fn added_hunk_3() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Unchanged(0..2), Change::Added(2..4, 0), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 2..4, + range_in_original_file: 2..4, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [ + new_unblamed_hunk(0..2, suspect, Offset::Added(0)), + new_unblamed_hunk(4..6, suspect, Offset::Added(2)) + ] + ); + } + + #[test] + fn added_hunk_4_0() { 
+ let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..1, 0), Change::Added(1..4, 0), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 0..1, + range_in_original_file: 0..1, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 1..4, + range_in_original_file: 1..4, + commit_id: suspect + } + ] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); + } + + #[test] + fn added_hunk_4_1() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..1, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..1, + range_in_original_file: 0..1, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(1..6, suspect, Offset::Added(1))]); + } + + #[test] + fn added_hunk_4_2() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); + let mut lines_blamed: Vec = vec![BlameEntry { + range_in_blamed_file: 0..2, + range_in_original_file: 0..2, + commit_id: suspect, + }]; + let hunks_to_blame = vec![new_unblamed_hunk(2..6, suspect_2, Offset::Added(2))]; + let changes = vec![Change::Added(0..1, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 0..2, + range_in_original_file: 0..2, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 2..3, + range_in_original_file: 0..1, + commit_id: suspect_2 + } + ] + ); + assert_eq!( + new_hunks_to_blame, + [new_unblamed_hunk(3..6, suspect_2, Offset::Added(3))] + ); + } + + #[test] + fn added_hunk_5() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..4, 3), Change::Unchanged(4..6)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..4, + range_in_original_file: 0..4, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(1))]); + } + + #[test] + fn added_hunk_6() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![new_unblamed_hunk(4..6, suspect, Offset::Added(1))]; + let changes = vec![Change::Added(0..3, 0), Change::Unchanged(3..5)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); + } + + #[test] + fn added_hunk_7() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); + let mut lines_blamed: Vec = vec![BlameEntry { + range_in_blamed_file: 
0..1, + range_in_original_file: 0..1, + commit_id: suspect, + }]; + let hunks_to_blame = vec![new_unblamed_hunk(1..3, suspect_2, Offset::Added(1))]; + let changes = vec![Change::Added(0..1, 2)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 0..1, + range_in_original_file: 0..1, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 1..2, + range_in_original_file: 0..1, + commit_id: suspect_2 + } + ] + ); + assert_eq!( + new_hunks_to_blame, + [new_unblamed_hunk(2..3, suspect_2, Offset::Added(0))] + ); + } + + #[test] + fn added_hunk_8() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let mut lines_blamed = Vec::new(); + let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; + let changes = vec![Change::Added(0..2, 0), Change::Unchanged(2..3), Change::Added(3..4, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 0..2, + range_in_original_file: 0..2, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 3..4, + range_in_original_file: 3..4, + commit_id: suspect + } + ] + ); + assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(2..3, suspect, Offset::Added(2))]); + } + + #[test] + fn added_hunk_9() { + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let mut lines_blamed: Vec = vec![BlameEntry { + range_in_blamed_file: 30..31, + range_in_original_file: 30..31, + commit_id: suspect, + }]; + let hunks_to_blame = vec![ + UnblamedHunk { + range_in_blamed_file: 0..30, + suspects: [(suspect, 0..30)].into(), + }, + UnblamedHunk { + range_in_blamed_file: 31..37, + suspects: [(suspect, 31..37)].into(), + }, + ]; + let changes = vec![ + Change::Unchanged(0..16), + Change::Added(16..17, 0), + Change::Unchanged(17..37), + ]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + lines_blamed.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); + + assert_eq!( + lines_blamed, + [ + BlameEntry { + range_in_blamed_file: 16..17, + range_in_original_file: 16..17, + commit_id: suspect + }, + BlameEntry { + range_in_blamed_file: 30..31, + range_in_original_file: 30..31, + commit_id: suspect + } + ] + ); + assert_eq!( + new_hunks_to_blame, + [ + UnblamedHunk { + range_in_blamed_file: 0..16, + suspects: [(suspect, 0..16)].into() + }, + UnblamedHunk { + range_in_blamed_file: 17..30, + suspects: [(suspect, 16..29)].into() + }, + UnblamedHunk { + range_in_blamed_file: 31..37, + suspects: [(suspect, 30..36)].into() + } + ] + ); + } + + #[test] + fn deleted_hunk() { + let mut lines_blamed = Vec::new(); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + let hunks_to_blame = vec![ + new_unblamed_hunk(0..4, suspect, Offset::Added(0)), + new_unblamed_hunk(4..7, suspect, Offset::Added(0)), + ]; + let changes = vec![Change::Deleted(0, 3), Change::Added(0..4, 0)]; + let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); + + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..4, + range_in_original_file: 0..4, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 4..7, + suspects: [(suspect, 3..6)].into() + }] + ); + } +} diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index 139c4445425..5d83a1a6d61 100644 --- 
a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -11,1000 +11,11 @@ //! - The versions of the files that can contain hunks that we could use in the final 'image' //! - multiple at the same time as the commit-graph may split up. //! - turns into *Blamed File* once we have found an association into the *Original File*. -//! - every [`UnblamedHunk`] can have multiple suspects of which we find the best match. #![deny(rust_2018_idioms, missing_docs)] #![forbid(unsafe_code)] -use std::{ - collections::BTreeMap, - ops::{AddAssign, Range, SubAssign}, - path::PathBuf, -}; +mod types; +pub use types::BlameEntry; -use gix_hash::ObjectId; -use gix_object::bstr::BStr; -use gix_object::FindExt; - -/// Describes the offset of a particular hunk relative to the *Original File*. -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum Offset { - /// The amount of lines to add. - Added(u32), - /// The amount of lines to remove. - Deleted(u32), -} - -impl Offset { - /// Shift the given `range` according to our offset. - pub fn shifted_range(&self, range: &Range) -> Range { - match self { - Offset::Added(added) => { - debug_assert!(range.start >= *added, "{self:?} {range:?}"); - Range { - start: range.start - added, - end: range.end - added, - } - } - Offset::Deleted(deleted) => Range { - start: range.start + deleted, - end: range.end + deleted, - }, - } - } -} - -impl AddAssign for Offset { - fn add_assign(&mut self, rhs: u32) { - match self { - Self::Added(added) => *self = Self::Added(*added + rhs), - Self::Deleted(deleted) => { - if rhs > *deleted { - *self = Self::Added(rhs - *deleted); - } else { - *self = Self::Deleted(*deleted - rhs); - } - } - } - } -} - -impl SubAssign for Offset { - fn sub_assign(&mut self, rhs: u32) { - match self { - Self::Added(added) => { - if rhs > *added { - *self = Self::Deleted(rhs - *added); - } else { - *self = Self::Added(*added - rhs); - } - } - Self::Deleted(deleted) => *self = Self::Deleted(*deleted + rhs), - } - } -} - -/// A mapping of a section of the *Original File* to the section in a *Blamed File* that introduced it. -/// -/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally, -/// they have the same content, which is the reason they are in what is returned by [`file()`]. -// TODO: see if this can be encoded as `start_in_original_file` and `start_in_blamed_file` and a single `len`. -#[derive(Debug, PartialEq)] -pub struct BlameEntry { - /// The section of tokens in the tokenized version of the *Blamed File* (typically lines). - pub range_in_blamed_file: Range, - /// The section of tokens in the tokenized version of the *Original File* (typically lines). - pub range_in_original_file: Range, - /// The commit that introduced the section into the *Blamed File*. - pub commit_id: ObjectId, -} - -impl BlameEntry { - /// Create a new instance. - pub fn new(range_in_blamed_file: Range, range_in_original_file: Range, commit_id: ObjectId) -> Self { - debug_assert!( - range_in_blamed_file.end > range_in_blamed_file.start, - "{range_in_blamed_file:?}" - ); - debug_assert!( - range_in_original_file.end > range_in_original_file.start, - "{range_in_original_file:?}" - ); - debug_assert_eq!(range_in_original_file.len(), range_in_blamed_file.len()); - - Self { - range_in_blamed_file: range_in_blamed_file.clone(), - range_in_original_file: range_in_original_file.clone(), - commit_id, - } - } - - /// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_original_file`. 
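// Illustrative aside (not part of the patch): a minimal sketch of the `Offset`
// arithmetic defined just above, to make its sign conventions explicit. It
// assumes `Offset` is in scope as in the surrounding code; the wrapper function
// name is purely for illustration.
fn offset_arithmetic_sketch() {
    // `shifted_range`: `Added(n)` subtracts `n` from a range, `Deleted(n)` adds it,
    // which is how a *Blamed File* range is mapped to the suspect's coordinates
    // (compare the `// range_in_destination:` comments in the tests).
    assert_eq!(Offset::Added(5).shifted_range(&(12..17)), 7..12);
    assert_eq!(Offset::Deleted(2).shifted_range(&(3..6)), 5..8);

    // The running offset is adjusted with `+=` for added and `-=` for deleted
    // lines and may flip its sign in the process.
    let mut offset = Offset::Deleted(2);
    offset += 5; // five lines added: net three lines added
    assert_eq!(offset, Offset::Added(3));
    offset -= 4; // four lines deleted: net one line deleted
    assert_eq!(offset, Offset::Deleted(1));

    // `BlameEntry::with_offset` below goes the opposite way: with `Added(5)`,
    // an original-file range `7..12` becomes the blamed-file range `12..17`.
}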
- fn with_offset(range_in_original_file: Range, commit_id: ObjectId, offset: Offset) -> Self { - debug_assert!( - range_in_original_file.end > range_in_original_file.start, - "{range_in_original_file:?}" - ); - - match offset { - Offset::Added(added) => Self { - range_in_blamed_file: (range_in_original_file.start + added)..(range_in_original_file.end + added), - range_in_original_file, - commit_id, - }, - Offset::Deleted(deleted) => { - debug_assert!( - range_in_original_file.start >= deleted, - "{range_in_original_file:?} {offset:?}" - ); - - Self { - range_in_blamed_file: (range_in_original_file.start - deleted) - ..(range_in_original_file.end - deleted), - range_in_original_file, - commit_id, - } - } - } - } - - /// Create an offset from a portion of the *Original File*. - fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self { - let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).unwrap(); - - Self { - range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(), - range_in_original_file: range_in_original_file.clone(), - commit_id, - } - } -} - -trait LineRange { - fn shift_by(&self, offset: Offset) -> Self; -} - -impl LineRange for Range { - fn shift_by(&self, offset: Offset) -> Self { - offset.shifted_range(self) - } -} - -/// TODO: docs - what is it? -// TODO: is `Clone` really needed. -#[derive(Clone, Debug, PartialEq)] -pub struct UnblamedHunk { - /// TODO: figure out how this works. - pub range_in_blamed_file: Range, - /// Maps a commit to the range in the *Original File* that `range_in_blamed_file` refers to. - pub suspects: BTreeMap>, -} - -#[derive(Debug)] -enum Either { - Left(T), - Right(U), -} - -impl UnblamedHunk { - fn new(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> Self { - assert!( - range_in_blamed_file.end > range_in_blamed_file.start, - "{range_in_blamed_file:?}" - ); - - let range_in_destination = range_in_blamed_file.shift_by(offset); - - Self { - range_in_blamed_file, - suspects: [(suspect, range_in_destination)].into(), - } - } - - fn shift_by(mut self, suspect: ObjectId, offset: Offset) -> Self { - self.suspects.entry(suspect).and_modify(|e| *e = e.shift_by(offset)); - - self - } - - fn split_at(self, suspect: ObjectId, line_number_in_destination: u32) -> Either { - match self.suspects.get(&suspect) { - None => Either::Left(self), - Some(range_in_suspect) => { - if line_number_in_destination > range_in_suspect.start - && line_number_in_destination < range_in_suspect.end - { - let split_at_from_start = line_number_in_destination - range_in_suspect.start; - - if split_at_from_start > 0 { - let new_suspects_before = self - .suspects - .iter() - .map(|(suspect, range)| (*suspect, range.start..(range.start + split_at_from_start))) - .collect(); - - let new_suspects_after = self - .suspects - .iter() - .map(|(suspect, range)| (*suspect, (range.start + split_at_from_start)..range.end)) - .collect(); - - let new_hunk_before = Self { - range_in_blamed_file: self.range_in_blamed_file.start - ..(self.range_in_blamed_file.start + split_at_from_start), - suspects: new_suspects_before, - }; - let new_hunk_after = Self { - range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) - ..(self.range_in_blamed_file.end), - suspects: new_suspects_after, - }; - - Either::Right((new_hunk_before, new_hunk_after)) - } else { - Either::Left(self) - } - } else { - Either::Left(self) - } - } - } - } - - fn offset_for(&self, suspect: ObjectId) -> Offset { - let range_in_suspect = 
self.suspects.get(&suspect).expect("TODO"); - - if self.range_in_blamed_file.start > range_in_suspect.start { - Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start) - } else { - Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start) - } - } - - /// Transfer all ranges from the commit at `from` to the commit at `to`. - fn pass_blame(&mut self, from: ObjectId, to: ObjectId) { - if let Some(range_in_suspect) = self.suspects.remove(&from) { - self.suspects.insert(to, range_in_suspect); - } - } - - fn clone_blame(&mut self, from: ObjectId, to: ObjectId) { - if let Some(range_in_suspect) = self.suspects.get(&from) { - self.suspects.insert(to, range_in_suspect.clone()); - } - } - - fn remove_blame(&mut self, suspect: ObjectId) { - // TODO: figure out why it can try to remove suspects that don't exist. - self.suspects.remove(&suspect); - } -} - -/// A single change between two blobs, or an unchanged region. -#[derive(Clone, Debug, PartialEq)] -pub enum Change { - /// A range of tokens that wasn't changed. - Unchanged(Range), - /// `(added_line_range, num_deleted_in_before)` - Added(Range, u32), - /// `(line_to_start_deletion_at, num_deleted_in_before)` - Deleted(u32, u32), -} - -/// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*. -struct ChangeRecorder { - last_seen_after_end: u32, - hunks: Vec, - total_number_of_lines: u32, -} - -impl ChangeRecorder { - /// `total_number_of_lines` is used to fill in the last unchanged hunk if needed - /// so that the entire file is represented by [`Change`]. - fn new(total_number_of_lines: u32) -> Self { - ChangeRecorder { - last_seen_after_end: 0, - hunks: Vec::new(), - total_number_of_lines, - } - } -} - -impl gix_diff::blob::Sink for ChangeRecorder { - type Out = Vec; - - fn process_change(&mut self, before: Range, after: Range) { - // This checks for unchanged hunks. - if after.start > self.last_seen_after_end { - self.hunks - .push(Change::Unchanged(self.last_seen_after_end..after.start)); - } - - match (!before.is_empty(), !after.is_empty()) { - (_, true) => { - self.hunks - .push(Change::Added(after.start..after.end, before.end - before.start)); - } - (true, false) => { - self.hunks.push(Change::Deleted(after.start, before.end - before.start)); - } - (false, false) => unreachable!("BUG: imara-diff provided a non-change"), - } - self.last_seen_after_end = after.end; - } - - fn finish(mut self) -> Self::Out { - if self.total_number_of_lines > self.last_seen_after_end { - self.hunks - .push(Change::Unchanged(self.last_seen_after_end..self.total_number_of_lines)); - } - self.hunks - } -} - -/// Compare a section from the *Original File* (`hunk`) with a change from a diff and see if there -/// is an intersection with `change`. Based on that intersection, we may generate a [`BlameEntry`] for `out` -/// and/or split the `hunk` into multiple. -/// -/// This is the core of the blame implementation as it matches regions in *Blamed Files* to the *Original File*. 
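// Illustrative aside (not part of the patch): a minimal sketch of a single
// `process_change` call, mirroring the values of the `process_change_works_added_hunk`
// test further down in this series. It assumes the crate-internal items
// (`process_change`, `UnblamedHunk`, `Change`, `Offset`, `BlameEntry`) are in
// scope as they are in the test modules; the wrapper function name is purely
// for illustration.
fn process_change_sketch() {
    let mut lines_blamed = Vec::new();
    let mut new_hunks_to_blame = Vec::new();
    let mut offset_in_destination = Offset::Added(0);
    let suspect = gix_hash::ObjectId::null(gix_hash::Kind::Sha1);

    // One unblamed hunk covering lines 0..5 of the *Blamed File*, and a diff
    // that added lines 0..3 without deleting anything.
    let hunk = UnblamedHunk {
        range_in_blamed_file: 0..5,
        suspects: [(suspect, 0..5)].into(),
    };
    let (rest, change) = process_change(
        &mut lines_blamed,
        &mut new_hunks_to_blame,
        &mut offset_in_destination,
        suspect,
        Some(hunk),
        Some(Change::Added(0..3, 0)),
    );

    // Lines 0..3 are blamed on `suspect`, lines 3..5 remain unblamed, the
    // change is fully consumed, and the running offset grew by three lines.
    assert_eq!(
        lines_blamed,
        [BlameEntry {
            range_in_blamed_file: 0..3,
            range_in_original_file: 0..3,
            commit_id: suspect
        }]
    );
    assert_eq!(
        rest,
        Some(UnblamedHunk {
            range_in_blamed_file: 3..5,
            suspects: [(suspect, 3..5)].into()
        })
    );
    assert_eq!(change, None);
    assert_eq!(new_hunks_to_blame, []);
    assert_eq!(offset_in_destination, Offset::Added(3));
}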
-pub fn process_change( - out: &mut Vec, - new_hunks_to_blame: &mut Vec, - offset_in_destination: &mut Offset, - suspect: ObjectId, - hunk: Option, - change: Option, -) -> (Option, Option) { - match (hunk, change) { - (Some(hunk), Some(Change::Unchanged(unchanged))) => { - let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { - new_hunks_to_blame.push(hunk); - return (None, Some(Change::Unchanged(unchanged))); - }; - - match ( - // Since `unchanged` is a range that is not inclusive at the end, - // `unchanged.end` is not part of `unchanged`. The first line that is - // `unchanged.end - 1`. - range_in_suspect.contains(&unchanged.start), - (unchanged.end - 1) >= range_in_suspect.start && unchanged.end <= range_in_suspect.end, - ) { - (_, true) => { - // <------> (hunk) - // <-------> (unchanged) - // - // <----------> (hunk) - // <---> (unchanged) - - (Some(hunk), None) - } - (true, false) => { - // <--------> (hunk) - // <-------> (unchanged) - - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - - (None, Some(Change::Unchanged(unchanged))) - } - (false, false) => { - // Any of the following cases are handled by this branch: - // <---> (hunk) - // <----------> (unchanged) - // - // <----> (hunk) - // <--> (unchanged) - // - // <--> (hunk) - // <----> (unchanged) - - if unchanged.end <= range_in_suspect.start { - // <----> (hunk) - // <--> (unchanged) - - (Some(hunk.clone()), None) - } else { - // <--> (hunk) - // <----> (unchanged) - // - // <---> (hunk) - // <----------> (unchanged) - - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - - (None, Some(Change::Unchanged(unchanged.clone()))) - } - } - } - } - (Some(hunk), Some(Change::Added(added, number_of_lines_deleted))) => { - let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { - new_hunks_to_blame.push(hunk); - - return (None, Some(Change::Added(added, number_of_lines_deleted))); - }; - - let range_in_suspect = range_in_suspect.clone(); - - match ( - range_in_suspect.contains(&added.start), - // Since `added` is a range that is not inclusive at the end, `added.end` is - // not part of `added`. The first line that is `added.end - 1`. 
- (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end, - ) { - (true, true) => { - // <----------> (hunk) - // <---> (added) - // <---> (blamed) - // <--> <-> (new hunk) - - let new_hunk = match hunk.split_at(suspect, added.start) { - Either::Left(hunk) => hunk, - Either::Right((before, after)) => { - new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); - - after - } - }; - - *offset_in_destination += added.end - added.start; - *offset_in_destination -= number_of_lines_deleted; - - out.push(BlameEntry::with_offset( - added.clone(), - suspect, - new_hunk.offset_for(suspect), - )); - - match new_hunk.split_at(suspect, added.end) { - Either::Left(_) => (None, None), - Either::Right((_, after)) => (Some(after), None), - } - } - (true, false) => { - // <--------> (hunk) - // <-------> (added) - // <----> (blamed) - // <--> (new hunk) - - let new_hunk = match hunk.split_at(suspect, added.start) { - Either::Left(hunk) => hunk, - Either::Right((before, after)) => { - new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); - - after - } - }; - - out.push(BlameEntry::with_offset( - added.start..range_in_suspect.end, - suspect, - new_hunk.offset_for(suspect), - )); - - if added.end > range_in_suspect.end { - (None, Some(Change::Added(added, number_of_lines_deleted))) - } else { - todo!(); - } - } - (false, true) => { - // <-------> (hunk) - // <------> (added) - // <---> (blamed) - // <--> (new hunk) - - out.push(BlameEntry::with_offset( - range_in_suspect.start..added.end, - suspect, - hunk.offset_for(suspect), - )); - - *offset_in_destination += added.end - added.start; - *offset_in_destination -= number_of_lines_deleted; - - match hunk.split_at(suspect, added.end) { - Either::Left(_) => (None, None), - Either::Right((_, after)) => (Some(after), None), - } - } - (false, false) => { - // Any of the following cases are handled by this branch: - // <---> (hunk) - // <----------> (added) - // - // <----> (hunk) - // <--> (added) - // - // <--> (hunk) - // <----> (added) - - if added.end <= range_in_suspect.start { - // <----> (hunk) - // <--> (added) - - *offset_in_destination += added.end - added.start; - *offset_in_destination -= number_of_lines_deleted; - - (Some(hunk.clone()), None) - } else if range_in_suspect.end <= added.start { - // <--> (hunk) - // <----> (added) - - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - - (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) - } else { - // <---> (hunk) - // <----------> (added) - // <---> (blamed) - - out.push(BlameEntry::with_offset( - range_in_suspect.clone(), - suspect, - hunk.offset_for(suspect), - )); - - (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) - } - } - } - } - (Some(hunk), Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted))) => { - let range_in_suspect = hunk.suspects.get(&suspect).expect("TODO"); - - if line_number_in_destination < range_in_suspect.start { - // <---> (hunk) - // | (line_number_in_destination) - - *offset_in_destination -= number_of_lines_deleted; - - (Some(hunk), None) - } else if line_number_in_destination < range_in_suspect.end { - // <-----> (hunk) - // | (line_number_in_destination) - - let new_hunk = match hunk.split_at(suspect, line_number_in_destination) { - Either::Left(hunk) => hunk, - Either::Right((before, after)) => { - new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); - - after - } - }; - - *offset_in_destination -= 
number_of_lines_deleted; - - (Some(new_hunk), None) - } else { - // <---> (hunk) - // | (line_number_in_destination) - - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - - ( - None, - Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)), - ) - } - } - (Some(hunk), None) => { - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - - (None, None) - } - (None, Some(Change::Unchanged(_))) => (None, None), - (None, Some(Change::Added(added, number_of_lines_deleted))) => { - *offset_in_destination += added.end - added.start; - *offset_in_destination -= number_of_lines_deleted; - - (None, None) - } - (None, Some(Change::Deleted(_, number_of_lines_deleted))) => { - *offset_in_destination -= number_of_lines_deleted; - - (None, None) - } - (None, None) => (None, None), - } -} - -/// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other. -/// Once a match is found, it's pushed onto `out`. -pub fn process_changes( - out: &mut Vec, - hunks_to_blame: Vec, - changes: Vec, - suspect: ObjectId, -) -> Vec { - let mut hunks_iter = hunks_to_blame.into_iter(); - let mut changes_iter = changes.into_iter(); - - let mut hunk = hunks_iter.next(); - let mut change = changes_iter.next(); - - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination = Offset::Added(0); - - loop { - (hunk, change) = process_change( - out, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - hunk, - change, - ); - - hunk = hunk.or_else(|| hunks_iter.next()); - change = change.or_else(|| changes_iter.next()); - - if hunk.is_none() && change.is_none() { - break; - } - } - new_hunks_to_blame -} - -fn tree_diff_at_file_path( - odb: impl gix_object::Find + gix_object::FindHeader, - file_path: &BStr, - id: ObjectId, - parent_id: ObjectId, -) -> Option { - let mut buffer = Vec::new(); - - let parent = odb.find_commit(&parent_id, &mut buffer).unwrap(); - - let mut buffer = Vec::new(); - let parent_tree_iter = odb - .find(&parent.tree(), &mut buffer) - .unwrap() - .try_into_tree_iter() - .unwrap(); - - let mut buffer = Vec::new(); - let commit = odb.find_commit(&id, &mut buffer).unwrap(); - - let mut buffer = Vec::new(); - let tree_iter = odb - .find(&commit.tree(), &mut buffer) - .unwrap() - .try_into_tree_iter() - .unwrap(); - - let mut recorder = gix_diff::tree::Recorder::default(); - gix_diff::tree( - parent_tree_iter, - tree_iter, - gix_diff::tree::State::default(), - &odb, - &mut recorder, - ) - .unwrap(); - - recorder.records.into_iter().find(|change| match change { - gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, - gix_diff::tree::recorder::Change::Addition { path, .. } => path == file_path, - gix_diff::tree::recorder::Change::Deletion { path, .. 
} => path == file_path, - }) -} - -fn blob_changes( - odb: impl gix_object::Find + gix_object::FindHeader, - resource_cache: &mut gix_diff::blob::Platform, - oid: ObjectId, - previous_oid: ObjectId, - file_path: &BStr, -) -> Vec { - resource_cache - .set_resource( - previous_oid, - gix_object::tree::EntryKind::Blob, - file_path, - gix_diff::blob::ResourceKind::OldOrSource, - &odb, - ) - .unwrap(); - resource_cache - .set_resource( - oid, - gix_object::tree::EntryKind::Blob, - file_path, - gix_diff::blob::ResourceKind::NewOrDestination, - &odb, - ) - .unwrap(); - - let outcome = resource_cache.prepare_diff().unwrap(); - let input = outcome.interned_input(); - let number_of_lines_in_destination = input.after.len(); - let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap()); - - gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) -} - -/// This function merges adjacent blame entries. It merges entries that are adjacent both in the -/// blamed file and in the original file that introduced them. This follows `git`’s -/// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the -/// blamed file which can result in different blames in certain edge cases. See [the commit][1] -/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test -/// for this behaviour in `git`. -/// -/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a -/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131 -fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { - let len = lines_blamed.len(); - lines_blamed - .into_iter() - .fold(Vec::with_capacity(len), |mut acc, entry| { - let previous_entry = acc.last(); - - if let Some(previous_entry) = previous_entry { - if previous_entry.commit_id == entry.commit_id - && previous_entry.range_in_blamed_file.end == entry.range_in_blamed_file.start - // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`. - && previous_entry.range_in_original_file.end == entry.range_in_original_file.start - { - let coalesced_entry = BlameEntry { - range_in_blamed_file: previous_entry.range_in_blamed_file.start..entry.range_in_blamed_file.end, - range_in_original_file: previous_entry.range_in_original_file.start - ..entry.range_in_original_file.end, - commit_id: previous_entry.commit_id, - }; - - acc.pop(); - acc.push(coalesced_entry); - } else { - acc.push(entry); - } - - acc - } else { - acc.push(entry); - - acc - } - }) -} - -// TODO: do not instantiate anything, get everything passed as argument. -/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file -/// at `traverse[0]:` originated in. -/// -/// ## Paramters -/// -/// * `odb` -/// - Access to database objects, also for used for diffing. -/// - Should have an object cache for good diff performance. -/// * `traverse` -/// - The list of commits from the most recent to prior ones, following all parents sorted -/// by time. -/// - It's paramount that older commits are returned after newer ones. -/// - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`. -/// * `file_path` -/// - A *slash-separated* worktree-relative path to the file to blame. -/// * `resource_cache` -/// - Used for diffing trees. -/// -/// ## The algorithm -/// -/// *For brevity, `HEAD` denotes the starting point of the blame operation. 
It could be any commit, or even commits that -/// represent the worktree state. -/// We begin with a single [`UnblamedHunk`] and a single suspect, usually `HEAD` as the commit containing the *Original File*. -/// We traverse the commit graph starting at `HEAD`, and see if there have been changes to `file_path`. If so, we have found -/// a *Blamed File* and a *Suspect* commit, and have hunks that represent these changes. Now the [`UnblamedHunk`]s is split at -/// the boundaries of each matching hunk, creating a new [`UnblamedHunk`] on each side, along with a [`BlameEntry`] to represent -/// the match. -/// This is repeated until there are no non-empty [`UnblamedHunk`]s left. -/// -/// At a high level, what we want to do is the following: -/// -/// - get the commit that belongs to a commit id -/// - walk through parents -/// - for each parent, do a diff and mark lines that don’t have a suspect (this is the term -/// used in `libgit2`) yet, but that have been changed in this commit -/// -/// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame -/// suspects. It can be visualized as follows: -// -// <----------------------------------------> -// <---------------><-----------------------> -// <---><----------><-----------------------> -// <---><----------><-------><-----><-------> -// <---><---><-----><-------><-----><-------> -// <---><---><-----><-------><-----><-><-><-> -pub fn file( - odb: impl gix_object::Find + gix_object::FindHeader, - traverse: impl IntoIterator>, - resource_cache: &mut gix_diff::blob::Platform, - // TODO: remove - worktree_root: PathBuf, - file_path: &BStr, -) -> Result, E> { - // TODO: `worktree_root` should be removed - read everything from Commit. - // Worktree changes should be placed into a temporary commit. - // TODO: remove this and deduplicate the respective code. - use gix_object::bstr::ByteSlice; - let absolute_path = worktree_root.join(gix_path::from_bstr(file_path)); - - // TODO use `imara-diff` to tokenize this just like it will be tokenized when diffing. - let number_of_lines = std::fs::read_to_string(absolute_path).unwrap().lines().count(); - - let mut traverse = traverse.into_iter().peekable(); - let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { - todo!("return actual error"); - }; - - let mut hunks_to_blame = vec![UnblamedHunk::new( - 0..number_of_lines.try_into().unwrap(), - suspect, - Offset::Added(0), - )]; - - let mut out = Vec::new(); - 'outer: for item in traverse { - let item = item?; - let suspect = item.id; - - let parent_ids = item.parent_ids; - if parent_ids.is_empty() { - // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of - // the last `item` that was yielded by `traverse`, so it makes sense to assign the - // remaining lines to it, even though we don’t explicitly check whether that is true - // here. We could perhaps use `needed_to_obtain` to compare `suspect` against an empty - // tree to validate this assumption. 
- out.extend( - hunks_to_blame - .iter() - .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), - ); - - hunks_to_blame.clear(); - break; - } - - let mut buffer = Vec::new(); - let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); - let tree_iter = odb.find_tree_iter(&commit_id, &mut buffer).unwrap(); - - let mut entry_buffer = Vec::new(); - let Some(entry) = tree_iter - .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) - .unwrap() - else { - continue; - }; - - if parent_ids.len() == 1 { - let parent_id: ObjectId = *parent_ids.last().unwrap(); - - let mut buffer = Vec::new(); - let parent_commit_id = odb.find_commit(&parent_id, &mut buffer).unwrap().tree(); - let parent_tree_iter = odb.find_tree_iter(&parent_commit_id, &mut buffer).unwrap(); - - let mut entry_buffer = Vec::new(); - if let Some(parent_entry) = parent_tree_iter - .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) - .unwrap() - { - if entry.oid == parent_entry.oid { - // The blobs storing the blamed file in `entry` and `parent_entry` are identical - // which is why we can pass blame to the parent without further checks. - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - continue; - } - } - - let Some(modification) = tree_diff_at_file_path(&odb, file_path, item.id, parent_id) else { - // None of the changes affected the file we’re currently blaming. Pass blame to parent. - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - continue; - }; - - match modification { - gix_diff::tree::recorder::Change::Addition { .. } => { - // Every line that has not been blamed yet on a commit, is expected to have been - // added when the file was added to the repository. - out.extend( - hunks_to_blame - .iter() - .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), - ); - - hunks_to_blame.clear(); - break; - } - gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), - gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); - - hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - } - } - } else { - let mut buffer = Vec::new(); - let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); - let tree_iter = odb.find_tree_iter(&commit_id, &mut buffer).unwrap(); - - let mut entry_buffer = Vec::new(); - let entry = tree_iter - .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) - .unwrap() - .unwrap(); - - for parent_id in &parent_ids { - let mut buffer = Vec::new(); - let parent_commit_id = odb.find_commit(parent_id, &mut buffer).unwrap().tree(); - let parent_tree_iter = odb.find_tree_iter(&parent_commit_id, &mut buffer).unwrap(); - - let mut entry_buffer = Vec::new(); - if let Some(parent_entry) = parent_tree_iter - .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) - .unwrap() - { - if entry.oid == parent_entry.oid { - // The blobs storing the blamed file in `entry` and `parent_entry` are - // identical which is why we can pass blame to the parent without further - // checks. 
- for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, *parent_id); - } - continue 'outer; - } - } - } - - for parent_id in parent_ids { - let changes_for_file_path = tree_diff_at_file_path(&odb, file_path, item.id, parent_id); - let Some(modification) = changes_for_file_path else { - // None of the changes affected the file we’re currently blaming. Pass blame - // to parent. - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.clone_blame(suspect, parent_id); - } - - continue; - }; - - match modification { - gix_diff::tree::recorder::Change::Addition { .. } => { - // Do nothing under the assumption that this always (or almost always) - // implies that the file comes from a different parent, compared to which - // it was modified, not added. - // - // TODO: I still have to figure out whether this is correct in all cases. - } - gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), - gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); - - hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - } - } - } - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.remove_blame(suspect); - } - } - } - - debug_assert_eq!( - hunks_to_blame, - vec![], - "only if there is no portion of the file left we have completed the blame" - ); - - // I don’t know yet whether it would make sense to use a data structure instead that preserves - // order on insertion. - out.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); - Ok(coalesce_blame_entries(out)) -} +mod file; +pub use file::function::file; diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs new file mode 100644 index 00000000000..7499389c26a --- /dev/null +++ b/gix-blame/src/types.rs @@ -0,0 +1,137 @@ +use std::{ + collections::BTreeMap, + ops::{AddAssign, Range, SubAssign}, +}; + +use gix_hash::ObjectId; + +/// Describes the offset of a particular hunk relative to the *Original File*. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Offset { + /// The amount of lines to add. + Added(u32), + /// The amount of lines to remove. + Deleted(u32), +} + +impl Offset { + /// Shift the given `range` according to our offset. + pub fn shifted_range(&self, range: &Range) -> Range { + match self { + Offset::Added(added) => { + debug_assert!(range.start >= *added, "{self:?} {range:?}"); + Range { + start: range.start - added, + end: range.end - added, + } + } + Offset::Deleted(deleted) => Range { + start: range.start + deleted, + end: range.end + deleted, + }, + } + } +} + +impl AddAssign for Offset { + fn add_assign(&mut self, rhs: u32) { + match self { + Self::Added(added) => *self = Self::Added(*added + rhs), + Self::Deleted(deleted) => { + if rhs > *deleted { + *self = Self::Added(rhs - *deleted); + } else { + *self = Self::Deleted(*deleted - rhs); + } + } + } + } +} + +impl SubAssign for Offset { + fn sub_assign(&mut self, rhs: u32) { + match self { + Self::Added(added) => { + if rhs > *added { + *self = Self::Deleted(rhs - *added); + } else { + *self = Self::Added(*added - rhs); + } + } + Self::Deleted(deleted) => *self = Self::Deleted(*deleted + rhs), + } + } +} + +/// A mapping of a section of the *Original File* to the section in a *Blamed File* that introduced it. 
+/// +/// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally, +/// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()). +// TODO: see if this can be encoded as `start_in_original_file` and `start_in_blamed_file` and a single `len`. +#[derive(Debug, PartialEq)] +pub struct BlameEntry { + /// The section of tokens in the tokenized version of the *Blamed File* (typically lines). + pub range_in_blamed_file: Range, + /// The section of tokens in the tokenized version of the *Original File* (typically lines). + pub range_in_original_file: Range, + /// The commit that introduced the section into the *Blamed File*. + pub commit_id: ObjectId, +} + +impl BlameEntry { + /// Create a new instance. + pub fn new(range_in_blamed_file: Range, range_in_original_file: Range, commit_id: ObjectId) -> Self { + debug_assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + debug_assert!( + range_in_original_file.end > range_in_original_file.start, + "{range_in_original_file:?}" + ); + debug_assert_eq!(range_in_original_file.len(), range_in_blamed_file.len()); + + Self { + range_in_blamed_file: range_in_blamed_file.clone(), + range_in_original_file: range_in_original_file.clone(), + commit_id, + } + } +} + +pub(crate) trait LineRange { + fn shift_by(&self, offset: Offset) -> Self; +} + +impl LineRange for Range { + fn shift_by(&self, offset: Offset) -> Self { + offset.shifted_range(self) + } +} + +/// TODO: docs - what is it? +// TODO: is `Clone` really needed. +#[derive(Clone, Debug, PartialEq)] +pub struct UnblamedHunk { + /// TODO: figure out how this works. + pub range_in_blamed_file: Range, + /// Maps a commit to the range in the *Original File* that `range_in_blamed_file` refers to. + pub suspects: BTreeMap>, +} + +#[derive(Debug)] +pub(crate) enum Either { + Left(T), + Right(U), +} + +/// A single change between two blobs, or an unchanged region. +#[derive(Clone, Debug, PartialEq)] +pub enum Change { + /// A range of tokens that wasn't changed. + Unchanged(Range), + /// `(added_line_range, num_deleted_in_before)` + Added(Range, u32), + /// `(line_to_start_deletion_at, num_deleted_in_before)` + Deleted(u32, u32), +} diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 3634491ea56..63dc7b2c3a5 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -1,8 +1,7 @@ -use gix_blame::{file, process_change, process_changes, BlameEntry, Change, Offset, UnblamedHunk}; +use std::path::PathBuf; + use gix_hash::ObjectId; use gix_object::bstr; -use std::ops::Range; -use std::path::PathBuf; struct Baseline<'a> { lines: bstr::Lines<'a>, @@ -11,11 +10,11 @@ struct Baseline<'a> { mod baseline { use std::path::Path; + use gix_blame::BlameEntry; use gix_hash::ObjectId; use gix_ref::bstr::ByteSlice; use super::Baseline; - use gix_blame::BlameEntry; // These fields are used by `git` in its porcelain output. const HEADER_FIELDS: [&str; 12] = [ @@ -193,7 +192,7 @@ macro_rules! 
mktest { commits, } = Fixture::new().unwrap(); - let lines_blamed = file( + let lines_blamed = gix_blame::file( &odb, commits, &mut resource_cache, @@ -253,7 +252,7 @@ fn diff_disparity() { commits, } = Fixture::new().unwrap(); - let lines_blamed = file( + let lines_blamed = gix_blame::file( &odb, commits, &mut resource_cache, @@ -271,1312 +270,6 @@ fn diff_disparity() { } } -#[test] -fn process_change_works() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - None, - None, - ); - - assert_eq!(hunk, None); - assert_eq!(change, None); - assert_eq!(offset_in_destination, Offset::Added(0)); -} - -#[test] -fn process_change_works_added_hunk() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Added(0..3, 0)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() - }) - ); - assert_eq!(change, None); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 0..3, - range_in_original_file: 0..3, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(3)); -} - -#[test] -fn process_change_works_added_hunk_2() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Added(2..3, 0)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() - }) - ); - assert_eq!(change, None); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 2..3, - range_in_original_file: 2..3, - commit_id: suspect - }] - ); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 0..2, - suspects: [(suspect, 0..2)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(1)); -} - -#[test] -fn process_change_works_added_hunk_3() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(5); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(10..15, suspect, Offset::Added(0))), - Some(Change::Added(12..13, 0)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 13..15, - suspects: [(suspect, 13..15)].into() - }) - ); - assert_eq!(change, None); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 12..13, - range_in_original_file: 12..13, - commit_id: suspect - }] - ); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 10..12, - 
suspects: [(suspect, 5..7)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(6)); -} - -#[test] -fn process_change_works_added_hunk_4() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 7..12 - Some(new_unblamed_hunk(12..17, suspect, Offset::Added(5))), - Some(Change::Added(9..10, 0)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 15..17, - suspects: [(suspect, 10..12)].into() - }) - ); - assert_eq!(change, None); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 14..15, - range_in_original_file: 9..10, - commit_id: suspect - }] - ); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 12..14, - suspects: [(suspect, 7..9)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(1)); -} - -#[test] -fn process_change_works_added_hunk_5() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Added(0..3, 1)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 3..5, - suspects: [(suspect, 3..5)].into() - }) - ); - assert_eq!(change, None); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 0..3, - range_in_original_file: 0..3, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(2)); -} - -#[test] -fn process_change_works_added_hunk_6() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 0..4 - Some(new_unblamed_hunk(1..5, suspect, Offset::Added(1))), - Some(Change::Added(0..3, 1)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 4..5, - suspects: [(suspect, 3..4)].into() - }) - ); - assert_eq!(change, None); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 1..4, - range_in_original_file: 0..3, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(2)); -} - -#[test] -fn process_change_works_added_hunk_7() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(2); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 2..6 - Some(new_unblamed_hunk(3..7, suspect, Offset::Added(1))), - Some(Change::Added(3..5, 1)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 6..7, - suspects: [(suspect, 5..6)].into() - }) - ); - assert_eq!(change, None); - assert_eq!( - lines_blamed, - [BlameEntry 
{ - range_in_blamed_file: 4..6, - range_in_original_file: 3..5, - commit_id: suspect - }] - ); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 3..4, - suspects: [(suspect, 0..1)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(3)); -} - -#[test] -fn process_change_works_added_hunk_8() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(1); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 25..26 - Some(new_unblamed_hunk(23..24, suspect, Offset::Deleted(2))), - Some(Change::Added(25..27, 1)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(25..27, 1))); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 23..24, - range_in_original_file: 25..26, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(1)); -} - -#[test] -fn process_change_works_added_hunk_9() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 21..22 - Some(new_unblamed_hunk(23..24, suspect, Offset::Added(2))), - Some(Change::Added(18..22, 3)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, None); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 23..24, - range_in_original_file: 21..22, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(1)); -} - -#[test] -fn process_change_works_added_hunk_10() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 70..108 - Some(new_unblamed_hunk(71..109, suspect, Offset::Added(1))), - Some(Change::Added(106..109, 0)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(106..109, 0))); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 107..109, - range_in_original_file: 106..108, - commit_id: suspect - }] - ); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 71..107, - suspects: [(suspect, 70..106)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(0)); -} - -#[test] -fn process_change_works_added_hunk_11() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 137..144 - Some(new_unblamed_hunk(149..156, suspect, Offset::Added(12))), - Some(Change::Added(143..146, 0)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(143..146, 0))); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 155..156, - 
range_in_original_file: 143..144, - commit_id: suspect - }] - ); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 149..155, - suspects: [(suspect, 137..143)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(0)); -} - -#[test] -fn process_change_works_no_overlap() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Deleted(3); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 2..5 - Some(new_unblamed_hunk(3..6, suspect, Offset::Added(1))), - Some(Change::Added(7..10, 1)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(7..10, 1))); - assert_eq!(lines_blamed, []); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 3..6, - suspects: [(suspect, 5..8)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Deleted(3)); -} - -#[test] -fn process_change_works_no_overlap_2() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 6..8 - Some(new_unblamed_hunk(9..11, suspect, Offset::Added(3))), - Some(Change::Added(2..5, 0)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 9..11, - suspects: [(suspect, 6..8)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(3)); -} - -#[test] -fn process_change_works_no_overlap_3() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 5..15 - Some(new_unblamed_hunk(4..15, suspect, Offset::Deleted(1))), - Some(Change::Added(4..5, 1)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 4..15, - suspects: [(suspect, 5..16)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(0)); -} - -#[test] -fn process_change_works_no_overlap_4() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(1); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 25..27 - Some(new_unblamed_hunk(23..25, suspect, Offset::Deleted(2))), - Some(Change::Unchanged(21..22)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 23..25, - suspects: [(suspect, 25..27)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(1)); -} - -#[test] -fn process_change_works_no_overlap_5() { - let mut lines_blamed = Vec::new(); - let mut 
new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(1); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 17..18 - Some(new_unblamed_hunk(15..16, suspect, Offset::Deleted(2))), - Some(Change::Deleted(20, 1)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Deleted(20, 1))); - assert_eq!(lines_blamed, []); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 15..16, - suspects: [(suspect, 16..17)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(1)); -} - -#[test] -fn process_change_works_no_overlap_6() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 22..24 - Some(new_unblamed_hunk(23..25, suspect, Offset::Added(1))), - Some(Change::Deleted(20, 1)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 23..25, - suspects: [(suspect, 22..24)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Deleted(1)); -} - -#[test] -fn process_change_works_enclosing_addition() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(3); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 5..8 - Some(new_unblamed_hunk(2..5, suspect, Offset::Deleted(3))), - Some(Change::Added(3..12, 2)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(3..12, 2))); - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 2..5, - range_in_original_file: 5..8, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(3)); -} - -#[test] -fn process_change_works_enclosing_deletion() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(3); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 13..20 - Some(new_unblamed_hunk(12..19, suspect, Offset::Deleted(1))), - Some(Change::Deleted(15, 2)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 14..19, - suspects: [(suspect, 15..20)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 12..14, - suspects: [(suspect, 10..12)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(1)); -} - -#[test] -fn process_change_works_enclosing_unchanged_lines() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(3); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut 
lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - // range_in_destination: 109..113 - Some(new_unblamed_hunk(110..114, suspect, Offset::Added(1))), - Some(Change::Unchanged(109..172)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Unchanged(109..172))); - assert_eq!(lines_blamed, []); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 110..114, - suspects: [(suspect, 106..110)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(3)); -} - -#[test] -fn process_change_works_unchanged_hunk() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Unchanged(0..3)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 0..5, - suspects: [(suspect, 0..5)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(0)); -} - -#[test] -fn process_change_works_unchanged_hunk_2() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Unchanged(0..7)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Unchanged(0..7))); - assert_eq!(lines_blamed, []); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 0..5, - suspects: [(suspect, 0..5)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(0)); -} - -#[test] -fn process_change_works_unchanged_hunk_3() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Deleted(2); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(UnblamedHunk { - range_in_blamed_file: 22..30, - suspects: [(suspect, 21..29)].into(), - }), - Some(Change::Unchanged(21..23)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 22..30, - suspects: [(suspect, 21..29)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Deleted(2)); -} - -#[test] -fn process_change_works_deleted_hunk() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Deleted(5, 3)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Deleted(5, 3))); - assert_eq!(lines_blamed, []); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 0..5, - 
suspects: [(suspect, 0..5)].into() - }] - ); - assert_eq!(offset_in_destination, Offset::Added(0)); -} - -#[test] -fn process_change_works_deleted_hunk_2() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(2..16, suspect, Offset::Added(0))), - Some(Change::Deleted(0, 4)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 2..16, - suspects: [(suspect, 2..16)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Deleted(4)); -} - -#[test] -fn process_change_works_deleted_hunk_3() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(0); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - Some(new_unblamed_hunk(2..16, suspect, Offset::Added(0))), - Some(Change::Deleted(14, 4)), - ); - - assert_eq!( - hunk, - Some(UnblamedHunk { - range_in_blamed_file: 14..16, - suspects: [(suspect, 14..16)].into() - }) - ); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!( - new_hunks_to_blame, - [new_unblamed_hunk(2..14, suspect, Offset::Added(0))] - ); - assert_eq!(offset_in_destination, Offset::Deleted(4)); -} - -#[test] -fn process_change_works_addition_only() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(1); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - None, - Some(Change::Added(22..25, 1)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(3)); -} - -#[test] -fn process_change_works_deletion_only() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(1); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - None, - Some(Change::Deleted(11, 5)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Deleted(4)); -} - -#[test] -fn process_change_works_unchanged_only() { - let mut lines_blamed = Vec::new(); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination: Offset = Offset::Added(1); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - - let (hunk, change) = process_change( - &mut lines_blamed, - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - None, - Some(Change::Unchanged(11..13)), - ); - - assert_eq!(hunk, None); - assert_eq!(change, None); - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); - assert_eq!(offset_in_destination, Offset::Added(1)); -} -#[test] -fn process_changes_works() { - 
let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let new_hunks_to_blame = process_changes(&mut lines_blamed, vec![], vec![], suspect); - - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, []); -} - -#[test] -fn process_changes_works_added_hunk() { - let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..4, 0)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 0..4, - range_in_original_file: 0..4, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, []); -} - -#[test] -fn process_changes_works_added_hunk_2() { - let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..4, 0), Change::Unchanged(4..6)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 0..4, - range_in_original_file: 0..4, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); -} - -#[test] -fn process_changes_works_added_hunk_3() { - let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Unchanged(0..2), Change::Added(2..4, 0), Change::Unchanged(4..6)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 2..4, - range_in_original_file: 2..4, - commit_id: suspect - }] - ); - assert_eq!( - new_hunks_to_blame, - [ - new_unblamed_hunk(0..2, suspect, Offset::Added(0)), - new_unblamed_hunk(4..6, suspect, Offset::Added(2)) - ] - ); -} - -#[test] -fn process_changes_works_added_hunk_4_0() { - let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..1, 0), Change::Added(1..4, 0), Change::Unchanged(4..6)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!( - lines_blamed, - [ - BlameEntry { - range_in_blamed_file: 0..1, - range_in_original_file: 0..1, - commit_id: suspect - }, - BlameEntry { - range_in_blamed_file: 1..4, - range_in_original_file: 1..4, - commit_id: suspect - } - ] - ); - assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); -} - -#[test] -fn process_changes_works_added_hunk_4_1() { - let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..1, 0)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 0..1, - range_in_original_file: 0..1, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(1..6, suspect, Offset::Added(1))]); -} - -#[test] -fn 
process_changes_works_added_hunk_4_2() { - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); - let mut lines_blamed: Vec = vec![BlameEntry { - range_in_blamed_file: 0..2, - range_in_original_file: 0..2, - commit_id: suspect, - }]; - let hunks_to_blame = vec![new_unblamed_hunk(2..6, suspect_2, Offset::Added(2))]; - let changes = vec![Change::Added(0..1, 0)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); - - assert_eq!( - lines_blamed, - [ - BlameEntry { - range_in_blamed_file: 0..2, - range_in_original_file: 0..2, - commit_id: suspect - }, - BlameEntry { - range_in_blamed_file: 2..3, - range_in_original_file: 0..1, - commit_id: suspect_2 - } - ] - ); - assert_eq!( - new_hunks_to_blame, - [new_unblamed_hunk(3..6, suspect_2, Offset::Added(3))] - ); -} - -#[test] -fn process_changes_works_added_hunk_5() { - let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..4, 3), Change::Unchanged(4..6)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 0..4, - range_in_original_file: 0..4, - commit_id: suspect - }] - ); - assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(1))]); -} - -#[test] -fn process_changes_works_added_hunk_6() { - let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = vec![new_unblamed_hunk(4..6, suspect, Offset::Added(1))]; - let changes = vec![Change::Added(0..3, 0), Change::Unchanged(3..5)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!(lines_blamed, []); - assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(4..6, suspect, Offset::Added(4))]); -} - -#[test] -fn process_changes_works_added_hunk_7() { - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); - let mut lines_blamed: Vec = vec![BlameEntry { - range_in_blamed_file: 0..1, - range_in_original_file: 0..1, - commit_id: suspect, - }]; - let hunks_to_blame = vec![new_unblamed_hunk(1..3, suspect_2, Offset::Added(1))]; - let changes = vec![Change::Added(0..1, 2)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); - - assert_eq!( - lines_blamed, - [ - BlameEntry { - range_in_blamed_file: 0..1, - range_in_original_file: 0..1, - commit_id: suspect - }, - BlameEntry { - range_in_blamed_file: 1..2, - range_in_original_file: 0..1, - commit_id: suspect_2 - } - ] - ); - assert_eq!( - new_hunks_to_blame, - [new_unblamed_hunk(2..3, suspect_2, Offset::Added(0))] - ); -} - -#[test] -fn process_changes_works_added_hunk_8() { - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let mut lines_blamed = Vec::new(); - let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..2, 0), Change::Unchanged(2..3), Change::Added(3..4, 0)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!( - lines_blamed, - [ - BlameEntry { - range_in_blamed_file: 0..2, - range_in_original_file: 0..2, - commit_id: suspect - }, - BlameEntry { - 
range_in_blamed_file: 3..4, - range_in_original_file: 3..4, - commit_id: suspect - } - ] - ); - assert_eq!(new_hunks_to_blame, [new_unblamed_hunk(2..3, suspect, Offset::Added(2))]); -} - -#[test] -fn process_changes_works_added_hunk_9() { - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let mut lines_blamed: Vec = vec![BlameEntry { - range_in_blamed_file: 30..31, - range_in_original_file: 30..31, - commit_id: suspect, - }]; - let hunks_to_blame = vec![ - UnblamedHunk { - range_in_blamed_file: 0..30, - suspects: [(suspect, 0..30)].into(), - }, - UnblamedHunk { - range_in_blamed_file: 31..37, - suspects: [(suspect, 31..37)].into(), - }, - ]; - let changes = vec![ - Change::Unchanged(0..16), - Change::Added(16..17, 0), - Change::Unchanged(17..37), - ]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - lines_blamed.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); - - assert_eq!( - lines_blamed, - [ - BlameEntry { - range_in_blamed_file: 16..17, - range_in_original_file: 16..17, - commit_id: suspect - }, - BlameEntry { - range_in_blamed_file: 30..31, - range_in_original_file: 30..31, - commit_id: suspect - } - ] - ); - assert_eq!( - new_hunks_to_blame, - [ - UnblamedHunk { - range_in_blamed_file: 0..16, - suspects: [(suspect, 0..16)].into() - }, - UnblamedHunk { - range_in_blamed_file: 17..30, - suspects: [(suspect, 16..29)].into() - }, - UnblamedHunk { - range_in_blamed_file: 31..37, - suspects: [(suspect, 30..36)].into() - } - ] - ); -} - -#[test] -fn process_changes_works_deleted_hunk() { - let mut lines_blamed = Vec::new(); - let suspect = ObjectId::null(gix_hash::Kind::Sha1); - let hunks_to_blame = vec![ - new_unblamed_hunk(0..4, suspect, Offset::Added(0)), - new_unblamed_hunk(4..7, suspect, Offset::Added(0)), - ]; - let changes = vec![Change::Deleted(0, 3), Change::Added(0..4, 0)]; - let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - - assert_eq!( - lines_blamed, - [BlameEntry { - range_in_blamed_file: 0..4, - range_in_original_file: 0..4, - commit_id: suspect - }] - ); - assert_eq!( - new_hunks_to_blame, - [UnblamedHunk { - range_in_blamed_file: 4..7, - suspects: [(suspect, 3..6)].into() - }] - ); -} - fn fixture_path() -> PathBuf { gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap() } - -fn new_unblamed_hunk(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> UnblamedHunk { - assert!( - range_in_blamed_file.end > range_in_blamed_file.start, - "{range_in_blamed_file:?}" - ); - - let range_in_destination = offset.shifted_range(&range_in_blamed_file); - UnblamedHunk { - range_in_blamed_file, - suspects: [(suspect, range_in_destination)].into(), - } -} From f2790a9db8cac3ce57003b512edf735e734383d1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 23 Dec 2024 19:13:09 +0100 Subject: [PATCH 06/16] unify how lines in blame results are accessed * provide `Outcome` with interner and and a list of tokens. * unify the way tokens are created. 
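
For illustration, here is a minimal sketch of how a consumer could render
blame output through the new `Outcome::entries_with_lines()` accessor instead
of re-reading and splitting the file itself. It assumes only the public
`gix_blame` types touched by this patch; the helper name `print_blame` is made
up for the example.

    // Sketch only, not part of this patch.
    fn print_blame(mut out: impl std::io::Write, outcome: gix_blame::Outcome) -> std::io::Result<()> {
        for (entry, lines) in outcome.entries_with_lines() {
            // `lines` are the tokens of the *Blamed File* covered by `entry`, produced with the
            // same tokenizer that was used for diffing, so content and line numbers stay in sync.
            for (line_number, line) in entry.range_in_blamed_file.clone().zip(lines) {
                write!(
                    out,
                    "{} {} {}",
                    entry.commit_id.to_hex_with_len(8),
                    // 0-based internally, shown 1-based like `git blame`; `line` keeps its terminator.
                    line_number + 1,
                    line
                )?;
            }
        }
        Ok(())
    }

The same pattern is what `gitoxide-core` switches to below, which removes the
need to read the blamed file from the worktree a second time.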
--- gitoxide-core/src/repository/blame.rs | 39 +++++++++--------------- gix-blame/src/file/function.rs | 44 +++++++++++++++++++-------- gix-blame/src/lib.rs | 2 +- gix-blame/src/types.rs | 39 +++++++++++++++++++++++- gix-blame/tests/blame.rs | 6 ++-- 5 files changed, 89 insertions(+), 41 deletions(-) diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index 1b5ff20096b..2872545ed53 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -1,4 +1,4 @@ -use std::{ffi::OsStr, path::PathBuf, str::Lines}; +use std::{ffi::OsStr, path::PathBuf}; use anyhow::anyhow; use gix::bstr::BStr; @@ -18,40 +18,31 @@ pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Wr .into(); let file_path: &BStr = gix::path::os_str_into_bstr(file)?; - let blame_entries = gix::blame::file( + let outcome = gix::blame::file( &repo.objects, traverse, &mut resource_cache, work_dir.clone(), file_path, )?; - - let absolute_path = work_dir.join(file); - let file_content = std::fs::read_to_string(absolute_path)?; - let lines = file_content.lines(); - - write_blame_entries(out, lines, blame_entries)?; + write_blame_entries(out, outcome)?; Ok(()) } -fn write_blame_entries( - mut out: impl std::io::Write, - mut lines: Lines<'_>, - blame_entries: Vec, -) -> Result<(), std::io::Error> { - for blame_entry in blame_entries { - for line_number in blame_entry.range_in_blamed_file { - let line = lines.next().unwrap(); - - writeln!( +fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcome) -> Result<(), std::io::Error> { + for (entry, lines_in_hunk) in outcome.entries_with_lines() { + for ((actual_lno, source_lno), line) in entry + .range_in_blamed_file + .zip(entry.range_in_original_file) + .zip(lines_in_hunk) + { + write!( out, - "{} {} {}", - blame_entry.commit_id.to_hex_with_len(8), - // `line_number` is 0-based, but we want to show 1-based line numbers (as `git` - // does). - line_number + 1, - line + "{short_id} {line_no} {src_line_no} {line}", + line_no = actual_lno + 1, + src_line_no = source_lno + 1, + short_id = entry.commit_id.to_hex_with_len(8), )?; } } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index bc00f54a0c3..dcbc56ccdb6 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,10 +1,10 @@ -use std::{ops::Range, path::PathBuf}; - +use gix_diff::blob::intern::TokenSource; use gix_hash::ObjectId; use gix_object::{bstr::BStr, FindExt}; +use std::{ops::Range, path::PathBuf}; use super::{process_changes, Change, Offset, UnblamedHunk}; -use crate::BlameEntry; +use crate::{BlameEntry, Outcome}; // TODO: do not instantiate anything, get everything passed as argument. /// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file @@ -60,28 +60,35 @@ pub fn file( // TODO: remove worktree_root: PathBuf, file_path: &BStr, -) -> Result, E> { +) -> Result { // TODO: `worktree_root` should be removed - read everything from Commit. // Worktree changes should be placed into a temporary commit. // TODO: remove this and deduplicate the respective code. use gix_object::bstr::ByteSlice; let absolute_path = worktree_root.join(gix_path::from_bstr(file_path)); - // TODO use `imara-diff` to tokenize this just like it will be tokenized when diffing. 
- let number_of_lines = std::fs::read_to_string(absolute_path).unwrap().lines().count(); - let mut traverse = traverse.into_iter().peekable(); let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { todo!("return actual error"); }; + let original_file_blob = std::fs::read(absolute_path).unwrap(); + let num_lines_in_original = { + let mut interner = gix_diff::blob::intern::Interner::new(original_file_blob.len() / 100); + tokens_for_diffing(&original_file_blob) + .tokenize() + .map(|token| interner.intern(token)) + .count() + }; + let mut hunks_to_blame = vec![UnblamedHunk::new( - 0..number_of_lines.try_into().unwrap(), + 0..num_lines_in_original.try_into().unwrap(), suspect, Offset::Added(0), )]; let mut out = Vec::new(); + let mut buf = Vec::with_capacity(512); 'outer: for item in traverse { let item = item?; let suspect = item.id; @@ -103,9 +110,8 @@ pub fn file( break; } - let mut buffer = Vec::new(); - let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); - let tree_iter = odb.find_tree_iter(&commit_id, &mut buffer).unwrap(); + let commit_id = odb.find_commit(&suspect, &mut buf).unwrap().tree(); + let tree_iter = odb.find_tree_iter(&commit_id, &mut buf).unwrap(); let mut entry_buffer = Vec::new(); let Some(entry) = tree_iter @@ -247,7 +253,10 @@ pub fn file( // I don’t know yet whether it would make sense to use a data structure instead that preserves // order on insertion. out.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); - Ok(coalesce_blame_entries(out)) + Ok(Outcome { + entries: coalesce_blame_entries(out), + blob: original_file_blob, + }) } /// This function merges adjacent blame entries. It merges entries that are adjacent both in the @@ -416,9 +425,18 @@ fn blob_changes( .unwrap(); let outcome = resource_cache.prepare_diff().unwrap(); - let input = outcome.interned_input(); + let input = gix_diff::blob::intern::InternedInput::new( + tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()), + tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()), + ); let number_of_lines_in_destination = input.after.len(); let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap()); gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) } + +/// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them +/// so the later access shows the right thing. +pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { + gix_diff::blob::sources::byte_lines_with_terminator(data) +} diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index 5d83a1a6d61..7a7ebbcc24b 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -15,7 +15,7 @@ #![forbid(unsafe_code)] mod types; -pub use types::BlameEntry; +pub use types::{BlameEntry, Outcome}; mod file; pub use file::function::file; diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index 7499389c26a..3def1bddf9b 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -3,7 +3,43 @@ use std::{ ops::{AddAssign, Range, SubAssign}, }; +use crate::file::function::tokens_for_diffing; use gix_hash::ObjectId; +use gix_object::bstr::BString; + +/// The outcome of [`file()`](crate::file()). +pub struct Outcome { + /// One entry in sequential order, to associate a hunk in the original file with the commit (and its lines) + /// that introduced it. 
+ pub entries: Vec, + /// A buffer with the file content of the *Original File*, ready for tokenization. + pub blob: Vec, +} + +impl Outcome { + /// Return an iterator over each entry in [`Self::entries`], along with its lines, line by line. + /// + /// Note that [`Self::blob`] must be tokenized in exactly the same way as the tokenizer that was used + /// to perform the diffs, which is what this method assures. + pub fn entries_with_lines(&self) -> impl Iterator)> + '_ { + use gix_diff::blob::intern::TokenSource; + let mut interner = gix_diff::blob::intern::Interner::new(self.blob.len() / 100); + let lines_as_tokens: Vec<_> = tokens_for_diffing(&self.blob) + .tokenize() + .map(|token| interner.intern(token)) + .collect(); + self.entries.iter().map(move |e| { + let Range { start, end } = e.range_in_blamed_file.clone(); + ( + e.clone(), + lines_as_tokens[start as usize..end as usize] + .iter() + .map(|token| BString::new(interner[*token].into())) + .collect(), + ) + }) + } +} /// Describes the offset of a particular hunk relative to the *Original File*. #[derive(Clone, Copy, Debug, PartialEq)] @@ -68,11 +104,12 @@ impl SubAssign for Offset { /// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally, /// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()). // TODO: see if this can be encoded as `start_in_original_file` and `start_in_blamed_file` and a single `len`. -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub struct BlameEntry { /// The section of tokens in the tokenized version of the *Blamed File* (typically lines). pub range_in_blamed_file: Range, /// The section of tokens in the tokenized version of the *Original File* (typically lines). + // TODO: figure out why this is basically inverted. Probably that's just it - would make sense with `UnblamedHunk` then. pub range_in_original_file: Range, /// The commit that introduced the section into the *Blamed File*. pub commit_id: ObjectId, diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 63dc7b2c3a5..fe4660327e9 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -199,7 +199,8 @@ macro_rules! 
mktest { worktree_path, format!("{}.txt", $case).as_str().into(), ) - .unwrap(); + .unwrap() + .entries; assert_eq!(lines_blamed.len(), $number_of_lines); @@ -259,7 +260,8 @@ fn diff_disparity() { worktree_path, format!("{case}.txt").as_str().into(), ) - .unwrap(); + .unwrap() + .entries; assert_eq!(lines_blamed.len(), 5); From a158d22703077d37b83e0434aa229baf12c342ed Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 23 Dec 2024 21:18:46 +0100 Subject: [PATCH 07/16] remove duplication and unnecessary parameter --- Cargo.lock | 1 - gitoxide-core/src/repository/blame.rs | 17 +----- gix-blame/Cargo.toml | 1 - gix-blame/src/file/function.rs | 78 +++++++++------------------ gix-blame/tests/blame.rs | 6 --- 5 files changed, 28 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f7b2469af2a..ca292869281 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1546,7 +1546,6 @@ dependencies = [ "gix-index 0.37.0", "gix-object 0.46.1", "gix-odb", - "gix-path 0.10.13", "gix-ref 0.49.1", "gix-testtools", "gix-traverse 0.43.1", diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index 2872545ed53..bfb86661be2 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -1,7 +1,5 @@ -use std::{ffi::OsStr, path::PathBuf}; - -use anyhow::anyhow; use gix::bstr::BStr; +use std::ffi::OsStr; pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Write) -> anyhow::Result<()> { repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); @@ -11,20 +9,9 @@ pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Wr gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::>) .build()?; let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; - - let work_dir: PathBuf = repo - .work_dir() - .ok_or_else(|| anyhow!("blame needs a workdir, but there is none"))? - .into(); let file_path: &BStr = gix::path::os_str_into_bstr(file)?; - let outcome = gix::blame::file( - &repo.objects, - traverse, - &mut resource_cache, - work_dir.clone(), - file_path, - )?; + let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file_path)?; write_blame_entries(out, outcome)?; Ok(()) diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index ce07a774386..ed0018bc9ee 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -14,7 +14,6 @@ rust-version = "1.65" doctest = false [dependencies] -gix-path = { version = "^0.10.13", path = "../gix-path" } gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] } gix-object = { version = "^0.46.0", path = "../gix-object" } gix-hash = { version = "^0.15.0", path = "../gix-hash" } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index dcbc56ccdb6..4ec6d24780f 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,10 +1,9 @@ +use super::{process_changes, Change, Offset, UnblamedHunk}; +use crate::{BlameEntry, Outcome}; use gix_diff::blob::intern::TokenSource; use gix_hash::ObjectId; use gix_object::{bstr::BStr, FindExt}; -use std::{ops::Range, path::PathBuf}; - -use super::{process_changes, Change, Offset, UnblamedHunk}; -use crate::{BlameEntry, Outcome}; +use std::ops::Range; // TODO: do not instantiate anything, get everything passed as argument. 
/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file @@ -57,22 +56,16 @@ pub fn file( odb: impl gix_object::Find + gix_object::FindHeader, traverse: impl IntoIterator>, resource_cache: &mut gix_diff::blob::Platform, - // TODO: remove - worktree_root: PathBuf, file_path: &BStr, ) -> Result { - // TODO: `worktree_root` should be removed - read everything from Commit. - // Worktree changes should be placed into a temporary commit. - // TODO: remove this and deduplicate the respective code. - use gix_object::bstr::ByteSlice; - let absolute_path = worktree_root.join(gix_path::from_bstr(file_path)); - let mut traverse = traverse.into_iter().peekable(); let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { todo!("return actual error"); }; - let original_file_blob = std::fs::read(absolute_path).unwrap(); + let (mut buf, mut buf2) = (Vec::new(), Vec::new()); + let original_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2).unwrap(); + let original_file_blob = odb.find_blob(&original_file_entry.oid, &mut buf).unwrap().data.to_vec(); let num_lines_in_original = { let mut interner = gix_diff::blob::intern::Interner::new(original_file_blob.len() / 100); tokens_for_diffing(&original_file_blob) @@ -88,12 +81,11 @@ pub fn file( )]; let mut out = Vec::new(); - let mut buf = Vec::with_capacity(512); 'outer: for item in traverse { let item = item?; let suspect = item.id; - let parent_ids = item.parent_ids; + let mut parent_ids = item.parent_ids; if parent_ids.is_empty() { // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of // the last `item` that was yielded by `traverse`, so it makes sense to assign the @@ -110,29 +102,13 @@ pub fn file( break; } - let commit_id = odb.find_commit(&suspect, &mut buf).unwrap().tree(); - let tree_iter = odb.find_tree_iter(&commit_id, &mut buf).unwrap(); - - let mut entry_buffer = Vec::new(); - let Some(entry) = tree_iter - .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) - .unwrap() - else { + let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2) else { continue; }; if parent_ids.len() == 1 { - let parent_id: ObjectId = *parent_ids.last().unwrap(); - - let mut buffer = Vec::new(); - let parent_commit_id = odb.find_commit(&parent_id, &mut buffer).unwrap().tree(); - let parent_tree_iter = odb.find_tree_iter(&parent_commit_id, &mut buffer).unwrap(); - - let mut entry_buffer = Vec::new(); - if let Some(parent_entry) = parent_tree_iter - .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) - .unwrap() - { + let parent_id = parent_ids.pop().expect("just validated there is exactly one"); + if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2) { if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are identical // which is why we can pass blame to the parent without further checks. 
@@ -175,25 +151,8 @@ pub fn file( } } } else { - let mut buffer = Vec::new(); - let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); - let tree_iter = odb.find_tree_iter(&commit_id, &mut buffer).unwrap(); - - let mut entry_buffer = Vec::new(); - let entry = tree_iter - .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) - .unwrap() - .unwrap(); - for parent_id in &parent_ids { - let mut buffer = Vec::new(); - let parent_commit_id = odb.find_commit(parent_id, &mut buffer).unwrap().tree(); - let parent_tree_iter = odb.find_tree_iter(&parent_commit_id, &mut buffer).unwrap(); - - let mut entry_buffer = Vec::new(); - if let Some(parent_entry) = parent_tree_iter - .lookup_entry_by_path(&odb, &mut entry_buffer, file_path.to_str().unwrap()) - .unwrap() + if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2) { if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are @@ -435,6 +394,21 @@ fn blob_changes( gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) } +fn find_path_entry_in_commit( + odb: &impl gix_object::Find, + commit: &gix_hash::oid, + file_path: &BStr, + buf: &mut Vec, + buf2: &mut Vec, +) -> Option { + let commit_id = odb.find_commit(commit, buf).unwrap().tree(); + let tree_iter = odb.find_tree_iter(&commit_id, buf).unwrap(); + + tree_iter + .lookup_entry(odb, buf2, file_path.split(|b| *b == b'/')) + .unwrap() +} + /// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them /// so the later access shows the right thing. pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index fe4660327e9..08664e9045e 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -108,7 +108,6 @@ mod baseline { } struct Fixture { - worktree_path: PathBuf, odb: gix_odb::Handle, resource_cache: gix_diff::blob::Platform, commits: Vec>, @@ -174,7 +173,6 @@ impl Fixture { ); Ok(Fixture { odb, - worktree_path, resource_cache, commits, }) @@ -186,7 +184,6 @@ macro_rules! mktest { #[test] fn $name() { let Fixture { - worktree_path, odb, mut resource_cache, commits, @@ -196,7 +193,6 @@ macro_rules! mktest { &odb, commits, &mut resource_cache, - worktree_path, format!("{}.txt", $case).as_str().into(), ) .unwrap() @@ -247,7 +243,6 @@ mktest!(file_only_changed_in_branch, "file-only-changed-in-branch", 2); fn diff_disparity() { for case in ["empty-lines-myers", "empty-lines-histogram"] { let Fixture { - worktree_path, odb, mut resource_cache, commits, @@ -257,7 +252,6 @@ fn diff_disparity() { &odb, commits, &mut resource_cache, - worktree_path, format!("{case}.txt").as_str().into(), ) .unwrap() From 4ffe6eb8f7921c6a03db0aa6d796cc2e3cc328e0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 24 Dec 2024 14:59:33 +0100 Subject: [PATCH 08/16] feat: Add support for statistics and additional performance information. 
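
The counters make it easier to see where the work goes during a blame run. As
a rough sketch of how they could be consumed, assuming the `Statistics` fields
added here (the `summarize` helper is illustrative only, not part of the
patch):

    // Illustrative only: condense the new counters into a one-line summary.
    fn summarize(stats: &gix_blame::Statistics) -> String {
        format!(
            "traversed {} commits, decoded {} trees, diffed {} trees and {} blobs",
            stats.commits_traversed, stats.trees_decoded, stats.trees_diffed, stats.blobs_diffed
        )
    }

A caller could log this next to, or instead of, the `Debug` representation
that the CLI now writes to its optional `err` stream.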
--- Cargo.lock | 1 + gitoxide-core/src/repository/blame.rs | 12 +++- gix-blame/Cargo.toml | 1 + gix-blame/src/file/function.rs | 99 ++++++++++++++++++--------- gix-blame/src/lib.rs | 2 +- gix-blame/src/types.rs | 19 +++++ src/plumbing/main.rs | 6 +- src/plumbing/options/mod.rs | 4 ++ 8 files changed, 106 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca292869281..2df79331bb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1548,6 +1548,7 @@ dependencies = [ "gix-odb", "gix-ref 0.49.1", "gix-testtools", + "gix-trace 0.1.11", "gix-traverse 0.43.1", "gix-worktree 0.38.0", ] diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index bfb86661be2..d33d34f3c4a 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -1,19 +1,29 @@ use gix::bstr::BStr; use std::ffi::OsStr; -pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Write) -> anyhow::Result<()> { +pub fn blame_file( + mut repo: gix::Repository, + file: &OsStr, + out: impl std::io::Write, + err: Option<&mut dyn std::io::Write>, +) -> anyhow::Result<()> { repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); let suspect = repo.head()?.peel_to_commit_in_place()?; let traverse = gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::>) + .with_commit_graph(repo.commit_graph_if_enabled()?) .build()?; let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; let file_path: &BStr = gix::path::os_str_into_bstr(file)?; let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file_path)?; + let statistics = outcome.statistics; write_blame_entries(out, outcome)?; + if let Some(err) = err { + writeln!(err, "{statistics:#?}")?; + } Ok(()) } diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index ed0018bc9ee..039f4788bfd 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -14,6 +14,7 @@ rust-version = "1.65" doctest = false [dependencies] +gix-trace = { version = "^0.1.11", path = "../gix-trace" } gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"] } gix-object = { version = "^0.46.0", path = "../gix-object" } gix-hash = { version = "^0.15.0", path = "../gix-hash" } diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 4ec6d24780f..b869b81a726 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,5 +1,5 @@ use super::{process_changes, Change, Offset, UnblamedHunk}; -use crate::{BlameEntry, Outcome}; +use crate::{BlameEntry, Outcome, Statistics}; use gix_diff::blob::intern::TokenSource; use gix_hash::ObjectId; use gix_object::{bstr::BStr, FindExt}; @@ -62,9 +62,12 @@ pub fn file( let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { todo!("return actual error"); }; + let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect); - let (mut buf, mut buf2) = (Vec::new(), Vec::new()); - let original_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2).unwrap(); + let mut stats = Statistics::default(); + let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new()); + let original_file_entry = + find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats).unwrap(); let original_file_blob = odb.find_blob(&original_file_entry.oid, &mut buf).unwrap().data.to_vec(); let 
num_lines_in_original = { let mut interner = gix_diff::blob::intern::Interner::new(original_file_blob.len() / 100); @@ -81,9 +84,11 @@ pub fn file( )]; let mut out = Vec::new(); + let mut diff_state = gix_diff::tree::State::default(); 'outer: for item in traverse { let item = item?; let suspect = item.id; + stats.commits_traversed += 1; let mut parent_ids = item.parent_ids; if parent_ids.is_empty() { @@ -102,13 +107,15 @@ pub fn file( break; } - let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2) else { + let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats) else { continue; }; if parent_ids.len() == 1 { let parent_id = parent_ids.pop().expect("just validated there is exactly one"); - if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2) { + if let Some(parent_entry) = + find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2, &mut stats) + { if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are identical // which is why we can pass blame to the parent without further checks. @@ -119,7 +126,17 @@ pub fn file( } } - let Some(modification) = tree_diff_at_file_path(&odb, file_path, item.id, parent_id) else { + let Some(modification) = tree_diff_at_file_path( + &odb, + file_path, + item.id, + parent_id, + &mut stats, + &mut diff_state, + &mut buf, + &mut buf2, + &mut buf3, + ) else { // None of the changes affected the file we’re currently blaming. Pass blame to parent. for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -142,8 +159,7 @@ pub fn file( } gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); - + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats); hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -152,7 +168,8 @@ pub fn file( } } else { for parent_id in &parent_ids { - if let Some(parent_entry) = find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2) + if let Some(parent_entry) = + find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats) { if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are @@ -167,7 +184,17 @@ pub fn file( } for parent_id in parent_ids { - let changes_for_file_path = tree_diff_at_file_path(&odb, file_path, item.id, parent_id); + let changes_for_file_path = tree_diff_at_file_path( + &odb, + file_path, + item.id, + parent_id, + &mut stats, + &mut diff_state, + &mut buf, + &mut buf2, + &mut buf3, + ); let Some(modification) = changes_for_file_path else { // None of the changes affected the file we’re currently blaming. Pass blame // to parent. @@ -188,8 +215,7 @@ pub fn file( } gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. 
} => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path); - + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats); hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -215,6 +241,7 @@ pub fn file( Ok(Outcome { entries: coalesce_blame_entries(out), blob: original_file_blob, + statistics: stats, }) } @@ -262,42 +289,37 @@ fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { }) } +#[allow(clippy::too_many_arguments)] fn tree_diff_at_file_path( odb: impl gix_object::Find + gix_object::FindHeader, file_path: &BStr, id: ObjectId, parent_id: ObjectId, + stats: &mut Statistics, + state: &mut gix_diff::tree::State, + commit_buf: &mut Vec, + lhs_tree_buf: &mut Vec, + rhs_tree_buf: &mut Vec, ) -> Option { - let mut buffer = Vec::new(); + let parent_tree = odb.find_commit(&parent_id, commit_buf).unwrap().tree(); + stats.commits_to_tree += 1; - let parent = odb.find_commit(&parent_id, &mut buffer).unwrap(); - - let mut buffer = Vec::new(); let parent_tree_iter = odb - .find(&parent.tree(), &mut buffer) + .find(&parent_tree, lhs_tree_buf) .unwrap() .try_into_tree_iter() .unwrap(); + stats.trees_decoded += 1; - let mut buffer = Vec::new(); - let commit = odb.find_commit(&id, &mut buffer).unwrap(); + let tree_id = odb.find_commit(&id, commit_buf).unwrap().tree(); + stats.commits_to_tree += 1; - let mut buffer = Vec::new(); - let tree_iter = odb - .find(&commit.tree(), &mut buffer) - .unwrap() - .try_into_tree_iter() - .unwrap(); + let tree_iter = odb.find(&tree_id, rhs_tree_buf).unwrap().try_into_tree_iter().unwrap(); + stats.trees_decoded += 1; let mut recorder = gix_diff::tree::Recorder::default(); - gix_diff::tree( - parent_tree_iter, - tree_iter, - gix_diff::tree::State::default(), - &odb, - &mut recorder, - ) - .unwrap(); + gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder).unwrap(); + stats.trees_diffed += 1; recorder.records.into_iter().find(|change| match change { gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, @@ -312,6 +334,7 @@ fn blob_changes( oid: ObjectId, previous_oid: ObjectId, file_path: &BStr, + stats: &mut Statistics, ) -> Vec { /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*. 
struct ChangeRecorder { @@ -391,6 +414,7 @@ fn blob_changes( let number_of_lines_in_destination = input.after.len(); let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap()); + stats.blobs_diffed += 1; gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) } @@ -400,12 +424,19 @@ fn find_path_entry_in_commit( file_path: &BStr, buf: &mut Vec, buf2: &mut Vec, + stats: &mut Statistics, ) -> Option { let commit_id = odb.find_commit(commit, buf).unwrap().tree(); let tree_iter = odb.find_tree_iter(&commit_id, buf).unwrap(); + stats.commits_to_tree += 1; + stats.trees_decoded += 1; tree_iter - .lookup_entry(odb, buf2, file_path.split(|b| *b == b'/')) + .lookup_entry( + odb, + buf2, + file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1), + ) .unwrap() } diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index 7a7ebbcc24b..6ea0a3c61e5 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -15,7 +15,7 @@ #![forbid(unsafe_code)] mod types; -pub use types::{BlameEntry, Outcome}; +pub use types::{BlameEntry, Outcome, Statistics}; mod file; pub use file::function::file; diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index 3def1bddf9b..6dea55be399 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -8,12 +8,31 @@ use gix_hash::ObjectId; use gix_object::bstr::BString; /// The outcome of [`file()`](crate::file()). +#[derive(Debug, Clone)] pub struct Outcome { /// One entry in sequential order, to associate a hunk in the original file with the commit (and its lines) /// that introduced it. pub entries: Vec, /// A buffer with the file content of the *Original File*, ready for tokenization. pub blob: Vec, + /// Additional information about the amount of work performed to produce the blame. + pub statistics: Statistics, +} + +/// Additional information about the performed operations. +#[derive(Debug, Default, Copy, Clone)] +pub struct Statistics { + /// The amount of commits it traversed until the blame was complete. + pub commits_traversed: usize, + /// The amount of commits whose trees were extracted. + pub commits_to_tree: usize, + /// The amount of trees that were decoded to find the entry of the file to blame. + pub trees_decoded: usize, + /// The amount of fully-fledged tree-diffs to see if the filepath was added, deleted or modified. + pub trees_diffed: usize, + /// The amount of blobs there were compared to each other to learn what changed between commits. + /// Note that in order to diff a blob, one needs to load both versions from the database. 
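The counters above are plain `usize` fields on a `Copy + Default` struct, so callers can read them directly once `Outcome::statistics` is returned. A minimal sketch, not part of the patch, that relies only on the field names introduced here:

    fn summarize(stats: &gix_blame::Statistics) -> String {
        // Each field is a simple counter filled in while `file()` runs.
        format!(
            "commits traversed: {}, commits resolved to trees: {}, trees decoded: {}, tree-diffs: {}, blob-diffs: {}",
            stats.commits_traversed,
            stats.commits_to_tree,
            stats.trees_decoded,
            stats.trees_diffed,
            stats.blobs_diffed
        )
    }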
+ pub blobs_diffed: usize, } impl Outcome { diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 20c0db777cc..625f9733268 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1533,14 +1533,16 @@ pub fn main() -> Result<()> { }, ), }, - Subcommands::Blame { file } => prepare_and_run( + Subcommands::Blame { statistics, file } => prepare_and_run( "blame", trace, verbose, progress, progress_keep_open, None, - move |_progress, out, _err| core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out), + move |_progress, out, err| { + core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out, statistics.then_some(err)) + }, ), Subcommands::Completions { shell, out_dir } => { let mut app = Args::command(); diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs index 99d66a39861..a1f37b08e13 100644 --- a/src/plumbing/options/mod.rs +++ b/src/plumbing/options/mod.rs @@ -153,6 +153,10 @@ pub enum Subcommands { Free(free::Subcommands), /// Blame lines in a file Blame { + /// Print additional statistics to help understanding performance. + #[clap(long, short = 's')] + statistics: bool, + /// The file to create the blame information for. file: std::ffi::OsString, }, /// Generate shell completions to stdout or a directory. From 845d96a4ffff89703a8c3815ac52adc7f2b286f6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 24 Dec 2024 16:14:16 +0100 Subject: [PATCH 09/16] add `Error` type --- Cargo.lock | 1 + gix-blame/Cargo.toml | 2 + gix-blame/src/error.rs | 30 ++++++++ gix-blame/src/file/function.rs | 122 ++++++++++++++++----------------- gix-blame/src/file/mod.rs | 15 +++- gix-blame/src/lib.rs | 2 + gix-blame/tests/blame.rs | 18 ++--- 7 files changed, 117 insertions(+), 73 deletions(-) create mode 100644 gix-blame/src/error.rs diff --git a/Cargo.lock b/Cargo.lock index 2df79331bb8..365fb927fa5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,6 +1551,7 @@ dependencies = [ "gix-trace 0.1.11", "gix-traverse 0.43.1", "gix-worktree 0.38.0", + "thiserror 2.0.3", ] [[package]] diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index 039f4788bfd..fc4baf3fe48 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -21,6 +21,8 @@ gix-hash = { version = "^0.15.0", path = "../gix-hash" } gix-worktree = { version = "^0.38.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } gix-traverse = { version = "^0.43.0", path = "../gix-traverse" } +thiserror = "2.0.0" + [dev-dependencies] gix-ref = { version = "^0.49.0", path = "../gix-ref" } gix-filter = { version = "^0.16.0", path = "../gix-filter" } diff --git a/gix-blame/src/error.rs b/gix-blame/src/error.rs new file mode 100644 index 00000000000..daedf0aecd7 --- /dev/null +++ b/gix-blame/src/error.rs @@ -0,0 +1,30 @@ +use gix_object::bstr::BString; + +/// The error returned by [file()](crate::file()). +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("No commit was given")] + EmptyTraversal, + #[error(transparent)] + BlobDiffSetResource(#[from] gix_diff::blob::platform::set_resource::Error), + #[error(transparent)] + BlobDiffPrepare(#[from] gix_diff::blob::platform::prepare_diff::Error), + #[error("The file to blame at '{file_path}' wasn't found in the first commit at {commit_id}")] + FileMissing { + /// The file-path to the object to blame. + file_path: BString, + /// The commit whose tree didn't contain `file_path`. 
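With a dedicated error type in place, the earlier `todo!("return actual error")` and `unwrap()` calls can surface as values instead of panics. A hedged sketch of how a consumer might report the variants added here; variant and field names are taken from this enum, everything else is illustrative:

    fn describe(err: &gix_blame::Error) -> String {
        match err {
            // The traversal did not yield a single commit to start from.
            gix_blame::Error::EmptyTraversal => "no commit was given to start the blame from".into(),
            // `file_path` is not present in the tree of the starting commit.
            gix_blame::Error::FileMissing { file_path, commit_id } => {
                format!("'{file_path}' was not found in {commit_id}")
            }
            // All remaining variants carry a `Display` implementation via `thiserror`.
            other => other.to_string(),
        }
    }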
+ commit_id: gix_hash::ObjectId, + }, + #[error("Couldn't find commit or tree in the object database")] + FindObject(#[from] gix_object::find::Error), + #[error("Could not find existing blob or commit")] + FindExistingObject(#[from] gix_object::find::existing_object::Error), + #[error("Could not find existing iterator over a tree")] + FindExistingIter(#[from] gix_object::find::existing_iter::Error), + #[error("Failed to obtain the next commit in the commit-graph traversal")] + Traverse(#[source] Box), + #[error(transparent)] + DiffTree(#[from] gix_diff::tree::Error), +} diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index b869b81a726..5b3ccd7cc05 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,5 +1,5 @@ use super::{process_changes, Change, Offset, UnblamedHunk}; -use crate::{BlameEntry, Outcome, Statistics}; +use crate::{BlameEntry, Error, Outcome, Statistics}; use gix_diff::blob::intern::TokenSource; use gix_hash::ObjectId; use gix_object::{bstr::BStr, FindExt}; @@ -57,18 +57,24 @@ pub fn file( traverse: impl IntoIterator>, resource_cache: &mut gix_diff::blob::Platform, file_path: &BStr, -) -> Result { +) -> Result +where + E: Into>, +{ let mut traverse = traverse.into_iter().peekable(); let Some(Ok(suspect)) = traverse.peek().map(|res| res.as_ref().map(|item| item.id)) else { - todo!("return actual error"); + return Err(Error::EmptyTraversal); }; let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect); let mut stats = Statistics::default(); let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new()); - let original_file_entry = - find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats).unwrap(); - let original_file_blob = odb.find_blob(&original_file_entry.oid, &mut buf).unwrap().data.to_vec(); + let original_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? + .ok_or_else(|| Error::FileMissing { + file_path: file_path.to_owned(), + commit_id: suspect, + })?; + let original_file_blob = odb.find_blob(&original_file_entry.oid, &mut buf)?.data.to_vec(); let num_lines_in_original = { let mut interner = gix_diff::blob::intern::Interner::new(original_file_blob.len() / 100); tokens_for_diffing(&original_file_blob) @@ -78,7 +84,7 @@ pub fn file( }; let mut hunks_to_blame = vec![UnblamedHunk::new( - 0..num_lines_in_original.try_into().unwrap(), + 0..num_lines_in_original as u32, suspect, Offset::Added(0), )]; @@ -86,7 +92,7 @@ pub fn file( let mut out = Vec::new(); let mut diff_state = gix_diff::tree::State::default(); 'outer: for item in traverse { - let item = item?; + let item = item.map_err(|err| Error::Traverse(err.into()))?; let suspect = item.id; stats.commits_traversed += 1; @@ -107,14 +113,14 @@ pub fn file( break; } - let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats) else { + let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? else { continue; }; if parent_ids.len() == 1 { let parent_id = parent_ids.pop().expect("just validated there is exactly one"); if let Some(parent_entry) = - find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2, &mut stats) + find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2, &mut stats)? 
{ if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are identical @@ -126,7 +132,7 @@ pub fn file( } } - let Some(modification) = tree_diff_at_file_path( + let changes_for_file_path = tree_diff_at_file_path( &odb, file_path, item.id, @@ -136,7 +142,8 @@ pub fn file( &mut buf, &mut buf2, &mut buf3, - ) else { + )?; + let Some(modification) = changes_for_file_path else { // None of the changes affected the file we’re currently blaming. Pass blame to parent. for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -159,7 +166,7 @@ pub fn file( } gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats); + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -169,7 +176,7 @@ pub fn file( } else { for parent_id in &parent_ids { if let Some(parent_entry) = - find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats) + find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)? { if entry.oid == parent_entry.oid { // The blobs storing the blamed file in `entry` and `parent_entry` are @@ -194,7 +201,7 @@ pub fn file( &mut buf, &mut buf2, &mut buf3, - ); + )?; let Some(modification) = changes_for_file_path else { // None of the changes affected the file we’re currently blaming. Pass blame // to parent. @@ -215,7 +222,7 @@ pub fn file( } gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. 
} => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats); + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.pass_blame(suspect, parent_id); @@ -300,32 +307,28 @@ fn tree_diff_at_file_path( commit_buf: &mut Vec, lhs_tree_buf: &mut Vec, rhs_tree_buf: &mut Vec, -) -> Option { - let parent_tree = odb.find_commit(&parent_id, commit_buf).unwrap().tree(); +) -> Result, Error> { + let parent_tree = odb.find_commit(&parent_id, commit_buf)?.tree(); stats.commits_to_tree += 1; - let parent_tree_iter = odb - .find(&parent_tree, lhs_tree_buf) - .unwrap() - .try_into_tree_iter() - .unwrap(); + let parent_tree_iter = odb.find_tree_iter(&parent_tree, lhs_tree_buf)?; stats.trees_decoded += 1; - let tree_id = odb.find_commit(&id, commit_buf).unwrap().tree(); + let tree_id = odb.find_commit(&id, commit_buf)?.tree(); stats.commits_to_tree += 1; - let tree_iter = odb.find(&tree_id, rhs_tree_buf).unwrap().try_into_tree_iter().unwrap(); + let tree_iter = odb.find_tree_iter(&tree_id, rhs_tree_buf)?; stats.trees_decoded += 1; let mut recorder = gix_diff::tree::Recorder::default(); - gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder).unwrap(); + gix_diff::tree(parent_tree_iter, tree_iter, state, &odb, &mut recorder)?; stats.trees_diffed += 1; - recorder.records.into_iter().find(|change| match change { + Ok(recorder.records.into_iter().find(|change| match change { gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, gix_diff::tree::recorder::Change::Addition { path, .. } => path == file_path, gix_diff::tree::recorder::Change::Deletion { path, .. } => path == file_path, - }) + })) } fn blob_changes( @@ -335,7 +338,7 @@ fn blob_changes( previous_oid: ObjectId, file_path: &BStr, stats: &mut Statistics, -) -> Vec { +) -> Result, Error> { /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*. 
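Most conversions in this patch come for free through the `#[from]` attributes, which is what lets `?` replace the previous `unwrap()` calls. The traversal is the exception: its error type is generic, so `file()` boxes it into the `Traverse` variant via `map_err`. A standalone sketch of that conversion, assuming the boxed trait object is `dyn std::error::Error + Send + Sync` as suggested by the bound on `E`:

    // Mirrors `item.map_err(|err| Error::Traverse(err.into()))?` inside `file()`.
    fn wrap_traverse_error<E>(err: E) -> gix_blame::Error
    where
        E: Into<Box<dyn std::error::Error + Send + Sync>>,
    {
        gix_blame::Error::Traverse(err.into())
    }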
struct ChangeRecorder { last_seen_after_end: u32, @@ -387,35 +390,32 @@ fn blob_changes( } } - resource_cache - .set_resource( - previous_oid, - gix_object::tree::EntryKind::Blob, - file_path, - gix_diff::blob::ResourceKind::OldOrSource, - &odb, - ) - .unwrap(); - resource_cache - .set_resource( - oid, - gix_object::tree::EntryKind::Blob, - file_path, - gix_diff::blob::ResourceKind::NewOrDestination, - &odb, - ) - .unwrap(); - - let outcome = resource_cache.prepare_diff().unwrap(); + resource_cache.set_resource( + previous_oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::OldOrSource, + &odb, + )?; + resource_cache.set_resource( + oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::NewOrDestination, + &odb, + )?; + + let outcome = resource_cache.prepare_diff()?; let input = gix_diff::blob::intern::InternedInput::new( tokens_for_diffing(outcome.old.data.as_slice().unwrap_or_default()), tokens_for_diffing(outcome.new.data.as_slice().unwrap_or_default()), ); let number_of_lines_in_destination = input.after.len(); - let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap()); + let change_recorder = ChangeRecorder::new(number_of_lines_in_destination as u32); + let res = gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder); stats.blobs_diffed += 1; - gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) + Ok(res) } fn find_path_entry_in_commit( @@ -425,19 +425,19 @@ fn find_path_entry_in_commit( buf: &mut Vec, buf2: &mut Vec, stats: &mut Statistics, -) -> Option { - let commit_id = odb.find_commit(commit, buf).unwrap().tree(); - let tree_iter = odb.find_tree_iter(&commit_id, buf).unwrap(); +) -> Result, Error> { + let commit_id = odb.find_commit(commit, buf)?.tree(); stats.commits_to_tree += 1; + let tree_iter = odb.find_tree_iter(&commit_id, buf)?; stats.trees_decoded += 1; - tree_iter - .lookup_entry( - odb, - buf2, - file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1), - ) - .unwrap() + let res = tree_iter.lookup_entry( + odb, + buf2, + file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1), + )?; + stats.trees_decoded -= 1; + Ok(res) } /// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index cdc270bf752..7132af04048 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -216,7 +216,10 @@ fn process_change( } } (Some(hunk), Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted))) => { - let range_in_suspect = hunk.suspects.get(&suspect).expect("TODO"); + let range_in_suspect = hunk + .suspects + .get(&suspect) + .expect("Internal and we know suspect is present"); if line_number_in_destination < range_in_suspect.start { // <---> (hunk) @@ -377,7 +380,10 @@ impl UnblamedHunk { } fn offset_for(&self, suspect: ObjectId) -> Offset { - let range_in_suspect = self.suspects.get(&suspect).expect("TODO"); + let range_in_suspect = self + .suspects + .get(&suspect) + .expect("Internal and we know suspect is present"); if self.range_in_blamed_file.start > range_in_suspect.start { Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start) @@ -437,7 +443,10 @@ impl BlameEntry { /// Create an offset from a portion of the *Original File*. 
fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self { - let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).unwrap(); + let range_in_original_file = unblamed_hunk + .suspects + .get(&commit_id) + .expect("Private and only called when we now `commit_id` is in the suspect list"); Self { range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(), diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index 6ea0a3c61e5..c504835e50e 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -14,6 +14,8 @@ #![deny(rust_2018_idioms, missing_docs)] #![forbid(unsafe_code)] +mod error; +pub use error::Error; mod types; pub use types::{BlameEntry, Outcome, Statistics}; diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 08664e9045e..73c2597933f 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -182,29 +182,29 @@ impl Fixture { macro_rules! mktest { ($name:ident, $case:expr, $number_of_lines:literal) => { #[test] - fn $name() { + fn $name() -> gix_testtools::Result<()> { let Fixture { odb, mut resource_cache, commits, - } = Fixture::new().unwrap(); + } = Fixture::new()?; let lines_blamed = gix_blame::file( &odb, commits, &mut resource_cache, format!("{}.txt", $case).as_str().into(), - ) - .unwrap() + )? .entries; assert_eq!(lines_blamed.len(), $number_of_lines); let git_dir = fixture_path().join(".git"); - let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case))).unwrap(); + let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case)))?; assert_eq!(baseline.len(), $number_of_lines); assert_eq!(lines_blamed, baseline); + Ok(()) } }; } @@ -235,11 +235,11 @@ mktest!( ); mktest!(file_only_changed_in_branch, "file-only-changed-in-branch", 2); +/// As of 2024-09-24, these tests are expected to fail. +/// +/// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 #[test] -#[ignore = "TBD: figure out what the problem is"] -// As of 2024-09-24, these tests are expected to fail. -// -// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 +#[should_panic = "empty-lines-myers"] fn diff_disparity() { for case in ["empty-lines-myers", "empty-lines-histogram"] { let Fixture { From b736ace18e8996b410a597fb4f43bf28f422dfc5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 24 Dec 2024 16:48:56 +0100 Subject: [PATCH 10/16] Replace todos!() with assertions or remove them. --- crate-status.md | 1 + gix-blame/src/file/function.rs | 163 +++++++++++++-------------------- gix-blame/src/file/mod.rs | 27 +++--- 3 files changed, 76 insertions(+), 115 deletions(-) diff --git a/crate-status.md b/crate-status.md index 44fb8d7c4d1..64cae232207 100644 --- a/crate-status.md +++ b/crate-status.md @@ -371,6 +371,7 @@ Check out the [performance discussion][gix-diff-performance] as well. 
- [ ] shallow-history support - [ ] rename tracking (track different paths through history) - [ ] commits to ignore +- [ ] pass all blame-cornercases (from Git) * **Performance-Improvements** - [ ] use commit-graph bloom filter for performance - [ ] traverse input-commits in correct order without `compute_indegrees_to_depth()` diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 5b3ccd7cc05..0a18ee60e96 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -91,51 +91,48 @@ where let mut out = Vec::new(); let mut diff_state = gix_diff::tree::State::default(); - 'outer: for item in traverse { - let item = item.map_err(|err| Error::Traverse(err.into()))?; - let suspect = item.id; + 'outer: while let Some(item) = traverse.next() { + let commit = item.map_err(|err| Error::Traverse(err.into()))?; + let suspect = commit.id; stats.commits_traversed += 1; - let mut parent_ids = item.parent_ids; + let parent_ids = commit.parent_ids; if parent_ids.is_empty() { - // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of - // the last `item` that was yielded by `traverse`, so it makes sense to assign the - // remaining lines to it, even though we don’t explicitly check whether that is true - // here. We could perhaps use `needed_to_obtain` to compare `suspect` against an empty - // tree to validate this assumption. - out.extend( - hunks_to_blame - .iter() - .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), - ); - - hunks_to_blame.clear(); - break; + if traverse.peek().is_none() { + // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of + // the last `item` that was yielded by `traverse`, so it makes sense to assign the + // remaining lines to it, even though we don’t explicitly check whether that is true + // here. We could perhaps use diff-tree-to-tree to compare `suspect` + // against an empty tree to validate this assumption. + unblamed_to_out(&mut hunks_to_blame, &mut out, suspect); + break; + } else { + // There is more, keep looking. + continue; + } } let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? else { continue; }; - if parent_ids.len() == 1 { - let parent_id = parent_ids.pop().expect("just validated there is exactly one"); + for parent_id in &parent_ids { if let Some(parent_entry) = - find_path_entry_in_commit(&odb, &parent_id, file_path, &mut buf, &mut buf2, &mut stats)? + find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)? { if entry.oid == parent_entry.oid { - // The blobs storing the blamed file in `entry` and `parent_entry` are identical - // which is why we can pass blame to the parent without further checks. - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - continue; + pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame); + continue 'outer; } } + } + let more_than_one_parent = parent_ids.len() > 1; + for parent_id in parent_ids { let changes_for_file_path = tree_diff_at_file_path( &odb, file_path, - item.id, + commit.id, parent_id, &mut stats, &mut diff_state, @@ -144,92 +141,42 @@ where &mut buf3, )?; let Some(modification) = changes_for_file_path else { - // None of the changes affected the file we’re currently blaming. Pass blame to parent. 
- for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - continue; - }; - - match modification { - gix_diff::tree::recorder::Change::Addition { .. } => { - // Every line that has not been blamed yet on a commit, is expected to have been - // added when the file was added to the repository. - out.extend( - hunks_to_blame - .iter() - .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), - ); - - hunks_to_blame.clear(); - break; - } - gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), - gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; - hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - } - } - } else { - for parent_id in &parent_ids { - if let Some(parent_entry) = - find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)? - { - if entry.oid == parent_entry.oid { - // The blobs storing the blamed file in `entry` and `parent_entry` are - // identical which is why we can pass blame to the parent without further - // checks. - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, *parent_id); - } - continue 'outer; - } - } - } - - for parent_id in parent_ids { - let changes_for_file_path = tree_diff_at_file_path( - &odb, - file_path, - item.id, - parent_id, - &mut stats, - &mut diff_state, - &mut buf, - &mut buf2, - &mut buf3, - )?; - let Some(modification) = changes_for_file_path else { + if more_than_one_parent { // None of the changes affected the file we’re currently blaming. Pass blame // to parent. for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.clone_blame(suspect, parent_id); } + } else { + pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame); + } + continue; + }; - continue; - }; - - match modification { - gix_diff::tree::recorder::Change::Addition { .. } => { + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + if more_than_one_parent { // Do nothing under the assumption that this always (or almost always) // implies that the file comes from a different parent, compared to which // it was modified, not added. // // TODO: I still have to figure out whether this is correct in all cases. + } else { + unblamed_to_out(&mut hunks_to_blame, &mut out, suspect); + break; } - gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), - gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { - let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; - hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); - for unblamed_hunk in &mut hunks_to_blame { - unblamed_hunk.pass_blame(suspect, parent_id); - } - } + } + gix_diff::tree::recorder::Change::Deletion { .. } => { + unreachable!("We already found file_path in suspect^{{tree}}, so it can't be deleted") + } + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. 
} => { + let changes = blob_changes(&odb, resource_cache, oid, previous_oid, file_path, &mut stats)?; + hunks_to_blame = process_changes(&mut out, hunks_to_blame, changes, suspect); + pass_blame_from_to(suspect, parent_id, &mut hunks_to_blame); } } + } + if more_than_one_parent { for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.remove_blame(suspect); } @@ -252,6 +199,24 @@ where }) } +/// The blobs storing the blamed file in `entry` and `parent_entry` are identical which is why +/// we can pass blame to the parent without further checks. +fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec) { + for unblamed_hunk in hunks_to_blame { + unblamed_hunk.pass_blame(from, to); + } +} + +fn unblamed_to_out(hunks_to_blame: &mut Vec, out: &mut Vec, suspect: ObjectId) { + // Every line that has not been blamed yet on a commit, is expected to have been + // added when the file was added to the repository. + out.extend( + hunks_to_blame + .drain(..) + .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), + ); +} + /// This function merges adjacent blame entries. It merges entries that are adjacent both in the /// blamed file and in the original file that introduced them. This follows `git`’s /// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 7132af04048..52a5f67a6f1 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -90,13 +90,12 @@ fn process_change( }; let range_in_suspect = range_in_suspect.clone(); - - match ( - range_in_suspect.contains(&added.start), - // Since `added` is a range that is not inclusive at the end, `added.end` is - // not part of `added`. The first line that is `added.end - 1`. - (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end, - ) { + let range_contains_added_start = range_in_suspect.contains(&added.start); + // Since `added` is a range that is not inclusive at the end, `added.end` is + // not part of `added`. The first line that is `added.end - 1`. + let range_contains_added_end = + (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end; + match (range_contains_added_start, range_contains_added_end) { (true, true) => { // <----------> (hunk) // <---> (added) @@ -147,11 +146,7 @@ fn process_change( new_hunk.offset_for(suspect), )); - if added.end > range_in_suspect.end { - (None, Some(Change::Added(added, number_of_lines_deleted))) - } else { - todo!(); - } + (None, Some(Change::Added(added, number_of_lines_deleted))) } (false, true) => { // <-------> (hunk) @@ -442,15 +437,15 @@ impl BlameEntry { } /// Create an offset from a portion of the *Original File*. - fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self { + fn from_unblamed_hunk(mut unblamed_hunk: UnblamedHunk, commit_id: ObjectId) -> Self { let range_in_original_file = unblamed_hunk .suspects - .get(&commit_id) + .remove(&commit_id) .expect("Private and only called when we now `commit_id` is in the suspect list"); Self { - range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(), - range_in_original_file: range_in_original_file.clone(), + range_in_blamed_file: unblamed_hunk.range_in_blamed_file, + range_in_original_file, commit_id, } } From b7f1468f0fe38a50ad3414efb5efcf3ac0d2fddb Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 24 Dec 2024 19:05:46 +0100 Subject: [PATCH 11/16] swap blamed-file and original-file variable names. 
They are used with inverse meaning compared to the current documentation. It's easier to adjust the variable names. --- gitoxide-core/src/repository/blame.rs | 2 +- gix-blame/src/file/function.rs | 27 ++++++------ gix-blame/src/file/mod.rs | 31 +++++++------- gix-blame/src/file/tests.rs | 62 +++++++++++++-------------- gix-blame/src/lib.rs | 6 +-- gix-blame/src/types.rs | 27 ++++++------ gix-blame/tests/blame.rs | 12 +++--- 7 files changed, 82 insertions(+), 85 deletions(-) diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index d33d34f3c4a..1001df0c7fe 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -31,7 +31,7 @@ fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcom for (entry, lines_in_hunk) in outcome.entries_with_lines() { for ((actual_lno, source_lno), line) in entry .range_in_blamed_file - .zip(entry.range_in_original_file) + .zip(entry.range_in_source_file) .zip(lines_in_hunk) { write!( diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 0a18ee60e96..3729ed0e905 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -29,9 +29,9 @@ use std::ops::Range; /// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that /// represent the worktree state. /// We begin with a single *Unblamed Hunk* and a single suspect, usually the `HEAD` commit as the commit containing the -/// *Original File*, so that it contains the entire file, with the first commit being a candidate for the entire *Original File*. +/// *Blamed File*, so that it contains the entire file, with the first commit being a candidate for the entire *Blamed File*. /// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`. -/// If so, we have found a *Blamed File* and a *Suspect* commit, and have hunks that represent these changes. +/// If so, we have found a *Source File* and a *Suspect* commit, and have hunks that represent these changes. /// Now the *Unblamed Hunk* is split at the boundaries of each matching change, creating a new *Unblamed Hunk* on each side, /// along with a [`BlameEntry`] to represent the match. /// This is repeated until there are no non-empty *Unblamed Hunk*s left. @@ -69,22 +69,22 @@ where let mut stats = Statistics::default(); let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new()); - let original_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? + let blamed_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? 
.ok_or_else(|| Error::FileMissing { file_path: file_path.to_owned(), commit_id: suspect, })?; - let original_file_blob = odb.find_blob(&original_file_entry.oid, &mut buf)?.data.to_vec(); - let num_lines_in_original = { - let mut interner = gix_diff::blob::intern::Interner::new(original_file_blob.len() / 100); - tokens_for_diffing(&original_file_blob) + let blamed_file_blob = odb.find_blob(&blamed_file_entry.oid, &mut buf)?.data.to_vec(); + let num_lines_in_blamed = { + let mut interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100); + tokens_for_diffing(&blamed_file_blob) .tokenize() .map(|token| interner.intern(token)) .count() }; let mut hunks_to_blame = vec![UnblamedHunk::new( - 0..num_lines_in_original as u32, + 0..num_lines_in_blamed as u32, suspect, Offset::Added(0), )]; @@ -194,7 +194,7 @@ where out.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); Ok(Outcome { entries: coalesce_blame_entries(out), - blob: original_file_blob, + blob: blamed_file_blob, statistics: stats, }) } @@ -218,7 +218,7 @@ fn unblamed_to_out(hunks_to_blame: &mut Vec, out: &mut Vec) -> Vec { if previous_entry.commit_id == entry.commit_id && previous_entry.range_in_blamed_file.end == entry.range_in_blamed_file.start // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`. - && previous_entry.range_in_original_file.end == entry.range_in_original_file.start + && previous_entry.range_in_source_file.end == entry.range_in_source_file.start { let coalesced_entry = BlameEntry { range_in_blamed_file: previous_entry.range_in_blamed_file.start..entry.range_in_blamed_file.end, - range_in_original_file: previous_entry.range_in_original_file.start - ..entry.range_in_original_file.end, + range_in_source_file: previous_entry.range_in_source_file.start..entry.range_in_source_file.end, commit_id: previous_entry.commit_id, }; @@ -304,7 +303,7 @@ fn blob_changes( file_path: &BStr, stats: &mut Statistics, ) -> Result, Error> { - /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Blamed File*. + /// Record all [`Change`]s to learn about additions, deletions and unchanged portions of a *Source File*. struct ChangeRecorder { last_seen_after_end: u32, hunks: Vec, diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 52a5f67a6f1..24953ba8784 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -8,11 +8,11 @@ use crate::types::{Change, Offset, UnblamedHunk}; pub(super) mod function; -/// Compare a section from the *Original File* (`hunk`) with a change from a diff and see if there +/// Compare a section from the *Blamed File* (`hunk`) with a change from a diff and see if there /// is an intersection with `change`. Based on that intersection, we may generate a [`BlameEntry`] for `out` /// and/or split the `hunk` into multiple. /// -/// This is the core of the blame implementation as it matches regions in *Blamed Files* to the *Original File*. +/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*. fn process_change( out: &mut Vec, new_hunks_to_blame: &mut Vec, @@ -407,45 +407,44 @@ impl UnblamedHunk { } impl BlameEntry { - /// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_original_file`. 
- fn with_offset(range_in_original_file: Range, commit_id: ObjectId, offset: Offset) -> Self { + /// Create a new instance by creating `range_in_blamed_file` after applying `offset` to `range_in_source_file`. + fn with_offset(range_in_source_file: Range, commit_id: ObjectId, offset: Offset) -> Self { debug_assert!( - range_in_original_file.end > range_in_original_file.start, - "{range_in_original_file:?}" + range_in_source_file.end > range_in_source_file.start, + "{range_in_source_file:?}" ); match offset { Offset::Added(added) => Self { - range_in_blamed_file: (range_in_original_file.start + added)..(range_in_original_file.end + added), - range_in_original_file, + range_in_blamed_file: (range_in_source_file.start + added)..(range_in_source_file.end + added), + range_in_source_file, commit_id, }, Offset::Deleted(deleted) => { debug_assert!( - range_in_original_file.start >= deleted, - "{range_in_original_file:?} {offset:?}" + range_in_source_file.start >= deleted, + "{range_in_source_file:?} {offset:?}" ); Self { - range_in_blamed_file: (range_in_original_file.start - deleted) - ..(range_in_original_file.end - deleted), - range_in_original_file, + range_in_blamed_file: (range_in_source_file.start - deleted)..(range_in_source_file.end - deleted), + range_in_source_file, commit_id, } } } } - /// Create an offset from a portion of the *Original File*. + /// Create an offset from a portion of the *Blamed File*. fn from_unblamed_hunk(mut unblamed_hunk: UnblamedHunk, commit_id: ObjectId) -> Self { - let range_in_original_file = unblamed_hunk + let range_in_source_file = unblamed_hunk .suspects .remove(&commit_id) .expect("Private and only called when we now `commit_id` is in the suspect list"); Self { range_in_blamed_file: unblamed_hunk.range_in_blamed_file, - range_in_original_file, + range_in_source_file, commit_id, } } diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs index 35e63d6edd4..f01e7c8034f 100644 --- a/gix-blame/src/file/tests.rs +++ b/gix-blame/src/file/tests.rs @@ -70,7 +70,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 0..3, - range_in_original_file: 0..3, + range_in_source_file: 0..3, commit_id: suspect }] ); @@ -106,7 +106,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 2..3, - range_in_original_file: 2..3, + range_in_source_file: 2..3, commit_id: suspect }] ); @@ -148,7 +148,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 12..13, - range_in_original_file: 12..13, + range_in_source_file: 12..13, commit_id: suspect }] ); @@ -191,7 +191,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 14..15, - range_in_original_file: 9..10, + range_in_source_file: 9..10, commit_id: suspect }] ); @@ -233,7 +233,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 0..3, - range_in_original_file: 0..3, + range_in_source_file: 0..3, commit_id: suspect }] ); @@ -270,7 +270,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 1..4, - range_in_original_file: 0..3, + range_in_source_file: 0..3, commit_id: suspect }] ); @@ -307,7 +307,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 4..6, - range_in_original_file: 3..5, + range_in_source_file: 3..5, commit_id: suspect }] ); @@ -344,7 +344,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 23..24, - range_in_original_file: 25..26, + range_in_source_file: 25..26, commit_id: suspect }] ); @@ -375,7 +375,7 @@ mod process_change { 
lines_blamed, [BlameEntry { range_in_blamed_file: 23..24, - range_in_original_file: 21..22, + range_in_source_file: 21..22, commit_id: suspect }] ); @@ -406,7 +406,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 107..109, - range_in_original_file: 106..108, + range_in_source_file: 106..108, commit_id: suspect }] ); @@ -443,7 +443,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 155..156, - range_in_original_file: 143..144, + range_in_source_file: 143..144, commit_id: suspect }] ); @@ -660,7 +660,7 @@ mod process_change { lines_blamed, [BlameEntry { range_in_blamed_file: 2..5, - range_in_original_file: 5..8, + range_in_source_file: 5..8, commit_id: suspect }] ); @@ -1011,7 +1011,7 @@ mod process_changes { lines_blamed, [BlameEntry { range_in_blamed_file: 0..4, - range_in_original_file: 0..4, + range_in_source_file: 0..4, commit_id: suspect }] ); @@ -1030,7 +1030,7 @@ mod process_changes { lines_blamed, [BlameEntry { range_in_blamed_file: 0..4, - range_in_original_file: 0..4, + range_in_source_file: 0..4, commit_id: suspect }] ); @@ -1049,7 +1049,7 @@ mod process_changes { lines_blamed, [BlameEntry { range_in_blamed_file: 2..4, - range_in_original_file: 2..4, + range_in_source_file: 2..4, commit_id: suspect }] ); @@ -1075,12 +1075,12 @@ mod process_changes { [ BlameEntry { range_in_blamed_file: 0..1, - range_in_original_file: 0..1, + range_in_source_file: 0..1, commit_id: suspect }, BlameEntry { range_in_blamed_file: 1..4, - range_in_original_file: 1..4, + range_in_source_file: 1..4, commit_id: suspect } ] @@ -1100,7 +1100,7 @@ mod process_changes { lines_blamed, [BlameEntry { range_in_blamed_file: 0..1, - range_in_original_file: 0..1, + range_in_source_file: 0..1, commit_id: suspect }] ); @@ -1113,7 +1113,7 @@ mod process_changes { let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); let mut lines_blamed: Vec = vec![BlameEntry { range_in_blamed_file: 0..2, - range_in_original_file: 0..2, + range_in_source_file: 0..2, commit_id: suspect, }]; let hunks_to_blame = vec![new_unblamed_hunk(2..6, suspect_2, Offset::Added(2))]; @@ -1125,12 +1125,12 @@ mod process_changes { [ BlameEntry { range_in_blamed_file: 0..2, - range_in_original_file: 0..2, + range_in_source_file: 0..2, commit_id: suspect }, BlameEntry { range_in_blamed_file: 2..3, - range_in_original_file: 0..1, + range_in_source_file: 0..1, commit_id: suspect_2 } ] @@ -1153,7 +1153,7 @@ mod process_changes { lines_blamed, [BlameEntry { range_in_blamed_file: 0..4, - range_in_original_file: 0..4, + range_in_source_file: 0..4, commit_id: suspect }] ); @@ -1178,7 +1178,7 @@ mod process_changes { let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); let mut lines_blamed: Vec = vec![BlameEntry { range_in_blamed_file: 0..1, - range_in_original_file: 0..1, + range_in_source_file: 0..1, commit_id: suspect, }]; let hunks_to_blame = vec![new_unblamed_hunk(1..3, suspect_2, Offset::Added(1))]; @@ -1190,12 +1190,12 @@ mod process_changes { [ BlameEntry { range_in_blamed_file: 0..1, - range_in_original_file: 0..1, + range_in_source_file: 0..1, commit_id: suspect }, BlameEntry { range_in_blamed_file: 1..2, - range_in_original_file: 0..1, + range_in_source_file: 0..1, commit_id: suspect_2 } ] @@ -1219,12 +1219,12 @@ mod process_changes { [ BlameEntry { range_in_blamed_file: 0..2, - range_in_original_file: 0..2, + range_in_source_file: 0..2, commit_id: suspect }, BlameEntry { range_in_blamed_file: 3..4, - range_in_original_file: 
3..4, + range_in_source_file: 3..4, commit_id: suspect } ] @@ -1237,7 +1237,7 @@ mod process_changes { let suspect = ObjectId::null(gix_hash::Kind::Sha1); let mut lines_blamed: Vec = vec![BlameEntry { range_in_blamed_file: 30..31, - range_in_original_file: 30..31, + range_in_source_file: 30..31, commit_id: suspect, }]; let hunks_to_blame = vec![ @@ -1264,12 +1264,12 @@ mod process_changes { [ BlameEntry { range_in_blamed_file: 16..17, - range_in_original_file: 16..17, + range_in_source_file: 16..17, commit_id: suspect }, BlameEntry { range_in_blamed_file: 30..31, - range_in_original_file: 30..31, + range_in_source_file: 30..31, commit_id: suspect } ] @@ -1308,7 +1308,7 @@ mod process_changes { lines_blamed, [BlameEntry { range_in_blamed_file: 0..4, - range_in_original_file: 0..4, + range_in_source_file: 0..4, commit_id: suspect }] ); diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index c504835e50e..489434b5b3d 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -2,15 +2,15 @@ //! //! ### Terminology //! -//! * **Original File** +//! * **Source File** //! - The file as it exists in `HEAD`. -//! - the initial state with all lines that we need to associate with a *Blamed File*. +//! - the initial state with all lines that we need to associate with a *Source File*. //! * **Blamed File** //! - A file at a version (i.e. commit) that introduces hunks into the final 'image'. //! * **Suspects** //! - The versions of the files that can contain hunks that we could use in the final 'image' //! - multiple at the same time as the commit-graph may split up. -//! - turns into *Blamed File* once we have found an association into the *Original File*. +//! - turns into *Source File* once we have found an association into the *Blamed File*. #![deny(rust_2018_idioms, missing_docs)] #![forbid(unsafe_code)] diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index 6dea55be399..af1c693e681 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -10,10 +10,10 @@ use gix_object::bstr::BString; /// The outcome of [`file()`](crate::file()). #[derive(Debug, Clone)] pub struct Outcome { - /// One entry in sequential order, to associate a hunk in the original file with the commit (and its lines) + /// One entry in sequential order, to associate a hunk in the blamed file with the source commit (and its lines) /// that introduced it. pub entries: Vec, - /// A buffer with the file content of the *Original File*, ready for tokenization. + /// A buffer with the file content of the *Blamed File*, ready for tokenization. pub blob: Vec, /// Additional information about the amount of work performed to produce the blame. pub statistics: Statistics, @@ -60,7 +60,7 @@ impl Outcome { } } -/// Describes the offset of a particular hunk relative to the *Original File*. +/// Describes the offset of a particular hunk relative to the *Blamed File*. #[derive(Clone, Copy, Debug, PartialEq)] pub enum Offset { /// The amount of lines to add. @@ -118,7 +118,7 @@ impl SubAssign for Offset { } } -/// A mapping of a section of the *Original File* to the section in a *Blamed File* that introduced it. +/// A mapping of a section of the *Blamed File* to the section in a *Source File* that introduced it. /// /// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally, /// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()). 
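At this point in the series a `BlameEntry` still stores both ranges as public fields; the next patch replaces them with a start/length pair plus accessors. A brief sketch of how a consumer reads the renamed fields (ranges are zero-based, matching the baseline conversion in the tests):

    fn print_entries(outcome: &gix_blame::Outcome) {
        for entry in &outcome.entries {
            // Both ranges have the same length; only their starting line differs.
            println!(
                "{} blamed {:?} from source {:?}",
                entry.commit_id, entry.range_in_blamed_file, entry.range_in_source_file
            );
        }
    }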
@@ -127,29 +127,28 @@ impl SubAssign for Offset { pub struct BlameEntry { /// The section of tokens in the tokenized version of the *Blamed File* (typically lines). pub range_in_blamed_file: Range, - /// The section of tokens in the tokenized version of the *Original File* (typically lines). - // TODO: figure out why this is basically inverted. Probably that's just it - would make sense with `UnblamedHunk` then. - pub range_in_original_file: Range, - /// The commit that introduced the section into the *Blamed File*. + /// The section of tokens in the tokenized version of the *Source File* (typically lines). + pub range_in_source_file: Range, + /// The commit that introduced the section into the *Source File*. pub commit_id: ObjectId, } impl BlameEntry { /// Create a new instance. - pub fn new(range_in_blamed_file: Range, range_in_original_file: Range, commit_id: ObjectId) -> Self { + pub fn new(range_in_blamed_file: Range, range_in_source_file: Range, commit_id: ObjectId) -> Self { debug_assert!( range_in_blamed_file.end > range_in_blamed_file.start, "{range_in_blamed_file:?}" ); debug_assert!( - range_in_original_file.end > range_in_original_file.start, - "{range_in_original_file:?}" + range_in_source_file.end > range_in_source_file.start, + "{range_in_source_file:?}" ); - debug_assert_eq!(range_in_original_file.len(), range_in_blamed_file.len()); + debug_assert_eq!(range_in_source_file.len(), range_in_blamed_file.len()); Self { range_in_blamed_file: range_in_blamed_file.clone(), - range_in_original_file: range_in_original_file.clone(), + range_in_source_file: range_in_source_file.clone(), commit_id, } } @@ -171,7 +170,7 @@ impl LineRange for Range { pub struct UnblamedHunk { /// TODO: figure out how this works. pub range_in_blamed_file: Range, - /// Maps a commit to the range in the *Original File* that `range_in_blamed_file` refers to. + /// Maps a commit to the range in the *Blamed File* that `range_in_blamed_file` refers to. pub suspects: BTreeMap>, } diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 73c2597933f..258a3457c4a 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -79,7 +79,7 @@ mod baseline { Err(_) => continue, }; - let line_number_in_original_file = fields[1].parse::().unwrap(); + let line_number_in_source_file = fields[1].parse::().unwrap(); let line_number_in_final_file = fields[2].parse::().unwrap(); // The last field indicates the number of lines this group contains info for // (this is not equal to the number of lines in git blame’s porcelain output). @@ -87,22 +87,22 @@ mod baseline { skip_lines = number_of_lines_in_group; - let original_range = (line_number_in_original_file - 1) - ..(line_number_in_original_file + number_of_lines_in_group - 1); + let source_range = + (line_number_in_source_file - 1)..(line_number_in_source_file + number_of_lines_in_group - 1); let blame_range = (line_number_in_final_file - 1)..(line_number_in_final_file + number_of_lines_in_group - 1); assert!(ranges.is_none(), "should not overwrite existing ranges"); - ranges = Some((blame_range, original_range)); + ranges = Some((blame_range, source_range)); } else if !is_known_header_field(&fields[0]) && ObjectId::from_hex(fields[0].as_bytes()).is_err() { panic!("unexpected line: '{:?}'", line.as_bstr()); } } - let Some((range_in_blamed_file, range_in_original_file)) = ranges else { + let Some((range_in_blamed_file, range_in_source_file)) = ranges else { // No new lines were parsed, so we assume the iterator is finished. 
return None; }; - Some(BlameEntry::new(range_in_blamed_file, range_in_original_file, commit_id)) + Some(BlameEntry::new(range_in_blamed_file, range_in_source_file, commit_id)) } } } From 63ee0f9c34dc89ad51d5c9ab83e49cbc08e3ed69 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 25 Dec 2024 08:55:17 +0100 Subject: [PATCH 12/16] Review and remove all TODOs where possible, update docs and comments --- gitoxide-core/src/repository/blame.rs | 4 +- gix-blame/src/file/function.rs | 34 +++--- gix-blame/src/file/mod.rs | 25 ++-- gix-blame/src/file/tests.rs | 161 +++++++++++++++----------- gix-blame/src/types.rs | 51 +++++--- 5 files changed, 168 insertions(+), 107 deletions(-) diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index 1001df0c7fe..c130770e9dc 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -30,8 +30,8 @@ pub fn blame_file( fn write_blame_entries(mut out: impl std::io::Write, outcome: gix::blame::Outcome) -> Result<(), std::io::Error> { for (entry, lines_in_hunk) in outcome.entries_with_lines() { for ((actual_lno, source_lno), line) in entry - .range_in_blamed_file - .zip(entry.range_in_source_file) + .range_in_blamed_file() + .zip(entry.range_in_source_file()) .zip(lines_in_hunk) { write!( diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 3729ed0e905..48373fb7561 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -3,9 +3,9 @@ use crate::{BlameEntry, Error, Outcome, Statistics}; use gix_diff::blob::intern::TokenSource; use gix_hash::ObjectId; use gix_object::{bstr::BStr, FindExt}; +use std::num::NonZeroU32; use std::ops::Range; -// TODO: do not instantiate anything, get everything passed as argument. /// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file /// at `traverse[0]:` originated in. /// @@ -142,8 +142,8 @@ where )?; let Some(modification) = changes_for_file_path else { if more_than_one_parent { - // None of the changes affected the file we’re currently blaming. Pass blame - // to parent. + // None of the changes affected the file we’re currently blaming. + // Copy blame to parent. for unblamed_hunk in &mut hunks_to_blame { unblamed_hunk.clone_blame(suspect, parent_id); } @@ -159,8 +159,6 @@ where // Do nothing under the assumption that this always (or almost always) // implies that the file comes from a different parent, compared to which // it was modified, not added. - // - // TODO: I still have to figure out whether this is correct in all cases. } else { unblamed_to_out(&mut hunks_to_blame, &mut out, suspect); break; @@ -191,7 +189,7 @@ where // I don’t know yet whether it would make sense to use a data structure instead that preserves // order on insertion. - out.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); + out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file)); Ok(Outcome { entries: coalesce_blame_entries(out), blob: blamed_file_blob, @@ -199,17 +197,18 @@ where }) } -/// The blobs storing the blamed file in `entry` and `parent_entry` are identical which is why -/// we can pass blame to the parent without further checks. +/// Pass ownership of each unblamed hunk of `from` to `to`. +/// +/// This happens when `from` didn't actually change anything in the blamed file. 
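The helper below, together with `clone_blame` used in the multi-parent branch, is the whole mechanism by which suspicion moves through the commit graph. A conceptual sketch of the two operations on a single hunk's suspect map (`BTreeMap<ObjectId, Range<u32>>`); the real methods live on `UnblamedHunk`, and these bodies are an assumption for illustration only:

    use std::collections::BTreeMap;
    use std::ops::Range;
    use gix_hash::ObjectId;

    // One parent: blame is *passed*, so `from` stops being a suspect for this hunk.
    fn pass_blame(suspects: &mut BTreeMap<ObjectId, Range<u32>>, from: ObjectId, to: ObjectId) {
        if let Some(range) = suspects.remove(&from) {
            suspects.insert(to, range);
        }
    }

    // Several parents: blame is *copied*, every parent becomes a suspect for the same
    // range, and `from` is removed separately once all parents have been handled.
    fn clone_blame(suspects: &mut BTreeMap<ObjectId, Range<u32>>, from: ObjectId, to: ObjectId) {
        if let Some(range) = suspects.get(&from).cloned() {
            suspects.insert(to, range);
        }
    }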
fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec) { for unblamed_hunk in hunks_to_blame { unblamed_hunk.pass_blame(from, to); } } +/// Convert each of the unblamed hunk in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process. +/// `suspect` is expected to be present in the suspect-map in each [`UnblamedHunk`]. fn unblamed_to_out(hunks_to_blame: &mut Vec, out: &mut Vec, suspect: ObjectId) { - // Every line that has not been blamed yet on a commit, is expected to have been - // added when the file was added to the repository. out.extend( hunks_to_blame .drain(..) @@ -234,14 +233,21 @@ fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { let previous_entry = acc.last(); if let Some(previous_entry) = previous_entry { + let previous_blamed_range = previous_entry.range_in_blamed_file(); + let current_blamed_range = entry.range_in_blamed_file(); + let previous_source_range = previous_entry.range_in_source_file(); + let current_source_range = entry.range_in_source_file(); if previous_entry.commit_id == entry.commit_id - && previous_entry.range_in_blamed_file.end == entry.range_in_blamed_file.start + && previous_blamed_range.end == current_blamed_range.start // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`. - && previous_entry.range_in_source_file.end == entry.range_in_source_file.start + && previous_source_range.end == current_source_range.start { + // let combined_range = let coalesced_entry = BlameEntry { - range_in_blamed_file: previous_entry.range_in_blamed_file.start..entry.range_in_blamed_file.end, - range_in_source_file: previous_entry.range_in_source_file.start..entry.range_in_source_file.end, + start_in_blamed_file: previous_blamed_range.start as u32, + start_in_source_file: previous_source_range.start as u32, + len: NonZeroU32::new((current_source_range.end - previous_source_range.start) as u32) + .expect("BUG: hunks are never zero-sized"), commit_id: previous_entry.commit_id, }; diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 24953ba8784..6b2e125a97e 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -1,4 +1,6 @@ //! A module with low-level types and functions. 
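The `gitoxide-core` hunk above already calls `entry.range_in_blamed_file()` and `entry.range_in_source_file()`, so the compact representation only works together with such accessors. A self-contained illustration using a local mirror of the new fields; the exact return type of the real accessors (for example `u32` versus `usize`) is a guess:

    use std::num::NonZeroU32;
    use std::ops::Range;

    struct Entry {
        start_in_blamed_file: u32,
        start_in_source_file: u32,
        len: NonZeroU32, // never zero, so neither derived range can be empty
    }

    impl Entry {
        fn range_in_blamed_file(&self) -> Range<u32> {
            self.start_in_blamed_file..self.start_in_blamed_file + self.len.get()
        }
        fn range_in_source_file(&self) -> Range<u32> {
            self.start_in_source_file..self.start_in_source_file + self.len.get()
        }
    }

    // An entry of length 3 starting at blamed line 7 and source line 2 yields
    // range_in_blamed_file() == 7..10 and range_in_source_file() == 2..5.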
+ +use std::num::NonZeroU32; use std::ops::Range; use gix_hash::ObjectId; @@ -67,7 +69,7 @@ fn process_change( // <----> (hunk) // <--> (unchanged) - (Some(hunk.clone()), None) + (Some(hunk), None) } else { // <--> (hunk) // <----> (unchanged) @@ -186,7 +188,7 @@ fn process_change( *offset_in_destination += added.end - added.start; *offset_in_destination -= number_of_lines_deleted; - (Some(hunk.clone()), None) + (Some(hunk), None) } else if range_in_suspect.end <= added.start { // <--> (hunk) // <----> (added) @@ -416,8 +418,9 @@ impl BlameEntry { match offset { Offset::Added(added) => Self { - range_in_blamed_file: (range_in_source_file.start + added)..(range_in_source_file.end + added), - range_in_source_file, + start_in_blamed_file: range_in_source_file.start + added, + start_in_source_file: range_in_source_file.start, + len: force_non_zero(range_in_source_file.len() as u32), commit_id, }, Offset::Deleted(deleted) => { @@ -427,8 +430,9 @@ impl BlameEntry { ); Self { - range_in_blamed_file: (range_in_source_file.start - deleted)..(range_in_source_file.end - deleted), - range_in_source_file, + start_in_blamed_file: range_in_source_file.start - deleted, + start_in_source_file: range_in_source_file.start, + len: force_non_zero(range_in_source_file.len() as u32), commit_id, } } @@ -443,12 +447,17 @@ impl BlameEntry { .expect("Private and only called when we now `commit_id` is in the suspect list"); Self { - range_in_blamed_file: unblamed_hunk.range_in_blamed_file, - range_in_source_file, + start_in_blamed_file: unblamed_hunk.range_in_blamed_file.start, + start_in_source_file: range_in_source_file.start, + len: force_non_zero(range_in_source_file.len() as u32), commit_id, } } } +fn force_non_zero(n: u32) -> NonZeroU32 { + NonZeroU32::new(n).expect("BUG: hunks are never empty") +} + #[cfg(test)] mod tests; diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs index f01e7c8034f..4d7f28af415 100644 --- a/gix-blame/src/file/tests.rs +++ b/gix-blame/src/file/tests.rs @@ -17,7 +17,7 @@ fn new_unblamed_hunk(range_in_blamed_file: Range, suspect: ObjectId, offset mod process_change { use super::*; - use crate::file::{process_change, Change, Offset, UnblamedHunk}; + use crate::file::{force_non_zero, process_change, Change, Offset, UnblamedHunk}; use crate::BlameEntry; use gix_hash::ObjectId; @@ -69,8 +69,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 0..3, - range_in_source_file: 0..3, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(3), commit_id: suspect }] ); @@ -105,8 +106,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 2..3, - range_in_source_file: 2..3, + start_in_blamed_file: 2, + start_in_source_file: 2, + len: force_non_zero(1), commit_id: suspect }] ); @@ -147,8 +149,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 12..13, - range_in_source_file: 12..13, + start_in_blamed_file: 12, + start_in_source_file: 12, + len: force_non_zero(1), commit_id: suspect }] ); @@ -190,8 +193,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 14..15, - range_in_source_file: 9..10, + start_in_blamed_file: 14, + start_in_source_file: 9, + len: force_non_zero(1), commit_id: suspect }] ); @@ -232,8 +236,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 0..3, - range_in_source_file: 0..3, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(3), 
commit_id: suspect }] ); @@ -269,8 +274,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 1..4, - range_in_source_file: 0..3, + start_in_blamed_file: 1, + start_in_source_file: 0, + len: force_non_zero(3), commit_id: suspect }] ); @@ -306,8 +312,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 4..6, - range_in_source_file: 3..5, + start_in_blamed_file: 4, + start_in_source_file: 3, + len: force_non_zero(2), commit_id: suspect }] ); @@ -343,8 +350,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 23..24, - range_in_source_file: 25..26, + start_in_blamed_file: 23, + start_in_source_file: 25, + len: force_non_zero(1), commit_id: suspect }] ); @@ -374,8 +382,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 23..24, - range_in_source_file: 21..22, + start_in_blamed_file: 23, + start_in_source_file: 21, + len: force_non_zero(1), commit_id: suspect }] ); @@ -405,8 +414,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 107..109, - range_in_source_file: 106..108, + start_in_blamed_file: 107, + start_in_source_file: 106, + len: force_non_zero(2), commit_id: suspect }] ); @@ -442,8 +452,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 155..156, - range_in_source_file: 143..144, + start_in_blamed_file: 155, + start_in_source_file: 143, + len: force_non_zero(1), commit_id: suspect }] ); @@ -659,8 +670,9 @@ mod process_change { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 2..5, - range_in_source_file: 5..8, + start_in_blamed_file: 2, + start_in_source_file: 5, + len: force_non_zero(3), commit_id: suspect }] ); @@ -985,7 +997,7 @@ mod process_change { } mod process_changes { use crate::file::tests::new_unblamed_hunk; - use crate::file::{process_changes, Change, Offset, UnblamedHunk}; + use crate::file::{force_non_zero, process_changes, Change, Offset, UnblamedHunk}; use crate::BlameEntry; use gix_hash::ObjectId; @@ -1010,8 +1022,9 @@ mod process_changes { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 0..4, - range_in_source_file: 0..4, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(4), commit_id: suspect }] ); @@ -1029,8 +1042,9 @@ mod process_changes { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 0..4, - range_in_source_file: 0..4, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(4), commit_id: suspect }] ); @@ -1048,8 +1062,9 @@ mod process_changes { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 2..4, - range_in_source_file: 2..4, + start_in_blamed_file: 2, + start_in_source_file: 2, + len: force_non_zero(2), commit_id: suspect }] ); @@ -1074,13 +1089,15 @@ mod process_changes { lines_blamed, [ BlameEntry { - range_in_blamed_file: 0..1, - range_in_source_file: 0..1, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(1), commit_id: suspect }, BlameEntry { - range_in_blamed_file: 1..4, - range_in_source_file: 1..4, + start_in_blamed_file: 1, + start_in_source_file: 1, + len: force_non_zero(3), commit_id: suspect } ] @@ -1099,8 +1116,9 @@ mod process_changes { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 0..1, - range_in_source_file: 0..1, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(1), commit_id: suspect }] ); @@ -1112,8 +1130,9 @@ mod process_changes { let suspect 
= ObjectId::null(gix_hash::Kind::Sha1); let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); let mut lines_blamed: Vec = vec![BlameEntry { - range_in_blamed_file: 0..2, - range_in_source_file: 0..2, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(2), commit_id: suspect, }]; let hunks_to_blame = vec![new_unblamed_hunk(2..6, suspect_2, Offset::Added(2))]; @@ -1124,13 +1143,15 @@ mod process_changes { lines_blamed, [ BlameEntry { - range_in_blamed_file: 0..2, - range_in_source_file: 0..2, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(2), commit_id: suspect }, BlameEntry { - range_in_blamed_file: 2..3, - range_in_source_file: 0..1, + start_in_blamed_file: 2, + start_in_source_file: 0, + len: force_non_zero(1), commit_id: suspect_2 } ] @@ -1152,8 +1173,9 @@ mod process_changes { assert_eq!( lines_blamed, [BlameEntry { - range_in_blamed_file: 0..4, - range_in_source_file: 0..4, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(4), commit_id: suspect }] ); @@ -1177,8 +1199,9 @@ mod process_changes { let suspect = ObjectId::null(gix_hash::Kind::Sha1); let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap(); let mut lines_blamed: Vec = vec![BlameEntry { - range_in_blamed_file: 0..1, - range_in_source_file: 0..1, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(1), commit_id: suspect, }]; let hunks_to_blame = vec![new_unblamed_hunk(1..3, suspect_2, Offset::Added(1))]; @@ -1189,13 +1212,15 @@ mod process_changes { lines_blamed, [ BlameEntry { - range_in_blamed_file: 0..1, - range_in_source_file: 0..1, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(1), commit_id: suspect }, BlameEntry { - range_in_blamed_file: 1..2, - range_in_source_file: 0..1, + start_in_blamed_file: 1, + start_in_source_file: 0, + len: force_non_zero(1), commit_id: suspect_2 } ] @@ -1218,13 +1243,15 @@ mod process_changes { lines_blamed, [ BlameEntry { - range_in_blamed_file: 0..2, - range_in_source_file: 0..2, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(2), commit_id: suspect }, BlameEntry { - range_in_blamed_file: 3..4, - range_in_source_file: 3..4, + start_in_blamed_file: 3, + start_in_source_file: 3, + len: force_non_zero(1), commit_id: suspect } ] @@ -1236,8 +1263,9 @@ mod process_changes { fn added_hunk_9() { let suspect = ObjectId::null(gix_hash::Kind::Sha1); let mut lines_blamed: Vec = vec![BlameEntry { - range_in_blamed_file: 30..31, - range_in_source_file: 30..31, + start_in_blamed_file: 30, + start_in_source_file: 30, + len: force_non_zero(1), commit_id: suspect, }]; let hunks_to_blame = vec![ @@ -1257,19 +1285,21 @@ mod process_changes { ]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); - lines_blamed.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); + lines_blamed.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file)); assert_eq!( lines_blamed, [ BlameEntry { - range_in_blamed_file: 16..17, - range_in_source_file: 16..17, + start_in_blamed_file: 16, + start_in_source_file: 16, + len: force_non_zero(1), commit_id: suspect }, BlameEntry { - range_in_blamed_file: 30..31, - range_in_source_file: 30..31, + start_in_blamed_file: 30, + start_in_source_file: 30, + len: force_non_zero(1), commit_id: suspect } ] @@ -1307,8 +1337,9 @@ mod process_changes { assert_eq!( lines_blamed, [BlameEntry { - 
range_in_blamed_file: 0..4, - range_in_source_file: 0..4, + start_in_blamed_file: 0, + start_in_source_file: 0, + len: force_non_zero(4), commit_id: suspect }] ); diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index af1c693e681..f5b6ef5ea08 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -1,12 +1,12 @@ +use crate::file::function::tokens_for_diffing; +use gix_hash::ObjectId; +use gix_object::bstr::BString; +use std::num::NonZeroU32; use std::{ collections::BTreeMap, ops::{AddAssign, Range, SubAssign}, }; -use crate::file::function::tokens_for_diffing; -use gix_hash::ObjectId; -use gix_object::bstr::BString; - /// The outcome of [`file()`](crate::file()). #[derive(Debug, Clone)] pub struct Outcome { @@ -48,10 +48,9 @@ impl Outcome { .map(|token| interner.intern(token)) .collect(); self.entries.iter().map(move |e| { - let Range { start, end } = e.range_in_blamed_file.clone(); ( e.clone(), - lines_as_tokens[start as usize..end as usize] + lines_as_tokens[e.range_in_blamed_file()] .iter() .map(|token| BString::new(interner[*token].into())) .collect(), @@ -122,13 +121,16 @@ impl SubAssign for Offset { /// /// Both ranges are of the same size, but may use different [starting points](Range::start). Naturally, /// they have the same content, which is the reason they are in what is returned by [`file()`](crate::file()). -// TODO: see if this can be encoded as `start_in_original_file` and `start_in_blamed_file` and a single `len`. #[derive(Clone, Debug, PartialEq)] pub struct BlameEntry { - /// The section of tokens in the tokenized version of the *Blamed File* (typically lines). - pub range_in_blamed_file: Range, - /// The section of tokens in the tokenized version of the *Source File* (typically lines). - pub range_in_source_file: Range, + /// The index of the token in the *Blamed File* (typically lines) where this entry begins. + pub start_in_blamed_file: u32, + /// The index of the token in the *Source File* (typically lines) where this entry begins. + /// + /// This is possibly offset compared to `start_in_blamed_file`. + pub start_in_source_file: u32, + /// The amount of lines the hunk is spanning. + pub len: NonZeroU32, /// The commit that introduced the section into the *Source File*. pub commit_id: ObjectId, } @@ -147,13 +149,27 @@ impl BlameEntry { debug_assert_eq!(range_in_source_file.len(), range_in_blamed_file.len()); Self { - range_in_blamed_file: range_in_blamed_file.clone(), - range_in_source_file: range_in_source_file.clone(), + start_in_blamed_file: range_in_blamed_file.start, + start_in_source_file: range_in_source_file.start, + len: NonZeroU32::new(range_in_blamed_file.len() as u32).expect("BUG: hunks are never empty"), commit_id, } } } +impl BlameEntry { + /// Return the range of tokens this entry spans in the *Blamed File*. + pub fn range_in_blamed_file(&self) -> Range { + let start = self.start_in_blamed_file as usize; + start..start + self.len.get() as usize + } + /// Return the range of tokens this entry spans in the *Source File*. + pub fn range_in_source_file(&self) -> Range { + let start = self.start_in_source_file as usize; + start..start + self.len.get() as usize + } +} + pub(crate) trait LineRange { fn shift_by(&self, offset: Offset) -> Self; } @@ -164,13 +180,12 @@ impl LineRange for Range { } } -/// TODO: docs - what is it? -// TODO: is `Clone` really needed. -#[derive(Clone, Debug, PartialEq)] +/// Tracks the hunks in the *Blamed File* that are not yet associated with the commit that introduced them. 
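// Editor's illustration (not part of the patch): if lines `10..14` of the *Blamed File* are still
// unblamed, `range_in_blamed_file` is `10..14`, and `suspects` might map the head commit to `10..14`
// while mapping its parent to `7..11`, assuming three lines were inserted just before this hunk
// between those two commits.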
+#[derive(Debug, PartialEq)] pub struct UnblamedHunk { - /// TODO: figure out how this works. + /// The range in the file that is being blamed that this hunk represents. pub range_in_blamed_file: Range, - /// Maps a commit to the range in the *Blamed File* that `range_in_blamed_file` refers to. + /// Maps a commit to the range in a source file (i.e. *Blamed File* at a revision) that is equal to `range_in_blamed_file`. pub suspects: BTreeMap>, } From 3ac8be1557de8a66ff32abe3d1c9ea83198d4a05 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 25 Dec 2024 10:23:19 +0100 Subject: [PATCH 13/16] additional pass of refactoring, focus on the algorithm itself. --- gix-blame/src/file/function.rs | 20 +-- gix-blame/src/file/mod.rs | 252 ++++++++++++++++++--------------- gix-blame/src/file/tests.rs | 78 +++++----- gix-blame/src/types.rs | 4 +- 4 files changed, 199 insertions(+), 155 deletions(-) diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 48373fb7561..0cac6874815 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -1,4 +1,4 @@ -use super::{process_changes, Change, Offset, UnblamedHunk}; +use super::{process_changes, Change, UnblamedHunk}; use crate::{BlameEntry, Error, Outcome, Statistics}; use gix_diff::blob::intern::TokenSource; use gix_hash::ObjectId; @@ -83,11 +83,13 @@ where .count() }; - let mut hunks_to_blame = vec![UnblamedHunk::new( - 0..num_lines_in_blamed as u32, - suspect, - Offset::Added(0), - )]; + let mut hunks_to_blame = vec![{ + let range_in_blamed_file = 0..num_lines_in_blamed as u32; + UnblamedHunk { + range_in_blamed_file: range_in_blamed_file.clone(), + suspects: [(suspect, range_in_blamed_file)].into(), + } + }]; let mut out = Vec::new(); let mut diff_state = gix_diff::tree::State::default(); @@ -340,8 +342,10 @@ fn blob_changes( match (!before.is_empty(), !after.is_empty()) { (_, true) => { - self.hunks - .push(Change::Added(after.start..after.end, before.end - before.start)); + self.hunks.push(Change::AddedOrReplaced( + after.start..after.end, + before.end - before.start, + )); } (true, false) => { self.hunks.push(Change::Deleted(after.start, before.end - before.start)); diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 6b2e125a97e..b2661e1619d 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -18,11 +18,25 @@ pub(super) mod function; fn process_change( out: &mut Vec, new_hunks_to_blame: &mut Vec, - offset_in_destination: &mut Offset, + offset: &mut Offset, suspect: ObjectId, hunk: Option, change: Option, ) -> (Option, Option) { + /// Since `range_with_end` is a range that is not inclusive at the end, + /// `range_with_end.end` is not part of `range_with_end`. + /// The first line that is `range_with_end.end - 1`. + fn actual_end_in_range(test: &Range, containing_range: &Range) -> bool { + (test.end - 1) >= containing_range.start && test.end <= containing_range.end + } + + // # General Rules + // 1. If there is no suspect, immediately reschedule `hunk` and redo processing of `change`. + // + // # Detailed Rules + // 1. whenever we do *not* return `hunk`, it must be added to `new_hunks_to_blame`, shifted with `offset` + // 2. return `hunk` if it is not fully covered by changes yet. + // 3. `change` *must* be returned if it is not fully included in `hunk`. 
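    // Worked example (editor's illustration, not part of the patch): assume `hunk` maps `suspect`
    // to `2..6` and `change` is `Change::AddedOrReplaced(3..5, 0)`. Then lines `3..5` are blamed on
    // `suspect` and pushed to `out`, `2..3` is re-queued into `new_hunks_to_blame` after being
    // shifted by `offset`, and `5..6` is returned as the remaining `hunk` for the next change.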
match (hunk, change) { (Some(hunk), Some(Change::Unchanged(unchanged))) => { let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { @@ -31,11 +45,8 @@ fn process_change( }; match ( - // Since `unchanged` is a range that is not inclusive at the end, - // `unchanged.end` is not part of `unchanged`. The first line that is - // `unchanged.end - 1`. range_in_suspect.contains(&unchanged.start), - (unchanged.end - 1) >= range_in_suspect.start && unchanged.end <= range_in_suspect.end, + actual_end_in_range(&unchanged, range_in_suspect), ) { (_, true) => { // <------> (hunk) @@ -44,14 +55,16 @@ fn process_change( // <----------> (hunk) // <---> (unchanged) + // skip over unchanged - there will be changes right after. (Some(hunk), None) } (true, false) => { // <--------> (hunk) // <-------> (unchanged) - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - + // Nothing to do with `hunk` except shifting it, + // but `unchanged` needs to be checked against the next hunk to catch up. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); (None, Some(Change::Unchanged(unchanged))) } (false, false) => { @@ -69,6 +82,7 @@ fn process_change( // <----> (hunk) // <--> (unchanged) + // Let changes catch up with us. (Some(hunk), None) } else { // <--> (hunk) @@ -77,80 +91,93 @@ fn process_change( // <---> (hunk) // <----------> (unchanged) - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - - (None, Some(Change::Unchanged(unchanged.clone()))) + // Nothing to do with `hunk` except shifting it, + // but `unchanged` needs to be checked against the next hunk to catch up. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); + (None, Some(Change::Unchanged(unchanged))) } } } } - (Some(hunk), Some(Change::Added(added, number_of_lines_deleted))) => { - let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + (Some(hunk), Some(Change::AddedOrReplaced(added, number_of_lines_deleted))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect).cloned() else { new_hunks_to_blame.push(hunk); - - return (None, Some(Change::Added(added, number_of_lines_deleted))); + return (None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted))); }; - let range_in_suspect = range_in_suspect.clone(); - let range_contains_added_start = range_in_suspect.contains(&added.start); - // Since `added` is a range that is not inclusive at the end, `added.end` is - // not part of `added`. The first line that is `added.end - 1`. - let range_contains_added_end = - (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end; - match (range_contains_added_start, range_contains_added_end) { + let suspect_contains_added_start = range_in_suspect.contains(&added.start); + let suspect_contains_added_end = actual_end_in_range(&added, &range_in_suspect); + match (suspect_contains_added_start, suspect_contains_added_end) { (true, true) => { + // A perfect match of lines to take out of the unblamed portion. // <----------> (hunk) // <---> (added) // <---> (blamed) // <--> <-> (new hunk) - let new_hunk = match hunk.split_at(suspect, added.start) { - Either::Left(hunk) => hunk, + // Split hunk at the start of added. + let hunk_starting_at_added = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => { + // `added` starts with `hunk`, nothing to split. 
+ hunk + } Either::Right((before, after)) => { - new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); - + // requeue the left side `before` after offsetting it… + new_hunks_to_blame.push(before.shift_by(suspect, *offset)); + // …and treat `after` as `new_hunk`, which contains the `added` range. after } }; - *offset_in_destination += added.end - added.start; - *offset_in_destination -= number_of_lines_deleted; + *offset += added.end - added.start; + *offset -= number_of_lines_deleted; + // The overlapping `added` section was successfully located. out.push(BlameEntry::with_offset( added.clone(), suspect, - new_hunk.offset_for(suspect), + hunk_starting_at_added.offset_for(suspect), )); - match new_hunk.split_at(suspect, added.end) { - Either::Left(_) => (None, None), - Either::Right((_, after)) => (Some(after), None), + // Re-split at the end of `added` to continue with what's after. + match hunk_starting_at_added.split_at(suspect, added.end) { + Either::Left(_) => { + // Nothing to split, so we are done with this hunk. + (None, None) + } + Either::Right((_, after)) => { + // Keep processing the unblamed range after `added` + (Some(after), None) + } } } (true, false) => { + // Added overlaps towards the end of `hunk`. // <--------> (hunk) // <-------> (added) // <----> (blamed) // <--> (new hunk) - let new_hunk = match hunk.split_at(suspect, added.start) { + let hunk_starting_at_added = match hunk.split_at(suspect, added.start) { Either::Left(hunk) => hunk, Either::Right((before, after)) => { - new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); - + // Keep looking for the left side of the unblamed portion. + new_hunks_to_blame.push(before.shift_by(suspect, *offset)); after } }; + // We can 'blame' the overlapping area of `added` and `hunk`. out.push(BlameEntry::with_offset( added.start..range_in_suspect.end, suspect, - new_hunk.offset_for(suspect), + hunk_starting_at_added.offset_for(suspect), )); - - (None, Some(Change::Added(added, number_of_lines_deleted))) + // Keep processing `added`, it's portion past `hunk` may still contribute. + (None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted))) } (false, true) => { + // Added reaches into the hunk, so we blame only the overlapping portion of it. // <-------> (hunk) // <------> (added) // <---> (blamed) @@ -162,8 +189,8 @@ fn process_change( hunk.offset_for(suspect), )); - *offset_in_destination += added.end - added.start; - *offset_in_destination -= number_of_lines_deleted; + *offset += added.end - added.start; + *offset -= number_of_lines_deleted; match hunk.split_at(suspect, added.end) { Either::Left(_) => (None, None), @@ -185,29 +212,42 @@ fn process_change( // <----> (hunk) // <--> (added) - *offset_in_destination += added.end - added.start; - *offset_in_destination -= number_of_lines_deleted; + *offset += added.end - added.start; + *offset -= number_of_lines_deleted; + // Let changes catchup with `hunk` after letting `added` contribute to the offset. (Some(hunk), None) } else if range_in_suspect.end <= added.start { // <--> (hunk) // <----> (added) - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + // Retry `hunk` once there is overlapping changes to process. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); - (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) + // Let hunks catchup with this change. 
+ ( + None, + Some(Change::AddedOrReplaced(added.clone(), number_of_lines_deleted)), + ) } else { + // Discard the left side of `added`, keep track of `blamed`, and continue with the + // right side of added that is going past `hunk`. // <---> (hunk) // <----------> (added) // <---> (blamed) + // Successfully blame the whole range. out.push(BlameEntry::with_offset( range_in_suspect.clone(), suspect, hunk.offset_for(suspect), )); - (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) + // And keep processing `added` with future `hunks` that might be affected by it. + ( + None, + Some(Change::AddedOrReplaced(added.clone(), number_of_lines_deleted)), + ) } } } @@ -222,31 +262,33 @@ fn process_change( // <---> (hunk) // | (line_number_in_destination) - *offset_in_destination -= number_of_lines_deleted; - + // Track the shift to `hunk` as it affects us, and keep catching up with changes. + *offset -= number_of_lines_deleted; (Some(hunk), None) } else if line_number_in_destination < range_in_suspect.end { // <-----> (hunk) // | (line_number_in_destination) let new_hunk = match hunk.split_at(suspect, line_number_in_destination) { - Either::Left(hunk) => hunk, + Either::Left(hunk) => { + // Nothing to split as `line_number_in_destination` is directly at start of `hunk` + hunk + } Either::Right((before, after)) => { - new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); - + // `before` isn't affected by deletion, so keep it for later. + new_hunks_to_blame.push(before.shift_by(suspect, *offset)); + // after will be affected by offset, and we will see if there are more changes affecting it. after } }; - - *offset_in_destination -= number_of_lines_deleted; - + *offset -= number_of_lines_deleted; (Some(new_hunk), None) } else { // <---> (hunk) // | (line_number_in_destination) - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - + // Catchup with changes. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); ( None, Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)), @@ -254,23 +296,29 @@ fn process_change( } } (Some(hunk), None) => { - new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); - + // nothing to do - changes are exhausted, re-evaluate `hunk`. + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset)); (None, None) } - (None, Some(Change::Unchanged(_))) => (None, None), - (None, Some(Change::Added(added, number_of_lines_deleted))) => { - *offset_in_destination += added.end - added.start; - *offset_in_destination -= number_of_lines_deleted; - + (None, Some(Change::Unchanged(_))) => { + // Nothing changed past the blamed range - do nothing. + (None, None) + } + (None, Some(Change::AddedOrReplaced(added, number_of_lines_deleted))) => { + // Keep track of the shift to apply to hunks in the future. + *offset += added.len() as u32; + *offset -= number_of_lines_deleted; (None, None) } (None, Some(Change::Deleted(_, number_of_lines_deleted))) => { - *offset_in_destination -= number_of_lines_deleted; - + // Keep track of the shift to apply to hunks in the future. + *offset -= number_of_lines_deleted; + (None, None) + } + (None, None) => { + // Noop, caller shouldn't do that, but not our problem. 
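            // Editor's illustration (not part of the patch) for the two arms above: with no hunk
            // left, `Change::AddedOrReplaced(10..13, 1)` is pure bookkeeping. `offset` gains 3 and
            // loses 1, so hunks re-queued for the parent later on are shifted by two more lines.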
(None, None) } - (None, None) => (None, None), } } @@ -312,23 +360,8 @@ fn process_changes( } impl UnblamedHunk { - fn new(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> Self { - assert!( - range_in_blamed_file.end > range_in_blamed_file.start, - "{range_in_blamed_file:?}" - ); - - let range_in_destination = range_in_blamed_file.shift_by(offset); - - Self { - range_in_blamed_file, - suspects: [(suspect, range_in_destination)].into(), - } - } - fn shift_by(mut self, suspect: ObjectId, offset: Offset) -> Self { self.suspects.entry(suspect).and_modify(|e| *e = e.shift_by(offset)); - self } @@ -336,39 +369,34 @@ impl UnblamedHunk { match self.suspects.get(&suspect) { None => Either::Left(self), Some(range_in_suspect) => { - if line_number_in_destination > range_in_suspect.start - && line_number_in_destination < range_in_suspect.end - { - let split_at_from_start = line_number_in_destination - range_in_suspect.start; - - if split_at_from_start > 0 { - let new_suspects_before = self - .suspects - .iter() - .map(|(suspect, range)| (*suspect, range.start..(range.start + split_at_from_start))) - .collect(); - - let new_suspects_after = self - .suspects - .iter() - .map(|(suspect, range)| (*suspect, (range.start + split_at_from_start)..range.end)) - .collect(); - - let new_hunk_before = Self { - range_in_blamed_file: self.range_in_blamed_file.start - ..(self.range_in_blamed_file.start + split_at_from_start), - suspects: new_suspects_before, - }; - let new_hunk_after = Self { - range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) - ..(self.range_in_blamed_file.end), - suspects: new_suspects_after, - }; - - Either::Right((new_hunk_before, new_hunk_after)) - } else { - Either::Left(self) - } + if !range_in_suspect.contains(&line_number_in_destination) { + return Either::Left(self); + } + + let split_at_from_start = line_number_in_destination - range_in_suspect.start; + if split_at_from_start > 0 { + let new_suspects_before = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, range.start..(range.start + split_at_from_start))); + + let new_suspects_after = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, (range.start + split_at_from_start)..range.end)); + + let new_hunk_before = Self { + range_in_blamed_file: self.range_in_blamed_file.start + ..(self.range_in_blamed_file.start + split_at_from_start), + suspects: new_suspects_before.collect(), + }; + let new_hunk_after = Self { + range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) + ..(self.range_in_blamed_file.end), + suspects: new_suspects_after.collect(), + }; + + Either::Right((new_hunk_before, new_hunk_after)) } else { Either::Left(self) } diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs index 4d7f28af415..c6ed47b29c3 100644 --- a/gix-blame/src/file/tests.rs +++ b/gix-blame/src/file/tests.rs @@ -55,7 +55,7 @@ mod process_change { &mut offset_in_destination, suspect, Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Added(0..3, 0)), + Some(Change::AddedOrReplaced(0..3, 0)), ); assert_eq!( @@ -92,7 +92,7 @@ mod process_change { &mut offset_in_destination, suspect, Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Added(2..3, 0)), + Some(Change::AddedOrReplaced(2..3, 0)), ); assert_eq!( @@ -135,7 +135,7 @@ mod process_change { &mut offset_in_destination, suspect, Some(new_unblamed_hunk(10..15, suspect, Offset::Added(0))), - Some(Change::Added(12..13, 0)), + 
Some(Change::AddedOrReplaced(12..13, 0)), ); assert_eq!( @@ -179,7 +179,7 @@ mod process_change { suspect, // range_in_destination: 7..12 Some(new_unblamed_hunk(12..17, suspect, Offset::Added(5))), - Some(Change::Added(9..10, 0)), + Some(Change::AddedOrReplaced(9..10, 0)), ); assert_eq!( @@ -222,7 +222,7 @@ mod process_change { &mut offset_in_destination, suspect, Some(new_unblamed_hunk(0..5, suspect, Offset::Added(0))), - Some(Change::Added(0..3, 1)), + Some(Change::AddedOrReplaced(0..3, 1)), ); assert_eq!( @@ -260,7 +260,7 @@ mod process_change { suspect, // range_in_destination: 0..4 Some(new_unblamed_hunk(1..5, suspect, Offset::Added(1))), - Some(Change::Added(0..3, 1)), + Some(Change::AddedOrReplaced(0..3, 1)), ); assert_eq!( @@ -298,7 +298,7 @@ mod process_change { suspect, // range_in_destination: 2..6 Some(new_unblamed_hunk(3..7, suspect, Offset::Added(1))), - Some(Change::Added(3..5, 1)), + Some(Change::AddedOrReplaced(3..5, 1)), ); assert_eq!( @@ -342,11 +342,11 @@ mod process_change { suspect, // range_in_destination: 25..26 Some(new_unblamed_hunk(23..24, suspect, Offset::Deleted(2))), - Some(Change::Added(25..27, 1)), + Some(Change::AddedOrReplaced(25..27, 1)), ); assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(25..27, 1))); + assert_eq!(change, Some(Change::AddedOrReplaced(25..27, 1))); assert_eq!( lines_blamed, [BlameEntry { @@ -374,7 +374,7 @@ mod process_change { suspect, // range_in_destination: 21..22 Some(new_unblamed_hunk(23..24, suspect, Offset::Added(2))), - Some(Change::Added(18..22, 3)), + Some(Change::AddedOrReplaced(18..22, 3)), ); assert_eq!(hunk, None); @@ -406,11 +406,11 @@ mod process_change { suspect, // range_in_destination: 70..108 Some(new_unblamed_hunk(71..109, suspect, Offset::Added(1))), - Some(Change::Added(106..109, 0)), + Some(Change::AddedOrReplaced(106..109, 0)), ); assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(106..109, 0))); + assert_eq!(change, Some(Change::AddedOrReplaced(106..109, 0))); assert_eq!( lines_blamed, [BlameEntry { @@ -444,11 +444,11 @@ mod process_change { suspect, // range_in_destination: 137..144 Some(new_unblamed_hunk(149..156, suspect, Offset::Added(12))), - Some(Change::Added(143..146, 0)), + Some(Change::AddedOrReplaced(143..146, 0)), ); assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(143..146, 0))); + assert_eq!(change, Some(Change::AddedOrReplaced(143..146, 0))); assert_eq!( lines_blamed, [BlameEntry { @@ -482,11 +482,11 @@ mod process_change { suspect, // range_in_destination: 2..5 Some(new_unblamed_hunk(3..6, suspect, Offset::Added(1))), - Some(Change::Added(7..10, 1)), + Some(Change::AddedOrReplaced(7..10, 1)), ); assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(7..10, 1))); + assert_eq!(change, Some(Change::AddedOrReplaced(7..10, 1))); assert_eq!(lines_blamed, []); assert_eq!( new_hunks_to_blame, @@ -512,7 +512,7 @@ mod process_change { suspect, // range_in_destination: 6..8 Some(new_unblamed_hunk(9..11, suspect, Offset::Added(3))), - Some(Change::Added(2..5, 0)), + Some(Change::AddedOrReplaced(2..5, 0)), ); assert_eq!( @@ -542,7 +542,7 @@ mod process_change { suspect, // range_in_destination: 5..15 Some(new_unblamed_hunk(4..15, suspect, Offset::Deleted(1))), - Some(Change::Added(4..5, 1)), + Some(Change::AddedOrReplaced(4..5, 1)), ); assert_eq!( @@ -662,11 +662,11 @@ mod process_change { suspect, // range_in_destination: 5..8 Some(new_unblamed_hunk(2..5, suspect, Offset::Deleted(3))), - Some(Change::Added(3..12, 2)), + Some(Change::AddedOrReplaced(3..12, 
2)), ); assert_eq!(hunk, None); - assert_eq!(change, Some(Change::Added(3..12, 2))); + assert_eq!(change, Some(Change::AddedOrReplaced(3..12, 2))); assert_eq!( lines_blamed, [BlameEntry { @@ -939,7 +939,7 @@ mod process_change { &mut offset_in_destination, suspect, None, - Some(Change::Added(22..25, 1)), + Some(Change::AddedOrReplaced(22..25, 1)), ); assert_eq!(hunk, None); @@ -1016,7 +1016,7 @@ mod process_changes { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..4, 0)]; + let changes = vec![Change::AddedOrReplaced(0..4, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1036,7 +1036,7 @@ mod process_changes { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..4, 0), Change::Unchanged(4..6)]; + let changes = vec![Change::AddedOrReplaced(0..4, 0), Change::Unchanged(4..6)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1056,7 +1056,11 @@ mod process_changes { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Unchanged(0..2), Change::Added(2..4, 0), Change::Unchanged(4..6)]; + let changes = vec![ + Change::Unchanged(0..2), + Change::AddedOrReplaced(2..4, 0), + Change::Unchanged(4..6), + ]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1082,7 +1086,11 @@ mod process_changes { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..1, 0), Change::Added(1..4, 0), Change::Unchanged(4..6)]; + let changes = vec![ + Change::AddedOrReplaced(0..1, 0), + Change::AddedOrReplaced(1..4, 0), + Change::Unchanged(4..6), + ]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1110,7 +1118,7 @@ mod process_changes { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..1, 0)]; + let changes = vec![Change::AddedOrReplaced(0..1, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1136,7 +1144,7 @@ mod process_changes { commit_id: suspect, }]; let hunks_to_blame = vec![new_unblamed_hunk(2..6, suspect_2, Offset::Added(2))]; - let changes = vec![Change::Added(0..1, 0)]; + let changes = vec![Change::AddedOrReplaced(0..1, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); assert_eq!( @@ -1167,7 +1175,7 @@ mod process_changes { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); let hunks_to_blame = vec![new_unblamed_hunk(0..6, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..4, 3), Change::Unchanged(4..6)]; + let changes = vec![Change::AddedOrReplaced(0..4, 3), Change::Unchanged(4..6)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, 
changes, suspect); assert_eq!( @@ -1187,7 +1195,7 @@ mod process_changes { let mut lines_blamed = Vec::new(); let suspect = ObjectId::null(gix_hash::Kind::Sha1); let hunks_to_blame = vec![new_unblamed_hunk(4..6, suspect, Offset::Added(1))]; - let changes = vec![Change::Added(0..3, 0), Change::Unchanged(3..5)]; + let changes = vec![Change::AddedOrReplaced(0..3, 0), Change::Unchanged(3..5)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!(lines_blamed, []); @@ -1205,7 +1213,7 @@ mod process_changes { commit_id: suspect, }]; let hunks_to_blame = vec![new_unblamed_hunk(1..3, suspect_2, Offset::Added(1))]; - let changes = vec![Change::Added(0..1, 2)]; + let changes = vec![Change::AddedOrReplaced(0..1, 2)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2); assert_eq!( @@ -1236,7 +1244,11 @@ mod process_changes { let suspect = ObjectId::null(gix_hash::Kind::Sha1); let mut lines_blamed = Vec::new(); let hunks_to_blame = vec![new_unblamed_hunk(0..4, suspect, Offset::Added(0))]; - let changes = vec![Change::Added(0..2, 0), Change::Unchanged(2..3), Change::Added(3..4, 0)]; + let changes = vec![ + Change::AddedOrReplaced(0..2, 0), + Change::Unchanged(2..3), + Change::AddedOrReplaced(3..4, 0), + ]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( @@ -1280,7 +1292,7 @@ mod process_changes { ]; let changes = vec![ Change::Unchanged(0..16), - Change::Added(16..17, 0), + Change::AddedOrReplaced(16..17, 0), Change::Unchanged(17..37), ]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); @@ -1331,7 +1343,7 @@ mod process_changes { new_unblamed_hunk(0..4, suspect, Offset::Added(0)), new_unblamed_hunk(4..7, suspect, Offset::Added(0)), ]; - let changes = vec![Change::Deleted(0, 3), Change::Added(0..4, 0)]; + let changes = vec![Change::Deleted(0, 3), Change::AddedOrReplaced(0..4, 0)]; let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect); assert_eq!( diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index f5b6ef5ea08..611197eaa0b 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -196,12 +196,12 @@ pub(crate) enum Either { } /// A single change between two blobs, or an unchanged region. -#[derive(Clone, Debug, PartialEq)] +#[derive(Debug, PartialEq)] pub enum Change { /// A range of tokens that wasn't changed. Unchanged(Range), /// `(added_line_range, num_deleted_in_before)` - Added(Range, u32), + AddedOrReplaced(Range, u32), /// `(line_to_start_deletion_at, num_deleted_in_before)` Deleted(u32, u32), } From 667e6262bcba1d95e32795faa79dc6b354da9a01 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 25 Dec 2024 14:58:51 +0100 Subject: [PATCH 14/16] Don't panic when suspect isn't known when converting unblamed to blame-entry This can apparently happen, and now we handle this case and keep looking for the remaining blame entries. Also, exit early when no work is left to be done. --- gix-blame/src/file/function.rs | 47 ++++++++++++++++++++++++---------- gix-blame/src/file/mod.rs | 23 ++++++++--------- gix-blame/src/types.rs | 2 +- 3 files changed, 45 insertions(+), 27 deletions(-) diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 0cac6874815..59a63786687 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -83,6 +83,11 @@ where .count() }; + // Binary or otherwise empty? 
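            // Editor's note (not part of the patch): when tokenization of the blamed blob yields no
            // lines, there is nothing to assign blame to, so the default (empty) `Outcome` is
            // returned right below.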
+ if num_lines_in_blamed == 0 { + return Ok(Outcome::default()); + } + let mut hunks_to_blame = vec![{ let range_in_blamed_file = 0..num_lines_in_blamed as u32; UnblamedHunk { @@ -94,6 +99,9 @@ where let mut out = Vec::new(); let mut diff_state = gix_diff::tree::State::default(); 'outer: while let Some(item) = traverse.next() { + if hunks_to_blame.is_empty() { + break; + } let commit = item.map_err(|err| Error::Traverse(err.into()))?; let suspect = commit.id; stats.commits_traversed += 1; @@ -106,12 +114,13 @@ where // remaining lines to it, even though we don’t explicitly check whether that is true // here. We could perhaps use diff-tree-to-tree to compare `suspect` // against an empty tree to validate this assumption. - unblamed_to_out(&mut hunks_to_blame, &mut out, suspect); - break; - } else { - // There is more, keep looking. - continue; + if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { + break 'outer; + } } + + // There is more, keep looking. + continue; } let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? else { @@ -162,8 +171,9 @@ where // implies that the file comes from a different parent, compared to which // it was modified, not added. } else { - unblamed_to_out(&mut hunks_to_blame, &mut out, suspect); - break; + if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { + break 'outer; + } } } gix_diff::tree::recorder::Change::Deletion { .. } => { @@ -209,13 +219,22 @@ fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec, out: &mut Vec, suspect: ObjectId) { - out.extend( - hunks_to_blame - .drain(..) - .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), - ); +/// +/// Return `true` if we are done because `hunks_to_blame` is empty. +fn unblamed_to_out_is_done( + hunks_to_blame: &mut Vec, + out: &mut Vec, + suspect: ObjectId, +) -> bool { + let mut without_suspect = Vec::new(); + out.extend(hunks_to_blame.drain(..).filter_map(|hunk| { + BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| { + without_suspect.push(hunk); + None + }) + })); + *hunks_to_blame = without_suspect; + hunks_to_blame.is_empty() } /// This function merges adjacent blame entries. It merges entries that are adjacent both in the diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index b2661e1619d..1afa77723e0 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -253,10 +253,13 @@ fn process_change( } } (Some(hunk), Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted))) => { - let range_in_suspect = hunk - .suspects - .get(&suspect) - .expect("Internal and we know suspect is present"); + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + return ( + None, + Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)), + ); + }; if line_number_in_destination < range_in_suspect.start { // <---> (hunk) @@ -431,7 +434,6 @@ impl UnblamedHunk { } fn remove_blame(&mut self, suspect: ObjectId) { - // TODO: figure out why it can try to remove suspects that don't exist. self.suspects.remove(&suspect); } } @@ -468,18 +470,15 @@ impl BlameEntry { } /// Create an offset from a portion of the *Blamed File*. 
- fn from_unblamed_hunk(mut unblamed_hunk: UnblamedHunk, commit_id: ObjectId) -> Self { - let range_in_source_file = unblamed_hunk - .suspects - .remove(&commit_id) - .expect("Private and only called when we now `commit_id` is in the suspect list"); + fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Option { + let range_in_source_file = unblamed_hunk.suspects.get(&commit_id)?; - Self { + Some(Self { start_in_blamed_file: unblamed_hunk.range_in_blamed_file.start, start_in_source_file: range_in_source_file.start, len: force_non_zero(range_in_source_file.len() as u32), commit_id, - } + }) } } diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index 611197eaa0b..e0c8843b4cd 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -8,7 +8,7 @@ use std::{ }; /// The outcome of [`file()`](crate::file()). -#[derive(Debug, Clone)] +#[derive(Debug, Default, Clone)] pub struct Outcome { /// One entry in sequential order, to associate a hunk in the blamed file with the source commit (and its lines) /// that introduced it. From 8196a433ed08de6b09b5cb187f8ce53fc2ab09ca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 25 Dec 2024 17:06:43 +0100 Subject: [PATCH 15/16] For linear histories, avoid redoing path lookup work Also, set a fixed and higher pack-cache to double typical pack-decode performance in mid-sized repositories. Additionally, normalize the input path. --- gitoxide-core/src/repository/blame.rs | 33 +++++++++++++++++---- gix-blame/src/file/function.rs | 41 +++++++++++++++++---------- 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs index c130770e9dc..fea525035fa 100644 --- a/gitoxide-core/src/repository/blame.rs +++ b/gitoxide-core/src/repository/blame.rs @@ -1,4 +1,5 @@ -use gix::bstr::BStr; +use gix::bstr::ByteSlice; +use gix::config::tree; use std::ffi::OsStr; pub fn blame_file( @@ -7,7 +8,31 @@ pub fn blame_file( out: impl std::io::Write, err: Option<&mut dyn std::io::Write>, ) -> anyhow::Result<()> { - repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); + { + let mut config = repo.config_snapshot_mut(); + if config.string(&tree::Core::DELTA_BASE_CACHE_LIMIT).is_none() { + config.set_value(&tree::Core::DELTA_BASE_CACHE_LIMIT, "100m")?; + } + } + let index = repo.index_or_empty()?; + repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&index)); + + let file = gix::path::os_str_into_bstr(file)?; + let specs = repo.pathspec( + false, + [file], + true, + &index, + gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping.adjust_for_bare(repo.is_bare()), + )?; + // TODO: there should be a way to normalize paths without going through patterns, at least in this case maybe? + // `Search` actually sorts patterns by excluding or not, all that can lead to strange results. + let file = specs + .search() + .patterns() + .map(|p| p.path().to_owned()) + .next() + .expect("exactly one pattern"); let suspect = repo.head()?.peel_to_commit_in_place()?; let traverse = @@ -15,9 +40,7 @@ pub fn blame_file( .with_commit_graph(repo.commit_graph_if_enabled()?) 
.build()?; let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; - let file_path: &BStr = gix::path::os_str_into_bstr(file)?; - - let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file_path)?; + let outcome = gix::blame::file(&repo.objects, traverse, &mut resource_cache, file.as_bstr())?; let statistics = outcome.statistics; write_blame_entries(out, outcome)?; diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 59a63786687..16384638e36 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -69,12 +69,12 @@ where let mut stats = Statistics::default(); let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new()); - let blamed_file_entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? + let blamed_file_entry_id = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? .ok_or_else(|| Error::FileMissing { - file_path: file_path.to_owned(), - commit_id: suspect, - })?; - let blamed_file_blob = odb.find_blob(&blamed_file_entry.oid, &mut buf)?.data.to_vec(); + file_path: file_path.to_owned(), + commit_id: suspect, + })?; + let blamed_file_blob = odb.find_blob(&blamed_file_entry_id, &mut buf)?.data.to_vec(); let num_lines_in_blamed = { let mut interner = gix_diff::blob::intern::Interner::new(blamed_file_blob.len() / 100); tokens_for_diffing(&blamed_file_blob) @@ -98,6 +98,7 @@ where let mut out = Vec::new(); let mut diff_state = gix_diff::tree::State::default(); + let mut previous_entry: Option<(ObjectId, ObjectId)> = None; 'outer: while let Some(item) = traverse.next() { if hunks_to_blame.is_empty() { break; @@ -123,15 +124,27 @@ where continue; } - let Some(entry) = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)? else { + let mut entry = previous_entry + .take() + .filter(|(id, _)| *id == suspect) + .map(|(_, entry)| entry); + if entry.is_none() { + entry = find_path_entry_in_commit(&odb, &suspect, file_path, &mut buf, &mut buf2, &mut stats)?; + } + + let Some(entry_id) = entry else { continue; }; - for parent_id in &parent_ids { - if let Some(parent_entry) = + for (pid, parent_id) in parent_ids.iter().enumerate() { + if let Some(parent_entry_id) = find_path_entry_in_commit(&odb, parent_id, file_path, &mut buf, &mut buf2, &mut stats)? { - if entry.oid == parent_entry.oid { + let no_change_in_entry = entry_id == parent_entry_id; + if pid == 0 { + previous_entry = Some((*parent_id, parent_entry_id)); + } + if no_change_in_entry { pass_blame_from_to(suspect, *parent_id, &mut hunks_to_blame); continue 'outer; } @@ -170,10 +183,8 @@ where // Do nothing under the assumption that this always (or almost always) // implies that the file comes from a different parent, compared to which // it was modified, not added. - } else { - if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { - break 'outer; - } + } else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) { + break 'outer; } } gix_diff::tree::recorder::Change::Deletion { .. 
} => { @@ -418,7 +429,7 @@ fn find_path_entry_in_commit( buf: &mut Vec, buf2: &mut Vec, stats: &mut Statistics, -) -> Result, Error> { +) -> Result, Error> { let commit_id = odb.find_commit(commit, buf)?.tree(); stats.commits_to_tree += 1; let tree_iter = odb.find_tree_iter(&commit_id, buf)?; @@ -430,7 +441,7 @@ fn find_path_entry_in_commit( file_path.split(|b| *b == b'/').inspect(|_| stats.trees_decoded += 1), )?; stats.trees_decoded -= 1; - Ok(res) + Ok(res.map(|e| e.oid)) } /// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them From e951e7dcdc1ea6b14a31df2f4abac3cf1ff0a86d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 25 Dec 2024 21:32:32 +0100 Subject: [PATCH 16/16] update crate-status with performance opportunities --- crate-status.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crate-status.md b/crate-status.md index 64cae232207..3274407dd48 100644 --- a/crate-status.md +++ b/crate-status.md @@ -373,8 +373,10 @@ Check out the [performance discussion][gix-diff-performance] as well. - [ ] commits to ignore - [ ] pass all blame-cornercases (from Git) * **Performance-Improvements** - - [ ] use commit-graph bloom filter for performance - - [ ] traverse input-commits in correct order without `compute_indegrees_to_depth()` + * Without the following the performance isn't competitive with Git. + 1. Implement custom graph walk which won't run down parents that don't have the path in question. + 2. Implement access of trees from commit-graph and fill that information into the traversal info by default. + 3. commit-graph with bloom filter, used to quickly check if a commit has a path. * [x] API documentation * [ ] Examples
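As a usage sketch (illustration only, not part of the patches above): the snippet below shows how the
reworked `BlameEntry` API is meant to be consumed, in the spirit of `write_blame_entries()` in
`gitoxide-core/src/repository/blame.rs`. The `print_blame` helper and its output format are invented
for illustration, and it assumes the `gix` crate re-exports the blame functionality as `gix::blame`,
as `gitoxide-core` does above.

    use std::io::Write;

    // Both ranges are derived from `start_in_blamed_file`/`start_in_source_file` and the shared `len`.
    fn print_blame(mut out: impl Write, outcome: gix::blame::Outcome) -> std::io::Result<()> {
        for (entry, lines_in_hunk) in outcome.entries_with_lines() {
            for ((line_in_blamed_file, line_in_source_file), line) in entry
                .range_in_blamed_file()
                .zip(entry.range_in_source_file())
                .zip(lines_in_hunk)
            {
                // Print 1-based line numbers followed by the line's length in bytes.
                writeln!(
                    out,
                    "{} {} {} ({} bytes)",
                    entry.commit_id,
                    line_in_source_file + 1,
                    line_in_blamed_file + 1,
                    line.len()
                )?;
            }
        }
        Ok(())
    }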