From c7e04e976989435ba752628522d53ac39348b49b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoph=20R=C3=BC=C3=9Fler?=
Date: Sat, 21 Dec 2024 20:08:11 +0100
Subject: [PATCH 1/2] feat: add first 'debug' version of `gix log`

It's primarily meant to better understand `gix blame`.
---
 gitoxide-core/src/repository/log.rs | 170 ++++++++++++++++++++++++++++
 gitoxide-core/src/repository/mod.rs |   1 +
 src/plumbing/main.rs                |   9 ++
 src/plumbing/options/mod.rs         |  13 +++
 4 files changed, 193 insertions(+)
 create mode 100644 gitoxide-core/src/repository/log.rs

diff --git a/gitoxide-core/src/repository/log.rs b/gitoxide-core/src/repository/log.rs
new file mode 100644
index 00000000000..c6ea5177091
--- /dev/null
+++ b/gitoxide-core/src/repository/log.rs
@@ -0,0 +1,170 @@
+use gix::bstr::{BStr, BString, ByteSlice};
+use gix::prelude::FindExt;
+use gix::ObjectId;
+
+pub fn log(mut repo: gix::Repository, out: &mut dyn std::io::Write, path: Option<BString>) -> anyhow::Result<()> {
+    repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?));
+
+    if let Some(path) = path {
+        log_file(repo, out, path)
+    } else {
+        log_all(repo, out)
+    }
+}
+
+fn log_all(repo: gix::Repository, out: &mut dyn std::io::Write) -> Result<(), anyhow::Error> {
+    let head = repo.head()?.peel_to_commit_in_place()?;
+    let topo = gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [head.id], None::<Vec<gix::ObjectId>>)
+        .build()?;
+
+    for info in topo {
+        let info = info?;
+
+        write_info(&repo, &mut *out, &info)?;
+    }
+
+    Ok(())
+}
+
+fn log_file(repo: gix::Repository, out: &mut dyn std::io::Write, path: BString) -> anyhow::Result<()> {
+    let head = repo.head()?.peel_to_commit_in_place()?;
+    let topo = gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [head.id], None::<Vec<gix::ObjectId>>)
+        .build()?;
+
+    'outer: for info in topo {
+        let info = info?;
+        let commit = repo.find_commit(info.id).unwrap();
+
+        let tree = repo.find_tree(commit.tree_id().unwrap()).unwrap();
+
+        let entry = tree.lookup_entry_by_path(path.to_path().unwrap()).unwrap();
+
+        let Some(entry) = entry else {
+            continue;
+        };
+
+        let parent_ids: Vec<_> = commit.parent_ids().collect();
+
+        if parent_ids.is_empty() {
+            // We confirmed above that the file is in `commit`'s tree. If `parent_ids` is
+            // empty, the file was added in `commit`.
+
+            write_info(&repo, out, &info)?;
+
+            break;
+        }
+
+        let parent_ids_with_changes: Vec<_> = parent_ids
+            .clone()
+            .into_iter()
+            .filter(|parent_id| {
+                let parent_commit = repo.find_commit(*parent_id).unwrap();
+                let parent_tree = repo.find_tree(parent_commit.tree_id().unwrap()).unwrap();
+                let parent_entry = parent_tree.lookup_entry_by_path(path.to_path().unwrap()).unwrap();
+
+                if let Some(parent_entry) = parent_entry {
+                    if entry.oid() == parent_entry.oid() {
+                        // The blobs storing the file in `entry` and `parent_entry` are
+                        // identical which means the file was not changed in `commit`.
+
+                        return false;
+                    }
+                }
+
+                true
+            })
+            .collect();
+
+        if parent_ids.len() != parent_ids_with_changes.len() {
+            // At least one parent had an identical version of the file which means it was not
+            // changed in `commit`.
+
+            continue;
+        }
+
+        for parent_id in parent_ids_with_changes {
+            let modifications =
+                get_modifications_for_file_path(&repo.objects, path.as_ref(), commit.id, parent_id.into());
+
+            if !modifications.is_empty() {
+                write_info(&repo, &mut *out, &info)?;
+
+                // We continue because we’ve already determined that this commit is part of the
+                // file’s history, so there’s no need to compare it to its other parents.
+
+                continue 'outer;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+fn write_info(
+    repo: &gix::Repository,
+    mut out: impl std::io::Write,
+    info: &gix::traverse::commit::Info,
+) -> Result<(), std::io::Error> {
+    let commit = repo.find_commit(info.id).unwrap();
+
+    let message = commit.message_raw_sloppy();
+    let title = message.lines().next();
+
+    writeln!(
+        out,
+        "{} {}",
+        info.id.to_hex_with_len(8),
+        title.map_or_else(|| "".into(), BString::from)
+    )?;
+
+    Ok(())
+}
+
+fn get_modifications_for_file_path(
+    odb: impl gix::objs::Find + gix::objs::FindHeader,
+    file_path: &BStr,
+    id: ObjectId,
+    parent_id: ObjectId,
+) -> Vec<gix::diff::tree::recorder::Change> {
+    let mut buffer = Vec::new();
+
+    let parent = odb.find_commit(&parent_id, &mut buffer).unwrap();
+
+    let mut buffer = Vec::new();
+    let parent_tree_iter = odb
+        .find(&parent.tree(), &mut buffer)
+        .unwrap()
+        .try_into_tree_iter()
+        .unwrap();
+
+    let mut buffer = Vec::new();
+    let commit = odb.find_commit(&id, &mut buffer).unwrap();
+
+    let mut buffer = Vec::new();
+    let tree_iter = odb
+        .find(&commit.tree(), &mut buffer)
+        .unwrap()
+        .try_into_tree_iter()
+        .unwrap();
+
+    let mut recorder = gix::diff::tree::Recorder::default();
+    gix::diff::tree(
+        parent_tree_iter,
+        tree_iter,
+        gix::diff::tree::State::default(),
+        &odb,
+        &mut recorder,
+    )
+    .unwrap();
+
+    recorder
+        .records
+        .iter()
+        .filter(|change| match change {
+            gix::diff::tree::recorder::Change::Modification { path, .. } => path == file_path,
+            gix::diff::tree::recorder::Change::Addition { path, .. } => path == file_path,
+            _ => false,
+        })
+        .cloned()
+        .collect()
+}
diff --git a/gitoxide-core/src/repository/mod.rs b/gitoxide-core/src/repository/mod.rs
index 489d5c32e66..c9044f99cd9 100644
--- a/gitoxide-core/src/repository/mod.rs
+++ b/gitoxide-core/src/repository/mod.rs
@@ -46,6 +46,7 @@ pub mod commitgraph;
 mod fsck;
 pub use fsck::function as fsck;
 pub mod index;
+pub mod log;
 pub mod mailmap;
 mod merge_base;
 pub use merge_base::merge_base;
diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs
index b0a69339c29..2391dd14cd3 100644
--- a/src/plumbing/main.rs
+++ b/src/plumbing/main.rs
@@ -269,6 +269,15 @@ pub fn main() -> Result<()> {
                 },
             ),
         },
+        Subcommands::Log(crate::plumbing::options::log::Platform { pathspec }) => prepare_and_run(
+            "log",
+            trace,
+            verbose,
+            progress,
+            progress_keep_open,
+            None,
+            move |_progress, out, _err| core::repository::log::log(repository(Mode::Lenient)?, out, pathspec),
+        ),
         Subcommands::Worktree(crate::plumbing::options::worktree::Platform { cmd }) => match cmd {
             crate::plumbing::options::worktree::SubCommands::List => prepare_and_run(
                 "worktree-list",
diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs
index c1a3ec670d7..b0928c0d426 100644
--- a/src/plumbing/options/mod.rs
+++ b/src/plumbing/options/mod.rs
@@ -146,6 +146,7 @@ pub enum Subcommands {
     MergeBase(merge_base::Command),
     Merge(merge::Platform),
     Diff(diff::Platform),
+    Log(log::Platform),
     Worktree(worktree::Platform),
     /// Subcommands that need no git repository to run.
     #[clap(subcommand)]
@@ -499,6 +500,18 @@ pub mod diff {
     }
 }
 
+pub mod log {
+    use gix::bstr::BString;
+
+    /// List all commits in a repository, optionally limited to those that change a given path
+    #[derive(Debug, clap::Parser)]
+    pub struct Platform {
+        /// The git path specification to show a log for.
+        #[clap(value_parser = crate::shared::AsBString)]
+        pub pathspec: Option<BString>,
+    }
+}
+
 pub mod config {
     use gix::bstr::BString;
 

From 162887e2d3968639548f7d3581cb1f47bfe38341 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Sat, 21 Dec 2024 20:10:23 +0100
Subject: [PATCH 2/2] trim `gix log` to be mergeable.

For now, let's ignore file-based logs as it would take a moment to make it fast enough.
Git can diff with pathspecs, which is probably something that would be needed to be fast here.
In any case, more research would be needed to be competitive in performance.

Maybe one day this will be re-added even in its current form to help with `gix blame` debugging,
but I'd hope that there will be better ways to validate it.
---
 gitoxide-core/src/repository/log.rs | 128 +---------------------------
 1 file changed, 4 insertions(+), 124 deletions(-)

diff --git a/gitoxide-core/src/repository/log.rs b/gitoxide-core/src/repository/log.rs
index c6ea5177091..c3eee8ec47d 100644
--- a/gitoxide-core/src/repository/log.rs
+++ b/gitoxide-core/src/repository/log.rs
@@ -1,6 +1,5 @@
-use gix::bstr::{BStr, BString, ByteSlice};
-use gix::prelude::FindExt;
-use gix::ObjectId;
+use anyhow::bail;
+use gix::bstr::{BString, ByteSlice};
 
 pub fn log(mut repo: gix::Repository, out: &mut dyn std::io::Write, path: Option<BString>) -> anyhow::Result<()> {
     repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?));
@@ -26,78 +25,8 @@ fn log_all(repo: gix::Repository, out: &mut dyn std::io::Write) -> Result<(), an
     Ok(())
 }
 
-fn log_file(repo: gix::Repository, out: &mut dyn std::io::Write, path: BString) -> anyhow::Result<()> {
-    let head = repo.head()?.peel_to_commit_in_place()?;
-    let topo = gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [head.id], None::<Vec<gix::ObjectId>>)
-        .build()?;
-
-    'outer: for info in topo {
-        let info = info?;
-        let commit = repo.find_commit(info.id).unwrap();
-
-        let tree = repo.find_tree(commit.tree_id().unwrap()).unwrap();
-
-        let entry = tree.lookup_entry_by_path(path.to_path().unwrap()).unwrap();
-
-        let Some(entry) = entry else {
-            continue;
-        };
-
-        let parent_ids: Vec<_> = commit.parent_ids().collect();
-
-        if parent_ids.is_empty() {
-            // We confirmed above that the file is in `commit`'s tree. If `parent_ids` is
-            // empty, the file was added in `commit`.
-
-            write_info(&repo, out, &info)?;
-
-            break;
-        }
-
-        let parent_ids_with_changes: Vec<_> = parent_ids
-            .clone()
-            .into_iter()
-            .filter(|parent_id| {
-                let parent_commit = repo.find_commit(*parent_id).unwrap();
-                let parent_tree = repo.find_tree(parent_commit.tree_id().unwrap()).unwrap();
-                let parent_entry = parent_tree.lookup_entry_by_path(path.to_path().unwrap()).unwrap();
-
-                if let Some(parent_entry) = parent_entry {
-                    if entry.oid() == parent_entry.oid() {
-                        // The blobs storing the file in `entry` and `parent_entry` are
-                        // identical which means the file was not changed in `commit`.
-
-                        return false;
-                    }
-                }
-
-                true
-            })
-            .collect();
-
-        if parent_ids.len() != parent_ids_with_changes.len() {
-            // At least one parent had an identical version of the file which means it was not
-            // changed in `commit`.
-
-            continue;
-        }
-
-        for parent_id in parent_ids_with_changes {
-            let modifications =
-                get_modifications_for_file_path(&repo.objects, path.as_ref(), commit.id, parent_id.into());
-
-            if !modifications.is_empty() {
-                write_info(&repo, &mut *out, &info)?;
-
-                // We continue because we’ve already determined that this commit is part of the
-                // file’s history, so there’s no need to compare it to its other parents.
-
-                continue 'outer;
-            }
-        }
-    }
-
-    Ok(())
+fn log_file(_repo: gix::Repository, _out: &mut dyn std::io::Write, _path: BString) -> anyhow::Result<()> {
+    bail!("File-based lookup isn't yet implemented in a way that is competitively fast");
 }
 
 fn write_info(
@@ -119,52 +48,3 @@ fn write_info(
 
     Ok(())
 }
-
-fn get_modifications_for_file_path(
-    odb: impl gix::objs::Find + gix::objs::FindHeader,
-    file_path: &BStr,
-    id: ObjectId,
-    parent_id: ObjectId,
-) -> Vec<gix::diff::tree::recorder::Change> {
-    let mut buffer = Vec::new();
-
-    let parent = odb.find_commit(&parent_id, &mut buffer).unwrap();
-
-    let mut buffer = Vec::new();
-    let parent_tree_iter = odb
-        .find(&parent.tree(), &mut buffer)
-        .unwrap()
-        .try_into_tree_iter()
-        .unwrap();
-
-    let mut buffer = Vec::new();
-    let commit = odb.find_commit(&id, &mut buffer).unwrap();
-
-    let mut buffer = Vec::new();
-    let tree_iter = odb
-        .find(&commit.tree(), &mut buffer)
-        .unwrap()
-        .try_into_tree_iter()
-        .unwrap();
-
-    let mut recorder = gix::diff::tree::Recorder::default();
-    gix::diff::tree(
-        parent_tree_iter,
-        tree_iter,
-        gix::diff::tree::State::default(),
-        &odb,
-        &mut recorder,
-    )
-    .unwrap();
-
-    recorder
-        .records
-        .iter()
-        .filter(|change| match change {
-            gix::diff::tree::recorder::Change::Modification { path, .. } => path == file_path,
-            gix::diff::tree::recorder::Change::Addition { path, .. } => path == file_path,
-            _ => false,
-        })
-        .cloned()
-        .collect()
-}