-
-
Notifications
You must be signed in to change notification settings - Fork 321
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: split data::output::count::objects into files(#67)
- Loading branch information
Showing
13 changed files
with
773 additions
and
759 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
use std::{marker::PhantomData, sync::Arc}; | ||
|
||
use git_features::{parallel, progress::Progress}; | ||
|
||
use super::Outcome; | ||
use crate::data::output; | ||
|
||
pub struct Statistics<E, P> { | ||
total: Outcome, | ||
counts: Vec<output::Count>, | ||
progress: Arc<parking_lot::Mutex<P>>, | ||
_err: PhantomData<E>, | ||
} | ||
|
||
impl<E, P> Statistics<E, P> | ||
where | ||
P: Progress, | ||
{ | ||
pub fn new(progress: Arc<parking_lot::Mutex<P>>) -> Self { | ||
Statistics { | ||
total: Default::default(), | ||
counts: Default::default(), | ||
progress, | ||
_err: PhantomData::default(), | ||
} | ||
} | ||
} | ||
|
||
impl<E, P> parallel::Reduce for Statistics<E, P> | ||
where | ||
P: Progress, | ||
{ | ||
type Input = Result<(Vec<output::Count>, Outcome), E>; | ||
type FeedProduce = (); | ||
type Output = (Vec<output::Count>, Outcome); | ||
type Error = E; | ||
|
||
fn feed(&mut self, item: Self::Input) -> Result<Self::FeedProduce, Self::Error> { | ||
let (counts, mut stats) = item?; | ||
stats.total_objects = counts.len(); | ||
self.total.aggregate(stats); | ||
self.progress.lock().inc_by(counts.len()); | ||
self.counts.extend(counts); | ||
Ok(()) | ||
} | ||
|
||
fn finalize(self) -> Result<Self::Output, Self::Error> { | ||
Ok((self.counts, self.total)) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
pub mod changes { | ||
use git_diff::tree::{ | ||
visit::{Action, Change}, | ||
Visit, | ||
}; | ||
use git_hash::ObjectId; | ||
use git_object::bstr::BStr; | ||
|
||
use crate::data::output::count::objects_impl::util::InsertImmutable; | ||
|
||
pub struct AllNew<'a, H> { | ||
pub objects: Vec<ObjectId>, | ||
all_seen: &'a H, | ||
} | ||
|
||
impl<'a, H> AllNew<'a, H> | ||
where | ||
H: InsertImmutable<ObjectId>, | ||
{ | ||
pub fn new(all_seen: &'a H) -> Self { | ||
AllNew { | ||
objects: Default::default(), | ||
all_seen, | ||
} | ||
} | ||
pub fn clear(&mut self) { | ||
self.objects.clear(); | ||
} | ||
} | ||
|
||
impl<'a, H> Visit for AllNew<'a, H> | ||
where | ||
H: InsertImmutable<ObjectId>, | ||
{ | ||
fn pop_front_tracked_path_and_set_current(&mut self) {} | ||
|
||
fn push_back_tracked_path_component(&mut self, _component: &BStr) {} | ||
|
||
fn push_path_component(&mut self, _component: &BStr) {} | ||
|
||
fn pop_path_component(&mut self) {} | ||
|
||
fn visit(&mut self, change: Change) -> Action { | ||
match change { | ||
Change::Addition { oid, .. } | Change::Modification { oid, .. } => { | ||
let inserted = self.all_seen.insert(oid); | ||
if inserted { | ||
self.objects.push(oid); | ||
} | ||
} | ||
Change::Deletion { .. } => {} | ||
}; | ||
Action::Continue | ||
} | ||
} | ||
} | ||
|
||
pub mod traverse { | ||
use git_hash::ObjectId; | ||
use git_object::{bstr::BStr, tree::EntryRef}; | ||
use git_traverse::tree::{visit::Action, Visit}; | ||
|
||
use crate::data::output::count::objects_impl::util::InsertImmutable; | ||
|
||
pub struct AllUnseen<'a, H> { | ||
pub non_trees: Vec<ObjectId>, | ||
all_seen: &'a H, | ||
} | ||
|
||
impl<'a, H> AllUnseen<'a, H> | ||
where | ||
H: InsertImmutable<ObjectId>, | ||
{ | ||
pub fn new(all_seen: &'a H) -> Self { | ||
AllUnseen { | ||
non_trees: Default::default(), | ||
all_seen, | ||
} | ||
} | ||
pub fn clear(&mut self) { | ||
self.non_trees.clear(); | ||
} | ||
} | ||
|
||
impl<'a, H> Visit for AllUnseen<'a, H> | ||
where | ||
H: InsertImmutable<ObjectId>, | ||
{ | ||
fn pop_front_tracked_path_and_set_current(&mut self) {} | ||
|
||
fn push_back_tracked_path_component(&mut self, _component: &BStr) {} | ||
|
||
fn push_path_component(&mut self, _component: &BStr) {} | ||
|
||
fn pop_path_component(&mut self) {} | ||
|
||
fn visit_tree(&mut self, entry: &EntryRef<'_>) -> Action { | ||
let inserted = self.all_seen.insert(entry.oid.to_owned()); | ||
if inserted { | ||
Action::Continue | ||
} else { | ||
Action::Skip | ||
} | ||
} | ||
|
||
fn visit_nontree(&mut self, entry: &EntryRef<'_>) -> Action { | ||
let inserted = self.all_seen.insert(entry.oid.to_owned()); | ||
if inserted { | ||
self.non_trees.push(entry.oid.to_owned()); | ||
} | ||
Action::Continue | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/// Information gathered during the run of [`iter_from_objects()`][super::objects()]. | ||
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] | ||
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] | ||
pub struct Outcome { | ||
/// The amount of objects provided to start the iteration. | ||
pub input_objects: usize, | ||
/// The amount of objects that have been expanded from the input source. | ||
/// It's desirable to do that as expansion happens on multiple threads, allowing the amount of input objects to be small. | ||
/// `expanded_objects - decoded_objects` is the 'cheap' object we found without decoding the object itself. | ||
pub expanded_objects: usize, | ||
/// The amount of fully decoded objects. These are the most expensive as they are fully decoded | ||
pub decoded_objects: usize, | ||
/// The total amount of encountered objects. Should be `expanded_objects + input_objects`. | ||
pub total_objects: usize, | ||
} | ||
|
||
impl Outcome { | ||
pub(in crate::data::output::count) fn aggregate( | ||
&mut self, | ||
Outcome { | ||
input_objects, | ||
decoded_objects, | ||
expanded_objects, | ||
total_objects, | ||
}: Self, | ||
) { | ||
self.input_objects += input_objects; | ||
self.decoded_objects += decoded_objects; | ||
self.expanded_objects += expanded_objects; | ||
self.total_objects += total_objects; | ||
} | ||
} | ||
|
||
/// Determines how input objects are handled.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub enum ObjectExpansion {
    /// Only transform the input objects into pack entries, nothing else.
    AsIs,
    /// If the input object is a commit, turn it into a pack entry. Additionally obtain its tree and turn it into a
    /// pack entry along with all of its contents, that is nested trees and any other objects reachable from it.
    /// Otherwise, the same as [`AsIs`][ObjectExpansion::AsIs].
    ///
    /// This mode is useful if all reachable objects should be added, as in cloning a repository.
    TreeContents,
    /// If the input is a commit, obtain its ancestors and turn them into pack entries. Obtain the ancestor trees
    /// along with the commit's tree and turn them into pack entries. Finally obtain the added or changed objects
    /// when comparing the ancestor trees with the current tree and turn them into entries as well.
    /// Otherwise, the same as [`AsIs`][ObjectExpansion::AsIs].
    ///
    /// This mode is useful to build a pack containing only new objects compared to a previous state.
    TreeAdditionsComparedToAncestor,
}

impl Default for ObjectExpansion {
    /// Input objects are used [as they are][ObjectExpansion::AsIs] by default.
    fn default() -> Self {
        Self::AsIs
    }
}
|
||
/// Configuration options for the pack generation functions provied in [this module][crate::data::output]. | ||
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)] | ||
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] | ||
pub struct Options { | ||
/// The amount of threads to use at most when resolving the pack. If `None`, all logical cores are used. | ||
/// If more than one thread is used, the order of returned [counts][crate::data::output::Count] is not deterministic anymore | ||
/// especially when tree traversal is involved. Thus deterministic ordering requires `Some(1)` to be set. | ||
pub thread_limit: Option<usize>, | ||
/// The amount of objects per chunk or unit of work to be sent to threads for processing | ||
pub chunk_size: usize, | ||
/// The way input objects are handled | ||
pub input_object_expansion: ObjectExpansion, | ||
/// The size of a per-thread object cache in bytes to accelerate tree diffs in conjunction | ||
/// with [ObjectExpansion::TreeAdditionsComparedToAncestor]. | ||
/// | ||
/// If zero, the cache is disabled but in a costly way. Consider using a low value instead. | ||
/// | ||
/// Defaults to 10 megabytes which usually leads to 2.5x speedups. | ||
#[cfg(feature = "object-cache-dynamic")] | ||
pub object_cache_size_in_bytes: usize, | ||
} | ||
|
||
impl Default for Options { | ||
fn default() -> Self { | ||
Options { | ||
thread_limit: None, | ||
chunk_size: 10, | ||
input_object_expansion: Default::default(), | ||
#[cfg(feature = "object-cache-dynamic")] | ||
object_cache_size_in_bytes: 10 * 1024 * 1024, | ||
} | ||
} | ||
} | ||
|
||
/// The error returned by the pack generation iterator [bytes::FromEntriesIter][crate::data::output::bytes::FromEntriesIter]. | ||
// NOTE(review): the variants below wrap commit decoding, tree traversal and tree diff errors - confirm that | ||
// the iterator linked above is still the API which returns this type. | ||
// | ||
// `FindErr` and `IterErr` are caller-chosen error types, see the `FindExisting` and `InputIteration` variants. | ||
#[derive(Debug, thiserror::Error)] | ||
#[allow(missing_docs)] | ||
pub enum Error<FindErr, IterErr> | ||
where | ||
FindErr: std::error::Error + 'static, | ||
IterErr: std::error::Error + 'static, | ||
{ | ||
// Wraps an object decode error and displays it transparently; going by the name it stems from decoding a commit. | ||
#[error(transparent)] | ||
CommitDecode(git_object::decode::Error), | ||
// Wraps a `FindErr`. Due to `#[from]` it can be created from a `FindErr` with `?` or `into()`. | ||
#[error(transparent)] | ||
FindExisting(#[from] FindErr), | ||
// Wraps an `IterErr`; presumably produced by the iterator providing the input objects - TODO confirm. | ||
#[error(transparent)] | ||
InputIteration(IterErr), | ||
// Wraps the error of a breadth-first tree traversal. | ||
#[error(transparent)] | ||
TreeTraverse(git_traverse::tree::breadthfirst::Error), | ||
// Wraps the error of computing changes between trees. | ||
#[error(transparent)] | ||
TreeChanges(git_diff::tree::changes::Error), | ||
// Carries no data and is displayed as "Operation interrupted". | ||
#[error("Operation interrupted")] | ||
Interrupted, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
pub trait InsertImmutable<Item: Eq + std::hash::Hash> { | ||
fn insert(&self, item: Item) -> bool; | ||
} | ||
|
||
mod trait_impls { | ||
use std::{cell::RefCell, collections::HashSet, hash::Hash}; | ||
|
||
use dashmap::DashSet; | ||
|
||
use super::InsertImmutable; | ||
|
||
impl<T: Eq + Hash> InsertImmutable<T> for DashSet<T> { | ||
fn insert(&self, item: T) -> bool { | ||
self.insert(item) | ||
} | ||
} | ||
|
||
impl<T: Eq + Hash> InsertImmutable<T> for RefCell<HashSet<T>> { | ||
fn insert(&self, item: T) -> bool { | ||
self.borrow_mut().insert(item) | ||
} | ||
} | ||
} | ||
|
||
/// An iterator adapter which yields the items of `iter` in `Vec`s of at most `size` items each.
///
/// Only the last chunk may contain fewer than `size` items, and empty chunks are never produced.
pub struct Chunks<I> {
    /// The maximum amount of items per chunk. A value of `0` is handled like `1`.
    pub size: usize,
    /// The iterator to obtain the items from.
    pub iter: I,
}

impl<I, Item> Iterator for Chunks<I>
where
    I: Iterator<Item = Item>,
{
    type Item = Vec<Item>;

    /// Return the next chunk, or `None` if the underlying iterator did not yield any more items.
    fn next(&mut self) -> Option<Self::Item> {
        // A size of zero previously underflowed the countdown of remaining items, causing a panic
        // in debug builds and a single chunk with all remaining items otherwise. Clamping keeps
        // the iteration well-defined without dropping any item.
        let size = self.size.max(1);

        let mut chunk = Vec::with_capacity(size);
        chunk.extend(self.iter.by_ref().take(size));

        if chunk.is_empty() {
            None
        } else {
            Some(chunk)
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.