Skip to content

Commit

Permalink
Merge pull request GitoxideLabs#1750 from GitoxideLabs/odb-issue
Browse files Browse the repository at this point in the history
handle many packs better
  • Loading branch information
Byron authored Jan 14, 2025
2 parents 8df5ba2 + dbf079f commit e4fb21e
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 4 deletions.
2 changes: 1 addition & 1 deletion gix-odb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ pub struct Store {

/// The state below acts like a slot-map where each slot is mutable when the write lock is held, but readable independently of it.
/// This allows multiple files to be loaded concurrently if there are multiple handles requesting to load packs or additional indices.
/// The map is static and cannot typically change.
/// The map is static and cannot change.
/// It's read often and changed rarely.
pub(crate) files: Vec<types::MutableIndexAndPack>,

Expand Down
12 changes: 9 additions & 3 deletions gix-odb/src/store_impls/dynamic/load_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ impl super::Store {
Option::as_ref(&files_guard).expect("slot is set or we wouldn't know it points to this file");
if index_info.is_multi_index() && files.mtime() != mtime {
// we have a changed multi-pack index. We can't just change the existing slot as it may alter slot indices
// that are currently available. Instead we have to move what's there into a new slot, along with the changes,
// that are currently available. Instead, we have to move what's there into a new slot, along with the changes,
// and later free the slot or dispose of the index in the slot (like we do for removed/missing files).
index_paths_to_add.push_back((index_info, mtime, Some(slot_idx)));
// If the current slot is loaded, the soon-to-be copied multi-index path will be loaded as well.
Expand Down Expand Up @@ -304,6 +304,12 @@ impl super::Store {
needed: index_paths_to_add.len() + 1, /*the one currently popped off*/
});
}
// Don't allow duplicate indices, we need a 1:1 mapping.
if new_slot_map_indices.contains(&next_possibly_free_index) {
next_possibly_free_index = (next_possibly_free_index + 1) % self.files.len();
num_indices_checked += 1;
continue 'increment_slot_index;
}
let slot_index = next_possibly_free_index;
let slot = &self.files[slot_index];
next_possibly_free_index = (next_possibly_free_index + 1) % self.files.len();
Expand Down Expand Up @@ -502,7 +508,7 @@ impl super::Store {
}
// Unlike libgit2, do not sort by modification date, but by size and put the biggest indices first. That way
// the chance to hit an object should be higher. We leave it to the handle to sort by LRU.
// Git itself doesn't change the order which may safe time, but we want it to be stable which also helps some tests.
// Git itself doesn't change the order which may save time, but we want it to be stable which also helps some tests.
// NOTE: this will work well for well-packed repos or those using geometric repacking, but force us to open a lot
// of files when dealing with new objects, as there is no notion of recency here as would be with unmaintained
// repositories. Different algorithms should be provided, like newest packs first, and possibly a mix of both
Expand All @@ -512,7 +518,7 @@ impl super::Store {
Ok(indices_by_modification_time)
}

/// returns Ok<dest slot was empty> if the copy could happen because dest-slot was actually free or disposable , and Some(true) if it was empty
/// returns `Ok(dest_slot_was_empty)` if the copy could happen because dest-slot was actually free or disposable.
#[allow(clippy::too_many_arguments)]
fn try_set_index_slot(
lock: &parking_lot::MutexGuard<'_, ()>,
Expand Down
8 changes: 8 additions & 0 deletions gix/src/commit.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
//!
#![allow(clippy::empty_docs)]

use std::convert::Infallible;

/// An empty array of a type usable with the `gix::easy` API, to help declare that no parents should be used.
pub const NO_PARENT_IDS: [gix_hash::ObjectId; 0] = [];

Expand All @@ -22,6 +24,12 @@ pub enum Error {
ReferenceEdit(#[from] crate::reference::edit::Error),
}

impl From<std::convert::Infallible> for Error {
fn from(_value: Infallible) -> Self {
unreachable!("cannot be invoked")
}
}

///
#[cfg(feature = "revision")]
pub mod describe {
Expand Down
2 changes: 2 additions & 0 deletions gix/src/remote/connection/fetch/update_refs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ pub(crate) fn update(
let update = if is_implicit_tag {
Mode::ImplicitTagNotSentByRemote.into()
} else {
// Assure the ODB is not to blame for the missing object.
repo.try_find_object(remote_id)?;
Mode::RejectedSourceObjectNotFound { id: remote_id.into() }.into()
};
updates.push(update);
Expand Down
2 changes: 2 additions & 0 deletions gix/src/remote/connection/fetch/update_refs/update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ mod error {
PeelToId(#[from] crate::reference::peel::Error),
#[error("Failed to follow a symbolic reference to assure worktree isn't affected")]
FollowSymref(#[from] gix_ref::file::find::existing::Error),
#[error(transparent)]
FindObject(#[from] crate::object::find::Error),
}
}

Expand Down
68 changes: 68 additions & 0 deletions gix/tests/gix/remote/fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,74 @@ mod blocking_and_async_io {
try_repo_rw(name).unwrap()
}

/// For object-store slot counts of 1 through 3, clone a bare remote and then fetch repeatedly
/// after adding a new empty commit to the remote each round.
///
/// Every round must either succeed, in which case the object behind each local tracking
/// reference must be found, or fail with the "slotmap … too small" error.
#[test]
#[cfg(feature = "blocking-network-client")]
fn fetch_more_packs_than_can_be_handled() -> gix_testtools::Result {
    use gix::config::tree::User;
    use gix::interrupt::IS_INTERRUPTED;
    use gix_odb::store::init::Slots;
    use gix_testtools::tempfile;

    /// Commit the empty tree onto the branch `HEAD` points to, using the branch's
    /// current target as the only parent if the branch reference already exists.
    fn create_empty_commit(repo: &gix::Repository) -> anyhow::Result<()> {
        let branch = repo.head_name()?.expect("no detached head");
        let parent = repo.try_find_reference(branch.as_ref())?.map(|tip| tip.id());
        let empty_tree = gix::hash::ObjectId::empty_tree(repo.object_hash());
        repo.commit(branch.as_bstr(), "empty", empty_tree, parent)?;
        Ok(())
    }

    for max_packs in 1..=3 {
        // Set up a bare remote with a configured committer and a single commit.
        let remote_dir = tempfile::tempdir()?;
        let mut remote_repo = gix::init_bare(remote_dir.path())?;
        {
            let mut snapshot = remote_repo.config_snapshot_mut();
            snapshot.set_value(&User::NAME, "author")?;
            snapshot.set_value(&User::EMAIL, "[email protected]")?;
        }
        create_empty_commit(&remote_repo)?;

        // Clone it into a bare local repository whose object store is limited to `max_packs` slots.
        let local_dir = tempfile::tempdir()?;
        let open_options = gix::open::Options::isolated().object_store_slots(Slots::Given(max_packs));
        let (local_repo, _) = gix::clone::PrepareFetch::new(
            remote_repo.path(),
            local_dir.path(),
            gix::create::Kind::Bare,
            Default::default(),
            open_options,
        )?
        .fetch_only(gix::progress::Discard, &IS_INTERRUPTED)?;

        let remote = local_repo
            .branch_remote(
                local_repo.head_ref()?.expect("branch available").name().shorten(),
                Fetch,
            )
            .expect("remote is configured after clone")?;

        for _round_to_create_pack in 1..=11 {
            // Each round adds one commit on the remote and fetches it.
            create_empty_commit(&remote_repo)?;
            let fetch_result = remote
                .connect(Fetch)?
                .prepare_fetch(gix::progress::Discard, Default::default())?
                .receive(gix::progress::Discard, &IS_INTERRUPTED);

            match fetch_result {
                Ok(outcome) => {
                    let tracking_branches = outcome
                        .ref_map
                        .mappings
                        .into_iter()
                        .filter_map(|mapping| mapping.local);
                    for tracking_branch in tracking_branches {
                        let tracking_ref = local_repo.find_reference(&tracking_branch)?;
                        tracking_ref
                            .id()
                            .object()
                            .expect("object should be present after fetching, triggering pack refreshes works");
                        // Move the local HEAD branch forward so the next round builds on the fetched commit.
                        local_repo
                            .head_ref()?
                            .unwrap()
                            .set_target_id(tracking_ref.id(), "post fetch")?;
                    }
                }
                // The only acceptable failure is running out of slots.
                Err(err) => assert!(err
                    .to_string()
                    .starts_with("The slotmap turned out to be too small with ")),
            }
        }
    }
    Ok(())
}

#[test]
#[cfg(feature = "blocking-network-client")]
#[allow(clippy::result_large_err)]
Expand Down

0 comments on commit e4fb21e

Please sign in to comment.