diff --git a/git-pack/src/index/write/encode.rs b/git-pack/src/index/write/encode.rs
index 56be7a9468a..6b0e1f1926d 100644
--- a/git-pack/src/index/write/encode.rs
+++ b/git-pack/src/index/write/encode.rs
@@ -1,5 +1,8 @@
 use std::{cmp::Ordering, collections::VecDeque, io};
 
+pub(crate) const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
+pub(crate) const HIGH_BIT: u32 = 0x8000_0000;
+
 use byteorder::{BigEndian, WriteBytesExt};
 use git_features::{
     hash,
@@ -34,9 +37,6 @@ pub(crate) fn write_to(
     out.write_all(V2_SIGNATURE)?;
     out.write_u32::<BigEndian>(kind as u32)?;
 
-    const LARGE_OFFSET_THRESHOLD: u64 = 0x7fff_ffff;
-    const HIGH_BIT: u32 = 0x8000_0000;
-
     let needs_64bit_offsets =
         entries_sorted_by_oid.back().expect("at least one pack entry").offset > LARGE_OFFSET_THRESHOLD;
     progress.init(Some(4), progress::steps());
diff --git a/git-pack/src/multi_index/chunk.rs b/git-pack/src/multi_index/chunk.rs
index c890bfc2a97..cf1054aba5f 100644
--- a/git-pack/src/multi_index/chunk.rs
+++ b/git-pack/src/multi_index/chunk.rs
@@ -173,11 +173,41 @@ pub mod lookup {
 
 /// Information about the offsets table.
 pub mod offsets {
+    use crate::multi_index;
+    use byteorder::{BigEndian, WriteBytesExt};
     use std::ops::Range;
 
     /// The id uniquely identifying the offsets table.
     pub const ID: git_chunk::Id = *b"OOFF";
 
+    /// Return the amount of bytes needed to offset data for `entries`.
+    pub fn storage_size(entries: usize) -> u64 {
+        (entries * (4 /*pack-id*/ + 4/* pack offset */)) as u64
+    }
+
+    /// Returns the amount of entries that need a u64 offset.
+    pub(crate) fn write(
+        sorted_entries: &[multi_index::write::Entry],
+        mut out: impl std::io::Write,
+    ) -> std::io::Result<u32> {
+        use crate::index::write::encode::{HIGH_BIT, LARGE_OFFSET_THRESHOLD};
+        let mut num_large_offsets = 0u32;
+
+        for entry in sorted_entries {
+            out.write_u32::<BigEndian>(entry.pack_index)?;
+
+            let offset = if entry.pack_offset > LARGE_OFFSET_THRESHOLD {
+                let res = num_large_offsets | HIGH_BIT;
+                num_large_offsets += 1;
+                res
+            } else {
+                entry.pack_offset as u32
+            };
+            out.write_u32::<BigEndian>(offset)?;
+        }
+        Ok(num_large_offsets)
+    }
+
     /// Returns true if the `offset` range seems to match the size required for `num_objects`.
     pub fn is_valid(offset: &Range<usize>, num_objects: u32) -> bool {
         let entry_size = 4 /* pack-id */ + 4 /* pack-offset */;
diff --git a/git-pack/src/multi_index/write.rs b/git-pack/src/multi_index/write.rs
index 6c0700deca8..b1fe0c5eafa 100644
--- a/git-pack/src/multi_index/write.rs
+++ b/git-pack/src/multi_index/write.rs
@@ -26,8 +26,8 @@ pub use error::Error;
 /// An entry suitable for sorting and writing
 pub(crate) struct Entry {
     pub(crate) id: git_hash::ObjectId,
-    pack_index: u32,
-    pack_offset: crate::data::Offset,
+    pub(crate) pack_index: u32,
+    pub(crate) pack_offset: crate::data::Offset,
     /// Used for sorting in case of duplicates
     index_mtime: SystemTime,
 }
@@ -120,6 +120,10 @@ impl multi_index::File {
             multi_index::chunk::lookup::ID,
             multi_index::chunk::lookup::storage_size(entries.len(), object_hash),
         );
+        cf.plan_chunk(
+            multi_index::chunk::offsets::ID,
+            multi_index::chunk::offsets::storage_size(entries.len()),
+        );
 
         let bytes_written = Self::write_header(
             &mut out,
@@ -128,6 +132,7 @@ impl multi_index::File {
             object_hash,
         )?;
         let mut chunk_write = cf.into_write(&mut out, bytes_written)?;
+        let mut num_large_offsets = None;
         while let Some(chunk_to_write) = chunk_write.next_chunk() {
             match chunk_to_write {
                 multi_index::chunk::index_names::ID => {
@@ -135,6 +140,9 @@ impl multi_index::File {
                 }
                 multi_index::chunk::fanout::ID => multi_index::chunk::fanout::write(&entries, &mut chunk_write)?,
                 multi_index::chunk::lookup::ID => multi_index::chunk::lookup::write(&entries, &mut chunk_write)?,
+                multi_index::chunk::offsets::ID => {
+                    num_large_offsets = multi_index::chunk::offsets::write(&entries, &mut chunk_write)?.into();
+                }
                 unknown => unreachable!("BUG: forgot to implement chunk {:?}", std::str::from_utf8(&unknown)),
             }
         }