From 3ca04e355a413975e55adf8b204d6962a9341d32 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 20 Dec 2021 16:21:39 +0800 Subject: [PATCH] read and validate fanout chunk (#279) --- git-chunk/src/lib.rs | 47 ++++++++++++++++++++++++++++++++++--- git-pack/src/multi_index.rs | 28 ++++++++++++++++++---- 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/git-chunk/src/lib.rs b/git-chunk/src/lib.rs index 12682866fcf..d5ef451b45d 100644 --- a/git-chunk/src/lib.rs +++ b/git-chunk/src/lib.rs @@ -4,12 +4,22 @@ #![deny(unsafe_code)] #![deny(rust_2018_idioms, missing_docs)] +use std::convert::TryInto; +use std::ops::Range; + /// An identifier to describe the kind of chunk, unique within a chunk file. pub type Kind = u32; /// A special value denoting the end of the chunk file table of contents. pub const SENTINEL: Kind = 0; +/// Turn a u64 Range into a usize range safely, to make chunk ranges useful in memory mapped files. +pub fn into_usize_range(Range { start, end }: Range) -> Option> { + let start = start.try_into().ok()?; + let end = end.try_into().ok()?; + Some(Range { start, end }) +} + /// pub mod file { /// @@ -18,7 +28,7 @@ pub mod file { use std::ops::Range; /// - pub mod not_found { + pub mod offset_by_kind { use std::fmt::{Display, Formatter}; /// The error returned by [Index::offset_by_kind()][super::Index::offset_by_kind()]. @@ -42,6 +52,26 @@ pub mod file { impl std::error::Error for Error {} } + /// + pub mod data_by_kind { + use quick_error::quick_error; + quick_error! { + /// The error returned by [Index::data_by_kind()][super::Index::data_by_kind()]. + #[derive(Debug)] + #[allow(missing_docs)] + pub enum Error { + NotFound(err: super::offset_by_kind::Error) { + display("The chunk wasn't found in the file index") + from() + source(err) + } + FileTooLarge { + display("The offsets into the file couldn't be represented by usize") + } + } + } + } + /// An entry of a chunk file index pub struct Entry { /// The kind of the chunk file @@ -61,11 +91,22 @@ pub mod file { &self, kind: crate::Kind, name: &'static str, - ) -> Result, not_found::Error> { + ) -> Result, offset_by_kind::Error> { self.chunks .iter() .find_map(|c| (c.kind == kind).then(|| c.offset.clone())) - .ok_or_else(|| not_found::Error { kind, name }) + .ok_or_else(|| offset_by_kind::Error { kind, name }) + } + + /// Find a chunk of `kind` and return its data slice based on its offset. + pub fn data_by_kind<'a>( + &self, + data: &'a [u8], + kind: crate::Kind, + name: &'static str, + ) -> Result<&'a [u8], data_by_kind::Error> { + let offset = self.offset_by_kind(kind, name)?; + Ok(&data[crate::into_usize_range(offset).ok_or_else(|| data_by_kind::Error::FileTooLarge)?]) } } } diff --git a/git-pack/src/multi_index.rs b/git-pack/src/multi_index.rs index b2c15d05006..02492ff087b 100644 --- a/git-pack/src/multi_index.rs +++ b/git-pack/src/multi_index.rs @@ -27,7 +27,7 @@ pub struct File { num_chunks: u8, /// The amount of pack files contained within num_packs: u32, - fanout: Range, + fan: [u32; 256], } /// @@ -49,7 +49,20 @@ mod chunk { pub const ID: git_chunk::Kind = 0x504e414d; /* "PNAM" */ } pub mod fanout { + use std::convert::TryInto; + pub const ID: git_chunk::Kind = 0x4f494446; /* "OIDF" */ + + pub fn from_slice(chunk: &[u8]) -> Option<[u32; 256]> { + if chunk.len() != 4 * 256 { + return None; + } + let mut out = [0; 256]; + for (c, f) in chunk.chunks(4).zip(out.iter_mut()) { + *f = u32::from_be_bytes(c.try_into().unwrap()); + } + out.into() + } } pub mod lookup { pub const ID: git_chunk::Kind = 0x4f49444c; /* "OIDL" */ @@ -87,7 +100,11 @@ pub mod init { #[error(transparent)] ChunkFileDecode(#[from] git_chunk::file::decode::Error), #[error(transparent)] - MissingChunk(#[from] git_chunk::file::index::not_found::Error), + MissingChunk(#[from] git_chunk::file::index::offset_by_kind::Error), + #[error(transparent)] + FileTooLarge(#[from] git_chunk::file::index::data_by_kind::Error), + #[error("The multi-pack fan doesn't have the correct size of 256 * 4 bytes")] + MultiPackFanSize, } } pub use error::Error; @@ -152,7 +169,10 @@ pub mod init { let chunks = git_chunk::file::Index::from_bytes(&data, HEADER_LEN, num_chunks as u32)?; let pack_names = chunks.offset_by_kind(chunk::pack_names::ID, "PNAM")?; - let fanout = chunks.offset_by_kind(chunk::fanout::ID, "OIDF")?; + + let fan = chunks.data_by_kind(&data, chunk::fanout::ID, "OIDF")?; + let fan = chunk::fanout::from_slice(fan).ok_or_else(|| Error::MultiPackFanSize)?; + let lookup = chunks.offset_by_kind(chunk::lookup::ID, "OIDL")?; let offsets = chunks.offset_by_kind(chunk::offsets::ID, "OOFF")?; let large_offsets = chunks.offset_by_kind(chunk::large_offsets::ID, "LOFF").ok(); @@ -162,7 +182,7 @@ pub mod init { path: path.to_owned(), version, hash_kind, - fanout, + fan, num_chunks, num_packs, })