fix: recursively unpack nested archives
mrcjkb committed Feb 3, 2025
1 parent f3a9749 commit d33853d
Showing 3 changed files with 195 additions and 122 deletions.
16 changes: 15 additions & 1 deletion rocks-lib/src/build/mod.rs
@@ -21,6 +21,7 @@ use command::CommandError;
use external_dependency::{ExternalDependencyError, ExternalDependencyInfo};

use indicatif::style::TemplateError;
use itertools::Itertools;
use luarocks::LuarocksBuildError;
use make::MakeError;
use patch::{Patch, PatchError};
@@ -301,7 +302,20 @@ async fn do_build<R: Rockspec + HasIntegrity>(
    let rock_source = build.rockspec.source().current_platform();
    let build_dir = match &rock_source.unpack_dir {
        Some(unpack_dir) => temp_dir.path().join(unpack_dir),
        None => temp_dir.path().into(),
        None => {
            // Some older rockspecs don't specify a source.dir.
            // If there exists a single directory after unpacking,
            // we assume it's the source directory.
            let entries = std::fs::read_dir(temp_dir.path())?
                .filter_map(Result::ok)
                .filter(|f| f.path().is_dir())
                .collect_vec();
            if entries.len() == 1 {
                temp_dir.path().join(entries.first().unwrap().path())
            } else {
                temp_dir.path().into()
            }
        }
    };

    Patch::new(
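For context, the fallback added to do_build above behaves like the following standalone sketch (the helper name resolve_build_dir is illustrative and not part of the commit): when the rockspec omits source.dir and the unpacked sources contain exactly one directory, that directory is used as the build directory; otherwise the unpack directory itself is used.

use std::io;
use std::path::{Path, PathBuf};

// Hypothetical helper mirroring the new fallback in do_build: if the unpacked
// source contains exactly one directory, treat it as the source directory;
// otherwise build directly in the unpack directory.
fn resolve_build_dir(unpack_dir: &Path) -> io::Result<PathBuf> {
    let dirs: Vec<PathBuf> = std::fs::read_dir(unpack_dir)?
        .filter_map(Result::ok)
        .map(|entry| entry.path())
        .filter(|path| path.is_dir())
        .collect();
    match dirs.as_slice() {
        [single_dir] => Ok(single_dir.clone()),
        _ => Ok(unpack_dir.to_path_buf()),
    }
}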
124 changes: 5 additions & 119 deletions rocks-lib/src/operations/fetch.rs
@@ -1,17 +1,12 @@
use bon::Builder;
use flate2::read::GzDecoder;
use git2::build::RepoBuilder;
use git2::FetchOptions;
use itertools::Itertools;
use ssri::Integrity;
use std::fs::File;
use std::io;
use std::io::BufReader;
use std::io::Cursor;
use std::io::Read;
use std::io::Seek;
use std::path::Path;
use std::path::PathBuf;
use thiserror::Error;

use crate::config::Config;
@@ -24,6 +19,7 @@ use crate::progress::ProgressBar;
use crate::rockspec::Rockspec;

use super::DownloadSrcRockError;
use super::UnpackError;

/// A rocks package source fetcher, providing fine-grained control
/// over how a package should be fetched.
@@ -162,7 +158,7 @@ async fn do_fetch_src<R: Rockspec>(fetch: &FetchSrc<'_, R>) -> Result<Integrity,
                .unwrap_or(url.to_string());
            let cursor = Cursor::new(response);
            let mime_type = infer::get(cursor.get_ref()).map(|file_type| file_type.mime_type());
            unpack(
            operations::unpack::unpack(
                mime_type,
                cursor,
                rock_source.unpack_dir.is_none(),
@@ -197,7 +193,7 @@ async fn do_fetch_src<R: Rockspec>(fetch: &FetchSrc<'_, R>) -> Result<Integrity,
                .map(|os_str| os_str.to_string_lossy())
                .unwrap_or(path.to_string_lossy())
                .to_string();
            unpack(
            operations::unpack::unpack(
                mime_type,
                file,
                rock_source.unpack_dir.is_none(),
@@ -226,124 +222,14 @@ async fn do_fetch_src_rock(fetch: FetchSrcRock<'_>) -> Result<Integrity, FetchSr
    let integrity = src_rock.bytes.hash()?;
    let cursor = Cursor::new(src_rock.bytes);
    let mime_type = infer::get(cursor.get_ref()).map(|file_type| file_type.mime_type());
    unpack(
    operations::unpack::unpack(
        mime_type,
        cursor,
        false,
        true,
        src_rock.file_name,
        dest_dir,
        progress,
    )
    .await?;
    Ok(integrity)
}

fn is_single_directory<R: Read + Seek + Send>(reader: R) -> io::Result<bool> {
    let tar = GzDecoder::new(reader);
    let mut archive = tar::Archive::new(tar);

    let entries: Vec<_> = archive
        .entries()?
        .filter_map(|entry| {
            if entry.as_ref().ok()?.path().ok()?.file_name()? != "pax_global_header" {
                Some(entry)
            } else {
                None
            }
        })
        .try_collect()?;

    let directory: PathBuf = entries
        .first()
        .unwrap()
        .path()?
        .components()
        .take(1)
        .collect();

    Ok(entries.into_iter().all(|entry| {
        entry
            .path()
            .unwrap()
            .to_str()
            .unwrap()
            .starts_with(directory.to_str().unwrap())
    }))
}

#[derive(Error, Debug)]
pub enum UnpackError {
    #[error(transparent)]
    Zip(#[from] zip::result::ZipError),
    #[error(transparent)]
    Io(#[from] io::Error),
    #[error("source returned HTML - it may have been moved or deleted")]
    SourceMovedOrDeleted,
    #[error("rockspec source has unsupported file type {0}")]
    UnsupportedFileType(String),
    #[error("could not determine mimetype of rockspec source")]
    UnknownMimeType,
}

async fn unpack<R: Read + Seek + Send>(
    mime_type: Option<&str>,
    reader: R,
    auto_find_lua_sources: bool,
    file_name: String,
    dest_dir: &Path,
    progress: &Progress<ProgressBar>,
) -> Result<(), UnpackError> {
    progress.map(|p| p.set_message(format!("📦 Unpacking {}", file_name)));

    match mime_type {
        Some("application/zip") => {
            let mut archive = zip::ZipArchive::new(reader)?;
            archive.extract(dest_dir)?;
        }
        Some("application/x-tar") => {
            let mut archive = tar::Archive::new(reader);
            archive.unpack(dest_dir)?;
        }
        Some("application/gzip") => {
            let mut bufreader = BufReader::new(reader);

            let extract_subdirectory =
                auto_find_lua_sources && is_single_directory(&mut bufreader)?;

            bufreader.rewind()?;
            let tar = GzDecoder::new(bufreader);
            let mut archive = tar::Archive::new(tar);

            if extract_subdirectory {
                archive.entries()?.try_for_each(|entry| {
                    let mut entry = entry?;

                    let path: PathBuf = entry.path()?.components().skip(1).collect();
                    if path.components().count() > 0 {
                        let dest = dest_dir.join(path);
                        std::fs::create_dir_all(dest.parent().unwrap())?;
                        entry.unpack(dest)?;
                    }

                    Ok::<_, io::Error>(())
                })?;
            } else {
                archive.entries()?.try_for_each(|entry| {
                    entry?.unpack_in(dest_dir)?;
                    Ok::<_, io::Error>(())
                })?;
            }
        }
        Some("text/html") => {
            return Err(UnpackError::SourceMovedOrDeleted);
        }
        Some(other) => {
            return Err(UnpackError::UnsupportedFileType(other.to_string()));
        }
        None => {
            return Err(UnpackError::UnknownMimeType);
        }
    }

    Ok(())
}
177 changes: 175 additions & 2 deletions rocks-lib/src/operations/unpack.rs
@@ -1,14 +1,32 @@
use async_recursion::async_recursion;
use flate2::read::GzDecoder;
use itertools::Itertools;
use std::fs;
use std::fs::File;
use std::io;
use std::io::BufReader;
use std::io::Read;
use std::io::Seek;
use std::path::Path;
use std::path::PathBuf;
use thiserror::Error;

use crate::progress::Progress;
use crate::progress::ProgressBar;

#[derive(Error, Debug)]
#[error("failed to unpack source rock: {0}")]
pub struct UnpackError(#[from] zip::result::ZipError);
pub enum UnpackError {
    #[error("failed to unpack source: {0}")]
    Io(#[from] io::Error),
    #[error("failed to unpack zip source: {0}")]
    Zip(#[from] zip::result::ZipError),
    #[error("source returned HTML - it may have been moved or deleted")]
    SourceMovedOrDeleted,
    #[error("rockspec source has unsupported file type {0}")]
    UnsupportedFileType(String),
    #[error("could not determine mimetype of rockspec source")]
    UnknownMimeType,
}

pub async fn unpack_src_rock<R: Read + Seek + Send>(
    rock_src: R,
@@ -34,6 +52,161 @@ async fn unpack_src_rock_impl<R: Read + Seek + Send>(
    Ok(destination)
}

#[async_recursion]
pub(crate) async fn unpack<R>(
    mime_type: Option<&str>,
    reader: R,
    extract_nested_archive: bool,
    file_name: String,
    dest_dir: &Path,
    progress: &Progress<ProgressBar>,
) -> Result<(), UnpackError>
where
    R: Read + Seek + Send,
{
    progress.map(|p| p.set_message(format!("📦 Unpacking {}", file_name)));

    match mime_type {
        Some("application/zip") => {
            let mut archive = zip::ZipArchive::new(reader)?;
            archive.extract(dest_dir)?;
        }
        Some("application/x-tar") => {
            let mut archive = tar::Archive::new(reader);
            archive.unpack(dest_dir)?;
        }
        Some("application/gzip") => {
            let mut bufreader = BufReader::new(reader);

            let extract_subdirectory =
                extract_nested_archive && is_single_tar_directory(&mut bufreader)?;

            bufreader.rewind()?;
            let tar = GzDecoder::new(bufreader);
            let mut archive = tar::Archive::new(tar);

            if extract_subdirectory {
                archive.entries()?.try_for_each(|entry| {
                    let mut entry = entry?;

                    let path: PathBuf = entry.path()?.components().skip(1).collect();
                    if path.components().count() > 0 {
                        let dest = dest_dir.join(path);
                        std::fs::create_dir_all(dest.parent().unwrap())?;
                        entry.unpack(dest)?;
                    }

                    Ok::<_, io::Error>(())
                })?;
            } else {
                archive.entries()?.try_for_each(|entry| {
                    entry?.unpack_in(dest_dir)?;
                    Ok::<_, io::Error>(())
                })?;
            }
        }
        Some("text/html") => {
            return Err(UnpackError::SourceMovedOrDeleted);
        }
        Some(other) => {
            return Err(UnpackError::UnsupportedFileType(other.to_string()));
        }
        None => {
            return Err(UnpackError::UnknownMimeType);
        }
    }

    if extract_nested_archive {
        // If the source is an archive, luarocks will pack the source archive and the rockspec.
        // So we need to unpack the source archive.
        if let Some(nested_archive_path) = get_single_file_entry(dest_dir)? {
            if let Some(mime_type) =
                infer::get_from_path(&nested_archive_path)?.map(|file_type| file_type.mime_type())
            {
                if matches!(
                    mime_type,
                    "application/zip" | "application/x-tar" | "application/gzip"
                ) {
                    let mut file = File::open(&nested_archive_path)?;
                    let mut buffer = Vec::new();
                    file.read_to_end(&mut buffer)?;
                    let file_name = nested_archive_path
                        .file_name()
                        .map(|os_str| os_str.to_string_lossy())
                        .unwrap_or(nested_archive_path.to_string_lossy())
                        .to_string();
                    unpack(
                        Some(mime_type),
                        file,
                        extract_nested_archive, // It might be a nested archive inside a .src.rock
                        file_name,
                        dest_dir,
                        progress,
                    )
                    .await?;
                    fs::remove_file(nested_archive_path)?;
                }
            }
        }
    }
    Ok(())
}

fn is_single_tar_directory<R: Read + Seek + Send>(reader: R) -> io::Result<bool> {
    let tar = GzDecoder::new(reader);
    let mut archive = tar::Archive::new(tar);

    let entries: Vec<_> = archive
        .entries()?
        .filter_map(|entry| {
            if entry.as_ref().ok()?.path().ok()?.file_name()? != "pax_global_header" {
                Some(entry)
            } else {
                None
            }
        })
        .try_collect()?;

    let directory: PathBuf = entries
        .first()
        .unwrap()
        .path()?
        .components()
        .take(1)
        .collect();

    Ok(entries.into_iter().all(|entry| {
        entry
            .path()
            .unwrap()
            .to_str()
            .unwrap()
            .starts_with(directory.to_str().unwrap())
    }))
}

fn get_single_file_entry(dir: &Path) -> Result<Option<PathBuf>, io::Error> {
    let entries = std::fs::read_dir(dir)?
        .filter_map(Result::ok)
        .filter_map(|f| {
            let f = f.path();
            if f.is_file()
                && f.extension()
                    .is_some_and(|ext| ext.to_string_lossy() != "rockspec")
            {
                Some(f)
            } else {
                None
            }
        })
        .collect_vec();
    if entries.len() == 1 {
        Ok(entries.first().cloned())
    } else {
        Ok(None)
    }
}

#[cfg(test)]
mod tests {
    use crate::progress::MultiProgress;
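To illustrate the case the new recursion handles (file names below are hypothetical, not taken from the commit or its tests): unpacking a .src.rock typically yields a rockspec plus a single nested source archive, which get_single_file_entry detects so that unpack can recurse into it and then delete it. A minimal sketch of that detection heuristic, assuming the same "exactly one non-rockspec file" rule:

use std::io;
use std::path::{Path, PathBuf};

// Illustrative sketch (not the commit's code): after extracting a .src.rock,
// the destination is expected to look roughly like
//   dest_dir/foo-1.0.0-1.rockspec   <- ignored
//   dest_dir/foo-1.0.0.tar.gz       <- returned as the nested-archive candidate
fn find_nested_archive(dest_dir: &Path) -> io::Result<Option<PathBuf>> {
    let candidates: Vec<PathBuf> = std::fs::read_dir(dest_dir)?
        .filter_map(Result::ok)
        .map(|entry| entry.path())
        .filter(|path| {
            path.is_file()
                && path
                    .extension()
                    .is_some_and(|ext| ext.to_string_lossy() != "rockspec")
        })
        .collect();
    match candidates.as_slice() {
        [nested_archive] => Ok(Some(nested_archive.clone())),
        _ => Ok(None),
    }
}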
