From d33853d29c69d313edafc109b2a5709458df7800 Mon Sep 17 00:00:00 2001 From: Marc Jakobi Date: Sun, 2 Feb 2025 22:10:07 +0100 Subject: [PATCH] fix: recursively unpack nested archives --- rocks-lib/src/build/mod.rs | 16 ++- rocks-lib/src/operations/fetch.rs | 124 +------------------- rocks-lib/src/operations/unpack.rs | 177 ++++++++++++++++++++++++++++- 3 files changed, 195 insertions(+), 122 deletions(-) diff --git a/rocks-lib/src/build/mod.rs b/rocks-lib/src/build/mod.rs index 53876469..0e967ce3 100644 --- a/rocks-lib/src/build/mod.rs +++ b/rocks-lib/src/build/mod.rs @@ -21,6 +21,7 @@ use command::CommandError; use external_dependency::{ExternalDependencyError, ExternalDependencyInfo}; use indicatif::style::TemplateError; +use itertools::Itertools; use luarocks::LuarocksBuildError; use make::MakeError; use patch::{Patch, PatchError}; @@ -301,7 +302,20 @@ async fn do_build( let rock_source = build.rockspec.source().current_platform(); let build_dir = match &rock_source.unpack_dir { Some(unpack_dir) => temp_dir.path().join(unpack_dir), - None => temp_dir.path().into(), + None => { + // Some older rockspecs don't specify a source.dir. + // If there exists a single directory after unpacking, + // we assume it's the source directory. + let entries = std::fs::read_dir(temp_dir.path())? + .filter_map(Result::ok) + .filter(|f| f.path().is_dir()) + .collect_vec(); + if entries.len() == 1 { + temp_dir.path().join(entries.first().unwrap().path()) + } else { + temp_dir.path().into() + } + } }; Patch::new( diff --git a/rocks-lib/src/operations/fetch.rs b/rocks-lib/src/operations/fetch.rs index 55322612..9c95d7a1 100644 --- a/rocks-lib/src/operations/fetch.rs +++ b/rocks-lib/src/operations/fetch.rs @@ -1,17 +1,12 @@ use bon::Builder; -use flate2::read::GzDecoder; use git2::build::RepoBuilder; use git2::FetchOptions; -use itertools::Itertools; use ssri::Integrity; use std::fs::File; use std::io; -use std::io::BufReader; use std::io::Cursor; use std::io::Read; -use std::io::Seek; use std::path::Path; -use std::path::PathBuf; use thiserror::Error; use crate::config::Config; @@ -24,6 +19,7 @@ use crate::progress::ProgressBar; use crate::rockspec::Rockspec; use super::DownloadSrcRockError; +use super::UnpackError; /// A rocks package source fetcher, providing fine-grained control /// over how a package should be fetched. @@ -162,7 +158,7 @@ async fn do_fetch_src(fetch: &FetchSrc<'_, R>) -> Result(fetch: &FetchSrc<'_, R>) -> Result) -> Result) -> Result(reader: R) -> io::Result { - let tar = GzDecoder::new(reader); - let mut archive = tar::Archive::new(tar); - - let entries: Vec<_> = archive - .entries()? - .filter_map(|entry| { - if entry.as_ref().ok()?.path().ok()?.file_name()? != "pax_global_header" { - Some(entry) - } else { - None - } - }) - .try_collect()?; - - let directory: PathBuf = entries - .first() - .unwrap() - .path()? - .components() - .take(1) - .collect(); - - Ok(entries.into_iter().all(|entry| { - entry - .path() - .unwrap() - .to_str() - .unwrap() - .starts_with(directory.to_str().unwrap()) - })) -} - -#[derive(Error, Debug)] -pub enum UnpackError { - #[error(transparent)] - Zip(#[from] zip::result::ZipError), - #[error(transparent)] - Io(#[from] io::Error), - #[error("source returned HTML - it may have been moved or deleted")] - SourceMovedOrDeleted, - #[error("rockspec source has unsupported file type {0}")] - UnsupportedFileType(String), - #[error("could not determine mimetype of rockspec source")] - UnknownMimeType, -} - -async fn unpack( - mime_type: Option<&str>, - reader: R, - auto_find_lua_sources: bool, - file_name: String, - dest_dir: &Path, - progress: &Progress, -) -> Result<(), UnpackError> { - progress.map(|p| p.set_message(format!("📦 Unpacking {}", file_name))); - - match mime_type { - Some("application/zip") => { - let mut archive = zip::ZipArchive::new(reader)?; - archive.extract(dest_dir)?; - } - Some("application/x-tar") => { - let mut archive = tar::Archive::new(reader); - archive.unpack(dest_dir)?; - } - Some("application/gzip") => { - let mut bufreader = BufReader::new(reader); - - let extract_subdirectory = - auto_find_lua_sources && is_single_directory(&mut bufreader)?; - - bufreader.rewind()?; - let tar = GzDecoder::new(bufreader); - let mut archive = tar::Archive::new(tar); - - if extract_subdirectory { - archive.entries()?.try_for_each(|entry| { - let mut entry = entry?; - - let path: PathBuf = entry.path()?.components().skip(1).collect(); - if path.components().count() > 0 { - let dest = dest_dir.join(path); - std::fs::create_dir_all(dest.parent().unwrap())?; - entry.unpack(dest)?; - } - - Ok::<_, io::Error>(()) - })?; - } else { - archive.entries()?.try_for_each(|entry| { - entry?.unpack_in(dest_dir)?; - Ok::<_, io::Error>(()) - })?; - } - } - Some("text/html") => { - return Err(UnpackError::SourceMovedOrDeleted); - } - Some(other) => { - return Err(UnpackError::UnsupportedFileType(other.to_string())); - } - None => { - return Err(UnpackError::UnknownMimeType); - } - } - - Ok(()) -} diff --git a/rocks-lib/src/operations/unpack.rs b/rocks-lib/src/operations/unpack.rs index 2b3151df..843e6db4 100644 --- a/rocks-lib/src/operations/unpack.rs +++ b/rocks-lib/src/operations/unpack.rs @@ -1,5 +1,13 @@ +use async_recursion::async_recursion; +use flate2::read::GzDecoder; +use itertools::Itertools; +use std::fs; +use std::fs::File; +use std::io; +use std::io::BufReader; use std::io::Read; use std::io::Seek; +use std::path::Path; use std::path::PathBuf; use thiserror::Error; @@ -7,8 +15,18 @@ use crate::progress::Progress; use crate::progress::ProgressBar; #[derive(Error, Debug)] -#[error("failed to unpack source rock: {0}")] -pub struct UnpackError(#[from] zip::result::ZipError); +pub enum UnpackError { + #[error("failed to unpack source: {0}")] + Io(#[from] io::Error), + #[error("failed to unpack zip source: {0}")] + Zip(#[from] zip::result::ZipError), + #[error("source returned HTML - it may have been moved or deleted")] + SourceMovedOrDeleted, + #[error("rockspec source has unsupported file type {0}")] + UnsupportedFileType(String), + #[error("could not determine mimetype of rockspec source")] + UnknownMimeType, +} pub async fn unpack_src_rock( rock_src: R, @@ -34,6 +52,161 @@ async fn unpack_src_rock_impl( Ok(destination) } +#[async_recursion] +pub(crate) async fn unpack( + mime_type: Option<&str>, + reader: R, + extract_nested_archive: bool, + file_name: String, + dest_dir: &Path, + progress: &Progress, +) -> Result<(), UnpackError> +where + R: Read + Seek + Send, +{ + progress.map(|p| p.set_message(format!("📦 Unpacking {}", file_name))); + + match mime_type { + Some("application/zip") => { + let mut archive = zip::ZipArchive::new(reader)?; + archive.extract(dest_dir)?; + } + Some("application/x-tar") => { + let mut archive = tar::Archive::new(reader); + archive.unpack(dest_dir)?; + } + Some("application/gzip") => { + let mut bufreader = BufReader::new(reader); + + let extract_subdirectory = + extract_nested_archive && is_single_tar_directory(&mut bufreader)?; + + bufreader.rewind()?; + let tar = GzDecoder::new(bufreader); + let mut archive = tar::Archive::new(tar); + + if extract_subdirectory { + archive.entries()?.try_for_each(|entry| { + let mut entry = entry?; + + let path: PathBuf = entry.path()?.components().skip(1).collect(); + if path.components().count() > 0 { + let dest = dest_dir.join(path); + std::fs::create_dir_all(dest.parent().unwrap())?; + entry.unpack(dest)?; + } + + Ok::<_, io::Error>(()) + })?; + } else { + archive.entries()?.try_for_each(|entry| { + entry?.unpack_in(dest_dir)?; + Ok::<_, io::Error>(()) + })?; + } + } + Some("text/html") => { + return Err(UnpackError::SourceMovedOrDeleted); + } + Some(other) => { + return Err(UnpackError::UnsupportedFileType(other.to_string())); + } + None => { + return Err(UnpackError::UnknownMimeType); + } + } + + if extract_nested_archive { + // If the source is an archive, luarocks will pack the source archive and the rockspec. + // So we need to unpack the source archive. + if let Some(nested_archive_path) = get_single_file_entry(dest_dir)? { + if let Some(mime_type) = + infer::get_from_path(&nested_archive_path)?.map(|file_type| file_type.mime_type()) + { + if matches!( + mime_type, + "application/zip" | "application/x-tar" | "application/gzip" + ) { + let mut file = File::open(&nested_archive_path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + let file_name = nested_archive_path + .file_name() + .map(|os_str| os_str.to_string_lossy()) + .unwrap_or(nested_archive_path.to_string_lossy()) + .to_string(); + unpack( + Some(mime_type), + file, + extract_nested_archive, // It might be a nested archive inside a .src.rock + file_name, + dest_dir, + progress, + ) + .await?; + fs::remove_file(nested_archive_path)?; + } + } + } + } + Ok(()) +} + +fn is_single_tar_directory(reader: R) -> io::Result { + let tar = GzDecoder::new(reader); + let mut archive = tar::Archive::new(tar); + + let entries: Vec<_> = archive + .entries()? + .filter_map(|entry| { + if entry.as_ref().ok()?.path().ok()?.file_name()? != "pax_global_header" { + Some(entry) + } else { + None + } + }) + .try_collect()?; + + let directory: PathBuf = entries + .first() + .unwrap() + .path()? + .components() + .take(1) + .collect(); + + Ok(entries.into_iter().all(|entry| { + entry + .path() + .unwrap() + .to_str() + .unwrap() + .starts_with(directory.to_str().unwrap()) + })) +} + +fn get_single_file_entry(dir: &Path) -> Result, io::Error> { + let entries = std::fs::read_dir(dir)? + .filter_map(Result::ok) + .filter_map(|f| { + let f = f.path(); + if f.is_file() + && f.extension() + .is_some_and(|ext| ext.to_string_lossy() != "rockspec") + { + Some(f) + } else { + None + } + }) + .collect_vec(); + if entries.len() == 1 { + Ok(entries.first().cloned()) + } else { + Ok(None) + } +} + #[cfg(test)] mod tests { use crate::progress::MultiProgress;