From 0a6283126f76718e8f74ebcbdf4c81334acc4b82 Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 6 Apr 2024 10:07:31 +0200 Subject: [PATCH 1/7] chore: move rustup handling to a separate module --- src/main.rs | 15 +++------------ src/rustup.rs | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 12 deletions(-) create mode 100644 src/rustup.rs diff --git a/src/main.rs b/src/main.rs index fa78b63..2444fcf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,6 +17,8 @@ use sha2::{Digest as _, Sha256, Sha512}; use tar::Archive; use url::Url; +mod rustup; + const USER_AGENT: &str = concat!( env!("CARGO_PKG_NAME"), "/", @@ -42,18 +44,7 @@ fn main() -> Result<()> { .user_agent(USER_AGENT) .build()?; - let default_toolchain = Command::new("rustup") - .arg("default") - .output() - .ok() - .filter(|out| out.status.success()) - .and_then(|out| String::from_utf8(out.stdout).ok()) - .and_then(|stdout| { - stdout - .split_once(' ') - .map(|(toolchain, _)| toolchain.to_owned()) - }) - .unwrap_or_else(|| "stable".to_owned()); + let default_toolchain = self::rustup::default_toolchain(); let crates_io_index = "https://github.com/rust-lang/crates.io-index".parse::()?; diff --git a/src/rustup.rs b/src/rustup.rs new file mode 100644 index 0000000..ee85d6e --- /dev/null +++ b/src/rustup.rs @@ -0,0 +1,17 @@ +use std::process::Command; + +/// Get the default rustup toolchain or `stable` if the default can't be determined +pub fn default_toolchain() -> String { + Command::new("rustup") + .arg("default") + .output() + .ok() + .filter(|out| out.status.success()) + .and_then(|out| String::from_utf8(out.stdout).ok()) + .and_then(|stdout| { + stdout + .split_once(' ') + .map(|(toolchain, _)| toolchain.to_owned()) + }) + .unwrap_or_else(|| "stable".to_owned()) +} From f79b53d9961579ed7e64143d8956667d641225b0 Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 6 Apr 2024 10:21:55 +0200 Subject: [PATCH 2/7] chore: move registry handling to a separate module --- src/main.rs | 54 
++++++++++++++--------------------------- src/registry.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 36 deletions(-) create mode 100644 src/registry.rs diff --git a/src/main.rs b/src/main.rs index 2444fcf..0516645 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use std::{ collections::{BTreeMap, HashSet}, env, fs::{self, File}, - io::{self, BufRead, BufReader, Read, Write}, + io::{self, BufRead, BufReader, Read, }, process::Command, str, }; @@ -17,6 +17,9 @@ use sha2::{Digest as _, Sha256, Sha512}; use tar::Archive; use url::Url; +use self::registry::RegistryCrate; + +mod registry; mod rustup; const USER_AGENT: &str = concat!( @@ -88,40 +91,21 @@ fn main() -> Result<()> { // Download the package // - let crate_dir = crates_dir.join(package.name.as_str()); - if !crate_dir.try_exists()? { - fs::create_dir(&crate_dir)?; - } - let crate_path = crate_dir.join(format!("{}.tar.gz", package.version)); - if !crate_path.try_exists()? { - println!("Downloading {} v{}", package.name, package.version); - - let mut resp = client - .get(format!( - "https://static.crates.io/crates/{}/{}-{}.crate", - package.name, package.name, package.version - )) - .send()?; - if !resp.status().is_success() { + let registry_crate = match RegistryCrate::obtain( + &client, + &crates_dir, + package.name.as_str(), + &package.version, + ) { + Ok(registry_crate) => registry_crate, + Err(err) => { println!( - "Couldn't download {} v{}, status: {}", - package.name, - package.version, - resp.status() + "Couldn't obtain package {} v{} err={:?}", + package.name, package.version, err ); continue; } - - let mut tmp_crate_path = crate_path.clone(); - tmp_crate_path.as_mut_os_string().push(".tmp"); - - let mut tmp_crate_file = File::create(&tmp_crate_path)?; - io::copy(&mut resp, &mut tmp_crate_file)?; - tmp_crate_file.flush()?; - drop(tmp_crate_file); - - fs::rename(tmp_crate_path, &crate_path)?; - } + }; // // Verify the package checksum @@ -130,9 
+114,7 @@ fn main() -> Result<()> { match package.checksum { Some(Checksum::Sha256(expected_sha256_hash)) => { let mut sha256 = Sha256::new(); - let mut file = File::open(&crate_path)?; - io::copy(&mut file, &mut sha256)?; - drop(file); + io::copy(&mut registry_crate.raw_crate_file()?, &mut sha256)?; let sha256 = sha256.finalize(); ensure!( @@ -153,7 +135,7 @@ fn main() -> Result<()> { let mut cargo_vcs_info = None; let mut cargo_toml = None; - let mut tar = Archive::new(GzDecoder::new(File::open(&crate_path)?)); + let mut tar = registry_crate.crate_contents()?; for entry in tar.entries()? { let mut entry = entry?; let path = entry @@ -446,7 +428,7 @@ fn main() -> Result<()> { // Hash file contents // - let mut crates_io_tar = Archive::new(GzDecoder::new(File::open(&crate_path)?)); + let mut crates_io_tar = registry_crate.crate_contents()?; let mut crates_io_hashes = BTreeMap::new(); for file in crates_io_tar.entries()? { let file = file?; diff --git a/src/registry.rs b/src/registry.rs new file mode 100644 index 0000000..2c685fd --- /dev/null +++ b/src/registry.rs @@ -0,0 +1,64 @@ +use std::{ + fs::{self, File}, + io::{self, Read, Write as _}, + path::{Path, PathBuf}, +}; + +use anyhow::Result; +use flate2::read::GzDecoder; +use semver::Version; +use tar::Archive; + +pub struct RegistryCrate { + crate_file: PathBuf, +} + +impl RegistryCrate { + pub fn obtain( + http_client: &reqwest::blocking::Client, + cache_dir: &Path, + name: &str, + version: &Version, + ) -> Result { + let crate_dir = cache_dir.join(name); + if !crate_dir.try_exists()? { + fs::create_dir(&crate_dir)?; + } + + let crate_path = crate_dir.join(format!("{version}.tar.gz")); + if !crate_path.try_exists()? { + let mut resp = http_client + .get(format!( + "https://static.crates.io/crates/{name}/{name}-{version}.crate", + )) + .send()? 
+ .error_for_status()?; + + let mut tmp_crate_path = crate_path.clone(); + tmp_crate_path.as_mut_os_string().push(".tmp"); + + let mut tmp_crate_file = File::create(&tmp_crate_path)?; + io::copy(&mut resp, &mut tmp_crate_file)?; + tmp_crate_file.flush()?; + drop(tmp_crate_file); + + fs::rename(tmp_crate_path, &crate_path)?; + } + + Ok(Self { + crate_file: crate_path, + }) + } + + pub fn raw_crate_file(&self) -> io::Result { + File::open(&self.crate_file) + } + + pub fn decompressed_crate_file(&self) -> io::Result { + self.raw_crate_file().map(GzDecoder::new) + } + + pub fn crate_contents(&self) -> io::Result> { + self.decompressed_crate_file().map(Archive::new) + } +} From 7c46ce1bcfe44b2b493ff567fdca3f73a0b9c24a Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 6 Apr 2024 11:14:10 +0200 Subject: [PATCH 3/7] chore: move repository handling to a separate module --- src/git.rs | 250 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 231 ++++++++---------------------------------------- 2 files changed, 286 insertions(+), 195 deletions(-) create mode 100644 src/git.rs diff --git a/src/git.rs b/src/git.rs new file mode 100644 index 0000000..38fcf67 --- /dev/null +++ b/src/git.rs @@ -0,0 +1,250 @@ +use std::{ + cmp::Ordering, + collections::BTreeSet, + fs::File, + io::Read, + path::{Path, PathBuf}, + process::Command, + str, +}; + +use anyhow::{ensure, Context as _, Result}; +use flate2::read::GzDecoder; +use semver::Version; +use tar::Archive; +use url::Url; + +#[derive(Debug)] +pub struct GitRepository { + repo_dir: PathBuf, +} + +#[derive(Debug)] +pub struct GitRepositoryCheckout<'a> { + repository: &'a GitRepository, +} + +#[derive(Debug)] +pub struct GitTags<'a>(BTreeSet>); + +#[derive(Debug)] +pub struct GitTag<'a> { + repository: &'a GitRepository, + tag: String, +} + +impl GitRepository { + pub fn obtain(dir: &Path, url: Url) -> Result { + let normalized_url = normalize_url(url)?; + + let name = format!( + "{}-{}", + 
normalized_url.host().unwrap(), + normalized_url.path().replace('/', "-") + ); + let repo_dir = dir.join(name); + if !repo_dir.try_exists()? { + let out = Command::new("git") + .arg("clone") + .arg("--filter=blob:none") + .arg("--") + .arg(normalized_url.to_string()) + .arg(&repo_dir) + .env("GIT_TERMINAL_PROMPT", "0") + .output()?; + ensure!(out.status.success(), "`git clone` is successful"); + } + + Ok(Self { repo_dir }) + } + + pub fn tags(&self) -> Result { + let out = Command::new("git") + .arg("tag") + .current_dir(&self.repo_dir) + .output()?; + ensure!(out.status.success(), "`git tag` is successful"); + + let tags = str::from_utf8(&out.stdout) + .context("couldn't parse git tags")? + .lines() + .map(|tag| GitTag { + repository: self, + tag: tag.to_owned(), + }) + .collect::>(); + Ok(GitTags(tags)) + } + + pub fn checkout<'a>(&'a mut self, commit: &str) -> Result> { + let out = Command::new("git") + .arg("checkout") + .arg(commit) + .current_dir(&self.repo_dir) + .output() + .context("checkout the commit")?; + ensure!(out.status.success(), "`git checkout` is successful"); + + let out = Command::new("git") + .arg("submodule") + .arg("init") + .env("GIT_TERMINAL_PROMPT", "0") + .current_dir(&self.repo_dir) + .output() + .context("init submodules")?; + ensure!(out.status.success(), "`git submodule init` is successful"); + + let out = Command::new("git") + .arg("submodule") + .arg("sync") + .env("GIT_TERMINAL_PROMPT", "0") + .current_dir(&self.repo_dir) + .output() + .context("sync submodules")?; + ensure!(out.status.success(), "`git submodule sync` is successful"); + + let out = Command::new("git") + .arg("submodule") + .arg("update") + .env("GIT_TERMINAL_PROMPT", "0") + .current_dir(&self.repo_dir) + .output() + .context("update submodules")?; + ensure!(out.status.success(), "`git submodule update` is successful"); + + Ok(GitRepositoryCheckout { repository: self }) + } +} + +impl<'a> GitRepositoryCheckout<'a> { + pub fn crate_package( + &self, + 
default_toolchain: &str, + name: &str, + version: &Version, + ) -> Result> { + let package_path = self + .repository + .repo_dir + .join("target") + .join("package") + .join(format!("{name}-{version}.crate")); + + if !package_path.try_exists()? { + let out = Command::new("cargo") + .arg("package") + .arg("--no-verify") + .arg("--package") + .arg(name) + .current_dir(&self.repository.repo_dir) + .env("RUSTUP_TOOLCHAIN", default_toolchain) + .output() + .context("cargo package")?; + ensure!(out.status.success(), "`cargo package` is successful"); + ensure!( + package_path.try_exists()?, + "`cargo package` generated a file" + ); + } + + File::open(package_path) + .map(GzDecoder::new) + .map(Archive::new) + .map_err(Into::into) + } +} + +impl<'a> GitTags<'a> { + pub fn find_tag_for_version(&'a self, name: &str, version: Version) -> Option<&'a GitTag<'a>> { + let mut clean_version = version; + clean_version.build = semver::BuildMetadata::EMPTY; + + let possible_tags = [ + // With package name prefix + format!("{name}-v{clean_version}"), + format!("{name}-{clean_version}"), + format!("{name}_v{clean_version}"), + format!("{name}_{clean_version}"), + format!("{name}/v{clean_version}"), + format!("{name}v/{clean_version}"), + format!("{name}/{clean_version}"), + format!("{name}@v{clean_version}"), + format!("{name}@{clean_version}"), + // Just the version + format!("v{clean_version}"), + clean_version.to_string(), + format!("v/{clean_version}"), + ]; + possible_tags + .iter() + .find_map(|possible_tag| self.0.iter().find(|&tag| tag.tag == **possible_tag)) + } +} + +impl<'a> GitTag<'a> { + pub fn commit(&self) -> Result { + let out = Command::new("git") + .arg("rev-list") + .arg("-n") + .arg("1") + .arg(&self.tag) + .current_dir(&self.repository.repo_dir) + .output() + .context("find out commit behind tag")?; + ensure!(out.status.success(), "`git rev-list` is successful"); + + let commit = str::from_utf8(&out.stdout) + .context("git tag isn't utf-8")? 
+ .lines() + .next() + .context("output is empty")? + .to_owned(); + Ok(commit) + } +} + +impl<'a> PartialEq for GitTag<'a> { + fn eq(&self, other: &Self) -> bool { + self.tag.eq(&other.tag) + } +} + +impl<'a> Eq for GitTag<'a> {} + +impl<'a> PartialOrd for GitTag<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl<'a> Ord for GitTag<'a> { + fn cmp(&self, other: &Self) -> Ordering { + self.tag.cmp(&other.tag) + } +} + +fn normalize_url(url: Url) -> Result { + ensure!( + matches!(url.scheme(), "http" | "https"), + "Bad repository scheme" + ); + let host = url + .host() + .context("repository doesn't have a `host`")? + .to_string(); + + Ok(if host == "github.com" || host.starts_with("gitlab.") { + let mut url = url; + let mut path = url.path().strip_prefix('/').unwrap().split('/'); + url.set_path(&format!( + "/{}/{}.git", + path.next().context("repository is missing user/org")?, + path.next() + .context("repository is missing repo name")? + .trim_end_matches(".git") + )); + url + } else { + url + }) +} diff --git a/src/main.rs b/src/main.rs index 0516645..b5fce15 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,24 +1,21 @@ use std::{ - collections::{BTreeMap, HashSet}, - env, - fs::{self, File}, - io::{self, BufRead, BufReader, Read, }, - process::Command, + collections::BTreeMap, + env, fs, + io::{self, BufRead, BufReader, Read}, str, }; -use anyhow::{ensure, Context as _, Result}; +use anyhow::{ensure, Context, Result}; use cargo_lock::{package::SourceKind, Checksum, Lockfile}; use cargo_toml::Manifest; -use flate2::read::GzDecoder; -use semver::BuildMetadata; use serde::Deserialize; use sha2::{Digest as _, Sha256, Sha512}; -use tar::Archive; use url::Url; +use self::git::GitRepository; use self::registry::RegistryCrate; +mod git; mod registry; mod rustup; @@ -181,126 +178,35 @@ fn main() -> Result<()> { // Clone repository // - let repository = repository + let repository_url = repository .get()? 
.parse::() .context("repository isn't a valid url")?; - ensure!( - matches!(repository.scheme(), "http" | "https"), - "Bad repository scheme" - ); - let host = repository - .host() - .context("repository doesn't have a `host`")? - .to_string(); - let repository = if host == "github.com" || host.starts_with("gitlab.") { - let mut repository = repository; - let mut path = repository.path().strip_prefix('/').unwrap().split('/'); - repository.set_path(&format!( - "/{}/{}.git", - path.next().context("repository is missing user/org")?, - path.next() - .context("repository is missing repo name")? - .trim_end_matches(".git") - )); - repository - } else { - repository - }; - - let name = format!( - "{}-{}", - repository.host().unwrap(), - repository.path().replace('/', "-") - ); - let repo_dir = repos_dir.join(&name); - if !repo_dir.try_exists()? { - println!("Cloning {}", repository); - let out = Command::new("git") - .arg("clone") - .arg("--filter=blob:none") - .arg("--") - .arg(repository.to_string()) - .arg(&repo_dir) - .env("GIT_TERMINAL_PROMPT", "0") - .output()?; - if !out.status.success() { + let mut git_repository = match GitRepository::obtain(&repos_dir, repository_url.clone()) { + Ok(git_repository) => git_repository, + Err(err) => { println!( - "Couldn't clone {} repo for package {} v{} status={}", - repository, package.name, package.version, out.status + "Couldn't obtain git repository for {} v{} err={:?} url={}", + package.name, package.version, err, repository_url ); continue; } - } + }; // // Get git tags // - let tags = Command::new("git") - .arg("tag") - .current_dir(&repo_dir) - .output()?; - ensure!( - tags.status.success(), - "Couldn't list git tags {} repo status={}", - repository, - tags.status - ); - let tags = str::from_utf8(&tags.stdout) - .context("couldn't parse git tags")? 
- .lines() - .map(ToOwned::to_owned) - .collect::>(); + let tags = git_repository.tags().context("obtain git tags")?; // // Find a matching tag // - let mut clean_version = package.version.clone(); - clean_version.build = BuildMetadata::EMPTY; - - let possible_tags = [ - // With package name prefix - format!("{}-v{}", package.name, clean_version), - format!("{}-{}", package.name, clean_version), - format!("{}_v{}", package.name, clean_version), - format!("{}_{}", package.name, clean_version), - format!("{}/v{}", package.name, clean_version), - format!("{}v/{}", package.name, clean_version), - format!("{}/{}", package.name, clean_version), - format!("{}@v{}", package.name, clean_version), - format!("{}@{}", package.name, clean_version), - // Just the version - format!("v{clean_version}"), - clean_version.to_string(), - format!("v/{clean_version}"), - ]; - let commit = match possible_tags - .iter() - .find(|&possible_tag| tags.contains(possible_tag)) + let commit = match tags.find_tag_for_version(package.name.as_str(), package.version.clone()) { Some(tag) => { - let out = Command::new("git") - .arg("rev-list") - .arg("-n") - .arg("1") - .arg(tag) - .current_dir(&repo_dir) - .output() - .context("find out commit behind tag")?; - ensure!( - out.status.success(), - "Couldn't determine commit behind tag {} repo status={}", - repository, - out.status - ); - let commit = str::from_utf8(&out.stdout) - .context("git tag isn't utf-8")? - .lines() - .next() - .context("output is empty")? 
- .to_owned(); + let commit = tag.commit()?; if let Some(cargo_vcs_info) = &cargo_vcs_info { if cargo_vcs_info.git.sha1 != commit { @@ -330,99 +236,35 @@ fn main() -> Result<()> { // Checkout the commit in the repo // - let out = Command::new("git") - .arg("checkout") - .arg(commit) - .current_dir(&repo_dir) - .output() - .context("checkout the commit")?; - if !out.status.success() { - println!( - "Couldn't checkout the commit in {} repo for package {} v{} status={}", - repository, package.name, package.version, out.status - ); - continue; - } - - let out = Command::new("git") - .arg("submodule") - .arg("init") - .env("GIT_TERMINAL_PROMPT", "0") - .current_dir(&repo_dir) - .output() - .context("init submodules")?; - if !out.status.success() { - println!( - "Couldn't init submodules in {} repo for package {} v{} status={}", - repository, package.name, package.version, out.status - ); - continue; - } - - let out = Command::new("git") - .arg("submodule") - .arg("sync") - .env("GIT_TERMINAL_PROMPT", "0") - .current_dir(&repo_dir) - .output() - .context("sync submodules")?; - if !out.status.success() { - println!( - "Couldn't sync submodules in {} repo for package {} v{} status={}", - repository, package.name, package.version, out.status - ); - continue; - } - - let out = Command::new("git") - .arg("submodule") - .arg("update") - .env("GIT_TERMINAL_PROMPT", "0") - .current_dir(&repo_dir) - .output() - .context("update submodules")?; - if !out.status.success() { - println!( - "Couldn't update submodules in {} repo for package {} v{} status={}", - repository, package.name, package.version, out.status - ); - continue; - } + let git_repository_checkout = match git_repository.checkout(&commit) { + Ok(git_repository_checkout) => git_repository_checkout, + Err(err) => { + println!( + "Couldn't checkout commit {} for package {} v{} err={:?}", + commit, package.name, package.version, err + ); + continue; + } + }; // // Create local package // - let package_path = repo_dir - 
.join("target") - .join("package") - .join(format!("{}-{}.crate", package.name, package.version)); - - if !package_path.try_exists()? { - println!("Packaging release {} v{}", package.name, package.version); - - let out = Command::new("cargo") - .arg("package") - .arg("--no-verify") - .arg("--package") - .arg(package.name.as_str()) - .current_dir(&repo_dir) - .env("RUSTUP_TOOLCHAIN", &default_toolchain) - .output() - .context("cargo package")?; - if !out.status.success() { + let mut our_tar_gz = match git_repository_checkout.crate_package( + &default_toolchain, + package.name.as_str(), + &package.version, + ) { + Ok(our_tar_gz) => our_tar_gz, + Err(err) => { println!( - "Couldn't assemble the package in {} repo status={}", - repository, out.status + "Couldn't package {} v{} err={:?}", + package.name, package.version, err ); continue; } - - if !package_path.try_exists()? { - println!("Package still somehow doesn't exist {}", package.name); - continue; - } - } + }; // // Hash file contents @@ -448,7 +290,6 @@ fn main() -> Result<()> { crates_io_hashes.insert(path, sha512.finalize()); } - let mut our_tar_gz = Archive::new(GzDecoder::new(File::open(&package_path)?)); let mut our_hashes = BTreeMap::new(); for file in our_tar_gz.entries()? 
{ let file = file?; From 056214cfb465432082491a4d3912ca5d535dd694 Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 6 Apr 2024 11:19:49 +0200 Subject: [PATCH 4/7] chore: move AsciiWhitespaceSkippingReader to its own module --- src/io.rs | 47 ++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 54 ++++++++--------------------------------------------- 2 files changed, 55 insertions(+), 46 deletions(-) create mode 100644 src/io.rs diff --git a/src/io.rs b/src/io.rs new file mode 100644 index 0000000..e02128f --- /dev/null +++ b/src/io.rs @@ -0,0 +1,47 @@ +use std::io::{self, BufRead, Read}; + +pub struct AsciiWhitespaceSkippingReader(R); + +impl AsciiWhitespaceSkippingReader { + pub fn new(reader: R) -> Self { + Self(reader) + } +} + +impl Read for AsciiWhitespaceSkippingReader +where + R: BufRead, +{ + fn read(&mut self, mut buf: &mut [u8]) -> io::Result { + let mut written = 0; + + loop { + if buf.is_empty() { + break; + } + + let mut read_buf = self.0.fill_buf()?; + if read_buf.is_empty() { + break; + } + + let mut read = 0; + while !read_buf.is_empty() && !buf.is_empty() { + read += 1; + let b = read_buf[0]; + read_buf = &read_buf[1..]; + if b.is_ascii_whitespace() { + continue; + } + + buf[0] = b; + buf = &mut buf[1..]; + written += 1; + } + + self.0.consume(read); + } + + Ok(written) + } +} diff --git a/src/main.rs b/src/main.rs index b5fce15..36b0c6c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ use std::{ collections::BTreeMap, env, fs, - io::{self, BufRead, BufReader, Read}, + io::{self as std_io, BufReader, Read}, str, }; @@ -13,9 +13,11 @@ use sha2::{Digest as _, Sha256, Sha512}; use url::Url; use self::git::GitRepository; +use self::io::AsciiWhitespaceSkippingReader; use self::registry::RegistryCrate; mod git; +mod io; mod registry; mod rustup; @@ -111,7 +113,7 @@ fn main() -> Result<()> { match package.checksum { Some(Checksum::Sha256(expected_sha256_hash)) => { let mut sha256 = Sha256::new(); - io::copy(&mut 
registry_crate.raw_crate_file()?, &mut sha256)?; + std_io::copy(&mut registry_crate.raw_crate_file()?, &mut sha256)?; let sha256 = sha256.finalize(); ensure!( @@ -283,10 +285,10 @@ fn main() -> Result<()> { continue; } - let mut reader = AsciiWhitespaceSkippingReader(BufReader::new(file)); + let mut reader = AsciiWhitespaceSkippingReader::new(BufReader::new(file)); let mut sha512 = Sha512::new(); - io::copy(&mut reader, &mut sha512)?; + std_io::copy(&mut reader, &mut sha512)?; crates_io_hashes.insert(path, sha512.finalize()); } @@ -302,10 +304,10 @@ fn main() -> Result<()> { continue; } - let mut reader = AsciiWhitespaceSkippingReader(BufReader::new(file)); + let mut reader = AsciiWhitespaceSkippingReader::new(BufReader::new(file)); let mut sha512 = Sha512::new(); - io::copy(&mut reader, &mut sha512)?; + std_io::copy(&mut reader, &mut sha512)?; our_hashes.insert(path, sha512.finalize()); } @@ -346,43 +348,3 @@ fn main() -> Result<()> { Ok(()) } - -struct AsciiWhitespaceSkippingReader(R); - -impl Read for AsciiWhitespaceSkippingReader -where - R: BufRead, -{ - fn read(&mut self, mut buf: &mut [u8]) -> io::Result { - let mut written = 0; - - loop { - if buf.is_empty() { - break; - } - - let mut read_buf = self.0.fill_buf()?; - if read_buf.is_empty() { - break; - } - - let mut read = 0; - while !read_buf.is_empty() && !buf.is_empty() { - read += 1; - let b = read_buf[0]; - read_buf = &read_buf[1..]; - if b.is_ascii_whitespace() { - continue; - } - - buf[0] = b; - buf = &mut buf[1..]; - written += 1; - } - - self.0.consume(read); - } - - Ok(written) - } -} From 41c1fd58fb5e6bd2ba1d995059622d80cb8c056d Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 6 Apr 2024 11:21:57 +0200 Subject: [PATCH 5/7] fix: have tmp dir match package name --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 36b0c6c..4956060 100644 --- a/src/main.rs +++ b/src/main.rs @@ -51,7 +51,7 @@ fn main() -> Result<()> { let 
crates_io_index = "https://github.com/rust-lang/crates.io-index".parse::()?; let current_dir = env::current_dir()?; - let temp_dir = env::temp_dir().join("cargo-crates-check"); + let temp_dir = env::temp_dir().join(env!("CARGO_PKG_NAME")); let crates_dir = temp_dir.join("crates"); let repos_dir = temp_dir.join("repositories"); fs::create_dir_all(&crates_dir)?; From 56473fe4704a09cc0e9d8e9292d9c0d16ce98297 Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 6 Apr 2024 11:30:41 +0200 Subject: [PATCH 6/7] chore: reunite .crate handlers into a common module --- src/git.rs | 13 ++---- src/main.rs | 95 +++++++++++++++--------------------------- src/package.rs | 107 ++++++++++++++++++++++++++++++++++++++++++++++++ src/registry.rs | 18 +++----- 4 files changed, 148 insertions(+), 85 deletions(-) create mode 100644 src/package.rs diff --git a/src/git.rs b/src/git.rs index 38fcf67..3109136 100644 --- a/src/git.rs +++ b/src/git.rs @@ -1,19 +1,17 @@ use std::{ cmp::Ordering, collections::BTreeSet, - fs::File, - io::Read, path::{Path, PathBuf}, process::Command, str, }; use anyhow::{ensure, Context as _, Result}; -use flate2::read::GzDecoder; use semver::Version; -use tar::Archive; use url::Url; +use crate::package::Package; + #[derive(Debug)] pub struct GitRepository { repo_dir: PathBuf, @@ -122,7 +120,7 @@ impl<'a> GitRepositoryCheckout<'a> { default_toolchain: &str, name: &str, version: &Version, - ) -> Result> { + ) -> Result { let package_path = self .repository .repo_dir @@ -147,10 +145,7 @@ impl<'a> GitRepositoryCheckout<'a> { ); } - File::open(package_path) - .map(GzDecoder::new) - .map(Archive::new) - .map_err(Into::into) + Ok(Package::new(package_path)) } } diff --git a/src/main.rs b/src/main.rs index 4956060..853801d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,6 @@ use std::{ - collections::BTreeMap, env, fs, - io::{self as std_io, BufReader, Read}, + io::{self as std_io, Read}, str, }; @@ -9,15 +8,17 @@ use anyhow::{ensure, Context, Result}; use 
cargo_lock::{package::SourceKind, Checksum, Lockfile}; use cargo_toml::Manifest; use serde::Deserialize; -use sha2::{Digest as _, Sha256, Sha512}; +use sha2::{Digest as _, Sha256}; use url::Url; +use crate::package::{PackageComparison, PackageContents}; + use self::git::GitRepository; -use self::io::AsciiWhitespaceSkippingReader; use self::registry::RegistryCrate; mod git; mod io; +mod package; mod registry; mod rustup; @@ -105,6 +106,7 @@ fn main() -> Result<()> { continue; } }; + let registry_crate_package = registry_crate.package(); // // Verify the package checksum @@ -113,7 +115,7 @@ fn main() -> Result<()> { match package.checksum { Some(Checksum::Sha256(expected_sha256_hash)) => { let mut sha256 = Sha256::new(); - std_io::copy(&mut registry_crate.raw_crate_file()?, &mut sha256)?; + std_io::copy(&mut registry_crate_package.raw_reader()?, &mut sha256)?; let sha256 = sha256.finalize(); ensure!( @@ -134,7 +136,7 @@ fn main() -> Result<()> { let mut cargo_vcs_info = None; let mut cargo_toml = None; - let mut tar = registry_crate.crate_contents()?; + let mut tar = registry_crate_package.archive_reader()?; for entry in tar.entries()? { let mut entry = entry?; let path = entry @@ -253,12 +255,12 @@ fn main() -> Result<()> { // Create local package // - let mut our_tar_gz = match git_repository_checkout.crate_package( + let repository_package = match git_repository_checkout.crate_package( &default_toolchain, package.name.as_str(), &package.version, ) { - Ok(our_tar_gz) => our_tar_gz, + Ok(repository_package) => repository_package, Err(err) => { println!( "Couldn't package {} v{} err={:?}", @@ -272,76 +274,43 @@ fn main() -> Result<()> { // Hash file contents // - let mut crates_io_tar = registry_crate.crate_contents()?; - let mut crates_io_hashes = BTreeMap::new(); - for file in crates_io_tar.entries()? 
{ - let file = file?; - let path = file.path()?.into_owned(); - if path.ends_with(".cargo_vcs_info.json") { - continue; - } - // TODO: remove this - if path.ends_with("Cargo.toml") || path.ends_with("Cargo.toml.orig") { - continue; - } - - let mut reader = AsciiWhitespaceSkippingReader::new(BufReader::new(file)); - - let mut sha512 = Sha512::new(); - std_io::copy(&mut reader, &mut sha512)?; - crates_io_hashes.insert(path, sha512.finalize()); - } - - let mut our_hashes = BTreeMap::new(); - for file in our_tar_gz.entries()? { - let file = file?; - let path = file.path()?.into_owned(); - if path.ends_with(".cargo_vcs_info.json") { - continue; - } - // TODO: remove this - if path.ends_with("Cargo.toml") || path.ends_with("Cargo.toml.orig") { - continue; - } - - let mut reader = AsciiWhitespaceSkippingReader::new(BufReader::new(file)); - - let mut sha512 = Sha512::new(); - std_io::copy(&mut reader, &mut sha512)?; - our_hashes.insert(path, sha512.finalize()); - } + let repository_package_contents = repository_package + .contents() + .context("calculate repository package contents")?; + let registry_package_contents = registry_crate_package + .contents() + .context("calculate registry crate package contents")?; // // Compare hashes // - for (our_filename, our_sha512_hash) in &our_hashes { - match crates_io_hashes.get(our_filename) { - Some(crates_io_sha512) if our_sha512_hash == crates_io_sha512 => {} - Some(_) => { + let comparison = + PackageContents::compare(&repository_package_contents, &registry_package_contents); + for outcome in comparison { + match outcome { + PackageComparison::Equal(_) => continue, + PackageComparison::Different(path) => { println!( "Package {} has mismatching file hashes for {}", package.name, - our_filename.display() + path.display() ); } - None => { + PackageComparison::OnlyLeft(path) => { println!( "Package {} has file {} in our release but not in crates.io tarball", package.name, - our_filename.display() + path.display() + ); + } + 
PackageComparison::OnlyRight(path) => { + println!( + "Package {} has file {} in crates.io release but not ours", + package.name, + path.display() ); } - } - } - - for crates_io_filename in crates_io_hashes.keys() { - if !our_hashes.contains_key(crates_io_filename) { - println!( - "Package {} has file {} in crates.io release but not ours", - package.name, - crates_io_filename.display() - ); } } } diff --git a/src/package.rs b/src/package.rs new file mode 100644 index 0000000..9c40a0c --- /dev/null +++ b/src/package.rs @@ -0,0 +1,107 @@ +use std::{ + collections::BTreeMap, + fs::File, + io::{self, BufReader, Read, Seek}, + path::{Path, PathBuf}, +}; + +use flate2::read::GzDecoder; +use sha2::{Digest as _, Sha512}; +use tar::Archive; + +use crate::io::AsciiWhitespaceSkippingReader; + +#[derive(Debug)] +pub struct Package(PathBuf); + +#[derive(Debug)] +pub struct PackageContents(BTreeMap); + +#[derive(Debug)] +pub enum PackageComparison { + Equal(#[allow(dead_code)] PathBuf), + Different(PathBuf), + OnlyLeft(PathBuf), + OnlyRight(PathBuf), +} + +impl Package { + pub fn new(path: PathBuf) -> Self { + Self(path) + } + + pub fn raw_reader(&self) -> io::Result { + File::open(&self.0) + } + + pub fn decompressed_reader(&self) -> io::Result { + self.raw_reader().map(GzDecoder::new) + } + + pub fn archive_reader(&self) -> io::Result> { + self.decompressed_reader().map(Archive::new) + } + + pub fn contents(&self) -> io::Result { + let mut hashes = BTreeMap::new(); + + let mut archive = self.archive_reader()?; + for file in archive.entries()? 
{ + let file = file?; + let path = file.path()?.into_owned(); + + let mut reader = AsciiWhitespaceSkippingReader::new(BufReader::new(file)); + + let mut sha512 = Sha512::new(); + io::copy(&mut reader, &mut sha512)?; + hashes.insert(path, sha512.finalize().into()); + } + + Ok(PackageContents(hashes)) + } +} + +impl PackageContents { + pub fn compare<'a>( + left: &'a PackageContents, + right: &'a PackageContents, + ) -> impl Iterator + 'a { + let a = left + .0 + .iter() + .filter(|(path, _)| !is_path_ignored(path)) + .map(|(path, left_hash)| match right.0.get(path) { + Some(right_hash) if left_hash == right_hash => { + PackageComparison::Equal(path.to_owned()) + } + Some(_) => PackageComparison::Different(path.to_owned()), + None => PackageComparison::OnlyLeft(path.to_owned()), + }); + let b = right + .0 + .iter() + .filter(|(path, _)| !is_path_ignored(path)) + .filter_map(|(path, _)| { + if left.0.contains_key(path) { + None + } else { + Some(PackageComparison::OnlyRight(path.to_owned())) + } + }); + + a.chain(b) + } +} + +fn is_path_ignored(path: &Path) -> bool { + if path.ends_with(".cargo_vcs_info.json") { + return true; + } + + // TODO: remove this + if path.ends_with("Cargo.toml") || path.ends_with("Cargo.toml.orig") { + return true; + } + + false +} diff --git a/src/registry.rs b/src/registry.rs index 2c685fd..883c46f 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -1,13 +1,13 @@ use std::{ fs::{self, File}, - io::{self, Read, Write as _}, + io::{self, Write as _}, path::{Path, PathBuf}, }; use anyhow::Result; -use flate2::read::GzDecoder; use semver::Version; -use tar::Archive; + +use crate::package::Package; pub struct RegistryCrate { crate_file: PathBuf, @@ -50,15 +50,7 @@ impl RegistryCrate { }) } - pub fn raw_crate_file(&self) -> io::Result { - File::open(&self.crate_file) - } - - pub fn decompressed_crate_file(&self) -> io::Result { - self.raw_crate_file().map(GzDecoder::new) - } - - pub fn crate_contents(&self) -> io::Result> { - 
self.decompressed_crate_file().map(Archive::new) + pub fn package(&self) -> Package { + Package::new(self.crate_file.clone()) } } From 532e60ae31e04b434886f82f997839a311102444 Mon Sep 17 00:00:00 2001 From: Paolo Barbolini Date: Sat, 6 Apr 2024 15:13:07 +0200 Subject: [PATCH 7/7] fix: use File::sync_all API for closing registry file --- src/registry.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/registry.rs b/src/registry.rs index 883c46f..6dd0004 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -1,6 +1,6 @@ use std::{ fs::{self, File}, - io::{self, Write as _}, + io, path::{Path, PathBuf}, }; @@ -39,7 +39,7 @@ impl RegistryCrate { let mut tmp_crate_file = File::create(&tmp_crate_path)?; io::copy(&mut resp, &mut tmp_crate_file)?; - tmp_crate_file.flush()?; + tmp_crate_file.sync_all()?; drop(tmp_crate_file); fs::rename(tmp_crate_path, &crate_path)?;