Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate .cargo_vcs_info.json and include in cargo package #5786

Closed
153 changes: 131 additions & 22 deletions src/cargo/ops/cargo_package.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::fs::{self, File};
use std::io::SeekFrom;
use std::io::prelude::*;
use std::path::{self, Path};
use std::path::{self, Path, PathBuf};
use std::sync::Arc;

use flate2::read::GzDecoder;
Expand All @@ -28,6 +28,8 @@ pub struct PackageOpts<'cfg> {
pub registry: Option<String>,
}

static VCS_INFO_FILE: &'static str = ".cargo_vcs_info.json";

pub fn package(ws: &Workspace, opts: &PackageOpts) -> CargoResult<Option<FileLock>> {
ops::resolve_ws(ws)?;
let pkg = ws.current()?;
Expand All @@ -42,6 +44,43 @@ pub fn package(ws: &Workspace, opts: &PackageOpts) -> CargoResult<Option<FileLoc

verify_dependencies(pkg)?;

// `list_files` outputs warnings as a side effect, so only do it once.
let src_files = src.list_files(pkg)?;

// Check (git) repository state, getting the current commit hash if not
// dirty. This will `bail!` if dirty, unless allow_dirty.
let sha1_hash = check_repo_state(pkg, &src_files, opts.allow_dirty)?;

// Write out VCS info file for package, if we have a sha1 hash, then
// preserve a shared lock on the file.
let vcs_info = if let Some(hsh) = sha1_hash {
let mut ifile = ws.target_dir().open_rw(
VCS_INFO_FILE,
config,
"(git) VCS info"
)?;
ifile.file().set_len(0)?;
// Manual JSON format is safe given sha1 hash is hex encoded.
writeln!(ifile, r#"{{"git_head_revision_sha1":"{}"}}"#, hsh)?;
ifile.flush()?;
ifile.file().sync_all()?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm could you elaborate a bit on why the file locks and syncs are needed here? I was thinking we'd just synthesize this file directly into the tarball (like Cargo.toml.orig without actually putting it on the filesystem)

Copy link
Contributor Author

@dekellum dekellum Jul 27, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I figured early on that I wanted a sentry file to try and make it at least inconvenient to spoof the hash, for example by protecting against interference from another cargo package process. The changes would be reduced without the sentry file however. Do you think the anti-spoof efforts don't justify the increased complexity? Note: The test failure by platform diff will remain an issue even without the linked sentry file.

Another early thought was that cargo package was doing something sentry file like, with the tarball produced being valided before being moved in place, so this was for parity.

drop(ifile);

// Downgrade the flock to shared, read access.
let ifile = ws.target_dir().open_ro(
VCS_INFO_FILE,
config,
"(git) VCS info, read only"
)?;
Some(ifile)
} else {
None
};

// Make sure a VCS info file is not included in source, regardless of if
// we produced the file above, and in particular if we did not.
check_vcs_file_collision(pkg, &src_files)?;

if opts.list {
let root = pkg.root();
let mut list: Vec<_> = src.list_files(pkg)?
Expand All @@ -51,17 +90,16 @@ pub fn package(ws: &Workspace, opts: &PackageOpts) -> CargoResult<Option<FileLoc
if include_lockfile(pkg) {
list.push("Cargo.lock".into());
}
if let Some(ref fl) = vcs_info {
list.push(fl.path().file_name().unwrap().into());
}
list.sort_unstable();
for file in list.iter() {
println!("{}", file.display());
}
return Ok(None);
}

if !opts.allow_dirty {
check_not_dirty(pkg, &src)?;
}

let filename = format!("{}-{}.crate", pkg.name(), pkg.version());
let dir = ws.target_dir().join("package");
let mut dst = {
Expand All @@ -77,7 +115,8 @@ pub fn package(ws: &Workspace, opts: &PackageOpts) -> CargoResult<Option<FileLoc
.shell()
.status("Packaging", pkg.package_id().to_string())?;
dst.file().set_len(0)?;
tar(ws, &src, dst.file(), &filename)
let vcs_path = vcs_info.as_ref().map(|f| f.path());
tar(ws, &src_files, vcs_path, dst.file(), &filename)
.chain_err(|| format_err!("failed to prepare local package for uploading"))?;
if opts.verify {
dst.seek(SeekFrom::Start(0))?;
Expand Down Expand Up @@ -152,7 +191,15 @@ fn verify_dependencies(pkg: &Package) -> CargoResult<()> {
Ok(())
}

fn check_not_dirty(p: &Package, src: &PathSource) -> CargoResult<()> {
// Check if the package source is in a *git* DVCS repository. If *git*, and
// the source is *dirty* (e.g. has uncommited changes) and not `allow_dirty`
// then `bail!` with an with an informative message. Otherwise return the sha1
// hash of the current *HEAD* commit, or `None` if *dirty*.
fn check_repo_state(
p: &Package,
src_files: &[PathBuf],
allow_dirty: bool
) -> CargoResult<Option<String>> {
if let Ok(repo) = git2::Repository::discover(p.root()) {
if let Some(workdir) = repo.workdir() {
debug!(
Expand All @@ -164,19 +211,24 @@ fn check_not_dirty(p: &Package, src: &PathSource) -> CargoResult<()> {
if let Ok(status) = repo.status_file(path) {
if (status & git2::Status::IGNORED).is_empty() {
debug!("Cargo.toml found in repo, checking if dirty");
return git(p, src, &repo);
return git(p, src_files, &repo, allow_dirty);
}
}
}
}

// No VCS recognized, we don't know if the directory is dirty or not, so we
// have to assume that it's clean.
return Ok(());

fn git(p: &Package, src: &PathSource, repo: &git2::Repository) -> CargoResult<()> {
return Ok(None);

fn git(
p: &Package,
src_files: &[PathBuf],
repo: &git2::Repository,
allow_dirty: bool
) -> CargoResult<Option<String>> {
let workdir = repo.workdir().unwrap();
let dirty = src.list_files(p)?
let dirty = src_files
.iter()
.filter(|file| {
let relative = file.strip_prefix(workdir).unwrap();
Expand All @@ -194,20 +246,46 @@ fn check_not_dirty(p: &Package, src: &PathSource) -> CargoResult<()> {
})
.collect::<Vec<_>>();
if dirty.is_empty() {
Ok(())
let rev_obj = repo.revparse_single("HEAD")?;
Ok(Some(rev_obj.id().to_string()))
} else {
bail!(
"{} files in the working directory contain changes that were \
not yet committed into git:\n\n{}\n\n\
to proceed despite this, pass the `--allow-dirty` flag",
dirty.len(),
dirty.join("\n")
)
if !allow_dirty {
bail!(
"{} files in the working directory contain changes that were \
not yet committed into git:\n\n{}\n\n\
to proceed despite this, pass the `--allow-dirty` flag",
dirty.len(),
dirty.join("\n")
)
}
Ok(None)
}
}
}

fn tar(ws: &Workspace, src: &PathSource, dst: &File, filename: &str) -> CargoResult<()> {
// Check for and `bail!` if a source file matches ROOT/VCS_INFO_FILE, since
// this is now a cargo reserved file name, and we don't want to allow
// forgery.
fn check_vcs_file_collision(pkg: &Package, src_files: &[PathBuf]) -> CargoResult<()> {
let root = pkg.root();
let vcs_info_path = Path::new(VCS_INFO_FILE);
let collision = src_files.iter().find(|&p| {
util::without_prefix(&p, root).unwrap() == vcs_info_path
});
if collision.is_some() {
bail!("Invalid inclusion of reserved file name \
{} in package source", VCS_INFO_FILE);
}
Ok(())
}

fn tar(
ws: &Workspace,
src_files: &[PathBuf],
vcs_info_path: Option<&Path>,
dst: &File,
filename: &str
) -> CargoResult<()> {
// Prepare the encoder and its header
let filename = Path::new(filename);
let encoder = GzBuilder::new()
Expand All @@ -219,7 +297,7 @@ fn tar(ws: &Workspace, src: &PathSource, dst: &File, filename: &str) -> CargoRes
let pkg = ws.current()?;
let config = ws.config();
let root = pkg.root();
for file in src.list_files(pkg)?.iter() {
for file in src_files.iter() {
let relative = util::without_prefix(file, root).unwrap();
check_filename(relative)?;
let relative = relative.to_str().ok_or_else(|| {
Expand Down Expand Up @@ -287,6 +365,37 @@ fn tar(ws: &Workspace, src: &PathSource, dst: &File, filename: &str) -> CargoRes
}
}

if let Some(src_path) = vcs_info_path {
check_filename(src_path)?;
let filename: PathBuf = src_path.file_name().unwrap().into();
let fnd = filename.display();
config
.shell()
.verbose(|shell| shell.status("Archiving", &fnd))?;
let path = format!(
"{}-{}{}{}",
pkg.name(),
pkg.version(),
path::MAIN_SEPARATOR,
fnd
);
let mut header = Header::new_ustar();
header.set_path(&path).chain_err(|| {
format!("failed to add to archive: `{}`", fnd)
})?;
let mut file_in = File::open(src_path).chain_err(|| {
format!("failed to open for archiving: `{}`", fnd)
})?;
let metadata = file_in.metadata().chain_err(|| {
format!("could not learn metadata for: `{}`", fnd)
})?;
header.set_metadata(&metadata);
header.set_cksum();
ar.append(&header, &mut file_in).chain_err(|| {
internal(format!("could not archive source file `{}`", fnd))
})?;
}

if include_lockfile(pkg) {
let toml = paths::read(&ws.root().join("Cargo.lock"))?;
let path = format!(
Expand Down
96 changes: 81 additions & 15 deletions tests/testsuite/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,27 +222,56 @@ See http://doc.crates.io/manifest.html#package-metadata for more info.
[PACKAGING] foo v0.0.1 ([..])
[ARCHIVING] [..]
[ARCHIVING] [..]
[ARCHIVING] .cargo_vcs_info.json
",
),
);

println!("package sub-repo");
assert_that(
cargo
.arg("package")
.arg("-v")
.arg("--no-verify")
.cwd(p.root().join("a")),
execs().with_status(0).with_stderr(
"\
#[cfg(unix)]
{
println!("package sub-repo (unix)");
assert_that(
cargo
.arg("package")
.arg("-v")
.arg("--no-verify")
.cwd(p.root().join("a")),
execs().with_status(0).with_stderr(
"\
[WARNING] manifest has no description[..]
See http://doc.crates.io/manifest.html#package-metadata for more info.
[PACKAGING] a v0.0.1 ([..])
[ARCHIVING] [..]
[ARCHIVING] [..]
[ARCHIVING] Cargo.toml
[ARCHIVING] src[/]lib.rs
[ARCHIVING] .cargo_vcs_info.json
",
),
);
),
);
}

// FIXME: From this required change in the test (omits final
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm this seems like it's a bit worrisome, do we know what changed from before to cause this test to fail on windows?

Copy link
Contributor Author

@dekellum dekellum Jul 27, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No we don't know that anything changed. the prior check_not_dirty would not bail! (which is avoided for the tests on both platforms) if either no-git-repo-found or git-repo-clean. So if there has been a change with this PR, for windows, then its a change between one of these results to the other. There is no existing test to pin down which one it is.

Also I've changed the order of checks (in cargo_package) vs list output, so another side-effect could be an earlier failure in a check, but that is not apparent. With the PR changes, cargo package just won't generate a .cargo_vcs_info.json file in the repo-not-found or git-repo-clean cases, which makes the output, coincidentally, the same as before.

So I'm asking to move forward with merging this, then followup with a specific test for this which can be backported if necessary for personal diagnostic purposes. Thoughts?

Copy link
Contributor Author

@dekellum dekellum Jul 27, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also: I have a logic flaw in the code comment, clarified above, which I'll fix. When I manually reviewed test fixtures, it looked like the sub-repo was dirty, so that makes me think this the no-git-repo-found case.

// .cargo_vcs_info.json) we can only conclude that on windows, and windows
// only, cargo is failing to find the parent repo
#[cfg(windows)]
{
println!("package sub-repo (windows)");
assert_that(
cargo
.arg("package")
.arg("-v")
.arg("--no-verify")
.cwd(p.root().join("a")),
execs().with_status(0).with_stderr(
"\
[WARNING] manifest has no description[..]
See http://doc.crates.io/manifest.html#package-metadata for more info.
[PACKAGING] a v0.0.1 ([..])
[ARCHIVING] Cargo.toml
[ARCHIVING] src[/]lib.rs
",
),
);
}
}

#[test]
Expand Down Expand Up @@ -281,6 +310,42 @@ See http://doc.crates.io/manifest.html#package-metadata for more info.
);
}

#[test]
fn vcs_file_collision() {
let p = project().build();
let _ = git::repo(&paths::root().join("foo"))
.file(
"Cargo.toml",
r#"
[project]
name = "foo"
description = "foo"
version = "0.0.1"
authors = []
license = "MIT"
documentation = "foo"
homepage = "foo"
repository = "foo"
exclude = ["*.no-existe"]
"#)
.file(
"src/main.rs",
r#"
fn main() {}
"#)
.file(".cargo_vcs_info.json", "foo")
.build();
assert_that(
p.cargo("package").arg("--no-verify"),
execs().with_status(101).with_stderr(&format!(
"\
[ERROR] Invalid inclusion of reserved file name .cargo_vcs_info.json \
in package source
",
)),
);
}

#[test]
fn path_dependency_no_version() {
let p = project()
Expand Down Expand Up @@ -397,7 +462,6 @@ fn exclude() {
"\
[WARNING] manifest has no description[..]
See http://doc.crates.io/manifest.html#package-metadata for more info.
[PACKAGING] foo v0.0.1 ([..])
[WARNING] [..] file `dir_root_1[/]some_dir[/]file` WILL be excluded [..]
See [..]
[WARNING] [..] file `dir_root_2[/]some_dir[/]file` WILL be excluded [..]
Expand All @@ -410,6 +474,7 @@ See [..]
See [..]
[WARNING] [..] file `some_dir[/]file_deep_1` WILL be excluded [..]
See [..]
[PACKAGING] foo v0.0.1 ([..])
[ARCHIVING] [..]
[ARCHIVING] [..]
[ARCHIVING] [..]
Expand Down Expand Up @@ -600,7 +665,7 @@ fn no_duplicates_from_modified_tracked_files() {
cargo.cwd(p.root());
assert_that(cargo.clone().arg("build"), execs().with_status(0));
assert_that(
cargo.arg("package").arg("--list"),
cargo.arg("package").arg("--list").arg("--allow-dirty"),
execs().with_status(0).with_stdout(
"\
Cargo.toml
Expand Down Expand Up @@ -1326,6 +1391,7 @@ fn package_lockfile_git_repo() {
p.cargo("package").arg("-l").masquerade_as_nightly_cargo(),
execs().with_status(0).with_stdout(
"\
.cargo_vcs_info.json
Cargo.lock
Cargo.toml
src/main.rs
Expand Down