Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use GitHubs commit API to check out the tree directly #11176

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/cargo/core/source/source_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ use crate::sources::{DirectorySource, CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_
use crate::sources::{GitSource, PathSource, RegistrySource};
use crate::util::{config, CanonicalUrl, CargoResult, Config, IntoUrl};
use log::trace;
use serde::de;
use serde::ser;
use serde::{de, ser, Serialize};
use std::cmp::{self, Ordering};
use std::collections::HashSet;
use std::fmt::{self, Formatter};
Expand Down Expand Up @@ -64,7 +63,7 @@ enum SourceKind {
}

/// Information to find a specific commit in a Git repository.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
pub enum GitReference {
/// From a tag.
Tag(String),
Expand Down
2 changes: 1 addition & 1 deletion src/cargo/sources/git/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ impl<'cfg> Source for GitSource<'cfg> {
// database, then try to resolve our reference with the preexisting
// repository.
(None, Some(db)) if self.config.offline() => {
let rev = db.resolve(&self.manifest_reference).with_context(|| {
let rev = db.resolve_to_object(&self.manifest_reference).with_context(|| {
"failed to lookup reference in preexisting repository, and \
can't check for updates in offline mode (--offline)"
})?;
Expand Down
230 changes: 189 additions & 41 deletions src/cargo/sources/git/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@ use cargo_util::{paths, ProcessBuilder};
use curl::easy::List;
use git2::{self, ErrorClass, ObjectType, Oid};
use log::{debug, info};
use serde::ser;
use serde::Serialize;
use serde::{ser, Deserialize, Serialize};
use std::borrow::Cow;
use std::env;
use std::fmt;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::str;
use std::str::{self, FromStr};
use std::time::{Duration, Instant};
use url::Url;

Expand All @@ -28,6 +27,17 @@ where
s.collect_str(t)
}

fn deserialize_str<'de, D, T>(deserializer: D) -> Result<T, D::Error>
where
T: FromStr,
<T as FromStr>::Err: fmt::Display,
D: serde::Deserializer<'de>,
{
let buf = String::deserialize(deserializer)?;

FromStr::from_str(&buf).map_err(serde::de::Error::custom)
}

pub struct GitShortID(git2::Buf);

impl GitShortID {
Expand Down Expand Up @@ -78,8 +88,25 @@ impl GitRemote {
&self.url
}

pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult<git2::Oid> {
reference.resolve(&self.db_at(path)?.repo)
/// Finds the Oid associated with the reference. The result is guaranteed to be on disk.
/// But may not be the object the reference points to!
/// For example, the reference points to a Commit and this may return the Tree that commit points do.
pub fn rev_to_object_for(
&self,
path: &Path,
reference: &GitReference,
) -> CargoResult<git2::Oid> {
reference.resolve_to_object(&self.db_at(path)?.repo)
}

/// Finds the Oid of the Commit the reference points to. But the result may not be on disk!
/// For example, the reference points to a Commit and we have only cloned the Tree.
pub fn rev_to_commit_for(
&self,
path: &Path,
reference: &GitReference,
) -> CargoResult<git2::Oid> {
reference.resolve_to_commit(&self.db_at(path)?.repo)
}

pub fn checkout(
Expand All @@ -104,7 +131,7 @@ impl GitRemote {
}
}
None => {
if let Ok(rev) = reference.resolve(&db.repo) {
if let Ok(rev) = reference.resolve_to_object(&db.repo) {
return Ok((db, rev));
}
}
Expand All @@ -123,7 +150,7 @@ impl GitRemote {
.context(format!("failed to clone into: {}", into.display()))?;
let rev = match locked_rev {
Some(rev) => rev,
None => reference.resolve(&repo)?,
None => reference.resolve_to_object(&repo)?,
};

Ok((
Expand Down Expand Up @@ -179,13 +206,72 @@ impl GitDatabase {
self.repo.revparse_single(&oid.to_string()).is_ok()
}

pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
r.resolve(&self.repo)
pub fn resolve_to_object(&self, r: &GitReference) -> CargoResult<git2::Oid> {
r.resolve_to_object(&self.repo)
}
}

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
struct ShallowDataBlob {
#[serde(serialize_with = "serialize_str")]
#[serde(deserialize_with = "deserialize_str")]
tree: git2::Oid,
#[serde(serialize_with = "serialize_str")]
#[serde(deserialize_with = "deserialize_str")]
etag: git2::Oid,
}

#[test]
fn check_with_git_hub() {
panic!(
r#"nonstandard shallow clone may be worse than a full check out.
This test is here to make sure we do not merge until we have official signoff from GitHub"#
)
}
Comment on lines +224 to +230
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raising visibility of this


impl GitReference {
pub fn resolve(&self, repo: &git2::Repository) -> CargoResult<git2::Oid> {
/// Finds the Oid associated with the reference. The result is guaranteed to be on disk.
/// But may not be the object the reference points to!
/// For example, the reference points to a Commit and this may return the Tree that commit points do.
pub fn resolve_to_object(&self, repo: &git2::Repository) -> CargoResult<git2::Oid> {
// Check if Cargo has done a nonstandard shallow clone
if let Some(shallow_data) = self.find_shallow_blob(repo) {
Ok(shallow_data.tree)
} else {
self.resolve_by_git(repo)
}
}
/// Finds the Oid of the Commit the reference points to. But the result may not be on disk!
/// For example, the reference points to a Commit and we have only cloned the Tree.
pub fn resolve_to_commit(&self, repo: &git2::Repository) -> CargoResult<git2::Oid> {
// Check if Cargo has done a nonstandard shallow clone
if let Some(shallow_data) = self.find_shallow_blob(repo) {
return Ok(shallow_data.etag);
} else {
self.resolve_by_git(repo)
}
}

fn find_shallow_blob(&self, repo: &git2::Repository) -> Option<ShallowDataBlob> {
let mut name = format!(
"refs/cargo-{}",
serde_json::to_string(self).expect("why cant we make json of this")
);
let shallow_blob = repo
.find_reference(&name)
.ok()
.and_then(|re| re.peel_to_blob().ok())
.and_then(|blob| serde_json::from_slice(blob.content()).ok())?;
// A nonstandard shallow clone should also have a reference to keep `git gc` from removing the tree.
name = name + "Tree";
let tree = repo.find_reference(&name).ok()?;
if tree.target()? != shallow_blob.tree {
return None;
}
shallow_blob
}

fn resolve_by_git(&self, repo: &git2::Repository) -> CargoResult<git2::Oid> {
let id = match self {
// Note that we resolve the named tag here in sync with where it's
// fetched into via `fetch` below.
Expand Down Expand Up @@ -707,10 +793,17 @@ fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, config: &Config) -> Ca
opts.progress(|_, cur, max| {
drop(pb.tick(cur, max, ""));
});
debug!("doing reset");
repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
debug!("reset done");
Ok(())
if obj.as_tree().is_some() {
debug!("doing reset for Cargo nonstandard shallow clone");
repo.checkout_tree(obj, Some(&mut opts))?;
debug!("reset done");
Ok(())
} else {
debug!("doing reset");
repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
debug!("reset done");
Ok(())
}
}

pub fn with_fetch_options(
Expand Down Expand Up @@ -816,32 +909,50 @@ pub fn fetch(
// The `+` symbol on the refspec means to allow a forced (fast-forward)
// update which is needed if there is ever a force push that requires a
// fast-forward.
match reference {
// For branches and tags we can fetch simply one reference and copy it
// locally, no need to fetch other branches/tags.
GitReference::Branch(b) => {
refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
if let Some(oid_to_fetch) = oid_to_fetch {
// GitHub told us exactly the min needed to fetch. So we can go ahead and do a Cargo nonstandard shallow clone.
refspecs.push(oid_to_fetch);
} else {
// In some cases we have Cargo nonstandard shallow cloned this repo before, but cannot do it now.
// Mostly if GitHub is now rate limiting us. If so, remove the info about the shallow clone.
let mut name = format!(
"refs/cargo-{}",
serde_json::to_string(reference).expect("why cant we make json of this")
);
if let Ok(mut refe) = repo.find_reference(&name) {
let _ = refe.delete();
}
GitReference::Tag(t) => {
refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
// And remove the reference that keeps `git gc` from cleaning the tree.
name = name + "Tree";
if let Ok(mut refe) = repo.find_reference(&name) {
let _ = refe.delete();
}

GitReference::DefaultBranch => {
refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
}
match reference {
// For branches and tags we can fetch simply one reference and copy it
// locally, no need to fetch other branches/tags.
GitReference::Branch(b) => {
refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
}
GitReference::Tag(t) => {
refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
}

GitReference::Rev(rev) => {
if rev.starts_with("refs/") {
refspecs.push(format!("+{0}:{0}", rev));
} else if let Some(oid_to_fetch) = oid_to_fetch {
refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
} else {
// We don't know what the rev will point to. To handle this
// situation we fetch all branches and tags, and then we pray
// it's somewhere in there.
refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
GitReference::DefaultBranch => {
refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
tags = true;
}

GitReference::Rev(rev) => {
if rev.starts_with("refs/") {
refspecs.push(format!("+{0}:{0}", rev));
} else {
// We don't know what the rev will point to. To handle this
// situation we fetch all branches and tags, and then we pray
// it's somewhere in there.
refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
tags = true;
}
}
}
}
Expand Down Expand Up @@ -1072,9 +1183,9 @@ enum FastPathRev {
/// The local rev (determined by `reference.resolve(repo)`) is already up to
/// date with what this rev resolves to on GitHub's server.
UpToDate,
/// The following SHA must be fetched in order for the local rev to become
/// The following refspec must be fetched in order for the local rev to become
/// up to date.
NeedsFetch(Oid),
NeedsFetch(String),
/// Don't know whether local rev is up to date. We'll fetch _all_ branches
/// and tags from the server and see what happens.
Indeterminate,
Expand Down Expand Up @@ -1105,7 +1216,7 @@ fn github_fast_path(
return Ok(FastPathRev::Indeterminate);
}

let local_object = reference.resolve(repo).ok();
let local_object = reference.resolve_to_commit(repo).ok();

let github_branch_name = match reference {
GitReference::Branch(branch) => branch,
Expand Down Expand Up @@ -1175,7 +1286,7 @@ fn github_fast_path(
handle.useragent("cargo")?;
handle.http_headers({
let mut headers = List::new();
headers.append("Accept: application/vnd.github.3.sha")?;
headers.append("Accept: application/vnd.github+json")?;
if let Some(local_object) = local_object {
headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
}
Expand All @@ -1195,8 +1306,45 @@ fn github_fast_path(
if response_code == 304 {
Ok(FastPathRev::UpToDate)
} else if response_code == 200 {
let oid_to_fetch = str::from_utf8(&response_body)?.parse::<Oid>()?;
Ok(FastPathRev::NeedsFetch(oid_to_fetch))
#[derive(Debug, Deserialize)]
struct GithubFastPathJsonResponse {
#[serde(serialize_with = "serialize_str")]
#[serde(deserialize_with = "deserialize_str")]
sha: git2::Oid,
commit: GithubCommitJsonResponse,
}

#[derive(Debug, Deserialize)]
struct GithubCommitJsonResponse {
tree: GithubTreeJsonResponse,
}

#[derive(Debug, Deserialize)]
struct GithubTreeJsonResponse {
#[serde(serialize_with = "serialize_str")]
#[serde(deserialize_with = "deserialize_str")]
sha: git2::Oid,
}

let data: GithubFastPathJsonResponse = serde_json::from_slice(&response_body)?;
// We can do a Cargo nonstandard shallow clone, so record the relevant information.
let bytes = serde_json::to_string(&ShallowDataBlob {
tree: data.commit.tree.sha,
etag: data.sha,
})
.expect("why cant we make json of this");
let shallow_blob = repo.blob(bytes.as_bytes())?;
let mut name = format!(
"refs/cargo-{}",
serde_json::to_string(reference).expect("why cant we make json of this")
);
repo.reference(&name, shallow_blob, true, "")?;
// Make a refspec to fetch the tree derectly, with a reference so that `git gc` does not clean it up.
Ok(FastPathRev::NeedsFetch(format!(
"+{}:{}Tree",
data.commit.tree.sha,
name
)))
} else {
// Usually response_code == 404 if the repository does not exist, and
// response_code == 422 if exists but GitHub is unable to resolve the
Expand Down
Loading