// blob: 8fa8b7dd09bf433a8c901ebb257c38bbe3117e59 [file] [log] [blame]
use crate::core::GitReference;
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::paths;
use crate::util::process_builder::process;
use crate::util::{network, Config, IntoUrl, Progress};
use curl::easy::{Easy, List};
use git2::{self, ObjectType};
use log::{debug, info};
use serde::ser;
use serde::Serialize;
use std::env;
use std::fmt;
use std::fs::File;
use std::mem;
use std::path::{Path, PathBuf};
use std::process::Command;
use url::Url;
/// A resolved git revision: a thin wrapper around the `git2::Oid` of an
/// object. Values are produced by resolving a `GitReference` against a
/// repository.
#[derive(PartialEq, Clone, Debug)]
pub struct GitRevision(git2::Oid);
/// A revision is serialized as the string produced by its `Display`
/// implementation.
impl ser::Serialize for GitRevision {
    fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
    where
        S: ser::Serializer,
    {
        s.collect_str(self)
    }
}
fn serialize_str<T, S>(t: &T, s: S) -> Result<S::Ok, S::Error>
where
T: fmt::Display,
S: ser::Serializer,
{
s.collect_str(t)
}
impl fmt::Display for GitRevision {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}
/// Owns the `git2::Buf` returned by `short_id()` on a git object; created by
/// `GitDatabase::to_short_id`.
pub struct GitShortID(git2::Buf);
impl GitShortID {
    /// Returns the abbreviated object id as a string slice.
    ///
    /// # Panics
    ///
    /// Panics if the underlying buffer is not valid UTF-8. A short id is
    /// expected to be a textual object-id prefix, so this indicates a broken
    /// invariant rather than a recoverable error.
    pub fn as_str(&self) -> &str {
        self.0
            .as_str()
            .expect("git short id should always be valid UTF-8")
    }
}
/// `GitRemote` represents a remote repository, identified solely by its URL.
/// It gets cloned into a local `GitDatabase` (see `GitRemote::checkout`).
#[derive(PartialEq, Clone, Debug, Serialize)]
pub struct GitRemote {
/// URL of the remote; serialized as its string form via `serialize_str`.
#[serde(serialize_with = "serialize_str")]
url: Url,
}
/// `GitDatabase` is a local clone of a remote repository's database. Multiple
/// `GitCheckout`s can be cloned from this `GitDatabase`.
#[derive(Serialize)]
pub struct GitDatabase {
/// The remote this database was created for.
remote: GitRemote,
/// Filesystem location the repository was opened from / cloned into.
path: PathBuf,
/// Open handle to the repository at `path`; never serialized.
#[serde(skip_serializing)]
repo: git2::Repository,
}
/// `GitCheckout` is a local checkout of a particular revision.
///
/// Checkouts are produced by `GitDatabase::copy_to`, which reuses an existing
/// checkout at the destination when it can and otherwise falls back to
/// `GitCheckout::clone_into`. Both take an already-resolved `GitRevision`;
/// resolving a `GitReference` into a revision happens beforehand and fails
/// with an `anyhow::Error` if no revision for that reference was found.
#[derive(Serialize)]
pub struct GitCheckout<'a> {
/// The database this checkout was cloned from (and is fetched from).
database: &'a GitDatabase,
/// Directory containing the working tree of this checkout.
location: PathBuf,
/// The revision this checkout is (or is being) reset to.
revision: GitRevision,
/// Open handle to the checkout's repository; never serialized.
#[serde(skip_serializing)]
repo: git2::Repository,
}
// Implementations
impl GitRemote {
    /// Creates a remote pointing at `url` (the URL is cloned).
    pub fn new(url: &Url) -> GitRemote {
        let url = url.clone();
        GitRemote { url }
    }

    /// The URL of this remote.
    pub fn url(&self) -> &Url {
        &self.url
    }

    /// Opens the database stored at `path` and resolves `reference` in it.
    pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult<GitRevision> {
        let db = self.db_at(path)?;
        reference.resolve(&db.repo)
    }

    /// Makes sure a database for this remote exists at `into` and resolves
    /// `reference` against it.
    ///
    /// If a repository can already be opened at `into` it is fetched into; a
    /// fetch failure is returned to the caller. If the reference cannot be
    /// resolved afterwards (or no repository could be opened) the directory
    /// is re-created from scratch via a fresh clone, and resolution failure
    /// at that point is an error.
    pub fn checkout(
        &self,
        into: &Path,
        reference: &GitReference,
        cargo_config: &Config,
    ) -> CargoResult<(GitDatabase, GitRevision)> {
        let reused = match git2::Repository::open(into) {
            Ok(mut existing) => {
                self.fetch_into(&mut existing, cargo_config)
                    .chain_err(|| format!("failed to fetch into {}", into.display()))?;
                // When resolution fails `existing` is dropped right here,
                // before the directory is wiped by `clone_into` below.
                reference.resolve(&existing).ok().map(|rev| (existing, rev))
            }
            Err(_) => None,
        };

        let (repo, rev) = if let Some(pair) = reused {
            pair
        } else {
            let fresh = self
                .clone_into(into, cargo_config)
                .chain_err(|| format!("failed to clone into: {}", into.display()))?;
            let rev = reference.resolve(&fresh)?;
            (fresh, rev)
        };

        let database = GitDatabase {
            remote: self.clone(),
            path: into.to_path_buf(),
            repo,
        };
        Ok((database, rev))
    }

    /// Opens the already-existing database at `db_path`.
    pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
        Ok(GitDatabase {
            repo: git2::Repository::open(db_path)?,
            remote: self.clone(),
            path: db_path.to_path_buf(),
        })
    }

    /// Fetches all branch heads of this remote into the repository `dst`.
    fn fetch_into(&self, dst: &mut git2::Repository, cargo_config: &Config) -> CargoResult<()> {
        fetch(
            dst,
            self.url.as_str(),
            "refs/heads/*:refs/heads/*",
            cargo_config,
        )
    }

    /// Replaces whatever is at `dst` with a freshly initialized bare
    /// repository and fetches all branch heads of this remote into it.
    fn clone_into(&self, dst: &Path, cargo_config: &Config) -> CargoResult<git2::Repository> {
        if dst.exists() {
            paths::remove_dir_all(dst)?;
        }
        paths::create_dir_all(dst)?;

        let refspec = "refs/heads/*:refs/heads/*";
        let mut repo = init(dst, true)?;
        fetch(&mut repo, self.url.as_str(), refspec, cargo_config)?;
        Ok(repo)
    }
}
impl GitDatabase {
    /// Produces a checkout of `rev` at `dest`, reusing an existing checkout
    /// there when possible, and then updates its submodules.
    pub fn copy_to(
        &self,
        rev: GitRevision,
        dest: &Path,
        cargo_config: &Config,
    ) -> CargoResult<GitCheckout<'_>> {
        let existing = match git2::Repository::open(dest) {
            Err(_) => None,
            Ok(repo) => {
                let mut co = GitCheckout::new(dest, self, rev.clone(), repo);
                if co.is_fresh() {
                    Some(co)
                } else {
                    co.fetch(cargo_config)?;
                    // A fetch can succeed on a corrupt repository without the
                    // object actually being present afterwards, so verify
                    // that it is there before resetting to it. If it is not,
                    // give up on this checkout and re-clone below.
                    if co.has_object() {
                        co.reset(cargo_config)?;
                        assert!(co.is_fresh());
                        Some(co)
                    } else {
                        None
                    }
                }
            }
        };

        let checkout = if let Some(co) = existing {
            co
        } else {
            GitCheckout::clone_into(dest, self, rev, cargo_config)?
        };
        checkout.update_submodules(cargo_config)?;
        Ok(checkout)
    }

    /// Returns the abbreviated id of the object that `revision` names.
    pub fn to_short_id(&self, revision: &GitRevision) -> CargoResult<GitShortID> {
        let short = self.repo.find_object(revision.0, None)?.short_id()?;
        Ok(GitShortID(short))
    }

    /// Succeeds when `reference` can be rev-parsed in this database and
    /// returns the lookup error otherwise.
    pub fn has_ref(&self, reference: &str) -> CargoResult<()> {
        Ok(self.repo.revparse_single(reference).map(drop)?)
    }
}
impl GitReference {
    /// Resolves this reference to a concrete revision inside `repo`.
    ///
    /// * A tag is looked up as `refs/tags/<name>` and peeled to a commit.
    /// * A branch must exist as a local branch and have a direct target.
    /// * A rev is rev-parsed; if it names a tag object the tag's target id is
    ///   used, otherwise the id of the object itself.
    fn resolve(&self, repo: &git2::Repository) -> CargoResult<GitRevision> {
        let oid = match self {
            GitReference::Tag(tag) => {
                let lookup = || -> CargoResult<git2::Oid> {
                    let id = repo.refname_to_id(&format!("refs/tags/{}", tag))?;
                    let commit = repo.find_object(id, None)?.peel(ObjectType::Commit)?;
                    Ok(commit.id())
                };
                lookup().chain_err(|| format!("failed to find tag `{}`", tag))?
            }
            GitReference::Branch(name) => {
                let branch = repo
                    .find_branch(name, git2::BranchType::Local)
                    .chain_err(|| format!("failed to find branch `{}`", name))?;
                match branch.get().target() {
                    Some(oid) => oid,
                    None => anyhow::bail!("branch `{}` did not have a target", name),
                }
            }
            GitReference::Rev(spec) => {
                let obj = repo.revparse_single(spec)?;
                if let Some(tag) = obj.as_tag() {
                    tag.target_id()
                } else {
                    obj.id()
                }
            }
        };
        Ok(GitRevision(oid))
    }
}
impl<'a> GitCheckout<'a> {
    /// Bundles an already-open checkout repository with its metadata.
    fn new(
        path: &Path,
        database: &'a GitDatabase,
        revision: GitRevision,
        repo: git2::Repository,
    ) -> GitCheckout<'a> {
        let location = path.to_path_buf();
        GitCheckout {
            database,
            location,
            revision,
            repo,
        }
    }

    /// Replaces whatever is at `into` with a local clone of `database` and
    /// resets the result to `revision`.
    fn clone_into(
        into: &Path,
        database: &'a GitDatabase,
        revision: GitRevision,
        config: &Config,
    ) -> CargoResult<GitCheckout<'a>> {
        let parent = into.parent().unwrap();
        paths::create_dir_all(parent)?;
        if into.exists() {
            paths::remove_dir_all(into)?;
        }

        // This is a filesystem-to-filesystem clone, so global git
        // configuration has no business here: use an empty `git2::Config`.
        let git_config = git2::Config::new()?;

        // The usual fetch options are still used: authentication is not
        // needed for a local clone, but progress reporting may be wanted.
        let url = database.path.into_url()?;
        let mut cloned = None;
        with_fetch_options(&git_config, url.as_str(), config, &mut |fopts| {
            // No real checkout here; the `reset` below takes care of that.
            let mut no_checkout = git2::build::CheckoutBuilder::new();
            no_checkout.dry_run();

            // Ask libgit2 for a "local" clone, which tries to hardlink the
            // object database and should speed the clone up quite a bit.
            cloned = Some(
                git2::build::RepoBuilder::new()
                    .clone_local(git2::build::CloneLocal::Local)
                    .with_checkout(no_checkout)
                    .fetch_options(fopts)
                    .clone(url.as_str(), into)?,
            );
            Ok(())
        })?;

        let checkout = GitCheckout::new(into, database, revision, cloned.unwrap());
        checkout.reset(config)?;
        Ok(checkout)
    }

    /// Whether `HEAD` already is the wanted revision *and* the `.cargo-ok`
    /// marker written at the end of `reset` is present.
    fn is_fresh(&self) -> bool {
        let at_revision = match self.repo.revparse_single("HEAD") {
            Ok(head) => head.id() == self.revision.0,
            Err(_) => false,
        };
        at_revision && self.location.join(".cargo-ok").exists()
    }

    /// Fetches all branch heads from the database into this checkout.
    fn fetch(&mut self, cargo_config: &Config) -> CargoResult<()> {
        info!("fetch {}", self.repo.path().display());
        let source = self.database.path.into_url()?;
        fetch(
            &mut self.repo,
            source.as_str(),
            "refs/heads/*:refs/heads/*",
            cargo_config,
        )
    }

    /// Whether the wanted revision can be found in this checkout's repository.
    fn has_object(&self) -> bool {
        let lookup = self.repo.find_object(self.revision.0, None);
        lookup.is_ok()
    }

    /// Hard-resets the checkout to the wanted revision.
    ///
    /// The `.cargo-ok` marker file records that a reset ran to completion:
    /// it is removed (best effort) before the reset starts and re-created
    /// only once the reset has succeeded. If the process dies in between,
    /// the marker is missing and `is_fresh` reports the checkout as stale,
    /// so it gets checked out again next time.
    fn reset(&self, config: &Config) -> CargoResult<()> {
        let marker = self.location.join(".cargo-ok");
        drop(paths::remove_file(&marker));
        info!("reset {} to {}", self.repo.path().display(), self.revision);
        let target = self.repo.find_object(self.revision.0, None)?;
        reset(&self.repo, &target, config)?;
        File::create(&marker)?;
        Ok(())
    }

    /// Recursively brings every submodule of this checkout to the commit
    /// recorded for it.
    fn update_submodules(&self, cargo_config: &Config) -> CargoResult<()> {
        /// Updates each submodule of `repo` in turn, stopping at the first
        /// failure.
        fn update_all(repo: &git2::Repository, cargo_config: &Config) -> CargoResult<()> {
            info!("update submodules for: {:?}", repo.workdir().unwrap());
            for mut child in repo.submodules()? {
                update_one(repo, &mut child, cargo_config).chain_err(|| {
                    format!(
                        "failed to update submodule `{}`",
                        child.name().unwrap_or("")
                    )
                })?;
            }
            Ok(())
        }

        /// Updates a single submodule `child` of `parent` and then recurses
        /// into the submodule's own submodules.
        fn update_one(
            parent: &git2::Repository,
            child: &mut git2::Submodule<'_>,
            cargo_config: &Config,
        ) -> CargoResult<()> {
            child.init(false)?;
            let url = child.url().ok_or_else(|| {
                anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
            })?;

            // A submodule listed in .gitmodules but not actually checked out
            // has no head id; there is nothing to do for it.
            let expected = match child.head_id() {
                Some(oid) => oid,
                None => return Ok(()),
            };

            // Try to open the submodule's repository and read where its HEAD
            // currently points.
            let opened = child.open().and_then(|repo| {
                let current = repo.head()?.target();
                Ok((current, repo))
            });
            let mut repo = match opened {
                Ok((current, repo)) => {
                    // Already at the recorded commit: skip the update and
                    // just keep recursing.
                    if child.head_id() == current {
                        return update_all(&repo, cargo_config);
                    }
                    repo
                }
                Err(..) => {
                    // Not usable as a repository: start over with a freshly
                    // initialized one at the submodule's path.
                    let path = parent.workdir().unwrap().join(child.path());
                    drop(paths::remove_dir_all(&path));
                    init(&path, false)?
                }
            };

            // Fetch from the submodule's origin and reset to the recorded
            // commit.
            let refspec = "refs/heads/*:refs/heads/*";
            fetch(&mut repo, url, refspec, cargo_config).chain_err(|| {
                format!(
                    "failed to fetch submodule `{}` from {}",
                    child.name().unwrap_or(""),
                    url
                )
            })?;

            let obj = repo.find_object(expected, None)?;
            reset(&repo, &obj, cargo_config)?;
            update_all(&repo, cargo_config)
        }

        update_all(&self.repo, cargo_config)
    }
}
/// Prepare the authentication callbacks for cloning a git repository.
///
/// The main purpose of this function is to construct the "authentication
/// callback" which is used to clone a repository. This callback will attempt to
/// find the right authentication on the system (without user input) and will
/// guide libgit2 in doing so.
///
/// The callback is provided `allowed` types of credentials, and we try to do as
/// much as possible based on that:
///
/// * Prioritize SSH keys from the local ssh agent as they're likely the most
/// reliable. The username here is prioritized from the credential
/// callback, then from whatever is configured in git itself, and finally
/// we fall back to the generic user of `git`.
///
/// * If a username/password is allowed, then we fallback to git2-rs's
/// implementation of the credential helper. This is what is configured
/// with `credential.helper` in git, and is the interface for the macOS
/// keychain, for example.
///
/// * After the above two have failed, we just kinda grapple attempting to
/// return *something*.
///
/// If any form of authentication fails, libgit2 will repeatedly ask us for
/// credentials until we give it a reason to not do so. To ensure we don't
/// just sit here looping forever we keep track of authentications we've
/// attempted and we don't try the same ones again.
///
/// # Arguments
///
/// * `url` - URL being accessed; used to set up the credential helper.
/// * `cfg` - git configuration consulted by the credential helper.
/// * `f` - the operation to run. It receives the credential callback and may
/// be invoked more than once: once up front and, if libgit2 asked for an
/// ssh username, once per candidate username.
///
/// # Errors
///
/// If `f` fails without the credential callback ever having been invoked, its
/// error is returned as-is. If it fails after at least one invocation, the
/// error is wrapped with a message listing what was attempted.
fn with_authentication<T, F>(url: &str, cfg: &git2::Config, mut f: F) -> CargoResult<T>
where
F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
{
let mut cred_helper = git2::CredentialHelper::new(url);
cred_helper.config(cfg);
// State shared with the credential callback below; it records what was
// tried so that the final error message can describe it.
let mut ssh_username_requested = false;
let mut cred_helper_bad = None;
let mut ssh_agent_attempts = Vec::new();
let mut any_attempts = false;
let mut tried_sshkey = false;
let mut res = f(&mut |url, username, allowed| {
any_attempts = true;
// libgit2's "USERNAME" authentication actually means that it's just
// asking us for a username to keep going. This is currently only really
// used for SSH authentication and isn't really an authentication type.
// The logic currently looks like:
//
// let user = ...;
// if (user.is_null())
// user = callback(USERNAME, null, ...);
//
// callback(SSH_KEY, user, ...)
//
// So if we're being called here then we know that (a) we're using ssh
// authentication and (b) no username was specified in the URL that
// we're trying to clone. We need to guess an appropriate username here,
// but that may involve a few attempts. Unfortunately we can't switch
// usernames during one authentication session with libgit2, so to
// handle this we bail out of this authentication session after setting
// the flag `ssh_username_requested`, and then we handle this below.
if allowed.contains(git2::CredentialType::USERNAME) {
debug_assert!(username.is_none());
ssh_username_requested = true;
return Err(git2::Error::from_str("gonna try usernames later"));
}
// An "SSH_KEY" authentication indicates that we need some sort of SSH
// authentication. This can currently either come from the ssh-agent
// process or from a raw in-memory SSH key. Cargo only supports using
// ssh-agent currently.
//
// If we get called with this then the only way that should be possible
// is if a username is specified in the URL itself (e.g., `username` is
// Some), hence the unwrap() here. We try custom usernames down below.
if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
// If ssh-agent authentication fails, libgit2 will keep
// calling this callback asking for other authentication
// methods to try. Make sure we only try ssh-agent once,
// to avoid looping forever.
tried_sshkey = true;
let username = username.unwrap();
debug_assert!(!ssh_username_requested);
ssh_agent_attempts.push(username.to_string());
return git2::Cred::ssh_key_from_agent(username);
}
// Sometimes libgit2 will ask for a username/password in plaintext. This
// is where Cargo would have an interactive prompt if we supported it,
// but we currently don't! Right now the only way we support fetching a
// plaintext password is through the `credential.helper` support, so
// fetch that here.
//
// If ssh-agent authentication fails, libgit2 will keep calling this
// callback asking for other authentication methods to try. Check
// cred_helper_bad to make sure we only try the git credential helper
// once, to avoid looping forever.
if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
{
let r = git2::Cred::credential_helper(cfg, url, username);
cred_helper_bad = Some(r.is_err());
return r;
}
// I'm... not sure what the DEFAULT kind of authentication is, but seems
// easy to support?
if allowed.contains(git2::CredentialType::DEFAULT) {
return git2::Cred::default();
}
// Whelp, we tried our best
Err(git2::Error::from_str("no authentication available"))
});
// Ok, so if it looks like we're going to be doing ssh authentication, we
// want to try a few different usernames as one wasn't specified in the URL
// for us to use. In order, we'll try:
//
// * A credential helper's username for this URL, if available.
// * This account's username.
// * "git"
//
// We have to restart the authentication session each time (due to
// constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
// call our callback, `f`, in a loop here.
if ssh_username_requested {
debug_assert!(res.is_err());
// Candidates are pushed in the reverse of the order listed above because
// the loop below takes them off the end of the vector with `pop()`.
let mut attempts = Vec::new();
attempts.push("git".to_string());
if let Ok(s) = env::var("USER").or_else(|_| env::var("USERNAME")) {
attempts.push(s);
}
if let Some(ref s) = cred_helper.username {
attempts.push(s.clone());
}
while let Some(s) = attempts.pop() {
// We should get `USERNAME` first, where we just return our attempt,
// and then after that we should get `SSH_KEY`. If the first attempt
// fails we'll get called again, but we don't have another option so
// we bail out.
//
// Note: this counter shadows the `attempts` vector for the rest of the
// loop body only; the loop condition still refers to the vector.
let mut attempts = 0;
res = f(&mut |_url, username, allowed| {
if allowed.contains(git2::CredentialType::USERNAME) {
return git2::Cred::username(&s);
}
if allowed.contains(git2::CredentialType::SSH_KEY) {
debug_assert_eq!(Some(&s[..]), username);
attempts += 1;
if attempts == 1 {
ssh_agent_attempts.push(s.to_string());
return git2::Cred::ssh_key_from_agent(&s);
}
}
Err(git2::Error::from_str("no authentication available"))
});
// If we made two attempts then that means:
//
// 1. A username was requested, we returned `s`.
// 2. An ssh key was requested, we returned to look up `s` in the
// ssh agent.
// 3. For whatever reason that lookup failed, so we were asked again
// for another mode of authentication.
//
// Essentially, if `attempts == 2` then in theory the only error was
// that this username failed to authenticate (e.g., no other network
// errors happened). Otherwise something else is funny so we bail
// out.
if attempts != 2 {
break;
}
}
}
// Success, or a failure that happened before any credentials were ever
// requested: hand the result back without extra context.
if res.is_ok() || !any_attempts {
return res.map_err(From::from);
}
// In the case of an authentication failure (where we tried something) then
// we try to give a more helpful error message about precisely what we
// tried.
let res = res.map_err(anyhow::Error::from).chain_err(|| {
let mut msg = "failed to authenticate when downloading \
repository"
.to_string();
if !ssh_agent_attempts.is_empty() {
let names = ssh_agent_attempts
.iter()
.map(|s| format!("`{}`", s))
.collect::<Vec<_>>()
.join(", ");
msg.push_str(&format!(
"\nattempted ssh-agent authentication, but \
none of the usernames {} succeeded",
names
));
}
// `Some(true)`: the credential helper itself failed; `Some(false)`: it
// produced credentials but authentication still did not succeed.
if let Some(failed_cred_helper) = cred_helper_bad {
if failed_cred_helper {
msg.push_str(
"\nattempted to find username/password via \
git's `credential.helper` support, but failed",
);
} else {
msg.push_str(
"\nattempted to find username/password via \
`credential.helper`, but maybe the found \
credentials were incorrect",
);
}
}
msg
})?;
Ok(res)
}
/// Hard-resets `repo` to `obj`, reporting checkout progress under the
/// "Checkout" label.
fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, config: &Config) -> CargoResult<()> {
    let mut progress = Progress::new("Checkout", config);
    let mut checkout = git2::build::CheckoutBuilder::new();
    // The outcome of a progress tick is deliberately discarded; it never
    // influences the checkout itself.
    checkout.progress(|_, done, total| {
        let _ = progress.tick(done, total);
    });
    Ok(repo.reset(obj, git2::ResetType::Hard, Some(&mut checkout))?)
}
/// Runs `cb` with a `git2::FetchOptions` that has authentication, transfer
/// progress reporting (label "Fetch") and tag downloading configured.
///
/// The whole operation is wrapped in `network::with_retry`, and `cb` is
/// invoked from inside `with_authentication`, so it may be called several
/// times.
pub fn with_fetch_options(
    git_config: &git2::Config,
    url: &str,
    config: &Config,
    cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
) -> CargoResult<()> {
    let mut progress = Progress::new("Fetch", config);
    network::with_retry(config, || {
        with_authentication(url, git_config, |credentials| {
            let mut callbacks = git2::RemoteCallbacks::new();
            callbacks.credentials(credentials);
            // The transfer callback returns whether the tick succeeded.
            callbacks.transfer_progress(|stats| {
                let ticked = progress.tick(stats.indexed_objects(), stats.total_objects());
                ticked.is_ok()
            });

            // Bundle the callbacks into fetch options that also ask for all
            // tags to be downloaded.
            let mut fetch_opts = git2::FetchOptions::new();
            fetch_opts.remote_callbacks(callbacks);
            fetch_opts.download_tags(git2::AutotagOption::All);
            cb(fetch_opts)
        })
    })
}
/// Fetches `refspec` from `url` into `repo`.
///
/// Fails immediately when `--frozen` was given or network access is not
/// allowed. For `github.com` URLs a cheap "is it already up to date" check is
/// attempted first. Otherwise the repository is garbage-collected if needed
/// and then fetched, either with the `git` CLI (when `git_fetch_with_cli` is
/// enabled in the net config) or with libgit2.
pub fn fetch(
    repo: &mut git2::Repository,
    url: &str,
    refspec: &str,
    config: &Config,
) -> CargoResult<()> {
    if config.frozen() {
        anyhow::bail!(
            "attempting to update a git repository, but --frozen \
             was specified"
        );
    }
    if !config.network_allowed() {
        anyhow::bail!("can't update a git repository in the offline mode");
    }

    // When fetching from GitHub, try GitHub's special fast path for checking
    // whether our copy of the repository is already up to date.
    if let Ok(parsed) = Url::parse(url) {
        let on_github = parsed.host_str() == Some("github.com");
        if on_github {
            if let Ok(oid) = repo.refname_to_id("refs/remotes/origin/master") {
                let mut handle = config.http()?.borrow_mut();
                debug!("attempting GitHub fast path for {}", parsed);
                if github_up_to_date(&mut handle, &parsed, &oid) {
                    return Ok(());
                }
                debug!("fast path failed, falling back to a git fetch");
            }
        }
    }

    // Repositories get reused a lot, so before updating check whether this
    // one has grown enough to benefit from a gc. That should be cheap next to
    // the network request that is about to be issued.
    maybe_gc_repo(repo)?;

    // libgit2's authentication support lags behind the `git` command line,
    // so users may opt into fetching with `git`-the-CLI instead. That makes
    // more flavors of authentication possible while libgit2 stays the
    // fast, portable default.
    if config.net_config()?.git_fetch_with_cli == Some(true) {
        return fetch_with_cli(repo, url, refspec, config);
    }

    debug!("doing a fetch for {}", url);
    let git_config = git2::Config::open_default()?;
    with_fetch_options(&git_config, url, config, &mut |mut opts| {
        // A fetch can fail spuriously because the repository is corrupt, but
        // also for plenty of legitimate (e.g. network) reasons. Cargo should
        // recover from the former without trampling over the latter.
        //
        // So: if the error looks like a "corrupt repo" error, blow the
        // repository away once and retry. Any other error, or a second
        // failure after reinitializing, is returned as-is.
        let mut repo_reinitialized = false;
        loop {
            debug!("initiating fetch of {} from {}", refspec, url);
            let outcome = repo
                .remote_anonymous(url)?
                .fetch(&[refspec], Some(&mut opts), None);
            let err = match outcome {
                Ok(()) => break,
                Err(e) => e,
            };
            debug!("fetch failed: {}", err);

            let looks_corrupt = err.class() == git2::ErrorClass::Reference;
            if looks_corrupt && !repo_reinitialized {
                repo_reinitialized = true;
                debug!(
                    "looks like this is a corrupt repository, reinitializing \
                     and trying again"
                );
                if reinitialize(repo).is_ok() {
                    continue;
                }
            }
            return Err(err.into());
        }
        Ok(())
    })
}
/// Fetches `refspec` from `url` by running `git fetch` inside the directory
/// returned by `repo.path()`, instead of going through libgit2.
fn fetch_with_cli(
    repo: &mut git2::Repository,
    url: &str,
    refspec: &str,
    config: &Config,
) -> CargoResult<()> {
    let mut cmd = process("git");
    cmd.arg("fetch");
    cmd.arg("--tags"); // fetch all tags
    cmd.arg("--force"); // handle force pushes
    cmd.arg("--update-head-ok"); // see discussion in #2078
    cmd.arg(url);
    cmd.arg(refspec);

    // If cargo is run by git (for example, the `exec` command in `git
    // rebase`), GIT_DIR is set by git and points to the wrong location, and
    // it takes precedence over the cwd. Unset it so git looks at the cwd for
    // the repo. The rest of these may not be necessary, but they are removed
    // too, just to be extra paranoid and avoid any issues.
    for &var in &[
        "GIT_DIR",
        "GIT_WORK_TREE",
        "GIT_INDEX_FILE",
        "GIT_OBJECT_DIRECTORY",
        "GIT_ALTERNATE_OBJECT_DIRECTORIES",
    ] {
        cmd.env_remove(var);
    }
    cmd.cwd(repo.path());

    config
        .shell()
        .verbose(|shell| shell.status("Running", &cmd.to_string()))?;
    cmd.exec_with_output()?;
    Ok(())
}
/// Cargo has a bunch of long-lived git repositories in its global cache and
/// some, like the index, are updated very frequently. Right now each update
/// creates a new "pack file" inside the git database, and over time this can
/// cause bad performance and bad current behavior in libgit2.
///
/// One pathological use case today is where libgit2 opens hundreds of file
/// descriptors, getting us dangerously close to blowing out the OS limits of
/// how many fds we can have open. This is detailed in #4403.
///
/// To try to combat this problem we attempt a `git gc` here. Note, though, that
/// we may not even have `git` installed on the system! As a result we
/// opportunistically try a `git gc` when the pack directory looks too big, and
/// failing that we just blow away the repository and start over.
///
/// On return `repo` may have been replaced by a freshly opened (after a
/// successful gc) or freshly initialized (after `reinitialize`) repository.
///
/// # Errors
///
/// Fails if re-opening the repository after a successful `git gc` fails, or
/// if the fallback `reinitialize` fails. A missing pack directory or a `git`
/// binary that cannot be spawned is not an error.
fn maybe_gc_repo(repo: &mut git2::Repository) -> CargoResult<()> {
    // Here we arbitrarily declare that once the `pack` folder holds at least
    // 100 entries (overridable through `__CARGO_PACKFILE_LIMIT`) we need to
    // do a gc.
    let entries = match repo.path().join("objects/pack").read_dir() {
        Ok(e) => e.count(),
        Err(_) => {
            debug!("skipping gc as pack dir appears gone");
            return Ok(());
        }
    };
    let max = env::var("__CARGO_PACKFILE_LIMIT")
        .ok()
        .and_then(|s| s.parse::<usize>().ok())
        .unwrap_or(100);
    if entries < max {
        debug!("skipping gc as there's only {} pack files", entries);
        return Ok(());
    }

    // First up, try a literal `git gc` by shelling out to git. This is pretty
    // likely to fail though as we may not have `git` installed. Note that
    // libgit2 doesn't currently implement the gc operation, so there's no
    // equivalent there.
    match Command::new("git")
        .arg("gc")
        .current_dir(repo.path())
        .output()
    {
        Ok(out) => {
            debug!(
                "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
                out.status,
                String::from_utf8_lossy(&out.stdout),
                String::from_utf8_lossy(&out.stderr)
            );
            if out.status.success() {
                // Swap in a freshly opened handle. `mem::replace` hands back
                // the previous handle and its result must be used, so drop
                // the old handle explicitly instead of silently discarding
                // it.
                let new = git2::Repository::open(repo.path())?;
                drop(mem::replace(repo, new));
                return Ok(());
            }
        }
        Err(e) => debug!("git-gc failed to spawn: {}", e),
    }

    // Alright all else failed, let's start over.
    reinitialize(repo)
}
/// Wipes the git directory behind `repo` and re-creates an empty repository
/// in its place, leaving `repo` pointing at the new repository.
fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
    let git_dir = repo.path().to_path_buf();
    debug!("reinitializing git repo at {:?}", git_dir);
    let scratch = git_dir.join("tmp");
    let bare = !git_dir.ends_with(".git");

    // Point `repo` at a throwaway repository in a sub-folder first, which
    // drops the repository object that was there before.
    *repo = init(&scratch, false)?;

    // Clear out everything in the git directory except the throwaway
    // repository; removal failures are ignored.
    for entry in git_dir.read_dir()? {
        let entry = entry?;
        if entry.file_name().to_str() != Some("tmp") {
            let stale = entry.path();
            let _ = paths::remove_file(&stale).or_else(|_| paths::remove_dir_all(&stale));
        }
    }

    // Re-create the real repository, then get rid of the throwaway one.
    *repo = init(&git_dir, bare)?;
    paths::remove_dir_all(&scratch)?;
    Ok(())
}
/// Initializes a git repository at `path`, bare or not as requested, with
/// external templates disabled.
fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
    let mut opts = git2::RepositoryInitOptions::new();
    // Skip anything related to templates: they just cause all sorts of issues
    // as we really don't want to use them yet they insist on being used. See
    // #6240 for an example issue that comes up.
    opts.external_template(false).bare(bare);
    let repo = git2::Repository::init_opts(path, &opts)?;
    Ok(repo)
}
/// Updating the index is done pretty regularly so we want it to be as fast as
/// possible. For registries hosted on GitHub (like the crates.io index) there's
/// a fast path available to use [1] to tell us that there's no updates to be
/// made.
///
/// This function attempts that fast path: it asks the GitHub API for the
/// `master` commit of the repository with `oid` as the `If-None-Match` value
/// and returns `true` only on a `304` response, meaning no update needs to
/// be performed. If `false` is returned the standard update logic still
/// needs to happen.
///
/// [1]: https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference
///
/// This function never causes an actual failure because it's just a fast
/// path: every error along the way simply yields `false`, and any real
/// errors will be reported through the normal update path.
fn github_up_to_date(handle: &mut Easy, url: &Url, oid: &git2::Oid) -> bool {
    /// Performs the check; `None` means "something went wrong, give up".
    fn probe(handle: &mut Easy, url: &Url, oid: &git2::Oid) -> Option<bool> {
        // Only GitHub urls of the form `github.com/user/repo` are handled,
        // nothing else.
        let mut segments = url.path_segments()?;
        let owner = segments.next()?;
        let repo = segments.next()?;
        if segments.next().is_some() {
            return Some(false);
        }

        let api_url = format!(
            "https://api.github.com/repos/{}/{}/commits/master",
            owner, repo
        );
        handle.get(true).ok()?;
        handle.url(&api_url).ok()?;
        handle.useragent("cargo").ok()?;

        let mut headers = List::new();
        headers.append("Accept: application/vnd.github.3.sha").ok()?;
        headers
            .append(&format!("If-None-Match: \"{}\"", oid))
            .ok()?;
        handle.http_headers(headers).ok()?;

        handle.perform().ok()?;
        Some(handle.response_code().ok()? == 304)
    }

    probe(handle, url, oid).unwrap_or(false)
}