blob: 9b083463c080a8211e5e2f87a80eda561cfd1fcd [file] [log] [blame]
use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::str;
use log::{info, trace};
use semver::Version;
use crate::core::dependency::Dependency;
use crate::core::{PackageId, SourceId, Summary};
use crate::sources::registry::RegistryData;
use crate::sources::registry::{RegistryPackage, INDEX_LOCK};
use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver};
/// Iterator over the hyphen/underscore spellings of a string.
///
/// Crates.io treats hyphens and underscores as interchangeable, but the index and old versions
/// of Cargo do not. The index therefore stores the uncanonicalized spelling of a name so that
/// old versions of Cargo can still find it. This iterator yields every combination of hyphens
/// and underscores for the separators in `input`, so the caller can probe for the spelling that
/// is actually stored. As all stored inputs have the correct spelling, the first item is always
/// the spelling as provided.
///
/// Only the first 16 separators are toggled; any further ones are passed through unchanged.
/// The iterator therefore yields at most `2^16` items and then ends.
struct UncanonicalizedIter<'s> {
    /// The spelling as provided by the caller.
    input: &'s str,
    /// Number of separators in `input` that get toggled, capped at `MAX_TOGGLED_SEPARATORS`.
    num_hyphen_underscore: u32,
    /// Bitmask for the next item: bit `i` set means the `i`-th separator is flipped relative to
    /// `input`. Stored as `u32` so the one-past-the-end value `1 << 16` is representable and
    /// incrementing can never overflow.
    hyphen_combination_num: u32,
}

impl<'s> UncanonicalizedIter<'s> {
    /// Upper bound on how many separators take part in the enumeration.
    const MAX_TOGGLED_SEPARATORS: u32 = 16;

    /// Creates an iterator positioned at the as-provided spelling of `input`.
    fn new(input: &'s str) -> Self {
        // Counting stops at the cap, so the cast below is lossless.
        let num_hyphen_underscore = input
            .chars()
            .filter(|&c| c == '_' || c == '-')
            .take(Self::MAX_TOGGLED_SEPARATORS as usize)
            .count() as u32;
        UncanonicalizedIter {
            input,
            num_hyphen_underscore,
            hyphen_combination_num: 0,
        }
    }
}

impl<'s> Iterator for UncanonicalizedIter<'s> {
    type Item = String;

    /// Returns the next spelling, or `None` once all `2^n` combinations of the `n` toggled
    /// separators have been produced.
    fn next(&mut self) -> Option<Self::Item> {
        let mask = self.hyphen_combination_num;
        let toggled = self.num_hyphen_underscore;
        // `toggled <= 16`, so this shift cannot overflow a `u32`.
        if mask >= 1u32 << toggled {
            return None;
        }

        // Index of the next separator encountered while walking `input`.
        let mut seen = 0u32;
        let spelling = self
            .input
            .chars()
            .map(|c| {
                // Separators past the cap are emitted unchanged; this also keeps the shift
                // below in range on inputs with more than 16 separators.
                if (c == '_' || c == '-') && seen < toggled {
                    let switch = mask & (1u32 << seen) != 0;
                    seen += 1;
                    if (c == '_') ^ switch {
                        '_'
                    } else {
                        '-'
                    }
                } else {
                    c
                }
            })
            .collect();

        // `mask < 2^16` here, so the increment cannot overflow.
        self.hyphen_combination_num = mask + 1;
        Some(spelling)
    }
}
/// An input without any separator has exactly one spelling: itself.
#[test]
fn no_hyphen() {
    let spellings: Vec<String> = UncanonicalizedIter::new("test").collect();
    assert_eq!(spellings, ["test"]);
}
/// Two separators produce four spellings, starting with the input as provided.
#[test]
fn two_hyphen() {
    let expected = ["te-_st", "te__st", "te--st", "te_-st"];
    let actual: Vec<String> = UncanonicalizedIter::new("te-_st").collect();
    assert_eq!(actual, expected);
}
/// An input with more separators than the bitmask can address must still yield items
/// without panicking.
#[test]
fn overflow_hyphen() {
    let spellings = UncanonicalizedIter::new("te-_-_-_-_-_-_-_-_-st").take(100);
    assert_eq!(spellings.count(), 100);
}
/// In-memory view over a registry index stored on the filesystem.
///
/// Summaries and checksums parsed from the index are cached per package name after the first
/// successful load, so repeated queries for the same name do not re-read the index file.
pub struct RegistryIndex<'cfg> {
// Source that every package and dependency parsed from this index is attributed to.
source_id: SourceId,
// Location of the index on the filesystem.
path: Filesystem,
// package name -> `(summary, yanked)` pairs parsed from that package's index file
cache: HashMap<&'static str, Vec<(Summary, bool)>>,
// `(name, vers)` -> `checksum`
hashes: HashMap<&'static str, HashMap<Version, String>>,
// Configuration handle; used when opening the index lock and to read the offline flag.
config: &'cfg Config,
// When true, the index lock file is opened read-only before index files are read;
// otherwise the index path is used without taking a lock.
locked: bool,
}
impl<'cfg> RegistryIndex<'cfg> {
pub fn new(
source_id: SourceId,
path: &Filesystem,
config: &'cfg Config,
locked: bool,
) -> RegistryIndex<'cfg> {
RegistryIndex {
source_id,
path: path.clone(),
cache: HashMap::new(),
hashes: HashMap::new(),
config,
locked,
}
}
/// Looks up the checksum the index lists for `pkg`.
///
/// The checksum cache is consulted first. On a miss the package's index entry is loaded via
/// `summaries`, which fills the cache as a side effect, and the lookup is retried. An internal
/// error is returned if the index still lists no checksum for this exact version.
pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> CargoResult<String> {
    let name = pkg.name().as_str();
    let version = pkg.version();

    // Fast path: the checksum was recorded by an earlier parse of this package's index file.
    if let Some(known) = self
        .hashes
        .get(name)
        .and_then(|by_version| by_version.get(version))
        .cloned()
    {
        return Ok(known);
    }

    // Slow path: parsing the index file populates `self.hashes` for every listed version.
    self.summaries(name, load)?;
    match self
        .hashes
        .get(name)
        .and_then(|by_version| by_version.get(version))
    {
        Some(cksum) => Ok(cksum.clone()),
        None => Err(internal(format!("no hash listed for {}", pkg))),
    }
}
/// Returns the index entries for the package called `name`, loading them on first use.
///
/// Each entry is a `(summary, yanked)` pair. The result of a successful load is cached under
/// `name`, so the on-disk metadata is parsed only on the first call for a given name.
pub fn summaries(
    &mut self,
    name: &'static str,
    load: &mut dyn RegistryData,
) -> CargoResult<&Vec<(Summary, bool)>> {
    // Checked with `contains_key` rather than `get`: returning a borrow from the cache on
    // the hit path would keep `self` borrowed across the insertion below.
    if !self.cache.contains_key(name) {
        let loaded = self.load_summaries(name, load)?;
        self.cache.insert(name, loaded);
    }
    Ok(&self.cache[name])
}
/// Reads and parses the index file for `name`, bypassing the summary cache.
///
/// Returns the `(summary, yanked)` pairs found in the first index file that exists among the
/// hyphen/underscore spellings of the package's path. Returns an empty list if no such file
/// exists, or if the index lock cannot be opened while `self.locked` is set.
fn load_summaries(
    &mut self,
    name: &str,
    load: &mut dyn RegistryData,
) -> CargoResult<Vec<(Summary, bool)>> {
    // Let the `RegistryData` lazily initialize its internal data structures first. Doing
    // this up front also avoids a deadlock where we acquire a lock below and the `load`
    // function then wants to acquire a lock of its own. See an instance of this on #5551.
    load.prepare()?;

    // `_lock` is kept alive until this function returns so the index stays locked while
    // it is being read.
    let (root, _lock) = if self.locked {
        match self
            .path
            .open_ro(Path::new(INDEX_LOCK), self.config, "the registry index")
        {
            Ok(lock) => (lock.path().parent().unwrap().to_path_buf(), Some(lock)),
            // Failing to open the lock is treated as "nothing in the index".
            Err(_) => return Ok(Vec::new()),
        }
    } else {
        (self.path.clone().into_path_unlocked(), None)
    };

    // Index paths are built from the lowercased package name.
    let fs_name: String = name.chars().flat_map(|c| c.to_lowercase()).collect();

    // Index files are sharded into directories by name length and leading characters
    // (the rationale is documented in the module-level comment).
    let raw_path = match fs_name.len() {
        1 => format!("1/{}", fs_name),
        2 => format!("2/{}", fs_name),
        3 => format!("3/{}/{}", &fs_name[..1], fs_name),
        _ => format!("{}/{}/{}", &fs_name[..2], &fs_name[2..4], fs_name),
    };

    let mut ret = Vec::new();
    for path in UncanonicalizedIter::new(&raw_path).take(1024) {
        let mut hit_closure = false;
        let err = load.load(&root, Path::new(&path), &mut |contents| {
            hit_closure = true;
            let text = str::from_utf8(contents)
                .map_err(|_| failure::format_err!("registry index file was not valid utf-8"))?;
            ret.reserve(text.lines().count());

            // Attempt forwards-compatibility on the index by skipping every line we
            // ourselves don't understand. That allows future cargo implementations to
            // change the interpretation of a line while older cargo simply ignores it.
            for line in text.lines().map(|l| l.trim()).filter(|l| !l.is_empty()) {
                match self.parse_registry_package(line) {
                    Ok(entry) => ret.push(entry),
                    Err(e) => {
                        info!("failed to parse `{}` registry package: {}", name, e);
                        trace!("line: {}", line);
                    }
                }
            }
            Ok(())
        });

        // Lookup failures are ignored: those are just crates which don't exist or a
        // registry we haven't updated yet. Try the next spelling.
        if !hit_closure {
            continue;
        }

        // The closure ran, so the file exists and its errors matter.
        err?;
        // Crates.io ensures that there is only one hyphen and underscore equivalent
        // result in the index so return when we find it.
        return Ok(ret);
    }
    Ok(ret)
}
/// Parses a line from the registry's index file into a `Summary` for a package.
///
/// The returned boolean is whether or not the summary has been yanked.
fn parse_registry_package(&mut self, line: &str) -> CargoResult<(Summary, bool)> {
let RegistryPackage {
name,
vers,
cksum,
deps,
features,
yanked,
links,
} = serde_json::from_str(line)?;
let pkgid = PackageId::new(&name, &vers, self.source_id)?;
let name = pkgid.name();
let deps = deps
.into_iter()
.map(|dep| dep.into_dep(self.source_id))
.collect::<CargoResult<Vec<_>>>()?;
let summary = Summary::new(pkgid, deps, &features, links, false)?;
let summary = summary.set_checksum(cksum.clone());
self.hashes
.entry(name.as_str())
.or_insert_with(HashMap::new)
.insert(vers, cksum);
Ok((summary, yanked.unwrap_or(false)))
}
/// Feeds every summary matching `dep` to `f`.
///
/// In offline mode the query first runs restricted to crates that are already downloaded;
/// only if that finds nothing is it repeated without the restriction.
pub fn query_inner(
    &mut self,
    dep: &Dependency,
    load: &mut dyn RegistryData,
    yanked_whitelist: &HashSet<PackageId>,
    f: &mut dyn FnMut(Summary),
) -> CargoResult<()> {
    let offline = self.config.cli_unstable().offline;
    if offline {
        let matched = self.query_inner_with_online(dep, load, yanked_whitelist, f, false)?;
        if matched > 0 {
            return Ok(());
        }
        // If offline, and there are no matches, try again with online.
        // This is necessary for dependencies that are not used (such as
        // target-cfg or optional), but are not downloaded. Normally the
        // build should succeed if they are not downloaded and not used,
        // but they still need to resolve. If they are actually needed
        // then cargo will fail to download and an error message
        // indicating that the required dependency is unavailable while
        // offline will be displayed.
    }
    self.query_inner_with_online(dep, load, yanked_whitelist, f, true)
        .map(|_| ())
}
/// Feeds the summaries for `dep`'s package to `f` and returns how many were passed on.
///
/// A summary is skipped when:
/// * `online` is false and the crate has not been downloaded,
/// * it is yanked and its package id is not in `yanked_whitelist`, or
/// * a `--precise` pin on the source applies to it and requests a different version.
fn query_inner_with_online(
    &mut self,
    dep: &Dependency,
    load: &mut dyn RegistryData,
    yanked_whitelist: &HashSet<PackageId>,
    f: &mut dyn FnMut(Summary),
    online: bool,
) -> CargoResult<usize> {
    let source_id = self.source_id;
    let name = dep.package_name().as_str();
    let summaries = self.summaries(name, load)?;

    let mut count = 0;
    for (summary, yanked) in summaries.iter() {
        // Without network access only crates that are already downloaded are usable.
        if !online && !load.is_crate_downloaded(summary.package_id()) {
            continue;
        }

        // Yanked versions are only allowed when explicitly whitelisted.
        if *yanked {
            let pkg_id = summary.package_id();
            log::debug!("{:?}", yanked_whitelist);
            log::debug!("{:?}", pkg_id);
            if !yanked_whitelist.contains(&pkg_id) {
                continue;
            }
        }

        // Handle `cargo update --precise` here. If specified, our own source
        // will have a precise version listed of the form
        // `<pkg>=<p_req>-><f_req>` where `<pkg>` is the name of a crate on
        // this source, `<p_req>` is the version installed and `<f_req>` is the
        // version requested (argument to `--precise`).
        let keep = match source_id.precise() {
            Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => {
                let mut vers = p[name.len() + 1..].splitn(2, "->");
                let installed = vers.next().unwrap().to_semver().unwrap();
                // The pin only constrains requirements that the installed version
                // satisfies; for those, only the requested version is kept.
                !dep.version_req().matches(&installed)
                    || vers.next().unwrap() == summary.version().to_string()
            }
            _ => true,
        };
        if !keep {
            continue;
        }

        f(summary.clone());
        count += 1;
    }
    Ok(count)
}
}