// blob: 454361c0e6f2ec03efff3a24d69e9017e4f25dbc [file] [log] [blame]
use std::io;
use std::path::{Path, PathBuf};
use crate::dirs::{
crate_name_to_relative_path, local_path_and_canonical_url_with_hash_kind, HashKind, DEFAULT_HASHER_KIND,
};
use crate::{path_max_byte_len, Crate, Error, IndexConfig, SparseIndex};
/// The default URL of the crates.io HTTP index, see [`SparseIndex::from_url`] and [`SparseIndex::new_cargo_default`]
///
/// The `sparse+` scheme modifier is deliberately part of the value:
/// [`SparseIndex::with_path_and_hash_kind`] rejects any URL that does not start
/// with `sparse+http`, while [`SparseIndex::url`] strips the modifier again
/// before the URL is used for HTTP requests.
pub const URL: &str = "sparse+https://index.crates.io/";
impl SparseIndex {
/// Opens a view over the sparse HTTP index served at `url`, using the same
/// on-disk location that Cargo uses for that registry index's metadata and
/// cache.
///
/// This is [`Self::from_url_with_hash_kind`] with the crate's default
/// [`HashKind`].
///
/// Note this function takes the `CARGO_HOME` environment variable into account
///
/// # Errors
///
/// Returns whatever error [`Self::from_url_with_hash_kind`] reports.
#[inline]
pub fn from_url(url: &str) -> Result<Self, Error> {
    let hash_kind = &DEFAULT_HASHER_KIND;
    Self::from_url_with_hash_kind(url, hash_kind)
}
/// Same as [`Self::from_url`], except that the [`HashKind`] used to derive the
/// index directory is chosen by the caller.
///
/// # Errors
///
/// Fails if the Cargo home directory cannot be determined, or with any error
/// reported by [`Self::with_path_and_hash_kind`].
#[inline]
pub fn from_url_with_hash_kind(url: &str, hash_kind: &HashKind) -> Result<Self, Error> {
    // Resolve the Cargo home first so a lookup failure is reported before
    // the URL is inspected.
    let cargo_home = home::cargo_home()?;
    Self::with_path_and_hash_kind(cargo_home, url, hash_kind)
}
/// Opens the default crates.io registry index ([`URL`]) at the same disk
/// location Cargo itself uses.
///
/// This is the recommended way to access the crates.io sparse index.
///
/// Note this function takes the `CARGO_HOME` environment variable into account
///
/// # Errors
///
/// Returns whatever error [`Self::from_url`] reports.
#[inline]
pub fn new_cargo_default() -> Result<Self, Error> {
    let crates_io_url: &str = URL;
    Self::from_url(crates_io_url)
}
/// Opens a view over the sparse HTTP index served at `url`, rooted at the
/// given `cargo_home` directory instead of the one Cargo would pick.
///
/// This is [`Self::with_path_and_hash_kind`] with the crate's default
/// [`HashKind`].
///
/// # Errors
///
/// Returns whatever error [`Self::with_path_and_hash_kind`] reports.
#[inline]
pub fn with_path(cargo_home: impl AsRef<Path>, url: impl AsRef<str>) -> Result<Self, Error> {
    let hash_kind = &DEFAULT_HASHER_KIND;
    Self::with_path_and_hash_kind(cargo_home, url, hash_kind)
}
/// Like [`Self::with_path`] but accepts an explicit [`HashKind`] for determining the crates index path.
#[inline]
pub fn with_path_and_hash_kind(
cargo_home: impl AsRef<Path>,
url: impl AsRef<str>,
hash_kind: &HashKind,
) -> Result<Self, Error> {
let url = url.as_ref();
// It is required to have the sparse+ scheme modifier for sparse urls as
// they are part of the short ident hash calculation done by cargo
if !url.starts_with("sparse+http") {
return Err(Error::Url(url.to_owned()));
}
let (path, url) = local_path_and_canonical_url_with_hash_kind(url, Some(cargo_home.as_ref()), hash_kind)?;
Ok(Self::at_path(path, url))
}
/// Opens a view over the sparse HTTP index whose local data lives at exactly
/// `path`.
///
/// Neither argument is validated. A trailing `/` is appended to `url` when it
/// is missing, so relative index paths can be appended to it directly.
#[inline]
#[must_use]
pub fn at_path(path: PathBuf, mut url: String) -> Self {
    let has_trailing_slash = url.ends_with('/');
    if !has_trailing_slash {
        url.push('/');
    }
    Self { path, url }
}
/// Get the global configuration of the index. There are no guarantees around freshness,
/// and if the config is not available, no fetch will be performed.
pub fn index_config(&self) -> Result<IndexConfig, Error> {
let path = self.path.join("config.json");
let bytes = std::fs::read(path).map_err(Error::Io)?;
serde_json::from_slice(&bytes).map_err(Error::Json)
}
/// Loads the crate `name` from the local cache of the index.
///
/// There are no guarantees around freshness, and if the crate is not known in
/// the cache, no fetch will be performed.
///
/// # Errors
///
/// Fails with an `InvalidInput` I/O error when no cache path can be derived
/// from `name`, with an I/O error naming the cache file when it cannot be
/// read, and with the parser's error when the entry cannot be decoded.
pub fn crate_from_cache(&self, name: &str) -> Result<Crate, Error> {
    let entry_path = match self.cache_path(name) {
        Some(entry_path) => entry_path,
        None => return Err(io::Error::new(io::ErrorKind::InvalidInput, "bad name").into()),
    };

    let raw = match std::fs::read(&entry_path) {
        Ok(raw) => raw,
        Err(err) => {
            // Keep the original error kind, but say which file was involved.
            let annotated = io::Error::new(err.kind(), format!("{}: `{}`", err, entry_path.display()));
            return Err(annotated.into());
        }
    };

    let krate = Crate::from_cache_slice(&raw, None)?;
    Ok(krate)
}
/// The HTTP url of the index, i.e. the stored url without a leading `sparse+`
/// scheme modifier.
///
/// A stored url that has no such prefix is returned unchanged.
#[inline]
#[must_use]
pub fn url(&self) -> &str {
    match self.url.strip_prefix("sparse+") {
        Some(http_url) => http_url,
        None => self.url.as_str(),
    }
}
/// Builds the URL from which the index entry of the crate `name` can be
/// fetched.
///
/// Returns `None` when no relative index path can be derived from `name`.
///
/// The body of a successful response for the returned URL can be parsed
/// via [`Crate::from_slice`]
#[inline]
#[must_use]
pub fn crate_url(&self, name: &str) -> Option<String> {
    crate_name_to_relative_path(name, Some('/')).map(|rel_path| format!("{}{rel_path}", self.url()))
}
/// Computes the full path of the cache file for the crate `name`, which lives
/// below the `.cache` directory of the index path.
///
/// Returns `None` when no relative index path can be derived from `name`.
fn cache_path(&self, name: &str) -> Option<PathBuf> {
    crate_name_to_relative_path(name, None).map(|rel_path| {
        // Reserve everything up front to avoid a realloc on each push; the
        // extra 8 bytes presumably cover `.cache` plus the path separators.
        let capacity = path_max_byte_len(&self.path) + 8 + rel_path.len();
        let mut full_path = PathBuf::with_capacity(capacity);
        full_path.push(&self.path);
        full_path.push(".cache");
        full_path.push(rel_path);
        full_path
    })
}
/// Reads the version of the cache entry for the specified crate, if it exists
///
/// The version is of the form `key:value`, where, currently, the key is either
/// `etag` or `last-modified`
///
/// The entry is only trusted when it starts with the expected cache version
/// byte followed by the expected little-endian `u32` index format version.
/// The version string is the first field after that header, as produced by
/// `crate::split(.., 0)`. Any read failure, header mismatch or non UTF-8
/// version yields `None`.
#[cfg(feature = "sparse")]
fn read_cache_version(&self, name: &str) -> Option<String> {
    const CURRENT_CACHE_VERSION: u8 = 3;
    const CURRENT_INDEX_FORMAT_VERSION: u32 = 2;

    let bytes = std::fs::read(self.cache_path(name)?).ok()?;

    let (&cache_v, after_cache_v) = bytes.split_first()?;
    if cache_v != CURRENT_CACHE_VERSION {
        return None;
    }

    // `get(..4)` already guarantees four bytes, so the conversion to a
    // fixed-size array cannot actually fail.
    let index_v_bytes: [u8; 4] = after_cache_v.get(..4)?.try_into().ok()?;
    if u32::from_le_bytes(index_v_bytes) != CURRENT_INDEX_FORMAT_VERSION {
        return None;
    }

    let first_field = crate::split(&after_cache_v[4..], 0).next()?;
    std::str::from_utf8(first_field).ok().map(String::from)
}
/// Builds the `GET` request for `url` with the headers every request to a
/// sparse index carries.
///
/// When `cache_version` is given in the `key:value` form stored in cache
/// entries, an `etag` key is sent as `If-None-Match` and a `last-modified` key
/// as `If-Modified-Since`. Any other key, or a value that is not a valid
/// header value, is silently ignored and the server will simply answer in
/// full.
///
/// This function never panics: if the builder already holds an error (for
/// example because `url` is not a valid URI) no headers are added, and the
/// error is reported once the caller finalizes the returned builder with a
/// body.
#[cfg(feature = "sparse")]
fn make_request(&self, url: &str, cache_version: Option<&str>) -> Result<http::request::Builder, Error> {
    use http::header;

    let mut req = http::Request::get(url).version(http::Version::HTTP_2);

    // `headers_mut` yields `None` once the builder has recorded an error, so
    // there is nothing to configure in that case.
    if let Some(headers) = req.headers_mut() {
        // AFAICT this does not affect responses at the moment, but could in the future
        // if there are changes
        headers.insert("cargo-protocol", header::HeaderValue::from_static("version=1"));
        // All index entries are just files with lines of JSON
        headers.insert(header::ACCEPT, header::HeaderValue::from_static("text/plain"));
        // We need to accept both identity and gzip, as otherwise cloudfront will
        // always respond to requests with strong etag's, which will differ from
        // cache entries generated by cargo
        headers.insert(
            header::ACCEPT_ENCODING,
            header::HeaderValue::from_static("gzip,identity"),
        );

        // If we have a local cache entry, include its version with the
        // appropriate header, this allows the server to respond with a
        // cached, or even better, empty response if its version matches
        // the local one making the request/response loop basically free
        let conditional = cache_version
            .and_then(|version| version.split_once(':'))
            .and_then(|(key, value)| {
                header::HeaderValue::from_str(value.trim())
                    .ok()
                    .map(|value| (key, value))
            });

        if let Some((key, value)) = conditional {
            if key == header::ETAG {
                headers.insert(header::IF_NONE_MATCH, value);
            } else if key == header::LAST_MODIFIED {
                headers.insert(header::IF_MODIFIED_SINCE, value);
            }
            // Any other key is ignored: we could error here, but that's kind
            // of pointless since the response will be sent in full if we
            // haven't specified one of the above headers. Though it does
            // potentially indicate something weird is going on
        }
    }

    Ok(req)
}
/// Creates an HTTP request that can be sent via your HTTP client of choice
/// to retrieve the `config.json` of this index.
///
/// See [`Self::parse_config_response()`] for processing the response from the
/// remote index.
///
/// It is highly recommended to assume HTTP/2 when making requests to remote
/// indices, at least crates.io.
#[cfg(feature = "sparse")]
pub fn make_config_request(&self) -> Result<http::request::Builder, Error> {
    let config_url = format!("{}config.json", self.url());
    self.make_request(&config_url, None)
}
/// Creates an HTTP request that can be sent via your HTTP client of choice
/// to retrieve the current metadata for the specified crate `name`.
///
/// If a local cache entry with a readable version exists for the crate, that
/// version is passed along so the request can be made conditional.
///
/// See [`Self::parse_cache_response()`] for processing the response from the
/// remote index.
///
/// It is highly recommended to assume HTTP/2 when making requests to remote
/// indices, at least crates.io.
///
/// # Errors
///
/// Fails with an `InvalidInput` I/O error when no index URL can be derived
/// from `name`.
#[cfg(feature = "sparse")]
pub fn make_cache_request(&self, name: &str) -> Result<http::request::Builder, Error> {
    let entry_url = self
        .crate_url(name)
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "crate name is invalid"))?;
    let cache_version = self.read_cache_version(name);
    self.make_request(&entry_url, cache_version.as_deref())
}
/// Process the response to a request created by [`Self::make_config_request()`].
///
/// If `write_config` is `true`, write the configuration to disk after parsing it.
/// The body is only written once it has been parsed successfully, so an
/// unparseable response never replaces a previously stored `config.json`.
/// Note that the write operation may fail, and as opposed to the similar parameter
/// in [`Self::parse_cache_response()`], write errors will not be ignored.
///
/// Note that the `response` from sparse HTTP indices, at least crates.io, may
/// send responses with `gzip` compression, it is your responsibility to
/// decompress it before sending to this function.
///
/// # Errors
///
/// - [`Error::Json`] if a `200 OK` body cannot be deserialized
/// - an I/O error if `write_config` is set and the directory or file cannot be written
/// - a `PermissionDenied` I/O error for `401 Unauthorized`
/// - a `NotFound` I/O error for `404 Not Found`
/// - an `Unsupported` I/O error for every other status code
#[cfg(feature = "sparse")]
pub fn parse_config_response(
    &self,
    response: http::Response<Vec<u8>>,
    write_config: bool,
) -> Result<IndexConfig, Error> {
    use http::StatusCode;

    let (parts, body) = response.into_parts();
    match parts.status {
        StatusCode::OK => {
            // Validate before touching the disk: a body that does not parse
            // must not end up as the locally stored configuration.
            let config: IndexConfig = serde_json::from_slice(&body).map_err(Error::Json)?;
            if write_config {
                // `config.json` lives directly inside the index directory.
                std::fs::create_dir_all(&self.path)?;
                std::fs::write(self.path.join("config.json"), &body)?;
            }
            Ok(config)
        }
        StatusCode::UNAUTHORIZED => {
            Err(io::Error::new(io::ErrorKind::PermissionDenied, "the request was not authorized").into())
        }
        StatusCode::NOT_FOUND => {
            Err(io::Error::new(io::ErrorKind::NotFound, "config.json not found in registry").into())
        }
        other => Err(io::Error::new(
            io::ErrorKind::Unsupported,
            format!(
                "the server responded with status code '{other}', which is not supported in the current protocol"
            ),
        )
        .into()),
    }
}
/// Process the response to a request created by [`Self::make_cache_request`]
///
/// The outcome depends on the status code of the response:
///
/// - `200 OK`: the body is parsed as the crate's index entry and returned. If
///   `write_cache_entry` is `true`, the entry is additionally written to the
///   local cache on a best-effort basis, failures to do so are ignored
/// - `304 Not Modified`: the local cache entry is up to date and is read from
///   disk instead
/// - `404`, `410` and `451`: the crate does not exist or has been removed,
///   `Ok(None)` is returned
/// - `401 Unauthorized` and every other status code result in an error
///
/// Note that responses from sparse HTTP indices, at least crates.io, may
/// send responses with `gzip` compression, it is your responsibility to
/// decompress it before sending to this function
#[cfg(feature = "sparse")]
pub fn parse_cache_response(
    &self,
    name: &str,
    response: http::Response<Vec<u8>>,
    write_cache_entry: bool,
) -> Result<Option<Crate>, Error> {
    use http::{header, StatusCode};

    let (parts, body) = response.into_parts();

    // Deal with everything that is not a full response first.
    match parts.status {
        StatusCode::OK => {}
        // The local cache entry is up to date with the latest entry on the
        // server, we can just return the local one
        StatusCode::NOT_MODIFIED => return self.crate_from_cache(name).map(Option::Some),
        // The server requires authorization but the user didn't provide it
        StatusCode::UNAUTHORIZED => {
            return Err(io::Error::new(io::ErrorKind::PermissionDenied, "the request was not authorized").into());
        }
        // The crate does not exist, or has been removed
        StatusCode::NOT_FOUND | StatusCode::GONE | StatusCode::UNAVAILABLE_FOR_LEGAL_REASONS => return Ok(None),
        other => {
            return Err(io::Error::new(
                io::ErrorKind::Unsupported,
                format!(
                    "the server responded with status code '{other}', which is not supported in the current protocol"
                ),
            )
            .into());
        }
    }

    // The server responded with the full contents of the index entry
    let krate = Crate::from_slice(&body)?;

    if write_cache_entry {
        // The same as cargo, prefer etag over last-modified. An etag that is
        // present but not representable as a string does not fall back to
        // last-modified.
        let headers = &parts.headers;
        let version = match (headers.get(header::ETAG), headers.get(header::LAST_MODIFIED)) {
            (Some(etag), _) => etag.to_str().ok().map(|etag| format!("{}: {etag}", header::ETAG)),
            (None, Some(lm)) => lm.to_str().ok().map(|lm| format!("{}: {lm}", header::LAST_MODIFIED)),
            (None, None) => None,
        }
        .unwrap_or_else(|| "Unknown".to_owned());

        // This should always succeed, but no need to panic or fail
        if let Some(cache_path) = self.cache_path(name) {
            let parent_ready = std::fs::create_dir_all(cache_path.parent().unwrap()).is_ok();
            if parent_ready {
                // It's unfortunate if this fails for some reason, but
                // not writing the cache entry shouldn't stop the user
                // from getting the crate's metadata
                let _ = krate.write_cache_entry(&cache_path, &version);
            }
        }
    }

    Ok(Some(krate))
}
}
#[cfg(test)]
#[cfg(feature = "sparse")]
mod tests {
    use crate::SparseIndex;
    use http::header;

    /// Opens the crates.io index rooted at the cargo home fixture that ships
    /// with the test suite.
    #[inline]
    fn crates_io() -> SparseIndex {
        SparseIndex::with_path(
            std::path::Path::new(&std::env::var_os("CARGO_MANIFEST_DIR").unwrap())
                .join("tests/fixtures/sparse_registry_cache/cargo_home"),
            crate::sparse::URL,
        )
        .unwrap()
    }

    // curl -v -H 'accept-encoding: gzip,identity' https://index.crates.io/cr/at/crates-index
    const CRATES_INDEX_INDEX_ENTRY: &[u8] = include_bytes!("../tests/fixtures/crates-index.txt");

    // Validates that a valid cache entry is written if the index entry has been
    // modified
    #[test]
    fn writes_cache_entry() {
        let index = crates_io();

        // Start from a clean slate so the assertion below proves the entry
        // was written by this test run.
        let cache_path = index.cache_path("crates-index").unwrap();
        if cache_path.exists() {
            std::fs::remove_file(&cache_path).expect("failed to remove existing crates-index cache file");
        }

        let response = http::Response::builder()
            .status(http::StatusCode::OK)
            .header(header::ETAG, "W/\"7fbfc422231ec53a9283f2eb2fb4f459\"")
            .body(CRATES_INDEX_INDEX_ENTRY.to_vec())
            .unwrap();

        let http_krate = index
            .parse_cache_response("crates-index", response, true /* write cache entry */)
            .unwrap()
            .unwrap();
        assert!(cache_path.is_file(), "the cache entry was indeed written");

        let cache_krate = index.crate_from_cache("crates-index").unwrap();

        // `zip` stops at the shorter sequence, so compare the counts first to
        // make sure the cache entry did not lose (or gain) any version.
        assert_eq!(
            http_krate.versions().iter().count(),
            cache_krate.versions().iter().count(),
            "the cache entry holds the same number of versions as the response"
        );
        for (http, cache) in http_krate.versions().iter().zip(cache_krate.versions().iter()) {
            assert_eq!(http.version(), cache.version());
        }
    }
}