| use std::io; |
| use std::path::{Path, PathBuf}; |
| |
| use crate::dirs::{ |
| crate_name_to_relative_path, local_path_and_canonical_url_with_hash_kind, HashKind, DEFAULT_HASHER_KIND, |
| }; |
| use crate::{path_max_byte_len, Crate, Error, IndexConfig, SparseIndex}; |
| |
| /// The default URL of the crates.io HTTP index, see [`SparseIndex::from_url`] and [`SparseIndex::new_cargo_default`] |
| pub const URL: &str = "sparse+https://index.crates.io/"; |
| |
| impl SparseIndex { |
| /// Creates a view over the sparse HTTP index from a provided URL, opening |
| /// the same location on disk that Cargo uses for that registry index's |
| /// metadata and cache. |
| /// |
| /// Note this function takes the `CARGO_HOME` environment variable into account |
| #[inline] |
| pub fn from_url(url: &str) -> Result<Self, Error> { |
| Self::from_url_with_hash_kind(url, &DEFAULT_HASHER_KIND) |
| } |
| |
| /// Like [`Self::from_url`] but accepts an explicit [`HashKind`] for determining the crates index path. |
| #[inline] |
| pub fn from_url_with_hash_kind(url: &str, hash_kind: &HashKind) -> Result<Self, Error> { |
| Self::with_path_and_hash_kind(home::cargo_home()?, url, hash_kind) |
| } |
| |
| /// Creates an index for the default crates.io registry, using the same |
| /// disk location as Cargo itself. |
| /// |
| /// This is the recommended way to access the crates.io sparse index. |
| /// |
| /// Note this function takes the `CARGO_HOME` environment variable into account |
| #[inline] |
| pub fn new_cargo_default() -> Result<Self, Error> { |
| Self::from_url(URL) |
| } |
| |
| /// Creates a view over the sparse HTTP index from the provided URL, rooted |
| /// at the specified location |
| #[inline] |
| pub fn with_path(cargo_home: impl AsRef<Path>, url: impl AsRef<str>) -> Result<Self, Error> { |
| Self::with_path_and_hash_kind(cargo_home, url, &DEFAULT_HASHER_KIND) |
| } |
| |
| /// Like [`Self::with_path`] but accepts an explicit [`HashKind`] for determining the crates index path. |
| #[inline] |
| pub fn with_path_and_hash_kind( |
| cargo_home: impl AsRef<Path>, |
| url: impl AsRef<str>, |
| hash_kind: &HashKind, |
| ) -> Result<Self, Error> { |
| let url = url.as_ref(); |
| // It is required to have the sparse+ scheme modifier for sparse urls as |
| // they are part of the short ident hash calculation done by cargo |
| if !url.starts_with("sparse+http") { |
| return Err(Error::Url(url.to_owned())); |
| } |
| |
| let (path, url) = local_path_and_canonical_url_with_hash_kind(url, Some(cargo_home.as_ref()), hash_kind)?; |
| Ok(Self::at_path(path, url)) |
| } |
| |
| /// Creates a view over the sparse HTTP index at the exact specified path |
| #[inline] |
| #[must_use] |
| pub fn at_path(path: PathBuf, mut url: String) -> Self { |
| if !url.ends_with('/') { |
| url.push('/'); |
| } |
| Self { path, url } |
| } |
| |
| /// Get the global configuration of the index. There are no guarantees around freshness, |
| /// and if the config is not available, no fetch will be performed. |
| pub fn index_config(&self) -> Result<IndexConfig, Error> { |
| let path = self.path.join("config.json"); |
| let bytes = std::fs::read(path).map_err(Error::Io)?; |
| |
| serde_json::from_slice(&bytes).map_err(Error::Json) |
| } |
| |
| /// Reads a crate from the local cache of the index. There are no guarantees around freshness, |
| /// and if the crate is not known in the cache, no fetch will be performed. |
| pub fn crate_from_cache(&self, name: &str) -> Result<Crate, Error> { |
| let cache_path = self |
| .cache_path(name) |
| .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "bad name"))?; |
| |
| let cache_bytes = std::fs::read(&cache_path) |
| .map_err(|e| io::Error::new(e.kind(), format!("{}: `{}`", e, cache_path.display())))?; |
| Ok(Crate::from_cache_slice(&cache_bytes, None)?) |
| } |
| |
| /// The HTTP url of the index |
| #[inline] |
| #[must_use] |
| pub fn url(&self) -> &str { |
| self.url.strip_prefix("sparse+").unwrap_or(&self.url) |
| } |
| |
| /// Get the URL that can be used to fetch the index entry for the specified |
| /// crate |
| /// |
| /// The body of a successful response for the returned URL can be parsed |
| /// via [`Crate::from_slice`] |
| #[inline] |
| #[must_use] |
| pub fn crate_url(&self, name: &str) -> Option<String> { |
| let rel_path = crate_name_to_relative_path(name, Some('/'))?; |
| Some(format!("{}{rel_path}", self.url())) |
| } |
| |
| /// Gets the full path to the cache file for the specified crate |
| fn cache_path(&self, name: &str) -> Option<PathBuf> { |
| let rel_path = crate_name_to_relative_path(name, None)?; |
| |
| // avoid realloc on each push |
| let mut cache_path = PathBuf::with_capacity(path_max_byte_len(&self.path) + 8 + rel_path.len()); |
| cache_path.push(&self.path); |
| cache_path.push(".cache"); |
| cache_path.push(rel_path); |
| |
| Some(cache_path) |
| } |
| |
| /// Reads the version of the cache entry for the specified crate, if it exists |
| /// |
| /// The version is of the form `key:value`, where, currently, the key is either |
| /// `etag` or `last-modified` |
| #[cfg(feature = "sparse")] |
| fn read_cache_version(&self, name: &str) -> Option<String> { |
| let cache_path = self.cache_path(name)?; |
| let bytes = std::fs::read(cache_path).ok()?; |
| |
| const CURRENT_CACHE_VERSION: u8 = 3; |
| const CURRENT_INDEX_FORMAT_VERSION: u32 = 2; |
| |
| let (&first_byte, rest) = bytes.split_first()?; |
| |
| if first_byte != CURRENT_CACHE_VERSION { |
| return None; |
| } |
| |
| let index_v_bytes = rest.get(..4)?; |
| let index_v = u32::from_le_bytes(index_v_bytes.try_into().unwrap()); |
| if index_v != CURRENT_INDEX_FORMAT_VERSION { |
| return None; |
| } |
| let rest = &rest[4..]; |
| |
| let version = crate::split(rest, 0) |
| .next() |
| .and_then(|version| std::str::from_utf8(version).ok().map(String::from)); |
| |
| version |
| } |
| |
| #[cfg(feature = "sparse")] |
| fn make_request(&self, url: &str, cache_version: Option<&str>) -> Result<http::request::Builder, Error> { |
| use http::header; |
| |
| let mut req = http::Request::get(url).version(http::Version::HTTP_2); |
| |
| { |
| let headers = req.headers_mut().unwrap(); |
| |
| // AFAICT this does not affect responses at the moment, but could in the future |
| // if there are changes |
| headers.insert("cargo-protocol", header::HeaderValue::from_static("version=1")); |
| // All index entries are just files with lines of JSON |
| headers.insert(header::ACCEPT, header::HeaderValue::from_static("text/plain")); |
| // We need to accept both identity and gzip, as otherwise cloudfront will |
| // always respond to requests with strong etag's, which will differ from |
| // cache entries generated by cargo |
| headers.insert( |
| header::ACCEPT_ENCODING, |
| header::HeaderValue::from_static("gzip,identity"), |
| ); |
| |
| // If we have a local cache entry, include its version with the |
| // appropriate header, this allows the server to respond with a |
| // cached, or even better, empty response if its version matches |
| // the local one making the request/response loop basically free |
| if let Some(cache_version) = cache_version { |
| if let Some((key, value)) = cache_version.split_once(':') { |
| if let Ok(value) = header::HeaderValue::from_str(value.trim()) { |
| if key == header::ETAG { |
| headers.insert(header::IF_NONE_MATCH, value); |
| } else if key == header::LAST_MODIFIED { |
| headers.insert(header::IF_MODIFIED_SINCE, value); |
| } else { |
| // We could error here, but that's kind of pointless |
| // since the response will be sent in full if we haven't |
| // specified one of the above headers. Though it does |
| // potentially indicate something weird is going on |
| } |
| } |
| } |
| } |
| } |
| |
| Ok(req) |
| } |
| |
| /// Creates an HTTP request that can be sent via your HTTP client of choice |
| /// to retrieve the config for this index. |
| /// |
| /// See [`Self::parse_config_response()`] processing the response from the remote |
| /// index. |
| /// |
| /// It is highly recommended to assume HTTP/2 when making requests to remote |
| /// indices, at least crates.io. |
| #[cfg(feature = "sparse")] |
| pub fn make_config_request(&self) -> Result<http::request::Builder, Error> { |
| self.make_request(&format!("{}config.json", self.url()), None) |
| } |
| |
| /// Creates an HTTP request that can be sent via your HTTP client of choice |
| /// to retrieve the current metadata for the specified crate `namw`. |
| /// |
| /// See [`Self::parse_cache_response()`] processing the response from the remote |
| /// index. |
| /// |
| /// It is highly recommended to assume HTTP/2 when making requests to remote |
| /// indices, at least crates.io. |
| #[cfg(feature = "sparse")] |
| pub fn make_cache_request(&self, name: &str) -> Result<http::request::Builder, Error> { |
| self.make_request( |
| &self |
| .crate_url(name) |
| .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "crate name is invalid"))?, |
| self.read_cache_version(name).as_deref(), |
| ) |
| } |
| |
| /// Process the response to a request created by [`Self::make_config_request()`]. |
| /// |
| /// If `write_config` is `true`, write the configuration to disk after parsing it. |
| /// Note that the write operation may fail, and as opposed to the similar parameter |
| /// in [`Self::parse_cache_response()`], write errors will not be ignored. |
| /// |
| /// Note that the `response` from sparse HTTP indices, at least crates.io, may |
| /// send responses with `gzip` compression, it is your responsibility to |
| /// decompress it before sending to this function. |
| #[cfg(feature = "sparse")] |
| pub fn parse_config_response( |
| &self, |
| response: http::Response<Vec<u8>>, |
| write_config: bool, |
| ) -> Result<IndexConfig, Error> { |
| use http::StatusCode; |
| let (parts, body) = response.into_parts(); |
| |
| match parts.status { |
| StatusCode::OK => { |
| let res = serde_json::from_slice(&body).map_err(Error::Json); |
| if write_config { |
| let path = self.path.join("config.json"); |
| std::fs::create_dir_all(path.parent().unwrap())?; |
| std::fs::write(&path, &body)?; |
| } |
| res |
| } |
| StatusCode::UNAUTHORIZED => { |
| Err(io::Error::new(io::ErrorKind::PermissionDenied, "the request was not authorized").into()) |
| } |
| StatusCode::NOT_FOUND => { |
| Err(io::Error::new(io::ErrorKind::NotFound, "config.json not found in registry").into()) |
| } |
| other => Err(io::Error::new( |
| io::ErrorKind::Unsupported, |
| format!( |
| "the server responded with status code '{other}', which is not supported in the current protocol" |
| ), |
| ) |
| .into()), |
| } |
| } |
| |
| /// Process the response to a request created by [`Self::make_cache_request`] |
| /// |
| /// This handles both the scenario where the local cache is missing the specified |
| /// crate, or it is out of date, as well as the local entry being up to date |
| /// and can just be read from disk |
| /// |
| /// You may specify whether an updated index entry is written locally to the |
| /// cache or not |
| /// |
| /// Note that responses from sparse HTTP indices, at least crates.io, may |
| /// send responses with `gzip` compression, it is your responsibility to |
| /// decompress it before sending to this function |
| #[cfg(feature = "sparse")] |
| pub fn parse_cache_response( |
| &self, |
| name: &str, |
| response: http::Response<Vec<u8>>, |
| write_cache_entry: bool, |
| ) -> Result<Option<Crate>, Error> { |
| use http::{header, StatusCode}; |
| let (parts, body) = response.into_parts(); |
| |
| match parts.status { |
| // The server responded with the full contents of the index entry |
| StatusCode::OK => { |
| let krate = Crate::from_slice(&body)?; |
| |
| if write_cache_entry { |
| // The same as cargo, prefer etag over last-modified |
| let version = if let Some(etag) = parts.headers.get(header::ETAG) { |
| etag.to_str().ok().map(|etag| format!("{}: {etag}", header::ETAG)) |
| } else if let Some(lm) = parts.headers.get(header::LAST_MODIFIED) { |
| lm.to_str().ok().map(|lm| format!("{}: {lm}", header::LAST_MODIFIED)) |
| } else { |
| None |
| }; |
| |
| let version = version.unwrap_or_else(|| "Unknown".to_owned()); |
| |
| // This should always succeed, but no need to panic or fail |
| if let Some(cache_path) = self.cache_path(name) { |
| if std::fs::create_dir_all(cache_path.parent().unwrap()).is_ok() { |
| // It's unfortunate if this fails for some reason, but |
| // not writing the cache entry shouldn't stop the user |
| // from getting the crate's metadata |
| let _ = krate.write_cache_entry(&cache_path, &version); |
| } |
| } |
| } |
| |
| Ok(Some(krate)) |
| } |
| // The local cache entry is up to date with the latest entry on the |
| // server, we can just return the local one |
| StatusCode::NOT_MODIFIED => self.crate_from_cache(name).map(Option::Some), |
| // The server requires authorization but the user didn't provide it |
| StatusCode::UNAUTHORIZED => { |
| Err(io::Error::new(io::ErrorKind::PermissionDenied, "the request was not authorized").into()) |
| } |
| // The crate does not exist, or has been removed |
| StatusCode::NOT_FOUND | StatusCode::GONE | StatusCode::UNAVAILABLE_FOR_LEGAL_REASONS => Ok(None), |
| other => Err(io::Error::new( |
| io::ErrorKind::Unsupported, |
| format!( |
| "the server responded with status code '{other}', which is not supported in the current protocol" |
| ), |
| ) |
| .into()), |
| } |
| } |
| } |
| |
| #[cfg(test)] |
| #[cfg(feature = "sparse")] |
| mod tests { |
| use crate::SparseIndex; |
| use http::header; |
| |
| #[inline] |
| fn crates_io() -> SparseIndex { |
| SparseIndex::with_path( |
| std::path::Path::new(&std::env::var_os("CARGO_MANIFEST_DIR").unwrap()) |
| .join("tests/fixtures/sparse_registry_cache/cargo_home"), |
| crate::sparse::URL, |
| ) |
| .unwrap() |
| } |
| |
| // curl -v -H 'accept-encoding: gzip,identity' https://index.crates.io/cr/at/crates-index |
| const CRATES_INDEX_INDEX_ENTRY: &[u8] = include_bytes!("../tests/fixtures/crates-index.txt"); |
| |
| // Validates that a valid cache entry is written if the index entry has been |
| // modified |
| #[test] |
| fn writes_cache_entry() { |
| let index = crates_io(); |
| |
| let cache_path = index.cache_path("crates-index").unwrap(); |
| if cache_path.exists() { |
| std::fs::remove_file(&cache_path).expect("failed to remove existing crates-index cache file"); |
| } |
| |
| let response = http::Response::builder() |
| .status(http::StatusCode::OK) |
| .header(header::ETAG, "W/\"7fbfc422231ec53a9283f2eb2fb4f459\"") |
| .body(CRATES_INDEX_INDEX_ENTRY.to_vec()) |
| .unwrap(); |
| |
| let http_krate = index |
| .parse_cache_response("crates-index", response, true /* write cache entry */) |
| .unwrap() |
| .unwrap(); |
| assert!(cache_path.is_file(), "the cache entry was indeed written"); |
| let cache_krate = index.crate_from_cache("crates-index").unwrap(); |
| |
| for (http, cache) in http_krate.versions().iter().zip(cache_krate.versions().iter()) { |
| assert_eq!(http.version(), cache.version()); |
| } |
| } |
| } |