use std::collections::{BTreeMap, BTreeSet};
use std::fs::{copy, File};
use std::io::{BufRead, BufReader, Read, Write};
use std::path::{Path, PathBuf};
use anyhow::{anyhow, Context, Result};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
/// Schema of a single patch entry in a PATCHES.json file, (de)serialized via serde.
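///
/// A sketch of the JSON shape a single entry is expected to take (the field
/// values here are hypothetical):
///
/// ```json
/// {
///     "metadata": {"title": "An example patch"},
///     "platforms": ["some_platform"],
///     "rel_patch_path": "patches/some_fix.patch",
///     "start_version": 1000,
///     "end_version": 1005
/// }
/// ```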
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PatchDictSchema {
pub end_version: Option<u64>,
pub metadata: Option<BTreeMap<String, serde_json::Value>>,
#[serde(default, skip_serializing_if = "BTreeSet::is_empty")]
pub platforms: BTreeSet<String>,
pub rel_patch_path: String,
pub start_version: Option<u64>,
}
/// Struct to keep track of patches and their relative paths.
#[derive(Debug, Clone)]
pub struct PatchCollection {
pub patches: Vec<PatchDictSchema>,
pub workdir: PathBuf,
}
impl PatchCollection {
    /// Create a `PatchCollection` from a PATCHES.json file.
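    ///
    /// A minimal usage sketch (the path is hypothetical):
    ///
    /// ```ignore
    /// let collection = PatchCollection::parse_from_file(Path::new("some/dir/PATCHES.json"))?;
    /// ```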
pub fn parse_from_file(json_file: &Path) -> Result<Self> {
Ok(Self {
patches: serde_json::from_reader(File::open(json_file)?)?,
workdir: json_file
.parent()
.ok_or_else(|| anyhow!("failed to get json_file parent"))?
.to_path_buf(),
})
}
    /// Create a `PatchCollection` from a JSON string and a workdir.
pub fn parse_from_str(workdir: PathBuf, contents: &str) -> Result<Self> {
Ok(Self {
patches: serde_json::from_str(contents).context("parsing from str")?,
workdir,
})
}
    /// Copy this collection, keeping only the patches that satisfy the given predicate.
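    ///
    /// A minimal usage sketch (the platform name is hypothetical):
    ///
    /// ```ignore
    /// let subset = collection.filter_patches(|p| p.platforms.contains("some_platform"));
    /// ```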
pub fn filter_patches(&self, f: impl FnMut(&PatchDictSchema) -> bool) -> Self {
Self {
patches: self.patches.iter().cloned().filter(f).collect(),
workdir: self.workdir.clone(),
}
}
    /// Return true if the collection is not tracking any patches.
    #[allow(dead_code)]
pub fn is_empty(&self) -> bool {
self.patches.is_empty()
}
    /// Compute the set difference `self - subtrahend`, comparing patches by
    /// file hash. The returned `PatchCollection` keeps the minuend's
    /// (`self`'s) workdir.
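    ///
    /// A minimal usage sketch:
    ///
    /// ```ignore
    /// // Keep only the patches of `ours` whose file hashes don't appear in `theirs`.
    /// let ours_only = ours.subtract(&theirs)?;
    /// ```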
    pub fn subtract(&self, subtrahend: &Self) -> Result<Self> {
        let mut new_patches = Vec::new();
        // This is O(n^2) when it could be much faster, but n is always going
        // to be less than 1k and speed is not important here.
        for our_patch in &self.patches {
            // Hash our patch once, rather than once per subtrahend patch.
            let our_hash = self
                .hash_from_rel_patch(our_patch)
                .context("getting hash from our patch")?;
            let mut found_in_sub = false;
            for sub_patch in &subtrahend.patches {
                let sub_hash = subtrahend
                    .hash_from_rel_patch(sub_patch)
                    .context("getting hash from subtrahend patch")?;
                if our_hash == sub_hash {
                    found_in_sub = true;
                    break;
                }
            }
            if !found_in_sub {
                new_patches.push(our_patch.clone());
            }
        }
        Ok(Self {
            patches: new_patches,
            workdir: self.workdir.clone(),
        })
    }
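    /// Merge this collection with `other`, deduplicating by patch file hash.
    /// When a patch appears in both, the merged entry unions the two platform
    /// sets and keeps `self`'s version range and metadata; patches unique to
    /// either side are carried over unchanged.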
pub fn union(&self, other: &Self) -> Result<Self> {
self.union_helper(
other,
|p| self.hash_from_rel_patch(p),
|p| other.hash_from_rel_patch(p),
)
}
fn union_helper(
&self,
other: &Self,
our_hash_f: impl Fn(&PatchDictSchema) -> Result<String>,
their_hash_f: impl Fn(&PatchDictSchema) -> Result<String>,
) -> Result<Self> {
        // 1. For all our patches:
        //    a. If there exists a matching patch hash from `other`:
        //       i.   Create a new patch with merged platform info.
        //       ii.  Add the new patch to our new collection.
        //       iii. Mark the other patch as "merged".
        //    b. Otherwise, copy our patch to the new collection.
        // 2. For all unmerged patches from `other`:
        //    a. Copy their patch into the new collection.
let mut combined_patches = Vec::new();
let mut other_merged = vec![false; other.patches.len()];
// 1.
for p in &self.patches {
let our_hash = our_hash_f(p)?;
let mut found = false;
// a.
for (idx, merged) in other_merged.iter_mut().enumerate() {
if !*merged {
let other_p = &other.patches[idx];
let their_hash = their_hash_f(other_p)?;
if our_hash == their_hash {
// i.
let new_platforms =
p.platforms.union(&other_p.platforms).cloned().collect();
// ii.
combined_patches.push(PatchDictSchema {
rel_patch_path: p.rel_patch_path.clone(),
start_version: p.start_version,
end_version: p.end_version,
platforms: new_platforms,
metadata: p.metadata.clone(),
});
// iii.
*merged = true;
found = true;
break;
}
}
}
// b.
if !found {
combined_patches.push(p.clone());
}
}
// 2.
// Add any remaining, other-only patches.
for (idx, merged) in other_merged.iter().enumerate() {
if !*merged {
combined_patches.push(other.patches[idx].clone());
}
}
Ok(Self {
workdir: self.workdir.clone(),
patches: combined_patches,
})
}
    /// Copy all patches from this collection into another existing collection,
    /// then write the combined result to the existing collection's
    /// PATCHES.json file.
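    ///
    /// A minimal usage sketch:
    ///
    /// ```ignore
    /// ours.transpose_write(&mut existing_collection)?;
    /// ```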
pub fn transpose_write(&self, existing_collection: &mut Self) -> Result<()> {
for p in &self.patches {
let original_file_path = self.workdir.join(&p.rel_patch_path);
let copy_file_path = existing_collection.workdir.join(&p.rel_patch_path);
copy_create_parents(&original_file_path, &copy_file_path)?;
existing_collection.patches.push(p.clone());
}
existing_collection.write_patches_json("PATCHES.json")
}
/// Write out the patch collection contents to a PATCHES.json file.
fn write_patches_json(&self, filename: &str) -> Result<()> {
let write_path = self.workdir.join(filename);
let mut new_patches_file = File::create(&write_path)
.with_context(|| format!("writing to {}", write_path.display()))?;
new_patches_file.write_all(self.serialize_patches()?.as_bytes())?;
Ok(())
}
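    /// Serialize the patches to a pretty-printed JSON string, indented with
    /// four spaces and always terminated by a newline.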
pub fn serialize_patches(&self) -> Result<String> {
let mut serialization_buffer = Vec::<u8>::new();
        // Use four spaces to indent the JSON serialization.
let mut serializer = serde_json::Serializer::with_formatter(
&mut serialization_buffer,
serde_json::ser::PrettyFormatter::with_indent(b" "),
);
self.patches
.serialize(&mut serializer)
.context("serializing patches to JSON")?;
// Append a newline at the end if not present. This is necessary to get
// past some pre-upload hooks.
if serialization_buffer.last() != Some(&b'\n') {
serialization_buffer.push(b'\n');
}
Ok(std::str::from_utf8(&serialization_buffer)?.to_string())
}
fn hash_from_rel_patch(&self, patch: &PatchDictSchema) -> Result<String> {
hash_from_patch_path(&self.workdir.join(&patch.rel_patch_path))
}
}
/// Get the hash from the patch file contents.
///
/// Not every patch file actually contains its own hash;
/// when one isn't found, we compute a SHA-256 of the file instead.
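///
/// For example, the hash is taken verbatim from first lines such as:
///
/// ```text
/// commit 004be4037e1e9c6092323c5c9268acb3ecf9176c
/// From 6f85225ef3791357f9b1aa097b575b0a2b0dff48 Mon Sep 17 00:00:00 2001
/// ```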
fn hash_from_patch(patch_contents: impl Read) -> Result<String> {
let mut reader = BufReader::new(patch_contents);
let mut buf = String::new();
reader.read_line(&mut buf)?;
let mut first_line_iter = buf.trim().split(' ').fuse();
let (fst_word, snd_word) = (first_line_iter.next(), first_line_iter.next());
if let (Some("commit" | "From"), Some(hash_str)) = (fst_word, snd_word) {
// If the first line starts with either "commit" or "From", the following
// text is almost certainly a commit hash.
Ok(hash_str.to_string())
} else {
// This is an annoying case where the patch isn't actually a commit.
// So we'll hash the entire file, and hope that's sufficient.
        let mut hasher = Sha256::new();
        hasher.update(&buf); // Hash the first line, which was already read.
        buf.clear(); // Avoid hashing the first line twice when reading the rest.
        reader.read_to_string(&mut buf)?;
        hasher.update(buf); // Hash the rest of the file.
let sha = hasher.finalize();
Ok(format!("{:x}", &sha))
}
}
fn hash_from_patch_path(patch: &Path) -> Result<String> {
let f = File::open(patch)?;
hash_from_patch(f)
}
/// Copy a file from one path to another, and create any parent
/// directories along the way.
fn copy_create_parents(from: &Path, to: &Path) -> Result<()> {
let to_parent = to
.parent()
.with_context(|| format!("getting parent of {}", to.display()))?;
if !to_parent.exists() {
std::fs::create_dir_all(to_parent)?;
}
    copy(from, to).with_context(|| {
        format!("copying file from {} to {}", from.display(), to.display())
    })?;
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
/// Test we can extract the hash from patch files.
#[test]
fn test_hash_from_patch() {
// Example git patch from Gerrit
let desired_hash = "004be4037e1e9c6092323c5c9268acb3ecf9176c";
let test_file_contents = "commit 004be4037e1e9c6092323c5c9268acb3ecf9176c\n\
Author: An Author <some_email>\n\
Date: Thu Aug 6 12:34:16 2020 -0700";
assert_eq!(
&hash_from_patch(test_file_contents.as_bytes()).unwrap(),
desired_hash
);
// Example git patch from upstream
let desired_hash = "6f85225ef3791357f9b1aa097b575b0a2b0dff48";
let test_file_contents = "From 6f85225ef3791357f9b1aa097b575b0a2b0dff48\n\
Mon Sep 17 00:00:00 2001\n\
From: Another Author <another_email>\n\
Date: Wed, 18 Aug 2021 15:03:03 -0700";
assert_eq!(
&hash_from_patch(test_file_contents.as_bytes()).unwrap(),
desired_hash
);
}
#[test]
fn test_union() {
let patch1 = PatchDictSchema {
start_version: Some(0),
end_version: Some(1),
rel_patch_path: "a".into(),
metadata: None,
platforms: BTreeSet::from(["x".into()]),
};
let patch2 = PatchDictSchema {
rel_patch_path: "b".into(),
platforms: BTreeSet::from(["x".into(), "y".into()]),
..patch1.clone()
};
let patch3 = PatchDictSchema {
platforms: BTreeSet::from(["z".into(), "x".into()]),
..patch1.clone()
};
let collection1 = PatchCollection {
workdir: PathBuf::new(),
patches: vec![patch1, patch2],
};
let collection2 = PatchCollection {
workdir: PathBuf::new(),
patches: vec![patch3],
};
let union = collection1
.union_helper(
&collection2,
|p| Ok(p.rel_patch_path.to_string()),
|p| Ok(p.rel_patch_path.to_string()),
)
.expect("could not create union");
assert_eq!(union.patches.len(), 2);
assert_eq!(
union.patches[0].platforms.iter().collect::<Vec<&String>>(),
vec!["x", "z"]
);
assert_eq!(
union.patches[1].platforms.iter().collect::<Vec<&String>>(),
vec!["x", "y"]
);
}
#[test]
fn test_union_empties() {
let patch1 = PatchDictSchema {
start_version: Some(0),
end_version: Some(1),
rel_patch_path: "a".into(),
metadata: None,
platforms: Default::default(),
};
let collection1 = PatchCollection {
workdir: PathBuf::new(),
patches: vec![patch1.clone()],
};
let collection2 = PatchCollection {
workdir: PathBuf::new(),
patches: vec![patch1],
};
let union = collection1
.union_helper(
&collection2,
|p| Ok(p.rel_patch_path.to_string()),
|p| Ok(p.rel_patch_path.to_string()),
)
.expect("could not create union");
assert_eq!(union.patches.len(), 1);
assert_eq!(union.patches[0].platforms.len(), 0);
}
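    /// Test that `filter_patches` keeps only matching patches. A minimal
    /// sketch; the patch paths and platform names here are made up.
    #[test]
    fn test_filter_patches() {
        let patch1 = PatchDictSchema {
            start_version: Some(0),
            end_version: Some(1),
            rel_patch_path: "a".into(),
            metadata: None,
            platforms: BTreeSet::from(["x".into()]),
        };
        let patch2 = PatchDictSchema {
            rel_patch_path: "b".into(),
            platforms: BTreeSet::from(["y".into()]),
            ..patch1.clone()
        };
        let collection = PatchCollection {
            workdir: PathBuf::new(),
            patches: vec![patch1, patch2],
        };
        let filtered = collection.filter_patches(|p| p.platforms.contains("x"));
        assert_eq!(filtered.patches.len(), 1);
        assert_eq!(filtered.patches[0].rel_patch_path, "a");
    }
    /// Test that `serialize_patches` pretty-prints and ends with the newline
    /// some pre-upload hooks require. A minimal sketch with a made-up patch.
    #[test]
    fn test_serialize_patches_ends_with_newline() {
        let collection = PatchCollection {
            workdir: PathBuf::new(),
            patches: vec![PatchDictSchema {
                start_version: Some(0),
                end_version: Some(1),
                rel_patch_path: "a".into(),
                metadata: None,
                platforms: Default::default(),
            }],
        };
        let serialized = collection.serialize_patches().expect("serializing patches");
        assert!(serialized.ends_with('\n'));
        assert!(serialized.contains("\"rel_patch_path\": \"a\""));
    }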
}