blob: 8d6bbf9710936048c8230eab33afee4155c00ba6 [file] [log] [blame]
//! The build graph, a graph between files and commands.
use anyhow::{bail, Context};
use rustc_hash::{FxHashMap, FxHasher, FxBuildHasher};
use crate::{
canon::shell_escape,
concurrent_linked_list::ConcurrentLinkedList,
densemap::{self, DenseMap},
eval::EvalString,
hash::BuildHash,
load::{Scope, ScopePosition},
smallmap::SmallMap,
};
use std::time::SystemTime;
use std::{collections::HashMap, sync::Arc};
use std::{
hash::BuildHasherDefault,
path::{Path, PathBuf},
sync::Mutex,
};
/// Id for Build nodes in the Graph.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub struct BuildId(u32);
impl densemap::Index for BuildId {
fn index(&self) -> usize {
self.0 as usize
}
}
impl From<usize> for BuildId {
fn from(u: usize) -> BuildId {
BuildId(u as u32)
}
}
/// A single file referenced as part of a build.
#[derive(Debug, Default)]
pub struct File {
/// Canonical path to the file.
pub name: Arc<String>,
/// The Build that generates this file, if any.
pub input: Mutex<Option<BuildId>>,
/// The Builds that depend on this file as an input.
pub dependents: ConcurrentLinkedList<BuildId>,
}
impl File {
pub fn path(&self) -> &Path {
Path::new(self.name.as_ref())
}
}
/// A textual location within a build.ninja file, used in error messages.
#[derive(Debug, Clone)]
pub struct FileLoc {
pub filename: Arc<PathBuf>,
pub line: usize,
}
impl std::fmt::Display for FileLoc {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(f, "{}:{}", self.filename.display(), self.line)
}
}
#[derive(Debug, Clone, Hash)]
pub struct RspFile {
pub path: std::path::PathBuf,
pub content: String,
}
/// Input files to a Build.
#[derive(Debug)]
pub struct BuildIns {
/// Internally we stuff explicit/implicit/order-only ins all into one Vec.
/// This is mostly to simplify some of the iteration and is a little more
/// memory efficient than three separate Vecs, but it is kept internal to
/// Build and only exposed via methods on Build.
pub ids: Vec<Arc<File>>,
pub explicit: usize,
pub implicit: usize,
pub order_only: usize,
// validation count implied by other counts.
// pub validation: usize,
}
/// Output files from a Build.
#[derive(Debug)]
pub struct BuildOuts {
/// Similar to ins, we keep both explicit and implicit outs in one Vec.
pub ids: Vec<Arc<File>>,
pub explicit: usize,
pub implicit: usize,
}
impl BuildOuts {
/// CMake seems to generate build files with the same output mentioned
/// multiple times in the outputs list. Given that Ninja accepts these,
/// this function removes duplicates from the output list.
pub fn remove_duplicates(&mut self) {
let mut ids = Vec::new();
for (i, id) in self.ids.iter().enumerate() {
if self.ids[0..i]
.iter()
.any(|prev| std::ptr::eq(prev.as_ref(), id.as_ref()))
{
// Skip over duplicate.
if i < self.explicit {
self.explicit -= 1;
}
continue;
}
ids.push(id.clone());
}
self.ids = ids;
}
pub fn num_outs(&self) -> usize {
self.explicit + self.implicit
}
}
#[cfg(test)]
mod tests {
use super::*;
fn assert_file_arc_vecs_equal(a: Vec<Arc<File>>, b: Vec<Arc<File>>) {
for (x, y) in a.into_iter().zip(b.into_iter()) {
if !Arc::ptr_eq(&x, &y) {
panic!("File vecs not equal");
}
}
}
#[test]
fn remove_dups_explicit() {
let file1 = Arc::new(File::default());
let file2 = Arc::new(File::default());
let mut outs = BuildOuts {
ids: vec![file1.clone(), file1.clone(), file2.clone()],
explicit: 2,
implicit: 0,
};
outs.remove_duplicates();
assert_file_arc_vecs_equal(outs.ids, vec![file1, file2]);
assert_eq!(outs.explicit, 1);
}
#[test]
fn remove_dups_implicit() {
let file1 = Arc::new(File::default());
let file2 = Arc::new(File::default());
let mut outs = BuildOuts {
ids: vec![file1.clone(), file2.clone(), file1.clone()],
explicit: 2,
implicit: 0,
};
outs.remove_duplicates();
assert_file_arc_vecs_equal(outs.ids, vec![file1, file2]);
assert_eq!(outs.explicit, 2);
}
}
/// A variable lookup environment for magic $in/$out variables.
struct BuildImplicitVars<'a> {
explicit_ins: &'a [Arc<File>],
explicit_outs: &'a [Arc<File>],
escape: bool,
}
impl<'text> crate::eval::Env for BuildImplicitVars<'text> {
fn evaluate_var(
&self,
result: &mut String,
var: &str,
envs: &[&dyn crate::eval::Env],
scope: &Scope,
position: ScopePosition,
) {
let mut common = |files: &[Arc<File>], sep: &'static str| {
for (i, file) in files.iter().enumerate() {
if i > 0 {
result.push_str(sep);
}
if self.escape {
shell_escape(&file.name, result);
} else {
result.push_str(&file.name);
}
}
};
match var {
"in" => common(self.explicit_ins, " "),
"in_newline" => common(self.explicit_ins, "\n"),
"out" => common(self.explicit_outs, " "),
"out_newline" => common(self.explicit_outs, "\n"),
_ => {
if let Some(env) = envs.first() {
env.evaluate_var(result, var, &envs[1..], scope, position);
} else {
scope.evaluate(result, var, position);
}
}
}
}
}
/// A single build action, generating File outputs from File inputs with a command.
#[derive(Debug)]
pub struct Build {
pub id: BuildId,
/// The scope that this build is part of. Used when evaluating the build's
/// bindings.
pub scope: Option<Arc<Scope>>,
/// The position of this build in the scope. Used when evaluating the
/// build's bindings.
pub scope_position: ScopePosition,
/// The unevalated output/input files. These strings really have a lifetime
/// of 'text, but we use 'static so that we don't need to add 'text to the
/// build itself. We unsafely cast 'text strings to 'static. The strings
/// are evaluated and this vec is cleared before the lifetime of 'text is
/// over.
pub unevaluated_outs_and_ins: Vec<EvalString<&'static str>>,
pub rule: String,
// The unevaluated variable bindings. They're stored unevalated so that
// we don't have to evaluate all bindings on all builds.
pub bindings: SmallMap<String, EvalString<String>>,
/// Source location this Build was declared.
pub location: FileLoc,
/// Input files.
pub ins: BuildIns,
/// Additional inputs discovered from a previous build.
discovered_ins: Vec<Arc<File>>,
/// Output files.
pub outs: BuildOuts,
/// Indicates the build does not generate the output files (not a standard ninja feature).
pub phony_output: bool
}
impl Build {
pub fn new(
rule: String,
bindings: SmallMap<String, EvalString<String>>,
location: FileLoc,
ins: BuildIns,
outs: BuildOuts,
unevaluated_outs_and_ins: Vec<EvalString<&'static str>>,
phony_output: bool,
) -> Self {
Build {
id: BuildId::from(0),
rule,
scope: None,
scope_position: ScopePosition(0),
bindings,
location,
ins,
discovered_ins: Vec::new(),
outs,
unevaluated_outs_and_ins,
phony_output,
}
}
/// Input paths that appear in `$in`.
pub fn explicit_ins(&self) -> &[Arc<File>] {
&self.ins.ids[0..self.ins.explicit]
}
/// Input paths that, if changed, invalidate the output.
/// Note this omits discovered_ins, which also invalidate the output.
pub fn dirtying_ins(&self) -> &[Arc<File>] {
&self.ins.ids[0..(self.ins.explicit + self.ins.implicit)]
}
/// Inputs that are needed before building.
/// Distinct from dirtying_ins in that it includes order-only dependencies.
/// Note that we don't order on discovered_ins, because they're not allowed to
/// affect build order.
pub fn ordering_ins(&self) -> &[Arc<File>] {
&self.ins.ids[0..(self.ins.order_only + self.ins.explicit + self.ins.implicit)]
}
/// Inputs that are needed before validating information.
/// Validation inputs will be built whenever this Build is built, but this Build will not
/// wait for them to complete before running. The validation inputs can fail to build, which
/// will cause the overall build to fail.
pub fn validation_ins(&self) -> &[Arc<File>] {
&self.ins.ids[(self.ins.order_only + self.ins.explicit + self.ins.implicit)..]
}
// Returns the MTime of the newest dirtying in to the build. If the build
// has no dirtying inputs, an MTime for the unix epoch is returned. If any
// input is missing, an mtime of missing will be returned.
pub fn newest_dirtying_in(&self, file_state: &FileState) -> anyhow::Result<MTime> {
let mut result = MTime::Stamp(SystemTime::UNIX_EPOCH);
for i in self.dirtying_ins() {
match file_state.get(&i) {
Some(MTime::Missing) => result = MTime::Missing,
Some(MTime::Stamp(t)) => match result {
MTime::Missing => {},
MTime::Stamp(t2) if t > t2 => result = MTime::Stamp(t),
MTime::Stamp(_) => {},
},
None => bail!("Expected all inputs to be statted already, but {} wasn't", i.name),
}
}
Ok(result)
}
fn vecs_of_arcs_eq<T>(a: &Vec<Arc<T>>, b: &Vec<Arc<T>>) -> bool {
if a.len() != b.len() {
return false;
}
for (x, y) in a.iter().zip(b.iter()) {
if !Arc::ptr_eq(x, y) {
return false;
}
}
return true;
}
/// Potentially update discovered_ins with a new set of deps, returning true if they changed.
pub fn update_discovered(&mut self, deps: Vec<Arc<File>>) -> bool {
if Self::vecs_of_arcs_eq(&deps, &self.discovered_ins) {
false
} else {
self.set_discovered_ins(deps);
true
}
}
pub fn set_discovered_ins(&mut self, deps: Vec<Arc<File>>) {
self.discovered_ins = deps;
}
/// Input paths that were discovered after building, for use in the next build.
pub fn discovered_ins(&self) -> &[Arc<File>] {
&self.discovered_ins
}
/// Output paths that appear in `$out`.
pub fn explicit_outs(&self) -> &[Arc<File>] {
&self.outs.ids[0..self.outs.explicit]
}
/// Output paths that are updated when the build runs.
pub fn outs(&self) -> &[Arc<File>] {
&self.outs.ids
}
fn get_binding(&self, key: &str, escape_special_vars: bool) -> Option<String> {
let scope = self.scope.as_ref().unwrap();
// interestingly, the lookup order is to start with the build -> scope,
// but if the build doesn't have the binding, fall back to
// rule -> implicits -> build -> scope. A more logical order might
// always be rule -> (implicits -> ) build -> scope, which does work
// for the android build, but gets incorrect descriptions.
// The description from the rule is used instead of from the build.
if let Some(x) = self.bindings.get(key) {
return Some(x.evaluate(&[], scope, self.scope_position));
}
let implicit_vars = BuildImplicitVars {
explicit_ins: &self.ins.ids[..self.ins.explicit],
explicit_outs: &self.outs.ids[..self.outs.explicit],
escape: escape_special_vars,
};
let rule = scope.get_rule(&self.rule, self.scope_position).unwrap();
Some(rule.vars.get(key)?.evaluate(
&[&implicit_vars, &self.bindings],
scope,
self.scope_position
))
}
pub fn get_rspfile(&self) -> anyhow::Result<Option<RspFile>> {
let rspfile_path = self.get_binding("rspfile", false);
let rspfile_content = self.get_binding("rspfile_content", true);
let rspfile = match (rspfile_path, rspfile_content) {
(None, None) => None,
(Some(path), Some(content)) => Some(RspFile {
path: std::path::PathBuf::from(path),
content,
}),
_ => bail!("rspfile and rspfile_content need to be both specified"),
};
Ok(rspfile)
}
pub fn get_parse_showincludes(&self) -> anyhow::Result<bool> {
Ok(match self.get_binding("deps", true).as_deref() {
None => false,
Some("gcc") => false,
Some("msvc") => true,
Some(other) => bail!("invalid deps attribute {:?}", other),
})
}
pub fn get_cmdline(&self) -> Option<String> {
self.get_binding("command", true)
}
pub fn get_description(&self) -> Option<String> {
self.get_binding("description", false)
}
pub fn get_depfile(&self) -> Option<String> {
self.get_binding("depfile", false)
}
pub fn get_pool(&self) -> Option<String> {
self.get_binding("pool", true)
}
}
/// The build graph: owns Files/Builds and maps BuildIds to them.
#[derive(Default)]
pub struct Graph {
pub builds: DenseMap<BuildId, Box<Build>>,
pub files: GraphFiles,
}
/// Files identified by their string names.
/// Split from Graph for lifetime reasons.
#[derive(Default)]
pub struct GraphFiles {
by_name: dashmap::DashMap<Arc<String>, Arc<File>, BuildHasherDefault<FxHasher>>,
}
impl Graph {
pub fn new(builds: Vec<Box<Build>>, files: GraphFiles) -> anyhow::Result<Self> {
let result = Graph {
builds: DenseMap::from_vec(builds),
files,
};
Ok(result)
}
pub fn initialize_build(build: &mut Build) -> anyhow::Result<()> {
let new_id = build.id;
let mut fixup_dups = false;
for input in &build.ins.ids {
input.dependents.prepend(new_id);
}
for f in &build.outs.ids {
let mut input = f.input.lock().unwrap();
match *input {
Some(prev) if prev == new_id => {
fixup_dups = true;
println!(
"n2: warn: {}: {:?} is repeated in output list",
build.location, f.name,
);
}
Some(_) => {
let location = build.location.clone();
anyhow::bail!(
"{}: {:?} is already an output of another build",
location,
f.name,
);
}
None => *input = Some(new_id),
}
}
if fixup_dups {
build.outs.remove_duplicates();
}
Ok(())
}
}
impl GraphFiles {
/// Look up a file by its name. Name must have been canonicalized already.
pub fn lookup(&self, file: String) -> Option<Arc<File>> {
self.by_name.get(&Arc::new(file)).map(|x| x.clone())
}
/// Look up a file by its name, adding it if not already present.
/// Name must have been canonicalized already. Only accepting an owned
/// string allows us to avoid a string copy and a hashmap lookup when we
/// need to create a new id, but would also be possible to create a version
/// of this function that accepts string references that is more optimized
/// for the case where the entry already exists. But so far, all of our
/// usages of this function have an owned string easily accessible anyways.
pub fn id_from_canonical(&self, file: String) -> Arc<File> {
let file = Arc::new(file);
match self.by_name.entry(file) {
dashmap::mapref::entry::Entry::Occupied(o) => o.get().clone(),
dashmap::mapref::entry::Entry::Vacant(v) => {
let mut f = File::default();
f.name = v.key().clone();
let f = Arc::new(f);
v.insert(f.clone());
f
}
}
}
pub fn all_files(&self) -> impl Iterator<Item = Arc<File>> + '_ {
self.by_name.iter().map(|x| x.clone())
}
pub fn num_files(&self) -> usize {
self.by_name.len()
}
}
/// MTime info gathered for a file. This also models "file is absent".
/// It's not using an Option<> just because it makes the code using it easier
/// to follow.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum MTime {
Missing,
Stamp(SystemTime),
}
/// stat() an on-disk path, producing its MTime.
pub fn stat(path: &Path) -> std::io::Result<MTime> {
// TODO: On Windows, use FindFirstFileEx()/FindNextFile() to get timestamps per
// directory, for better stat perf.
metadata_to_mtime(std::fs::metadata(path))
}
/// lstat() an on-disk path, producing its MTime.
pub fn lstat(path: &Path) -> std::io::Result<MTime> {
metadata_to_mtime(std::fs::symlink_metadata(path))
}
fn metadata_to_mtime(meta: std::io::Result<std::fs::Metadata>) -> std::io::Result<MTime> {
Ok(match meta {
Ok(meta) => MTime::Stamp(meta.modified().unwrap()),
Err(err) => {
if err.kind() == std::io::ErrorKind::NotFound {
MTime::Missing
} else {
return Err(err);
}
}
})
}
/// Gathered state of on-disk files.
/// Due to discovered deps this map may grow after graph initialization.
pub struct FileState(FxHashMap<*const File, Option<MTime>>);
impl FileState {
pub fn new(graph: &Graph) -> Self {
let hm = HashMap::with_capacity_and_hasher(
graph.files.num_files(),
FxBuildHasher::default(),
);
FileState(hm)
}
pub fn get(&self, id: &File) -> Option<MTime> {
self.0.get(&(id as *const File)).copied().flatten()
}
// set_if_missing is intended for phonies to provie a fake mtime for their
// outputs, as their outputs are not real files like in normal builds
pub fn set_if_missing(&mut self, id: &File, mtime: MTime) {
match self.0.entry(id as *const File) {
std::collections::hash_map::Entry::Occupied(mut e) => match e.get() {
Some(MTime::Missing) => { e.insert(Some(mtime)); },
Some(MTime::Stamp(_)) => {},
None => { e.insert(Some(mtime)); },
},
std::collections::hash_map::Entry::Vacant(e) => { e.insert(Some(mtime)); },
};
}
pub fn stat(&mut self, id: &File, path: &Path) -> anyhow::Result<MTime> {
let mtime = if id.input.lock().unwrap().is_some() {
lstat(path).with_context(|| format!("lstat: {:?}", path))?
} else {
stat(path).with_context(|| format!("stat: {:?}", path))?
};
self.0.insert(id as *const File, Some(mtime));
Ok(mtime)
}
}
#[derive(Default)]
pub struct Hashes(HashMap<BuildId, BuildHash>);
impl Hashes {
pub fn set(&mut self, id: BuildId, hash: BuildHash) {
self.0.insert(id, hash);
}
pub fn get(&self, id: BuildId) -> Option<BuildHash> {
self.0.get(&id).copied()
}
}
#[test]
fn stat_mtime_resolution() {
use std::time::Duration;
let temp_dir = tempfile::tempdir().unwrap();
let filename = temp_dir.path().join("dummy");
// Write once and stat.
std::fs::write(&filename, "foo").unwrap();
let mtime1 = match stat(&filename).unwrap() {
MTime::Stamp(mtime) => mtime,
_ => panic!("File not found: {}", filename.display()),
};
// Sleep for a short interval.
std::thread::sleep(std::time::Duration::from_millis(10));
// Write twice and stat.
std::fs::write(&filename, "foo").unwrap();
let mtime2 = match stat(&filename).unwrap() {
MTime::Stamp(mtime) => mtime,
_ => panic!("File not found: {}", filename.display()),
};
let diff = mtime2.duration_since(mtime1).unwrap();
assert!(diff > Duration::ZERO);
assert!(diff < Duration::from_millis(500));
}