blob: 200e11f3e6cd4d3e34952ce55b12c084d20b503a [file] [log] [blame]
//! Build runner, choosing and executing tasks as determined by out of date inputs.
use log::{debug, log_enabled};
use crate::trace::CompleteEvent;
use crate::{
canon::canon_path, db, densemap::DenseMap, graph::*, hash, process, progress,
progress::Progress, signal, smallmap::SmallMap, task, trace,
};
use std::collections::HashSet;
use std::collections::VecDeque;
use std::sync::Arc;
/// Build steps go through this sequence of states.
/// See "Build states" in the design notes.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum BuildState {
/// Default initial state, for Builds unneeded by the current build.
Unknown,
/// Builds we want to ensure are up to date, but which aren't ready yet.
Want,
/// Builds whose dependencies are up to date and are ready to be
/// checked. This is purely a function of whether all builds before
/// it have have run, and is independent of any file state.
///
/// Preconditions:
/// - generated inputs: have already been stat()ed as part of completing
/// the step that generated those inputs
/// - non-generated inputs: may not have yet stat()ed, so doing those
/// stat()s is part of the work of running these builds
Ready,
/// Builds who have been determined not up to date and which are ready
/// to be executed.
Queued,
/// Currently executing.
Running,
/// Finished executing successfully.
Done,
/// Finished executing but failed.
Failed,
}
/// Counters that track builds in each state, excluding phony builds.
/// This is only for display to the user and should not be used as a source of
/// truth for tracking progress.
/// Only covers builds not in the "unknown" state, which means it's only builds
/// that are considered part of the current build.
#[derive(Clone, Debug, Default)]
pub struct StateCounts {
num_want: usize,
num_ready: usize,
num_queued: usize,
num_running: usize,
num_done: usize,
num_failed: usize,
// skipped is not a real state. This represents the number of builds that
// went directly from ready to done, either because:
// - The build was a phony
// - The build and all its dependencies had already been completed in a
// prior build.
// - The build's dependencies reran, but produced the same result so
// this build didn't need to rerun. This is what "restat = true" gives
// you in regular ninja, but in n2, restat is implicit.
num_skipped: usize,
}
impl StateCounts {
pub fn add(&mut self, state: BuildState, delta: isize) {
let initial = self.get(state) as isize;
match state {
BuildState::Unknown => panic!("unexpected state"),
BuildState::Want => self.num_want = (initial + delta) as usize,
BuildState::Ready => self.num_ready = (initial + delta) as usize,
BuildState::Queued => self.num_queued = (initial + delta) as usize,
BuildState::Running => self.num_running = (initial + delta) as usize,
BuildState::Done => self.num_done = (initial + delta) as usize,
BuildState::Failed => self.num_failed = (initial + delta) as usize,
};
}
pub fn get(&self, state: BuildState) -> usize {
match state {
BuildState::Unknown => panic!("unexpected state"),
BuildState::Want => self.num_want,
BuildState::Ready => self.num_ready,
BuildState::Queued => self.num_queued,
BuildState::Running => self.num_running,
BuildState::Done => self.num_done,
BuildState::Failed => self.num_failed,
}
}
pub fn add_skipped(&mut self) {
self.num_skipped += 1;
}
pub fn total(&self) -> usize {
self.num_want
+ self.num_ready
+ self.num_queued
+ self.num_running
+ self.num_done
+ self.num_failed
}
pub fn total_except_skipped(&self) -> usize {
self.total() - self.num_skipped
}
}
/// Pools gather collections of running builds.
/// Each running build is running "in" a pool; there's a default unbounded
/// pool for builds that don't specify one.
/// See "Tracking build state" in the design notes.
#[derive(Debug)]
struct PoolState {
/// A queue of builds that are ready to be executed in this pool.
queued: VecDeque<BuildId>,
/// The number of builds currently running in this pool.
running: usize,
/// The total depth of the pool. 0 means unbounded.
depth: usize,
}
impl PoolState {
fn new(depth: usize) -> Self {
PoolState {
queued: VecDeque::new(),
running: 0,
depth,
}
}
}
/// BuildStates tracks progress of each Build step through the build.
/// See "Tracking build state" in the design notes.
struct BuildStates {
states: DenseMap<BuildId, BuildState>,
/// Counts of builds in each state.
counts: StateCounts,
/// Total number of builds that haven't been driven to completion
/// (done or failed).
total_pending: usize,
/// Builds in the ready state, stored redundantly for quick access.
ready: VecDeque<BuildId>,
/// Named pools of queued and running builds.
/// Builds otherwise default to using an unnamed infinite pool.
pools: SmallMap<String, PoolState>,
}
impl BuildStates {
fn new(size: BuildId, depths: SmallMap<String, usize>) -> Self {
let mut pools = SmallMap::default();
// The implied default pool.
pools.insert(String::from(""), PoolState::new(0));
// TODO: the console pool is just a depth-1 pool for now.
pools.insert(String::from("console"), PoolState::new(1));
for (name, depth) in depths.into_iter() {
pools.insert(name, PoolState::new(depth));
}
BuildStates {
states: DenseMap::new_sized(size, BuildState::Unknown),
counts: StateCounts::default(),
total_pending: 0,
ready: VecDeque::new(),
pools,
}
}
fn get(&self, id: BuildId) -> BuildState {
self.states[id]
}
fn set(&mut self, id: BuildId, build: &Build, state: BuildState) {
// This function is called on all state transitions.
// We get 'prev', the previous state, and 'state', the new state.
let prev = std::mem::replace(&mut self.states[id], state);
if log_enabled!(log::Level::Debug) {
let cmd = build.get_cmdline().unwrap_or("<no command>".to_owned());
debug!("build `{}` transitioned from state {:?} to {:?}", cmd, prev, state);
}
// We skip user-facing counters for phony builds.
let skip_ui_count = build.get_cmdline().is_none();
// println!("{:?} {:?}=>{:?} {:?}", id, prev, state, self.counts);
if prev == BuildState::Unknown {
self.total_pending += 1;
} else {
if prev == BuildState::Running {
self.get_pool(build).unwrap().running -= 1;
}
if !skip_ui_count {
self.counts.add(prev, -1);
}
}
match state {
BuildState::Ready => {
self.ready.push_back(id);
}
BuildState::Running => {
// Trace instants render poorly in the old Chrome UI, and
// not at all in speedscope or Perfetto.
// if self.counts.get(BuildState::Running) == 0 {
// trace::if_enabled(|t| t.write_instant("first build"));
// }
self.get_pool(build).unwrap().running += 1;
}
BuildState::Done | BuildState::Failed => {
self.total_pending -= 1;
}
_ => {}
};
if !skip_ui_count {
self.counts.add(state, 1);
if (prev == BuildState::Ready || prev == BuildState::Want) && state == BuildState::Done {
self.counts.add_skipped();
}
}
/*
This is too expensive to log on every individual state change...
trace::if_enabled(|t| {
t.write_counts(
"builds",
[
("want", self.counts.get(BuildState::Want)),
("ready", self.counts.get(BuildState::Ready)),
("queued", self.counts.get(BuildState::Queued)),
("running", self.counts.get(BuildState::Running)),
("done", self.counts.get(BuildState::Done)),
]
.iter(),
)
});*/
}
fn unfinished(&self) -> bool {
self.total_pending > 0
}
/// Visits a BuildId that is an input to the desired output.
/// Will recursively visit its own inputs.
fn want_build<'a>(
&mut self,
graph: &Graph,
stack: &mut Vec<Arc<File>>,
id: BuildId,
) -> anyhow::Result<()> {
if self.get(id) != BuildState::Unknown {
return Ok(()); // Already visited.
}
let build = &graph.builds[id];
self.set(id, build, BuildState::Want);
// Any Build that doesn't depend on an output of another Build is ready.
let mut ready = true;
for file in build.ordering_ins() {
self.want_file(graph, stack, file.clone())?;
ready = ready && file.input.lock().unwrap().is_none();
}
for file in build.validation_ins() {
// This build doesn't technically depend on the validation inputs, so
// allocate a new stack. Validation inputs could in theory depend on this build's
// outputs.
let mut stack = Vec::new();
self.want_file(graph, &mut stack, file.clone())?;
}
if ready {
self.set(id, build, BuildState::Ready);
}
Ok(())
}
/// Visits a FileId that is an input to the desired output.
/// Will recursively visit its own inputs.
pub fn want_file(
&mut self,
graph: &Graph,
stack: &mut Vec<Arc<File>>,
file: Arc<File>,
) -> anyhow::Result<()> {
// Check for a dependency cycle.
if let Some(cycle) = stack
.iter()
.position(|f| std::ptr::eq(f.as_ref(), file.as_ref()))
{
let mut err = "dependency cycle: ".to_string();
for file in stack[cycle..].iter() {
err.push_str(&format!("{} -> ", file.name));
}
err.push_str(&file.name);
anyhow::bail!(err);
}
let input_guard = file.input.lock().unwrap();
if let Some(bid) = *input_guard {
drop(input_guard);
stack.push(file.clone());
self.want_build(graph, stack, bid)?;
stack.pop();
}
Ok(())
}
pub fn pop_ready(&mut self) -> Option<BuildId> {
// Here is where we might consider prioritizing from among the available
// ready set.
self.ready.pop_front()
}
/// Look up a PoolState by name.
fn get_pool(&mut self, build: &Build) -> Option<&mut PoolState> {
let owned_name = build.get_pool();
let name = owned_name.as_deref().unwrap_or("");
for (key, pool) in self.pools.iter_mut() {
if key == name {
return Some(pool);
}
}
None
}
/// Mark a build as ready to run.
/// May fail if the build references an unknown pool.
pub fn enqueue(&mut self, id: BuildId, build: &Build) -> anyhow::Result<()> {
self.set(id, build, BuildState::Queued);
let pool = self.get_pool(build).ok_or_else(|| {
anyhow::anyhow!(
"{}: unknown pool {:?}",
build.location,
// Unnamed pool lookups always succeed, this error is about
// named pools.
build.get_pool().as_ref().unwrap()
)
})?;
pool.queued.push_back(id);
Ok(())
}
/// Pop a ready to run queued build.
pub fn pop_queued(&mut self) -> Option<BuildId> {
for (_, pool) in self.pools.iter_mut() {
if pool.depth == 0 || pool.running < pool.depth {
if let Some(id) = pool.queued.pop_front() {
return Some(id);
}
}
}
None
}
}
#[derive(Clone)]
pub struct Options {
pub failures_left: Option<usize>,
pub parallelism: usize,
/// When true, verbosely explain why targets are considered dirty.
pub explain: bool,
/// When true, just mark targets up to date without running anything.
pub adopt: bool,
}
pub struct Work<'a> {
graph: Graph,
db: db::Writer,
pub progress: &'a mut dyn Progress,
options: Options,
file_state: FileState,
last_hashes: Hashes,
build_states: BuildStates,
}
impl<'a> Work<'a> {
pub fn new(
graph: Graph,
last_hashes: Hashes,
db: db::Writer,
options: &Options,
progress: &'a mut dyn Progress,
pools: SmallMap<String, usize>,
) -> Self {
let file_state = FileState::new(&graph);
let build_count = graph.builds.next_id();
Work {
graph,
db,
progress,
options: options.clone(),
file_state,
last_hashes,
build_states: BuildStates::new(build_count, pools),
}
}
pub fn lookup(&mut self, name: &str) -> Option<Arc<File>> {
self.graph.files.lookup(canon_path(name))
}
pub fn want_file(&mut self, file: Arc<File>) -> anyhow::Result<()> {
let mut stack = Vec::new();
self.build_states.want_file(&self.graph, &mut stack, file)
}
pub fn want_every_file(&mut self, exclude: Option<Arc<File>>) -> anyhow::Result<()> {
for id in self.graph.files.all_files() {
if let Some(exclude) = exclude.as_ref() {
if std::ptr::eq(id.as_ref(), exclude.as_ref()) {
continue;
}
}
let mut stack = Vec::new();
self.build_states
.want_file(&self.graph, &mut stack, id.clone())?;
}
Ok(())
}
/// Check whether a given build is ready, generally after one of its inputs
/// has been updated.
fn recheck_ready(&self, id: BuildId) -> bool {
let build = &self.graph.builds[id];
// println!("recheck {:?} {} ({}...)", id, build.location, self.graph.file(build.outs()[0]).name);
for file in build.ordering_ins() {
match *file.input.lock().unwrap() {
None => {
// Only generated inputs contribute to readiness.
continue;
}
Some(id) => {
if self.build_states.get(id) != BuildState::Done {
// println!(" {:?} {} not done, it's {:?}", id, file.name, self.build_states.get(id));
return false;
}
}
}
}
// println!("{:?} now ready", id);
true
}
/// Return the id of any input file to a ready build step that is missing.
/// Assumes the input dependencies have already executed, but otherwise
/// may stat the file on disk.
fn ensure_input_files(
&mut self,
id: BuildId,
discovered: bool,
) -> anyhow::Result<Option<Arc<File>>> {
let build = &self.graph.builds[id];
let files = if discovered {
build.discovered_ins()
} else {
build.dirtying_ins()
};
for file in files {
if !build.phony_output {
if let Some(input_build) = *file.input.lock().unwrap() {
if self.graph.builds[input_build].phony_output {
anyhow::bail!(
"{}: real file '{}' depends on phony output '{}'",
build.location,
build.outs().first().map(|f| f.name.as_str()).unwrap_or("unknown"),
file.name
)
}
}
}
let mtime = match self.file_state.get(file.as_ref()) {
Some(mtime) => mtime,
None => {
if file.input.lock().unwrap().is_some() {
// This dep is generated by some other build step, but the
// build graph didn't cause that other build step to be
// visited first. This is an error in the build file.
// For example, imagine:
// build generated.h: codegen_headers ...
// build generated.stamp: stamp || generated.h
// build foo.o: cc ...
// If we deps discover that foo.o depends on generated.h,
// we must have some dependency path from foo.o to generated.h,
// either direct or indirect (like the stamp). If that
// were present, then we'd already have file_state for this
// file and wouldn't get here.
anyhow::bail!(
"{}: used generated file {}, but has no dependency path to it",
build.location,
file.name
);
}
self.file_state.stat(file.as_ref(), file.path())?
}
};
if mtime == MTime::Missing {
return Ok(Some(file.clone()));
}
}
Ok(None)
}
/// Given a task that just finished, record any discovered deps and hash.
/// Postcondition: all outputs have been stat()ed.
fn record_finished(&mut self, id: BuildId, result: task::TaskResult) -> anyhow::Result<()> {
// Clean up the deps discovered from the task.
let mut deps: Vec<Arc<File>> = Vec::new();
if let Some(names) = result.discovered_deps {
for name in names {
let fileid = self.graph.files.id_from_canonical(canon_path(name));
// Filter duplicates from the file list.
if deps
.iter()
.any(|x| std::ptr::eq(x.as_ref(), fileid.as_ref()))
{
continue;
}
// Filter out any deps that were already dirtying in the build file.
// Note that it's allowed to have a duplicate against an order-only
// dep; see `discover_existing_dep` test.
if self.graph.builds[id]
.dirtying_ins()
.iter()
.any(|x| std::ptr::eq(x.as_ref(), fileid.as_ref()))
{
continue;
}
deps.push(fileid);
}
}
// We may have discovered new deps, so ensure we have mtimes for those.
let deps_changed = self.graph.builds[id].update_discovered(deps);
if deps_changed {
if let Some(missing) = self.ensure_input_files(id, true)? {
anyhow::bail!(
"{}: depfile references nonexistent {}",
self.graph.builds[id].location,
missing.name
);
}
}
// Check for missing input files, which should not be possible
// because they would be requested to be built previously, and if their
// build failed, the following missing output file check would've
// failed.
let missing_input_file = self.graph.builds[id]
.dirtying_ins()
.iter()
.find(|file| self.file_state.get(file.as_ref()).unwrap() == MTime::Missing);
if let Some(missing) = missing_input_file {
anyhow::bail!(
"{}: input {} missing",
self.graph.builds[id].location,
missing.name
);
}
if self.graph.builds[id].get_cmdline().is_none() {
// This is a phony build. Instead of statting outputs (because they
// probably don't exist), copy the newest input mtime onto the
// output files.
let build = &self.graph.builds[id];
let mtime = build.newest_dirtying_in(&self.file_state)?;
for o in build.outs() {
self.file_state.set_if_missing(o, mtime);
}
} else if !self.graph.builds[id].phony_output {
// Update any cached state of the output files to reflect their new state.
// Also error out if any output files were not created by the command.
let missing_output_file = self.stat_all_outputs(id)?;
if let Some(missing) = missing_output_file {
anyhow::bail!(
"{}: output file missing after successful execution: {}",
self.graph.builds[id].location,
missing.name
);
}
}
let build = &self.graph.builds[id];
let hash = hash::hash_build(&self.file_state, build)?;
self.db.write_build(&self.graph, id, hash)?;
Ok(())
}
/// Given a build that just finished, check whether its dependent builds are now ready.
fn ready_dependents(&mut self, id: BuildId) {
let build = &self.graph.builds[id];
self.build_states.set(id, build, BuildState::Done);
let mut dependents = HashSet::new();
for file in build.outs() {
for &build_id in file.dependents.iter() {
if self.build_states.get(build_id) != BuildState::Want {
continue;
}
dependents.insert(build_id);
}
}
for id in dependents {
if !self.recheck_ready(id) {
continue;
}
self.build_states
.set(id, &self.graph.builds[id], BuildState::Ready);
}
}
/// Stat all the outputs of a build.
/// Called before it's run (for determining whether it's up to date) and
/// after (to see if it touched any outputs).
fn stat_all_outputs(&mut self, id: BuildId) -> anyhow::Result<Option<Arc<File>>> {
let build = &self.graph.builds[id];
let mut missing = None;
for file in build.outs() {
let mtime = self.file_state.stat(file.as_ref(), file.path())?;
if mtime == MTime::Missing && missing.is_none() {
missing = Some(file.clone());
}
}
Ok(missing)
}
/// Stat all the input/output files for a given build in anticipation of
/// deciding whether it needs to be run again.
/// Prereq: any dependent input is already generated.
/// Returns a build error if any required input files are missing.
/// Otherwise returns the missing id if any expected but not required files,
/// e.g. outputs, are missing, implying that the build needs to be executed.
fn check_build_files_missing(&mut self, id: BuildId) -> anyhow::Result<Option<Arc<File>>> {
// Ensure we have state for all input files.
if let Some(missing) = self.ensure_input_files(id, false)? {
if missing.input.lock().unwrap().is_none() {
let build = &self.graph.builds[id];
anyhow::bail!("{}: input {} missing", build.location, missing.name);
}
return Ok(Some(missing));
}
if let Some(missing) = self.ensure_input_files(id, true)? {
return Ok(Some(missing));
}
// Ensure we have state for all output files.
// We know this build is solely responsible for updating these outputs,
// and if we're checking if it's dirty we are visiting it the first
// time, so we stat unconditionally.
// This is looking at if the outputs are already present.
if let Some(missing) = self.stat_all_outputs(id)? {
return Ok(Some(missing));
}
// All files accounted for.
Ok(None)
}
/// Check a ready build for whether it needs to run, returning true if so.
/// Prereq: any dependent input is already generated.
fn check_build_dirty(&mut self, id: BuildId) -> anyhow::Result<bool> {
let file_missing = self.check_build_files_missing(id)?;
let build = &self.graph.builds[id];
// If any files are missing, the build is dirty without needing
// to consider hashes.
if let Some(missing) = file_missing {
if self.options.explain {
self.progress.log(&format!(
"explain: {}: input {} missing",
build.location, missing.name
));
}
return Ok(true);
}
// If we get here, all the relevant files are present and stat()ed,
// so compare the hash against the last hash.
// TODO: skip this whole function if no previous hash is present.
// More complex than just moving this block up, because we currently
// assume that we've always checked inputs after we've run a build.
let prev_hash = match self.last_hashes.get(id) {
None => {
if self.options.explain {
self.progress.log(&format!(
"explain: {}: no previous state known",
build.location
));
}
return Ok(true);
}
Some(prev_hash) => prev_hash,
};
let hash = hash::hash_build(&self.file_state, build)?;
if prev_hash != hash {
if self.options.explain {
self.progress
.log(&format!("explain: {}: manifest changed", build.location));
self.progress
.log(&hash::explain_hash_build(&self.file_state, build)?);
}
return Ok(true);
}
Ok(false)
}
/// Create the parent directories of a given list of fileids.
/// Used to create directories used for outputs.
/// TODO: do this within the thread executing the subtask?
fn create_parent_dirs(&self, ids: &[Arc<File>]) -> anyhow::Result<()> {
let mut dirs: Vec<&std::path::Path> = Vec::new();
for out in ids {
if let Some(parent) = out.path().parent() {
if dirs.iter().any(|&p| p == parent) {
continue;
}
std::fs::create_dir_all(parent)?;
dirs.push(parent);
}
}
Ok(())
}
/// Runs the build.
/// Returns the number of tasks executed on successful builds, or None on failed builds.
pub fn run(&mut self) -> anyhow::Result<Option<usize>> {
#[cfg(unix)]
signal::register_sigint();
debug!("started work::run");
let mut tasks_done = 0;
let mut tasks_failed = 0;
let mut runner = task::Runner::new(self.options.parallelism);
while self.build_states.unfinished() {
// Approach:
// - First make sure we're running as many queued tasks as the runner
// allows.
// - Next make sure we've finished or enqueued any tasks that are
// ready.
// - If either one of those made progress, loop, to ensure the other
// one gets to work from the result.
// - If neither made progress, wait for a task to complete and
// loop.
let mut made_progress = false;
while runner.can_start_more() {
let id = match self.build_states.pop_queued() {
Some(id) => id,
None => break,
};
let build = &self.graph.builds[id];
if build.get_cmdline().is_none() {
// This is a phony, don't run it, just go straight to the
// finished state.
self.record_finished(id, task::TaskResult {
termination: process::Termination::Success,
output: vec![],
discovered_deps: None,
})?;
self.ready_dependents(id);
} else {
self.build_states.set(id, build, BuildState::Running);
self.create_parent_dirs(build.outs())?;
runner.start(id, build)?;
self.progress.task_started(id, build);
}
made_progress = true;
}
while let Some(id) = self.build_states.pop_ready() {
if !self.check_build_dirty(id)? {
// Not dirty; go directly to the Done state.
self.ready_dependents(id);
} else if self.options.adopt {
// Act as if the target already finished.
self.record_finished(
id,
task::TaskResult {
termination: process::Termination::Success,
output: vec![],
discovered_deps: None,
},
)?;
self.ready_dependents(id);
} else {
self.build_states.enqueue(id, &self.graph.builds[id])?;
}
made_progress = true;
}
// Update progress here because the if(made_progress) continue
// below might exit the loop, and even if it doesn't, we may end
// up waiting a long time for a task to finish, in which case it
// would be good to display the faster count updates from above.
self.progress.update(&self.build_states.counts);
if made_progress {
continue;
}
if !runner.is_running() {
if tasks_failed > 0 {
// No more progress can be made, hopefully due to tasks that failed.
debug!("exiting main work loop because there are no more running tasks and a task failed");
break;
}
panic!("BUG: no work to do and runner not running");
}
debug!("waiting for task to finish...");
let task = runner.wait(|id, line| {
self.progress.task_output(id, line);
});
debug!("got a finished task");
let build = &self.graph.builds[task.buildid];
if trace::is_enabled() {
let desc = progress::build_message(build);
trace::trace(trace::TraceEvent::Complete(CompleteEvent::new_owned(desc, task.tid + 1, task.span.0, task.span.1)));
}
self.progress
.task_finished(task.buildid, build, &task.result);
match task.result.termination {
process::Termination::Failure => {
if let Some(failures_left) = &mut self.options.failures_left {
*failures_left -= 1;
if *failures_left == 0 {
debug!("Hit the maximum amount of failures allowed, exiting early");
return Ok(None);
}
}
tasks_failed += 1;
self.build_states
.set(task.buildid, build, BuildState::Failed);
}
process::Termination::Interrupted => {
// If the task was interrupted bail immediately.
debug!("A task was interrupted, exiting early");
return Ok(None);
}
process::Termination::Success => {
tasks_done += 1;
self.record_finished(task.buildid, task.result)?;
self.ready_dependents(task.buildid);
}
};
}
debug!("exited main work loop");
// If the user ctl-c's, it likely caused a subtask to fail.
// But at least for the LLVM test suite it can catch sigint and print
// "interrupted by user" and exit with success, and in that case we
// don't want n2 to print a "succeeded" message afterwards.
let success = tasks_failed == 0 && !signal::was_interrupted();
Ok(success.then_some(tasks_done))
}
}