src/deps_log.cc - platform/external/ninja - Git at Google

 // Copyright 2012 Google Inc. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "deps_log.h"

 #include <assert.h>
 #include <stdio.h>
 #include <errno.h>
 #include <string.h>
 #ifndef _WIN32
 #include <unistd.h>
 #endif

 #include <numeric>

 #include "disk_interface.h"
 #include "graph.h"
 #include "metrics.h"
 #include "parallel_map.h"
 #include "state.h"
 #include "util.h"

 // The version is stored as 4 bytes after the signature and also serves as a
 // byte order mark. Signature and version combined are 16 bytes long.
 static constexpr StringPiece kFileSignature { "# ninjadeps\n", 12 };
 static_assert(kFileSignature.size() % 4 == 0,
               "file signature size is not a multiple of 4");
 static constexpr size_t kFileHeaderSize = kFileSignature.size() + 4;
 const int kCurrentVersion = 3;

 // Record size is currently limited to less than the full 32 bit, due to
 // internal buffers having to have this size.
 const unsigned kMaxRecordSize = (1 << 19) - 1;

 DepsLog::~DepsLog() {
   Close();
 }

 bool DepsLog::OpenForWrite(const string& path, const DiskInterface& disk, string* err) {
   if (needs_recompaction_) {
     if (!Recompact(path, disk, err))
       return false;
   }

   file_ = fopen(path.c_str(), "ab");
   if (!file_) {
     *err = strerror(errno);
     return false;
   }
   // Set the buffer size to this and flush the file buffer after every record
   // to make sure records aren't written partially.
   setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1);
   SetCloseOnExec(fileno(file_));

   // Opening a file in append mode doesn't set the file pointer to the file's
   // end on Windows. Do that explicitly.
   fseek(file_, 0, SEEK_END);

   if (ftell(file_) == 0) {
     if (fwrite(kFileSignature.data(), kFileSignature.size(), 1, file_) < 1) {
       *err = strerror(errno);
       return false;
     }
     if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
       *err = strerror(errno);
       return false;
     }
   }
   if (fflush(file_) != 0) {
     *err = strerror(errno);
     return false;
   }
   return true;
 }

 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
                          const vector<Node*>& nodes) {
   return RecordDeps(node, mtime, nodes.size(),
                     nodes.empty() ? NULL : (Node**)&nodes.front());
 }

 bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
                          int node_count, Node** nodes) {
   // Track whether there's any new data to be recorded.
   bool made_change = false;

   // Assign ids to all nodes that are missing one.
   if (node->id() < 0) {
     if (!RecordId(node))
       return false;
     made_change = true;
   }
   for (int i = 0; i < node_count; ++i) {
     if (nodes[i]->id() < 0) {
       if (!RecordId(nodes[i]))
         return false;
       made_change = true;
     }
   }

   // See if the new data is different than the existing data, if any.
   if (!made_change) {
     Deps* deps = GetDeps(node);
     if (!deps ||
         deps->mtime != mtime ||
         deps->node_count != node_count) {
       made_change = true;
     } else {
       for (int i = 0; i < node_count; ++i) {
         if (deps->nodes[i] != nodes[i]) {
           made_change = true;
           break;
         }
       }
     }
   }

   // Don't write anything if there's no new info.
   if (!made_change)
     return true;

   // Update on-disk representation.
   unsigned size = 4 * (1 + 1 + node_count);
   if (size > kMaxRecordSize) {
     errno = ERANGE;
     return false;
   }
   size |= 0x80000000;  // Deps record: set high bit.
   if (fwrite(&size, 4, 1, file_) < 1)
     return false;
   int id = node->id();
   if (fwrite(&id, 4, 1, file_) < 1)
     return false;
   int timestamp = mtime;
   if (fwrite(&timestamp, 4, 1, file_) < 1)
     return false;
   for (int i = 0; i < node_count; ++i) {
     id = nodes[i]->id();
     if (fwrite(&id, 4, 1, file_) < 1)
       return false;
   }
   if (fflush(file_) != 0)
     return false;

   // Update in-memory representation.
   Deps* deps = new Deps(mtime, node_count);
   for (int i = 0; i < node_count; ++i)
     deps->nodes[i] = nodes[i];
   UpdateDeps(node->id(), deps);

   return true;
 }

 void DepsLog::Close() {
   if (file_)
     fclose(file_);
   file_ = NULL;
 }

 // Return the number of words in the record, including the header, or 0 if
 // the header is invalid.
 static inline size_t RecordSizeInWords(size_t header) {
   header &= 0x7FFFFFFF;
   if (header % sizeof(uint32_t) != 0) return 0;
   // Either (node ID and mtime) or (data and checksum)
   if (header < sizeof(uint32_t) * 2) return 0;
   if (header > kMaxRecordSize) return 0;
   return header / sizeof(uint32_t) + 1;
 }

 static inline bool IsDepsRecordHeader(size_t header) {
   return (header & 0x80000000) == 0x80000000;
 }

 static inline bool IsValidDepsRecordHeader(size_t header) {
   return IsDepsRecordHeader(header) && RecordSizeInWords(header);
 }

 /// Split the v3 deps log into independently-parseable chunks using a heuristic.
 /// If the heuristic fails, we'll still load the file correctly, but it could be
 /// slower.
 ///
 /// There are two kinds of records -- path and deps records. Their formats:
 ///
 ///    path:
 ///     - uint32 size -- high bit is clear
 ///     - String content. The string is padded to a multiple of 4 bytes with
 ///       trailing NULs.
 ///     - uint32 checksum (ones complement of the path's index / node ID)
 ///
 ///    deps:
 ///     - uint32 size -- high bit is set
 ///     - int32 output_path_id
 ///     - uint32 output_path_mtime
 ///     - int32 input_path_id[...] -- every remaining word is an input ID
 ///
 /// To split the deps log into chunks, look for uint32 words with the value
 /// 0x8000xxxx, where xxxx is nonzero. Such a word is almost guaranteed to be
 /// the size field of a deps record (with fewer than ~16K dependencies):
 ///  - It can't be part of a string, because paths can't have embedded NULs.
 ///  - It (probably) can't be a node ID, because node IDs are represented using
 ///    "int", and it would be unlikely to have more than 2 billion of them. An
 ///    Android build typically has about 1 million nodes.
 ///  - It's unlikely to be part of a path checksum, because that would also
 ///    imply that we have at least 2 billion nodes.
 ///  - It could be an mtime from 1901, which we rule out by looking for the
 ///    mtime's deps size two words above the split candidate.
 ///
 /// This heuristic can fail in a few ways:
 ///  - We only find path records in the area we scan.
 ///  - The deps records all have >16K of dependencies. (Almost all deps records
 ///    I've seen in the Android build have a few hundred. Only a few have ~10K.)
 ///  - The area contains only deps entries with an mtime from 1901 and one
 ///    dependency.
 ///
 /// Maybe we can add a delimiter to the log format and replace this code. I
 /// believe this heuristic can be adapted to work with the v4 format, which
 /// expands the mtime to 64-bits.
 static std::vector<std::pair<size_t, size_t>>
 SplitDepsLog(const uint32_t* table, size_t size, ThreadPool* thread_pool) {
   if (size == 0) return {};

   std::vector<std::pair<size_t, size_t>> blind_splits = SplitByThreads(size);
   std::vector<std::pair<size_t, size_t>> chunks;
   size_t chunk_start = 0;

   auto split_candidates = ParallelMap(thread_pool, blind_splits,
       [table](std::pair<size_t, size_t> chunk) {
     // Skip the first two words to allow for the 1901 mtime check later on.
     for (size_t index = chunk.first + 2; index < chunk.second; ++index) {
       size_t this_header = table[index];
       if (!IsValidDepsRecordHeader(this_header)) continue;
       if ((this_header & 0xFFFF0000) != 0x80000000) continue;

       // We've either found a deps record or a 1901 mtime (unlikely). If it's an
       // mtime, the word two spaces back will be a valid deps size (0x800xxxxx).
       if (IsValidDepsRecordHeader(table[index - 2])) continue;

       // Success: In a valid deps log, this index must start a deps record.
       return index;
     }
     return SIZE_MAX;
   });
   for (size_t candidate : split_candidates) {
     if (candidate != SIZE_MAX) {
       assert(chunk_start < candidate);
       chunks.push_back({ chunk_start, candidate });
       chunk_start = candidate;
     }
   }

   assert(chunk_start < size);
   chunks.push_back({ chunk_start, size });
   return chunks;
 }

 struct DepsLogInput {
   std::unique_ptr<LoadedFile> file;
   const uint32_t* table = nullptr;
   size_t table_size = 0;
 };

 static bool OpenDepsLogForReading(const std::string& path,
                                   DepsLogInput* log,
                                   std::string* err) {
   *log = {};

   RealDiskInterface file_reader;
   std::string load_err;
   switch (file_reader.LoadFile(path, &log->file, &load_err)) {
   case FileReader::Okay:
     break;
   case FileReader::NotFound:
     return true;
   default:
     *err = load_err;
     return false;
   }

   bool valid_header = false;
   int version = 0;
   if (log->file->content().size() >= kFileHeaderSize ||
       log->file->content().substr(0, kFileSignature.size()) == kFileSignature) {
     valid_header = true;
     memcpy(&version,
            log->file->content().data() + kFileSignature.size(),
            sizeof(version));
   }

   // Note: For version differences, this should migrate to the new format.
   // But the v1 format could sometimes (rarely) end up with invalid data, so
   // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
   // and there was no release with it, so pretend that it never happened.)
   if (!valid_header || version != kCurrentVersion) {
     if (version == 1)
       *err = "deps log version change; rebuilding";
     else
       *err = "bad deps log signature or version; starting over";
     log->file.reset();
     unlink(path.c_str());
     // Don't report this as a failure.  An empty deps log will cause
     // us to rebuild the outputs anyway.
     return true;
   }

   log->table =
       reinterpret_cast<const uint32_t*>(
           log->file->content().data() + kFileHeaderSize);
   log->table_size =
       (log->file->content().size() - kFileHeaderSize) / sizeof(uint32_t);

   return true;
 }

 bool DepsLog::Load(const string& path, State* state, string* err) {
   METRIC_RECORD(".ninja_deps load");

   assert(nodes_.empty());
   DepsLogInput log;

   if (!OpenDepsLogForReading(path, &log, err)) return false;
   if (log.file.get() == nullptr) return true;

   struct NINJA_ALIGNAS_CACHE_LINE Chunk {
     size_t start = 0;
     size_t stop = 0;
     int first_node_id = 0;
     int initial_node_count = 0;
     int final_node_count = 0;
     size_t deps_count = 0;
     bool parse_error = false;
   };

   std::unique_ptr<ThreadPool> thread_pool = CreateThreadPool();

   std::vector<Chunk> chunks;
   for (std::pair<size_t, size_t> span :
       SplitDepsLog(log.table, log.table_size, thread_pool.get())) {
     Chunk chunk {};
     chunk.start = span.first;
     chunk.stop = span.second;
     chunks.push_back(chunk);
   }

   // Compute the starting node ID for each chunk. The result is correct as long as
   // preceding chunks are parsed successfully. If there is a parsing error in a
   // chunk, then following chunks are discarded after the validation pass.
   ParallelMap(thread_pool.get(), chunks, [&log](Chunk& chunk) {
     size_t index = chunk.start;
     while (index < chunk.stop) {
       size_t header = log.table[index];
       size_t size = RecordSizeInWords(header);
       if (!size) return; // invalid header
       if (!IsDepsRecordHeader(header)) {
         ++chunk.initial_node_count;
       }
       index += size;
     }
   });
   int initial_node_count = 0;
   for (size_t i = 0; i < chunks.size(); ++i) {
     Chunk& chunk = chunks[i];
     chunk.first_node_id = initial_node_count;
     initial_node_count += chunk.initial_node_count;
   }

   // A map from node ID to the final file table index of the dep record
   // outputting the given node ID. The index is biased by 1 because 0 indicates
   // that no dep record outputs this ID.
   std::vector<std::atomic<size_t>> dep_index(initial_node_count);
   // A map from node ID to file index of that node, with no bias.
   std::vector<size_t> node_index(initial_node_count);

   // The main parsing pass. Validate each chunk's entries and, for each node ID,
   // record the location of its node and deps records. If there is parser error,
   // truncate the log just before the problem record.
   ParallelMap(thread_pool.get(), chunks,
       [&log, &dep_index, &node_index](Chunk& chunk) {
     size_t index = chunk.start;
     int next_node_id = chunk.first_node_id;
     while (index < chunk.stop) {
       size_t header = log.table[index];
       size_t size = RecordSizeInWords(header);
       if (!size || (index + size > chunk.stop)) break;
       if (IsDepsRecordHeader(header)) {
         // Verify that input/output node IDs are valid.
         int output_id = log.table[index + 1];
         if (output_id < 0 || output_id >= next_node_id) break;
         for (size_t i = 3; i < size; ++i) {
           int input_id = log.table[index + i];
           if (input_id < 0 || input_id >= next_node_id) break;
         }
         AtomicUpdateMaximum(&dep_index[output_id], index + 1);
         ++chunk.deps_count;
       } else {
         // Validate the path's checksum.
         int checksum = log.table[index + size - 1];
         if (checksum != ~next_node_id) break;
         node_index[next_node_id] = index;
         ++next_node_id;
         ++chunk.final_node_count;
       }
       index += size;
     }
     // We'll exit early on a parser error.
     if (index < chunk.stop) {
       chunk.stop = index;
       chunk.parse_error = true;
     }
   });
   int node_count = 0;
   size_t total_dep_record_count = 0;
   for (size_t i = 0; i < chunks.size(); ++i) {
     Chunk& chunk = chunks[i];
     assert(chunk.first_node_id == node_count);
     total_dep_record_count += chunk.deps_count;
     node_count += chunk.final_node_count;
     if (chunk.parse_error) {
       // Part of this chunk may have been parsed successfully, so keep it, but
       // discard all later chunks.
       chunks.resize(i + 1);
       break;
     }
   }

   // The final node count could be smaller than the initial count if there was a
   // parser error.
   assert(node_count <= initial_node_count);

   // The log is valid. Commit the nodes into the state graph. First make sure
   // that the hash table has at least one bucket for each node in this deps log.
   state->paths_.reserve(node_count);
   nodes_.resize(node_count);
   ParallelMap(thread_pool.get(), IntegralRange<int>(0, node_count),
       [this, state, &log, &node_index](int node_id) {
     size_t index = node_index[node_id];
     size_t header = log.table[index];
     size_t size = RecordSizeInWords(header);
     const char* path = reinterpret_cast<const char*>(&log.table[index + 1]);
     size_t path_size = (size - 2) * sizeof(uint32_t);
     if (path[path_size - 1] == '\0') --path_size;
     if (path[path_size - 1] == '\0') --path_size;
     if (path[path_size - 1] == '\0') --path_size;
     // It is not necessary to pass in a correct slash_bits here. It will
     // either be a Node that's in the manifest (in which case it will
     // already have a correct slash_bits that GetNode will look up), or it
     // is an implicit dependency from a .d which does not affect the build
     // command (and so need not have its slashes maintained).
     Node* node = state->GetNode(StringPiece(path, path_size), 0);
     assert(node->id() < 0);
     node->set_id(node_id);
     nodes_[node_id] = node;
   });

   // Add the deps records.
   deps_.resize(node_count);
   std::vector<size_t> unique_counts = ParallelMap(thread_pool.get(),
       SplitByThreads(node_count),
       [this, &log, &dep_index](std::pair<int, int> node_chunk) {
     size_t unique_count = 0;
     for (int node_id = node_chunk.first; node_id < node_chunk.second; ++node_id) {
       size_t index = dep_index[node_id];
       if (index == 0) continue;
       --index;
       ++unique_count;
       size_t header = log.table[index];
       size_t size = RecordSizeInWords(header);
       assert(size != 0 && IsDepsRecordHeader(header));
       int output_id = log.table[index + 1];
       int mtime = log.table[index + 2];
       int deps_count = size - 3;
       Deps* deps = new Deps(mtime, deps_count);
       for (int i = 0; i < deps_count; ++i) {
         int input_id = log.table[index + 3 + i];
         Node* node = nodes_[input_id];
         assert(node != nullptr);
         deps->nodes[i] = node;
       }
       deps_[output_id] = deps;
     }
     return unique_count;
   });
   size_t unique_dep_record_count = std::accumulate(unique_counts.begin(),
                                                    unique_counts.end(), 0);

   const size_t actual_file_size = log.file->content().size();
   const size_t parsed_file_size = kFileHeaderSize +
       (chunks.empty() ? 0 : chunks.back().stop) * sizeof(uint32_t);
   assert(parsed_file_size <= actual_file_size);
   if (parsed_file_size < actual_file_size) {
     // An error occurred while loading; try to recover by truncating the file to
     // the last fully-read record.
     *err = "premature end of file";
     log.file.reset();

     if (!Truncate(path, parsed_file_size, err))
       return false;

     // The truncate succeeded; we'll just report the load error as a
     // warning because the build can proceed.
     *err += "; recovering";
     return true;
   }

   // Rebuild the log if there are too many dead records.
   const unsigned kMinCompactionEntryCount = 1000;
   const unsigned kCompactionRatio = 3;
   if (total_dep_record_count > kMinCompactionEntryCount &&
       total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
     needs_recompaction_ = true;
   }

   return true;
 }

 DepsLog::Deps* DepsLog::GetDeps(Node* node) {
   // Abort if the node has no id (never referenced in the deps) or if
   // there's no deps recorded for the node.
   if (node->id() < 0 || node->id() >= (int)deps_.size())
     return NULL;
   return deps_[node->id()];
 }

 bool DepsLog::Recompact(const string& path, const DiskInterface& disk, string* err) {
   METRIC_RECORD(".ninja_deps recompact");

   Close();
   string temp_path = path + ".recompact";

   // OpenForWrite() opens for append.  Make sure it's not appending to a
   // left-over file from a previous recompaction attempt that crashed somehow.
   unlink(temp_path.c_str());

   DepsLog new_log;
   if (!new_log.OpenForWrite(temp_path, disk, err))
     return false;

   // Clear all known ids so that new ones can be reassigned.  The new indices
   // will refer to the ordering in new_log, not in the current log.
   for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
     (*i)->set_id(-1);

   // Write out all deps again.
   for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
     Deps* deps = deps_[old_id];
     if (!deps) continue;  // If nodes_[old_id] is a leaf, it has no deps.

     Node* node = nodes_[old_id];
     if (node->in_edge()) {
       // If the current manifest defines this edge, skip if it's not dep
       // producing.
       if (node->in_edge()->GetBinding("deps").empty()) continue;
     } else {
       // If the current manifest does not define this edge, skip if it's missing
       // from the disk.
       string err;
       TimeStamp mtime = disk.Stat(node->path(), &err);
       if (mtime == -1)
         Error("%s", err.c_str()); // log and ignore Stat() errors
       if (mtime == 0)
         continue;
     }

     if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
                             deps->node_count, deps->nodes)) {
       new_log.Close();
       return false;
     }
   }

   new_log.Close();

   // All nodes now have ids that refer to new_log, so steal its data.
   deps_.swap(new_log.deps_);
   nodes_.swap(new_log.nodes_);

   if (unlink(path.c_str()) < 0) {
     *err = strerror(errno);
     return false;
   }

   if (rename(temp_path.c_str(), path.c_str()) < 0) {
     *err = strerror(errno);
     return false;
   }

   return true;
 }

 bool DepsLog::IsDepsEntryLiveFor(Node* node) {
   // Skip entries that don't have in-edges or whose edges don't have a
   // "deps" attribute. They were in the deps log from previous builds, but the
   // files they were for were removed from the build.
   return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
 }

 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
   if (out_id >= (int)deps_.size())
     deps_.resize(out_id + 1);

   bool delete_old = deps_[out_id] != NULL;
   if (delete_old)
     delete deps_[out_id];
   deps_[out_id] = deps;
   return delete_old;
 }

 bool DepsLog::RecordId(Node* node) {
   int path_size = node->path().size();
   int padding = (4 - path_size % 4) % 4;  // Pad path to 4 byte boundary.

   unsigned size = path_size + padding + 4;
   if (size > kMaxRecordSize) {
     errno = ERANGE;
     return false;
   }
   if (fwrite(&size, 4, 1, file_) < 1)
     return false;
   if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
     assert(node->path().size() > 0);
     return false;
   }
   if (padding && fwrite("\0\0", padding, 1, file_) < 1)
     return false;
   int id = nodes_.size();
   unsigned checksum = ~(unsigned)id;
   if (fwrite(&checksum, 4, 1, file_) < 1)
     return false;
   if (fflush(file_) != 0)
     return false;

   node->set_id(id);
   nodes_.push_back(node);

   return true;
 }
	// Copyright 2012 Google Inc. All Rights Reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "deps_log.h"

	#include <assert.h>
	#include <stdio.h>
	#include <errno.h>
	#include <string.h>
	#ifndef _WIN32
	#include <unistd.h>
	#endif

	#include <numeric>

	#include "disk_interface.h"
	#include "graph.h"
	#include "metrics.h"
	#include "parallel_map.h"
	#include "state.h"
	#include "util.h"

	// The version is stored as 4 bytes after the signature and also serves as a
	// byte order mark. Signature and version combined are 16 bytes long.
	static constexpr StringPiece kFileSignature { "# ninjadeps\n", 12 };
	static_assert(kFileSignature.size() % 4 == 0,
	"file signature size is not a multiple of 4");
	static constexpr size_t kFileHeaderSize = kFileSignature.size() + 4;
	const int kCurrentVersion = 3;

	// Record size is currently limited to less than the full 32 bit, due to
	// internal buffers having to have this size.
	const unsigned kMaxRecordSize = (1 << 19) - 1;

	DepsLog::~DepsLog() {
	Close();
	}

	bool DepsLog::OpenForWrite(const string& path, const DiskInterface& disk, string* err) {
	if (needs_recompaction_) {
	if (!Recompact(path, disk, err))
	return false;
	}

	file_ = fopen(path.c_str(), "ab");
	if (!file_) {
	*err = strerror(errno);
	return false;
	}
	// Set the buffer size to this and flush the file buffer after every record
	// to make sure records aren't written partially.
	setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1);
	SetCloseOnExec(fileno(file_));

	// Opening a file in append mode doesn't set the file pointer to the file's
	// end on Windows. Do that explicitly.
	fseek(file_, 0, SEEK_END);

	if (ftell(file_) == 0) {
	if (fwrite(kFileSignature.data(), kFileSignature.size(), 1, file_) < 1) {
	*err = strerror(errno);
	return false;
	}
	if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
	*err = strerror(errno);
	return false;
	}
	}
	if (fflush(file_) != 0) {
	*err = strerror(errno);
	return false;
	}
	return true;
	}

	bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
	const vector<Node*>& nodes) {
	return RecordDeps(node, mtime, nodes.size(),
	nodes.empty() ? NULL : (Node**)&nodes.front());
	}

	bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
	int node_count, Node** nodes) {
	// Track whether there's any new data to be recorded.
	bool made_change = false;

	// Assign ids to all nodes that are missing one.
	if (node->id() < 0) {
	if (!RecordId(node))
	return false;
	made_change = true;
	}
	for (int i = 0; i < node_count; ++i) {
	if (nodes[i]->id() < 0) {
	if (!RecordId(nodes[i]))
	return false;
	made_change = true;
	}
	}

	// See if the new data is different than the existing data, if any.
	if (!made_change) {
	Deps* deps = GetDeps(node);
	if (!deps \|\|
	deps->mtime != mtime \|\|
	deps->node_count != node_count) {
	made_change = true;
	} else {
	for (int i = 0; i < node_count; ++i) {
	if (deps->nodes[i] != nodes[i]) {
	made_change = true;
	break;
	}
	}
	}
	}

	// Don't write anything if there's no new info.
	if (!made_change)
	return true;

	// Update on-disk representation.
	unsigned size = 4 * (1 + 1 + node_count);
	if (size > kMaxRecordSize) {
	errno = ERANGE;
	return false;
	}
	size \|= 0x80000000; // Deps record: set high bit.
	if (fwrite(&size, 4, 1, file_) < 1)
	return false;
	int id = node->id();
	if (fwrite(&id, 4, 1, file_) < 1)
	return false;
	int timestamp = mtime;
	if (fwrite(&timestamp, 4, 1, file_) < 1)
	return false;
	for (int i = 0; i < node_count; ++i) {
	id = nodes[i]->id();
	if (fwrite(&id, 4, 1, file_) < 1)
	return false;
	}
	if (fflush(file_) != 0)
	return false;

	// Update in-memory representation.
	Deps* deps = new Deps(mtime, node_count);
	for (int i = 0; i < node_count; ++i)
	deps->nodes[i] = nodes[i];
	UpdateDeps(node->id(), deps);

	return true;
	}

	void DepsLog::Close() {
	if (file_)
	fclose(file_);
	file_ = NULL;
	}

	// Return the number of words in the record, including the header, or 0 if
	// the header is invalid.
	static inline size_t RecordSizeInWords(size_t header) {
	header &= 0x7FFFFFFF;
	if (header % sizeof(uint32_t) != 0) return 0;
	// Either (node ID and mtime) or (data and checksum)
	if (header < sizeof(uint32_t) * 2) return 0;
	if (header > kMaxRecordSize) return 0;
	return header / sizeof(uint32_t) + 1;
	}

	static inline bool IsDepsRecordHeader(size_t header) {
	return (header & 0x80000000) == 0x80000000;
	}

	static inline bool IsValidDepsRecordHeader(size_t header) {
	return IsDepsRecordHeader(header) && RecordSizeInWords(header);
	}

	/// Split the v3 deps log into independently-parseable chunks using a heuristic.
	/// If the heuristic fails, we'll still load the file correctly, but it could be
	/// slower.
	///
	/// There are two kinds of records -- path and deps records. Their formats:
	///
	/// path:
	/// - uint32 size -- high bit is clear
	/// - String content. The string is padded to a multiple of 4 bytes with
	/// trailing NULs.
	/// - uint32 checksum (ones complement of the path's index / node ID)
	///
	/// deps:
	/// - uint32 size -- high bit is set
	/// - int32 output_path_id
	/// - uint32 output_path_mtime
	/// - int32 input_path_id[...] -- every remaining word is an input ID
	///
	/// To split the deps log into chunks, look for uint32 words with the value
	/// 0x8000xxxx, where xxxx is nonzero. Such a word is almost guaranteed to be
	/// the size field of a deps record (with fewer than ~16K dependencies):
	/// - It can't be part of a string, because paths can't have embedded NULs.
	/// - It (probably) can't be a node ID, because node IDs are represented using
	/// "int", and it would be unlikely to have more than 2 billion of them. An
	/// Android build typically has about 1 million nodes.
	/// - It's unlikely to be part of a path checksum, because that would also
	/// imply that we have at least 2 billion nodes.
	/// - It could be an mtime from 1901, which we rule out by looking for the
	/// mtime's deps size two words above the split candidate.
	///
	/// This heuristic can fail in a few ways:
	/// - We only find path records in the area we scan.
	/// - The deps records all have >16K of dependencies. (Almost all deps records
	/// I've seen in the Android build have a few hundred. Only a few have ~10K.)
	/// - The area contains only deps entries with an mtime from 1901 and one
	/// dependency.
	///
	/// Maybe we can add a delimiter to the log format and replace this code. I
	/// believe this heuristic can be adapted to work with the v4 format, which
	/// expands the mtime to 64-bits.
	static std::vector<std::pair<size_t, size_t>>
	SplitDepsLog(const uint32_t* table, size_t size, ThreadPool* thread_pool) {
	if (size == 0) return {};

	std::vector<std::pair<size_t, size_t>> blind_splits = SplitByThreads(size);
	std::vector<std::pair<size_t, size_t>> chunks;
	size_t chunk_start = 0;

	auto split_candidates = ParallelMap(thread_pool, blind_splits,
	[table](std::pair<size_t, size_t> chunk) {
	// Skip the first two words to allow for the 1901 mtime check later on.
	for (size_t index = chunk.first + 2; index < chunk.second; ++index) {
	size_t this_header = table[index];
	if (!IsValidDepsRecordHeader(this_header)) continue;
	if ((this_header & 0xFFFF0000) != 0x80000000) continue;

	// We've either found a deps record or a 1901 mtime (unlikely). If it's an
	// mtime, the word two spaces back will be a valid deps size (0x800xxxxx).
	if (IsValidDepsRecordHeader(table[index - 2])) continue;

	// Success: In a valid deps log, this index must start a deps record.
	return index;
	}
	return SIZE_MAX;
	});
	for (size_t candidate : split_candidates) {
	if (candidate != SIZE_MAX) {
	assert(chunk_start < candidate);
	chunks.push_back({ chunk_start, candidate });
	chunk_start = candidate;
	}
	}

	assert(chunk_start < size);
	chunks.push_back({ chunk_start, size });
	return chunks;
	}

	struct DepsLogInput {
	std::unique_ptr<LoadedFile> file;
	const uint32_t* table = nullptr;
	size_t table_size = 0;
	};

	static bool OpenDepsLogForReading(const std::string& path,
	DepsLogInput* log,
	std::string* err) {
	*log = {};

	RealDiskInterface file_reader;
	std::string load_err;
	switch (file_reader.LoadFile(path, &log->file, &load_err)) {
	case FileReader::Okay:
	break;
	case FileReader::NotFound:
	return true;
	default:
	*err = load_err;
	return false;
	}

	bool valid_header = false;
	int version = 0;
	if (log->file->content().size() >= kFileHeaderSize \|\|
	log->file->content().substr(0, kFileSignature.size()) == kFileSignature) {
	valid_header = true;
	memcpy(&version,
	log->file->content().data() + kFileSignature.size(),
	sizeof(version));
	}

	// Note: For version differences, this should migrate to the new format.
	// But the v1 format could sometimes (rarely) end up with invalid data, so
	// don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
	// and there was no release with it, so pretend that it never happened.)
	if (!valid_header \|\| version != kCurrentVersion) {
	if (version == 1)
	*err = "deps log version change; rebuilding";
	else
	*err = "bad deps log signature or version; starting over";
	log->file.reset();
	unlink(path.c_str());
	// Don't report this as a failure. An empty deps log will cause
	// us to rebuild the outputs anyway.
	return true;
	}

	log->table =
	reinterpret_cast<const uint32_t*>(
	log->file->content().data() + kFileHeaderSize);
	log->table_size =
	(log->file->content().size() - kFileHeaderSize) / sizeof(uint32_t);

	return true;
	}

	bool DepsLog::Load(const string& path, State* state, string* err) {
	METRIC_RECORD(".ninja_deps load");

	assert(nodes_.empty());
	DepsLogInput log;

	if (!OpenDepsLogForReading(path, &log, err)) return false;
	if (log.file.get() == nullptr) return true;

	struct NINJA_ALIGNAS_CACHE_LINE Chunk {
	size_t start = 0;
	size_t stop = 0;
	int first_node_id = 0;
	int initial_node_count = 0;
	int final_node_count = 0;
	size_t deps_count = 0;
	bool parse_error = false;
	};

	std::unique_ptr<ThreadPool> thread_pool = CreateThreadPool();

	std::vector<Chunk> chunks;
	for (std::pair<size_t, size_t> span :
	SplitDepsLog(log.table, log.table_size, thread_pool.get())) {
	Chunk chunk {};
	chunk.start = span.first;
	chunk.stop = span.second;
	chunks.push_back(chunk);
	}

	// Compute the starting node ID for each chunk. The result is correct as long as
	// preceding chunks are parsed successfully. If there is a parsing error in a
	// chunk, then following chunks are discarded after the validation pass.
	ParallelMap(thread_pool.get(), chunks, [&log](Chunk& chunk) {
	size_t index = chunk.start;
	while (index < chunk.stop) {
	size_t header = log.table[index];
	size_t size = RecordSizeInWords(header);
	if (!size) return; // invalid header
	if (!IsDepsRecordHeader(header)) {
	++chunk.initial_node_count;
	}
	index += size;
	}
	});
	int initial_node_count = 0;
	for (size_t i = 0; i < chunks.size(); ++i) {
	Chunk& chunk = chunks[i];
	chunk.first_node_id = initial_node_count;
	initial_node_count += chunk.initial_node_count;
	}

	// A map from node ID to the final file table index of the dep record
	// outputting the given node ID. The index is biased by 1 because 0 indicates
	// that no dep record outputs this ID.
	std::vector<std::atomic<size_t>> dep_index(initial_node_count);
	// A map from node ID to file index of that node, with no bias.
	std::vector<size_t> node_index(initial_node_count);

	// The main parsing pass. Validate each chunk's entries and, for each node ID,
	// record the location of its node and deps records. If there is parser error,
	// truncate the log just before the problem record.
	ParallelMap(thread_pool.get(), chunks,
	[&log, &dep_index, &node_index](Chunk& chunk) {
	size_t index = chunk.start;
	int next_node_id = chunk.first_node_id;
	while (index < chunk.stop) {
	size_t header = log.table[index];
	size_t size = RecordSizeInWords(header);
	if (!size \|\| (index + size > chunk.stop)) break;
	if (IsDepsRecordHeader(header)) {
	// Verify that input/output node IDs are valid.
	int output_id = log.table[index + 1];
	if (output_id < 0 \|\| output_id >= next_node_id) break;
	for (size_t i = 3; i < size; ++i) {
	int input_id = log.table[index + i];
	if (input_id < 0 \|\| input_id >= next_node_id) break;
	}
	AtomicUpdateMaximum(&dep_index[output_id], index + 1);
	++chunk.deps_count;
	} else {
	// Validate the path's checksum.
	int checksum = log.table[index + size - 1];
	if (checksum != ~next_node_id) break;
	node_index[next_node_id] = index;
	++next_node_id;
	++chunk.final_node_count;
	}
	index += size;
	}
	// We'll exit early on a parser error.
	if (index < chunk.stop) {
	chunk.stop = index;
	chunk.parse_error = true;
	}
	});
	int node_count = 0;
	size_t total_dep_record_count = 0;
	for (size_t i = 0; i < chunks.size(); ++i) {
	Chunk& chunk = chunks[i];
	assert(chunk.first_node_id == node_count);
	total_dep_record_count += chunk.deps_count;
	node_count += chunk.final_node_count;
	if (chunk.parse_error) {
	// Part of this chunk may have been parsed successfully, so keep it, but
	// discard all later chunks.
	chunks.resize(i + 1);
	break;
	}
	}

	// The final node count could be smaller than the initial count if there was a
	// parser error.
	assert(node_count <= initial_node_count);

	// The log is valid. Commit the nodes into the state graph. First make sure
	// that the hash table has at least one bucket for each node in this deps log.
	state->paths_.reserve(node_count);
	nodes_.resize(node_count);
	ParallelMap(thread_pool.get(), IntegralRange<int>(0, node_count),
	[this, state, &log, &node_index](int node_id) {
	size_t index = node_index[node_id];
	size_t header = log.table[index];
	size_t size = RecordSizeInWords(header);
	const char* path = reinterpret_cast<const char*>(&log.table[index + 1]);
	size_t path_size = (size - 2) * sizeof(uint32_t);
	if (path[path_size - 1] == '\0') --path_size;
	if (path[path_size - 1] == '\0') --path_size;
	if (path[path_size - 1] == '\0') --path_size;
	// It is not necessary to pass in a correct slash_bits here. It will
	// either be a Node that's in the manifest (in which case it will
	// already have a correct slash_bits that GetNode will look up), or it
	// is an implicit dependency from a .d which does not affect the build
	// command (and so need not have its slashes maintained).
	Node* node = state->GetNode(StringPiece(path, path_size), 0);
	assert(node->id() < 0);
	node->set_id(node_id);
	nodes_[node_id] = node;
	});

	// Add the deps records.
	deps_.resize(node_count);
	std::vector<size_t> unique_counts = ParallelMap(thread_pool.get(),
	SplitByThreads(node_count),
	[this, &log, &dep_index](std::pair<int, int> node_chunk) {
	size_t unique_count = 0;
	for (int node_id = node_chunk.first; node_id < node_chunk.second; ++node_id) {
	size_t index = dep_index[node_id];
	if (index == 0) continue;
	--index;
	++unique_count;
	size_t header = log.table[index];
	size_t size = RecordSizeInWords(header);
	assert(size != 0 && IsDepsRecordHeader(header));
	int output_id = log.table[index + 1];
	int mtime = log.table[index + 2];
	int deps_count = size - 3;
	Deps* deps = new Deps(mtime, deps_count);
	for (int i = 0; i < deps_count; ++i) {
	int input_id = log.table[index + 3 + i];
	Node* node = nodes_[input_id];
	assert(node != nullptr);
	deps->nodes[i] = node;
	}
	deps_[output_id] = deps;
	}
	return unique_count;
	});
	size_t unique_dep_record_count = std::accumulate(unique_counts.begin(),
	unique_counts.end(), 0);

	const size_t actual_file_size = log.file->content().size();
	const size_t parsed_file_size = kFileHeaderSize +
	(chunks.empty() ? 0 : chunks.back().stop) * sizeof(uint32_t);
	assert(parsed_file_size <= actual_file_size);
	if (parsed_file_size < actual_file_size) {
	// An error occurred while loading; try to recover by truncating the file to
	// the last fully-read record.
	*err = "premature end of file";
	log.file.reset();

	if (!Truncate(path, parsed_file_size, err))
	return false;

	// The truncate succeeded; we'll just report the load error as a
	// warning because the build can proceed.
	*err += "; recovering";
	return true;
	}

	// Rebuild the log if there are too many dead records.
	const unsigned kMinCompactionEntryCount = 1000;
	const unsigned kCompactionRatio = 3;
	if (total_dep_record_count > kMinCompactionEntryCount &&
	total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
	needs_recompaction_ = true;
	}

	return true;
	}

	DepsLog::Deps* DepsLog::GetDeps(Node* node) {
	// Abort if the node has no id (never referenced in the deps) or if
	// there's no deps recorded for the node.
	if (node->id() < 0 \|\| node->id() >= (int)deps_.size())
	return NULL;
	return deps_[node->id()];
	}

	bool DepsLog::Recompact(const string& path, const DiskInterface& disk, string* err) {
	METRIC_RECORD(".ninja_deps recompact");

	Close();
	string temp_path = path + ".recompact";

	// OpenForWrite() opens for append. Make sure it's not appending to a
	// left-over file from a previous recompaction attempt that crashed somehow.
	unlink(temp_path.c_str());

	DepsLog new_log;
	if (!new_log.OpenForWrite(temp_path, disk, err))
	return false;

	// Clear all known ids so that new ones can be reassigned. The new indices
	// will refer to the ordering in new_log, not in the current log.
	for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
	(*i)->set_id(-1);

	// Write out all deps again.
	for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
	Deps* deps = deps_[old_id];
	if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.

	Node* node = nodes_[old_id];
	if (node->in_edge()) {
	// If the current manifest defines this edge, skip if it's not dep
	// producing.
	if (node->in_edge()->GetBinding("deps").empty()) continue;
	} else {
	// If the current manifest does not define this edge, skip if it's missing
	// from the disk.
	string err;
	TimeStamp mtime = disk.Stat(node->path(), &err);
	if (mtime == -1)
	Error("%s", err.c_str()); // log and ignore Stat() errors
	if (mtime == 0)
	continue;
	}

	if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
	deps->node_count, deps->nodes)) {
	new_log.Close();
	return false;
	}
	}

	new_log.Close();

	// All nodes now have ids that refer to new_log, so steal its data.
	deps_.swap(new_log.deps_);
	nodes_.swap(new_log.nodes_);

	if (unlink(path.c_str()) < 0) {
	*err = strerror(errno);
	return false;
	}

	if (rename(temp_path.c_str(), path.c_str()) < 0) {
	*err = strerror(errno);
	return false;
	}

	return true;
	}

	bool DepsLog::IsDepsEntryLiveFor(Node* node) {
	// Skip entries that don't have in-edges or whose edges don't have a
	// "deps" attribute. They were in the deps log from previous builds, but the
	// files they were for were removed from the build.
	return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
	}

	bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
	if (out_id >= (int)deps_.size())
	deps_.resize(out_id + 1);

	bool delete_old = deps_[out_id] != NULL;
	if (delete_old)
	delete deps_[out_id];
	deps_[out_id] = deps;
	return delete_old;
	}

	bool DepsLog::RecordId(Node* node) {
	int path_size = node->path().size();
	int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.

	unsigned size = path_size + padding + 4;
	if (size > kMaxRecordSize) {
	errno = ERANGE;
	return false;
	}
	if (fwrite(&size, 4, 1, file_) < 1)
	return false;
	if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
	assert(node->path().size() > 0);
	return false;
	}
	if (padding && fwrite("\0\0", padding, 1, file_) < 1)
	return false;
	int id = nodes_.size();
	unsigned checksum = ~(unsigned)id;
	if (fwrite(&checksum, 4, 1, file_) < 1)
	return false;
	if (fflush(file_) != 0)
	return false;

	node->set_id(id);
	nodes_.push_back(node);

	return true;
	}