// Copyright 2018 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "manifest_chunk_parser.h"
#include <assert.h>
#include <thread>
#include <unordered_map>
#include "lexer.h"
#include "metrics.h"
#include "thread_pool.h"
#include "util.h"
namespace manifest_chunk {
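// ChunkParser parses a single chunk of a manifest file into a vector of
// ParserItem values. Declarations that don't affect scoping (pools, rules,
// bindings, default targets, and edges) are batched into Clump items, while
// includes, errors, and the ninja_required_version check are emitted as
// standalone items.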
class ChunkParser {
  const LoadedFile& file_;
  const bool experimentalEnvvar_;
  Lexer lexer_;
  const char* chunk_end_ = nullptr;
  std::vector<ParserItem>* out_ = nullptr;
  Clump* current_clump_ = nullptr;

  Clump* MakeClump() {
    if (current_clump_)
      return current_clump_;
    current_clump_ = new Clump { file_ };
    out_->push_back(current_clump_);
    return current_clump_;
  }

  void OutItem(ParserItem item) {
    current_clump_ = nullptr;
    out_->push_back(std::move(item));
  }

  bool OutError(const std::string& err) {
    OutItem(new Error { err });
    return false;
  }

  bool LexerError(const std::string& message);
  bool ExpectToken(Lexer::Token expected);
  bool ParseLet(StringPiece* key, StringPiece* value);
  bool ParseFileInclude(bool new_scope);
  bool ParsePool();
  bool ParseDefault();
  bool ParseBinding();
  bool ParseRule();
  bool ParseEdge();

 public:
  ChunkParser(const LoadedFile& file,
              StringPiece chunk_content,
              std::vector<ParserItem>* out,
              bool experimentalEnvvar)
      : file_(file),
        experimentalEnvvar_(experimentalEnvvar),
        lexer_(file.filename(), file.content(), chunk_content.data()),
        chunk_end_(chunk_content.data() + chunk_content.size()),
        out_(out) {}

  bool ParseChunk();
};

bool ChunkParser::LexerError(const std::string& message) {
  std::string err;
  lexer_.Error(message, &err);
  return OutError(err);
}

bool ChunkParser::ExpectToken(Lexer::Token expected) {
  Lexer::Token token = lexer_.ReadToken();
  if (token != expected) {
    std::string message = std::string("expected ") + Lexer::TokenName(expected);
    message += std::string(", got ") + Lexer::TokenName(token);
    message += Lexer::TokenErrorHint(expected);
    return LexerError(message);
  }
  return true;
}

bool ChunkParser::ParseLet(StringPiece* key, StringPiece* value) {
  if (!lexer_.ReadIdent(key))
    return LexerError("expected variable name");
  if (!ExpectToken(Lexer::EQUALS))
    return false;
  std::string err;
  if (!lexer_.ReadBindingValue(value, &err))
    return OutError(err);
  return true;
}

bool ChunkParser::ParseFileInclude(bool new_scope) {
  Include* include = new Include();
  include->new_scope_ = new_scope;
  std::string err;
  if (!lexer_.ReadPath(&include->path_, &err))
    return OutError(err);
  include->diag_pos_ = lexer_.GetLastTokenOffset();
  if (!ExpectToken(Lexer::NEWLINE))
    return false;
  // If new_scope == false, the next token could simply be ignored: a stray
  // Lexer::INDENT would eventually fail with
  // 'ninja: build.ninja:NNN: unexpected indent'. Checking for it here lets us
  // emit a slightly more helpful message.
  if (lexer_.PeekToken(Lexer::INDENT)) {
    if (!new_scope)
      return LexerError("indent after 'include' line is invalid.");
    for (;;) {
      if (lexer_.PeekToken(Lexer::CHDIR)) {
        break;
      }
      if (experimentalEnvvar_) {
        // Accept one or more "env foo = bar\n<indent><chdir>".
        StringPiece envkey;
        if (lexer_.ReadIdent(&envkey)) {
          // compare() returns nonzero when the strings differ.
          if (envkey.compare("env")) {
            return LexerError("unexpected \"" + envkey.AsString() +
                              "\". Only 'chdir =' allowed here.");
          }
          StringPiece key;
          StringPiece val;
          if (!lexer_.ReadIdent(&key)) {
            return LexerError(
                "expected \"env VAR = value1 value2 value3\", only got \"env\"");
          }
          // "env chdir = ..." would collide with the chdir keyword itself.
          if (!key.compare("chdir")) {
            return LexerError("reserved word chdir: \"env chdir =\"");
          }
          if (!ExpectToken(Lexer::EQUALS)) {
            return false;  // ExpectToken() has already set a Lexer error.
          }
          std::string err;
          if (!lexer_.ReadBindingValue(&val, &err)) {
            return OutError(err);
          }
          // Trim leading and trailing whitespace from the value.
          const char* ws = " \t\r\n";
          std::string valstr = val.AsString();
          valstr.erase(valstr.find_last_not_of(ws) + 1);
          valstr.erase(0, valstr.find_first_not_of(ws));
          auto p = include->envvar.emplace(key.AsString(), valstr);
          if (!p.second) {
            return LexerError("duplicate env var");
          }
          if (!lexer_.PeekToken(Lexer::INDENT)) {
            return LexerError("got \"env " + key.AsString() + " = " + valstr +
                              "\" but missing chdir.");
          }
          continue;  // Successfully parsed an env statement.
        }
      }
      return LexerError("only 'chdir =' is allowed after 'subninja' line.");
    }
    if (!ExpectToken(Lexer::EQUALS))
      return LexerError("only 'chdir =' is allowed after 'subninja' line.");
    if (!lexer_.ReadPath(&include->chdir_, &err))
      return OutError(err);
    // Disallow a chdir path whose first component is '.' or '..'.
    include->chdir_plus_slash_ = include->chdir_.str_.AsString();
    auto first_sep = include->chdir_plus_slash_.find('/');
    if (first_sep == std::string::npos) {
      first_sep = include->chdir_plus_slash_.size();
    }
#ifdef _WIN32
    auto first_win_sep = include->chdir_plus_slash_.find('\\');
    if (first_win_sep != std::string::npos && first_win_sep < first_sep) {
      first_sep = first_win_sep;
    }
#endif
    if ((first_sep == 1 && include->chdir_plus_slash_[0] == '.') ||
        (first_sep == 2 && !include->chdir_plus_slash_.compare(0, 2, "..")))
      return LexerError("invalid use of '.' or '..' in chdir");
    // The trailing '/' is added in manifest_parser.cc.
    OutItem(include);
    return true;
  }
  OutItem(include);
  return true;
}

bool ChunkParser::ParsePool() {
  Pool* pool = new Pool();
  StringPiece name;
  if (!lexer_.ReadIdent(&name))
    return LexerError("expected pool name");
  pool->name_ = name;
  if (!ExpectToken(Lexer::NEWLINE))
    return false;
  pool->parse_state_.pool_name_diag_pos = lexer_.GetLastTokenOffset();
  while (lexer_.PeekIndent()) {
    StringPiece key;
    StringPiece value;
    if (!ParseLet(&key, &value))
      return false;
    if (key == "depth") {
      pool->parse_state_.depth = value;
      pool->parse_state_.depth_diag_pos = lexer_.GetLastTokenOffset();
    } else {
      return LexerError("unexpected variable '" + key.AsString() + "'");
    }
  }
  if (pool->parse_state_.depth.empty())
    return LexerError("expected 'depth =' line");
  Clump* clump = MakeClump();
  pool->pos_ = clump->AllocNextPos();
  clump->pools_.push_back(pool);
  return true;
}

bool ChunkParser::ParseDefault() {
  bool first = true;
  while (true) {
    LexedPath path;
    std::string err;
    if (!lexer_.ReadPath(&path, &err))
      return OutError(err);
    if (path.str_.empty()) {
      if (first)
        return LexerError("expected target name");
      else
        break;
    }
    first = false;
    DefaultTarget* target = new DefaultTarget();
    target->parsed_path_ = std::move(path);
    target->diag_pos_ = lexer_.GetLastTokenOffset();
    Clump* clump = MakeClump();
    target->pos_ = clump->AllocNextPos();
    clump->default_targets_.push_back(target);
  }
  return ExpectToken(Lexer::NEWLINE);
}

static const HashedStrView kNinjaRequiredVersion { "ninja_required_version" };
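
// Parses a top-level "name = value" binding, e.g. "cflags = -O2"
// (illustrative). The identifier has already been lexed, so it is unread
// before reusing ParseLet().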
bool ChunkParser::ParseBinding() {
  Binding* binding = new Binding();
  lexer_.UnreadToken();
  StringPiece name;
  if (!ParseLet(&name, &binding->parsed_value_))
    return false;
  binding->name_ = name;

  // Record a ninja_required_version binding specially. We want to do this
  // version check ASAP in case we encounter unexpected syntax. We can't do the
  // check immediately, because the version string is evaluated and might need
  // bindings from an earlier chunk. We could probably do this version check as
  // part of ordinary scope setup while processing a Clump, but keeping it
  // separate guarantees that the version check is done early enough.
  if (binding->name_ == kNinjaRequiredVersion) {
    OutItem(new RequiredVersion { binding->parsed_value_ });
  }

  Clump* clump = MakeClump();
  binding->pos_ = clump->AllocNextPos();
  clump->bindings_.push_back(binding);
  return true;
}

static const HashedStrView kRspFile { "rspfile" };
static const HashedStrView kRspFileContent { "rspfile_content" };
static const HashedStrView kCommand { "command" };
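
// Parses a rule declaration, e.g. (rule body illustrative):
//
//   rule cc
//     command = gcc -c $in -o $out
//
// Only reserved bindings are accepted; "command" is required, and rspfile /
// rspfile_content must be given together.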
bool ChunkParser::ParseRule() {
  Rule* rule = new Rule();
  StringPiece rule_name;
  if (!lexer_.ReadIdent(&rule_name))
    return LexerError("expected rule name");
  rule->name_ = rule_name;
  if (!ExpectToken(Lexer::NEWLINE))
    return false;
  rule->parse_state_.rule_name_diag_pos = lexer_.GetLastTokenOffset();
  while (lexer_.PeekIndent()) {
    StringPiece key;
    StringPiece value;
    if (!ParseLet(&key, &value))
      return false;
    if (!Rule::IsReservedBinding(key)) {
      // Die on other keyvals for now; revisit if we want to add a scope here.
      // If we allow arbitrary key values here, we'll need to revisit how cycle
      // detection works when evaluating a rule variable.
      return LexerError("unexpected variable '" + key.AsString() + "'");
    }
    rule->bindings_.emplace_back(std::piecewise_construct,
                                 std::tuple<StringPiece>(key),
                                 std::tuple<const char*, size_t>(value.data(),
                                                                 value.size()));
  }
  if (static_cast<bool>(rule->GetBinding(kRspFile)) !=
      static_cast<bool>(rule->GetBinding(kRspFileContent))) {
    return LexerError("rspfile and rspfile_content need to be both specified");
  }
  if (rule->GetBinding(kCommand) == nullptr)
    return LexerError("expected 'command =' line");
  Clump* clump = MakeClump();
  rule->pos_ = clump->AllocNextPos();
  clump->rules_.push_back(rule);
  return true;
}

bool ChunkParser::ParseEdge() {
  Edge* edge = new Edge();

  auto parse_path_list = [this, edge](Edge::DeferredPathList::Type type,
                                      int& count) -> bool {
    const char* start_pos = lexer_.GetPos();
    while (true) {
      LexedPath path;
      std::string err;
      if (!lexer_.ReadPath(&path, &err))
        return OutError(err);
      if (path.str_.empty()) {
        // In the initial parsing pass, the manifest's bindings aren't ready
        // yet, so paths can't be evaluated. Rather than store the path itself
        // (wasting memory), store just enough information to parse the path
        // lists again once bindings are ready.
        edge->parse_state_.deferred_path_lists.push_back({
          start_pos, type, count,
        });
        return true;
      }
      count++;
    }
  };

  if (!parse_path_list(Edge::DeferredPathList::OUTPUT,
                       edge->explicit_outs_))
    return false;

  // Add all implicit outs, counting how many as we go.
  if (lexer_.PeekToken(Lexer::PIPE)) {
    if (!parse_path_list(Edge::DeferredPathList::OUTPUT,
                         edge->implicit_outs_))
      return false;
  }

  if (edge->explicit_outs_ + edge->implicit_outs_ == 0)
    return LexerError("expected path");

  if (!ExpectToken(Lexer::COLON))
    return false;

  StringPiece rule_name;
  if (!lexer_.ReadIdent(&rule_name))
    return LexerError("expected build command name");
  edge->parse_state_.rule_name = rule_name;
  edge->parse_state_.rule_name_diag_pos = lexer_.GetLastTokenOffset();

  if (!parse_path_list(Edge::DeferredPathList::INPUT,
                       edge->explicit_deps_))
    return false;

  // Add all implicit deps, counting how many as we go.
  if (lexer_.PeekToken(Lexer::PIPE)) {
    if (!parse_path_list(Edge::DeferredPathList::INPUT,
                         edge->implicit_deps_))
      return false;
  }

  // Add all order-only deps, counting how many as we go.
  if (lexer_.PeekToken(Lexer::PIPE2)) {
    if (!parse_path_list(Edge::DeferredPathList::INPUT,
                         edge->order_only_deps_))
      return false;
  }

  // Add all validation deps, counting how many as we go.
  if (lexer_.PeekToken(Lexer::PIPEAT)) {
    if (!parse_path_list(Edge::DeferredPathList::VALIDATION,
                         edge->validation_deps_))
      return false;
  }

  if (!ExpectToken(Lexer::NEWLINE))
    return false;

  while (lexer_.PeekIndent()) {
    StringPiece key;
    StringPiece val;
    if (!ParseLet(&key, &val))
      return false;
    std::tuple<const char*, size_t> val_ctor_params(val.data(), val.size());
    edge->unevaled_bindings_.emplace_back(std::piecewise_construct,
                                          std::tuple<StringPiece>(key),
                                          val_ctor_params);
  }
  edge->parse_state_.final_diag_pos = lexer_.GetLastTokenOffset();

  Clump* clump = MakeClump();
  edge->pos_ = clump->AllocNextPos();
  // We can't call edge->onPosResolvedToScope(clump->pos_.scope.scope) here,
  // because clump->pos_.scope.scope isn't set until DfsParser::HandleClump().
  clump->edges_.push_back(edge);
  clump->edge_output_count_ += edge->explicit_outs_;
  return true;
}

bool ChunkParser::ParseChunk() {
  while (true) {
    if (lexer_.GetPos() >= chunk_end_) {
      assert(lexer_.GetPos() == chunk_end_ &&
             "lexer scanned beyond the end of a manifest chunk");
      return true;
    }
    Lexer::Token token = lexer_.ReadToken();
    bool success = true;
    switch (token) {
    case Lexer::INCLUDE:  success = ParseFileInclude(false); break;
    case Lexer::SUBNINJA: success = ParseFileInclude(true); break;
    case Lexer::POOL:     success = ParsePool(); break;
    case Lexer::DEFAULT:  success = ParseDefault(); break;
    case Lexer::IDENT:    success = ParseBinding(); break;
    case Lexer::RULE:     success = ParseRule(); break;
    case Lexer::BUILD:    success = ParseEdge(); break;
    case Lexer::NEWLINE:  break;
    case Lexer::ERROR:    return LexerError(lexer_.DescribeLastError());
    case Lexer::TNUL:     return LexerError("unexpected NUL byte");
    case Lexer::TEOF:
      assert(false && "EOF should have been detected before reading a token");
      break;
    default:
      return LexerError(std::string("unexpected ") + Lexer::TokenName(token));
    }
    if (!success) return false;
  }
  return false;  // not reached
}

void ParseChunk(const LoadedFile& file, StringPiece chunk_content,
                std::vector<ParserItem>* out, bool experimentalEnvvar) {
  ChunkParser parser(file, chunk_content, out, experimentalEnvvar);
  if (!parser.ParseChunk()) {
    assert(!out->empty());
    assert(out->back().kind == ParserItem::kError);
    assert(!out->back().u.error->msg_.empty());
  }
}

std::vector<StringPiece> SplitManifestIntoChunks(StringPiece input) {
  METRIC_RECORD(".ninja load : split manifest");

  // The lexer requires that a NUL character appear after the file's content.
  // Make the NUL implicit for the rest of the manifest parsing.
  //
  // In general, parsing a manifest chunk can examine text just past the
  // chunk's explicit end. For example, suppose we have two consecutive
  // chunks:
  //  - "build foo: gen\n pool = mypool\n"
  //  - "build bar: gen\n"
  // The parser for the "foo" chunk will examine the start of "build" from the
  // "bar" chunk when it looks for another indented edge binding.
  assert(!input.empty() && input.back() == '\0' &&
         "input lacks a NUL terminator");
  input.remove_suffix(1);

  // For efficiency, avoid splitting small files. kChunkMinSize can be lowered
  // to 0 for testing.
  const size_t kChunkMinSize = 1024 * 1024;
  const size_t chunk_count = GetOptimalThreadPoolJobCount();
  const size_t chunk_size = std::max(kChunkMinSize,
                                     input.size() / chunk_count + 1);
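
  // For illustration (numbers assumed, not measured): with an 8-job thread
  // pool and a 6 MiB manifest, chunk_size = max(1 MiB, 6 MiB / 8 + 1) = 1 MiB,
  // yielding about six chunks once AdvanceToNextManifestChunk adjusts each
  // boundary.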
  std::vector<StringPiece> result;
  result.reserve(chunk_count);
  size_t start = 0;
  while (start < input.size()) {
    size_t next = std::min(start + chunk_size, input.size());
    next = AdvanceToNextManifestChunk(input, next);
    result.push_back(input.substr(start, next - start));
    start = next;
  }
  return result;
}

} // namespace manifest_chunk