// Copyright 2018 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "manifest_chunk_parser.h"
#include <assert.h>
#include <thread>
#include <unordered_map>
#include "lexer.h"
#include "metrics.h"
#include "thread_pool.h"
#include "util.h"
namespace manifest_chunk {

class ChunkParser {
const LoadedFile& file_;
Lexer lexer_;
const char* chunk_end_ = nullptr;
std::vector<ParserItem>* out_ = nullptr;
Clump* current_clump_ = nullptr;
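
  // Return the current Clump, creating one and appending it to the output
  // vector if needed. Consecutive declarations (bindings, pools, rules,
  // edges, defaults) accumulate into a single Clump until OutItem() emits a
  // non-clump item.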
Clump* MakeClump() {
if (current_clump_)
return current_clump_;
current_clump_ = new Clump { file_ };
out_->push_back(current_clump_);
return current_clump_;
}
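
  // Emit a standalone item (e.g. an Include or an Error) and end the current
  // clump, so the next declaration starts a new one.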
void OutItem(ParserItem item) {
current_clump_ = nullptr;
out_->push_back(item);
}
bool OutError(const std::string& err) {
OutItem(new Error { err });
return false;
}
bool LexerError(const std::string& message);
bool ExpectToken(Lexer::Token expected);
bool ParseLet(StringPiece* key, StringPiece* value);
bool ParseFileInclude(bool new_scope);
bool ParsePool();
bool ParseDefault();
bool ParseBinding();
bool ParseRule();
bool ParseEdge();
public:
ChunkParser(const LoadedFile& file,
StringPiece chunk_content,
std::vector<ParserItem>* out)
: file_(file),
lexer_(file.filename(), file.content(), chunk_content.data()),
chunk_end_(chunk_content.data() + chunk_content.size()),
out_(out) {}
bool ParseChunk();
};

bool ChunkParser::LexerError(const std::string& message) {
std::string err;
lexer_.Error(message, &err);
return OutError(err);
}

bool ChunkParser::ExpectToken(Lexer::Token expected) {
Lexer::Token token = lexer_.ReadToken();
if (token != expected) {
    std::string message = std::string("expected ") + Lexer::TokenName(expected);
    message += std::string(", got ") + Lexer::TokenName(token);
message += Lexer::TokenErrorHint(expected);
return LexerError(message);
}
return true;
}
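
// Parse a "key = value" line, e.g. "depth = 4". The value is captured as raw
// text; it is evaluated later, once the relevant bindings are available.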
bool ChunkParser::ParseLet(StringPiece* key, StringPiece* value) {
if (!lexer_.ReadIdent(key))
return LexerError("expected variable name");
if (!ExpectToken(Lexer::EQUALS))
return false;
std::string err;
if (!lexer_.ReadBindingValue(value, &err))
return OutError(err);
return true;
}
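
// Parse the rest of an "include" or "subninja" statement, e.g.:
//   include rules.ninja
//   subninja subdir/build.ninja
// subninja (new_scope == true) evaluates the included manifest in a new
// child scope; include does not.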
bool ChunkParser::ParseFileInclude(bool new_scope) {
Include* include = new Include();
include->new_scope_ = new_scope;
std::string err;
if (!lexer_.ReadPath(&include->path_, &err))
return OutError(err);
include->diag_pos_ = lexer_.GetLastTokenOffset();
if (!ExpectToken(Lexer::NEWLINE))
return false;
OutItem(include);
return true;
}
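
// Parse a pool declaration, e.g. (hypothetical pool name):
//   pool link_pool
//     depth = 4
// "depth" is required and is the only variable a pool accepts.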
bool ChunkParser::ParsePool() {
Pool* pool = new Pool();
StringPiece name;
if (!lexer_.ReadIdent(&name))
return LexerError("expected pool name");
pool->name_ = name;
if (!ExpectToken(Lexer::NEWLINE))
return false;
pool->parse_state_.pool_name_diag_pos = lexer_.GetLastTokenOffset();
while (lexer_.PeekIndent()) {
StringPiece key;
StringPiece value;
if (!ParseLet(&key, &value))
return false;
if (key == "depth") {
pool->parse_state_.depth = value;
pool->parse_state_.depth_diag_pos = lexer_.GetLastTokenOffset();
} else {
return LexerError("unexpected variable '" + key.AsString() + "'");
}
}
if (pool->parse_state_.depth.empty())
return LexerError("expected 'depth =' line");
Clump* clump = MakeClump();
pool->pos_ = clump->AllocNextPos();
clump->pools_.push_back(pool);
return true;
}
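
// Parse a default-target declaration naming one or more targets, e.g.
// (hypothetical targets):
//   default foo bar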
bool ChunkParser::ParseDefault() {
bool first = true;
while (true) {
LexedPath path;
std::string err;
if (!lexer_.ReadPath(&path, &err))
return OutError(err);
if (path.str_.empty()) {
if (first)
return LexerError("expected target name");
else
break;
}
first = false;
DefaultTarget* target = new DefaultTarget();
target->parsed_path_ = std::move(path);
target->diag_pos_ = lexer_.GetLastTokenOffset();
Clump* clump = MakeClump();
target->pos_ = clump->AllocNextPos();
clump->default_targets_.push_back(target);
}
return ExpectToken(Lexer::NEWLINE);
}
static const HashedStrView kNinjaRequiredVersion { "ninja_required_version" };
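
// Parse a top-level variable binding, e.g. (hypothetical binding):
//   cflags = -O2 $extra_flags
// ParseChunk has already consumed the IDENT token, so unread it and let
// ParseLet rescan the variable name.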
bool ChunkParser::ParseBinding() {
Binding* binding = new Binding();
lexer_.UnreadToken();
StringPiece name;
if (!ParseLet(&name, &binding->parsed_value_))
return false;
binding->name_ = name;
// Record a ninja_required_version binding specially. We want to do this
// version check ASAP in case we encounter unexpected syntax. We can't do the
// check immediately because the version string is evaluated and might need
// bindings from an earlier chunk. We could probably do this version check as
// part of ordinary scope setup while processing a Clump, but keeping it
// separate guarantees that the version check is done early enough.
if (binding->name_ == kNinjaRequiredVersion)
OutItem(new RequiredVersion { binding->parsed_value_ });
Clump* clump = MakeClump();
binding->pos_ = clump->AllocNextPos();
clump->bindings_.push_back(binding);
return true;
}
static const HashedStrView kRspFile { "rspfile" };
static const HashedStrView kRspFileContent { "rspfile_content" };
static const HashedStrView kCommand { "command" };
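
// Parse a rule declaration, e.g. (hypothetical rule):
//   rule cc
//     command = gcc -c $in -o $out
//     description = CC $out
// Only reserved binding names (command, description, rspfile, etc.) are
// accepted inside a rule body.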
bool ChunkParser::ParseRule() {
Rule* rule = new Rule();
StringPiece rule_name;
if (!lexer_.ReadIdent(&rule_name))
return LexerError("expected rule name");
rule->name_ = rule_name;
if (!ExpectToken(Lexer::NEWLINE))
return false;
rule->parse_state_.rule_name_diag_pos = lexer_.GetLastTokenOffset();
while (lexer_.PeekIndent()) {
StringPiece key;
StringPiece value;
if (!ParseLet(&key, &value))
return false;
if (!Rule::IsReservedBinding(key)) {
// Die on other keyvals for now; revisit if we want to add a scope here.
// If we allow arbitrary key values here, we'll need to revisit how cycle
// detection works when evaluating a rule variable.
return LexerError("unexpected variable '" + key.AsString() + "'");
}
rule->bindings_.emplace_back(std::piecewise_construct,
std::tuple<StringPiece>(key),
std::tuple<const char*, size_t>(value.data(),
value.size()));
}
if (static_cast<bool>(rule->GetBinding(kRspFile)) !=
static_cast<bool>(rule->GetBinding(kRspFileContent))) {
return LexerError("rspfile and rspfile_content need to be both specified");
}
if (rule->GetBinding(kCommand) == nullptr)
return LexerError("expected 'command =' line");
Clump* clump = MakeClump();
rule->pos_ = clump->AllocNextPos();
clump->rules_.push_back(rule);
return true;
}
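
// Parse a build edge, e.g. (hypothetical edge):
//   build out.o | out.d: cc in.c | gen.h || orderonly
//     flags = -O2
// Path lists aren't stored or evaluated here; see the deferred_path_lists
// comment inside parse_path_list below.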
bool ChunkParser::ParseEdge() {
Edge* edge = new Edge();
auto parse_path_list = [this, edge](bool is_output, int& count) -> bool {
const char* start_pos = lexer_.GetPos();
while (true) {
LexedPath path;
std::string err;
if (!lexer_.ReadPath(&path, &err))
return OutError(err);
if (path.str_.empty()) {
// In the initial parsing pass, the manifest's bindings aren't ready
// yet, so paths can't be evaluated. Rather than store the path itself
// (wasting memory), store just enough information to parse the path
// lists again once bindings are ready.
edge->parse_state_.deferred_path_lists.push_back({
start_pos, is_output, count,
});
return true;
}
count++;
}
};
if (!parse_path_list(true, edge->explicit_outs_))
return false;
// Add all implicit outs, counting how many as we go.
if (lexer_.PeekToken(Lexer::PIPE)) {
if (!parse_path_list(true, edge->implicit_outs_))
return false;
}
if (edge->explicit_outs_ + edge->implicit_outs_ == 0)
return LexerError("expected path");
if (!ExpectToken(Lexer::COLON))
return false;
StringPiece rule_name;
if (!lexer_.ReadIdent(&rule_name))
return LexerError("expected build command name");
edge->parse_state_.rule_name = rule_name;
edge->parse_state_.rule_name_diag_pos = lexer_.GetLastTokenOffset();
if (!parse_path_list(false, edge->explicit_deps_))
return false;
// Add all implicit deps, counting how many as we go.
if (lexer_.PeekToken(Lexer::PIPE)) {
if (!parse_path_list(false, edge->implicit_deps_))
return false;
}
// Add all order-only deps, counting how many as we go.
if (lexer_.PeekToken(Lexer::PIPE2)) {
if (!parse_path_list(false, edge->order_only_deps_))
return false;
}
if (!ExpectToken(Lexer::NEWLINE))
return false;
while (lexer_.PeekIndent()) {
StringPiece key;
StringPiece val;
if (!ParseLet(&key, &val))
return false;
std::tuple<const char*, size_t> val_ctor_params(val.data(), val.size());
edge->unevaled_bindings_.emplace_back(std::piecewise_construct,
std::tuple<StringPiece>(key),
val_ctor_params);
}
edge->parse_state_.final_diag_pos = lexer_.GetLastTokenOffset();
Clump* clump = MakeClump();
edge->pos_ = clump->AllocNextPos();
clump->edges_.push_back(edge);
clump->edge_output_count_ += edge->explicit_outs_;
return true;
}

bool ChunkParser::ParseChunk() {
while (true) {
if (lexer_.GetPos() >= chunk_end_) {
assert(lexer_.GetPos() == chunk_end_ &&
"lexer scanned beyond the end of a manifest chunk");
return true;
}
Lexer::Token token = lexer_.ReadToken();
bool success = true;
switch (token) {
case Lexer::INCLUDE: success = ParseFileInclude(false); break;
case Lexer::SUBNINJA: success = ParseFileInclude(true); break;
case Lexer::POOL: success = ParsePool(); break;
case Lexer::DEFAULT: success = ParseDefault(); break;
case Lexer::IDENT: success = ParseBinding(); break;
case Lexer::RULE: success = ParseRule(); break;
case Lexer::BUILD: success = ParseEdge(); break;
case Lexer::NEWLINE: break;
case Lexer::ERROR: return LexerError(lexer_.DescribeLastError());
case Lexer::TNUL: return LexerError("unexpected NUL byte");
case Lexer::TEOF:
assert(false && "EOF should have been detected before reading a token");
break;
default:
return LexerError(std::string("unexpected ") + Lexer::TokenName(token));
}
if (!success) return false;
}
return false; // not reached
}

void ParseChunk(const LoadedFile& file, StringPiece chunk_content,
std::vector<ParserItem>* out) {
ChunkParser parser(file, chunk_content, out);
if (!parser.ParseChunk()) {
assert(!out->empty());
assert(out->back().kind == ParserItem::kError);
assert(!out->back().u.error->msg_.empty());
}
}
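
// Minimal usage sketch (hypothetical caller; the real loader splits the
// manifest with SplitManifestIntoChunks and parses chunks on a thread pool):
//
//   std::vector<ParserItem> items;
//   manifest_chunk::ParseChunk(file, chunk, &items);
//   for (const ParserItem& item : items) {
//     // dispatch on item.kind (e.g. ParserItem::kError)
//   }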

std::vector<StringPiece> SplitManifestIntoChunks(StringPiece input) {
METRIC_RECORD(".ninja load : split manifest");
// The lexer requires that a NUL character appear after the file's content.
// Make the NUL implicit for the rest of the manifest parsing.
//
// In general, parsing a manifest chunk examines the terminating text after
// the chunk's explicit end. For example, suppose we have two consecutive
// chunks:
// - "build foo: gen\n pool = mypool\n"
// - "build bar: gen\n"
// The parser for the "foo" chunk will examine the start of "build" from the
// "bar" chunk when it looks for another indented edge binding.
assert(!input.empty() && input.back() == '\0' &&
"input lacks a NUL terminator");
input.remove_suffix(1);
// For efficiency, avoid splitting small files. kChunkMinSize can be lowered
// to 0 for testing.
const size_t kChunkMinSize = 1024 * 1024;
const size_t chunk_count = GetOptimalThreadPoolJobCount();
const size_t chunk_size = std::max(kChunkMinSize,
input.size() / chunk_count + 1);
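  // For example (hypothetical sizes): a 10 MB manifest with chunk_count == 4
  // yields chunk_size == max(1 MiB, 2.5 MB + 1), so roughly four ~2.5 MB
  // chunks, while a 100 KB manifest stays in a single chunk.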
std::vector<StringPiece> result;
result.reserve(chunk_count);
size_t start = 0;
while (start < input.size()) {
size_t next = std::min(start + chunk_size, input.size());
next = AdvanceToNextManifestChunk(input, next);
result.push_back(input.substr(start, next - start));
start = next;
}
return result;
}

}  // namespace manifest_chunk