| // Copyright 2018 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "manifest_chunk_parser.h" |
| |
#include <assert.h>

#include <memory>
#include <thread>
#include <unordered_map>
| |
| #include "lexer.h" |
| #include "metrics.h" |
| #include "thread_pool.h" |
| #include "util.h" |
| |
| namespace manifest_chunk { |
| |
// Parses one chunk of a manifest file into a sequence of ParserItems
// (clumps of declarations, file includes, version checks, or errors).
// Each chunk is parsed independently, which lets a large manifest be
// parsed on several threads (see SplitManifestIntoChunks).
class ChunkParser {
  const LoadedFile& file_;         // Manifest file this chunk belongs to.
  const bool experimentalEnvvar_;  // Allow "env VAR = ..." in subninja chdir blocks.
  Lexer lexer_;
  const char* chunk_end_ = nullptr;  // One past the last byte of this chunk.
  std::vector<ParserItem>* out_ = nullptr;  // Receives parsed items; not owned.
  Clump* current_clump_ = nullptr;  // Clump being accumulated, or nullptr.

  // Returns the clump currently being accumulated, starting (and emitting)
  // a new one if the previous item ended the clump.
  Clump* MakeClump() {
    if (current_clump_)
      return current_clump_;
    current_clump_ = new Clump { file_ };
    out_->push_back(current_clump_);
    return current_clump_;
  }

  // Emits a non-clump item; this terminates the current clump, if any.
  void OutItem(ParserItem item) {
    current_clump_ = nullptr;
    out_->push_back(std::move(item));
  }

  // Emits an Error item. Always returns false so callers can write
  // `return OutError(err);`.
  bool OutError(const std::string& err) {
    OutItem(new Error { err });
    return false;
  }

  bool LexerError(const std::string& message);
  bool ExpectToken(Lexer::Token expected);
  bool ParseLet(StringPiece* key, StringPiece* value);
  bool ParseFileInclude(bool new_scope);
  bool ParsePool();
  bool ParseDefault();
  bool ParseBinding();
  bool ParseRule();
  bool ParseEdge();

public:
  // `chunk_content` must be a subrange of `file.content()`. The lexer is
  // positioned at the start of the chunk; it may peek slightly past
  // chunk_end_ (see the comment in SplitManifestIntoChunks).
  ChunkParser(const LoadedFile& file,
              StringPiece chunk_content,
              std::vector<ParserItem>* out,
              bool experimentalEnvvar)
      : file_(file),
        experimentalEnvvar_(experimentalEnvvar),
        lexer_(file.filename(), file.content(), chunk_content.data()),
        chunk_end_(chunk_content.data() + chunk_content.size()),
        out_(out) {}

  // Parses statements until the lexer reaches chunk_end_. Returns false
  // after emitting an Error item on the first parse error.
  bool ParseChunk();
};
| |
| bool ChunkParser::LexerError(const std::string& message) { |
| std::string err; |
| lexer_.Error(message, &err); |
| return OutError(err); |
| } |
| |
| bool ChunkParser::ExpectToken(Lexer::Token expected) { |
| Lexer::Token token = lexer_.ReadToken(); |
| if (token != expected) { |
| std::string message = string("expected ") + Lexer::TokenName(expected); |
| message += string(", got ") + Lexer::TokenName(token); |
| message += Lexer::TokenErrorHint(expected); |
| return LexerError(message); |
| } |
| return true; |
| } |
| |
| bool ChunkParser::ParseLet(StringPiece* key, StringPiece* value) { |
| if (!lexer_.ReadIdent(key)) |
| return LexerError("expected variable name"); |
| if (!ExpectToken(Lexer::EQUALS)) |
| return false; |
| std::string err; |
| if (!lexer_.ReadBindingValue(value, &err)) |
| return OutError(err); |
| return true; |
| } |
| |
// Parses the remainder of an `include` (new_scope == false) or `subninja`
// (new_scope == true) statement. After the path, a subninja may carry an
// indented block: optional `env VAR = value` lines (only when the
// experimental envvar feature is enabled) followed by a `chdir = path` line.
// NOTE(review): `include` is leaked on the error paths below; a parse error
// aborts the manifest load, but a scoped owner would still be tidier.
bool ChunkParser::ParseFileInclude(bool new_scope) {
  Include* include = new Include();
  include->new_scope_ = new_scope;
  std::string err;
  if (!lexer_.ReadPath(&include->path_, &err))
    return OutError(err);
  include->diag_pos_ = lexer_.GetLastTokenOffset();
  if (!ExpectToken(Lexer::NEWLINE))
    return false;
  // If new_scope == false, it would be possible to just ignore the next token.
  // If a Lexer::INDENT somehow was the next token, it would fail with
  // 'ninja: build.ninja:NNN: unexpected indent'. This might be a slightly more
  // helpful message.
  if (lexer_.PeekToken(Lexer::INDENT)) {
    if (!new_scope)
      return LexerError("indent after 'include' line is invalid.");
    // Consume indented lines until the `chdir` keyword is seen.
    for (;;) {
      if (lexer_.PeekToken(Lexer::CHDIR)) {
        break;
      }
      if (experimentalEnvvar_) {
        // Accept one or more "env foo = bar\n<indent><chdir>"
        StringPiece envkey;
        if (lexer_.ReadIdent(&envkey)) {
          // Any identifier other than the literal word "env" is rejected.
          if (envkey.compare("env")) {
            return LexerError("unexpected \"" + envkey.AsString() +
                              "\". Only 'chdir =' allowed here.");
          }
          StringPiece key;
          StringPiece val;
          if (!lexer_.ReadIdent(&key)) {
            return LexerError(
                "expected \"env VAR = value1 value2 value3\", only got \"env\"");
          }
          // "env chdir = ..." would collide with the chdir keyword; forbid it.
          if (!key.compare("chdir")) {
            return LexerError("reserved word chdir: \"env chdir =\"");
          }
          if (!ExpectToken(Lexer::EQUALS)) {
            return false; // ExpectToken() has already set Lexer error
          }
          std::string err;
          if (!lexer_.ReadBindingValue(&val, &err)) {
            return OutError(err);
          }

          // Trim surrounding whitespace from the value before storing it.
          const char* ws = " \t\r\n";
          std::string valstr = val.AsString();
          valstr.erase(valstr.find_last_not_of(ws) + 1);
          valstr.erase(0, valstr.find_first_not_of(ws));

          // emplace fails if the same variable was already given in this block.
          auto p = include->envvar.emplace(key.AsString(), valstr);
          if (!p.second) {
            return LexerError("duplicate env var");
          }
          // The indented block must continue until chdir appears.
          if (!lexer_.PeekToken(Lexer::INDENT)) {
            return LexerError(
                "got \"env " + key.AsString() + " = " + valstr + "\" but missing chdir.");
          }
          continue; // Successfully parsed an env statement.
        }
      }
      return LexerError("only 'chdir =' is allowed after 'subninja' line.");
    }
    if (!ExpectToken(Lexer::EQUALS))
      return LexerError("only 'chdir =' is allowed after 'subninja' line.");
    if (!lexer_.ReadPath(&include->chdir_, &err))
      return OutError(err);

    // Disallow '..' and '.' relative paths
    include->chdir_plus_slash_ = include->chdir_.str_.AsString();
    auto first_sep = include->chdir_plus_slash_.find('/');
    if (first_sep == std::string::npos) {
      first_sep = include->chdir_plus_slash_.size();
    }
#ifdef _WIN32
    // On Windows either separator may appear; use whichever comes first.
    auto first_win_sep = include->chdir_plus_slash_.find('\\');
    if (first_win_sep != std::string::npos && first_win_sep < first_sep) {
      first_sep = first_win_sep;
    }
#endif
    // Reject a chdir whose first path component is exactly "." or "..".
    if ((first_sep == 1 && include->chdir_plus_slash_[0] == '.') ||
        (first_sep == 2 && !include->chdir_plus_slash_.compare(0, 2, "..")))
      return LexerError("invalid use of '.' or '..' in chdir");

    // The trailing '/' is added in manifest_parser.cc.
    OutItem(include);
    return true;
  }
  OutItem(include);
  return true;
}
| |
| bool ChunkParser::ParsePool() { |
| Pool* pool = new Pool(); |
| |
| StringPiece name; |
| if (!lexer_.ReadIdent(&name)) |
| return LexerError("expected pool name"); |
| pool->name_ = name; |
| |
| if (!ExpectToken(Lexer::NEWLINE)) |
| return false; |
| |
| pool->parse_state_.pool_name_diag_pos = lexer_.GetLastTokenOffset(); |
| |
| while (lexer_.PeekIndent()) { |
| StringPiece key; |
| StringPiece value; |
| if (!ParseLet(&key, &value)) |
| return false; |
| |
| if (key == "depth") { |
| pool->parse_state_.depth = value; |
| pool->parse_state_.depth_diag_pos = lexer_.GetLastTokenOffset(); |
| } else { |
| return LexerError("unexpected variable '" + key.AsString() + "'"); |
| } |
| } |
| |
| if (pool->parse_state_.depth.empty()) |
| return LexerError("expected 'depth =' line"); |
| |
| Clump* clump = MakeClump(); |
| pool->pos_ = clump->AllocNextPos(); |
| clump->pools_.push_back(pool); |
| return true; |
| } |
| |
| bool ChunkParser::ParseDefault() { |
| bool first = true; |
| while (true) { |
| LexedPath path; |
| std::string err; |
| if (!lexer_.ReadPath(&path, &err)) |
| return OutError(err); |
| if (path.str_.empty()) { |
| if (first) |
| return LexerError("expected target name"); |
| else |
| break; |
| } |
| first = false; |
| |
| DefaultTarget* target = new DefaultTarget(); |
| target->parsed_path_ = std::move(path); |
| target->diag_pos_ = lexer_.GetLastTokenOffset(); |
| |
| Clump* clump = MakeClump(); |
| target->pos_ = clump->AllocNextPos(); |
| clump->default_targets_.push_back(target); |
| } |
| return ExpectToken(Lexer::NEWLINE); |
| } |
| |
// Binding name that triggers the early version check in ParseBinding.
static const HashedStrView kNinjaRequiredVersion { "ninja_required_version" };
| |
| bool ChunkParser::ParseBinding() { |
| Binding* binding = new Binding(); |
| |
| lexer_.UnreadToken(); |
| StringPiece name; |
| if (!ParseLet(&name, &binding->parsed_value_)) |
| return false; |
| binding->name_ = name; |
| |
| // Record a ninja_required_version binding specially. We want to do this |
| // version check ASAP in case we encounter unexpected syntax. We can't do the |
| // check immediately because the version string is evaluated and might need |
| // bindings from an earlier chunk. We could probably do this version check as |
| // part of ordinary scope setup while processing a Clump, but keeping it |
| // separate guarantees that the version check is done early enough. |
| if (binding->name_ == kNinjaRequiredVersion) { |
| OutItem(new RequiredVersion { binding->parsed_value_ }); |
| } |
| |
| Clump* clump = MakeClump(); |
| binding->pos_ = clump->AllocNextPos(); |
| clump->bindings_.push_back(binding); |
| return true; |
| } |
| |
// Reserved rule-binding names checked for consistency in ParseRule.
static const HashedStrView kRspFile { "rspfile" };
static const HashedStrView kRspFileContent { "rspfile_content" };
static const HashedStrView kCommand { "command" };
| |
| bool ChunkParser::ParseRule() { |
| Rule* rule = new Rule(); |
| |
| StringPiece rule_name; |
| if (!lexer_.ReadIdent(&rule_name)) |
| return LexerError("expected rule name"); |
| rule->name_ = rule_name; |
| |
| if (!ExpectToken(Lexer::NEWLINE)) |
| return false; |
| |
| rule->parse_state_.rule_name_diag_pos = lexer_.GetLastTokenOffset(); |
| |
| while (lexer_.PeekIndent()) { |
| StringPiece key; |
| StringPiece value; |
| if (!ParseLet(&key, &value)) |
| return false; |
| |
| if (!Rule::IsReservedBinding(key)) { |
| // Die on other keyvals for now; revisit if we want to add a scope here. |
| // If we allow arbitrary key values here, we'll need to revisit how cycle |
| // detection works when evaluating a rule variable. |
| return LexerError("unexpected variable '" + key.AsString() + "'"); |
| } |
| |
| rule->bindings_.emplace_back(std::piecewise_construct, |
| std::tuple<StringPiece>(key), |
| std::tuple<const char*, size_t>(value.data(), |
| value.size())); |
| } |
| |
| if (static_cast<bool>(rule->GetBinding(kRspFile)) != |
| static_cast<bool>(rule->GetBinding(kRspFileContent))) { |
| return LexerError("rspfile and rspfile_content need to be both specified"); |
| } |
| |
| if (rule->GetBinding(kCommand) == nullptr) |
| return LexerError("expected 'command =' line"); |
| |
| Clump* clump = MakeClump(); |
| rule->pos_ = clump->AllocNextPos(); |
| clump->rules_.push_back(rule); |
| return true; |
| } |
| |
| bool ChunkParser::ParseEdge() { |
| Edge* edge = new Edge(); |
| |
| auto parse_path_list = [this, edge](Edge::DeferredPathList::Type type, int& count) -> bool { |
| const char* start_pos = lexer_.GetPos(); |
| while (true) { |
| LexedPath path; |
| std::string err; |
| if (!lexer_.ReadPath(&path, &err)) |
| return OutError(err); |
| if (path.str_.empty()) { |
| // In the initial parsing pass, the manifest's bindings aren't ready |
| // yet, so paths can't be evaluated. Rather than store the path itself |
| // (wasting memory), store just enough information to parse the path |
| // lists again once bindings are ready. |
| edge->parse_state_.deferred_path_lists.push_back({ |
| start_pos, type, count, |
| }); |
| return true; |
| } |
| count++; |
| } |
| }; |
| |
| if (!parse_path_list(Edge::DeferredPathList::OUTPUT, |
| edge->explicit_outs_)) |
| return false; |
| |
| // Add all implicit outs, counting how many as we go. |
| if (lexer_.PeekToken(Lexer::PIPE)) { |
| if (!parse_path_list(Edge::DeferredPathList::OUTPUT, |
| edge->implicit_outs_)) |
| return false; |
| } |
| |
| if (edge->explicit_outs_ + edge->implicit_outs_ == 0) |
| return LexerError("expected path"); |
| |
| if (!ExpectToken(Lexer::COLON)) |
| return false; |
| |
| StringPiece rule_name; |
| if (!lexer_.ReadIdent(&rule_name)) |
| return LexerError("expected build command name"); |
| edge->parse_state_.rule_name = rule_name; |
| edge->parse_state_.rule_name_diag_pos = lexer_.GetLastTokenOffset(); |
| |
| if (!parse_path_list(Edge::DeferredPathList::INPUT, |
| edge->explicit_deps_)) |
| return false; |
| |
| // Add all implicit deps, counting how many as we go. |
| if (lexer_.PeekToken(Lexer::PIPE)) { |
| if (!parse_path_list(Edge::DeferredPathList::INPUT, |
| edge->implicit_deps_)) |
| return false; |
| } |
| |
| // Add all order-only deps, counting how many as we go. |
| if (lexer_.PeekToken(Lexer::PIPE2)) { |
| if (!parse_path_list(Edge::DeferredPathList::INPUT, |
| edge->order_only_deps_)) |
| return false; |
| } |
| |
| // Add all validation deps, counting how many as we go. |
| if (lexer_.PeekToken(Lexer::PIPEAT)) { |
| if (!parse_path_list(Edge::DeferredPathList::VALIDATION, |
| edge->validation_deps_)) |
| return false; |
| } |
| |
| |
| if (!ExpectToken(Lexer::NEWLINE)) |
| return false; |
| |
| while (lexer_.PeekIndent()) { |
| StringPiece key; |
| StringPiece val; |
| if (!ParseLet(&key, &val)) |
| return false; |
| |
| std::tuple<const char*, size_t> val_ctor_params(val.data(), val.size()); |
| edge->unevaled_bindings_.emplace_back(std::piecewise_construct, |
| std::tuple<StringPiece>(key), |
| val_ctor_params); |
| } |
| |
| edge->parse_state_.final_diag_pos = lexer_.GetLastTokenOffset(); |
| |
| Clump* clump = MakeClump(); |
| edge->pos_ = clump->AllocNextPos(); |
| // Can't edge->onPosResolvedToScope(clump->pos_.scope.scope) right here, since |
| // clump->pos_.scope.scope is not set until DfsParser::HandleClump(). |
| clump->edges_.push_back(edge); |
| clump->edge_output_count_ += edge->explicit_outs_; |
| return true; |
| } |
| |
| bool ChunkParser::ParseChunk() { |
| while (true) { |
| if (lexer_.GetPos() >= chunk_end_) { |
| assert(lexer_.GetPos() == chunk_end_ && |
| "lexer scanned beyond the end of a manifest chunk"); |
| return true; |
| } |
| |
| Lexer::Token token = lexer_.ReadToken(); |
| bool success = true; |
| switch (token) { |
| case Lexer::INCLUDE: success = ParseFileInclude(false); break; |
| case Lexer::SUBNINJA: success = ParseFileInclude(true); break; |
| case Lexer::POOL: success = ParsePool(); break; |
| case Lexer::DEFAULT: success = ParseDefault(); break; |
| case Lexer::IDENT: success = ParseBinding(); break; |
| case Lexer::RULE: success = ParseRule(); break; |
| case Lexer::BUILD: success = ParseEdge(); break; |
| case Lexer::NEWLINE: break; |
| case Lexer::ERROR: return LexerError(lexer_.DescribeLastError()); |
| case Lexer::TNUL: return LexerError("unexpected NUL byte"); |
| case Lexer::TEOF: |
| assert(false && "EOF should have been detected before reading a token"); |
| break; |
| default: |
| return LexerError(std::string("unexpected ") + Lexer::TokenName(token)); |
| } |
| if (!success) return false; |
| } |
| return false; // not reached |
| } |
| |
| void ParseChunk(const LoadedFile& file, StringPiece chunk_content, |
| std::vector<ParserItem>* out, bool experimentalEnvvar) { |
| ChunkParser parser(file, chunk_content, out, experimentalEnvvar); |
| if (!parser.ParseChunk()) { |
| assert(!out->empty()); |
| assert(out->back().kind == ParserItem::kError); |
| assert(!out->back().u.error->msg_.empty()); |
| } |
| } |
| |
| std::vector<StringPiece> SplitManifestIntoChunks(StringPiece input) { |
| METRIC_RECORD(".ninja load : split manifest"); |
| |
| // The lexer requires that a NUL character appear after the file's content. |
| // Make the NUL implicit for the rest of the manifest parsing. |
| // |
| // In general, parsing a manifest chunk examines the terminating text after |
| // the chunk's explicit end. For example, suppose we have two consecutive |
| // chunks: |
| // - "build foo: gen\n pool = mypool\n" |
| // - "build bar: gen\n" |
| // The parser for the "foo" chunk will examine the start of "build" from the |
| // "bar" chunk when it looks for another indented edge binding. |
| assert(!input.empty() && input.back() == '\0' && |
| "input lacks a NUL terminator"); |
| input.remove_suffix(1); |
| |
| // For efficiency, avoid splitting small files. kChunkMinSize can be lowered |
| // to 0 for testing. |
| const size_t kChunkMinSize = 1024 * 1024; |
| |
| const size_t chunk_count = GetOptimalThreadPoolJobCount(); |
| const size_t chunk_size = std::max(kChunkMinSize, |
| input.size() / chunk_count + 1); |
| |
| std::vector<StringPiece> result; |
| result.reserve(chunk_count); |
| |
| size_t start = 0; |
| while (start < input.size()) { |
| size_t next = std::min(start + chunk_size, input.size()); |
| next = AdvanceToNextManifestChunk(input, next); |
| result.push_back(input.substr(start, next - start)); |
| start = next; |
| } |
| |
| return result; |
| } |
| |
| } // namespace manifest_chunk |