/*
 * Copyright 2014 The Kythe Authors. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// This file uses the Clang style conventions.
#include "CommandLineUtils.h"

#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <iterator>
#include <mutex>
#include <string>
#include <vector>

#include "absl/strings/str_format.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"
namespace kythe {
namespace common {
namespace {
/// \brief A `llvm::Regex` wrapper that only performs full matches on
/// non-empty strings.
/// The second restriction makes it easier to write long chains of 'or'-ed
/// regular expressions which may contain empty options without those silently
/// matching empty strings.
class FullMatchRegex {
/// \param Regex an extended-syntax regex to match.
explicit FullMatchRegex(llvm::StringRef Regex)
: InnerRegex("^(" + Regex.str() + ")$", llvm::Regex::NoFlags) {
std::string st;
if (!InnerRegex.isValid(st)) {
absl::FPrintF(stderr, "%s (regex was %s)\n", st, Regex.str());
assert(0 && "!InnerRegex.isValid()");
/// \return true if `String` is nonempty and a full match of this regex.
bool FullMatch(llvm::StringRef String) const {
std::lock_guard<std::mutex> MutexLock(RegexMutex);
llvm::SmallVector<llvm::StringRef, 1> Matches;
return !String.empty() && InnerRegex.match(String, &Matches);
mutable llvm::Regex InnerRegex;
/// This mutex protects `InnerRegex`, since `llvm::Regex` is not threadsafe.
mutable std::mutex RegexMutex;
} // anonymous namespace
// Decide what will the driver do based on the inputs found on the command
// line.
//
// "-c" clears the link flag, "-x <language>" selects the action from the
// named language, and any other argument is matched against the source-file
// patterns below for as long as the action is still UNKNOWN.
//
// NOTE(review): this view of the function looks incomplete -- the pattern
// argument of `fortran_file_re`, the statements that should follow the two
// Fortran conditions, and the closing braces are not visible here. Confirm
// against the complete file before relying on this description.
DriverAction DetermineDriverAction(const std::vector<std::string>& args) {
// Every pattern requires a first character other than '-', so command-line
// flags are never classified as input files.
const FullMatchRegex c_file_re("[^-].*\\.(c|i)");
const FullMatchRegex cxx_file_re("[^-].*\\.(C|c\\+\\+|cc|cp|cpp|cxx|CPP|ii)");
// NOTE(review): the pattern for this regex is missing from this view.
const FullMatchRegex fortran_file_re(
const FullMatchRegex go_file_re("[^-].*\\.go");
const FullMatchRegex asm_file_re("[^-].*\\.(s|S|sx)");
enum DriverAction action = UNKNOWN;
// Assume a link step until "-c" is seen.
bool is_link = true;
for (size_t i = 0; i < args.size(); ++i) {
const std::string& arg = args[i];
if (arg == "-c") {
is_link = false;
// "-x" is only honoured when another argument follows it.
} else if (arg == "-x" && i < args.size() - 1) {
// If we find -x, the language is being overridden by the user.
const std::string& language = args[i + 1];
if (language == "c++" || language == "c++-header" ||
language == "c++-cpp-output")
action = CXX_COMPILE;
else if (language == "c" || language == "c-header" ||
language == "cpp-output")
action = C_COMPILE;
else if (language == "assembler" || language == "assembler-with-cpp")
action = ASSEMBLY;
// NOTE(review): no statement follows this Fortran condition in this view.
else if (language == "f77" || language == "f77-cpp-input" ||
language == "f95" || language == "f95-cpp-input")
else if (language == "go")
action = GO_COMPILE;
} else if (action == UNKNOWN) {
// If we still have not recognized the input language, try to
// recognize it from the input file (in order of relative frequency).
if (cxx_file_re.FullMatch(arg)) {
action = CXX_COMPILE;
} else if (c_file_re.FullMatch(arg)) {
action = C_COMPILE;
} else if (asm_file_re.FullMatch(arg)) {
action = ASSEMBLY;
} else if (go_file_re.FullMatch(arg)) {
action = GO_COMPILE;
// NOTE(review): the body of this Fortran branch is missing from this view.
} else if (fortran_file_re.FullMatch(arg)) {
// If the user did not specify -c, then the linker will be invoked.
// Note that if the command line was something like "clang",
// it will be considered a LINK action.
if (is_link) return LINK;
return action;
// Returns true if a C or C++ source file (or other files we want Clang
// diagnostics for) appears in the given command line or args.
//
// The decision is delegated to DetermineDriverAction: the result is true
// exactly when that function reports CXX_COMPILE or C_COMPILE for
// `command_line_or_args`.
bool HasCxxInputInCommandLineOrArgs(
    const std::vector<std::string>& command_line_or_args) {
  const enum DriverAction action = DetermineDriverAction(command_line_or_args);
  return action == CXX_COMPILE || action == C_COMPILE;
}
// Returns a copy of the input vector with every string which matches the
// regular expression removed.
//
// `re` is applied to each element with FullMatchRegex::FullMatch; elements
// that do not match are copied to the result in their original order.
static std::vector<std::string> CopyOmittingMatches(
    const FullMatchRegex& re, const std::vector<std::string>& input) {
  std::vector<std::string> output;
  std::remove_copy_if(
      input.begin(), input.end(), std::back_inserter(output),
      [&re](const std::string& arg) { return re.FullMatch(arg); });
  return output;
}
// Returns a copy of the input vector after removing each string which matches
// the regular expression and one string immediately following the matching
// string.
//
// The follower is dropped without being tested against `re`. A match in the
// last position simply has no follower to drop.
static std::vector<std::string> CopyOmittingMatchesAndFollowers(
    const FullMatchRegex& re, const std::vector<std::string>& input) {
  std::vector<std::string> output;
  for (size_t i = 0; i < input.size(); ++i) {
    if (!re.FullMatch(input[i])) {
      output.push_back(input[i]);
    } else {
      ++i;  // Skip the matching string *and* the next string.
    }
  }
  return output;
}
// Returns a copy of the input vector with the supplied prefix string removed
// from any element of which it was a prefix.
//
// Elements that do not start with `prefix` are copied unchanged. An element
// equal to `prefix` becomes the empty string. Order and element count are
// preserved.
static std::vector<std::string> StripPrefix(
    const std::string& prefix, const std::vector<std::string>& input) {
  std::vector<std::string> output;
  output.reserve(input.size());
  const size_t prefix_size = prefix.size();
  for (const auto& arg : input) {
    // compare() clamps the length to the end of `arg`, so arguments shorter
    // than the prefix are handled safely and never match.
    if (arg.compare(0, prefix_size, prefix) == 0) {
      output.push_back(arg.substr(prefix_size));
    } else {
      output.push_back(arg);
    }
  }
  return output;
}
// Converts a GCC argument list into one intended for Clang: arguments
// matching `unsupported_args_re` are dropped, and the "-Xclang-only=" prefix
// is stripped from whatever remains.
//
// NOTE(review): parts of this function are not visible in this view -- the
// beginning of the `unsupported_args_re` pattern, the end of its declaration,
// one line of the return expression (its parentheses do not balance as
// shown), and the closing brace. `unsupported_args_with_values_re` is
// declared but not referenced in the visible lines. Confirm against the
// complete file.
std::vector<std::string> GCCArgsToClangArgs(
const std::vector<std::string>& gcc_args) {
// These are GCC-specific arguments which Clang does not yet understand or
// support without issuing ugly warnings, and cannot otherwise be suppressed.
const FullMatchRegex unsupported_args_re(
"|-f(no-)?warn-incomplete-patterns" // Why do we see this haskell flag?
"|-mcpu=.*" // For -mcpu=armv7-a, this leads to an assertion failure
// in llvm::ARM::getSubArch (and an error about an
// unsupported -mcpu); for cortex-a15, we get no such
// failure. TODO(zarko): Leave this filtered out for now,
// but figure out what to do to make this work properly.
// The name suggests this flag consumes the argument after it -- TODO confirm
// how it is applied, since the use site is not visible here.
const FullMatchRegex unsupported_args_with_values_re("-wrapper");
return StripPrefix("-Xclang-only=",
CopyOmittingMatches(unsupported_args_re, gcc_args)));
// Converts GCC arguments for a Clang syntax-only invocation by running them
// through GCCArgsToClangArgs and then AdjustClangArgsForSyntaxOnly.
std::vector<std::string> GCCArgsToClangSyntaxOnlyArgs(
    const std::vector<std::string>& gcc_args) {
  return AdjustClangArgsForSyntaxOnly(GCCArgsToClangArgs(gcc_args));
}
// Converts GCC arguments for a Clang analyzer invocation by running them
// through GCCArgsToClangArgs and then AdjustClangArgsForAnalyze.
std::vector<std::string> GCCArgsToClangAnalyzeArgs(
    const std::vector<std::string>& gcc_args) {
  return AdjustClangArgsForAnalyze(GCCArgsToClangArgs(gcc_args));
}
// Filters a Clang argument list for '-fsyntax-only' use: arguments matching
// `inapplicable_args_re` are removed, and the result is passed through
// CopyOmittingMatchesAndFollowers.
//
// NOTE(review): this view is incomplete -- the start and end of the
// `inapplicable_args_re` pattern, the regex argument of the
// CopyOmittingMatchesAndFollowers call, and the closing brace are not
// visible. Whether "-fsyntax-only" is appended before returning cannot be
// determined from the visible lines; confirm against the complete file.
std::vector<std::string> AdjustClangArgsForSyntaxOnly(
const std::vector<std::string>& clang_args) {
// These are arguments which are inapplicable to '-fsyntax-only' behavior, but
// are applicable to regular compilation.
const FullMatchRegex inapplicable_args_re(
"|-fsyntax-only" // We don't want multiple -fsyntax-only args.
// Matches -MF, -MT and -MQ. Presumably each consumes the following argument
// -- TODO confirm, since its use site is not visible here.
const FullMatchRegex inapplicable_args_with_values_re("-M[FTQ]");
std::vector<std::string> result = CopyOmittingMatchesAndFollowers(
CopyOmittingMatches(inapplicable_args_re, clang_args));
return result;
// Builds analyzer arguments from a Clang argument list: starts from the
// output of AdjustClangArgsForSyntaxOnly, replaces "-fsyntax-only", and
// appends extra "-Xanalyzer" options at the end.
//
// NOTE(review): this view is incomplete -- the replacement value passed to
// std::replace, the tail of the inserted initializer list, and the closing
// brace are not visible. Confirm against the complete file.
std::vector<std::string> AdjustClangArgsForAnalyze(
const std::vector<std::string>& clang_args) {
// --analyze is just like -fsyntax-only, except for the name of the
// flag itself.
std::vector<std::string> args = AdjustClangArgsForSyntaxOnly(clang_args);
std::replace(args.begin(), args.end(), std::string("-fsyntax-only"),
// cfg-temporary-dtors is still off by default in the analyzer, but analyzing
// that way would give us lots of false positives. This can go away once the
// temporary destructors support switches to on.
args.insert(args.end(), {"-Xanalyzer", "-analyzer-config", "-Xanalyzer",
return args;
// Converts a Clang argument list into one intended for GCC by removing
// Clang-specific arguments and passing the result through StripPrefix.
//
// NOTE(review): this view is incomplete -- both regex patterns, the prefix
// passed to StripPrefix, the inner filtering calls, and the closing brace are
// not visible. Confirm against the complete file.
std::vector<std::string> ClangArgsToGCCArgs(
const std::vector<std::string>& clang_args) {
// These are Clang-specific args which GCC does not understand.
const FullMatchRegex unsupported_args_re(
// TODO(zarko): Are plugin arguments sensible to keep?
const FullMatchRegex unsupported_args_with_values_re(
// It's important to remove the matches that have followers first -- those
// followers might match one of the flag regular expressions, and removing
// just the follower completely changes the semantics of the command.
return StripPrefix(
unsupported_args_with_values_re, clang_args)));
std::vector<std::string> AdjustClangArgsForAddressSanitizer(
const std::vector<std::string>& input) {
const FullMatchRegex inapplicable_flags_re("-static");
const FullMatchRegex inapplicable_flags_with_shared_re("-pie");
for (const auto& arg : input) {
if (arg == "-shared") {
return CopyOmittingMatches(
CopyOmittingMatches(inapplicable_flags_re, input));
return CopyOmittingMatches(inapplicable_flags_re, input);
// Builds a null-terminated, argv-style pointer array for `command`.
//
// The result holds one pointer per argument, in order, followed by a single
// trailing nullptr. The pointers refer to the character buffers owned by the
// strings in `command`; nothing is copied, so the result is only usable
// while `command` is alive and unmodified.
std::vector<char*> CommandLineToArgv(const std::vector<std::string>& command) {
  std::vector<char*> result;
  // One slot per argument plus the terminating nullptr.
  result.reserve(command.size() + 1);
  for (const auto& arg : command) {
    // argv-style APIs take `char*`; the const is cast away only to satisfy
    // that signature.
    result.push_back(const_cast<char*>(arg.c_str()));
  }
  result.push_back(nullptr);
  return result;
}
} // namespace common
} // namespace kythe