blob: 8315ab2a1bd7a25b13d864b61613d276b8a234b7 [file] [log] [blame]
/*
* Copyright 2016 The Kythe Authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kythe/cxx/indexer/cxx/frontend.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string>
#include "absl/memory/memory.h"
#include "absl/strings/str_format.h"
#include "gflags/gflags.h"
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/gzip_stream.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl.h"
#include "kythe/cxx/common/kzip_reader.h"
#include "kythe/cxx/common/path_utils.h"
#include "kythe/cxx/indexer/cxx/proto_conversions.h"
#include "kythe/proto/buildinfo.pb.h"
#include "kythe/proto/claim.pb.h"
#include "llvm/ADT/STLExtras.h"
// Command-line flags understood by the indexer frontend.

// Output and input paths; "-" selects stdout / stdin respectively.
DEFINE_string(o, "-", "Output filename");
DEFINE_string(i, "-", "Input filename");
DEFINE_bool(ignore_unimplemented, true,
"Continue indexing even if we find something we don't support.");
DEFINE_bool(flush_after_each_entry, true,
"Flush output after writing each entry.");
// Path handed to DecodeStaticClaimTable when non-empty.
DEFINE_string(static_claim, "", "Use a static claim table.");
DEFINE_bool(claim_unknown, true, "Process files with unknown claim status.");
// Hash cache configuration; min_size/max_size are only applied when a cache
// is configured.
DEFINE_string(cache, "", "Use a memcache instance (ex: \"--SERVER=foo:1234\")");
DEFINE_int32(min_size, 4096, "Minimum size of an entry bundle");
DEFINE_int32(max_size, 1024 * 32, "Maximum size of an entry bundle");
DEFINE_bool(cache_stats, false, "Show cache stats");
// Metadata attached to the compilation unit synthesized for -i inputs.
DEFINE_string(icorpus, "", "Corpus to use for files specified with -i");
DEFINE_string(ibuild_config, "",
"Build config to use for files specified with -i");
// When set, required-input vnames get their paths cleaned and their
// signatures cleared before a job is visited.
DEFINE_bool(normalize_file_vnames, false, "Normalize incoming file vnames.");
// A non-empty value selects the dynamic claim client over the static one.
DEFINE_string(experimental_dynamic_claim_cache, "",
"Use a memcache instance for dynamic claims (EXPERIMENTAL)");
// Setting this to a value > 1 allows the same object (e.g., a transcript of
// an include file) to be claimed multiple times. In the absence of transcript
// labels, setting this value to 1 means that only one environment will be
// considered when indexing a vname. This may result in (among other effects)
// conditionally included code never being indexed if the symbols checked differ
// between translation units.
DEFINE_uint64(experimental_dynamic_overclaim, 1,
"Maximum number of dynamic claims per claimable (EXPERIMENTAL)");
// In this file, --test_claim also enables recognition of the "silent:" input
// prefix.
DEFINE_bool(test_claim, false, "Use an in-memory claim database for testing.");
namespace kythe {
namespace {
/// The prefix prepended to silent inputs. Only checked when "--test_claim"
/// is enabled. Inputs carrying this prefix produce jobs with `silent` set.
constexpr char kSilentPrefix[] = "silent:";
/// The message type URI for the build details message. It is written as the
/// type_url of the BuildDetails entry added to a unit when --ibuild_config is
/// given.
constexpr char kBuildDetailsURI[] = "kythe.io/proto/kythe.proto.BuildDetails";
/// \brief Removes the silent-input marker from `argument`.
/// \param argument An input name as given on the command line.
/// \return the remainder of `argument` after `kSilentPrefix` when
/// "--test_claim" is enabled and the prefix is present; an empty string in
/// every other case.
llvm::StringRef strip_silent_input_prefix(llvm::StringRef argument) {
  const bool is_silent =
      FLAGS_test_claim && argument.startswith(kSilentPrefix);
  if (!is_silent) {
    return llvm::StringRef();
  }
  // sizeof counts the terminating NUL of the array, hence the -1.
  return argument.drop_front(sizeof(kSilentPrefix) - 1);
}
/// \brief Reads the output of the static claim tool.
///
/// `path` should be a file that contains a GZip-compressed sequence of
/// varint-prefixed wire format ClaimAssignment protobuf messages.
void DecodeStaticClaimTable(const std::string& path,
kythe::StaticClaimClient* client) {
using namespace google::protobuf::io;
int fd = open(path.c_str(), O_RDONLY, S_IREAD | S_IWRITE);
CHECK_GE(fd, 0) << "Couldn't open input file " << path;
FileInputStream file_input_stream(fd);
GzipInputStream gzip_input_stream(&file_input_stream);
google::protobuf::uint32 byte_size;
for (;;) {
CodedInputStream coded_input_stream(&gzip_input_stream);
coded_input_stream.SetTotalBytesLimit(INT_MAX, -1);
if (!coded_input_stream.ReadVarint32(&byte_size)) {
break;
}
coded_input_stream.PushLimit(byte_size);
kythe::proto::ClaimAssignment claim;
CHECK(claim.ParseFromCodedStream(&coded_input_stream));
// NB: We don't filter on compilation unit here. A dependency has three
// static states (wrt some CU): unknown, owned by CU, owned by another CU.
client->AssignClaim(claim.dependency_v_name(), claim.compilation_v_name());
}
close(fd);
}
/// \brief When --normalize_file_vnames is set, rewrites the vname of every
/// required input of `job` so that its path is cleaned and its signature is
/// cleared. Does nothing otherwise.
void MaybeNormalizeFileVNames(IndexerJob* job) {
  if (FLAGS_normalize_file_vnames) {
    for (auto& required : *job->unit.mutable_required_input()) {
      auto* vname = required.mutable_v_name();
      vname->set_path(CleanPath(vname->path()));
      vname->clear_signature();
    }
  }
}
/// \brief Copies the working directory recorded in the job's compilation unit
/// into the job itself.
///
/// Aborts (via CHECK) when that directory is empty or not an absolute path.
void UpdateJobWdirFromUnit(IndexerJob* job) {
  constexpr char kMissingWdirMessage[] =
      "Indexer jobs must have their absolute working directory set.";
  job->working_directory = job->unit.working_directory();
  CHECK(!job->working_directory.empty()) << kMissingWdirMessage;
  CHECK(llvm::sys::path::is_absolute(job->working_directory))
      << kMissingWdirMessage;
}
/// \brief Loads the contents of a .kindex file into memory.
///
/// The file is read as a GZip-compressed sequence of varint-prefixed messages.
/// When `unit` is non-null the first message is decoded into it; every other
/// message is decoded as a FileData and must carry an `info` field.
///
/// \param path The path from which the file should be read.
/// \param virtual_files A vector to which each decoded FileData is appended.
/// \param unit A `CompilationUnit` to be decoded from the .kindex.
///
/// Aborts (via CHECK) on open or parse failure, or when `unit` was requested
/// but the file held no messages.
void DecodeIndexFile(const std::string& path,
                     std::vector<proto::FileData>* virtual_files,
                     proto::CompilationUnit* unit) {
  namespace pbio = google::protobuf::io;
  int fd = open(path.c_str(), O_RDONLY, S_IREAD | S_IWRITE);
  CHECK_GE(fd, 0) << "Couldn't open input file " << path;
  pbio::FileInputStream raw_stream(fd);
  pbio::GzipInputStream inflated_stream(&raw_stream);
  while (true) {
    // A new coded stream is constructed for every record.
    pbio::CodedInputStream coded_input_stream(&inflated_stream);
    coded_input_stream.SetTotalBytesLimit(INT_MAX, -1);
    google::protobuf::uint32 record_size;
    if (!coded_input_stream.ReadVarint32(&record_size)) {
      break;
    }
    coded_input_stream.PushLimit(record_size);
    if (unit) {
      CHECK(unit->ParseFromCodedStream(&coded_input_stream));
      // Clearing the pointer marks the unit as seen; later records are files.
      unit = nullptr;
      continue;
    }
    proto::FileData content;
    CHECK(content.ParseFromCodedStream(&coded_input_stream));
    CHECK(content.has_info());
    virtual_files->push_back(std::move(content));
  }
  CHECK(!unit) << "Never saw a CompilationUnit.";
  close(fd);
}
/// \brief Reads all compilations from a .kzip file into memory.
/// \param path The path from which the file should be read.
/// \param silent The silent flag is copied to each of the jobs created from the
/// kzip file.
/// \param visit Callback invoked with the job built for each compilation found
/// in the kzip.
///
/// Aborts (via CHECK) if the kzip cannot be opened, a compilation unit or one
/// of its required inputs cannot be read, the scan fails, or the kzip holds no
/// compilation at all.
void DecodeKZipFile(const std::string& path, bool silent,
                    const IndexerContext::CompilationVisitCallback& visit) {
  StatusOr<IndexReader> reader = kythe::KzipReader::Open(path);
  CHECK(reader) << "Couldn't open kzip from " << path;
  bool compilation_read = false;
  auto status = reader->Scan([&](absl::string_view digest) {
    IndexerJob job;
    job.silent = silent;
    auto compilation = reader->ReadUnit(digest);
    // The result must be verified before it is dereferenced below, exactly as
    // is done for each file read.
    CHECK(compilation) << "Unable to read compilation unit with digest: "
                       << digest << ": " << compilation.status();
    for (const auto& file : compilation->unit().required_input()) {
      auto content = reader->ReadFile(file.info().digest());
      CHECK(content) << "Unable to read file with digest: "
                     << file.info().digest() << ": " << content.status();
      proto::FileData file_data;
      file_data.set_content(*content);
      file_data.mutable_info()->set_path(file.info().path());
      file_data.mutable_info()->set_digest(file.info().digest());
      job.virtual_files.push_back(std::move(file_data));
    }
    job.unit = compilation->unit();
    UpdateJobWdirFromUnit(&job);
    MaybeNormalizeFileVNames(&job);
    visit(job);
    compilation_read = true;
    // Returning true asks Scan to continue with the next unit.
    return true;
  });
  CHECK(status.ok()) << status.ToString();
  CHECK(compilation_read) << "Missing compilation in " << path;
}
} // anonymous namespace
/// \brief Builds the usage text shown for a command-line frontend.
/// \param program_title Human-readable tool name, inserted after
/// "Command-line frontend for ".
/// \param program_name Executable name used at the start of each example
/// command line.
/// \return the complete usage message.
std::string IndexerContext::UsageMessage(const std::string& program_title,
                                         const std::string& program_name) {
  std::string message = "Command-line frontend for " + program_title;
  // The raw string ends with a newline after "Examples:" so that the first
  // example starts on its own line instead of being glued to the heading.
  message.append(R"(.
Invokes the program on compilation unit(s). By default reads source text from
stdin and writes binary Kythe artifacts to stdout as a sequence of Entry
protos. Command-line arguments may be passed to the underlying compiler, if
one should exist, as positional parameters.
There may be a positional parameter specified that ends in .kzip or .kindex
(deprecated). If one exists, no other positional parameters may be specified,
nor may an additional input parameter be specified. Input will be read from the
index file.
If -test_claim is specified, you may specify that one or more kindex inputs
should not produce any output by prepending the prefix "silent:" to the input's
name.
Examples:
)");
  message.append(program_name + " some/index.kindex\n");
  message.append(program_name + " -i foo.cc -o foo.bin -- -DINDEXING\n");
  message.append(program_name + " -i foo.cc | verifier foo.cc");
  return message;
}
bool IndexerContext::HasIndexArguments() {
for (const auto& arg : args_) {
auto path = llvm::StringRef(arg);
if (path.endswith(".kindex") || path.endswith(".kzip")) {
CHECK_EQ("-", FLAGS_i)
<< "No other input is allowed when reading from an index file or an "
<< "index pack.";
return true;
}
}
return false;
}
/// \brief Loads the compilation(s) stored in one index argument and passes
/// each resulting job to `visit`.
/// \param file_or_cu The argument as given; it may carry the silent prefix.
/// \param visit Callback invoked for every job that was loaded.
///
/// Arguments ending in ".kzip" are read as kzip files; everything else is
/// read as a .kindex file.
void IndexerContext::LoadDataFromIndex(const std::string& file_or_cu,
                                       const CompilationVisitCallback& visit) {
  // A non-empty result means the silent prefix was present and stripped.
  std::string index_path = strip_silent_input_prefix(file_or_cu);
  const bool silent = !index_path.empty();
  if (!silent) {
    index_path = file_or_cu;
  }
  if (!llvm::StringRef(file_or_cu).endswith(".kzip")) {
    IndexerJob job;
    job.silent = silent;
    DecodeIndexFile(index_path, &job.virtual_files, &job.unit);
    UpdateJobWdirFromUnit(&job);
    MaybeNormalizeFileVNames(&job);
    visit(job);
    return;
  }
  DecodeKZipFile(index_path, silent, visit);
}
void IndexerContext::LoadDataFromUnpackedFile(
const std::string& default_filename,
const CompilationVisitCallback& visit) {
IndexerJob job;
int read_fd = STDIN_FILENO;
std::string source_file_name = default_filename;
llvm::SmallString<1024> cwd;
CHECK(!llvm::sys::fs::current_path(cwd));
job.working_directory = cwd.str();
if (FLAGS_i != "-") {
read_fd = open(FLAGS_i.c_str(), O_RDONLY);
if (read_fd == -1) {
perror("Can't open input file");
exit(1);
}
source_file_name = FLAGS_i;
}
args_.push_back(source_file_name);
char buf[1024];
llvm::SmallString<1024> source_data;
ssize_t amount_read;
while ((amount_read = read(read_fd, buf, 1024)) > 0) {
source_data.append(llvm::StringRef(buf, amount_read));
}
if (amount_read < 0) {
perror("Error reading input file");
exit(1);
}
close(read_fd);
// clang wants the source file to be null-terminated, but this should
// not be in range of the StringRef. std::string ends with \0.
proto::FileData file_data;
file_data.mutable_info()->set_path(source_file_name);
file_data.set_content(source_data.str());
job.virtual_files.push_back(std::move(file_data));
job.unit.add_source_file(source_file_name);
for (const auto& arg : args_) {
job.unit.add_argument(arg);
}
job.unit.mutable_v_name()->set_corpus(FLAGS_icorpus);
if (!FLAGS_ibuild_config.empty()) {
proto::BuildDetails details;
details.set_build_config(FLAGS_ibuild_config);
auto* any = job.unit.add_details();
any->PackFrom(details);
any->set_type_url(kBuildDetailsURI);
}
MaybeNormalizeFileVNames(&job);
visit(job);
}
/// \brief Creates the claim client selected by the command-line flags.
///
/// A non-empty --experimental_dynamic_claim_cache selects a DynamicClaimClient
/// backed by that memcache; the process exits with status 1 if it cannot be
/// opened. Otherwise a StaticClaimClient is used, loaded from --static_claim
/// when that flag is non-empty.
void IndexerContext::InitializeClaimClient() {
  if (FLAGS_experimental_dynamic_claim_cache.empty()) {
    auto static_claims = absl::make_unique<kythe::StaticClaimClient>();
    if (!FLAGS_static_claim.empty()) {
      DecodeStaticClaimTable(FLAGS_static_claim, static_claims.get());
    }
    static_claims->set_process_unknown_status(FLAGS_claim_unknown);
    claim_client_ = std::move(static_claims);
    return;
  }
  auto dynamic_claims = absl::make_unique<kythe::DynamicClaimClient>();
  dynamic_claims->set_max_redundant_claims(
      FLAGS_experimental_dynamic_overclaim);
  const bool opened =
      dynamic_claims->OpenMemcache(FLAGS_experimental_dynamic_claim_cache);
  if (!opened) {
    absl::FPrintF(stderr, "Can't open memcached\n");
    exit(1);
  }
  claim_client_ = std::move(dynamic_claims);
}
/// \brief Opens the output destination and wraps it in the output streams.
///
/// Output goes to stdout when -o is "-"; otherwise the named file is created
/// or truncated. Exits the process with status 1 if the file cannot be opened.
void IndexerContext::OpenOutputStreams() {
  if (FLAGS_o == "-") {
    write_fd_ = STDOUT_FILENO;
  } else {
    write_fd_ = ::open(FLAGS_o.c_str(), O_WRONLY | O_CREAT | O_TRUNC,
                       S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (write_fd_ == -1) {
      ::perror("Can't open output file");
      ::exit(1);
    }
  }
  raw_output_ =
      absl::make_unique<google::protobuf::io::FileOutputStream>(write_fd_);
  // The Kythe stream is layered on top of the raw protobuf stream.
  kythe_output_ = absl::make_unique<kythe::FileOutputStream>(raw_output_.get());
  kythe_output_->set_show_stats(FLAGS_cache_stats);
  kythe_output_->set_flush_after_each_entry(FLAGS_flush_after_each_entry);
}
/// \brief Destroys the output streams and closes the output descriptor.
///
/// Does nothing when no Kythe output stream exists. Exits the process with
/// status 1 if closing the descriptor fails.
void IndexerContext::CloseOutputStreams() {
  if (!kythe_output_) {
    return;
  }
  // Release the Kythe stream before the raw stream it was built on.
  kythe_output_.reset();
  raw_output_.reset();
  const int close_result = ::close(write_fd_);
  if (close_result != 0) {
    ::perror("Error closing output file");
    ::exit(1);
  }
}
void IndexerContext::OpenHashCache() {
if (!FLAGS_cache.empty()) {
auto memcache_hash_cache = llvm::make_unique<MemcachedHashCache>();
CHECK(memcache_hash_cache->OpenMemcache(FLAGS_cache));
memcache_hash_cache->SetSizeLimits(FLAGS_min_size, FLAGS_max_size);
hash_cache_ = std::move(memcache_hash_cache);
}
}
/// \brief Sets up the indexer context from command-line arguments and flags.
/// \param args The command-line arguments; empty strings are discarded.
/// \param default_filename Name used for source text read from stdin.
///
/// Construction also creates the claim client, opens the output streams and,
/// if configured, the hash cache.
IndexerContext::IndexerContext(const std::vector<std::string>& args,
                               const std::string& default_filename)
    : args_(args),
      default_filename_(default_filename),
      ignore_unimplemented_(FLAGS_ignore_unimplemented) {
  // Drop every empty argument.
  const auto first_removed =
      std::remove_if(args_.begin(), args_.end(),
                     [](const std::string& arg) { return arg.empty(); });
  args_.erase(first_removed, args_.end());
  // Inputs are "unpacked" when no argument names an index file.
  unpacked_inputs_ = !HasIndexArguments();
  InitializeClaimClient();
  OpenOutputStreams();
  OpenHashCache();
}
/// Closes the output streams, if they are still open, when the context is
/// destroyed.
IndexerContext::~IndexerContext() { CloseOutputStreams(); }
void IndexerContext::EnumerateCompilations(
const CompilationVisitCallback& visit) {
if (unpacked_inputs_) {
LoadDataFromUnpackedFile(default_filename_, visit);
} else {
for (size_t arg = 1; arg < args_.size(); ++arg) {
LoadDataFromIndex(args_[arg], visit);
}
}
}
} // namespace kythe