Build C++ extractor for Android.
The extractor has to match the Android C++ toolchain, and it's desirable
to avoid pulling in too many additional dependencies. This requires
making the following changes:
* protobuf code in the Android source tree is old, so downgrade some calls
in json_proto.cc and cxx_extractor.cc (plus, pull in the `sstream` header explicitly)
* use libziparchive to manipulate Zip files (upstream uses libzip)
* fix argument type when calling PragmaHandler::HandlePragma (another
downgrade)
* suppress warnings about unknown pragma
* downgrade include file lookup in path_utils.cc
* provide stub to use Android's logging instead of Google logging.
* remove dependency on uuid library, rely on the LLVM API to create
a temporary file with a unique name
Bug: 121267023
Test: build cxx_extractor in clang-tools branch, use it to generate
.kzip files
Change-Id: I5df762b549ba406c23e05f2836acb8f5bc5f0120
diff --git a/Android.bp b/Android.bp
index 116e854..0238866 100644
--- a/Android.bp
+++ b/Android.bp
@@ -66,3 +66,86 @@
],
java_version: "1.9",
}
+
+// Compiler flags shared by the Kythe host modules in this file (referenced via
+// `defaults` by kythe_cxx_glog and cxx_extractor): -Wall/-Wextra with selected
+// warnings disabled, and exceptions and RTTI turned off.
+cc_defaults {
+ name: "kythe_common_defaults",
+ cflags: [
+ "-Wall",
+ "-Wextra",
+ "-Wno-deprecated",
+ "-Wno-missing-field-initializers",
+ "-Wno-unused-parameter",
+ "-Wno-unused-private-field",
+ "-Wno-sign-compare",
+ "-Wno-unused-variable",
+ "-fno-exceptions",
+ "-fno-rtti",
+ ],
+}
+
+// Host static library built from the glog stand-in source
+// (kythe/cxx/glog/logging.cc). "kythe/cxx" is exported as an include directory,
+// presumably so that `#include "glog/logging.h"` picks up
+// kythe/cxx/glog/logging.h in dependent modules.
+cc_library_host_static {
+ name: "kythe_cxx_glog",
+ srcs: ["kythe/cxx/glog/logging.cc"],
+ export_include_dirs: ["kythe/cxx"],
+ defaults: ["kythe_common_defaults"],
+ shared_libs: ["libbase"],
+}
+
+// Host binary for the Kythe C++ extractor. The source list uses
+// kzip_writer_aosp.cc (with libziparchive among the shared libraries), and the
+// kythe_cxx_glog static library is linked to provide glog/logging.h.
+cc_binary_host {
+ name: "cxx_extractor",
+ defaults: [
+ "kythe_common_defaults",
+ ],
+ srcs: [
+ "kythe/cxx/common/file_utils.cc",
+ "kythe/cxx/common/file_vname_generator.cc",
+ "kythe/cxx/common/index_writer.cc",
+ "kythe/cxx/common/json_proto.cc",
+ "kythe/cxx/common/kythe_metadata_file.cc",
+ "kythe/cxx/common/kzip_writer_aosp.cc",
+ "kythe/cxx/common/path_utils.cc",
+ "kythe/cxx/common/protobuf_metadata_file.cc",
+ "kythe/cxx/common/schema/edges.cc",
+ "kythe/cxx/common/status.cc",
+ "kythe/cxx/extractor/CommandLineUtils.cc",
+ "kythe/cxx/extractor/cxx_details.cc",
+ "kythe/cxx/extractor/cxx_extractor.cc",
+ "kythe/cxx/extractor/cxx_extractor_main.cc",
+ "kythe/cxx/extractor/index_pack.cc",
+ "kythe/cxx/extractor/language.cc",
+ "kythe/cxx/extractor/path_utils.cc",
+ "kythe/cxx/indexer/cxx/clang_utils.cc",
+ "third_party/llvm/src/cxx_extractor_preprocessor_utils.cc",
+ "third_party/llvm/src/clang_builtin_headers.cc",
+ // Module reference (leading ':'), not a file path.
+ ":kythe_protos",
+ ],
+ shared_libs: [
+ "libbase",
+ "libLLVM_host",
+ "libclang_cxx_host",
+ "libprotobuf-cpp-full",
+ "libziparchive",
+ ],
+ proto: {
+ include_dirs: [
+ "external/kythe",
+ "external/protobuf/src",
+ ],
+ type: "full",
+ canonical_path_from_root: false,
+ },
+ header_libs: [
+ "libabsl_headers",
+ "rapidjson_headers",
+ ],
+ static_libs: [
+ "kythe_cxx_glog",
+ "regex-re2",
+ "libabsl_strings",
+ "libabsl_base",
+ "libabsl_numeric",
+ "libcrypto",
+ "libgflags",
+ ],
+ generated_headers: ["clang_builtin_headers_resources"],
+}
diff --git a/kythe/cxx/common/json_proto.cc b/kythe/cxx/common/json_proto.cc
index 1398cfc..0901322 100644
--- a/kythe/cxx/common/json_proto.cc
+++ b/kythe/cxx/common/json_proto.cc
@@ -88,7 +88,7 @@
auto resolver =
MakeTypeResolverForPool(message.GetDescriptor()->file()->pool());
- google::protobuf::util::JsonPrintOptions options;
+ google::protobuf::util::JsonOptions options;
options.preserve_proto_field_names = true;
auto status = google::protobuf::util::BinaryToJsonString(
@@ -171,12 +171,10 @@
auto resolver =
MakeTypeResolverForPool(message->GetDescriptor()->file()->pool());
- google::protobuf::util::JsonParseOptions options;
- options.case_insensitive_enum_parsing = false;
+// ANDROID_BUILD: our protobuf code is old
+// and lacks google::protobuf::util::JsonOptions.case_insensitive_enum_parsing
auto status = google::protobuf::util::JsonToBinaryString(
- resolver.get(), message->GetDescriptor()->full_name(), content, &binary,
- options);
-
+ resolver.get(), message->GetDescriptor()->full_name(), content, &binary);
if (!status.ok()) {
LOG(ERROR) << status.ToString() << ": " << content;
return false;
@@ -193,12 +191,10 @@
std::string binary;
google::protobuf::io::StringOutputStream output(&binary);
- google::protobuf::util::JsonParseOptions options;
- options.case_insensitive_enum_parsing = false;
+ // ANDROID_BUILD: our protobuf code is old
+ // and lacks google::protobuf::util::JsonOptions.case_insensitive_enum_parsing
auto status = google::protobuf::util::JsonToBinaryStream(
- resolver.get(), message->GetDescriptor()->full_name(), input, &output,
- options);
-
+ resolver.get(), message->GetDescriptor()->full_name(), input, &output);
if (!status.ok()) {
return Status(static_cast<StatusCode>(status.error_code()),
std::string(status.error_message()));
diff --git a/kythe/cxx/common/kzip_writer_aosp.cc b/kythe/cxx/common/kzip_writer_aosp.cc
new file mode 100644
index 0000000..ee514d4
--- /dev/null
+++ b/kythe/cxx/common/kzip_writer_aosp.cc
@@ -0,0 +1,141 @@
+#include "kythe/cxx/common/kzip_writer_aosp.h"
+
+#include <openssl/sha.h>
+
+#include <array>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <string>
+
+#include "absl/memory/memory.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/str_cat.h"
+#include "kythe/cxx/common/json_proto.h"
+#include "kythe/proto/analysis.pb.h"
+
+namespace kythe {
+namespace {
+
+constexpr absl::string_view kRoot = "root/";
+constexpr absl::string_view kUnitRoot = "root/units/";
+constexpr absl::string_view kFileRoot = "root/files/";
+
+}
+
+// Hashes `content` with SHA-256 and returns the digest bytes rendered through
+// absl::BytesToHexString.
+std::string KzipWriter::SHA256Digest(absl::string_view content) {
+  std::array<unsigned char, SHA256_DIGEST_LENGTH> digest;
+  const auto* input = reinterpret_cast<const unsigned char*>(content.data());
+  ::SHA256(input, content.size(), digest.data());
+  const absl::string_view raw(reinterpret_cast<const char*>(digest.data()),
+                              digest.size());
+  return absl::BytesToHexString(raw);
+}
+
+// Writes `content` as a single archive entry named `path`, started with the
+// ZipWriter::kCompress flag. The three ZipWriter calls run in order and the
+// sequence stops at the first one that returns a non-zero code; that code is
+// reported as an InternalError. Returns OkStatus() when all three succeed.
+Status KzipWriter::WriteTextFile(const std::string& path,
+                                 absl::string_view content) {
+  int32_t err = zip_writer_.StartEntry(path.c_str(), ZipWriter::kCompress);
+  if (err == 0) {
+    err = zip_writer_.WriteBytes(content.data(), content.size());
+    if (err == 0) {
+      err = zip_writer_.FinishEntry();
+    }
+  }
+  if (err != 0) {
+    return InternalError(ZipWriter::ErrorCodeString(err));
+  }
+  return OkStatus();
+}
+
+// Adds one empty archive entry for each of kRoot, kUnitRoot and kFileRoot the
+// first time it is called; later calls return 0 immediately.
+//
+// Returns 0 on success, or the first non-zero ZipWriter code encountered.
+// `initialized_` is set before any entry is written, so a failed attempt is
+// not retried by subsequent calls. The names are passed via data(), which is
+// NUL-terminated here only because the constants are string literals.
+int32_t KzipWriter::InitializeArchive() {
+  if (!initialized_) {
+    initialized_ = true;
+    for (const auto dir : {kRoot, kUnitRoot, kFileRoot}) {
+      const int32_t start_rc = zip_writer_.StartEntry(dir.data(), 0);
+      if (start_rc != 0) {
+        return start_rc;
+      }
+      const int32_t finish_rc = zip_writer_.FinishEntry();
+      if (finish_rc != 0) {
+        return finish_rc;
+      }
+    }
+  }
+  return 0;
+}
+
+
+/* static */
+// Opens `path` with fopen(..., "wb") and wraps the resulting stream in a
+// KzipWriter owned by the returned IndexWriter.
+//
+// `path` is copied into a std::string before the fopen call because an
+// absl::string_view is not guaranteed to be NUL-terminated; handing its data()
+// straight to fopen could read past the end of the view.
+//
+// Returns an UnimplementedError carrying strerror(errno) when fopen fails.
+StatusOr<IndexWriter> KzipWriter::Create(absl::string_view path) {
+  const std::string path_str(path);
+  FILE* fp = fopen(path_str.c_str(), "wb");
+  if (fp == nullptr) {
+    return UnimplementedError(strerror(errno));
+  }
+  return IndexWriter(absl::WrapUnique(new KzipWriter(fp)));
+}
+
+// Stores `fp` and constructs `zip_writer_` from the same stream; the archive
+// starts out uninitialized.
+KzipWriter::KzipWriter(FILE* fp)
+    : fp_(fp), zip_writer_(fp), initialized_(false) {}
+
+// Checks (via DCHECK) that fp_ has already been reset to nullptr, which Close()
+// does after closing the file. The destructor itself closes nothing.
+KzipWriter::~KzipWriter() {
+ DCHECK(fp_ == nullptr) << "KzipWriterAosp::Close was not called!";
+}
+
+// Serializes `unit` to JSON and stores it under kUnitRoot, keyed by the digest
+// of the JSON text.
+//
+// Returns the digest on success. Errors from InitializeArchive(), from the
+// JSON conversion, or from writing the entry are returned instead; when the
+// write fails, the freshly inserted map entry is removed again. If an entry
+// with the same digest was already recorded, nothing is written and the
+// digest is returned.
+StatusOr<std::string> KzipWriter::WriteUnit(
+    const kythe::proto::IndexedCompilation& unit) {
+  const int32_t init_rc = InitializeArchive();
+  if (init_rc != 0) {
+    return InternalError(ZipWriter::ErrorCodeString(init_rc));
+  }
+  auto json = WriteMessageAsJsonToString(unit);
+  if (!json) {
+    return json.status();
+  }
+  // InsertFile takes string views, so the JSON text is copied into the map
+  // only when the entry is new.
+  auto entry = InsertFile(kUnitRoot, *json);
+  if (entry.inserted()) {
+    auto write_status = WriteTextFile(entry.path(), entry.contents());
+    if (!write_status.ok()) {
+      contents_.erase(entry.path());
+      return write_status;
+    }
+  }
+  return std::string(entry.digest());
+}
+
+// Stores `content` under kFileRoot, keyed by its digest.
+//
+// Returns the digest on success. If an entry with the same digest was already
+// recorded, nothing is written. Errors from InitializeArchive() or from
+// writing the entry are returned instead; when the write fails, the freshly
+// inserted map entry is removed again.
+StatusOr<std::string> KzipWriter::WriteFile(absl::string_view content) {
+  const int32_t init_rc = InitializeArchive();
+  if (init_rc != 0) {
+    return InternalError(ZipWriter::ErrorCodeString(init_rc));
+  }
+  auto entry = InsertFile(kFileRoot, content);
+  if (!entry.inserted()) {
+    return std::string(entry.digest());
+  }
+  auto write_status = WriteTextFile(entry.path(), entry.contents());
+  if (write_status.ok()) {
+    return std::string(entry.digest());
+  }
+  contents_.erase(entry.path());
+  return write_status;
+}
+
+// Finishes the zip archive and closes the underlying stream.
+//
+// Returns, in order of precedence: an InternalError built from the ZipWriter
+// code when Finish() reports a non-zero value; an InternalError carrying
+// strerror(errno) when fclose() fails; OkStatus() otherwise. fp_ is reset to
+// nullptr in every case, and fclose() is skipped when fp_ is already null so
+// that a repeated call never passes a null stream to fclose().
+Status KzipWriter::Close() {
+  const int32_t finish_rc = zip_writer_.Finish();
+  bool close_failed = false;
+  int close_errno = 0;
+  if (fp_ != nullptr) {
+    // fclose() flushes any buffered output, so a failure here means the file
+    // on disk may be incomplete even though Finish() succeeded.
+    if (fclose(fp_) != 0) {
+      close_failed = true;
+      close_errno = errno;  // Capture before any later call can overwrite it.
+    }
+    fp_ = nullptr;
+  }
+  if (finish_rc != 0) {
+    return InternalError(ZipWriter::ErrorCodeString(finish_rc));
+  }
+  if (close_failed) {
+    return InternalError(strerror(close_errno));
+  }
+  return OkStatus();
+}
+
+// Records `content` in `contents_` under the key `root` + SHA256Digest(content)
+// and returns the emplace result wrapped in an InsertionResult.
+//
+// The map entry is first created with an empty string; `content` is copied in
+// only when the key was not present before, so an existing entry keeps its
+// stored value.
+auto KzipWriter::InsertFile(absl::string_view root, absl::string_view content)
+    -> InsertionResult {
+  const std::string key = absl::StrCat(root, SHA256Digest(content));
+  InsertionResult outcome{contents_.emplace(key, "")};
+  if (!outcome.inserted()) {
+    return outcome;
+  }
+  outcome.insertion.first->second = std::string(content);
+  return outcome;
+}
+
+// Returns the part of path() that follows the last '/', or all of path() when
+// it contains no '/'.
+inline absl::string_view KzipWriter::InsertionResult::digest() const {
+  const auto slash = path().find_last_of('/');
+  return slash == absl::string_view::npos
+             ? absl::string_view(path())
+             : absl::ClippedSubstr(path(), slash + 1);
+}
+
+} // namespace kythe
diff --git a/kythe/cxx/common/kzip_writer_aosp.h b/kythe/cxx/common/kzip_writer_aosp.h
new file mode 100644
index 0000000..a1483cc
--- /dev/null
+++ b/kythe/cxx/common/kzip_writer_aosp.h
@@ -0,0 +1,64 @@
+#ifndef KYTHE_CXX_COMMON_KZIP_WRITER_AOSP_H_
+#define KYTHE_CXX_COMMON_KZIP_WRITER_AOSP_H_
+
+#include <unordered_map>
+
+#include "absl/strings/string_view.h"
+#include "kythe/cxx/common/index_writer.h"
+#include "kythe/cxx/common/status_or.h"
+#include "kythe/proto/analysis.pb.h"
+#include "ziparchive/zip_writer.h"
+
+namespace kythe {
+
+/// \brief Kzip implementation of IndexWriter for AOSP.
+/// See https://www.kythe.io/docs/kythe-kzip.html for the format description.
+/// Entries are written through the ZipWriter declared in
+/// ziparchive/zip_writer.h.
+class KzipWriter : public IndexWriterInterface {
+ public:
+ /// \brief Constructs a Kzip IndexWriter which will create and write to
+ /// the file at `path`.
+ /// \param path Path to the file to create. Must not currently exist.
+ /// NOTE(review): the implementation opens the file with fopen(..., "wb"),
+ /// so whether an existing file is actually rejected should be confirmed.
+ static StatusOr<IndexWriter> Create(absl::string_view path);
+
+ /// \brief Destroys the KzipWriter.
+ ~KzipWriter() override;
+
+ /// \brief Writes the unit to the kzip file, returning its digest.
+ StatusOr<std::string> WriteUnit(
+ const kythe::proto::IndexedCompilation& unit) override;
+
+ /// \brief Writes the file contents to the kzip file, returning their digest.
+ StatusOr<std::string> WriteFile(absl::string_view content) override;
+
+ /// \brief Flushes accumulated writes and closes the kzip file.
+ /// Close must be called before the KzipWriter is destroyed!
+ Status Close() override;
+
+ private:
+ // The map is keyed by archive path and stores the content recorded for it.
+ using Path = std::string;
+ using Contents = std::string;
+ using FileMap = std::unordered_map<Path, Contents>;
+
+ // Thin wrapper around the (iterator, bool) pair returned by
+ // FileMap::emplace.
+ struct InsertionResult {
+ // Portion of path() after its last '/' (all of path() if there is none).
+ absl::string_view digest() const;
+ // Key of the map entry the iterator points at.
+ const std::string& path() const { return insertion.first->first; }
+ // Value of the map entry the iterator points at.
+ absl::string_view contents() const { return insertion.first->second; }
+ // True when emplace created a new entry.
+ bool inserted() const { return insertion.second; }
+
+ std::pair<FileMap::iterator, bool> insertion;
+ };
+
+ // Private; instances are created through Create().
+ explicit KzipWriter(FILE *fp);
+
+ // Records `content` in contents_ under `root` + its SHA-256 digest.
+ InsertionResult InsertFile(absl::string_view root, absl::string_view content);
+ // Writes `content` as one archive entry named `path`.
+ Status WriteTextFile(const std::string& path, absl::string_view content);
+ // Adds the root directory entries on first use; returns a ZipWriter code.
+ int32_t InitializeArchive();
+ // Hex-encoded SHA-256 of `content`.
+ static std::string SHA256Digest(absl::string_view content);
+
+ FILE *fp_; // Stream given to the constructor; Close() resets it to nullptr.
+ ZipWriter zip_writer_;
+ bool initialized_ = false; // Set once InitializeArchive() has been attempted.
+ FileMap contents_;
+};
+
+} // namespace kythe
+#endif // KYTHE_CXX_COMMON_KZIP_WRITER_AOSP_H_
diff --git a/kythe/cxx/common/protobuf_metadata_file.h b/kythe/cxx/common/protobuf_metadata_file.h
index 2daea72..5323131 100644
--- a/kythe/cxx/common/protobuf_metadata_file.h
+++ b/kythe/cxx/common/protobuf_metadata_file.h
@@ -18,6 +18,8 @@
#define KYTHE_CXX_COMMON_PROTOBUF_METADATA_FILE_H_
#include <memory>
+// ANDROID_BUILD: need <sstream> for std::stringstream
+#include <sstream>
#include "glog/logging.h"
#include "google/protobuf/descriptor.pb.h"
diff --git a/kythe/cxx/extractor/cxx_extractor.cc b/kythe/cxx/extractor/cxx_extractor.cc
index 140fff3..e43d69f 100644
--- a/kythe/cxx/extractor/cxx_extractor.cc
+++ b/kythe/cxx/extractor/cxx_extractor.cc
@@ -33,6 +33,8 @@
#include "absl/strings/string_view.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendAction.h"
+// ANDROID_BUILD
+#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
@@ -41,7 +43,8 @@
#include "glog/logging.h"
#include "kythe/cxx/common/file_utils.h"
#include "kythe/cxx/common/json_proto.h"
-#include "kythe/cxx/common/kzip_writer.h"
+// ANDROID_BUILD
+#include "kythe/cxx/common/kzip_writer_aosp.h"
#include "kythe/cxx/common/path_utils.h"
#include "kythe/cxx/extractor/CommandLineUtils.h"
#include "kythe/cxx/extractor/language.h"
@@ -434,10 +437,9 @@
ClaimPragmaHandlerWrapper(ExtractorPPCallbacks* context)
: PragmaHandler("kythe_claim"), context_(context) {}
void HandlePragma(clang::Preprocessor& preprocessor,
- clang::PragmaIntroducer introducer,
+ clang::PragmaIntroducerKind introducer,
clang::Token& first_token) override {
- context_->HandleKytheClaimPragma(preprocessor, introducer.Kind,
- first_token);
+ context_->HandleKytheClaimPragma(preprocessor, introducer, first_token);
}
private:
@@ -451,9 +453,9 @@
MetadataPragmaHandlerWrapper(ExtractorPPCallbacks* context)
: PragmaHandler("kythe_metadata"), context_(context) {}
void HandlePragma(clang::Preprocessor& preprocessor,
- clang::PragmaIntroducer introducer,
+ clang::PragmaIntroducerKind introducer,
clang::Token& first_token) override {
- context_->HandleKytheMetadataPragma(preprocessor, introducer.Kind,
+ context_->HandleKytheMetadataPragma(preprocessor, introducer,
first_token);
}
@@ -855,6 +857,11 @@
<< "Expected to see only one TU; instead saw " << inputs.size() << ".";
main_source_file_ = inputs[0].getFile();
auto* preprocessor = &getCompilerInstance().getPreprocessor();
+ // ANDROID_BUILD:
+ // TODO: find a better way to ignore unknown pragmas
+ preprocessor->getDiagnostics().setSeverity(
+ clang::diag::warn_pragma_ignored, clang::diag::Severity::Ignored,
+ clang::SourceLocation());
preprocessor->addPPCallbacks(
llvm::make_unique<ExtractorPPCallbacks>(ExtractorState{
index_writer_, &getCompilerInstance().getSourceManager(),
@@ -1129,7 +1136,8 @@
build_details.set_rule_type(rule_type_);
build_details.set_build_config(build_config_);
// Include the details, but only if any of the fields are meaningfully set.
- if (build_details.ByteSizeLong() > 0) {
+ // ANDROID_BUILD: only ByteSize() is available
+ if (build_details.ByteSize() > 0) {
PackAny(build_details, kBuildDetailsURI, unit.add_details());
}
}
diff --git a/kythe/cxx/extractor/index_pack.cc b/kythe/cxx/extractor/index_pack.cc
index 7120561..206ec45 100644
--- a/kythe/cxx/extractor/index_pack.cc
+++ b/kythe/cxx/extractor/index_pack.cc
@@ -17,7 +17,6 @@
#include "index_pack.h"
#include <openssl/sha.h>
-#include <uuid/uuid.h>
#include <utility>
@@ -36,7 +35,6 @@
const char IndexPackFilesystem::kCompilationUnitDirectoryName[] = "units";
const char IndexPackFilesystem::kFileDataSuffix[] = ".data";
const char IndexPackFilesystem::kCompilationUnitSuffix[] = ".unit";
-const char IndexPackFilesystem::kTempFileSuffix[] = ".new";
std::unique_ptr<IndexPackPosixFilesystem> IndexPackPosixFilesystem::Open(
const std::string& root_path, IndexPackFilesystem::OpenMode open_mode,
@@ -107,32 +105,6 @@
return temp_path.str();
}
-/// \brief Represents a single UUID, generated during construction.
-class Uuid {
- public:
- Uuid() {
- uuid_t uuid;
- uuid_generate_random(uuid);
- // "The uuid_unparse function converts the supplied UUID uu from the binary
- // representation into a 36-byte string (plus tailing '\0')"
- char uuid_buffer[37];
- uuid_unparse_lower(uuid, uuid_buffer);
- payload_ = uuid_buffer;
- }
-
- /// \brief Returns a UUID (if ok()) or an error string (if !ok()).
- const std::string& payload() { return payload_; }
-
- /// \brief Checks whether the uuid generated correctly.
- bool ok() { return ok_; }
-
- private:
- /// Error text (if !ok_) or a UUID string (if ok_).
- std::string payload_;
- /// Determines whether UUID generation was successful.
- bool ok_ = true;
-};
-
/// \brief Opens a new file with a unique name in some directory.
/// \param abs_root_directory The absolute path to the directory.
/// \param fd_out Will be set to the fd of the open file.
@@ -142,28 +114,14 @@
static bool OpenUniqueTempFileIn(const std::string& abs_root_directory,
int* fd_out, std::string* path_out,
std::string* error_text) {
- for (;;) {
- Uuid new_uuid;
- if (!new_uuid.ok()) {
- *error_text = new_uuid.payload();
- return false;
- }
- llvm::SmallString<256> path(abs_root_directory);
- llvm::sys::path::append(
- path, new_uuid.payload() + IndexPackFilesystem::kTempFileSuffix);
- if (auto err = llvm::sys::fs::openFileForWrite(
- llvm::Twine(path), *fd_out, llvm::sys::fs::CD_CreateNew,
- llvm::sys::fs::OF_None,
- llvm::sys::fs::all_read | llvm::sys::fs::all_write)) {
- if (err != std::errc::file_exists) {
- *error_text = err.message();
- return false;
- }
- } else {
- *path_out = path.str();
- return true;
- }
+ llvm::SmallString<256> path(abs_root_directory);
+ llvm::sys::path::append(path, "%%%%%%%%%%%%%%%%.tmp");
+ if (auto err = llvm::sys::fs::createUniqueFile(path, *fd_out, path)) {
+ *error_text = err.message();
+ return false;
}
+ *path_out = path.str();
+ return true;
}
bool IndexPackPosixFilesystem::ReadFileContent(DataKind data_kind,
diff --git a/kythe/cxx/extractor/path_utils.cc b/kythe/cxx/extractor/path_utils.cc
index e689de6..4674397 100644
--- a/kythe/cxx/extractor/path_utils.cc
+++ b/kythe/cxx/extractor/path_utils.cc
@@ -32,6 +32,9 @@
llvm::SmallVectorImpl<char>* relative_path,
llvm::SmallVectorImpl<char>* result_filename) {
clang::Token filename_token;
+// ANDROID_BUILD: the new version is currently off because Android's
+// Clang toolchain hasn't reached r356433.
+#if 0
if (preprocessor->LexHeaderName(filename_token)) {
return nullptr;
}
@@ -42,6 +45,30 @@
llvm::SmallString<128> filename_buffer;
llvm::StringRef filename =
preprocessor->getSpelling(filename_token, filename_buffer);
+#else
+ llvm::SmallString<128> filename_buffer;
+ clang::SourceLocation filename_end;
+ llvm::StringRef filename;
+ preprocessor->getCurrentLexer()->LexIncludeFilename(filename_token);
+ switch (filename_token.getKind()) {
+ case clang::tok::eod:
+ return nullptr;
+ case clang::tok::angle_string_literal:
+ case clang::tok::string_literal:
+ filename = preprocessor->getSpelling(filename_token, filename_buffer);
+ break;
+ case clang::tok::less:
+ filename_buffer.push_back('<');
+ if (preprocessor->ConcatenateIncludeName(filename_buffer, filename_end))
+ return nullptr;
+ filename = filename_buffer;
+ break;
+ default:
+ preprocessor->DiscardUntilEndOfDirective();
+ fprintf(stderr, "Bad include-style pragma.\n");
+ return nullptr;
+ }
+#endif
bool is_angled = preprocessor->GetIncludeFilenameSpelling(
filename_token.getLocation(), filename);
if (filename.empty()) {
diff --git a/kythe/cxx/glog/logging.cc b/kythe/cxx/glog/logging.cc
new file mode 100644
index 0000000..8b85c6a
--- /dev/null
+++ b/kythe/cxx/glog/logging.cc
@@ -0,0 +1,5 @@
+#include "glog/logging.h"
+namespace google {
+// Stub definition for the declaration in glog/logging.h: the argument is
+// unnamed and unused, and the body is intentionally empty.
+void InitGoogleLogging(const char *) {
+}
+}
diff --git a/kythe/cxx/glog/logging.h b/kythe/cxx/glog/logging.h
new file mode 100644
index 0000000..3aca935
--- /dev/null
+++ b/kythe/cxx/glog/logging.h
@@ -0,0 +1,15 @@
+/*
+ * Android-compatible logging.h to avoid pulling in Google logging package.
+ */
+
+#if !defined(GLOG_LOGGING_H_)
+#define GLOG_LOGGING_H_
+#include "android-base/logging.h"
+// Maps the DFATAL severity name onto FATAL.
+#define DFATAL FATAL
+// The verbosity level argument is discarded; every VLOG(n) expands to
+// LOG(VERBOSE).
+#define VLOG(verbose_level) LOG(VERBOSE)
+
+namespace google {
+// Declared so that existing calls to google::InitGoogleLogging() compile; the
+// definition lives in kythe/cxx/glog/logging.cc.
+void InitGoogleLogging(const char *argv0);
+}
+
+#endif // GLOG_LOGGING_H_