Build C++ extractor for Android.

The extractor has to match the Android C++ toolchain, and it's desirable
to avoid pulling in too many additional dependencies. This requires
making the following changes:
* the protobuf code in the Android source tree is old, so downgrade some
calls in json_proto.cc and cxx_extractor.cc (plus, include the `sstream`
header explicitly)
* use libziparchive to manipulate Zip files (upstream uses libzip)
* fix argument type when calling PragmaHandler::HandlePragma (another
downgrade)
* suppress warnings about unknown pragma
* downgrade include file lookup in path_utils.cc
* provide stub to use Android's logging instead of Google logging.
* remove the dependency on the uuid library and rely on the LLVM API to
create a temporary file with a unique name

Bug: 121267023
Test: build cxx_extractor in clang-tools branch, use it to generate
.kzip files

Change-Id: I5df762b549ba406c23e05f2836acb8f5bc5f0120
diff --git a/Android.bp b/Android.bp
index 116e854..0238866 100644
--- a/Android.bp
+++ b/Android.bp
@@ -66,3 +66,86 @@
     ],
     java_version: "1.9",
 }
+
+cc_defaults {  // Compiler flags shared by kythe_cxx_glog and cxx_extractor.
+    name: "kythe_common_defaults",
+    cflags: [
+        "-Wall",  // Broad warnings on; the -Wno-* entries below opt back out.
+        "-Wextra",
+        "-Wno-deprecated",
+        "-Wno-missing-field-initializers",
+        "-Wno-unused-parameter",
+        "-Wno-unused-private-field",
+        "-Wno-sign-compare",
+        "-Wno-unused-variable",
+        "-fno-exceptions",  // NOTE(review): presumably matches how the LLVM libs are built -- confirm.
+        "-fno-rtti",
+    ],
+}
+
+cc_library_host_static {  // Host static library holding the glog stand-in.
+    name: "kythe_cxx_glog",
+    srcs: ["kythe/cxx/glog/logging.cc"],
+    export_include_dirs: ["kythe/cxx"],  // So "glog/logging.h" resolves here.
+    defaults: ["kythe_common_defaults"],
+    shared_libs: ["libbase"],  // glog/logging.h includes android-base/logging.h.
+}
+
+cc_binary_host {  // Host build of the Kythe C++ extractor.
+    name: "cxx_extractor",
+    defaults: [
+        "kythe_common_defaults",
+    ],
+    srcs: [
+        "kythe/cxx/common/file_utils.cc",
+        "kythe/cxx/common/file_vname_generator.cc",
+        "kythe/cxx/common/index_writer.cc",
+        "kythe/cxx/common/json_proto.cc",
+        "kythe/cxx/common/kythe_metadata_file.cc",
+        "kythe/cxx/common/kzip_writer_aosp.cc",  // libziparchive-based kzip writer.
+        "kythe/cxx/common/path_utils.cc",
+        "kythe/cxx/common/protobuf_metadata_file.cc",
+        "kythe/cxx/common/schema/edges.cc",
+        "kythe/cxx/common/status.cc",
+        "kythe/cxx/extractor/CommandLineUtils.cc",
+        "kythe/cxx/extractor/cxx_details.cc",
+        "kythe/cxx/extractor/cxx_extractor.cc",
+        "kythe/cxx/extractor/cxx_extractor_main.cc",
+        "kythe/cxx/extractor/index_pack.cc",
+        "kythe/cxx/extractor/language.cc",
+        "kythe/cxx/extractor/path_utils.cc",
+        "kythe/cxx/indexer/cxx/clang_utils.cc",
+        "third_party/llvm/src/cxx_extractor_preprocessor_utils.cc",
+        "third_party/llvm/src/clang_builtin_headers.cc",
+        ":kythe_protos",  // NOTE(review): module defined outside this hunk.
+    ],
+    shared_libs: [
+        "libbase",
+        "libLLVM_host",
+        "libclang_cxx_host",
+        "libprotobuf-cpp-full",
+        "libziparchive",  // ZipWriter, used by kzip_writer_aosp.cc.
+    ],
+    proto: {
+        include_dirs: [
+            "external/kythe",
+            "external/protobuf/src",
+        ],
+        type: "full",
+        canonical_path_from_root: false,
+    },
+    header_libs: [
+        "libabsl_headers",
+        "rapidjson_headers",
+    ],
+    static_libs: [
+        "kythe_cxx_glog",  // The glog/logging.h stand-in library.
+        "regex-re2",
+        "libabsl_strings",
+        "libabsl_base",
+        "libabsl_numeric",
+        "libcrypto",  // SHA256() for kzip digests.
+        "libgflags",
+    ],
+    generated_headers: ["clang_builtin_headers_resources"],
+}
diff --git a/kythe/cxx/common/json_proto.cc b/kythe/cxx/common/json_proto.cc
index 1398cfc..0901322 100644
--- a/kythe/cxx/common/json_proto.cc
+++ b/kythe/cxx/common/json_proto.cc
@@ -88,7 +88,7 @@
   auto resolver =
       MakeTypeResolverForPool(message.GetDescriptor()->file()->pool());
 
-  google::protobuf::util::JsonPrintOptions options;
+  google::protobuf::util::JsonOptions options;
   options.preserve_proto_field_names = true;
 
   auto status = google::protobuf::util::BinaryToJsonString(
@@ -171,12 +171,10 @@
     auto resolver =
         MakeTypeResolverForPool(message->GetDescriptor()->file()->pool());
 
-    google::protobuf::util::JsonParseOptions options;
-    options.case_insensitive_enum_parsing = false;
+// ANDROID_BUILD: our protobuf code is old
+// and lacks google::protobuf::util::JsonOptions.case_insensitive_enum_parsing
     auto status = google::protobuf::util::JsonToBinaryString(
-        resolver.get(), message->GetDescriptor()->full_name(), content, &binary,
-        options);
-
+        resolver.get(), message->GetDescriptor()->full_name(), content, &binary);
     if (!status.ok()) {
       LOG(ERROR) << status.ToString() << ": " << content;
       return false;
@@ -193,12 +191,10 @@
 
   std::string binary;
   google::protobuf::io::StringOutputStream output(&binary);
-  google::protobuf::util::JsonParseOptions options;
-  options.case_insensitive_enum_parsing = false;
+  // ANDROID_BUILD: our protobuf code is old
+  // and lacks google::protobuf::util::JsonOptions.case_insensitive_enum_parsing
   auto status = google::protobuf::util::JsonToBinaryStream(
-      resolver.get(), message->GetDescriptor()->full_name(), input, &output,
-      options);
-
+      resolver.get(), message->GetDescriptor()->full_name(), input, &output);
   if (!status.ok()) {
     return Status(static_cast<StatusCode>(status.error_code()),
                   std::string(status.error_message()));
diff --git a/kythe/cxx/common/kzip_writer_aosp.cc b/kythe/cxx/common/kzip_writer_aosp.cc
new file mode 100644
index 0000000..ee514d4
--- /dev/null
+++ b/kythe/cxx/common/kzip_writer_aosp.cc
@@ -0,0 +1,141 @@
+#include "kythe/cxx/common/kzip_writer_aosp.h"
+
+#include <openssl/sha.h>
+#include <array>
+#include <string>
+
+#include "kythe/cxx/common/json_proto.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/escaping.h"
+#include "kythe/proto/analysis.pb.h"
+
+namespace kythe {
+namespace {
+// Directory entries of a kzip; all literal-backed, hence NUL-terminated.
+constexpr absl::string_view kRoot = "root/";
+constexpr absl::string_view kUnitRoot = "root/units/";  // + unit digest
+constexpr absl::string_view kFileRoot = "root/files/";  // + file digest
+
+}  // namespace
+
+std::string KzipWriter::SHA256Digest(absl::string_view content) {
+  std::array<unsigned char, SHA256_DIGEST_LENGTH> digest;  // Raw hash bytes.
+  const auto* input = reinterpret_cast<const unsigned char*>(content.data());
+  ::SHA256(input, content.size(), digest.data());
+  const auto* raw = reinterpret_cast<const char*>(digest.data());
+  return absl::BytesToHexString(absl::string_view(raw, digest.size()));
+}
+
+Status KzipWriter::WriteTextFile(const std::string& path,
+                                 absl::string_view content) {
+  // Each step runs only if every previous one succeeded (returned 0).
+  int32_t err = zip_writer_.StartEntry(path.c_str(), ZipWriter::kCompress);
+  if (!err) err = zip_writer_.WriteBytes(content.data(), content.size());
+  if (!err) err = zip_writer_.FinishEntry();
+  if (err != 0) {
+    return InternalError(ZipWriter::ErrorCodeString(err));
+  }
+  return OkStatus();
+}
+
+// Adds the three kzip directory entries the first time it is called.
+// Returns 0 on success, else the ZipWriter error code of the failed step.
+int32_t KzipWriter::InitializeArchive() {
+  if (!initialized_) {
+    // Set before writing, so a failed attempt is not retried by later calls.
+    initialized_ = true;
+    // Every name wraps a string literal, hence data() is NUL-terminated.
+    for (const absl::string_view dir : {kRoot, kUnitRoot, kFileRoot}) {
+      int32_t err = zip_writer_.StartEntry(dir.data(), 0);
+      if (err == 0) {
+        err = zip_writer_.FinishEntry();
+      }
+      if (err != 0) return err;
+    }
+  }
+  return 0;
+}
+
+
+/* static */
+StatusOr<IndexWriter> KzipWriter::Create(absl::string_view path) {
+  // fopen needs a NUL-terminated name, which a string_view does not guarantee;
+  // "wb" creates the file or truncates an existing one.
+  FILE *fp = fopen(std::string(path).c_str(), "wb");
+  if (!fp) return UnimplementedError(strerror(errno));
+  return IndexWriter(absl::WrapUnique(new KzipWriter(fp)));
+}
+
+KzipWriter::KzipWriter(FILE *fp):fp_(fp), zip_writer_(fp), initialized_(false) {}
+
+KzipWriter::~KzipWriter() {  // Only checks; the file is closed by Close().
+  DCHECK(fp_ == nullptr) << "KzipWriterAosp::Close was not called!";
+}
+
+StatusOr<std::string> KzipWriter::WriteUnit(
+    const kythe::proto::IndexedCompilation& unit) {
+  // Make sure the directory entries exist before adding the unit.
+  if (const int32_t rc = InitializeArchive()) {
+    return InternalError(ZipWriter::ErrorCodeString(rc));
+  }
+  auto json = WriteMessageAsJsonToString(unit);
+  if (!json) {
+    return json.status();
+  }
+  auto entry = InsertFile(kUnitRoot, std::move(*json));
+  if (entry.inserted()) {
+    auto status = WriteTextFile(entry.path(), entry.contents());
+    if (!status.ok()) {
+      contents_.erase(entry.path());  // Not in the archive: forget it.
+      return status;
+    }
+  }
+  return std::string(entry.digest());
+}
+
+StatusOr<std::string> KzipWriter::WriteFile(absl::string_view content) {
+  if (const int32_t rc = InitializeArchive()) {
+    return InternalError(ZipWriter::ErrorCodeString(rc));
+  }
+  auto entry = InsertFile(kFileRoot, content);
+  if (!entry.inserted()) {  // Same digest seen before: nothing to write.
+    return std::string(entry.digest());
+  }
+  auto status = WriteTextFile(entry.path(), entry.contents());
+  if (!status.ok()) {
+    contents_.erase(entry.path());  // Not in the archive: forget it.
+    return status;
+  }
+  return std::string(entry.digest());
+}
+
+Status KzipWriter::Close() {
+  int32_t rc = zip_writer_.Finish();
+  fclose(fp_);  // Closed even when Finish() failed; its result is ignored.
+  fp_ = nullptr;  // Lets the destructor's DCHECK pass.
+  return rc ? InternalError(ZipWriter::ErrorCodeString(rc)) : OkStatus();
+}
+
+auto KzipWriter::InsertFile(absl::string_view root, absl::string_view content)
+    -> InsertionResult {
+  // The key is "<root><hex digest>", so identical content shares one entry.
+  const std::string key = absl::StrCat(root, SHA256Digest(content));
+  // Emplace an empty placeholder first: the content is then copied only when
+  // the key is new.
+  InsertionResult outcome{contents_.emplace(key, "")};
+  if (outcome.inserted()) {
+    outcome.insertion.first->second = std::string(content);
+  }
+  return outcome;
+}
+
+// Returns the text after the last '/' of path() -- the hex digest -- or all
+// of path() when it contains no '/'.
+inline absl::string_view KzipWriter::InsertionResult::digest() const {
+  const auto slash = path().find_last_of('/');
+  return slash == absl::string_view::npos
+             ? absl::string_view(path())
+             : absl::ClippedSubstr(path(), slash + 1);
+}
+
+}  // namespace kythe
diff --git a/kythe/cxx/common/kzip_writer_aosp.h b/kythe/cxx/common/kzip_writer_aosp.h
new file mode 100644
index 0000000..a1483cc
--- /dev/null
+++ b/kythe/cxx/common/kzip_writer_aosp.h
@@ -0,0 +1,64 @@
+#ifndef KYTHE_CXX_COMMON_KZIP_WRITER_AOSP_H_
+#define KYTHE_CXX_COMMON_KZIP_WRITER_AOSP_H_
+
+#include <unordered_map>
+
+#include "absl/strings/string_view.h"
+#include "kythe/cxx/common/index_writer.h"
+#include "kythe/cxx/common/status_or.h"
+#include "kythe/proto/analysis.pb.h"
+#include "ziparchive/zip_writer.h"
+
+namespace kythe {
+
+/// \brief Kzip implementation of IndexWriter for AOSP.
+/// see https://www.kythe.io/docs/kythe-kzip.html for format description.
+class KzipWriter : public IndexWriterInterface {
+ public:
+  /// \brief Constructs a Kzip IndexWriter which creates and writes to `path`.
+  /// \param path Path to the file to create; an existing file is overwritten.
+  static StatusOr<IndexWriter> Create(absl::string_view path);
+
+  /// \brief Destroys the KzipWriter.
+  ~KzipWriter() override;
+
+  /// \brief Writes the unit to the kzip file, returning its digest.
+  StatusOr<std::string> WriteUnit(
+      const kythe::proto::IndexedCompilation& unit) override;
+
+  /// \brief Writes the file contents to the kzip file, returning their digest.
+  StatusOr<std::string> WriteFile(absl::string_view content) override;
+
+  /// \brief Flushes accumulated writes and closes the kzip file.
+  /// Close must be called before the KzipWriter is destroyed!
+  Status Close() override;
+
+ private:
+  using Path = std::string;
+  using Contents = std::string;
+  using FileMap = std::unordered_map<Path, Contents>;  // Entry path -> content.
+
+  struct InsertionResult {
+    absl::string_view digest() const;  // Part of path() after the last '/'.
+    const std::string& path() const { return insertion.first->first; }
+    absl::string_view contents() const { return insertion.first->second; }
+    bool inserted() const { return insertion.second; }
+
+    std::pair<FileMap::iterator, bool> insertion;  // Result of emplace().
+  };
+
+  explicit KzipWriter(FILE *fp);
+
+  InsertionResult InsertFile(absl::string_view root, absl::string_view content);
+  Status WriteTextFile(const std::string& path, absl::string_view content);
+  int32_t InitializeArchive();
+  static std::string SHA256Digest(absl::string_view content);
+
+  FILE *fp_;  // Output stream; reset to nullptr by Close().
+  ZipWriter zip_writer_;  // Constructed on the same FILE as fp_.
+  bool initialized_ = false;  // Set once InitializeArchive() has run.
+  FileMap contents_;  // Entries added so far, keyed by archive path.
+};
+
+}  // namespace kythe
+#endif  // KYTHE_CXX_COMMON_KZIP_WRITER_AOSP_H_
diff --git a/kythe/cxx/common/protobuf_metadata_file.h b/kythe/cxx/common/protobuf_metadata_file.h
index 2daea72..5323131 100644
--- a/kythe/cxx/common/protobuf_metadata_file.h
+++ b/kythe/cxx/common/protobuf_metadata_file.h
@@ -18,6 +18,8 @@
 #define KYTHE_CXX_COMMON_PROTOBUF_METADATA_FILE_H_
 
 #include <memory>
+// ANDROID_BUILD: need <sstream> for std::stringstream
+#include <sstream>
 
 #include "glog/logging.h"
 #include "google/protobuf/descriptor.pb.h"
diff --git a/kythe/cxx/extractor/cxx_extractor.cc b/kythe/cxx/extractor/cxx_extractor.cc
index 140fff3..e43d69f 100644
--- a/kythe/cxx/extractor/cxx_extractor.cc
+++ b/kythe/cxx/extractor/cxx_extractor.cc
@@ -33,6 +33,8 @@
 #include "absl/strings/string_view.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendAction.h"
+// ANDROID_BUILD
+#include "clang/Lex/LexDiagnostic.h"
 #include "clang/Lex/MacroArgs.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
@@ -41,7 +43,8 @@
 #include "glog/logging.h"
 #include "kythe/cxx/common/file_utils.h"
 #include "kythe/cxx/common/json_proto.h"
-#include "kythe/cxx/common/kzip_writer.h"
+// ANDROID_BUILD
+#include "kythe/cxx/common/kzip_writer_aosp.h"
 #include "kythe/cxx/common/path_utils.h"
 #include "kythe/cxx/extractor/CommandLineUtils.h"
 #include "kythe/cxx/extractor/language.h"
@@ -434,10 +437,9 @@
     ClaimPragmaHandlerWrapper(ExtractorPPCallbacks* context)
         : PragmaHandler("kythe_claim"), context_(context) {}
     void HandlePragma(clang::Preprocessor& preprocessor,
-                      clang::PragmaIntroducer introducer,
+                      clang::PragmaIntroducerKind introducer,
                       clang::Token& first_token) override {
-      context_->HandleKytheClaimPragma(preprocessor, introducer.Kind,
-                                       first_token);
+      context_->HandleKytheClaimPragma(preprocessor, introducer, first_token);
     }
 
    private:
@@ -451,9 +453,9 @@
     MetadataPragmaHandlerWrapper(ExtractorPPCallbacks* context)
         : PragmaHandler("kythe_metadata"), context_(context) {}
     void HandlePragma(clang::Preprocessor& preprocessor,
-                      clang::PragmaIntroducer introducer,
+                      clang::PragmaIntroducerKind introducer,
                       clang::Token& first_token) override {
-      context_->HandleKytheMetadataPragma(preprocessor, introducer.Kind,
+      context_->HandleKytheMetadataPragma(preprocessor, introducer,
                                           first_token);
     }
 
@@ -855,6 +857,11 @@
         << "Expected to see only one TU; instead saw " << inputs.size() << ".";
     main_source_file_ = inputs[0].getFile();
     auto* preprocessor = &getCompilerInstance().getPreprocessor();
+    // ANDROID_BUILD:
+    // TODO: find a better way to ignore unknown pragmas
+    preprocessor->getDiagnostics().setSeverity(
+        clang::diag::warn_pragma_ignored, clang::diag::Severity::Ignored,
+        clang::SourceLocation());
     preprocessor->addPPCallbacks(
         llvm::make_unique<ExtractorPPCallbacks>(ExtractorState{
             index_writer_, &getCompilerInstance().getSourceManager(),
@@ -1129,7 +1136,8 @@
     build_details.set_rule_type(rule_type_);
     build_details.set_build_config(build_config_);
     // Include the details, but only if any of the fields are meaningfully set.
-    if (build_details.ByteSizeLong() > 0) {
+    // ANDROID_BUILD: only ByteSize() is available
+    if (build_details.ByteSize() > 0) {
       PackAny(build_details, kBuildDetailsURI, unit.add_details());
     }
   }
diff --git a/kythe/cxx/extractor/index_pack.cc b/kythe/cxx/extractor/index_pack.cc
index 7120561..206ec45 100644
--- a/kythe/cxx/extractor/index_pack.cc
+++ b/kythe/cxx/extractor/index_pack.cc
@@ -17,7 +17,6 @@
 #include "index_pack.h"
 
 #include <openssl/sha.h>
-#include <uuid/uuid.h>
 
 #include <utility>
 
@@ -36,7 +35,6 @@
 const char IndexPackFilesystem::kCompilationUnitDirectoryName[] = "units";
 const char IndexPackFilesystem::kFileDataSuffix[] = ".data";
 const char IndexPackFilesystem::kCompilationUnitSuffix[] = ".unit";
-const char IndexPackFilesystem::kTempFileSuffix[] = ".new";
 
 std::unique_ptr<IndexPackPosixFilesystem> IndexPackPosixFilesystem::Open(
     const std::string& root_path, IndexPackFilesystem::OpenMode open_mode,
@@ -107,32 +105,6 @@
   return temp_path.str();
 }
 
-/// \brief Represents a single UUID, generated during construction.
-class Uuid {
- public:
-  Uuid() {
-    uuid_t uuid;
-    uuid_generate_random(uuid);
-    // "The uuid_unparse function converts the supplied UUID uu from the binary
-    // representation into a 36-byte string (plus tailing '\0')"
-    char uuid_buffer[37];
-    uuid_unparse_lower(uuid, uuid_buffer);
-    payload_ = uuid_buffer;
-  }
-
-  /// \brief Returns a UUID (if ok()) or an error string (if !ok()).
-  const std::string& payload() { return payload_; }
-
-  /// \brief Checks whether the uuid generated correctly.
-  bool ok() { return ok_; }
-
- private:
-  /// Error text (if !ok_) or a UUID string (if ok_).
-  std::string payload_;
-  /// Determines whether UUID generation was successful.
-  bool ok_ = true;
-};
-
 /// \brief Opens a new file with a unique name in some directory.
 /// \param abs_root_directory The absolute path to the directory.
 /// \param fd_out Will be set to the fd of the open file.
@@ -142,28 +114,14 @@
 static bool OpenUniqueTempFileIn(const std::string& abs_root_directory,
                                  int* fd_out, std::string* path_out,
                                  std::string* error_text) {
-  for (;;) {
-    Uuid new_uuid;
-    if (!new_uuid.ok()) {
-      *error_text = new_uuid.payload();
-      return false;
-    }
-    llvm::SmallString<256> path(abs_root_directory);
-    llvm::sys::path::append(
-        path, new_uuid.payload() + IndexPackFilesystem::kTempFileSuffix);
-    if (auto err = llvm::sys::fs::openFileForWrite(
-            llvm::Twine(path), *fd_out, llvm::sys::fs::CD_CreateNew,
-            llvm::sys::fs::OF_None,
-            llvm::sys::fs::all_read | llvm::sys::fs::all_write)) {
-      if (err != std::errc::file_exists) {
-        *error_text = err.message();
-        return false;
-      }
-    } else {
-      *path_out = path.str();
-      return true;
-    }
+  llvm::SmallString<256> path(abs_root_directory);
+  llvm::sys::path::append(path,  "%%%%%%%%%%%%%%%%.tmp");
+  if (auto err = llvm::sys::fs::createUniqueFile(path, *fd_out, path)) {
+    *error_text = err.message();
+    return false;
   }
+  *path_out = path.str();
+  return true;
 }
 
 bool IndexPackPosixFilesystem::ReadFileContent(DataKind data_kind,
diff --git a/kythe/cxx/extractor/path_utils.cc b/kythe/cxx/extractor/path_utils.cc
index e689de6..4674397 100644
--- a/kythe/cxx/extractor/path_utils.cc
+++ b/kythe/cxx/extractor/path_utils.cc
@@ -32,6 +32,9 @@
     llvm::SmallVectorImpl<char>* relative_path,
     llvm::SmallVectorImpl<char>* result_filename) {
   clang::Token filename_token;
+// ANDROID_BUILD: the new version is currently off because Android's
+// Clang toolchain hasn't reached r356433.
+#if 0
   if (preprocessor->LexHeaderName(filename_token)) {
     return nullptr;
   }
@@ -42,6 +45,30 @@
   llvm::SmallString<128> filename_buffer;
   llvm::StringRef filename =
       preprocessor->getSpelling(filename_token, filename_buffer);
+#else
+  llvm::SmallString<128> filename_buffer;
+  clang::SourceLocation filename_end;
+  llvm::StringRef filename;
+  preprocessor->getCurrentLexer()->LexIncludeFilename(filename_token);
+  switch (filename_token.getKind()) {
+    case clang::tok::eod:
+      return nullptr;
+    case clang::tok::angle_string_literal:
+    case clang::tok::string_literal:
+      filename = preprocessor->getSpelling(filename_token, filename_buffer);
+      break;
+    case clang::tok::less:
+      filename_buffer.push_back('<');
+      if (preprocessor->ConcatenateIncludeName(filename_buffer, filename_end))
+        return nullptr;
+      filename = filename_buffer;
+      break;
+    default:
+      preprocessor->DiscardUntilEndOfDirective();
+      fprintf(stderr, "Bad include-style pragma.\n");
+      return nullptr;
+  }
+#endif
   bool is_angled = preprocessor->GetIncludeFilenameSpelling(
       filename_token.getLocation(), filename);
   if (filename.empty()) {
diff --git a/kythe/cxx/glog/logging.cc b/kythe/cxx/glog/logging.cc
new file mode 100644
index 0000000..8b85c6a
--- /dev/null
+++ b/kythe/cxx/glog/logging.cc
@@ -0,0 +1,5 @@
+#include "glog/logging.h"
+namespace google {
+void InitGoogleLogging(const char *) {  // Stub: does nothing.
+}
+}  // namespace google
diff --git a/kythe/cxx/glog/logging.h b/kythe/cxx/glog/logging.h
new file mode 100644
index 0000000..3aca935
--- /dev/null
+++ b/kythe/cxx/glog/logging.h
@@ -0,0 +1,15 @@
+/*
+ * glog/logging.h stand-in for Android: maps glog names onto android-base.
+ */
+
+#if !defined(GLOG_LOGGING_H_)
+#define GLOG_LOGGING_H_
+#include "android-base/logging.h"
+#define DFATAL FATAL  // LOG(DFATAL) becomes LOG(FATAL).
+#define VLOG(verbose_level) LOG(VERBOSE)  // The level argument is ignored.
+
+namespace google {
+void InitGoogleLogging(const char *argv0);  // Defined as an empty stub.
+}  // namespace google
+
+#endif // GLOG_LOGGING_H_