fix(cxx_indexer): use URI-friendly encoding for VName fields (#3705)
Save on ticket storage by avoiding characters that expand when URI-encoded.
diff --git a/kythe/cxx/common/kythe_uri_test.cc b/kythe/cxx/common/kythe_uri_test.cc
index 1f9e106..2485e2a 100644
--- a/kythe/cxx/common/kythe_uri_test.cc
+++ b/kythe/cxx/common/kythe_uri_test.cc
@@ -86,6 +86,9 @@
{"kythe:?path=P", MakeURI().Path("P")},
{"kythe:?lang=L", MakeURI().Language("L")},
+ // Special characters.
+ {"kythe:#-%2B_%2F", MakeURI().Signature("-+_/")},
+
// Corner cases about relative paths. NB: MakeURI() goes through VNames.
{"kythe://..", MakeURI().Corpus("..")},
{"kythe://../", MakeURI().Corpus("../")},
diff --git a/kythe/cxx/indexer/cxx/GraphObserver.cc b/kythe/cxx/indexer/cxx/GraphObserver.cc
index 33a0dc9..88d81ec 100644
--- a/kythe/cxx/indexer/cxx/GraphObserver.cc
+++ b/kythe/cxx/indexer/cxx/GraphObserver.cc
@@ -22,8 +22,8 @@
namespace kythe {
-// base64 has a 4:3 overhead and SHA256_DIGEST_LENGTH is 32. 32*4/3 = 42.
-constexpr size_t kSha256DigestBase64MaxEncodingLength = 42;
+// Unpadded base64 has a 4:3 overhead: ceil(SHA256_DIGEST_LENGTH * 4 / 3) = 43.
+constexpr size_t kSha256DigestBase64MaxEncodingLength = 43;
std::string CompressString(absl::string_view InString, bool Force) {
if (InString.size() <= kSha256DigestBase64MaxEncodingLength && !Force) {
@@ -36,7 +36,10 @@
std::string Hash(SHA256_DIGEST_LENGTH, '\0');
::SHA256_Final(reinterpret_cast<unsigned char*>(&Hash[0]), &Sha);
std::string Result;
- absl::Base64Escape(Hash, &Result);
+ // Use web-safe escaping because vnames are frequently URI-encoded. The
+ // web-safe form omits padding ('=') and the characters '+' and '/', each of
+ // which would expand to a three-byte sequence when URI-encoded.
+ absl::WebSafeBase64Escape(Hash, &Result);
return Result;
}