Merge "convertPrintable: add benchmarks, make readable, optimize." into main am: 0f2d451a24 am: 01005c1c26

Original change: https://android-review.googlesource.com/c/platform/system/logging/+/2853528

Change-Id: I58556dd919a691c9c62015ddb832373be05686cc
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/liblog/logprint.cpp b/liblog/logprint.cpp
index e949c65..e182b75 100644
--- a/liblog/logprint.cpp
+++ b/liblog/logprint.cpp
@@ -1088,59 +1088,64 @@
 }
 
 /*
- * Convert to printable from message to p buffer, return string length. If p is
- * NULL, do not copy, but still return the expected string length.
+ * Convert to printable from src to dst buffer, returning dst bytes used.
+ * If dst is NULL, do not copy, but still return the dst bytes required.
  */
-size_t convertPrintable(char* p, const char* message, size_t messageLen) {
-  char* begin = p;
-  bool print = p != NULL;
-  mbstate_t mb_state = {};
+size_t convertPrintable(char* dst0, const char* src0, size_t n) {
+  char* dst = dst0;
+  const unsigned char* src = reinterpret_cast<const unsigned char*>(src0);
+  mbstate_t mbs = {};
 
-  while (messageLen) {
-    char buf[6];
-    ssize_t len = sizeof(buf) - 1;
-    if ((size_t)len > messageLen) {
-      len = messageLen;
+  bool print = (dst != nullptr);
+
+  while (n > 0) {
+    // ASCII fast path to cover most logging; space/tab/newline aren't escaped,
+    // but backslash is.
+    if ((*src >= ' ' && *src < 0x7f && *src != '\\') || *src == '\n' || *src == '\t') {
+      if (print) *dst = *src;
+      dst++;
+      src++;
+      n--;
+      continue;
     }
-    len = mbrtowc(nullptr, message, len, &mb_state);
 
-    if (len <= 0) {
-      snprintf(buf, sizeof(buf), "\\x%02X", static_cast<unsigned char>(*message));
-      len = 1;
+    // Unprintable fast path #1: single-character C escapes.
+    if ((*src >= '\a' && *src <= '\r') || *src == '\\') {
+      if (print) {
+        dst[0] = '\\';
+        dst[1] = (*src == '\\') ? '\\' : "abtnvfr"[*src - '\a'];
+      }
+      dst += 2;
+      src++;
+      n--;
+      continue;
+    }
+    // Unprintable fast path #2: everything else below space, plus DEL.
+    if (*src < ' ' || *src == 0x7f) {
+      if (print) sprintf(dst, "\\x%02X", *src);
+      dst += 4;
+      src++;
+      n--;
+      continue;
+    }
+
+    // Are we at the start of a valid UTF-8 encoding?
+    ssize_t len = mbrtowc(nullptr, reinterpret_cast<const char*>(src), n, &mbs);
+    if (len > 0) {
+      if (print) memcpy(dst, src, len);
+      dst += len;
+      src += len;
+      n -= len;
     } else {
-      buf[0] = '\0';
-      if (len == 1) {
-        if (*message == '\a') {
-          strcpy(buf, "\\a");
-        } else if (*message == '\b') {
-          strcpy(buf, "\\b");
-        } else if (*message == '\t') {
-          strcpy(buf, "\t"); /* Do not escape tabs */
-        } else if (*message == '\v') {
-          strcpy(buf, "\\v");
-        } else if (*message == '\f') {
-          strcpy(buf, "\\f");
-        } else if (*message == '\r') {
-          strcpy(buf, "\\r");
-        } else if (*message == '\\') {
-          strcpy(buf, "\\\\");
-        } else if ((*message < ' ') || (*message >= 0x7f)) {
-          snprintf(buf, sizeof(buf), "\\x%02X", static_cast<unsigned char>(*message));
-        }
-      }
-      if (!buf[0]) {
-        strncpy(buf, message, len);
-        buf[len] = '\0';
-      }
+      // Assume it's just one bad byte, and try again after escaping it.
+      if (print) sprintf(dst, "\\x%02X", *src);
+      dst += 4;
+      src++;
+      n--;
     }
-    if (print) {
-      strcpy(p, buf);
-    }
-    p += strlen(buf);
-    message += len;
-    messageLen -= len;
   }
-  return p - begin;
+  if (print) *dst = '\0';
+  return dst - dst0;
 }
 
 #ifdef __ANDROID__
diff --git a/liblog/properties.cpp b/liblog/properties.cpp
index 7ab5cc8..657901a 100644
--- a/liblog/properties.cpp
+++ b/liblog/properties.cpp
@@ -315,4 +315,8 @@
   return 1;
 }
 
+int __android_log_security() {
+  return 0;
+}
+
 #endif
diff --git a/liblog/tests/Android.bp b/liblog/tests/Android.bp
index 06b6126..f5c2680 100644
--- a/liblog/tests/Android.bp
+++ b/liblog/tests/Android.bp
@@ -33,15 +33,16 @@
         "-Wall",
         "-Wextra",
         "-Werror",
+        "-Wno-unused-result",
         "-fno-builtin",
     ],
     shared_libs: [
-        "libm",
         "libbase",
         "libcutils",
     ],
     static_libs: ["liblog"],
     srcs: ["liblog_benchmark.cpp"],
+    host_supported: true,
 }
 
 // -----------------------------------------------------------------------------
@@ -54,6 +55,7 @@
         "liblog_convertPrintable_fuzzer.cpp",
     ],
     static_libs: ["liblog"],
+    host_supported: true,
 }
 
 // -----------------------------------------------------------------------------
diff --git a/liblog/tests/liblog_benchmark.cpp b/liblog/tests/liblog_benchmark.cpp
index 5ca2c8c..8a7dae6 100644
--- a/liblog/tests/liblog_benchmark.cpp
+++ b/liblog/tests/liblog_benchmark.cpp
@@ -18,6 +18,7 @@
 #include <inttypes.h>
 #include <poll.h>
 #include <sched.h>
+#include <signal.h>
 #include <sys/socket.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
@@ -34,6 +35,8 @@
 #include <log/log_read.h>
 #include <private/android_logger.h>
 
+size_t convertPrintable(char*, const char*, size_t);
+
 BENCHMARK_MAIN();
 
 // enhanced version of LOG_FAILURE_RETRY to add support for EAGAIN and
@@ -194,7 +197,7 @@
   pmsg_header.pid = getpid();
 
   android_log_header_t header;
-  header.tid = gettid();
+  header.tid = 0;
   header.realtime.tv_sec = ts.tv_sec;
   header.realtime.tv_nsec = ts.tv_nsec;
 
@@ -280,7 +283,7 @@
   buffer->pmsg_header.uid = getuid();
   buffer->pmsg_header.pid = getpid();
 
-  buffer->header.tid = gettid();
+  buffer->header.tid = 0;
   buffer->header.realtime.tv_sec = ts.tv_sec;
   buffer->header.realtime.tv_nsec = ts.tv_nsec;
 
@@ -354,7 +357,7 @@
   buffer->pmsg_header.uid = getuid();
   buffer->pmsg_header.pid = getpid();
 
-  buffer->header.tid = gettid();
+  buffer->header.tid = 0;
   buffer->header.realtime.tv_sec = ts.tv_sec;
   buffer->header.realtime.tv_nsec = ts.tv_nsec;
 
@@ -428,7 +431,7 @@
   buffer->pmsg_header.uid = getuid();
   buffer->pmsg_header.pid = getpid();
 
-  buffer->header.tid = gettid();
+  buffer->header.tid = 0;
   buffer->header.realtime.tv_sec = ts.tv_sec;
   buffer->header.realtime.tv_nsec = ts.tv_nsec;
 
@@ -500,7 +503,7 @@
   buffer->pmsg_header.uid = getuid();
   buffer->pmsg_header.pid = getpid();
 
-  buffer->header.tid = gettid();
+  buffer->header.tid = 0;
   buffer->header.realtime.tv_sec = ts.tv_sec;
   buffer->header.realtime.tv_nsec = ts.tv_nsec;
 
@@ -787,9 +790,6 @@
 }
 BENCHMARK(BM_is_loggable);
 
-/*
- *	Measure the time it takes for __android_log_security.
- */
 static void BM_security(benchmark::State& state) {
   while (state.KeepRunning()) {
     __android_log_security();
@@ -997,3 +997,33 @@
   android::base::SetProperty("log.tag." + test_log_tag, "");
 }
 BENCHMARK(BM_log_verbose_overhead);
+
+static void BM_log_convertPrintable_ascii(benchmark::State& state) {
+  char buf[BUFSIZ];
+  const char* s = "hello, world! this is a plain ASCII string 1234.";
+  size_t n = strlen(s);
+  for (auto _ : state) {
+    convertPrintable(buf, s, n);
+  }
+}
+BENCHMARK(BM_log_convertPrintable_ascii);
+
+static void BM_log_convertPrintable_non_printable(benchmark::State& state) {
+  char buf[BUFSIZ];
+  const char* s = "hello,\x01world!\x02this is a plain ASCII string 1234\x7f";
+  size_t n = strlen(s);
+  for (auto _ : state) {
+    convertPrintable(buf, s, n);
+  }
+}
+BENCHMARK(BM_log_convertPrintable_non_printable);
+
+static void BM_log_convertPrintable_non_ascii(benchmark::State& state) {
+  char buf[BUFSIZ];
+  const char* s = "동해 물과 백두산이 마르고 닳도록, 하느님이 보우하사 우리나라 만세.";
+  size_t n = strlen(s);
+  for (auto _ : state) {
+    convertPrintable(buf, s, n);
+  }
+}
+BENCHMARK(BM_log_convertPrintable_non_ascii);
diff --git a/liblog/tests/liblog_convertPrintable_fuzzer.cpp b/liblog/tests/liblog_convertPrintable_fuzzer.cpp
index f4e376a..a362381 100644
--- a/liblog/tests/liblog_convertPrintable_fuzzer.cpp
+++ b/liblog/tests/liblog_convertPrintable_fuzzer.cpp
@@ -16,7 +16,7 @@
 
 #include <fuzzer/FuzzedDataProvider.h>
 
-size_t convertPrintable(char* p, const char* message, size_t messageLen);
+size_t convertPrintable(char*, const char*, size_t);
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length) {
   FuzzedDataProvider dataProvider(data, length);
diff --git a/liblog/tests/logprint_test.cpp b/liblog/tests/logprint_test.cpp
index 8475699..6329241 100644
--- a/liblog/tests/logprint_test.cpp
+++ b/liblog/tests/logprint_test.cpp
@@ -24,7 +24,7 @@
 
 using namespace std::string_literals;
 
-size_t convertPrintable(char* p, const char* message, size_t messageLen);
+size_t convertPrintable(char*, const char*, size_t);
 
 TEST(liblog, convertPrintable_ascii) {
   auto input = "easy string, output same";
@@ -32,6 +32,7 @@
   EXPECT_EQ(output_size, strlen(input));
 
   char output[output_size + 1];
+  memset(output, 'x', sizeof(output));
 
   output_size = convertPrintable(output, input, strlen(input));
   EXPECT_EQ(output_size, strlen(input));
@@ -39,18 +40,19 @@
 }
 
 TEST(liblog, convertPrintable_escapes) {
-  std::string input = "escape\x00\x7f\a\b\t\v\f\r\\"s;
+  std::string input = "escape\x00\x7f\a\b\t\n\v\f\r\\"s;
   // We want to test escaping of ASCII NUL at the end too.
   auto input_size = input.size() + 1;
 
-  // Note that \t is not escaped.
-  std::string expected_output = "escape\\x00\\x7F\\a\\b\t\\v\\f\\r\\\\\\x00"s;
+  // Note that \t and \n are not escaped.
+  std::string expected_output = "escape\\x00\\x7F\\a\\b\t\n\\v\\f\\r\\\\\\x00"s;
   auto expected_output_size = expected_output.size();
 
   auto output_size = convertPrintable(nullptr, input.c_str(), input_size);
   EXPECT_EQ(output_size, expected_output_size) << input_size;
 
   char output[output_size + 1];
+  memset(output, 'x', sizeof(output));
 
   output_size = convertPrintable(output, input.c_str(), input_size);
   EXPECT_EQ(output_size, expected_output_size);
@@ -65,6 +67,7 @@
   EXPECT_EQ(output_size, strlen(input));
 
   char output[output_size + 1];
+  memset(output, 'x', sizeof(output));
 
   output_size = convertPrintable(output, input, strlen(input));
   EXPECT_EQ(output_size, strlen(input));
@@ -79,6 +82,7 @@
   EXPECT_EQ(output_size, strlen(expected_output));
 
   char output[output_size + 1];
+  memset(output, 'x', sizeof(output));
 
   output_size = convertPrintable(output, input, strlen(input));
   EXPECT_EQ(output_size, strlen(expected_output));
@@ -97,6 +101,7 @@
   EXPECT_EQ(output_size, strlen(expected_output));
 
   char output[output_size + 1];
+  memset(output, 'x', sizeof(output));
 
   output_size = convertPrintable(output, input, strlen(input));
   EXPECT_EQ(output_size, strlen(expected_output));