tp: fix displayTimeUnit parsing for JSON traces

This CL fixes issues with parsing of the displayTimeUnit
key/value especially around whitespace. Also we now increment
a stat when we see the displayTimeUnit key too late in the trace.

Fixes: github#106
Change-Id: I3f53b148c46b0a3a14d8ab7a41c0913a141341ef
diff --git a/src/trace_processor/importers/json/json_trace_tokenizer.cc b/src/trace_processor/importers/json/json_trace_tokenizer.cc
index 695e692..aa2b259 100644
--- a/src/trace_processor/importers/json/json_trace_tokenizer.cc
+++ b/src/trace_processor/importers/json/json_trace_tokenizer.cc
@@ -33,8 +33,6 @@
 
 namespace {
 
-#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
-
 util::Status AppendUnescapedCharacter(char c,
                                       bool is_escaping,
                                       std::string* key) {
@@ -103,11 +101,8 @@
   return ReadStringRes::kNeedsMoreData;
 }
 
-#endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
-
 }  // namespace
 
-#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
 ReadDictRes ReadOneJsonDict(const char* start,
                             const char* end,
                             base::StringView* value,
@@ -365,7 +360,54 @@
   }
   return ReadSystemLineRes::kNeedsMoreData;
 }
-#endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
+
+base::Optional<json::TimeUnit> MaybeParseDisplayTimeUnit(
+    base::StringView buffer) {
+  size_t idx = buffer.find("\"displayTimeUnit\"");
+  if (idx == base::StringView::npos)
+    return base::nullopt;
+
+  base::StringView time_unit = buffer.substr(idx);
+  std::string key;
+  const char* value_start = nullptr;
+  ReadKeyRes res =
+      ReadOneJsonKey(time_unit.data(), time_unit.end(), &key, &value_start);
+
+  // If we didn't read the key successfully, it could have been that
+  // displayTimeUnit is used somewhere else in the dict (maybe as a value?) so
+  // just ignore it.
+  if (res == ReadKeyRes::kEndOfDictionary || res == ReadKeyRes::kFatalError ||
+      res == ReadKeyRes::kNeedsMoreData) {
+    return base::nullopt;
+  }
+
+  // Double check that we did actually get the displayTimeUnit key.
+  if (key != "displayTimeUnit")
+    return base::nullopt;
+
+  // If the value isn't a string, we can do little except bail.
+  if (value_start[0] != '"')
+    return base::nullopt;
+
+  std::string value;
+  const char* unused;
+  ReadStringRes value_res =
+      ReadOneJsonString(value_start + 1, time_unit.end(), &value, &unused);
+
+  // If we didn't read the value successfully, we could be in another key
+  // in a nested dictionary. Just ignore the error.
+  if (value_res == ReadStringRes::kFatalError ||
+      value_res == ReadStringRes::kNeedsMoreData) {
+    return base::nullopt;
+  }
+
+  if (value == "ns") {
+    return json::TimeUnit::kNs;
+  } else if (value == "ms") {
+    return json::TimeUnit::kMs;
+  }
+  return base::nullopt;
+}
 
 JsonTraceTokenizer::JsonTraceTokenizer(TraceProcessorContext* ctx)
     : context_(ctx) {}
@@ -375,28 +417,23 @@
                                        size_t size) {
   PERFETTO_DCHECK(json::IsJsonSupported());
 
-#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
   buffer_.insert(buffer_.end(), data.get(), data.get() + size);
   const char* buf = buffer_.data();
   const char* next = buf;
   const char* end = buf + buffer_.size();
 
-  JsonTracker* json_tracker = JsonTracker::GetOrCreate(context_);
-
-  // It's possible the displayTimeUnit key is at the end of the json
-  // file so to be correct we ought to parse the whole file looking
-  // for this key before parsing any events however this would require
-  // two passes on the file so for now we only handle displayTimeUnit
-  // correctly if it is at the beginning of the file.
-  const base::StringView view(buf, size);
-  if (view.find("\"displayTimeUnit\":\"ns\"") != base::StringView::npos) {
-    json_tracker->SetTimeUnit(json::TimeUnit::kNs);
-  } else if (view.find("\"displayTimeUnit\":\"ms\"") !=
-             base::StringView::npos) {
-    json_tracker->SetTimeUnit(json::TimeUnit::kMs);
-  }
-
   if (offset_ == 0) {
+    // It's possible the displayTimeUnit key is at the end of the json
+    // file so to be correct we ought to parse the whole file looking
+    // for this key before parsing any events however this would require
+    // two passes on the file so for now we only handle displayTimeUnit
+    // correctly if it is at the beginning of the file.
+    base::Optional<json::TimeUnit> timeunit =
+        MaybeParseDisplayTimeUnit(base::StringView(buf, size));
+    if (timeunit) {
+      JsonTracker::GetOrCreate(context_)->SetTimeUnit(*timeunit);
+    }
+
     // Strip leading whitespace.
     while (next != end && isspace(*next)) {
       next++;
@@ -436,18 +473,8 @@
   offset_ += static_cast<uint64_t>(next - buf);
   buffer_.erase(buffer_.begin(), buffer_.begin() + (next - buf));
   return util::OkStatus();
-#else
-  perfetto::base::ignore_result(data);
-  perfetto::base::ignore_result(size);
-  perfetto::base::ignore_result(context_);
-  perfetto::base::ignore_result(format_);
-  perfetto::base::ignore_result(position_);
-  perfetto::base::ignore_result(offset_);
-  return util::ErrStatus("Cannot parse JSON trace due to missing JSON support");
-#endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
 }
 
-#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
 util::Status JsonTraceTokenizer::ParseInternal(const char* start,
                                                const char* end,
                                                const char** out) {
@@ -491,6 +518,12 @@
           return util::ErrStatus("displayTimeUnit too large");
         if (time_unit != "ms" && time_unit != "ns")
           return util::ErrStatus("displayTimeUnit unknown");
+        // If the displayTimeUnit came after the first chunk, the increment the
+        // too late stat.
+        if (offset_ != 0) {
+          context_->storage->IncrementStats(
+              stats::json_display_time_unit_too_late);
+        }
         return ParseInternal(next, end, out);
       } else {
         // If we don't recognize the key, just ignore the rest of the trace and
@@ -595,7 +628,6 @@
   *out = next;
   return util::OkStatus();
 }
-#endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
 
 void JsonTraceTokenizer::NotifyEndOfFile() {}
 
diff --git a/src/trace_processor/importers/json/json_trace_tokenizer.h b/src/trace_processor/importers/json/json_trace_tokenizer.h
index 86f5731..4daf661 100644
--- a/src/trace_processor/importers/json/json_trace_tokenizer.h
+++ b/src/trace_processor/importers/json/json_trace_tokenizer.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include "src/trace_processor/importers/common/chunked_trace_reader.h"
+#include "src/trace_processor/importers/json/json_utils.h"
 #include "src/trace_processor/importers/systrace/systrace_line_tokenizer.h"
 #include "src/trace_processor/storage/trace_storage.h"
 
@@ -32,7 +33,6 @@
 
 class TraceProcessorContext;
 
-#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
 // Visible for testing.
 enum class ReadDictRes {
   kFoundDict,
@@ -93,7 +93,11 @@
                                          const char* end,
                                          std::string* line,
                                          const char** next);
-#endif
+
+// Parses the "displayTimeUnit" key from the given trace buffer
+// and returns the associated time unit if one exists.
+base::Optional<json::TimeUnit> MaybeParseDisplayTimeUnit(
+    base::StringView buffer);
 
 // Reads a JSON trace in chunks and extracts top level json objects.
 class JsonTraceTokenizer : public ChunkedTraceReader {
@@ -139,11 +143,9 @@
     kEof,
   };
 
-#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
   util::Status ParseInternal(const char* start,
                              const char* end,
                              const char** next);
-#endif
 
   TraceProcessorContext* const context_;
 
diff --git a/src/trace_processor/importers/json/json_trace_tokenizer_unittest.cc b/src/trace_processor/importers/json/json_trace_tokenizer_unittest.cc
index 051f52a..6a11a44 100644
--- a/src/trace_processor/importers/json/json_trace_tokenizer_unittest.cc
+++ b/src/trace_processor/importers/json/json_trace_tokenizer_unittest.cc
@@ -272,6 +272,58 @@
   ASSERT_EQ(*line, R"({"ts": 149029, "foo": "bar"})");
 }
 
+TEST(JsonTraceTokenizerTest, DisplayTimeUnit) {
+  ASSERT_EQ(MaybeParseDisplayTimeUnit(R"(
+    {
+    }
+  )"),
+            base::nullopt);
+  ASSERT_EQ(MaybeParseDisplayTimeUnit(R"(
+    {
+      "displayTimeUnit": 0
+    }
+  )"),
+            base::nullopt);
+  ASSERT_EQ(MaybeParseDisplayTimeUnit(R"(
+    {
+      "str": "displayTimeUnit"
+    }
+  )"),
+            base::nullopt);
+
+  ASSERT_EQ(MaybeParseDisplayTimeUnit(R"(
+    {
+      "traceEvents": [
+        {
+          "pid": 1, "tid": 1, "name": "test",
+          "ts": 1, "dur": 1000, "ph": "X", "cat": "fee"
+        }
+      ],
+      "displayTimeUnit": "ms"
+    }
+  )"),
+            json::TimeUnit::kMs);
+  ASSERT_EQ(MaybeParseDisplayTimeUnit(R"(
+    {
+      "traceEvents": [
+        {
+          "pid": 1, "tid": 1, "name": "test",
+          "ts": 1, "dur": 1000, "ph": "X", "cat": "fee"
+        }
+      ],
+      "displayTimeUnit": "ns"
+    }
+  )"),
+            json::TimeUnit::kNs);
+
+  ASSERT_EQ(MaybeParseDisplayTimeUnit(R"(
+    {
+      "displayTimeUnit":"ms"
+    }
+  )"),
+            json::TimeUnit::kMs);
+}
+
 }  // namespace
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/importers/json/json_tracker.h b/src/trace_processor/importers/json/json_tracker.h
index d786bf1..167881e 100644
--- a/src/trace_processor/importers/json/json_tracker.h
+++ b/src/trace_processor/importers/json/json_tracker.h
@@ -47,6 +47,9 @@
   base::Optional<int64_t> CoerceToTs(const Json::Value& value) {
     return json::CoerceToTs(time_unit_, value);
   }
+  base::Optional<int64_t> CoerceToTs(const std::string& value) {
+    return json::CoerceToTs(time_unit_, value);
+  }
 
  private:
   json::TimeUnit time_unit_ = json::TimeUnit::kUs;
diff --git a/src/trace_processor/importers/json/json_utils.cc b/src/trace_processor/importers/json/json_utils.cc
index 5d0b2d1..c44efa1 100644
--- a/src/trace_processor/importers/json/json_utils.cc
+++ b/src/trace_processor/importers/json/json_utils.cc
@@ -57,22 +57,8 @@
     case Json::uintValue:
     case Json::intValue:
       return value.asInt64() * TimeUnitToNs(unit);
-    case Json::stringValue: {
-      std::string s = value.asString();
-      size_t lhs_end = std::min<size_t>(s.find('.'), s.size());
-      size_t rhs_start = std::min<size_t>(lhs_end + 1, s.size());
-      base::Optional<int64_t> lhs = base::StringToInt64(s.substr(0, lhs_end));
-      base::Optional<double> rhs =
-          base::StringToDouble("0." + s.substr(rhs_start, std::string::npos));
-      if ((!lhs.has_value() && lhs_end > 0) ||
-          (!rhs.has_value() && rhs_start < s.size())) {
-        return base::nullopt;
-      }
-      int64_t factor = TimeUnitToNs(unit);
-      return lhs.value_or(0) * factor +
-             static_cast<int64_t>(rhs.value_or(0) *
-                                  static_cast<double>(factor));
-    }
+    case Json::stringValue:
+      return CoerceToTs(unit, value.asString());
     default:
       return base::nullopt;
   }
@@ -83,6 +69,29 @@
 #endif
 }
 
+base::Optional<int64_t> CoerceToTs(TimeUnit unit, const std::string& s) {
+  PERFETTO_DCHECK(IsJsonSupported());
+
+#if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
+  size_t lhs_end = std::min<size_t>(s.find('.'), s.size());
+  size_t rhs_start = std::min<size_t>(lhs_end + 1, s.size());
+  base::Optional<int64_t> lhs = base::StringToInt64(s.substr(0, lhs_end));
+  base::Optional<double> rhs =
+      base::StringToDouble("0." + s.substr(rhs_start, std::string::npos));
+  if ((!lhs.has_value() && lhs_end > 0) ||
+      (!rhs.has_value() && rhs_start < s.size())) {
+    return base::nullopt;
+  }
+  int64_t factor = TimeUnitToNs(unit);
+  return lhs.value_or(0) * factor +
+         static_cast<int64_t>(rhs.value_or(0) * static_cast<double>(factor));
+#else
+  perfetto::base::ignore_result(unit);
+  perfetto::base::ignore_result(s);
+  return base::nullopt;
+#endif
+}
+
 base::Optional<int64_t> CoerceToInt64(const Json::Value& value) {
   PERFETTO_DCHECK(IsJsonSupported());
 
diff --git a/src/trace_processor/importers/json/json_utils.h b/src/trace_processor/importers/json/json_utils.h
index f4a70b3..74a2c3a 100644
--- a/src/trace_processor/importers/json/json_utils.h
+++ b/src/trace_processor/importers/json/json_utils.h
@@ -42,6 +42,7 @@
 
 enum class TimeUnit { kNs = 1, kUs = 1000, kMs = 1000000 };
 base::Optional<int64_t> CoerceToTs(TimeUnit unit, const Json::Value& value);
+base::Optional<int64_t> CoerceToTs(TimeUnit unit, const std::string& value);
 base::Optional<int64_t> CoerceToInt64(const Json::Value& value);
 base::Optional<uint32_t> CoerceToUint32(const Json::Value& value);
 
diff --git a/src/trace_processor/storage/stats.h b/src/trace_processor/storage/stats.h
index bc44fef..4a05a74 100644
--- a/src/trace_processor/storage/stats.h
+++ b/src/trace_processor/storage/stats.h
@@ -124,6 +124,10 @@
   F(process_tracker_errors,             kSingle,  kError,    kAnalysis, ""),   \
   F(json_tokenizer_failure,             kSingle,  kError,    kTrace,    ""),   \
   F(json_parser_failure,                kSingle,  kError,    kTrace,    ""),   \
+  F(json_display_time_unit_too_late,    kSingle,  kError,    kTrace,           \
+      "The displayTimeUnit key came too late in the JSON trace so was "        \
+      "ignored. Trace processor only supports displayTimeUnit appearing "      \
+      "at the start of JSON traces"),                                          \
   F(heap_graph_invalid_string_id,       kIndexed, kError,    kTrace,    ""),   \
   F(heap_graph_non_finalized_graph,     kSingle,  kError,    kTrace,    ""),   \
   F(heap_graph_malformed_packet,        kIndexed, kError,    kTrace,    ""),   \