Save a unique ID for each trace when it is processed.
The ID is generated from a hash of the first 4KB of the trace. If the trace
protos contain a UUID, that UUID overrides the generated one.

Bug: 175041881
Change-Id: I10140975e3121b2ad31c6bd7ff63167f6fba1b89
diff --git a/src/trace_processor/importers/proto/proto_trace_parser.cc b/src/trace_processor/importers/proto/proto_trace_parser.cc
index 26383f2..9a70b3f 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser.cc
@@ -649,6 +649,7 @@
     StringId id = context_->storage->InternString(base::StringView(str));
     context_->metadata_tracker->SetMetadata(metadata::trace_uuid,
                                             Variadic::String(id));
+    context_->uuid_found_in_trace = true;
   }
 
   if (trace_config.has_unique_session_name()) {
diff --git a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
index 1812f14..c0f11cd 100644
--- a/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
+++ b/src/trace_processor/importers/proto/proto_trace_parser_unittest.cc
@@ -2970,6 +2970,7 @@
   SqlValue value =
       context_.metadata_tracker->GetMetadataForTesting(metadata::trace_uuid);
   EXPECT_STREQ(value.string_value, "00000000-0000-0002-0000-000000000001");
+  ASSERT_TRUE(context_.uuid_found_in_trace);
 }
 
 TEST_F(ProtoTraceParserTest, ConfigPbtxt) {
diff --git a/src/trace_processor/trace_database_integrationtest.cc b/src/trace_processor/trace_database_integrationtest.cc
index e963b54..c6a8c62 100644
--- a/src/trace_processor/trace_database_integrationtest.cc
+++ b/src/trace_processor/trace_database_integrationtest.cc
@@ -41,12 +41,14 @@
       : processor_(TraceProcessor::CreateInstance(Config())) {}
 
  protected:
-  util::Status LoadTrace(const char* name, size_t min_chunk_size = 512) {
-    EXPECT_LE(min_chunk_size, kMaxChunkSize);
+  util::Status LoadTrace(const char* name,
+                         size_t min_chunk_size = 512,
+                         size_t max_chunk_size = kMaxChunkSize) {
+    EXPECT_LE(min_chunk_size, max_chunk_size);
     base::ScopedFstream f(fopen(
         base::GetTestDataPath(std::string("test/data/") + name).c_str(), "rb"));
     std::minstd_rand0 rnd_engine(0);
-    std::uniform_int_distribution<size_t> dist(min_chunk_size, kMaxChunkSize);
+    std::uniform_int_distribution<size_t> dist(min_chunk_size, max_chunk_size);
     while (!feof(*f)) {
       size_t chunk_size = dist(rnd_engine);
       std::unique_ptr<uint8_t[]> buf(new uint8_t[chunk_size]);
@@ -363,6 +365,38 @@
   ASSERT_EQ(it.Get(0).long_value, 276174);
 }
 
+/*
+ * This trace does not have a uuid. The uuid is derived by hashing the first
+ * 4096 bytes, which are read in one chunk (min == max == kMaxChunkSize).
+ */
+TEST_F(TraceProcessorIntegrationTest, TraceWithoutUuidReadInOneChunk) {
+  ASSERT_TRUE(LoadTrace("example_android_trace_30s.pb", kMaxChunkSize).ok());
+  auto it = Query("select str_value from metadata where name = 'trace_uuid'");
+  ASSERT_TRUE(it.Next());
+  EXPECT_STREQ(it.Get(0).string_value, "00000000-0000-0000-8906-ebb53e1d0738");
+}
+
+/*
+ * This trace does not have a uuid. The uuid is derived by hashing the first
+ * 4096 bytes, which arrive split across chunks of 512 to 2048 bytes each.
+ */
+TEST_F(TraceProcessorIntegrationTest, TraceWithoutUuidReadInMultipleChuncks) {
+  ASSERT_TRUE(LoadTrace("example_android_trace_30s.pb", 512, 2048).ok());
+  auto it = Query("select str_value from metadata where name = 'trace_uuid'");
+  ASSERT_TRUE(it.Next());
+  EXPECT_STREQ(it.Get(0).string_value, "00000000-0000-0000-8906-ebb53e1d0738");
+}
+
+/*
+ * This trace has a uuid. It will not be overridden by the hash of the first
+ * 4096 bytes.
+ */
+TEST_F(TraceProcessorIntegrationTest, TraceWithUuidReadInParts) {
+  ASSERT_TRUE(LoadTrace("trace_with_uuid.pftrace", 512, 2048).ok());
+  auto it = Query("select str_value from metadata where name = 'trace_uuid'");
+  ASSERT_TRUE(it.Next());
+  EXPECT_STREQ(it.Get(0).string_value, "123e4567-e89b-12d3-a456-426655443322");
+}
 }  // namespace
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/trace_processor_storage_impl.cc b/src/trace_processor/trace_processor_storage_impl.cc
index e9c0f37..b71eac7 100644
--- a/src/trace_processor/trace_processor_storage_impl.cc
+++ b/src/trace_processor/trace_processor_storage_impl.cc
@@ -17,6 +17,7 @@
 #include "src/trace_processor/trace_processor_storage_impl.h"
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/uuid.h"
 #include "src/trace_processor/forwarding_trace_parser.h"
 #include "src/trace_processor/importers/chrome_track_event.descriptor.h"
 #include "src/trace_processor/importers/common/args_tracker.h"
@@ -37,7 +38,6 @@
 #include "src/trace_processor/importers/track_event.descriptor.h"
 #include "src/trace_processor/trace_sorter.h"
 #include "src/trace_processor/util/descriptors.h"
-#include "src/trace_processor/util/trace_blob_view.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -94,6 +94,19 @@
 
   auto scoped_trace = context_.storage->TraceExecutionTimeIntoStats(
       stats::parse_trace_duration_ns);
+
+  if (hash_input_size_remaining_ > 0 && !context_.uuid_found_in_trace) {
+    const size_t hash_size = std::min(hash_input_size_remaining_, size);
+    hash_input_size_remaining_ -= hash_size;
+
+    trace_hash_.Update(reinterpret_cast<const char*>(data.get()), hash_size);
+    base::Uuid uuid(static_cast<int64_t>(trace_hash_.digest()), 0);
+    const StringId id_for_uuid =
+        context_.storage->InternString(base::StringView(uuid.ToPrettyString()));
+    context_.metadata_tracker->SetMetadata(metadata::trace_uuid,
+                                           Variadic::String(id_for_uuid));
+  }
+
   util::Status status = context_.chunk_reader->Parse(std::move(data), size);
   unrecoverable_parse_error_ |= !status.ok();
   return status;
diff --git a/src/trace_processor/trace_processor_storage_impl.h b/src/trace_processor/trace_processor_storage_impl.h
index 1c029c6..4b225d0 100644
--- a/src/trace_processor/trace_processor_storage_impl.h
+++ b/src/trace_processor/trace_processor_storage_impl.h
@@ -19,6 +19,7 @@
 
 #include <memory>
 
+#include "perfetto/ext/base/hash.h"
 #include "perfetto/trace_processor/basic_types.h"
 #include "perfetto/trace_processor/status.h"
 #include "perfetto/trace_processor/trace_processor_storage.h"
@@ -38,8 +39,10 @@
   TraceProcessorContext* context() { return &context_; }
 
  protected:
+  base::Hash trace_hash_;
   TraceProcessorContext context_;
   bool unrecoverable_parse_error_ = false;
+  size_t hash_input_size_remaining_ = 4096;
 };
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/types/trace_processor_context.h b/src/trace_processor/types/trace_processor_context.h
index b2c63ba..1cf723c 100644
--- a/src/trace_processor/types/trace_processor_context.h
+++ b/src/trace_processor/types/trace_processor_context.h
@@ -118,6 +118,11 @@
   std::vector<std::vector<ProtoImporterModule*>> modules_by_field;
   std::vector<std::unique_ptr<ProtoImporterModule>> modules;
   FtraceModule* ftrace_module = nullptr;
+
+  // Marks whether the uuid was read from the trace.
+  // If the uuid was NOT read, the uuid will be made from the hash of the first
+  // 4KB of the trace.
+  bool uuid_found_in_trace = false;
 };
 
 }  // namespace trace_processor
diff --git a/test/trace_processor/memory/trace_metadata.out b/test/trace_processor/memory/trace_metadata.out
index b66bf1a..1f64280 100644
--- a/test/trace_processor/memory/trace_metadata.out
+++ b/test/trace_processor/memory/trace_metadata.out
@@ -1,5 +1,6 @@
 trace_metadata {
   trace_duration_ns: 9519159074,
+  trace_uuid: "00000000-0000-0000-e77f-20a2204c2a49",
   trace_size_bytes: 6365447
   trace_config_pbtxt: "buffers: {\n  size_kb: 32768\n  fill_policy: UNSPECIFIED\n}\ndata_sources: {\n  config: {\n    name: \"linux.ftrace\"\n    target_buffer: 0\n    trace_duration_ms: 0\n    tracing_session_id: 0\n    ftrace_config: {\n      ftrace_events: \"print\"\n      ftrace_events: \"sched_switch\"\n      ftrace_events: \"rss_stat\"\n      ftrace_events: \"ion_heap_shrink\"\n      ftrace_events: \"ion_heap_grow\"\n      atrace_categories: \"am\"\n      atrace_categories: \"dalvik\"\n      buffer_size_kb: 0\n      drain_period_ms: 0\n    }\n    chrome_config: {\n      trace_config: \"\"\n    }\n    inode_file_config: {\n      scan_interval_ms: 0\n      scan_delay_ms: 0\n      scan_batch_size: 0\n      do_not_scan: false\n    }\n    process_stats_config: {\n      scan_all_processes_on_start: false\n      record_thread_names: false\n      proc_stats_poll_ms: 0\n    }\n    sys_stats_config: {\n      meminfo_period_ms: 0\n      vmstat_period_ms: 0\n      stat_period_ms: 0\n    }\n    heapprofd_config: {\n      sampling_interval_bytes: 0\n      all: false\n      continuous_dump_config: {\n        dump_phase_ms: 0\n        dump_interval_ms: 0\n      }\n    }\n    legacy_config: \"\"\n  }\n}\ndata_sources: {\n  config: {\n    name: \"linux.process_stats\"\n    target_buffer: 0\n    trace_duration_ms: 0\n    tracing_session_id: 0\n    ftrace_config: {\n      buffer_size_kb: 0\n      drain_period_ms: 0\n    }\n    chrome_config: {\n      trace_config: \"\"\n    }\n    inode_file_config: {\n      scan_interval_ms: 0\n      scan_delay_ms: 0\n      scan_batch_size: 0\n      do_not_scan: false\n    }\n    process_stats_config: {\n      scan_all_processes_on_start: false\n      record_thread_names: false\n      proc_stats_poll_ms: 100\n    }\n    sys_stats_config: {\n      meminfo_period_ms: 0\n      vmstat_period_ms: 0\n      stat_period_ms: 0\n    }\n    heapprofd_config: {\n      sampling_interval_bytes: 0\n      all: false\n      continuous_dump_config: {\n        
dump_phase_ms: 0\n        dump_interval_ms: 0\n      }\n    }\n    legacy_config: \"\"\n  }\n}\ndata_sources: {\n  config: {\n    name: \"linux.sys_stats\"\n    target_buffer: 0\n    trace_duration_ms: 0\n    tracing_session_id: 0\n    ftrace_config: {\n      buffer_size_kb: 0\n      drain_period_ms: 0\n    }\n    chrome_config: {\n      trace_config: \"\"\n    }\n    inode_file_config: {\n      scan_interval_ms: 0\n      scan_delay_ms: 0\n      scan_batch_size: 0\n      do_not_scan: false\n    }\n    process_stats_config: {\n      scan_all_processes_on_start: false\n      record_thread_names: false\n      proc_stats_poll_ms: 0\n    }\n    sys_stats_config: {\n      meminfo_period_ms: 50\n      meminfo_counters: MEMINFO_MEM_AVAILABLE\n      meminfo_counters: MEMINFO_SWAP_CACHED\n      meminfo_counters: MEMINFO_ACTIVE\n      meminfo_counters: MEMINFO_INACTIVE\n      vmstat_period_ms: 0\n      stat_period_ms: 0\n    }\n    heapprofd_config: {\n      sampling_interval_bytes: 0\n      all: false\n      continuous_dump_config: {\n        dump_phase_ms: 0\n        dump_interval_ms: 0\n      }\n    }\n    legacy_config: \"\"\n  }\n}\nduration_ms: 10000\nenable_extra_guardrails: false\nlockdown_mode: LOCKDOWN_UNCHANGED\nstatsd_metadata: {\n  triggering_alert_id: 0\n  triggering_config_uid: 0\n  triggering_config_id: 0\n}\nwrite_into_file: false\nfile_write_period_ms: 0\nmax_file_size_bytes: 0\nguardrail_overrides: {\n  max_upload_per_day_bytes: 0\n}\ndeferred_start: false",
   sched_duration_ns: 9452761359
diff --git a/test/trace_processor/parsing/chrome_metadata.out b/test/trace_processor/parsing/chrome_metadata.out
index 71e8908..ffdc373 100644
--- a/test/trace_processor/parsing/chrome_metadata.out
+++ b/test/trace_processor/parsing/chrome_metadata.out
@@ -1,4 +1,5 @@
 "id","type","name","key_type","int_value","str_value"
-0,"metadata","cr-playstore_version_code","single",101,"[NULL]"
-1,"metadata","cr-enabled_categories","single","[NULL]","cat1,cat2,cat3"
-2,"metadata","trace_size_bytes","single",50,"[NULL]"
+0,"metadata","trace_uuid","single","[NULL]","00000000-0000-0000-23b6-9c184f48509d"
+1,"metadata","cr-playstore_version_code","single",101,"[NULL]"
+2,"metadata","cr-enabled_categories","single","[NULL]","cat1,cat2,cat3"
+3,"metadata","trace_size_bytes","single",50,"[NULL]"
diff --git a/test/trace_processor/tables/trace_metadata.json.out b/test/trace_processor/tables/trace_metadata.json.out
index 0d3f5d0..2a363cf 100644
--- a/test/trace_processor/tables/trace_metadata.json.out
+++ b/test/trace_processor/tables/trace_metadata.json.out
@@ -1,6 +1,7 @@
 {
   "trace_metadata": {
     "trace_duration_ns": 9519159074,
+    "trace_uuid": "00000000-0000-0000-e77f-20a2204c2a49",
     "trace_size_bytes": 6365447,
     "trace_config_pbtxt": "buffers: {\n  size_kb: 32768\n  fill_policy: UNSPECIFIED\n}\ndata_sources: {\n  config: {\n    name: \"linux.ftrace\"\n    target_buffer: 0\n    trace_duration_ms: 0\n    tracing_session_id: 0\n    ftrace_config: {\n      ftrace_events: \"print\"\n      ftrace_events: \"sched_switch\"\n      ftrace_events: \"rss_stat\"\n      ftrace_events: \"ion_heap_shrink\"\n      ftrace_events: \"ion_heap_grow\"\n      atrace_categories: \"am\"\n      atrace_categories: \"dalvik\"\n      buffer_size_kb: 0\n      drain_period_ms: 0\n    }\n    chrome_config: {\n      trace_config: \"\"\n    }\n    inode_file_config: {\n      scan_interval_ms: 0\n      scan_delay_ms: 0\n      scan_batch_size: 0\n      do_not_scan: false\n    }\n    process_stats_config: {\n      scan_all_processes_on_start: false\n      record_thread_names: false\n      proc_stats_poll_ms: 0\n    }\n    sys_stats_config: {\n      meminfo_period_ms: 0\n      vmstat_period_ms: 0\n      stat_period_ms: 0\n    }\n    heapprofd_config: {\n      sampling_interval_bytes: 0\n      all: false\n      continuous_dump_config: {\n        dump_phase_ms: 0\n        dump_interval_ms: 0\n      }\n    }\n    legacy_config: \"\"\n  }\n}\ndata_sources: {\n  config: {\n    name: \"linux.process_stats\"\n    target_buffer: 0\n    trace_duration_ms: 0\n    tracing_session_id: 0\n    ftrace_config: {\n      buffer_size_kb: 0\n      drain_period_ms: 0\n    }\n    chrome_config: {\n      trace_config: \"\"\n    }\n    inode_file_config: {\n      scan_interval_ms: 0\n      scan_delay_ms: 0\n      scan_batch_size: 0\n      do_not_scan: false\n    }\n    process_stats_config: {\n      scan_all_processes_on_start: false\n      record_thread_names: false\n      proc_stats_poll_ms: 100\n    }\n    sys_stats_config: {\n      meminfo_period_ms: 0\n      vmstat_period_ms: 0\n      stat_period_ms: 0\n    }\n    heapprofd_config: {\n      sampling_interval_bytes: 0\n      all: false\n      continuous_dump_config: {\n      
  dump_phase_ms: 0\n        dump_interval_ms: 0\n      }\n    }\n    legacy_config: \"\"\n  }\n}\ndata_sources: {\n  config: {\n    name: \"linux.sys_stats\"\n    target_buffer: 0\n    trace_duration_ms: 0\n    tracing_session_id: 0\n    ftrace_config: {\n      buffer_size_kb: 0\n      drain_period_ms: 0\n    }\n    chrome_config: {\n      trace_config: \"\"\n    }\n    inode_file_config: {\n      scan_interval_ms: 0\n      scan_delay_ms: 0\n      scan_batch_size: 0\n      do_not_scan: false\n    }\n    process_stats_config: {\n      scan_all_processes_on_start: false\n      record_thread_names: false\n      proc_stats_poll_ms: 0\n    }\n    sys_stats_config: {\n      meminfo_period_ms: 50\n      meminfo_counters: MEMINFO_MEM_AVAILABLE\n      meminfo_counters: MEMINFO_SWAP_CACHED\n      meminfo_counters: MEMINFO_ACTIVE\n      meminfo_counters: MEMINFO_INACTIVE\n      vmstat_period_ms: 0\n      stat_period_ms: 0\n    }\n    heapprofd_config: {\n      sampling_interval_bytes: 0\n      all: false\n      continuous_dump_config: {\n        dump_phase_ms: 0\n        dump_interval_ms: 0\n      }\n    }\n    legacy_config: \"\"\n  }\n}\nduration_ms: 10000\nenable_extra_guardrails: false\nlockdown_mode: LOCKDOWN_UNCHANGED\nstatsd_metadata: {\n  triggering_alert_id: 0\n  triggering_config_uid: 0\n  triggering_config_id: 0\n}\nwrite_into_file: false\nfile_write_period_ms: 0\nmax_file_size_bytes: 0\nguardrail_overrides: {\n  max_upload_per_day_bytes: 0\n}\ndeferred_start: false",
     "sched_duration_ns": 9452761359
diff --git a/tools/install-build-deps b/tools/install-build-deps
index 6575bea..68150c5 100755
--- a/tools/install-build-deps
+++ b/tools/install-build-deps
@@ -234,8 +234,8 @@
     # Example traces for regression tests.
     Dependency(
         'test/data.zip',
-        'https://storage.googleapis.com/perfetto/test-data-20210518-223638.zip',
-        'a9119ae4828fae92c9ec8449e3fd91d753e60b60fa59611fb8e6c0dd6ed69b13',
+        'https://storage.googleapis.com/perfetto/test-data-20210604-141038.zip',
+        'f202d92ea541b7072562b579470771a5e2b414572a5421c501ea0785a57726eb',
         'all', 'all',
     ),
 
diff --git a/tools/test_data.txt b/tools/test_data.txt
index 0585217..35a5977 100644
--- a/tools/test_data.txt
+++ b/tools/test_data.txt
@@ -9,3 +9,4 @@
 test/data/example_android_trace_30s.pb
 test/data/full_trace_filter.bytecode
 test/data/kallsyms.txt
+test/data/trace_with_uuid.pftrace