Merge changes I6d585e67,I1b4a44e7

* changes:
  traced_perf: add memory guardrail (stops the DS if daemon above limit)
  traced_perf: memory guardrail proto changes
diff --git a/include/perfetto/profiling/pprof_builder.h b/include/perfetto/profiling/pprof_builder.h
index 45aef7e..20b9d2e 100644
--- a/include/perfetto/profiling/pprof_builder.h
+++ b/include/perfetto/profiling/pprof_builder.h
@@ -33,14 +33,24 @@
 
 namespace trace_to_text {
 
-struct SerializedProfile {
-  uint64_t pid;
-  std::string heap_name;
-  std::string serialized;
+enum class ProfileType {
+  kHeapProfile,
+  kPerfProfile,
 };
 
-bool TraceToPprof(trace_processor::TraceProcessor*,
+struct SerializedProfile {
+  ProfileType profile_type;
+  uint64_t pid;
+  std::string serialized;
+  // non-empty if profile_type == kHeapProfile
+  std::string heap_name;
+};
+
+enum class ConversionMode { kHeapProfile, kPerfProfile };
+
+bool TraceToPprof(trace_processor::TraceProcessor* tp,
                   std::vector<SerializedProfile>* output,
+                  ConversionMode mode = ConversionMode::kHeapProfile,
                   uint64_t pid = 0,
                   const std::vector<uint64_t>& timestamps = {});
 
diff --git a/src/profiling/memory/unwinding.cc b/src/profiling/memory/unwinding.cc
index a7b4f37..868eb06 100644
--- a/src/profiling/memory/unwinding.cc
+++ b/src/profiling/memory/unwinding.cc
@@ -172,14 +172,15 @@
       unwinder.SetDexFiles(metadata->dex_files.get());
 #endif
     }
+    out->frames.swap(unwinder.frames());  // Provide the unwinder buffer to use.
     unwinder.Unwind(&kSkipMaps, /*map_suffixes_to_ignore=*/nullptr);
+    out->frames.swap(unwinder.frames());  // Take the buffer back.
     error_code = unwinder.LastErrorCode();
     if (error_code != unwindstack::ERROR_INVALID_MAP &&
         (unwinder.warnings() & unwindstack::WARNING_DEX_PC_NOT_IN_MAP) == 0) {
       break;
     }
   }
-  out->frames = unwinder.ConsumeFrames();
   out->build_ids.resize(out->frames.size());
   for (size_t i = 0; i < out->frames.size(); ++i) {
     out->build_ids[i] = metadata->GetBuildId(out->frames[i]);
diff --git a/src/trace_processor/metrics/chrome/rail_modes.sql b/src/trace_processor/metrics/chrome/rail_modes.sql
index 6d7434b..a432412 100644
--- a/src/trace_processor/metrics/chrome/rail_modes.sql
+++ b/src/trace_processor/metrics/chrome/rail_modes.sql
@@ -158,9 +158,15 @@
   dur,
   1
 FROM (SELECT start_ts AS ts,
-    COALESCE(MIN(ts) - start_ts - const.vsync_padding, end_ts - start_ts) AS dur
-  FROM trace_bounds, slice, const
-  WHERE name = "VSync") WHERE dur > 0
+          COALESCE((
+              SELECT MIN(ts)
+              FROM slice
+              WHERE name = "VSync"
+            ) - start_ts - const.vsync_padding,
+            end_ts - start_ts
+          ) AS dur
+FROM trace_bounds, const)
+WHERE dur > 0
 UNION
 -- Insert a slice between the last vsync and end_ts
 SELECT last_vsync AS ts,
diff --git a/src/trace_processor/sqlite/span_join_operator_table.cc b/src/trace_processor/sqlite/span_join_operator_table.cc
index c847a25..1d84f37 100644
--- a/src/trace_processor/sqlite/span_join_operator_table.cc
+++ b/src/trace_processor/sqlite/span_join_operator_table.cc
@@ -387,11 +387,16 @@
                                           FilterHistory) {
   PERFETTO_TP_TRACE("SPAN_JOIN_XFILTER");
 
-  util::Status status = t1_.Initialize(qc, argv);
+  util::Status status =
+      t1_.Initialize(qc, argv, Query::InitialEofBehavior::kTreatAsEof);
   if (!status.ok())
     return SQLITE_ERROR;
 
-  status = t2_.Initialize(qc, argv);
+  status = t2_.Initialize(
+      qc, argv,
+      table_->IsLeftJoin()
+          ? Query::InitialEofBehavior::kTreatAsMissingPartitionShadow
+          : Query::InitialEofBehavior::kTreatAsEof);
   if (!status.ok())
     return SQLITE_ERROR;
 
@@ -469,7 +474,7 @@
     // Find which slice finishes first.
     next_query_ = FindEarliestFinishQuery();
 
-    // If the current span is overlapping, just finsh there to emit the current
+    // If the current span is overlapping, just finish there to emit the current
     // slice.
     if (IsOverlappingSpan())
       break;
@@ -581,11 +586,19 @@
 
 util::Status SpanJoinOperatorTable::Query::Initialize(
     const QueryConstraints& qc,
-    sqlite3_value** argv) {
+    sqlite3_value** argv,
+    InitialEofBehavior eof_behavior) {
   *this = Query(table_, definition(), db_);
   sql_query_ = CreateSqlQuery(
       table_->ComputeSqlConstraintsForDefinition(*defn_, qc, argv));
-  return Rewind();
+  util::Status status = Rewind();
+  if (!status.ok())
+    return status;
+  if (eof_behavior == InitialEofBehavior::kTreatAsMissingPartitionShadow &&
+      IsEof()) {
+    state_ = State::kMissingPartitionShadow;
+  }
+  return status;
 }
 
 util::Status SpanJoinOperatorTable::Query::Next() {
diff --git a/src/trace_processor/sqlite/span_join_operator_table.h b/src/trace_processor/sqlite/span_join_operator_table.h
index 4aa3522..ebfb867 100644
--- a/src/trace_processor/sqlite/span_join_operator_table.h
+++ b/src/trace_processor/sqlite/span_join_operator_table.h
@@ -165,8 +165,16 @@
     Query(Query&&) noexcept = default;
     Query& operator=(Query&&) = default;
 
+    enum class InitialEofBehavior {
+      kTreatAsEof,
+      kTreatAsMissingPartitionShadow
+    };
+
     // Initializes the query with the given constraints and query parameters.
-    util::Status Initialize(const QueryConstraints& qc, sqlite3_value** argv);
+    util::Status Initialize(
+        const QueryConstraints& qc,
+        sqlite3_value** argv,
+        InitialEofBehavior eof_behavior = InitialEofBehavior::kTreatAsEof);
 
     // Forwards the query to the next valid slice.
     util::Status Next();
diff --git a/src/trace_processor/sqlite/span_join_operator_table_unittest.cc b/src/trace_processor/sqlite/span_join_operator_table_unittest.cc
index 87bb645..4c28397 100644
--- a/src/trace_processor/sqlite/span_join_operator_table_unittest.cc
+++ b/src/trace_processor/sqlite/span_join_operator_table_unittest.cc
@@ -254,6 +254,92 @@
   ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_DONE);
 }
 
+TEST_F(SpanJoinOperatorTableTest, LeftJoinTwoSpanTables) {
+  RunStatement(
+      "CREATE TEMP TABLE f("
+      "ts BIG INT PRIMARY KEY, "
+      "dur BIG INT, "
+      "cpu UNSIGNED INT"
+      ");");
+  RunStatement(
+      "CREATE TEMP TABLE s("
+      "ts BIG INT PRIMARY KEY, "
+      "dur BIG INT, "
+      "tid UNSIGNED INT"
+      ");");
+  RunStatement("CREATE VIRTUAL TABLE sp USING span_left_join(f, s);");
+
+  RunStatement("INSERT INTO f VALUES(100, 10, 0);");
+  RunStatement("INSERT INTO f VALUES(110, 50, 1);");
+
+  RunStatement("INSERT INTO s VALUES(100, 5, 1);");
+  RunStatement("INSERT INTO s VALUES(110, 40, 2);");
+  RunStatement("INSERT INTO s VALUES(150, 50, 3);");
+
+  PrepareValidStatement("SELECT * FROM sp");
+
+  ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 0), 100);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 1), 5);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 2), 0);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 3), 1);
+
+  ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 0), 105);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 1), 5);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 2), 0);
+  ASSERT_EQ(sqlite3_column_type(stmt_.get(), 3), SQLITE_NULL);
+
+  ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 0), 110);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 1), 40);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 2), 1);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 3), 2);
+
+  ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 0), 150);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 1), 10);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 2), 1);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 3), 3);
+
+  ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_DONE);
+}
+
+TEST_F(SpanJoinOperatorTableTest, LeftJoinTwoSpanTables_EmptyRight) {
+  RunStatement(
+      "CREATE TEMP TABLE f("
+      "ts BIG INT PRIMARY KEY, "
+      "dur BIG INT, "
+      "cpu UNSIGNED INT"
+      ");");
+  RunStatement(
+      "CREATE TEMP TABLE s("
+      "ts BIG INT PRIMARY KEY, "
+      "dur BIG INT, "
+      "tid UNSIGNED INT"
+      ");");
+  RunStatement("CREATE VIRTUAL TABLE sp USING span_left_join(f, s);");
+
+  RunStatement("INSERT INTO f VALUES(100, 10, 0);");
+  RunStatement("INSERT INTO f VALUES(110, 50, 1);");
+
+  PrepareValidStatement("SELECT * FROM sp");
+
+  ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 0), 100);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 1), 10);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 2), 0);
+  ASSERT_EQ(sqlite3_column_type(stmt_.get(), 3), SQLITE_NULL);
+
+  ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_ROW);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 0), 110);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 1), 50);
+  ASSERT_EQ(sqlite3_column_int64(stmt_.get(), 2), 1);
+  ASSERT_EQ(sqlite3_column_type(stmt_.get(), 3), SQLITE_NULL);
+
+  ASSERT_EQ(sqlite3_step(stmt_.get()), SQLITE_DONE);
+}
+
 }  // namespace
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/test/end_to_end_integrationtest.cc b/test/end_to_end_integrationtest.cc
index fbd3760..a8d29e6 100644
--- a/test/end_to_end_integrationtest.cc
+++ b/test/end_to_end_integrationtest.cc
@@ -380,7 +380,9 @@
 //    We cannot change the length of the production code in
 //    CanReadKernelSymbolAddresses() to deal with it.
 // 2. On user (i.e. non-userdebug) builds. As that doesn't work there by design.
-#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD) && defined(__i386__)
+// 3. On ARM builds, because they fail on our CI.
+#if (PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD) && defined(__i386__)) || \
+    defined(__arm__)
 #define MAYBE_KernelAddressSymbolization DISABLED_KernelAddressSymbolization
 #else
 #define MAYBE_KernelAddressSymbolization KernelAddressSymbolization
diff --git a/test/synth_common.py b/test/synth_common.py
index 00ec45a..7fe68fd 100644
--- a/test/synth_common.py
+++ b/test/synth_common.py
@@ -44,7 +44,11 @@
 
 
 def ms_to_ns(time_in_ms):
-  return time_in_ms * 1000000
+  return int(time_in_ms * 1000000)
+
+
+def s_to_ns(time_in_s):
+  return int(time_in_s * 1000000000)
 
 
 class Trace(object):
@@ -612,6 +616,14 @@
         "Scheduler.RAILMode", ts=ts, dur=dur, track=track)
     packet.track_event.chrome_renderer_scheduler_state.rail_mode = mode
 
+  def add_chrome_metadata(self, os_name=None):
+    metadata = self.add_packet().chrome_events.metadata.add()
+    if os_name is not None:
+      metadata.name = "os-name"
+      metadata.string_value = os_name
+
+    return metadata
+
 
 def create_trace():
   parser = argparse.ArgumentParser()
diff --git a/test/trace_processor/chrome/index b/test/trace_processor/chrome/index
index 02b6dfc..479a685 100644
--- a/test/trace_processor/chrome/index
+++ b/test/trace_processor/chrome/index
@@ -27,3 +27,5 @@
 cpu_time_by_combined_rail_mode.py cpu_time_by_combined_rail_mode.sql cpu_time_by_combined_rail_mode.out
 actual_power_by_combined_rail_mode.py actual_power_by_combined_rail_mode.sql actual_power_by_combined_rail_mode.out
 estimated_power_by_combined_rail_mode.py estimated_power_by_combined_rail_mode.sql estimated_power_by_combined_rail_mode.out
+modified_rail_modes.py modified_rail_modes.sql modified_rail_modes.out
+modified_rail_modes_no_vsyncs.py modified_rail_modes.sql modified_rail_modes_no_vsyncs.out
diff --git a/test/trace_processor/chrome/modified_rail_modes.out b/test/trace_processor/chrome/modified_rail_modes.out
new file mode 100644
index 0000000..8cb0286
--- /dev/null
+++ b/test/trace_processor/chrome/modified_rail_modes.out
@@ -0,0 +1,7 @@
+
+"id","ts","dur","mode"
+1,0,1000000000,"response"
+2,1000000000,2000000000,"load"
+3,3000000000,283333324,"animation"
+4,3283333324,216666676,"foreground_idle"
+5,3500000000,1000000000,"background"
diff --git a/test/trace_processor/chrome/modified_rail_modes.py b/test/trace_processor/chrome/modified_rail_modes.py
new file mode 100644
index 0000000..1deb786
--- /dev/null
+++ b/test/trace_processor/chrome/modified_rail_modes.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# Copyright (C) 2020 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from os import sys
+
+import synth_common
+from synth_common import s_to_ns
+
+trace = synth_common.create_trace()
+
+trace.add_chrome_metadata(os_name="Android")
+
+track1 = 1234
+track2 = 4567
+gpu_track = 7890
+
+trace.add_process_track_descriptor(track1, pid=0)
+trace.add_process_track_descriptor(track2, pid=2)
+trace.add_process_track_descriptor(gpu_track, pid=4)
+
+frame_period = s_to_ns(1.0 / 60)
+
+trace.add_track_event_slice("VSync", ts=s_to_ns(3), dur=10, track=gpu_track)
+trace.add_track_event_slice(
+    "VSync", ts=s_to_ns(3) + frame_period, dur=10, track=gpu_track)
+# Frame skipped, but modified rail mode won't go back to foreground_idle
+trace.add_track_event_slice(
+    "VSync", ts=s_to_ns(3) + frame_period * 3, dur=10, track=gpu_track)
+# Larger gap now when mode will go to foreground_idle
+trace.add_track_event_slice(
+    "VSync", ts=s_to_ns(3) + frame_period * 12, dur=10, track=gpu_track)
+trace.add_track_event_slice(
+    "VSync", ts=s_to_ns(3) + frame_period * 13, dur=10, track=gpu_track)
+trace.add_track_event_slice(
+    "VSync", ts=s_to_ns(3) + frame_period * 14, dur=10, track=gpu_track)
+
+trace.add_rail_mode_slice(
+    ts=0, dur=s_to_ns(1), track=track1, mode=synth_common.RAIL_MODE_RESPONSE)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(1),
+    dur=s_to_ns(2),
+    track=track1,
+    mode=synth_common.RAIL_MODE_LOAD)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(3), dur=-1, track=track1, mode=synth_common.RAIL_MODE_IDLE)
+
+trace.add_rail_mode_slice(
+    ts=0, dur=s_to_ns(1), track=track2, mode=synth_common.RAIL_MODE_ANIMATION)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(1),
+    dur=s_to_ns(2.5),
+    track=track2,
+    mode=synth_common.RAIL_MODE_IDLE)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(2.5),
+    dur=s_to_ns(1),
+    track=track2,
+    mode=synth_common.RAIL_MODE_ANIMATION)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(3.5),
+    dur=s_to_ns(1),
+    track=track2,
+    mode=synth_common.RAIL_MODE_IDLE)
+
+sys.stdout.buffer.write(trace.trace.SerializeToString())
diff --git a/test/trace_processor/chrome/modified_rail_modes.sql b/test/trace_processor/chrome/modified_rail_modes.sql
new file mode 100644
index 0000000..053aca8
--- /dev/null
+++ b/test/trace_processor/chrome/modified_rail_modes.sql
@@ -0,0 +1,15 @@
+-- Copyright 2020 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+SELECT RUN_METRIC('chrome/rail_modes.sql') AS suppress_query_output;
+SELECT * FROM modified_rail_slices;
diff --git a/test/trace_processor/chrome/modified_rail_modes_no_vsyncs.out b/test/trace_processor/chrome/modified_rail_modes_no_vsyncs.out
new file mode 100644
index 0000000..53e37e5
--- /dev/null
+++ b/test/trace_processor/chrome/modified_rail_modes_no_vsyncs.out
@@ -0,0 +1,6 @@
+
+"id","ts","dur","mode"
+1,0,1000000000,"response"
+2,1000000000,2000000000,"load"
+3,3000000000,500000000,"foreground_idle"
+4,3500000000,1000000000,"background"
diff --git a/test/trace_processor/chrome/modified_rail_modes_no_vsyncs.py b/test/trace_processor/chrome/modified_rail_modes_no_vsyncs.py
new file mode 100644
index 0000000..ff0aebc
--- /dev/null
+++ b/test/trace_processor/chrome/modified_rail_modes_no_vsyncs.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+# Copyright (C) 2020 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from os import sys
+
+import synth_common
+from synth_common import s_to_ns
+
+trace = synth_common.create_trace()
+
+trace.add_chrome_metadata(os_name="Android")
+
+track1 = 1234
+track2 = 4567
+
+trace.add_process_track_descriptor(track1, pid=0)
+trace.add_process_track_descriptor(track2, pid=2)
+
+trace.add_rail_mode_slice(
+    ts=0, dur=s_to_ns(1), track=track1, mode=synth_common.RAIL_MODE_RESPONSE)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(1),
+    dur=s_to_ns(2),
+    track=track1,
+    mode=synth_common.RAIL_MODE_LOAD)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(3), dur=-1, track=track1, mode=synth_common.RAIL_MODE_IDLE)
+
+trace.add_rail_mode_slice(
+    ts=0, dur=s_to_ns(1), track=track2, mode=synth_common.RAIL_MODE_ANIMATION)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(1),
+    dur=s_to_ns(2.5),
+    track=track2,
+    mode=synth_common.RAIL_MODE_IDLE)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(2.5),
+    dur=s_to_ns(1),
+    track=track2,
+    mode=synth_common.RAIL_MODE_ANIMATION)
+trace.add_rail_mode_slice(
+    ts=s_to_ns(3.5),
+    dur=s_to_ns(1),
+    track=track2,
+    mode=synth_common.RAIL_MODE_IDLE)
+
+sys.stdout.buffer.write(trace.trace.SerializeToString())
diff --git a/tools/trace_to_text/main.cc b/tools/trace_to_text/main.cc
index 8f8934a..d5f05c9 100644
--- a/tools/trace_to_text/main.cc
+++ b/tools/trace_to_text/main.cc
@@ -50,6 +50,7 @@
           "[trace.pb] "
           "[trace.txt]\n"
           "\nProfile mode only:\n"
+          "\t--perf generate a perf profile\n"
           "\t--timestamps TIMESTAMP1,TIMESTAMP2,... generate profiles "
           "only for these timestamps\n"
           "\t--pid PID generate profiles only for this process id\n",
@@ -73,6 +74,7 @@
   uint64_t pid = 0;
   std::vector<uint64_t> timestamps;
   bool full_sort = false;
+  bool perf_profile = false;
   for (int i = 1; i < argc; i++) {
     if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) {
       printf("%s\n", base::GetVersionString());
@@ -99,6 +101,8 @@
       for (const std::string& ts : ts_strings) {
         timestamps.emplace_back(StringToUint64OrDie(ts.c_str()));
       }
+    } else if (strcmp(argv[i], "--perf") == 0) {
+      perf_profile = true;
     } else if (strcmp(argv[i], "--full-sort") == 0) {
       full_sort = true;
     } else {
@@ -150,6 +154,10 @@
         "formats.");
     return 1;
   }
+  if (perf_profile && format != "profile") {
+    PERFETTO_ELOG("--perf requires profile format.");
+    return 1;
+  }
 
   if (format == "json")
     return TraceToJson(input_stream, output_stream, /*compress=*/false,
@@ -178,8 +186,12 @@
   if (format == "text")
     return TraceToText(input_stream, output_stream);
 
-  if (format == "profile")
-    return TraceToProfile(input_stream, output_stream, pid, timestamps);
+  if (format == "profile") {
+    return perf_profile ? TraceToPerfProfile(input_stream, output_stream, pid,
+                                             timestamps)
+                        : TraceToHeapProfile(input_stream, output_stream, pid,
+                                             timestamps);
+  }
 
   if (format == "hprof")
     return TraceToHprof(input_stream, output_stream, pid, timestamps);
diff --git a/tools/trace_to_text/pprof_builder.cc b/tools/trace_to_text/pprof_builder.cc
index ced52ee..f206f16 100644
--- a/tools/trace_to_text/pprof_builder.cc
+++ b/tools/trace_to_text/pprof_builder.cc
@@ -52,12 +52,7 @@
 
 namespace {
 
-struct View {
-  const char* type;
-  const char* unit;
-  const char* aggregator;
-  const char* filter;
-};
+using ::perfetto::trace_processor::Iterator;
 
 void MaybeDemangle(std::string* name) {
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
@@ -72,80 +67,22 @@
   }
 }
 
-const View kSpaceView{"space", "bytes", "SUM(size)", nullptr};
-const View kAllocSpaceView{"alloc_space", "bytes", "SUM(size)", "size >= 0"};
-const View kAllocObjectsView{"alloc_objects", "count", "sum(count)",
-                             "size >= 0"};
-const View kObjectsView{"objects", "count", "SUM(count)", nullptr};
+uint64_t ToPprofId(int64_t id) {
+  PERFETTO_DCHECK(id >= 0);
+  return static_cast<uint64_t>(id) + 1;
+}
 
-const View kViews[] = {kAllocObjectsView, kObjectsView, kAllocSpaceView,
-                       kSpaceView};
-
-using trace_processor::Iterator;
-
-constexpr const char* kQueryProfiles =
-    "select distinct hpa.upid, hpa.ts, p.pid, hpa.heap_name "
-    "from heap_profile_allocation hpa, "
-    "process p where p.upid = hpa.upid;";
-
-int64_t GetStatsInt(trace_processor::TraceProcessor* tp,
-                    const std::string& name,
-                    uint64_t pid) {
-  auto it = tp->ExecuteQuery("SELECT value from stats where name = '" + name +
-                             "' AND idx = " + std::to_string(pid));
-  if (!it.Next()) {
-    if (!it.Status().ok()) {
-      PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
-                              it.Status().message().c_str());
-      return -1;
+std::string AsCsvString(std::vector<uint64_t> vals) {
+  std::string ret;
+  for (size_t i = 0; i < vals.size(); i++) {
+    if (i != 0) {
+      ret += ",";
     }
-    // TODO(fmayer): Remove this case once we always get an entry in the stats
-    // table.
-    return 0;
+    ret += std::to_string(vals[i]);
   }
-  return it.Get(0).AsLong();
+  return ret;
 }
 
-bool VerifyPIDStats(trace_processor::TraceProcessor* tp, uint64_t pid) {
-  bool success = true;
-  int64_t stat = GetStatsInt(tp, "heapprofd_buffer_corrupted", pid);
-  if (stat == -1) {
-    PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_buffer_corrupted stat");
-  } else if (stat > 0) {
-    success = false;
-    PERFETTO_ELOG("WARNING: The profile for %" PRIu64
-                  " ended early due to a buffer corruption."
-                  " THIS IS ALWAYS A BUG IN HEAPPROFD OR"
-                  " CLIENT MEMORY CORRUPTION.",
-                  pid);
-  }
-  stat = GetStatsInt(tp, "heapprofd_buffer_overran", pid);
-  if (stat == -1) {
-    PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_buffer_overran stat");
-  } else if (stat > 0) {
-    success = false;
-    PERFETTO_ELOG("WARNING: The profile for %" PRIu64
-                  " ended early due to a buffer overrun.",
-                  pid);
-  }
-
-  stat = GetStatsInt(tp, "heapprofd_rejected_concurrent", pid);
-  if (stat == -1) {
-    PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_rejected_concurrent stat");
-  } else if (stat > 0) {
-    success = false;
-    PERFETTO_ELOG("WARNING: The profile for %" PRIu64
-                  " was rejected due to a concurrent profile.",
-                  pid);
-  }
-  return success;
-}
-
-struct Callsite {
-  int64_t id;
-  int64_t frame_id;
-};
-
 // Return map from callsite_id to list of frame_ids that make up the callstack.
 std::vector<std::vector<int64_t>> GetCallsiteToFrames(
     trace_processor::TraceProcessor* tp) {
@@ -184,6 +121,17 @@
   return result;
 }
 
+base::Optional<int64_t> GetMaxSymbolId(trace_processor::TraceProcessor* tp) {
+  auto max_symbol_id_it =
+      tp->ExecuteQuery("select max(id) from stack_profile_symbol");
+  if (!max_symbol_id_it.Next()) {
+    PERFETTO_DFATAL_OR_ELOG("Failed to get max symbol set id: %s",
+                            max_symbol_id_it.Status().message().c_str());
+    return base::nullopt;
+  }
+  return base::make_optional(max_symbol_id_it.Get(0).AsLong());
+}
+
 struct Line {
   int64_t symbol_id;
   uint32_t line_number;
@@ -210,6 +158,28 @@
   return result;
 }
 
+base::Optional<int64_t> GetStatsEntry(
+    trace_processor::TraceProcessor* tp,
+    const std::string& name,
+    base::Optional<uint64_t> idx = base::nullopt) {
+  std::string query = "select value from stats where name == '" + name + "'";
+  if (idx.has_value())
+    query += " and idx == " + std::to_string(idx.value());
+
+  auto it = tp->ExecuteQuery(query);
+  if (!it.Next()) {
+    if (!it.Status().ok()) {
+      PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
+                              it.Status().message().c_str());
+      return base::nullopt;
+    }
+    // Some stats have no row unless they are non-zero; treat missing as 0.
+    return base::make_optional(0);
+  }
+  return base::make_optional(it.Get(0).AsLong());
+}
+
+// Helper for constructing |perftools.profiles.Profile| protos.
 class GProfileBuilder {
  public:
   GProfileBuilder(
@@ -225,78 +195,57 @@
     PERFETTO_CHECK(empty_id == 0);
   }
 
-  std::vector<Iterator> BuildViewIterators(trace_processor::TraceProcessor* tp,
-                                           uint64_t upid,
-                                           uint64_t ts,
-                                           const char* heap_name) {
-    std::vector<Iterator> view_its;
-    for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
-      const View& v = kViews[i];
-      std::string query = "SELECT hpa.callsite_id ";
-      query += ", " + std::string(v.aggregator) +
-               " FROM heap_profile_allocation hpa ";
-      // TODO(fmayer): Figure out where negative callsite_id comes from.
-      query += "WHERE hpa.callsite_id >= 0 ";
-      query += "AND hpa.upid = " + std::to_string(upid) + " ";
-      query += "AND hpa.ts <= " + std::to_string(ts) + " ";
-      query += "AND hpa.heap_name = '" + std::string(heap_name) + "' ";
-      if (v.filter)
-        query += "AND " + std::string(v.filter) + " ";
-      query += "GROUP BY hpa.callsite_id;";
-      view_its.emplace_back(tp->ExecuteQuery(query));
+  void WriteSampleTypes(
+      const std::vector<std::pair<std::string, std::string>>& sample_types) {
+    // The interner might eagerly append to the profile proto; prevent it from
+    // breaking up other messages by interning all strings in a separate pass.
+    for (const auto& st : sample_types) {
+      Intern(st.first);
+      Intern(st.second);
     }
-    return view_its;
+    for (const auto& st : sample_types) {
+      auto* sample_type = result_->add_sample_type();
+      sample_type->set_type(Intern(st.first));
+      sample_type->set_unit(Intern(st.second));
+    }
   }
 
-  bool WriteAllocations(std::vector<Iterator>* view_its,
-                        std::set<int64_t>* seen_frames) {
-    for (;;) {
-      bool all_next = true;
-      bool any_next = false;
-      for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
-        Iterator& it = (*view_its)[i];
-        bool next = it.Next();
-        if (!it.Status().ok()) {
-          PERFETTO_DFATAL_OR_ELOG("Invalid view iterator: %s",
-                                  it.Status().message().c_str());
-          return false;
-        }
-        all_next = all_next && next;
-        any_next = any_next || next;
-      }
-
-      if (!all_next) {
-        PERFETTO_CHECK(!any_next);
-        break;
-      }
-
-      auto* gsample = result_->add_sample();
-      protozero::PackedVarInt sample_values;
-      int64_t callstack_id = -1;
-      for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
-        if (i == 0) {
-          callstack_id = (*view_its)[i].Get(0).AsLong();
-          auto frames = FramesForCallstack(callstack_id);
-          if (frames.empty())
-            return false;
-          protozero::PackedVarInt location_ids;
-          for (int64_t frame : frames)
-            location_ids.Append(ToPprofId(frame));
-          gsample->set_location_id(location_ids);
-          seen_frames->insert(frames.cbegin(), frames.cend());
-        } else {
-          if (callstack_id != (*view_its)[i].Get(0).AsLong()) {
-            PERFETTO_DFATAL_OR_ELOG("Wrong callstack.");
-            return false;
-          }
-        }
-        sample_values.Append((*view_its)[i].Get(1).AsLong());
-      }
-      gsample->set_value(sample_values);
+  bool AddSample(const protozero::PackedVarInt& values, int64_t callstack_id) {
+    const auto& frames = FramesForCallstack(callstack_id);
+    if (frames.empty()) {
+      PERFETTO_DFATAL_OR_ELOG(
+          "Failed to find frames for callstack id %" PRIi64 "", callstack_id);
+      return false;
     }
+    protozero::PackedVarInt location_ids;
+    for (int64_t frame : frames)
+      location_ids.Append(ToPprofId(frame));
+
+    auto* gsample = result_->add_sample();
+    gsample->set_value(values);
+    gsample->set_location_id(location_ids);
+
+    // Remember the frames so that their location info gets emitted later.
+    seen_frames_.insert(frames.cbegin(), frames.cend());
+
     return true;
   }
 
+  std::string CompleteProfile(trace_processor::TraceProcessor* tp) {
+    std::set<int64_t> seen_mappings;
+    std::set<int64_t> seen_symbol_ids;
+
+    // Write the location info for frames referenced by the added samples.
+    if (!WriteFrames(tp, &seen_mappings, &seen_symbol_ids))
+      return {};
+    if (!WriteMappings(tp, seen_mappings))
+      return {};
+    if (!WriteSymbols(tp, seen_symbol_ids))
+      return {};
+    return result_.SerializeAsString();
+  }
+
+ private:
   bool WriteMappings(trace_processor::TraceProcessor* tp,
                      const std::set<int64_t>& seen_mappings) {
     Iterator mapping_it = tp->ExecuteQuery(
@@ -371,7 +320,6 @@
   }
 
   bool WriteFrames(trace_processor::TraceProcessor* tp,
-                   const std::set<int64_t>& seen_frames,
                    std::set<int64_t>* seen_mappings,
                    std::set<int64_t>* seen_symbol_ids) {
     Iterator frame_it = tp->ExecuteQuery(
@@ -381,7 +329,7 @@
     size_t frames_no = 0;
     while (frame_it.Next()) {
       int64_t frame_id = frame_it.Get(0).AsLong();
-      if (seen_frames.find(frame_id) == seen_frames.end())
+      if (seen_frames_.find(frame_id) == seen_frames_.end())
         continue;
       frames_no++;
       std::string frame_name = frame_it.Get(1).AsString();
@@ -429,54 +377,13 @@
                               frame_it.Status().message().c_str());
       return false;
     }
-    if (frames_no != seen_frames.size()) {
+    if (frames_no != seen_frames_.size()) {
       PERFETTO_DFATAL_OR_ELOG("Missing frames.");
       return false;
     }
     return true;
   }
 
-  uint64_t ToPprofId(int64_t id) {
-    PERFETTO_DCHECK(id >= 0);
-    return static_cast<uint64_t>(id) + 1;
-  }
-
-  void WriteSampleTypes() {
-    for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
-      Intern(kViews[i].type);
-      Intern(kViews[i].unit);
-    }
-
-    for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
-      auto* sample_type = result_->add_sample_type();
-      sample_type->set_type(Intern(kViews[i].type));
-      sample_type->set_unit(Intern(kViews[i].unit));
-    }
-  }
-
-  std::string GenerateGProfile(trace_processor::TraceProcessor* tp,
-                               uint64_t upid,
-                               uint64_t ts,
-                               const char* heap_name) {
-    std::set<int64_t> seen_frames;
-    std::set<int64_t> seen_mappings;
-    std::set<int64_t> seen_symbol_ids;
-
-    std::vector<Iterator> view_its =
-        BuildViewIterators(tp, upid, ts, heap_name);
-
-    WriteSampleTypes();
-    if (!WriteAllocations(&view_its, &seen_frames))
-      return {};
-    if (!WriteFrames(tp, seen_frames, &seen_mappings, &seen_symbol_ids))
-      return {};
-    if (!WriteMappings(tp, seen_mappings))
-      return {};
-    if (!WriteSymbols(tp, seen_symbol_ids))
-      return {};
-    return result_.SerializeAsString();
-  }
-
   const std::vector<int64_t>& FramesForCallstack(int64_t callstack_id) {
     size_t callsite_idx = static_cast<size_t>(callstack_id);
     PERFETTO_CHECK(callstack_id >= 0 &&
@@ -501,7 +408,6 @@
     return it->second;
   }
 
- private:
   protozero::HeapBuffered<third_party::perftools::profiles::pbzero::Profile>
       result_;
   std::map<std::string, int64_t> string_table_;
@@ -509,54 +415,183 @@
   const std::map<int64_t, std::vector<Line>>& symbol_set_id_to_lines_;
   const std::vector<Line> empty_line_vector_;
   int64_t max_symbol_id_;
+
+  std::set<int64_t> seen_frames_;
 };
 
 }  // namespace
 
-bool TraceToPprof(trace_processor::TraceProcessor* tp,
-                  std::vector<SerializedProfile>* output,
-                  uint64_t pid,
-                  const std::vector<uint64_t>& timestamps) {
-  auto max_symbol_id_it =
-      tp->ExecuteQuery("SELECT MAX(id) from stack_profile_symbol");
-  if (!max_symbol_id_it.Next()) {
-    PERFETTO_DFATAL_OR_ELOG("Failed to get max symbol set id: %s",
-                            max_symbol_id_it.Status().message().c_str());
-    return false;
+namespace heap_profile {
+struct View {
+  const char* type;
+  const char* unit;
+  const char* aggregator;
+  const char* filter;
+};
+const View kSpaceView{"space", "bytes", "SUM(size)", nullptr};
+const View kAllocSpaceView{"alloc_space", "bytes", "SUM(size)", "size >= 0"};
+const View kAllocObjectsView{"alloc_objects", "count", "sum(count)",
+                             "size >= 0"};
+const View kObjectsView{"objects", "count", "SUM(count)", nullptr};
+
+const View kViews[] = {kAllocObjectsView, kObjectsView, kAllocSpaceView,
+                       kSpaceView};
+
+static bool VerifyPIDStats(trace_processor::TraceProcessor* tp, uint64_t pid) {
+  bool success = true;
+  base::Optional<int64_t> stat =
+      GetStatsEntry(tp, "heapprofd_buffer_corrupted", base::make_optional(pid));
+  if (!stat.has_value()) {
+    PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_buffer_corrupted stat");
+  } else if (stat.value() > 0) {
+    success = false;
+    PERFETTO_ELOG("WARNING: The profile for %" PRIu64
+                  " ended early due to a buffer corruption."
+                  " THIS IS ALWAYS A BUG IN HEAPPROFD OR"
+                  " CLIENT MEMORY CORRUPTION.",
+                  pid);
+  }
+  stat =
+      GetStatsEntry(tp, "heapprofd_buffer_overran", base::make_optional(pid));
+  if (!stat.has_value()) {
+    PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_buffer_overran stat");
+  } else if (stat.value() > 0) {
+    success = false;
+    PERFETTO_ELOG("WARNING: The profile for %" PRIu64
+                  " ended early due to a buffer overrun.",
+                  pid);
   }
 
-  int64_t max_symbol_id = max_symbol_id_it.Get(0).AsLong();
+  stat = GetStatsEntry(tp, "heapprofd_rejected_concurrent", pid);
+  if (!stat.has_value()) {
+    PERFETTO_DFATAL_OR_ELOG("Failed to get heapprofd_rejected_concurrent stat");
+  } else if (stat.value() > 0) {
+    success = false;
+    PERFETTO_ELOG("WARNING: The profile for %" PRIu64
+                  " was rejected due to a concurrent profile.",
+                  pid);
+  }
+  return success;
+}
+
+static std::vector<Iterator> BuildViewIterators(
+    trace_processor::TraceProcessor* tp,
+    uint64_t upid,
+    uint64_t ts,
+    const char* heap_name) {
+  std::vector<Iterator> view_its;
+  for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
+    const View& v = kViews[i];
+    std::string query = "SELECT hpa.callsite_id ";
+    query +=
+        ", " + std::string(v.aggregator) + " FROM heap_profile_allocation hpa ";
+    // TODO(fmayer): Figure out where negative callsite_id comes from.
+    query += "WHERE hpa.callsite_id >= 0 ";
+    query += "AND hpa.upid = " + std::to_string(upid) + " ";
+    query += "AND hpa.ts <= " + std::to_string(ts) + " ";
+    query += "AND hpa.heap_name = '" + std::string(heap_name) + "' ";
+    if (v.filter)
+      query += "AND " + std::string(v.filter) + " ";
+    query += "GROUP BY hpa.callsite_id;";
+    view_its.emplace_back(tp->ExecuteQuery(query));
+  }
+  return view_its;
+}
+
+static bool WriteAllocations(GProfileBuilder* builder,
+                             std::vector<Iterator>* view_its) {
+  for (;;) {
+    bool all_next = true;
+    bool any_next = false;
+    for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
+      Iterator& it = (*view_its)[i];
+      bool next = it.Next();
+      if (!it.Status().ok()) {
+        PERFETTO_DFATAL_OR_ELOG("Invalid view iterator: %s",
+                                it.Status().message().c_str());
+        return false;
+      }
+      all_next = all_next && next;
+      any_next = any_next || next;
+    }
+
+    if (!all_next) {
+      PERFETTO_CHECK(!any_next);
+      break;
+    }
+
+    protozero::PackedVarInt sample_values;
+    int64_t callstack_id = -1;
+    for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
+      if (i == 0) {
+        callstack_id = (*view_its)[i].Get(0).AsLong();
+      } else if (callstack_id != (*view_its)[i].Get(0).AsLong()) {
+        PERFETTO_DFATAL_OR_ELOG("Wrong callstack.");
+        return false;
+      }
+      sample_values.Append((*view_its)[i].Get(1).AsLong());
+    }
+
+    if (!builder->AddSample(sample_values, callstack_id))
+      return false;
+  }
+  return true;
+}
+
+static bool TraceToHeapPprof(trace_processor::TraceProcessor* tp,
+                             std::vector<SerializedProfile>* output,
+                             uint64_t target_pid,
+                             const std::vector<uint64_t>& target_timestamps) {
   const auto callsite_to_frames = GetCallsiteToFrames(tp);
   const auto symbol_set_id_to_lines = GetSymbolSetIdToLines(tp);
+  base::Optional<int64_t> max_symbol_id = GetMaxSymbolId(tp);
+  if (!max_symbol_id.has_value())
+    return false;
 
   bool any_fail = false;
-  Iterator it = tp->ExecuteQuery(kQueryProfiles);
+  Iterator it = tp->ExecuteQuery(
+      "select distinct hpa.upid, hpa.ts, p.pid, hpa.heap_name "
+      "from heap_profile_allocation hpa, "
+      "process p where p.upid = hpa.upid;");
   while (it.Next()) {
     GProfileBuilder builder(callsite_to_frames, symbol_set_id_to_lines,
-                            max_symbol_id);
+                            max_symbol_id.value());
     uint64_t upid = static_cast<uint64_t>(it.Get(0).AsLong());
     uint64_t ts = static_cast<uint64_t>(it.Get(1).AsLong());
     uint64_t profile_pid = static_cast<uint64_t>(it.Get(2).AsLong());
     const char* heap_name = it.Get(3).AsString();
-    if ((pid > 0 && profile_pid != pid) ||
-        (!timestamps.empty() && std::find(timestamps.begin(), timestamps.end(),
-                                          ts) == timestamps.end())) {
+    if ((target_pid > 0 && profile_pid != target_pid) ||
+        (!target_timestamps.empty() &&
+         std::find(target_timestamps.begin(), target_timestamps.end(), ts) ==
+             target_timestamps.end())) {
       continue;
     }
 
-    if (!VerifyPIDStats(tp, pid))
+    if (!VerifyPIDStats(tp, profile_pid))
       any_fail = true;
 
-    std::string pid_query = "select pid from process where upid = ";
-    pid_query += std::to_string(upid) + ";";
-    Iterator pid_it = tp->ExecuteQuery(pid_query);
-    PERFETTO_CHECK(pid_it.Next());
+    std::vector<std::pair<std::string, std::string>> sample_types;
+    for (size_t i = 0; i < base::ArraySize(kViews); ++i) {
+      sample_types.emplace_back(std::string(kViews[i].type),
+                                std::string(kViews[i].unit));
+    }
+    builder.WriteSampleTypes(sample_types);
 
-    std::string profile_proto =
-        builder.GenerateGProfile(tp, upid, ts, heap_name);
+    std::vector<Iterator> view_its =
+        BuildViewIterators(tp, upid, ts, heap_name);
+    std::string profile_proto;
+    if (WriteAllocations(&builder, &view_its)) {
+      profile_proto = builder.CompleteProfile(tp);
+    }
     output->emplace_back(
-        SerializedProfile{static_cast<uint64_t>(pid_it.Get(0).AsLong()),
-                          heap_name, profile_proto});
+        SerializedProfile{ProfileType::kHeapProfile, profile_pid,
+                          std::move(profile_proto), heap_name});
+  }
+
+  if (!it.Status().ok()) {
+    PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
+                            it.Status().message().c_str());
+    return false;
   }
   if (any_fail) {
     PERFETTO_ELOG(
@@ -564,13 +599,149 @@
         "https://perfetto.dev/docs/data-sources/"
         "native-heap-profiler#troubleshooting");
   }
+  return true;
+}
+}  // namespace heap_profile
+
+namespace perf_profile {
+struct ProcessInfo {
+  uint64_t pid;
+  std::vector<uint64_t> utids;
+};
+
+// Returns a map of upid -> {pid, utids[]} for sampled processes.
+static std::map<uint64_t, ProcessInfo> GetProcessMap(
+    trace_processor::TraceProcessor* tp) {
+  Iterator it = tp->ExecuteQuery(
+      "select distinct process.upid, process.pid, thread.utid from perf_sample "
+      "join thread using (utid) join process using (upid) order by "
+      "process.upid asc");
+  std::map<uint64_t, ProcessInfo> process_map;
+  while (it.Next()) {
+    uint64_t upid = static_cast<uint64_t>(it.Get(0).AsLong());
+    uint64_t pid = static_cast<uint64_t>(it.Get(1).AsLong());
+    uint64_t utid = static_cast<uint64_t>(it.Get(2).AsLong());
+    process_map[upid].pid = pid;
+    process_map[upid].utids.push_back(utid);
+  }
   if (!it.Status().ok()) {
     PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
                             it.Status().message().c_str());
+    return {};
+  }
+  return process_map;
+}
+
+static void LogTracePerfEventIssues(trace_processor::TraceProcessor* tp) {
+  base::Optional<int64_t> stat = GetStatsEntry(tp, "perf_samples_skipped");
+  if (!stat.has_value()) {
+    PERFETTO_DFATAL_OR_ELOG("Failed to look up perf_samples_skipped stat");
+  } else if (stat.value() > 0) {
+    PERFETTO_ELOG(
+        "Warning: the trace recorded %" PRIi64
+        " skipped samples, which otherwise matched the tracing config. This "
+        "would cause a process to be completely absent from the trace, but "
+        "does *not* imply data loss in any of the output profiles.",
+        stat.value());
+  }
+
+  stat = GetStatsEntry(tp, "perf_samples_skipped_dataloss");
+  if (!stat.has_value()) {
+    PERFETTO_DFATAL_OR_ELOG(
+        "Failed to look up perf_samples_skipped_dataloss stat");
+  } else if (stat.value() > 0) {
+    PERFETTO_ELOG("DATA LOSS: the trace recorded %" PRIi64
+                  " lost perf samples (within traced_perf). This means that "
+                  "the trace is missing information, but it is not known "
+                  "which profile that affected.",
+                  stat.value());
+  }
+
+  // Check if any per-cpu ringbuffers encountered dataloss (as recorded by the
+  // kernel).
+  Iterator it = tp->ExecuteQuery(
+      "select idx, value from stats where name == 'perf_cpu_lost_records' and "
+      "value > 0 order by idx asc");
+  while (it.Next()) {
+    PERFETTO_ELOG(
+        "DATA LOSS: during the trace, the per-cpu kernel ring buffer for cpu "
+        "%" PRIi64 " recorded %" PRIi64
+        " lost samples. This means that the trace is missing information, "
+        "but it is not known which profile that affected.",
+        static_cast<int64_t>(it.Get(0).AsLong()),
+        static_cast<int64_t>(it.Get(1).AsLong()));
+  }
+  if (!it.Status().ok()) {
+    PERFETTO_DFATAL_OR_ELOG("Invalid iterator: %s",
+                            it.Status().message().c_str());
+  }
+}
+
+// TODO(rsavitski): decide whether errors in |AddSample| should result in an
+// empty profile (and/or whether they should make the overall conversion
+// unsuccessful). Furthermore, clarify the return value's semantics for both
+// perf and heap profiles.
+static bool TraceToPerfPprof(trace_processor::TraceProcessor* tp,
+                             std::vector<SerializedProfile>* output,
+                             uint64_t target_pid) {
+  const auto callsite_to_frames = GetCallsiteToFrames(tp);
+  const auto symbol_set_id_to_lines = GetSymbolSetIdToLines(tp);
+  base::Optional<int64_t> max_symbol_id = GetMaxSymbolId(tp);
+  if (!max_symbol_id.has_value())
     return false;
+
+  LogTracePerfEventIssues(tp);
+
+  // Aggregate samples by upid when building profiles.
+  std::map<uint64_t, ProcessInfo> process_map = GetProcessMap(tp);
+  for (const auto& p : process_map) {
+    const ProcessInfo& process = p.second;
+
+    if (target_pid != 0 && process.pid != target_pid)
+      continue;
+
+    GProfileBuilder builder(callsite_to_frames, symbol_set_id_to_lines,
+                            max_symbol_id.value());
+
+    builder.WriteSampleTypes({{"samples", "count"}});
+
+    std::string query = "select callsite_id from perf_sample where utid in (" +
+                        AsCsvString(process.utids) + ") order by ts asc;";
+
+    protozero::PackedVarInt single_count_value;
+    single_count_value.Append(1);
+
+    Iterator it = tp->ExecuteQuery(query);
+    while (it.Next()) {
+      int64_t callsite_id = static_cast<int64_t>(it.Get(0).AsLong());
+      builder.AddSample(single_count_value, callsite_id);
+    }
+    if (!it.Status().ok()) {
+      PERFETTO_DFATAL_OR_ELOG("Failed to iterate over samples.");
+      return false;
+    }
+
+    std::string profile_proto = builder.CompleteProfile(tp);
+    output->emplace_back(SerializedProfile{
+        ProfileType::kPerfProfile, process.pid, std::move(profile_proto), ""});
   }
   return true;
 }
+}  // namespace perf_profile
+
+bool TraceToPprof(trace_processor::TraceProcessor* tp,
+                  std::vector<SerializedProfile>* output,
+                  ConversionMode mode,
+                  uint64_t pid,
+                  const std::vector<uint64_t>& timestamps) {
+  switch (mode) {
+    case (ConversionMode::kHeapProfile):
+      return heap_profile::TraceToHeapPprof(tp, output, pid, timestamps);
+    case (ConversionMode::kPerfProfile):
+      return perf_profile::TraceToPerfPprof(tp, output, pid);
+  }
+  PERFETTO_FATAL("unknown conversion option");  // for gcc
+}
 
 }  // namespace trace_to_text
 }  // namespace perfetto
diff --git a/tools/trace_to_text/trace_to_profile.cc b/tools/trace_to_text/trace_to_profile.cc
index ce0f6d8..5a5d261 100644
--- a/tools/trace_to_text/trace_to_profile.cc
+++ b/tools/trace_to_text/trace_to_profile.cc
@@ -33,7 +33,6 @@
 #include "src/profiling/symbolizer/symbolizer.h"
 
 namespace {
-
 constexpr const char* kDefaultTmp = "/tmp";
 
 std::string GetTemp() {
@@ -44,7 +43,6 @@
     return tmp;
   return kDefaultTmp;
 }
-
 }  // namespace
 
 namespace perfetto {
@@ -76,37 +74,36 @@
   tp->NotifyEndOfFile();
 }
 
-}  // namespace
-
-int TraceToProfile(std::istream* input,
-                   std::ostream* output,
-                   uint64_t pid,
-                   std::vector<uint64_t> timestamps) {
+int TraceToProfile(
+    std::istream* input,
+    std::ostream* output,
+    uint64_t pid,
+    std::vector<uint64_t> timestamps,
+    ConversionMode conversion_mode,
+    std::string dirname_prefix,
+    std::function<std::string(const SerializedProfile&)> filename_fn) {
   std::vector<SerializedProfile> profiles;
   trace_processor::Config config;
   std::unique_ptr<trace_processor::TraceProcessor> tp =
       trace_processor::TraceProcessor::CreateInstance(config);
 
   if (!ReadTrace(tp.get(), input))
-    return false;
+    return -1;
 
   tp->NotifyEndOfFile();
   MaybeSymbolize(tp.get());
   MaybeDeobfuscate(tp.get());
 
-  TraceToPprof(tp.get(), &profiles, pid, timestamps);
+  TraceToPprof(tp.get(), &profiles, conversion_mode, pid, timestamps);
   if (profiles.empty()) {
     return 0;
   }
 
   std::string temp_dir =
-      GetTemp() + "/heap_profile-" + base::GetTimeFmt("%y%m%d%H%M%S");
+      GetTemp() + "/" + dirname_prefix + base::GetTimeFmt("%y%m%d%H%M%S");
   PERFETTO_CHECK(base::Mkdir(temp_dir));
-  size_t itr = 0;
   for (const auto& profile : profiles) {
-    std::string filename = temp_dir + "/heap_dump." + std::to_string(++itr) +
-                           "." + std::to_string(profile.pid) + "." +
-                           profile.heap_name + ".pb";
+    std::string filename = temp_dir + "/" + filename_fn(profile);
     base::ScopedFile fd(base::OpenFile(filename, O_CREAT | O_WRONLY, 0700));
     if (!fd)
       PERFETTO_FATAL("Failed to open %s", filename.c_str());
@@ -118,5 +115,37 @@
   return 0;
 }
 
+}  // namespace
+
+int TraceToHeapProfile(std::istream* input,
+                       std::ostream* output,
+                       uint64_t pid,
+                       std::vector<uint64_t> timestamps) {
+  int file_idx = 0;
+  auto filename_fn = [&file_idx](const SerializedProfile& profile) {
+    return "heap_dump." + std::to_string(++file_idx) + "." +
+           std::to_string(profile.pid) + "." + profile.heap_name + ".pb";
+  };
+
+  return TraceToProfile(input, output, pid, timestamps,
+                        ConversionMode::kHeapProfile, "heap_profile-",
+                        filename_fn);
+}
+
+int TraceToPerfProfile(std::istream* input,
+                       std::ostream* output,
+                       uint64_t pid,
+                       std::vector<uint64_t> timestamps) {
+  int file_idx = 0;
+  auto filename_fn = [&file_idx](const SerializedProfile& profile) {
+    return "profile." + std::to_string(++file_idx) + ".pid." +
+           std::to_string(profile.pid) + ".pb";
+  };
+
+  return TraceToProfile(input, output, pid, timestamps,
+                        ConversionMode::kPerfProfile, "perf_profile-",
+                        filename_fn);
+}
+
 }  // namespace trace_to_text
 }  // namespace perfetto
diff --git a/tools/trace_to_text/trace_to_profile.h b/tools/trace_to_text/trace_to_profile.h
index 629d3ef..1c41aad 100644
--- a/tools/trace_to_text/trace_to_profile.h
+++ b/tools/trace_to_text/trace_to_profile.h
@@ -23,10 +23,17 @@
 namespace perfetto {
 namespace trace_to_text {
 
-int TraceToProfile(std::istream* input,
-                   std::ostream* output,
-                   uint64_t pid = 0,
-                   std::vector<uint64_t> timestamps = {});
+// Returns 0 on success, -1 if reading the input trace fails.
+int TraceToHeapProfile(std::istream* input,
+                       std::ostream* output,
+                       uint64_t pid,
+                       std::vector<uint64_t> timestamps);
+
+// Returns 0 on success, -1 if reading the input trace fails.
+int TraceToPerfProfile(std::istream* input,
+                       std::ostream* output,
+                       uint64_t pid,
+                       std::vector<uint64_t> timestamps);
 
 }  // namespace trace_to_text
 }  // namespace perfetto