More advanced timing loggers.

The new timing loggers have lower overhead since they only push into
a vector. The new format has two types, a start timing and a stop
timing. You can think of these as brackets associated with a
timestamp. It uses these to construct various statistics when needed,
such as: Total time, exclusive time, and nesting depth.

Changed PrettyDuration to have a default of 3 digits after the decimal
point.

Example of a GC dump with exclusive / total times and indenting:
I/art     (23546): GC iteration timing logger [Exclusive time] [Total time]
I/art     (23546):   0ms InitializePhase
I/art     (23546):   0.305ms/167.746ms MarkingPhase
I/art     (23546):     0ms BindBitmaps
I/art     (23546):     0ms FindDefaultSpaceBitmap
I/art     (23546):     0ms/1.709ms ProcessCards
I/art     (23546):       0.183ms ImageModUnionClearCards
I/art     (23546):       0.916ms ZygoteModUnionClearCards
I/art     (23546):       0.610ms AllocSpaceClearCards
I/art     (23546):       1.373ms AllocSpaceClearCards
I/art     (23546):     0.305ms/6.318ms MarkRoots
I/art     (23546):       2.106ms MarkRootsCheckpoint
I/art     (23546):       0.153ms MarkNonThreadRoots
I/art     (23546):       4.287ms MarkConcurrentRoots
I/art     (23546):     43.461ms UpdateAndMarkImageModUnionTable
I/art     (23546):     0ms/112.712ms RecursiveMark
I/art     (23546):       112.712ms ProcessMarkStack
I/art     (23546):     0.610ms/2.777ms PreCleanCards
I/art     (23546):       0.305ms/0.855ms ProcessCards
I/art     (23546):         0.153ms ImageModUnionClearCards
I/art     (23546):         0.610ms ZygoteModUnionClearCards
I/art     (23546):         0.610ms AllocSpaceClearCards
I/art     (23546):         0.549ms AllocSpaceClearCards
I/art     (23546):       0.549ms MarkRootsCheckpoint
I/art     (23546):       0.610ms MarkNonThreadRoots
I/art     (23546):       0ms MarkConcurrentRoots
I/art     (23546):       0.610ms ScanGrayImageSpaceObjects
I/art     (23546):       0.305ms ScanGrayZygoteSpaceObjects
I/art     (23546):       0.305ms ScanGrayAllocSpaceObjects
I/art     (23546):       1.129ms ScanGrayAllocSpaceObjects
I/art     (23546):       0ms ProcessMarkStack
I/art     (23546):   0ms/0.977ms (Paused)PausePhase
I/art     (23546):     0.244ms ReMarkRoots
I/art     (23546):     0.672ms (Paused)ScanGrayObjects
I/art     (23546):     0ms (Paused)ProcessMarkStack
I/art     (23546):     0ms/0.610ms SwapStacks
I/art     (23546):       0.610ms RevokeAllThreadLocalAllocationStacks
I/art     (23546):     0ms PreSweepingGcVerification
I/art     (23546):   0ms/10.621ms ReclaimPhase
I/art     (23546):     0.610ms/0.702ms ProcessReferences
I/art     (23546):       0.214ms/0.641ms EnqueueFinalizerReferences
I/art     (23546):         0.427ms ProcessMarkStack
I/art     (23546):     0.488ms SweepSystemWeaks
I/art     (23546):     0.824ms/9.400ms Sweep
I/art     (23546):       0ms SweepMallocSpace
I/art     (23546):       0.214ms SweepZygoteSpace
I/art     (23546):       0.122ms SweepMallocSpace
I/art     (23546):       6.226ms SweepMallocSpace
I/art     (23546):       0ms SweepMallocSpace
I/art     (23546):       2.144ms SweepLargeObjects
I/art     (23546):     0.305ms SwapBitmaps
I/art     (23546):     0ms UnBindBitmaps
I/art     (23546):   0.275ms FinishPhase
I/art     (23546): GC iteration timing logger: end, 178.971ms

Change-Id: Ia55b65609468f212b3cd65cda66b843da42be645
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 5050d4e..45cf2fb 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -371,10 +371,10 @@
   void CompileMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(method != nullptr);
     TimingLogger timings("CommonTest::CompileMethod", false, false);
-    timings.StartSplit("CompileOne");
+    TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
     compiler_driver_->CompileOne(method, &timings);
+    TimingLogger::ScopedTiming t2("MakeExecutable", &timings);
     MakeExecutable(method);
-    timings.EndSplit();
   }
 
   void CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, const char* class_name,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 0845656..72990b4 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -114,19 +114,20 @@
 
 void CompilationUnit::StartTimingSplit(const char* label) {
   if (compiler_driver->GetDumpPasses()) {
-    timings.StartSplit(label);
+    timings.StartTiming(label);
   }
 }
 
 void CompilationUnit::NewTimingSplit(const char* label) {
   if (compiler_driver->GetDumpPasses()) {
-    timings.NewSplit(label);
+    timings.EndTiming();
+    timings.StartTiming(label);
   }
 }
 
 void CompilationUnit::EndTiming() {
   if (compiler_driver->GetDumpPasses()) {
-    timings.EndSplit();
+    timings.EndTiming();
     if (enable_debug & (1 << kDebugTimings)) {
       LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
       LOG(INFO) << Dumpable<TimingLogger>(timings);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 4a331fc..96625c5 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -697,7 +697,7 @@
     return;
   }
 
-  timings->NewSplit("LoadImageClasses");
+  TimingLogger::ScopedTiming t("LoadImageClasses", timings);
   // Make a first class to load all classes explicitly listed in the file
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
@@ -794,8 +794,7 @@
 
 void CompilerDriver::UpdateImageClasses(TimingLogger* timings) {
   if (IsImage()) {
-    timings->NewSplit("UpdateImageClasses");
-
+    TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
     // Update image_classes_ with classes for objects created by <clinit> methods.
     Thread* self = Thread::Current();
     const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
@@ -1606,11 +1605,11 @@
   if (IsImage()) {
     // For images we resolve all types, such as array, whereas for applications just those with
     // classdefs are resolved by ResolveClassFieldsAndMethods.
-    timings->NewSplit("Resolve Types");
+    TimingLogger::ScopedTiming t("Resolve Types", timings);
     context.ForAll(0, dex_file.NumTypeIds(), ResolveType, thread_count_);
   }
 
-  timings->NewSplit("Resolve MethodsAndFields");
+  TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings);
   context.ForAll(0, dex_file.NumClassDefs(), ResolveClassFieldsAndMethods, thread_count_);
 }
 
@@ -1672,7 +1671,7 @@
 
 void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
                                    ThreadPool* thread_pool, TimingLogger* timings) {
-  timings->NewSplit("Verify Dex File");
+  TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
   context.ForAll(0, dex_file.NumClassDefs(), VerifyClass, thread_count_);
@@ -1765,7 +1764,7 @@
 
 void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
                                        ThreadPool* thread_pool, TimingLogger* timings) {
-  timings->NewSplit("InitializeNoClinit");
+  TimingLogger::ScopedTiming t("InitializeNoClinit", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, thread_pool);
   size_t thread_count;
@@ -1877,7 +1876,7 @@
 
 void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
                                     ThreadPool* thread_pool, TimingLogger* timings) {
-  timings->NewSplit("Compile Dex File");
+  TimingLogger::ScopedTiming t("Compile Dex File", timings);
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, thread_pool);
   context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_);
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index ca956aa..5325a68 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -38,12 +38,12 @@
  protected:
   void CompileAll(jobject class_loader) LOCKS_EXCLUDED(Locks::mutator_lock_) {
     TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
-    timings.StartSplit("CompileAll");
+    TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
     compiler_driver_->CompileAll(class_loader,
                                  Runtime::Current()->GetCompileTimeClassPath(class_loader),
                                  &timings);
+    t.NewTiming("MakeAllExecutable");
     MakeAllExecutable(class_loader);
-    timings.EndSplit();
   }
 
   void EnsureCompiled(jobject class_loader, const char* class_name, const char* method,
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 92be147..e8bbaef 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -64,7 +64,7 @@
       jobject class_loader = NULL;
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       TimingLogger timings("ImageTest::WriteRead", false, false);
-      timings.StartSplit("CompileAll");
+      TimingLogger::ScopedTiming t("CompileAll", &timings);
       if (kUsePortableCompiler) {
         // TODO: we disable this for portable so the test executes in a reasonable amount of time.
         //       We shouldn't need to do this.
@@ -75,6 +75,7 @@
       }
       compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
 
+      t.NewTiming("WriteElf");
       ScopedObjectAccess soa(Thread::Current());
       OatWriter oat_writer(class_linker->GetBootClassPath(),
                            0, 0, "", compiler_driver_.get(), &timings);
@@ -84,7 +85,6 @@
                                                 &oat_writer,
                                                 oat_file.GetFile());
       ASSERT_TRUE(success);
-      timings.EndSplit();
     }
   }
   // Workound bug that mcld::Linker::emit closes oat_file by reopening as dup_oat.
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 4590880..e1b6992 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -91,31 +91,31 @@
     size_oat_class_method_offsets_(0) {
   size_t offset;
   {
-    TimingLogger::ScopedSplit split("InitOatHeader", timings);
+    TimingLogger::ScopedTiming split("InitOatHeader", timings);
     offset = InitOatHeader();
   }
   {
-    TimingLogger::ScopedSplit split("InitOatDexFiles", timings);
+    TimingLogger::ScopedTiming split("InitOatDexFiles", timings);
     offset = InitOatDexFiles(offset);
   }
   {
-    TimingLogger::ScopedSplit split("InitDexFiles", timings);
+    TimingLogger::ScopedTiming split("InitDexFiles", timings);
     offset = InitDexFiles(offset);
   }
   {
-    TimingLogger::ScopedSplit split("InitOatClasses", timings);
+    TimingLogger::ScopedTiming split("InitOatClasses", timings);
     offset = InitOatClasses(offset);
   }
   {
-    TimingLogger::ScopedSplit split("InitOatMaps", timings);
+    TimingLogger::ScopedTiming split("InitOatMaps", timings);
     offset = InitOatMaps(offset);
   }
   {
-    TimingLogger::ScopedSplit split("InitOatCode", timings);
+    TimingLogger::ScopedTiming split("InitOatCode", timings);
     offset = InitOatCode(offset);
   }
   {
-    TimingLogger::ScopedSplit split("InitOatCodeDexFiles", timings);
+    TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings);
     offset = InitOatCodeDexFiles(offset);
   }
   size_ = offset;
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 4be6d87..2d55140 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -367,12 +367,12 @@
 
     driver->CompileAll(class_loader, dex_files, &timings);
 
-    timings.NewSplit("dex2oat OatWriter");
+    TimingLogger::ScopedTiming t2("dex2oat OatWriter", &timings);
     std::string image_file_location;
     uint32_t image_file_location_oat_checksum = 0;
     uintptr_t image_file_location_oat_data_begin = 0;
     if (!driver->IsImage()) {
-      TimingLogger::ScopedSplit split("Loading image checksum", &timings);
+      TimingLogger::ScopedTiming t3("Loading image checksum", &timings);
       gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
       image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
       image_file_location_oat_data_begin =
@@ -380,14 +380,13 @@
       image_file_location = image_space->GetImageFilename();
     }
 
-    OatWriter oat_writer(dex_files,
-                         image_file_location_oat_checksum,
+    OatWriter oat_writer(dex_files, image_file_location_oat_checksum,
                          image_file_location_oat_data_begin,
                          image_file_location,
                          driver.get(),
                          &timings);
 
-    TimingLogger::ScopedSplit split("Writing ELF", &timings);
+    t2.NewTiming("Writing ELF");
     if (!driver->WriteElf(android_root, is_host, dex_files, &oat_writer, oat_file)) {
       LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
       return nullptr;
@@ -1213,7 +1212,7 @@
     return EXIT_FAILURE;
   }
 
-  timings.StartSplit("dex2oat Setup");
+  timings.StartTiming("dex2oat Setup");
   LOG(INFO) << CommandLine();
 
   Runtime::Options runtime_options;
@@ -1448,7 +1447,7 @@
   // Elf32_Phdr.p_vaddr values by the desired base address.
   //
   if (image) {
-    timings.NewSplit("dex2oat ImageWriter");
+    TimingLogger::ScopedTiming t("dex2oat ImageWriter", &timings);
     bool image_creation_success = dex2oat->CreateImageFile(image_filename,
                                                            image_base,
                                                            oat_unstripped,
@@ -1461,7 +1460,7 @@
   }
 
   if (is_host) {
-    timings.EndSplit();
+    timings.EndTiming();
     if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
       LOG(INFO) << Dumpable<TimingLogger>(timings);
     }
@@ -1474,7 +1473,7 @@
   // If we don't want to strip in place, copy from unstripped location to stripped location.
   // We need to strip after image creation because FixupElf needs to use .strtab.
   if (oat_unstripped != oat_stripped) {
-    timings.NewSplit("dex2oat OatFile copy");
+    TimingLogger::ScopedTiming t("dex2oat OatFile copy", &timings);
     oat_file.reset();
      std::unique_ptr<File> in(OS::OpenFileForReading(oat_unstripped.c_str()));
     std::unique_ptr<File> out(OS::CreateEmptyFile(oat_stripped.c_str()));
@@ -1509,7 +1508,7 @@
   }
 #endif  // ART_USE_PORTABLE_COMPILER
 
-  timings.EndSplit();
+  timings.EndTiming();
 
   if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
     LOG(INFO) << Dumpable<TimingLogger>(timings);
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index 7c09999..4c18ce4 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -164,18 +164,18 @@
 template <class Value>
 inline void Histogram<Value>::PrintConfidenceIntervals(std::ostream &os, double interval,
                                                        const CumulativeData& data) const {
+  static constexpr size_t kFractionalDigits = 3;
   DCHECK_GT(interval, 0);
   DCHECK_LT(interval, 1.0);
-
-  double per_0 = (1.0 - interval) / 2.0;
-  double per_1 = per_0 + interval;
-  TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust);
-  os << Name() << ":\tSum: ";
-  os << PrettyDuration(Sum() * kAdjust) << " ";
-  os << (interval * 100) << "% C.I. " << FormatDuration(Percentile(per_0, data) * kAdjust, unit);
-  os << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit) << " ";
-  os << "Avg: " << FormatDuration(Mean() * kAdjust, unit) << " Max: ";
-  os << FormatDuration(Max() * kAdjust, unit) << "\n";
+  const double per_0 = (1.0 - interval) / 2.0;
+  const double per_1 = per_0 + interval;
+  const TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust);
+  os << Name() << ":\tSum: " << PrettyDuration(Sum() * kAdjust) << " "
+     << (interval * 100) << "% C.I. " << FormatDuration(Percentile(per_0, data) * kAdjust, unit,
+                                                        kFractionalDigits)
+     << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit, kFractionalDigits) << " "
+     << "Avg: " << FormatDuration(Mean() * kAdjust, unit, kFractionalDigits) << " Max: "
+     << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << "\n";
 }
 
 template <class Value>
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index a155002..b6a2aaf 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -33,6 +33,8 @@
 
 constexpr size_t CumulativeLogger::kLowMemoryBucketCount;
 constexpr size_t CumulativeLogger::kDefaultBucketCount;
+constexpr size_t TimingLogger::kIndexNotFound;
+
 CumulativeLogger::CumulativeLogger(const std::string& name)
     : name_(name),
       lock_name_("CumulativeLoggerLock" + name),
@@ -66,10 +68,12 @@
 
 void CumulativeLogger::AddLogger(const TimingLogger &logger) {
   MutexLock mu(Thread::Current(), lock_);
-  for (const TimingLogger::SplitTiming& split : logger.GetSplits()) {
-    uint64_t split_time = split.first;
-    const char* split_name = split.second;
-    AddPair(split_name, split_time);
+  TimingLogger::TimingData timing_data(logger.CalculateTimingData());
+  const std::vector<TimingLogger::Timing>& timings = logger.GetTimings();
+  for (size_t i = 0; i < timings.size(); ++i) {
+    if (timings[i].IsStartTiming()) {
+      AddPair(timings[i].GetName(), timing_data.GetExclusiveTime(i));
+    }
   }
   ++iterations_;
 }
@@ -124,166 +128,125 @@
 }
 
 TimingLogger::TimingLogger(const char* name, bool precise, bool verbose)
-    : name_(name), precise_(precise), verbose_(verbose), current_split_(NULL) {
+    : name_(name), precise_(precise), verbose_(verbose) {
 }
 
 void TimingLogger::Reset() {
-  current_split_ = NULL;
-  splits_.clear();
+  timings_.clear();
 }
 
-void TimingLogger::StartSplit(const char* new_split_label) {
-  DCHECK(new_split_label != nullptr) << "Starting split with null label.";
-  TimingLogger::ScopedSplit* explicit_scoped_split =
-      new TimingLogger::ScopedSplit(new_split_label, this);
-  explicit_scoped_split->explicit_ = true;
+void TimingLogger::StartTiming(const char* label) {
+  DCHECK(label != nullptr);
+  timings_.push_back(Timing(NanoTime(), label));
+  ATRACE_BEGIN(label);
 }
 
-void TimingLogger::EndSplit() {
-  CHECK(current_split_ != nullptr) << "Ending a non-existent split.";
-  DCHECK(current_split_->label_ != nullptr);
-  DCHECK(current_split_->explicit_ == true)
-      << "Explicitly ending scoped split: " << current_split_->label_;
-  delete current_split_;
-  // TODO: current_split_ = nullptr;
-}
-
-// Ends the current split and starts the one given by the label.
-void TimingLogger::NewSplit(const char* new_split_label) {
-  if (current_split_ == nullptr) {
-    StartSplit(new_split_label);
-  } else {
-    DCHECK(new_split_label != nullptr) << "New split (" << new_split_label << ") with null label.";
-    current_split_->TailInsertSplit(new_split_label);
-  }
+void TimingLogger::EndTiming() {
+  timings_.push_back(Timing(NanoTime(), nullptr));
+  ATRACE_END();
 }
 
 uint64_t TimingLogger::GetTotalNs() const {
-  uint64_t total_ns = 0;
-  for (const TimingLogger::SplitTiming& split : splits_) {
-    total_ns += split.first;
+  if (timings_.size() < 2) {
+    return 0;
   }
-  return total_ns;
+  return timings_.back().GetTime() - timings_.front().GetTime();
 }
 
-void TimingLogger::Dump(std::ostream &os) const {
+size_t TimingLogger::FindTimingIndex(const char* name, size_t start_idx) const {
+  DCHECK_LT(start_idx, timings_.size());
+  for (size_t i = start_idx; i < timings_.size(); ++i) {
+    if (timings_[i].IsStartTiming() && strcmp(timings_[i].GetName(), name) == 0) {
+      return i;
+    }
+  }
+  return kIndexNotFound;
+}
+
+TimingLogger::TimingData TimingLogger::CalculateTimingData() const {
+  TimingLogger::TimingData ret;
+  ret.data_.resize(timings_.size());
+  std::vector<size_t> open_stack;
+  for (size_t i = 0; i < timings_.size(); ++i) {
+    if (timings_[i].IsEndTiming()) {
+      CHECK(!open_stack.empty()) << "No starting split for ending split at index " << i;
+      size_t open_idx = open_stack.back();
+      uint64_t time = timings_[i].GetTime() - timings_[open_idx].GetTime();
+      ret.data_[open_idx].exclusive_time += time;
+      DCHECK_EQ(ret.data_[open_idx].total_time, 0U);
+      ret.data_[open_idx].total_time += time;
+      // Each open split has exactly one end.
+      open_stack.pop_back();
+      // If there is a parent node, subtract from the exclusive time.
+      if (!open_stack.empty()) {
+        // Note this may go negative, but will work due to 2's complement when we add the
+        // total time value later.
+        ret.data_[open_stack.back()].exclusive_time -= time;
+      }
+    } else {
+      open_stack.push_back(i);
+    }
+  }
+  CHECK(open_stack.empty()) << "Missing ending for timing "
+      << timings_[open_stack.back()].GetName() << " at index " << open_stack.back();
+  return ret;  // No need to fear, C++11 move semantics are here.
+}
+
+void TimingLogger::Dump(std::ostream &os, const char* indent_string) const {
+  static constexpr size_t kFractionalDigits = 3;
+  TimingLogger::TimingData timing_data(CalculateTimingData());
   uint64_t longest_split = 0;
-  uint64_t total_ns = 0;
-  for (const SplitTiming& split : splits_) {
-    uint64_t split_time = split.first;
-    longest_split = std::max(longest_split, split_time);
-    total_ns += split_time;
+  for (size_t i = 0; i < timings_.size(); ++i) {
+    longest_split = std::max(longest_split, timing_data.GetTotalTime(i));
   }
   // Compute which type of unit we will use for printing the timings.
   TimeUnit tu = GetAppropriateTimeUnit(longest_split);
   uint64_t divisor = GetNsToTimeUnitDivisor(tu);
+  uint64_t mod_fraction = divisor >= 1000 ? divisor / 1000 : 1;
   // Print formatted splits.
-  for (const SplitTiming& split : splits_) {
-    uint64_t split_time = split.first;
-    if (!precise_ && divisor >= 1000) {
-      // Make the fractional part 0.
-      split_time -= split_time % (divisor / 1000);
+  size_t tab_count = 1;
+  os << name_ << " [Exclusive time] [Total time]\n";
+  for (size_t i = 0; i < timings_.size(); ++i) {
+    if (timings_[i].IsStartTiming()) {
+      uint64_t exclusive_time = timing_data.GetExclusiveTime(i);
+      uint64_t total_time = timing_data.GetTotalTime(i);
+      if (!precise_) {
+        // Make the fractional part 0.
+        exclusive_time -= exclusive_time % mod_fraction;
+        total_time -= total_time % mod_fraction;
+      }
+      for (size_t j = 0; j < tab_count; ++j) {
+        os << indent_string;
+      }
+      os << FormatDuration(exclusive_time, tu, kFractionalDigits);
+      // If they are the same, just print one value to prevent spam.
+      if (exclusive_time != total_time) {
+        os << "/" << FormatDuration(total_time, tu, kFractionalDigits);
+      }
+      os << " " << timings_[i].GetName() << "\n";
+      ++tab_count;
+    } else {
+      --tab_count;
     }
-    os << name_ << ": " << std::setw(8) << FormatDuration(split_time, tu) << " "
-       << split.second << "\n";
   }
-  os << name_ << ": end, " << NsToMs(total_ns) << " ms\n";
+  os << name_ << ": end, " << PrettyDuration(GetTotalNs()) << "\n";
 }
 
-TimingLogger::ScopedSplit::ScopedSplit(const char* label, TimingLogger* timing_logger) {
-  DCHECK(label != NULL) << "New scoped split (" << label << ") with null label.";
-  CHECK(timing_logger != NULL) << "New scoped split (" << label << ") without TimingLogger.";
-  timing_logger_ = timing_logger;
-  label_ = label;
-  running_ns_ = 0;
-  explicit_ = false;
-
-  // Stash away the current split and pause it.
-  enclosing_split_ = timing_logger->current_split_;
-  if (enclosing_split_ != NULL) {
-    enclosing_split_->Pause();
+void TimingLogger::Verify() {
+  size_t counts[2] = { 0 };
+  for (size_t i = 0; i < timings_.size(); ++i) {
+    if (i > 0) {
+      CHECK_LE(timings_[i - 1].GetTime(), timings_[i].GetTime());
+    }
+    ++counts[timings_[i].IsStartTiming() ? 0 : 1];
   }
-
-  timing_logger_->current_split_ = this;
-
-  ATRACE_BEGIN(label_);
-
-  start_ns_ = NanoTime();
-  if (timing_logger_->verbose_) {
-    LOG(INFO) << "Begin: " << label_;
-  }
+  CHECK_EQ(counts[0], counts[1]) << "Number of StartTiming and EndTiming doesn't match";
 }
 
-TimingLogger::ScopedSplit::~ScopedSplit() {
-  uint64_t current_time = NanoTime();
-  uint64_t split_time = current_time - start_ns_;
-  running_ns_ += split_time;
-  ATRACE_END();
-
-  if (timing_logger_->verbose_) {
-    LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time);
+TimingLogger::~TimingLogger() {
+  if (kIsDebugBuild) {
+    Verify();
   }
-
-  // If one or more enclosed explicitly started splits are not terminated we can
-  // either fail or "unwind" the stack of splits in the timing logger to 'this'
-  // (by deleting the intervening scoped splits). This implements the latter.
-  TimingLogger::ScopedSplit* current = timing_logger_->current_split_;
-  while ((current != NULL) && (current != this)) {
-    delete current;
-    current = timing_logger_->current_split_;
-  }
-
-  CHECK(current != NULL) << "Missing scoped split (" << this->label_
-                           << ") in timing logger (" << timing_logger_->name_ << ").";
-  CHECK(timing_logger_->current_split_ == this);
-
-  timing_logger_->splits_.push_back(SplitTiming(running_ns_, label_));
-
-  timing_logger_->current_split_ = enclosing_split_;
-  if (enclosing_split_ != NULL) {
-    enclosing_split_->Resume();
-  }
-}
-
-
-void TimingLogger::ScopedSplit::TailInsertSplit(const char* label) {
-  // Sleight of hand here: Rather than embedding a new scoped split, we're updating the current
-  // scoped split in place. Basically, it's one way to make explicit and scoped splits compose
-  // well while maintaining the current semantics of NewSplit. An alternative is to push a new split
-  // since we unwind the stack of scoped splits in the scoped split destructor. However, this implies
-  // that the current split is not ended by NewSplit (which calls TailInsertSplit), which would
-  // be different from what we had before.
-
-  uint64_t current_time = NanoTime();
-  uint64_t split_time = current_time - start_ns_;
-  ATRACE_END();
-  timing_logger_->splits_.push_back(std::pair<uint64_t, const char*>(split_time, label_));
-
-  if (timing_logger_->verbose_) {
-    LOG(INFO) << "End: " << label_ << " " << PrettyDuration(split_time) << "\n"
-              << "Begin: " << label;
-  }
-
-  label_ = label;
-  start_ns_ = current_time;
-  running_ns_ = 0;
-
-  ATRACE_BEGIN(label);
-}
-
-void TimingLogger::ScopedSplit::Pause() {
-  uint64_t current_time = NanoTime();
-  uint64_t split_time = current_time - start_ns_;
-  running_ns_ += split_time;
-  ATRACE_END();
-}
-
-
-void TimingLogger::ScopedSplit::Resume() {
-  uint64_t current_time = NanoTime();
-
-  start_ns_ = current_time;
-  ATRACE_BEGIN(label_);
 }
 
 }  // namespace art
diff --git a/runtime/base/timing_logger.h b/runtime/base/timing_logger.h
index 9b55898..b300109 100644
--- a/runtime/base/timing_logger.h
+++ b/runtime/base/timing_logger.h
@@ -77,93 +77,119 @@
 // A timing logger that knows when a split starts for the purposes of logging tools, like systrace.
 class TimingLogger {
  public:
-  // Splits are nanosecond times and split names.
-  typedef std::pair<uint64_t, const char*> SplitTiming;
-  typedef std::vector<SplitTiming> SplitTimings;
+  static constexpr size_t kIndexNotFound = static_cast<size_t>(-1);
+
+  class Timing {
+   public:
+    Timing(uint64_t time, const char* name) : time_(time), name_(name) {
+    }
+    bool IsStartTiming() const {
+      return !IsEndTiming();
+    }
+    bool IsEndTiming() const {
+      return name_ == nullptr;
+    }
+    uint64_t GetTime() const {
+      return time_;
+    }
+    const char* GetName() const {
+      return name_;
+    }
+
+   private:
+    uint64_t time_;
+    const char* name_;
+  };
+
+  // Extra data that is only calculated when you call dump to prevent excess allocation.
+  class TimingData {
+   public:
+    TimingData() = default;
+    TimingData(TimingData&& other) {
+      std::swap(data_, other.data_);
+    }
+    TimingData& operator=(TimingData&& other) {
+      std::swap(data_, other.data_);
+      return *this;
+    }
+    uint64_t GetTotalTime(size_t idx) {
+      return data_[idx].total_time;
+    }
+    uint64_t GetExclusiveTime(size_t idx) {
+      return data_[idx].exclusive_time;
+    }
+
+   private:
+    // Each begin split has a total time and exclusive time. Exclusive time is total time - total
+    // time of children nodes.
+    struct CalculatedDataPoint {
+      CalculatedDataPoint() : total_time(0), exclusive_time(0) {}
+      uint64_t total_time;
+      uint64_t exclusive_time;
+    };
+    std::vector<CalculatedDataPoint> data_;
+    friend class TimingLogger;
+  };
 
   explicit TimingLogger(const char* name, bool precise, bool verbose);
-  ~TimingLogger() {
-    // TODO: DCHECK(current_split_ == nullptr) << "Forgot to end split: " << current_split_->label_;
-  }
-  // Clears current splits and labels.
+  ~TimingLogger();
+  // Verify that all open timings have related closed timings.
+  void Verify();
+  // Clears current timings and labels.
   void Reset();
-
-  // Starts a split
-  void StartSplit(const char* new_split_label);
-
-  // Ends the current split and starts the one given by the label.
-  void NewSplit(const char* new_split_label);
-
-  // Ends the current split and records the end time.
-  void EndSplit();
-
+  // Starts a timing.
+  void StartTiming(const char* new_split_label);
+  // Ends the current timing.
+  void EndTiming();
+  // End the current timing and start a new timing. Usage not recommended.
+  void NewTiming(const char* new_split_label) {
+    EndTiming();
+    StartTiming(new_split_label);
+  }
+  // Returns the total duration of the timings (sum of total times).
   uint64_t GetTotalNs() const;
-
-  void Dump(std::ostream& os) const;
+  // Find the index of a timing by name.
+  size_t FindTimingIndex(const char* name, size_t start_idx) const;
+  void Dump(std::ostream& os, const char* indent_string = "  ") const;
 
   // Scoped timing splits that can be nested and composed with the explicit split
   // starts and ends.
-  class ScopedSplit {
-    public:
-      explicit ScopedSplit(const char* label, TimingLogger* timing_logger);
+  class ScopedTiming {
+   public:
+    explicit ScopedTiming(const char* label, TimingLogger* logger) : logger_(logger) {
+      logger_->StartTiming(label);
+    }
+    ~ScopedTiming() {
+      logger_->EndTiming();
+    }
+    // Closes the current timing and opens a new timing.
+    void NewTiming(const char* label) {
+      logger_->NewTiming(label);
+    }
 
-      ~ScopedSplit();
-
-      friend class TimingLogger;
-
-    private:
-      // Pauses timing of the split, usually due to nesting of another split.
-      void Pause();
-
-      // Resumes timing of the split, usually because a nested split has ended.
-      void Resume();
-
-      // Used by new split to swap splits in place in a ScopedSplit instance.
-      void TailInsertSplit(const char* label);
-
-      // The scoped split immediately enclosing this split. Essentially, we get a
-      // stack of nested splits through this field.
-      ScopedSplit* enclosing_split_;
-
-      // Was this created via TimingLogger's StartSplit?
-      bool explicit_;
-
-      // The split's name.
-      const char* label_;
-
-      // The current split's latest start time. (It may have been paused and restarted.)
-      uint64_t start_ns_;
-
-      // The running time, outside of pauses.
-      uint64_t running_ns_;
-
-      // The timing logger holding this split.
-      TimingLogger* timing_logger_;
-
-      DISALLOW_COPY_AND_ASSIGN(ScopedSplit);
+   private:
+    TimingLogger* const logger_;  // The timing logger which the scoped timing is associated with.
+    DISALLOW_COPY_AND_ASSIGN(ScopedTiming);
   };
 
-  const SplitTimings& GetSplits() const {
-    return splits_;
+  // Return the time points of when each start / end timings start and finish.
+  const std::vector<Timing>& GetTimings() const {
+    return timings_;
   }
 
-  friend class ScopedSplit;
+  TimingData CalculateTimingData() const;
+
  protected:
   // The name of the timing logger.
   const char* const name_;
-
   // Do we want to print the exactly recorded split (true) or round down to the time unit being
   // used (false).
   const bool precise_;
-
   // Verbose logging.
   const bool verbose_;
-
-  // The current scoped split is also the 'top' of the stack of splits in progress.
-  ScopedSplit* current_split_;
-
-  // Splits that have ended.
-  SplitTimings splits_;
+  // Timing points that are either start or end points. For each start point, ret[i] = location
+  // of the end split associated with i. If it is an end split, ret[i] = i.
+  std::vector<Timing> timings_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(TimingLogger);
diff --git a/runtime/base/timing_logger_test.cc b/runtime/base/timing_logger_test.cc
index 0757751..35a73d0 100644
--- a/runtime/base/timing_logger_test.cc
+++ b/runtime/base/timing_logger_test.cc
@@ -26,16 +26,14 @@
 
 TEST_F(TimingLoggerTest, StartEnd) {
   const char* split1name = "First Split";
-  TimingLogger timings("StartEnd", true, false);
-
-  timings.StartSplit(split1name);
-
-  timings.EndSplit();  // Ends split1.
-
-  const TimingLogger::SplitTimings& splits = timings.GetSplits();
-
-  EXPECT_EQ(1U, splits.size());
-  EXPECT_STREQ(splits[0].second, split1name);
+  TimingLogger logger("StartEnd", true, false);
+  logger.StartTiming(split1name);
+  logger.EndTiming();  // Ends split1.
+  const auto& timings = logger.GetTimings();
+  EXPECT_EQ(2U, timings.size());  // Start, End splits
+  EXPECT_TRUE(timings[0].IsStartTiming());
+  EXPECT_STREQ(timings[0].GetName(), split1name);
+  EXPECT_TRUE(timings[1].IsEndTiming());
 }
 
 
@@ -43,56 +41,61 @@
   const char* split1name = "First Split";
   const char* split2name = "Second Split";
   const char* split3name = "Third Split";
-  TimingLogger timings("StartNewEnd", true, false);
-
-  timings.StartSplit(split1name);
-
-  timings.NewSplit(split2name);  // Ends split1.
-
-  timings.NewSplit(split3name);  // Ends split2.
-
-  timings.EndSplit();  // Ends split3.
-
-  const TimingLogger::SplitTimings& splits = timings.GetSplits();
-
-  EXPECT_EQ(3U, splits.size());
-  EXPECT_STREQ(splits[0].second, split1name);
-  EXPECT_STREQ(splits[1].second, split2name);
-  EXPECT_STREQ(splits[2].second, split3name);
+  TimingLogger logger("StartNewEnd", true, false);
+  logger.StartTiming(split1name);
+  logger.NewTiming(split2name);
+  logger.NewTiming(split3name);
+  logger.EndTiming();
+  // Get the timings and verify that they are sane.
+  const auto& timings = logger.GetTimings();
+  // 6 timings in the timing logger at this point.
+  EXPECT_EQ(6U, timings.size());
+  EXPECT_TRUE(timings[0].IsStartTiming());
+  EXPECT_STREQ(timings[0].GetName(), split1name);
+  EXPECT_TRUE(timings[1].IsEndTiming());
+  EXPECT_TRUE(timings[2].IsStartTiming());
+  EXPECT_STREQ(timings[2].GetName(), split2name);
+  EXPECT_TRUE(timings[3].IsEndTiming());
+  EXPECT_TRUE(timings[4].IsStartTiming());
+  EXPECT_STREQ(timings[4].GetName(), split3name);
+  EXPECT_TRUE(timings[5].IsEndTiming());
 }
 
 TEST_F(TimingLoggerTest, StartNewEndNested) {
-  const char* split1name = "First Split";
-  const char* split2name = "Second Split";
-  const char* split3name = "Third Split";
-  const char* split4name = "Fourth Split";
-  const char* split5name = "Fifth Split";
-  TimingLogger timings("StartNewEndNested", true, false);
-
-  timings.StartSplit(split1name);
-
-  timings.NewSplit(split2name);  // Ends split1.
-
-  timings.StartSplit(split3name);
-
-  timings.StartSplit(split4name);
-
-  timings.NewSplit(split5name);  // Ends split4.
-
-  timings.EndSplit();  // Ends split5.
-
-  timings.EndSplit();  // Ends split3.
-
-  timings.EndSplit();  // Ends split2.
-
-  const TimingLogger::SplitTimings& splits = timings.GetSplits();
-
-  EXPECT_EQ(5U, splits.size());
-  EXPECT_STREQ(splits[0].second, split1name);
-  EXPECT_STREQ(splits[1].second, split4name);
-  EXPECT_STREQ(splits[2].second, split5name);
-  EXPECT_STREQ(splits[3].second, split3name);
-  EXPECT_STREQ(splits[4].second, split2name);
+  const char* name1 = "First Split";
+  const char* name2 = "Second Split";
+  const char* name3 = "Third Split";
+  const char* name4 = "Fourth Split";
+  const char* name5 = "Fifth Split";
+  TimingLogger logger("StartNewEndNested", true, false);
+  logger.StartTiming(name1);
+  logger.NewTiming(name2);  // Ends timing1.
+  logger.StartTiming(name3);
+  logger.StartTiming(name4);
+  logger.NewTiming(name5);  // Ends timing4.
+  logger.EndTiming();  // Ends timing5.
+  logger.EndTiming();  // Ends timing3.
+  logger.EndTiming();  // Ends timing2.
+  const auto& timings = logger.GetTimings();
+  EXPECT_EQ(10U, timings.size());
+  size_t idx_1 = logger.FindTimingIndex(name1, 0);
+  size_t idx_2 = logger.FindTimingIndex(name2, 0);
+  size_t idx_3 = logger.FindTimingIndex(name3, 0);
+  size_t idx_4 = logger.FindTimingIndex(name4, 0);
+  size_t idx_5 = logger.FindTimingIndex(name5, 0);
+  size_t idx_6 = logger.FindTimingIndex("Not found", 0);
+  EXPECT_NE(idx_1, TimingLogger::kIndexNotFound);
+  EXPECT_NE(idx_2, TimingLogger::kIndexNotFound);
+  EXPECT_NE(idx_3, TimingLogger::kIndexNotFound);
+  EXPECT_NE(idx_4, TimingLogger::kIndexNotFound);
+  EXPECT_NE(idx_5, TimingLogger::kIndexNotFound);
+  EXPECT_EQ(idx_6, TimingLogger::kIndexNotFound);
+  TimingLogger::TimingData data = logger.CalculateTimingData();
+  EXPECT_STREQ(timings[idx_1].GetName(), name1);
+  EXPECT_STREQ(timings[idx_2].GetName(), name2);
+  EXPECT_STREQ(timings[idx_3].GetName(), name3);
+  EXPECT_STREQ(timings[idx_4].GetName(), name4);
+  EXPECT_STREQ(timings[idx_5].GetName(), name5);
 }
 
 
@@ -101,31 +104,32 @@
   const char* innersplit1 = "Inner Split 1";
   const char* innerinnersplit1 = "Inner Inner Split 1";
   const char* innersplit2 = "Inner Split 2";
-  TimingLogger timings("Scoped", true, false);
-
+  TimingLogger logger("Scoped", true, false);
   {
-      TimingLogger::ScopedSplit outer(outersplit, &timings);
-
+    TimingLogger::ScopedTiming outer(outersplit, &logger);
+    {
+      TimingLogger::ScopedTiming inner1(innersplit1, &logger);
       {
-          TimingLogger::ScopedSplit inner1(innersplit1, &timings);
-
-          {
-              TimingLogger::ScopedSplit innerinner1(innerinnersplit1, &timings);
-          }  // Ends innerinnersplit1.
-      }  // Ends innersplit1.
-
-      {
-          TimingLogger::ScopedSplit inner2(innersplit2, &timings);
-      }  // Ends innersplit2.
+        TimingLogger::ScopedTiming innerinner1(innerinnersplit1, &logger);
+      }  // Ends innerinnersplit1.
+    }  // Ends innersplit1.
+    {
+      TimingLogger::ScopedTiming inner2(innersplit2, &logger);
+    }  // Ends innersplit2.
   }  // Ends outersplit.
-
-  const TimingLogger::SplitTimings& splits = timings.GetSplits();
-
-  EXPECT_EQ(4U, splits.size());
-  EXPECT_STREQ(splits[0].second, innerinnersplit1);
-  EXPECT_STREQ(splits[1].second, innersplit1);
-  EXPECT_STREQ(splits[2].second, innersplit2);
-  EXPECT_STREQ(splits[3].second, outersplit);
+  const size_t idx_outersplit = logger.FindTimingIndex(outersplit, 0);
+  const size_t idx_innersplit1 = logger.FindTimingIndex(innersplit1, 0);
+  const size_t idx_innerinnersplit1 = logger.FindTimingIndex(innerinnersplit1, 0);
+  const size_t idx_innersplit2 = logger.FindTimingIndex(innersplit2, 0);
+  const auto& timings = logger.GetTimings();
+  EXPECT_EQ(8U, timings.size());  // 4 start timings and 4 end timings.
+  EXPECT_GE(timings[idx_innerinnersplit1].GetTime(), timings[idx_innersplit1].GetTime());
+  EXPECT_GE(timings[idx_innersplit2].GetTime(), timings[idx_innersplit1].GetTime());
+  TimingLogger::TimingData data(logger.CalculateTimingData());
+  EXPECT_GE(data.GetTotalTime(idx_outersplit), data.GetTotalTime(idx_innerinnersplit1));
+  EXPECT_GE(data.GetTotalTime(idx_outersplit),
+            data.GetTotalTime(idx_innersplit1) + data.GetTotalTime(idx_innersplit2));
+  EXPECT_GE(data.GetTotalTime(idx_innersplit1), data.GetTotalTime(idx_innerinnersplit1));
 }
 
 
@@ -134,27 +138,24 @@
   const char* innersplit = "Inner Split";
   const char* innerinnersplit1 = "Inner Inner Split 1";
   const char* innerinnersplit2 = "Inner Inner Split 2";
-  TimingLogger timings("Scoped", true, false);
-
-  timings.StartSplit(outersplit);
-
+  TimingLogger logger("Scoped", true, false);
+  logger.StartTiming(outersplit);
   {
-      TimingLogger::ScopedSplit inner(innersplit, &timings);
-
-      timings.StartSplit(innerinnersplit1);
-
-      timings.NewSplit(innerinnersplit2);  // Ends innerinnersplit1.
+    TimingLogger::ScopedTiming inner(innersplit, &logger);
+    logger.StartTiming(innerinnersplit1);
+    logger.NewTiming(innerinnersplit2);  // Ends innerinnersplit1.
+    logger.EndTiming();
   }  // Ends innerinnersplit2, then innersplit.
-
-  timings.EndSplit();  // Ends outersplit.
-
-  const TimingLogger::SplitTimings& splits = timings.GetSplits();
-
-  EXPECT_EQ(4U, splits.size());
-  EXPECT_STREQ(splits[0].second, innerinnersplit1);
-  EXPECT_STREQ(splits[1].second, innerinnersplit2);
-  EXPECT_STREQ(splits[2].second, innersplit);
-  EXPECT_STREQ(splits[3].second, outersplit);
+  logger.EndTiming();  // Ends outersplit.
+  const size_t idx_outersplit = logger.FindTimingIndex(outersplit, 0);
+  const size_t idx_innersplit = logger.FindTimingIndex(innersplit, 0);
+  const size_t idx_innerinnersplit1 = logger.FindTimingIndex(innerinnersplit1, 0);
+  const size_t idx_innerinnersplit2 = logger.FindTimingIndex(innerinnersplit2, 0);
+  const auto& timings = logger.GetTimings();
+  EXPECT_EQ(8U, timings.size());
+  EXPECT_LE(timings[idx_outersplit].GetTime(), timings[idx_innersplit].GetTime());
+  EXPECT_LE(timings[idx_innersplit].GetTime(), timings[idx_innerinnersplit1].GetTime());
+  EXPECT_LE(timings[idx_innerinnersplit1].GetTime(), timings[idx_innerinnersplit2].GetTime());
 }
 
 }  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 656c55b..09fb97a 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -529,7 +529,7 @@
 }
 
 size_t RosAlloc::Free(Thread* self, void* ptr) {
-  WriterMutexLock rmu(self, bulk_free_lock_);
+  ReaderMutexLock rmu(self, bulk_free_lock_);
   return FreeInternal(self, ptr);
 }
 
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 8622fd6..46d79bf 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -101,6 +101,7 @@
 }
 
 void GarbageCollector::SwapBitmaps() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Swap the live and mark bitmaps for each alloc space. This is needed since sweep re-swaps
   // these bitmaps. The bitmap swapping is an optimization so that we do not need to clear the live
   // bits of dead objects in the live bitmap.
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index ebd1738..4044852 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -49,7 +49,6 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 
-using ::art::mirror::Class;
 using ::art::mirror::Object;
 
 namespace art {
@@ -57,7 +56,7 @@
 namespace collector {
 
 void MarkCompact::BindBitmaps() {
-  GetTimings()->StartSplit("BindBitmaps");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
@@ -66,7 +65,6 @@
       CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
     }
   }
-  GetTimings()->EndSplit();
 }
 
 MarkCompact::MarkCompact(Heap* heap, const std::string& name_prefix)
@@ -120,7 +118,7 @@
 };
 
 void MarkCompact::CalculateObjectForwardingAddresses() {
-  GetTimings()->NewSplit(__FUNCTION__);
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // The bump pointer in the space where the next forwarding address will be.
   bump_pointer_ = reinterpret_cast<byte*>(space_->Begin());
   // Visit all the marked objects in the bitmap.
@@ -131,7 +129,7 @@
 }
 
 void MarkCompact::InitializePhase() {
-  TimingLogger::ScopedSplit split("InitializePhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_region_.Reset();
@@ -143,7 +141,6 @@
 }
 
 void MarkCompact::ProcessReferences(Thread* self) {
-  TimingLogger::ScopedSplit split("ProcessReferences", GetTimings());
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   heap_->GetReferenceProcessor()->ProcessReferences(
       false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
@@ -187,6 +184,7 @@
 }
 
 void MarkCompact::MarkingPhase() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Thread* self = Thread::Current();
   // Bitmap which describes which objects we have to move.
   objects_before_forwarding_.reset(accounting::ContinuousSpaceBitmap::Create(
@@ -195,21 +193,22 @@
   objects_with_lockword_.reset(accounting::ContinuousSpaceBitmap::Create(
       "objects with lock words", space_->Begin(), space_->Size()));
   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self));
-  TimingLogger::ScopedSplit split("MarkingPhase", GetTimings());
   // Assume the cleared space is already empty.
   BindBitmaps();
+  t.NewTiming("ProcessCards");
   // Process dirty cards and add dirty cards to mod-union tables.
   heap_->ProcessCards(GetTimings(), false);
   // Clear the whole card table since we can not Get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
-  GetTimings()->NewSplit("ClearCardTable");
+  t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
   // Need to do this before the checkpoint since we don't want any threads to add references to
   // the live stack during the recursive mark.
-  GetTimings()->NewSplit("SwapStacks");
   if (kUseThreadLocalAllocationStack) {
+    t.NewTiming("RevokeAllThreadLocalAllocationStacks");
     heap_->RevokeAllThreadLocalAllocationStacks(self);
   }
+  t.NewTiming("SwapStacks");
   heap_->SwapStacks(self);
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -227,21 +226,20 @@
   // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
   // before they are properly counted.
   RevokeAllThreadLocalBuffers();
-  GetTimings()->StartSplit("PreSweepingGcVerification");
   // Disabled due to an issue where we have objects in the bump pointer space which reference dead
   // objects.
   // heap_->PreSweepingGcVerification(this);
-  GetTimings()->EndSplit();
 }
 
 void MarkCompact::UpdateAndMarkModUnion() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   for (auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune then we need to mark the references to other spaces.
     if (immune_region_.ContainsSpace(space)) {
       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
       if (table != nullptr) {
         // TODO: Improve naming.
-        TimingLogger::ScopedSplit split(
+        TimingLogger::ScopedTiming t(
             space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
                                      "UpdateAndMarkImageModUnionTable", GetTimings());
         table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
@@ -251,28 +249,28 @@
 }
 
 void MarkCompact::MarkReachableObjects() {
-  GetTimings()->StartSplit("MarkStackAsLive");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   accounting::ObjectStack* live_stack = heap_->GetLiveStack();
-  heap_->MarkAllocStackAsLive(live_stack);
+  {
+    TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
+    heap_->MarkAllocStackAsLive(live_stack);
+  }
   live_stack->Reset();
   // Recursively process the mark stack.
   ProcessMarkStack();
-  GetTimings()->EndSplit();
 }
 
 void MarkCompact::ReclaimPhase() {
-  TimingLogger::ScopedSplit split("ReclaimPhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   // Reclaim unmarked objects.
   Sweep(false);
   // Swap the live and mark bitmaps for each space which we modified space. This is an
   // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
   // bitmaps.
-  GetTimings()->StartSplit("SwapBitmapsAndUnBindBitmaps");
   SwapBitmaps();
   GetHeap()->UnBindBitmaps();  // Unbind the live and mark bitmaps.
   Compact();
-  GetTimings()->EndSplit();
 }
 
 void MarkCompact::ResizeMarkStack(size_t new_size) {
@@ -340,7 +338,7 @@
 };
 
 void MarkCompact::UpdateReferences() {
-  GetTimings()->NewSplit(__FUNCTION__);
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime* runtime = Runtime::Current();
   // Update roots.
   runtime->VisitRoots(UpdateRootCallback, this);
@@ -350,7 +348,7 @@
     accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
     if (table != nullptr) {
       // TODO: Improve naming.
-      TimingLogger::ScopedSplit split(
+      TimingLogger::ScopedTiming t(
           space->IsZygoteSpace() ? "UpdateZygoteModUnionTableReferences" :
                                    "UpdateImageModUnionTableReferences",
                                    GetTimings());
@@ -381,7 +379,7 @@
 }
 
 void MarkCompact::Compact() {
-  GetTimings()->NewSplit(__FUNCTION__);
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   CalculateObjectForwardingAddresses();
   UpdateReferences();
   MoveObjects();
@@ -389,7 +387,7 @@
   int64_t objects_freed = space_->GetObjectsAllocated() - live_objects_in_space_;
   int64_t bytes_freed = reinterpret_cast<int64_t>(space_->End()) -
       reinterpret_cast<int64_t>(bump_pointer_);
-  GetTimings()->NewSplit("RecordFree");
+  t.NewTiming("RecordFree");
   space_->RecordFree(objects_freed, bytes_freed);
   RecordFree(ObjectBytePair(objects_freed, bytes_freed));
   space_->SetEnd(bump_pointer_);
@@ -399,7 +397,7 @@
 
 // Marks all objects in the root set.
 void MarkCompact::MarkRoots() {
-  GetTimings()->NewSplit("MarkRoots");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime::Current()->VisitRoots(MarkRootCallback, this);
 }
 
@@ -483,9 +481,8 @@
 }
 
 void MarkCompact::SweepSystemWeaks() {
-  GetTimings()->StartSplit("SweepSystemWeaks");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
-  GetTimings()->EndSplit();
 }
 
 bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const {
@@ -523,7 +520,7 @@
 }
 
 void MarkCompact::MoveObjects() {
-  GetTimings()->NewSplit(__FUNCTION__);
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Move the objects in the before forwarding bitmap.
   MoveObjectVisitor visitor(this);
   objects_before_forwarding_->VisitMarkedRange(reinterpret_cast<uintptr_t>(space_->Begin()),
@@ -533,15 +530,15 @@
 }
 
 void MarkCompact::Sweep(bool swap_bitmaps) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   DCHECK(mark_stack_->IsEmpty());
-  TimingLogger::ScopedSplit split("Sweep", GetTimings());
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
       if (!ShouldSweepSpace(alloc_space)) {
         continue;
       }
-      TimingLogger::ScopedSplit split(
+      TimingLogger::ScopedTiming t(
           alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", GetTimings());
       RecordFree(alloc_space->Sweep(swap_bitmaps));
     }
@@ -550,7 +547,7 @@
 }
 
 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
-  TimingLogger::ScopedSplit split("SweepLargeObjects", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
@@ -590,13 +587,12 @@
 
 // Scan anything that's on the mark stack.
 void MarkCompact::ProcessMarkStack() {
-  GetTimings()->StartSplit("ProcessMarkStack");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   while (!mark_stack_->IsEmpty()) {
     Object* obj = mark_stack_->PopBack();
     DCHECK(obj != nullptr);
     ScanObject(obj);
   }
-  GetTimings()->EndSplit();
 }
 
 void MarkCompact::SetSpace(space::BumpPointerSpace* space) {
@@ -605,7 +601,7 @@
 }
 
 void MarkCompact::FinishPhase() {
-  TimingLogger::ScopedSplit split("FinishPhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   space_ = nullptr;
   CHECK(mark_stack_->IsEmpty());
   mark_stack_->Reset();
@@ -618,9 +614,8 @@
 }
 
 void MarkCompact::RevokeAllThreadLocalBuffers() {
-  GetTimings()->StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   GetHeap()->RevokeAllThreadLocalBuffers();
-  GetTimings()->EndSplit();
 }
 
 }  // namespace collector
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index d08796b..7e97b3b 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -81,7 +81,7 @@
 static constexpr bool kRevokeRosAllocThreadLocalBuffersAtCheckpoint = true;
 
 void MarkSweep::BindBitmaps() {
-  GetTimings()->StartSplit("BindBitmaps");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
@@ -89,7 +89,6 @@
       CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
     }
   }
-  GetTimings()->EndSplit();
 }
 
 MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix)
@@ -110,7 +109,7 @@
 }
 
 void MarkSweep::InitializePhase() {
-  TimingLogger::ScopedSplit split("InitializePhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_region_.Reset();
@@ -170,7 +169,6 @@
 }
 
 void MarkSweep::ProcessReferences(Thread* self) {
-  TimingLogger::ScopedSplit split("ProcessReferences", GetTimings());
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
       true, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
@@ -178,7 +176,7 @@
 }
 
 void MarkSweep::PausePhase() {
-  TimingLogger::ScopedSplit split("(Paused)PausePhase", GetTimings());
+  TimingLogger::ScopedTiming t("(Paused)PausePhase", GetTimings());
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   if (IsConcurrent()) {
@@ -190,7 +188,7 @@
     RecursiveMarkDirtyObjects(true, accounting::CardTable::kCardDirty);
   }
   {
-    TimingLogger::ScopedSplit split("SwapStacks", GetTimings());
+    TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     heap_->SwapStacks(self);
     live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
@@ -198,9 +196,7 @@
     // stacks and don't want anybody to allocate into the live stack.
     RevokeAllThreadLocalAllocationStacks(self);
   }
-  GetTimings()->StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
-  GetTimings()->EndSplit();
   // Disallow new system weaks to prevent a race which occurs when someone adds a new system
   // weak before we sweep them. Since this new system weak may not be marked, the GC may
   // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
@@ -214,6 +210,7 @@
 void MarkSweep::PreCleanCards() {
   // Don't do this for non concurrent GCs since they don't have any dirty cards.
   if (kPreCleanCards && IsConcurrent()) {
+    TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
     Thread* self = Thread::Current();
     CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
     // Process dirty cards and add dirty cards to mod union tables, also ages cards.
@@ -243,14 +240,14 @@
 
 void MarkSweep::RevokeAllThreadLocalAllocationStacks(Thread* self) {
   if (kUseThreadLocalAllocationStack) {
-    GetTimings()->NewSplit("RevokeAllThreadLocalAllocationStacks");
+    TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
     Locks::mutator_lock_->AssertExclusiveHeld(self);
     heap_->RevokeAllThreadLocalAllocationStacks(self);
   }
 }
 
 void MarkSweep::MarkingPhase() {
-  TimingLogger::ScopedSplit split("MarkingPhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Thread* self = Thread::Current();
   BindBitmaps();
   FindDefaultSpaceBitmap();
@@ -268,7 +265,7 @@
     if (immune_region_.ContainsSpace(space)) {
       const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
           "UpdateAndMarkImageModUnionTable";
-      TimingLogger::ScopedSplit split(name, GetTimings());
+      TimingLogger::ScopedTiming t(name, GetTimings());
       accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
       CHECK(mod_union_table != nullptr);
       mod_union_table->UpdateAndMarkReferences(MarkHeapReferenceCallback, this);
@@ -283,7 +280,7 @@
 }
 
 void MarkSweep::ReclaimPhase() {
-  TimingLogger::ScopedSplit split("ReclaimPhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Thread* self = Thread::Current();
   // Process the references concurrently.
   ProcessReferences(self);
@@ -291,25 +288,19 @@
   Runtime::Current()->AllowNewSystemWeaks();
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-
     // Reclaim unmarked objects.
     Sweep(false);
-
     // Swap the live and mark bitmaps for each space which we modified space. This is an
     // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
     // bitmaps.
-    GetTimings()->StartSplit("SwapBitmaps");
     SwapBitmaps();
-    GetTimings()->EndSplit();
-
     // Unbind the live and mark bitmaps.
-    TimingLogger::ScopedSplit split("UnBindBitmaps", GetTimings());
     GetHeap()->UnBindBitmaps();
   }
 }
 
 void MarkSweep::FindDefaultSpaceBitmap() {
-  TimingLogger::ScopedSplit split("FindDefaultMarkBitmap", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     accounting::ContinuousSpaceBitmap* bitmap = space->GetMarkBitmap();
     // We want to have the main space instead of non moving if possible.
@@ -506,11 +497,10 @@
 }
 
 void MarkSweep::MarkRoots(Thread* self) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // If we exclusively hold the mutator lock, all threads must be suspended.
-    GetTimings()->StartSplit("MarkRoots");
     Runtime::Current()->VisitRoots(MarkRootCallback, this);
-    GetTimings()->EndSplit();
     RevokeAllThreadLocalAllocationStacks(self);
   } else {
     MarkRootsCheckpoint(self, kRevokeRosAllocThreadLocalBuffersAtCheckpoint);
@@ -522,16 +512,14 @@
 }
 
 void MarkSweep::MarkNonThreadRoots() {
-  GetTimings()->StartSplit("MarkNonThreadRoots");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime::Current()->VisitNonThreadRoots(MarkRootCallback, this);
-  GetTimings()->EndSplit();
 }
 
 void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
-  GetTimings()->StartSplit("MarkConcurrentRoots");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Visit all runtime roots and clear dirty flags.
   Runtime::Current()->VisitConcurrentRoots(MarkRootCallback, this, flags);
-  GetTimings()->EndSplit();
 }
 
 class ScanObjectVisitor {
@@ -752,7 +740,8 @@
     Thread* self = Thread::Current();
     // Can't have a different split for each space since multiple spaces can have their cards being
     // scanned at the same time.
-    GetTimings()->StartSplit(paused ? "(Paused)ScanGrayObjects" : "ScanGrayObjects");
+    TimingLogger::ScopedTiming t(paused ? "(Paused)ScanGrayObjects" : __FUNCTION__,
+        GetTimings());
     // Try to take some of the mark stack since we can pass this off to the worker tasks.
     Object** mark_stack_begin = mark_stack_->Begin();
     Object** mark_stack_end = mark_stack_->End();
@@ -805,28 +794,28 @@
     thread_pool->StartWorkers(self);
     thread_pool->Wait(self, true, true);
     thread_pool->StopWorkers(self);
-    GetTimings()->EndSplit();
   } else {
     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
       if (space->GetMarkBitmap() != nullptr) {
         // Image spaces are handled properly since live == marked for them.
+        const char* name = nullptr;
         switch (space->GetGcRetentionPolicy()) {
-          case space::kGcRetentionPolicyNeverCollect:
-            GetTimings()->StartSplit(paused ? "(Paused)ScanGrayImageSpaceObjects" :
-                "ScanGrayImageSpaceObjects");
-            break;
-          case space::kGcRetentionPolicyFullCollect:
-            GetTimings()->StartSplit(paused ? "(Paused)ScanGrayZygoteSpaceObjects" :
-                "ScanGrayZygoteSpaceObjects");
-            break;
-          case space::kGcRetentionPolicyAlwaysCollect:
-            GetTimings()->StartSplit(paused ? "(Paused)ScanGrayAllocSpaceObjects" :
-                "ScanGrayAllocSpaceObjects");
-            break;
-          }
+        case space::kGcRetentionPolicyNeverCollect:
+          name = paused ? "(Paused)ScanGrayImageSpaceObjects" : "ScanGrayImageSpaceObjects";
+          break;
+        case space::kGcRetentionPolicyFullCollect:
+          name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
+          break;
+        case space::kGcRetentionPolicyAlwaysCollect:
+          name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
+          break;
+        default:
+          LOG(FATAL) << "Unreachable";
+        }
+        TimingLogger::ScopedTiming t(name, GetTimings());
         ScanObjectVisitor visitor(this);
-        card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, minimum_age);
-        GetTimings()->EndSplit();
+        card_table->Scan(space->GetMarkBitmap(), space->Begin(), space->End(), visitor,
+                         minimum_age);
       }
     }
   }
@@ -836,9 +825,7 @@
  public:
   RecursiveMarkTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
                     accounting::ContinuousSpaceBitmap* bitmap, uintptr_t begin, uintptr_t end)
-      : MarkStackTask<false>(thread_pool, mark_sweep, 0, NULL),
-        bitmap_(bitmap),
-        begin_(begin),
+      : MarkStackTask<false>(thread_pool, mark_sweep, 0, NULL), bitmap_(bitmap), begin_(begin),
         end_(end) {
   }
 
@@ -863,7 +850,7 @@
 // Populates the mark stack based on the set of marked objects and
 // recursively marks until the mark stack is emptied.
 void MarkSweep::RecursiveMark() {
-  TimingLogger::ScopedSplit split("RecursiveMark", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // RecursiveMark will build the lists of known instances of the Reference classes. See
   // DelayReferenceReferent for details.
   if (kUseRecursiveMark) {
@@ -930,25 +917,22 @@
 }
 
 void MarkSweep::ReMarkRoots() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
-  GetTimings()->StartSplit("(Paused)ReMarkRoots");
   Runtime::Current()->VisitRoots(
       MarkRootCallback, this, static_cast<VisitRootFlags>(kVisitRootFlagNewRoots |
                                                           kVisitRootFlagStopLoggingNewRoots |
                                                           kVisitRootFlagClearRootLog));
-  GetTimings()->EndSplit();
   if (kVerifyRootsMarked) {
-    GetTimings()->StartSplit("(Paused)VerifyRoots");
+    TimingLogger::ScopedTiming t("(Paused)VerifyRoots", GetTimings());
     Runtime::Current()->VisitRoots(VerifyRootMarked, this);
-    GetTimings()->EndSplit();
   }
 }
 
 void MarkSweep::SweepSystemWeaks(Thread* self) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetTimings()->StartSplit("SweepSystemWeaks");
   Runtime::Current()->SweepSystemWeaks(IsMarkedCallback, this);
-  GetTimings()->EndSplit();
 }
 
 mirror::Object* MarkSweep::VerifySystemWeakIsLiveCallback(Object* obj, void* arg) {
@@ -969,6 +953,7 @@
 }
 
 void MarkSweep::VerifySystemWeaks() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Verify system weaks, uses a special object visitor which returns the input object.
   Runtime::Current()->SweepSystemWeaks(VerifySystemWeakIsLiveCallback, this);
 }
@@ -1005,8 +990,8 @@
 
 void MarkSweep::MarkRootsCheckpoint(Thread* self,
                                     bool revoke_ros_alloc_thread_local_buffers_at_checkpoint) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   CheckpointMarkThreadRoots check_point(this, revoke_ros_alloc_thread_local_buffers_at_checkpoint);
-  GetTimings()->StartSplit("MarkRootsCheckpoint");
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   // Request the check point is run on all threads returning a count of the threads that must
   // run through the barrier including self.
@@ -1021,11 +1006,10 @@
   }
   Locks::mutator_lock_->SharedLock(self);
   Locks::heap_bitmap_lock_->ExclusiveLock(self);
-  GetTimings()->EndSplit();
 }
 
 void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitmaps) {
-  GetTimings()->StartSplit("SweepArray");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Thread* self = Thread::Current();
   mirror::Object** chunk_free_buffer = reinterpret_cast<mirror::Object**>(
       sweep_array_free_buffer_mem_map_->BaseBegin());
@@ -1072,10 +1056,9 @@
         // if needed.
         if (!mark_bitmap->Test(obj)) {
           if (chunk_free_pos >= kSweepArrayChunkFreeSize) {
-            GetTimings()->StartSplit("FreeList");
+            TimingLogger::ScopedTiming t("FreeList", GetTimings());
             freed.objects += chunk_free_pos;
             freed.bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
-            GetTimings()->EndSplit();
             chunk_free_pos = 0;
           }
           chunk_free_buffer[chunk_free_pos++] = obj;
@@ -1085,10 +1068,9 @@
       }
     }
     if (chunk_free_pos > 0) {
-      GetTimings()->StartSplit("FreeList");
+      TimingLogger::ScopedTiming t("FreeList", GetTimings());
       freed.objects += chunk_free_pos;
       freed.bytes += alloc_space->FreeList(self, chunk_free_pos, chunk_free_buffer);
-      GetTimings()->EndSplit();
       chunk_free_pos = 0;
     }
     // All of the references which space contained are no longer in the allocation stack, update
@@ -1113,31 +1095,33 @@
       freed_los.bytes += large_object_space->Free(self, obj);
     }
   }
-  GetTimings()->NewSplit("RecordFree");
-  RecordFree(freed);
-  RecordFreeLOS(freed_los);
-  GetTimings()->NewSplit("ResetStack");
-  allocations->Reset();
-  GetTimings()->EndSplit();
+  {
+    TimingLogger::ScopedTiming t("RecordFree", GetTimings());
+    RecordFree(freed);
+    RecordFreeLOS(freed_los);
+    t.NewTiming("ResetStack");
+    allocations->Reset();
+  }
   sweep_array_free_buffer_mem_map_->MadviseDontNeedAndZero();
 }
 
 void MarkSweep::Sweep(bool swap_bitmaps) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Ensure that nobody inserted items in the live stack after we swapped the stacks.
   CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
-  // Mark everything allocated since the last as GC live so that we can sweep concurrently,
-  // knowing that new allocations won't be marked as live.
-  GetTimings()->StartSplit("MarkStackAsLive");
-  accounting::ObjectStack* live_stack = heap_->GetLiveStack();
-  heap_->MarkAllocStackAsLive(live_stack);
-  live_stack->Reset();
-  GetTimings()->EndSplit();
-
-  DCHECK(mark_stack_->IsEmpty());
+  {
+    TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
+    // Mark everything allocated since the last GC as live so that we can sweep concurrently,
+    // knowing that new allocations won't be marked as live.
+    accounting::ObjectStack* live_stack = heap_->GetLiveStack();
+    heap_->MarkAllocStackAsLive(live_stack);
+    live_stack->Reset();
+    DCHECK(mark_stack_->IsEmpty());
+  }
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
-      TimingLogger::ScopedSplit split(
+      TimingLogger::ScopedTiming split(
           alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", GetTimings());
       RecordFree(alloc_space->Sweep(swap_bitmaps));
     }
@@ -1146,7 +1130,7 @@
 }
 
 void MarkSweep::SweepLargeObjects(bool swap_bitmaps) {
-  TimingLogger::ScopedSplit split("SweepLargeObjects", GetTimings());
+  TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
   RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
@@ -1215,7 +1199,7 @@
 
 // Scan anything that's on the mark stack.
 void MarkSweep::ProcessMarkStack(bool paused) {
-  GetTimings()->StartSplit(paused ? "(Paused)ProcessMarkStack" : "ProcessMarkStack");
+  TimingLogger::ScopedTiming t(paused ? "(Paused)ProcessMarkStack" : __FUNCTION__, GetTimings());
   size_t thread_count = GetThreadCount(paused);
   if (kParallelProcessMarkStack && thread_count > 1 &&
       mark_stack_->Size() >= kMinimumParallelMarkStackSize) {
@@ -1248,7 +1232,6 @@
       ScanObject(obj);
     }
   }
-  GetTimings()->EndSplit();
 }
 
 inline bool MarkSweep::IsMarked(const Object* object) const {
@@ -1262,7 +1245,7 @@
 }
 
 void MarkSweep::FinishPhase() {
-  TimingLogger::ScopedSplit split("FinishPhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   if (kCountScannedTypes) {
     VLOG(gc) << "MarkSweep scanned classes=" << class_count_.LoadRelaxed()
         << " arrays=" << array_count_.LoadRelaxed() << " other=" << other_count_.LoadRelaxed();
@@ -1299,9 +1282,8 @@
     // not be in use.
     GetHeap()->AssertAllBumpPointerSpaceThreadLocalBuffersAreRevoked();
   } else {
-    GetTimings()->StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+    TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
     GetHeap()->RevokeAllThreadLocalBuffers();
-    GetTimings()->EndSplit();
   }
 }
 
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 8a3ac9d..cabfe21 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -59,7 +59,7 @@
 static constexpr size_t kLargeObjectBytesAllocatedThreshold = 16 * MB;
 
 void SemiSpace::BindBitmaps() {
-  GetTimings()->StartSplit("BindBitmaps");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
@@ -83,7 +83,6 @@
     // We won't collect the large object space if a bump pointer space only collection.
     is_large_object_space_immune_ = true;
   }
-  GetTimings()->EndSplit();
 }
 
 SemiSpace::SemiSpace(Heap* heap, bool generational, const std::string& name_prefix)
@@ -131,7 +130,7 @@
 }
 
 void SemiSpace::InitializePhase() {
-  TimingLogger::ScopedSplit split("InitializePhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_region_.Reset();
@@ -151,7 +150,6 @@
 }
 
 void SemiSpace::ProcessReferences(Thread* self) {
-  TimingLogger::ScopedSplit split("ProcessReferences", GetTimings());
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   GetHeap()->GetReferenceProcessor()->ProcessReferences(
       false, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(),
@@ -159,6 +157,7 @@
 }
 
 void SemiSpace::MarkingPhase() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   CHECK(Locks::mutator_lock_->IsExclusiveHeld(self_));
   if (kStoreStackTraces) {
     Locks::mutator_lock_->AssertExclusiveHeld(self_);
@@ -197,10 +196,7 @@
     // If generational, clear soft references if a whole heap collection.
     GetCurrentIteration()->SetClearSoftReferences(true);
   }
-
   Locks::mutator_lock_->AssertExclusiveHeld(self_);
-
-  TimingLogger::ScopedSplit split("MarkingPhase", GetTimings());
   if (generational_) {
     // If last_gc_to_space_end_ is out of the bounds of the from-space
     // (the to-space from last GC), then point it to the beginning of
@@ -218,12 +214,13 @@
   heap_->ProcessCards(GetTimings(), kUseRememberedSet && generational_);
   // Clear the whole card table since we can not Get any additional dirty cards during the
   // paused GC. This saves memory but only works for pause the world collectors.
-  GetTimings()->NewSplit("ClearCardTable");
+  t.NewTiming("ClearCardTable");
   heap_->GetCardTable()->ClearCardTable();
   // Need to do this before the checkpoint since we don't want any threads to add references to
   // the live stack during the recursive mark.
-  GetTimings()->NewSplit("SwapStacks");
+  t.NewTiming("SwapStacks");
   if (kUseThreadLocalAllocationStack) {
+    TimingLogger::ScopedTiming t("RevokeAllThreadLocalAllocationStacks", GetTimings());
     heap_->RevokeAllThreadLocalAllocationStacks(self_);
   }
   heap_->SwapStacks(self_);
@@ -240,7 +237,6 @@
     ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     SweepSystemWeaks();
   }
-  GetTimings()->NewSplit("RecordFree");
   // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
   // before they are properly counted.
   RevokeAllThreadLocalBuffers();
@@ -257,9 +253,7 @@
   from_space_->Clear();
   VLOG(heap) << "Protecting from_space_: " << *from_space_;
   from_space_->GetMemMap()->Protect(kProtectFromSpace ? PROT_NONE : PROT_READ);
-  GetTimings()->StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
-  GetTimings()->EndSplit();
   if (swap_semi_spaces_) {
     heap_->SwapSemiSpaces();
   }
@@ -272,7 +266,7 @@
       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
       if (table != nullptr) {
         // TODO: Improve naming.
-        TimingLogger::ScopedSplit split(
+        TimingLogger::ScopedTiming t(
             space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" :
                                      "UpdateAndMarkImageModUnionTable",
                                      GetTimings());
@@ -354,12 +348,14 @@
 };
 
 void SemiSpace::MarkReachableObjects() {
-  GetTimings()->StartSplit("MarkStackAsLive");
-  accounting::ObjectStack* live_stack = heap_->GetLiveStack();
-  heap_->MarkAllocStackAsLive(live_stack);
-  live_stack->Reset();
-
-  GetTimings()->NewSplit("UpdateAndMarkRememberedSets");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  {
+    TimingLogger::ScopedTiming t2("MarkStackAsLive", GetTimings());
+    accounting::ObjectStack* live_stack = heap_->GetLiveStack();
+    heap_->MarkAllocStackAsLive(live_stack);
+    live_stack->Reset();
+  }
+  t.NewTiming("UpdateAndMarkRememberedSets");
   for (auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune and has no mod union table (the
     // non-moving space when the bump pointer space only collection is
@@ -398,7 +394,7 @@
   }
 
   if (is_large_object_space_immune_) {
-    GetTimings()->NewSplit("VisitLargeObjects");
+    TimingLogger::ScopedTiming t("VisitLargeObjects", GetTimings());
     DCHECK(generational_ && !whole_heap_collection_);
     // Delay copying the live set to the marked set until here from
     // BindBitmaps() as the large objects on the allocation stack may
@@ -416,31 +412,24 @@
                                         reinterpret_cast<uintptr_t>(large_object_space->End()),
                                         visitor);
   }
-  GetTimings()->EndSplit();
   // Recursively process the mark stack.
   ProcessMarkStack();
 }
 
 void SemiSpace::ReclaimPhase() {
-  TimingLogger::ScopedSplit split("ReclaimPhase", GetTimings());
-  {
-    WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
-    // Reclaim unmarked objects.
-    Sweep(false);
-    // Swap the live and mark bitmaps for each space which we modified space. This is an
-    // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
-    // bitmaps.
-    GetTimings()->StartSplit("SwapBitmaps");
-    SwapBitmaps();
-    GetTimings()->EndSplit();
-    // Unbind the live and mark bitmaps.
-    TimingLogger::ScopedSplit split("UnBindBitmaps", GetTimings());
-    GetHeap()->UnBindBitmaps();
-  }
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
+  // Reclaim unmarked objects.
+  Sweep(false);
+  // Swap the live and mark bitmaps for each space which we modified space. This is an
+  // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
+  // bitmaps.
+  SwapBitmaps();
+  // Unbind the live and mark bitmaps.
+  GetHeap()->UnBindBitmaps();
   if (saved_bytes_ > 0) {
     VLOG(heap) << "Avoided dirtying " << PrettySize(saved_bytes_);
   }
-
   if (generational_) {
     // Record the end (top) of the to space so we can distinguish
     // between objects that were allocated since the last GC and the
@@ -629,8 +618,7 @@
 
 // Marks all objects in the root set.
 void SemiSpace::MarkRoots() {
-  GetTimings()->NewSplit("MarkRoots");
-  // TODO: Visit up image roots as well?
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime::Current()->VisitRoots(MarkRootCallback, this);
 }
 
@@ -655,9 +643,8 @@
 }
 
 void SemiSpace::SweepSystemWeaks() {
-  GetTimings()->StartSplit("SweepSystemWeaks");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   Runtime::Current()->SweepSystemWeaks(MarkedForwardingAddressCallback, this);
-  GetTimings()->EndSplit();
 }
 
 bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const {
@@ -665,15 +652,15 @@
 }
 
 void SemiSpace::Sweep(bool swap_bitmaps) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   DCHECK(mark_stack_->IsEmpty());
-  TimingLogger::ScopedSplit split("Sweep", GetTimings());
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
       if (!ShouldSweepSpace(alloc_space)) {
         continue;
       }
-      TimingLogger::ScopedSplit split(
+      TimingLogger::ScopedTiming split(
           alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", GetTimings());
       RecordFree(alloc_space->Sweep(swap_bitmaps));
     }
@@ -685,7 +672,7 @@
 
 void SemiSpace::SweepLargeObjects(bool swap_bitmaps) {
   DCHECK(!is_large_object_space_immune_);
-  TimingLogger::ScopedSplit split("SweepLargeObjects", GetTimings());
+  TimingLogger::ScopedTiming split("SweepLargeObjects", GetTimings());
   RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps));
 }
 
@@ -726,6 +713,7 @@
 
 // Scan anything that's on the mark stack.
 void SemiSpace::ProcessMarkStack() {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   space::MallocSpace* promo_dest_space = nullptr;
   accounting::ContinuousSpaceBitmap* live_bitmap = nullptr;
   if (generational_ && !whole_heap_collection_) {
@@ -739,7 +727,6 @@
     DCHECK(mark_bitmap != nullptr);
     DCHECK_EQ(live_bitmap, mark_bitmap);
   }
-  GetTimings()->StartSplit("ProcessMarkStack");
   while (!mark_stack_->IsEmpty()) {
     Object* obj = mark_stack_->PopBack();
     if (generational_ && !whole_heap_collection_ && promo_dest_space->HasAddress(obj)) {
@@ -750,7 +737,6 @@
     }
     ScanObject(obj);
   }
-  GetTimings()->EndSplit();
 }
 
 inline Object* SemiSpace::GetMarkedForwardAddress(mirror::Object* obj) const
@@ -781,7 +767,7 @@
 }
 
 void SemiSpace::FinishPhase() {
-  TimingLogger::ScopedSplit split("FinishPhase", GetTimings());
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Null the "to" and "from" spaces since compacting from one to the other isn't valid until
   // further action is done by the heap.
   to_space_ = nullptr;
@@ -822,9 +808,8 @@
 }
 
 void SemiSpace::RevokeAllThreadLocalBuffers() {
-  GetTimings()->StartSplit("(Paused)RevokeAllThreadLocalBuffers");
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   GetHeap()->RevokeAllThreadLocalBuffers();
-  GetTimings()->EndSplit();
 }
 
 }  // namespace collector
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6c63e5f..03ae8c3 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1580,6 +1580,7 @@
 };
 
 void Heap::UnBindBitmaps() {
+  TimingLogger::ScopedTiming t("UnBindBitmaps", GetCurrentGcIteration()->GetTimings());
   for (const auto& space : GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
@@ -1848,7 +1849,7 @@
   const size_t duration = GetCurrentGcIteration()->GetDurationNs();
   const std::vector<uint64_t>& pause_times = GetCurrentGcIteration()->GetPauseTimes();
   // Print the GC if it is an explicit GC (e.g. Runtime.gc()) or a slow GC
-  // (mutator time blocked >=  long_pause_log_threshold_).
+  // (mutator time blocked >= long_pause_log_threshold_).
   bool log_gc = gc_cause == kGcCauseExplicit;
   if (!log_gc && CareAboutPauseTimes()) {
     // GC for alloc pauses the allocating thread, so consider it as a pause.
@@ -2314,6 +2315,7 @@
 }
 
 void Heap::ProcessCards(TimingLogger* timings, bool use_rem_sets) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, timings);
   // Clear cards and keep track of cards cleared in the mod-union table.
   for (const auto& space : continuous_spaces_) {
     accounting::ModUnionTable* table = FindModUnionTableFromSpace(space);
@@ -2321,15 +2323,15 @@
     if (table != nullptr) {
       const char* name = space->IsZygoteSpace() ? "ZygoteModUnionClearCards" :
           "ImageModUnionClearCards";
-      TimingLogger::ScopedSplit split(name, timings);
+      TimingLogger::ScopedTiming t(name, timings);
       table->ClearCards();
     } else if (use_rem_sets && rem_set != nullptr) {
       DCHECK(collector::SemiSpace::kUseRememberedSet && collector_type_ == kCollectorTypeGSS)
           << static_cast<int>(collector_type_);
-      TimingLogger::ScopedSplit split("AllocSpaceRemSetClearCards", timings);
+      TimingLogger::ScopedTiming t("AllocSpaceRemSetClearCards", timings);
       rem_set->ClearCards();
     } else if (space->GetType() != space::kSpaceTypeBumpPointerSpace) {
-      TimingLogger::ScopedSplit split("AllocSpaceClearCards", timings);
+      TimingLogger::ScopedTiming t("AllocSpaceClearCards", timings);
       // No mod union table for the AllocSpace. Age the cards so that the GC knows that these cards
       // were dirty before the GC started.
       // TODO: Need to use atomic for the case where aged(cleaning thread) -> dirty(other thread)
@@ -2349,8 +2351,9 @@
 void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) {
   Thread* const self = Thread::Current();
   TimingLogger* const timings = current_gc_iteration_.GetTimings();
+  TimingLogger::ScopedTiming t(__FUNCTION__, timings);
   if (verify_pre_gc_heap_) {
-    TimingLogger::ScopedSplit split("PreGcVerifyHeapReferences", timings);
+    TimingLogger::ScopedTiming t("(Paused)PreGcVerifyHeapReferences", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     size_t failures = VerifyHeapReferences();
     if (failures > 0) {
@@ -2360,7 +2363,7 @@
   }
   // Check that all objects which reference things in the live stack are on dirty cards.
   if (verify_missing_card_marks_) {
-    TimingLogger::ScopedSplit split("PreGcVerifyMissingCardMarks", timings);
+    TimingLogger::ScopedTiming t("(Paused)PreGcVerifyMissingCardMarks", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     SwapStacks(self);
     // Sort the live stack so that we can quickly binary search it later.
@@ -2370,7 +2373,7 @@
     SwapStacks(self);
   }
   if (verify_mod_union_table_) {
-    TimingLogger::ScopedSplit split("PreGcVerifyModUnionTables", timings);
+    TimingLogger::ScopedTiming t("(Paused)PreGcVerifyModUnionTables", timings);
     ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
     for (const auto& table_pair : mod_union_tables_) {
       accounting::ModUnionTable* mod_union_table = table_pair.second;
@@ -2397,10 +2400,11 @@
 void Heap::PreSweepingGcVerification(collector::GarbageCollector* gc) {
   Thread* const self = Thread::Current();
   TimingLogger* const timings = current_gc_iteration_.GetTimings();
+  TimingLogger::ScopedTiming t(__FUNCTION__, timings);
   // Called before sweeping occurs since we want to make sure we are not going so reclaim any
   // reachable objects.
   if (verify_pre_sweeping_heap_) {
-    TimingLogger::ScopedSplit split("PostSweepingVerifyHeapReferences", timings);
+    TimingLogger::ScopedTiming t("(Paused)PostSweepingVerifyHeapReferences", timings);
     CHECK_NE(self->GetState(), kRunnable);
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     // Swapping bound bitmaps does nothing.
@@ -2423,16 +2427,17 @@
   // Only pause if we have to do some verification.
   Thread* const self = Thread::Current();
   TimingLogger* const timings = GetCurrentGcIteration()->GetTimings();
+  TimingLogger::ScopedTiming t(__FUNCTION__, timings);
   if (verify_system_weaks_) {
     ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_);
     collector::MarkSweep* mark_sweep = down_cast<collector::MarkSweep*>(gc);
     mark_sweep->VerifySystemWeaks();
   }
   if (verify_post_gc_rosalloc_) {
-    RosAllocVerification(timings, "PostGcRosAllocVerification");
+    RosAllocVerification(timings, "(Paused)PostGcRosAllocVerification");
   }
   if (verify_post_gc_heap_) {
-    TimingLogger::ScopedSplit split("PostGcVerifyHeapReferences", timings);
+    TimingLogger::ScopedTiming t("(Paused)PostGcVerifyHeapReferences", timings);
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     size_t failures = VerifyHeapReferences();
     if (failures > 0) {
@@ -2450,7 +2455,7 @@
 }
 
 void Heap::RosAllocVerification(TimingLogger* timings, const char* name) {
-  TimingLogger::ScopedSplit split(name, timings);
+  TimingLogger::ScopedTiming t(name, timings);
   for (const auto& space : continuous_spaces_) {
     if (space->IsRosAllocSpace()) {
       VLOG(heap) << name << " : " << space->GetName();
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 292781e..e52bc1f 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -110,6 +110,7 @@
                                            MarkObjectCallback* mark_object_callback,
                                            ProcessMarkStackCallback* process_mark_stack_callback,
                                            void* arg) {
+  TimingLogger::ScopedTiming t(concurrent ? __FUNCTION__ : "(Paused)ProcessReferences", timings);
   Thread* self = Thread::Current();
   {
     MutexLock mu(self, lock_);
@@ -118,10 +119,9 @@
     process_references_args_.arg_ = arg;
     CHECK_EQ(slow_path_enabled_, concurrent) << "Slow path must be enabled iff concurrent";
   }
-  timings->StartSplit(concurrent ? "ProcessReferences" : "(Paused)ProcessReferences");
   // Unless required to clear soft references with white references, preserve some white referents.
   if (!clear_soft_references) {
-    TimingLogger::ScopedSplit split(concurrent ? "ForwardSoftReferences" :
+    TimingLogger::ScopedTiming split(concurrent ? "ForwardSoftReferences" :
         "(Paused)ForwardSoftReferences", timings);
     if (concurrent) {
       StartPreservingReferences(self);
@@ -138,7 +138,7 @@
   soft_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
   weak_reference_queue_.ClearWhiteReferences(&cleared_references_, is_marked_callback, arg);
   {
-    TimingLogger::ScopedSplit split(concurrent ? "EnqueueFinalizerReferences" :
+    TimingLogger::ScopedTiming t(concurrent ? "EnqueueFinalizerReferences" :
         "(Paused)EnqueueFinalizerReferences", timings);
     if (concurrent) {
       StartPreservingReferences(self);
@@ -173,7 +173,6 @@
       DisableSlowPath(self);
     }
   }
-  timings->EndSplit();
 }
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
diff --git a/runtime/utils.cc b/runtime/utils.cc
index f60f795..e5b8b22 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -468,11 +468,12 @@
                       negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
 }
 
-std::string PrettyDuration(uint64_t nano_duration) {
+std::string PrettyDuration(uint64_t nano_duration, size_t max_fraction_digits) {
   if (nano_duration == 0) {
     return "0";
   } else {
-    return FormatDuration(nano_duration, GetAppropriateTimeUnit(nano_duration));
+    return FormatDuration(nano_duration, GetAppropriateTimeUnit(nano_duration),
+                          max_fraction_digits);
   }
 }
 
@@ -509,45 +510,41 @@
   return 0;
 }
 
-std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit) {
-  const char* unit = NULL;
+std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit,
+                           size_t max_fraction_digits) {
+  const char* unit = nullptr;
   uint64_t divisor = GetNsToTimeUnitDivisor(time_unit);
-  uint32_t zero_fill = 1;
   switch (time_unit) {
     case kTimeUnitSecond:
       unit = "s";
-      zero_fill = 9;
       break;
     case kTimeUnitMillisecond:
       unit = "ms";
-      zero_fill = 6;
       break;
     case kTimeUnitMicrosecond:
       unit = "us";
-      zero_fill = 3;
       break;
     case kTimeUnitNanosecond:
       unit = "ns";
-      zero_fill = 0;
       break;
   }
-
-  uint64_t whole_part = nano_duration / divisor;
+  const uint64_t whole_part = nano_duration / divisor;
   uint64_t fractional_part = nano_duration % divisor;
   if (fractional_part == 0) {
     return StringPrintf("%" PRIu64 "%s", whole_part, unit);
   } else {
-    while ((fractional_part % 1000) == 0) {
-      zero_fill -= 3;
-      fractional_part /= 1000;
+    static constexpr size_t kMaxDigits = 30;
+    char fraction_buffer[kMaxDigits];
+    char* ptr = fraction_buffer;
+    uint64_t multiplier = 10;
+    // Precondition: fractional_part != 0 (checked above); otherwise this loop would not terminate.
+    while (fractional_part * multiplier < divisor) {
+      multiplier *= 10;
+      *ptr++ = '0';
     }
-    if (zero_fill == 3) {
-      return StringPrintf("%" PRIu64 ".%03" PRIu64 "%s", whole_part, fractional_part, unit);
-    } else if (zero_fill == 6) {
-      return StringPrintf("%" PRIu64 ".%06" PRIu64 "%s", whole_part, fractional_part, unit);
-    } else {
-      return StringPrintf("%" PRIu64 ".%09" PRIu64 "%s", whole_part, fractional_part, unit);
-    }
+    sprintf(ptr, "%" PRIu64, fractional_part);
+    fraction_buffer[std::min(kMaxDigits - 1, max_fraction_digits)] = '\0';
+    return StringPrintf("%" PRIu64 ".%s%s", whole_part, fraction_buffer, unit);
   }
 }
 
diff --git a/runtime/utils.h b/runtime/utils.h
index 6d52459..a61d30f 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -265,10 +265,11 @@
 // Returns a human-readable time string which prints every nanosecond while trying to limit the
 // number of trailing zeros. Prints using the largest human readable unit up to a second.
 // e.g. "1ms", "1.000000001s", "1.001us"
-std::string PrettyDuration(uint64_t nano_duration);
+std::string PrettyDuration(uint64_t nano_duration, size_t max_fraction_digits = 3);
 
 // Format a nanosecond time to specified units.
-std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit);
+std::string FormatDuration(uint64_t nano_duration, TimeUnit time_unit,
+                           size_t max_fraction_digits);
 
 // Get the appropriate unit for a nanosecond duration.
 TimeUnit GetAppropriateTimeUnit(uint64_t nano_duration);
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index 4a1e477..7cd5980 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -171,14 +171,15 @@
   EXPECT_EQ("10s", PrettyDuration(10 * one_sec));
   EXPECT_EQ("100s", PrettyDuration(100 * one_sec));
   EXPECT_EQ("1.001s", PrettyDuration(1 * one_sec + one_ms));
-  EXPECT_EQ("1.000001s", PrettyDuration(1 * one_sec + one_us));
-  EXPECT_EQ("1.000000001s", PrettyDuration(1 * one_sec + 1));
+  EXPECT_EQ("1.000001s", PrettyDuration(1 * one_sec + one_us, 6));
+  EXPECT_EQ("1.000000001s", PrettyDuration(1 * one_sec + 1, 9));
+  EXPECT_EQ("1.000s", PrettyDuration(1 * one_sec + one_us, 3));
 
   EXPECT_EQ("1ms", PrettyDuration(1 * one_ms));
   EXPECT_EQ("10ms", PrettyDuration(10 * one_ms));
   EXPECT_EQ("100ms", PrettyDuration(100 * one_ms));
   EXPECT_EQ("1.001ms", PrettyDuration(1 * one_ms + one_us));
-  EXPECT_EQ("1.000001ms", PrettyDuration(1 * one_ms + 1));
+  EXPECT_EQ("1.000001ms", PrettyDuration(1 * one_ms + 1, 6));
 
   EXPECT_EQ("1us", PrettyDuration(1 * one_us));
   EXPECT_EQ("10us", PrettyDuration(10 * one_us));