Merge "ART: Streaming trace mode"
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index f8c0e83..51600f7 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -50,11 +50,6 @@
 
 const bool kVerboseInstrumentation = false;
 
-// Do we want to deoptimize for method entry and exit listeners or just try to intercept
-// invocations? Deoptimization forces all code to run in the interpreter and considerably hurts the
-// application's performance.
-static constexpr bool kDeoptimizeForAccurateMethodEntryExitListeners = true;
-
 static bool InstallStubsClassVisitor(mirror::Class* klass, void* arg)
     EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
   Instrumentation* instrumentation = reinterpret_cast<Instrumentation*>(arg);
@@ -846,8 +841,7 @@
   ConfigureStubs(false, false);
 }
 
-void Instrumentation::EnableMethodTracing() {
-  bool require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners;
+void Instrumentation::EnableMethodTracing(bool require_interpreter) {
   ConfigureStubs(!require_interpreter, require_interpreter);
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 41821a6..8b7fcca 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -49,6 +49,11 @@
   kNumHandlerTables
 };
 
+// Do we want to deoptimize for method entry and exit listeners or just try to intercept
+// invocations? Deoptimization forces all code to run in the interpreter and considerably hurts the
+// application's performance.
+static constexpr bool kDeoptimizeForAccurateMethodEntryExitListeners = true;
+
 // Instrumentation event listener API. Registered listeners will get the appropriate call back for
 // the events they are listening for. The call backs supply the thread, method and dex_pc the event
 // occurred upon. The thread may or may not be Thread::Current().
@@ -170,7 +175,8 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Enable method tracing by installing instrumentation entry/exit stubs.
-  void EnableMethodTracing()
+  void EnableMethodTracing(
+      bool require_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::classlinker_classes_lock_);
 
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 022c56f..af01a02 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -18,14 +18,18 @@
 
 #include <stdlib.h>
 
+#include <cutils/process_name.h>
+
 #include "arch/instruction_set.h"
 #include "debugger.h"
 #include "java_vm_ext.h"
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "JNIHelp.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedUtfChars.h"
 #include "thread-inl.h"
+#include "trace.h"
 
 #if defined(__linux__)
 #include <sys/prctl.h>
@@ -121,6 +125,11 @@
 
   runtime->PreZygoteFork();
 
+  if (Trace::GetMethodTracingMode() != TracingMode::kTracingInactive) {
+    // Tracing active, pause it.
+    Trace::Pause();
+  }
+
   // Grab thread before fork potentially makes Thread::pthread_key_self_ unusable.
   return reinterpret_cast<jlong>(ThreadForEnv(env));
 }
@@ -132,6 +141,49 @@
   thread->InitAfterFork();
   EnableDebugFeatures(debug_flags);
 
+  // Update tracing.
+  if (Trace::GetMethodTracingMode() != TracingMode::kTracingInactive) {
+    Trace::TraceOutputMode output_mode = Trace::GetOutputMode();
+    Trace::TraceMode trace_mode = Trace::GetMode();
+
+    // Just drop it.
+    Trace::Abort();
+
+    // Only restart if it was streaming mode.
+    // TODO: Expose buffer size, so we can also do file mode.
+    if (output_mode == Trace::TraceOutputMode::kStreaming) {
+      const char* proc_name_cutils = get_process_name();
+      std::string proc_name;
+      if (proc_name_cutils != nullptr) {
+        proc_name = proc_name_cutils;
+      }
+      if (proc_name_cutils == nullptr || proc_name == "zygote" || proc_name == "zygote64") {
+        // Either no process name, or the name hasn't been changed, yet. Just use pid.
+        pid_t pid = getpid();
+        proc_name = StringPrintf("%u", static_cast<uint32_t>(pid));
+      }
+
+      std::string profiles_dir(GetDalvikCache("profiles", false /* create_if_absent */));
+      if (!profiles_dir.empty()) {
+        std::string trace_file = StringPrintf("%s/%s.trace.bin", profiles_dir.c_str(),
+                                              proc_name.c_str());
+        Trace::Start(trace_file.c_str(),
+                     -1,
+                     -1,  // TODO: Expose buffer size.
+                     0,   // TODO: Expose flags.
+                     output_mode,
+                     trace_mode,
+                     0);  // TODO: Expose interval.
+        if (thread->IsExceptionPending()) {
+          ScopedObjectAccess soa(env);
+          thread->ClearException();
+        }
+      } else {
+        LOG(ERROR) << "Profiles dir is empty?!?!";
+      }
+    }
+  }
+
   if (instruction_set != nullptr) {
     ScopedUtfChars isa_string(env, instruction_set);
     InstructionSet isa = GetInstructionSetFromString(isa_string.c_str());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index c23f744..0758b27 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -216,6 +216,8 @@
       .Define("-Xmethod-trace-file-size:_")
           .WithType<unsigned int>()
           .IntoKey(M::MethodTraceFileSize)
+      .Define("-Xmethod-trace-stream")
+          .IntoKey(M::MethodTraceStreaming)
       .Define("-Xprofile:_")
           .WithType<TraceClockSource>()
           .WithValueMap({{"threadcpuclock", TraceClockSource::kThreadCpu},
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 543b9dc..7bebb96 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -552,6 +552,17 @@
     StartProfiler(profile_output_filename_.c_str());
   }
 
+  if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
+    ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
+    Trace::Start(trace_config_->trace_file.c_str(),
+                 -1,
+                 static_cast<int>(trace_config_->trace_file_size),
+                 0,
+                 trace_config_->trace_output_mode,
+                 trace_config_->trace_mode,
+                 0);
+  }
+
   return true;
 }
 
@@ -1000,7 +1011,9 @@
     trace_config_->trace_file = runtime_options.ReleaseOrDefault(Opt::MethodTraceFile);
     trace_config_->trace_file_size = runtime_options.ReleaseOrDefault(Opt::MethodTraceFileSize);
     trace_config_->trace_mode = Trace::TraceMode::kMethodTracing;
-    trace_config_->trace_output_mode = Trace::TraceOutputMode::kFile;
+    trace_config_->trace_output_mode = runtime_options.Exists(Opt::MethodTraceStreaming) ?
+        Trace::TraceOutputMode::kStreaming :
+        Trace::TraceOutputMode::kFile;
   }
 
   {
@@ -1026,17 +1039,6 @@
   // TODO: move this to just be an Trace::Start argument
   Trace::SetDefaultClockSource(runtime_options.GetOrDefault(Opt::ProfileClock));
 
-  if (trace_config_.get() != nullptr) {
-    ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
-    Trace::Start(trace_config_->trace_file.c_str(),
-                 -1,
-                 static_cast<int>(trace_config_->trace_file_size),
-                 0,
-                 trace_config_->trace_output_mode,
-                 trace_config_->trace_mode,
-                 0);
-  }
-
   // Pre-allocate an OutOfMemoryError for the double-OOME case.
   self->ThrowNewException("Ljava/lang/OutOfMemoryError;",
                           "OutOfMemoryError thrown while trying to throw OutOfMemoryError; "
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 339f925..eff787a 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -95,6 +95,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
 RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/method-trace-file.bin")
 RUNTIME_OPTIONS_KEY (unsigned int,        MethodTraceFileSize,            10 * MB)
+RUNTIME_OPTIONS_KEY (Unit,                MethodTraceStreaming)
 RUNTIME_OPTIONS_KEY (TraceClockSource,    ProfileClock,                   kDefaultTraceClockSource)  // -Xprofile:
 RUNTIME_OPTIONS_KEY (TestProfilerOptions, ProfilerOpts)  // -Xenable-profiler, -Xprofile-*
 RUNTIME_OPTIONS_KEY (std::string,         Compiler)
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 7326865..5322f9f 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -31,7 +31,7 @@
 #include "instrumentation.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
+#include "mirror/dex_cache-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "os.h"
@@ -85,9 +85,12 @@
     kTraceMethodActionMask = 0x03,  // two bits
 };
 
+static constexpr uint8_t kOpNewMethod = 1U;
+static constexpr uint8_t kOpNewThread = 2U;
+
 class BuildStackTraceVisitor : public StackVisitor {
  public:
-  explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, NULL),
+  explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, nullptr),
       method_trace_(Trace::AllocStackTrace()) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -118,7 +121,7 @@
 
 TraceClockSource Trace::default_clock_source_ = kDefaultTraceClockSource;
 
-Trace* volatile Trace::the_trace_ = NULL;
+Trace* volatile Trace::the_trace_ = nullptr;
 pthread_t Trace::sampling_pthread_ = 0U;
 std::unique_ptr<std::vector<mirror::ArtMethod*>> Trace::temp_stack_trace_;
 
@@ -138,7 +141,7 @@
 }
 
 std::vector<mirror::ArtMethod*>* Trace::AllocStackTrace() {
-  if (temp_stack_trace_.get() != NULL) {
+  if (temp_stack_trace_.get() != nullptr) {
     return temp_stack_trace_.release();
   } else {
     return new std::vector<mirror::ArtMethod*>();
@@ -246,7 +249,7 @@
 static void ClearThreadStackTraceAndClockBase(Thread* thread, void* arg ATTRIBUTE_UNUSED) {
   thread->SetTraceClockBase(0);
   std::vector<mirror::ArtMethod*>* stack_trace = thread->GetStackTraceSample();
-  thread->SetStackTraceSample(NULL);
+  thread->SetStackTraceSample(nullptr);
   delete stack_trace;
 }
 
@@ -260,7 +263,7 @@
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
-  if (old_stack_trace == NULL) {
+  if (old_stack_trace == nullptr) {
     // If there's no previous stack trace sample for this thread, log an entry event for all
     // methods in the trace.
     for (std::vector<mirror::ArtMethod*>::reverse_iterator rit = stack_trace->rbegin();
@@ -308,7 +311,7 @@
     {
       MutexLock mu(self, *Locks::trace_lock_);
       the_trace = the_trace_;
-      if (the_trace == NULL) {
+      if (the_trace == nullptr) {
         break;
       }
     }
@@ -323,7 +326,7 @@
   }
 
   runtime->DetachCurrentThread();
-  return NULL;
+  return nullptr;
 }
 
 void Trace::Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
@@ -331,7 +334,7 @@
   Thread* self = Thread::Current();
   {
     MutexLock mu(self, *Locks::trace_lock_);
-    if (the_trace_ != NULL) {
+    if (the_trace_ != nullptr) {
       LOG(ERROR) << "Trace already in progress, ignoring this request";
       return;
     }
@@ -354,7 +357,7 @@
       trace_file.reset(new File(trace_fd, "tracefile"));
       trace_file->DisableAutoClose();
     }
-    if (trace_file.get() == NULL) {
+    if (trace_file.get() == nullptr) {
       PLOG(ERROR) << "Unable to open trace file '" << trace_filename << "'";
       ScopedObjectAccess soa(self);
       ThrowRuntimeException("Unable to open trace file '%s'", trace_filename);
@@ -372,20 +375,23 @@
   // Create Trace object.
   {
     MutexLock mu(self, *Locks::trace_lock_);
-    if (the_trace_ != NULL) {
+    if (the_trace_ != nullptr) {
       LOG(ERROR) << "Trace already in progress, ignoring this request";
     } else {
       enable_stats = (flags && kTraceCountAllocs) != 0;
-      the_trace_ = new Trace(trace_file.release(), buffer_size, flags, trace_mode);
+      the_trace_ = new Trace(trace_file.release(), trace_filename, buffer_size, flags, output_mode,
+                             trace_mode);
       if (trace_mode == TraceMode::kSampling) {
-        CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, NULL, &RunSamplingThread,
+        CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread,
                                             reinterpret_cast<void*>(interval_us)),
                                             "Sampling profiler thread");
+        the_trace_->interval_us_ = interval_us;
       } else {
         runtime->GetInstrumentation()->AddListener(the_trace_,
                                                    instrumentation::Instrumentation::kMethodEntered |
                                                    instrumentation::Instrumentation::kMethodExited |
                                                    instrumentation::Instrumentation::kMethodUnwind);
+        // TODO: In full-PIC mode, we don't need to fully deopt.
         runtime->GetInstrumentation()->EnableMethodTracing();
       }
     }
@@ -399,18 +405,18 @@
   }
 }
 
-void Trace::Stop() {
+void Trace::StopTracing(bool finish_tracing, bool flush_file) {
   bool stop_alloc_counting = false;
   Runtime* const runtime = Runtime::Current();
   Trace* the_trace = nullptr;
   pthread_t sampling_pthread = 0U;
   {
     MutexLock mu(Thread::Current(), *Locks::trace_lock_);
-    if (the_trace_ == NULL) {
+    if (the_trace_ == nullptr) {
       LOG(ERROR) << "Trace stop requested, but no trace currently running";
     } else {
       the_trace = the_trace_;
-      the_trace_ = NULL;
+      the_trace_ = nullptr;
       sampling_pthread = sampling_pthread_;
     }
   }
@@ -418,13 +424,16 @@
   // the sampling thread access a stale pointer. This finishes since the sampling thread exits when
   // the_trace_ is null.
   if (sampling_pthread != 0U) {
-    CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, NULL), "sampling thread shutdown");
+    CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
     sampling_pthread_ = 0U;
   }
   runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+
   if (the_trace != nullptr) {
-    stop_alloc_counting = (the_trace->flags_ & kTraceCountAllocs) != 0;
-    the_trace->FinishTracing();
+    stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
+    if (finish_tracing) {
+      the_trace->FinishTracing();
+    }
 
     if (the_trace->trace_mode_ == TraceMode::kSampling) {
       MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
@@ -438,8 +447,12 @@
     }
     if (the_trace->trace_file_.get() != nullptr) {
       // Do not try to erase, so flush and close explicitly.
-      if (the_trace->trace_file_->Flush() != 0) {
-        PLOG(ERROR) << "Could not flush trace file.";
+      if (flush_file) {
+        if (the_trace->trace_file_->Flush() != 0) {
+          PLOG(ERROR) << "Could not flush trace file.";
+        }
+      } else {
+        the_trace->trace_file_->MarkUnchecked();  // Do not trigger guard.
       }
       if (the_trace->trace_file_->Close() != 0) {
         PLOG(ERROR) << "Could not close trace file.";
@@ -454,15 +467,118 @@
   }
 }
 
+void Trace::Abort() {
+  // Do not write anything anymore.
+  StopTracing(false, false);
+}
+
+void Trace::Stop() {
+  // Finish writing.
+  StopTracing(true, true);
+}
+
 void Trace::Shutdown() {
   if (GetMethodTracingMode() != kTracingInactive) {
     Stop();
   }
 }
 
+void Trace::Pause() {
+  bool stop_alloc_counting = false;
+  Runtime* runtime = Runtime::Current();
+  Trace* the_trace = nullptr;
+
+  pthread_t sampling_pthread = 0U;
+  {
+    MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+    if (the_trace_ == nullptr) {
+      LOG(ERROR) << "Trace pause requested, but no trace currently running";
+      return;
+    } else {
+      the_trace = the_trace_;
+      sampling_pthread = sampling_pthread_;
+    }
+  }
+
+  if (sampling_pthread != 0U) {
+    {
+      MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+      the_trace_ = nullptr;
+    }
+    CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
+    sampling_pthread_ = 0U;
+    {
+      MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+      the_trace_ = the_trace;
+    }
+  }
+
+  if (the_trace != nullptr) {
+    runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+    stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
+
+    if (the_trace->trace_mode_ == TraceMode::kSampling) {
+      MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+      runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
+    } else {
+      runtime->GetInstrumentation()->DisableMethodTracing();
+      runtime->GetInstrumentation()->RemoveListener(the_trace,
+                                                    instrumentation::Instrumentation::kMethodEntered |
+                                                    instrumentation::Instrumentation::kMethodExited |
+                                                    instrumentation::Instrumentation::kMethodUnwind);
+    }
+    runtime->GetThreadList()->ResumeAll();
+  }
+
+  if (stop_alloc_counting) {
+    // Can be racy since SetStatsEnabled is not guarded by any locks.
+    Runtime::Current()->SetStatsEnabled(false);
+  }
+}
+
+void Trace::Resume() {
+  Thread* self = Thread::Current();
+  Trace* the_trace;
+  {
+    MutexLock mu(self, *Locks::trace_lock_);
+    if (the_trace_ == nullptr) {
+      LOG(ERROR) << "No trace to resume (or sampling mode), ignoring this request";
+      return;
+    }
+    the_trace = the_trace_;
+  }
+
+  Runtime* runtime = Runtime::Current();
+
+  // Enable count of allocs if specified in the flags.
+  bool enable_stats = (the_trace->flags_ && kTraceCountAllocs) != 0;
+
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+
+  // Reenable.
+  if (the_trace->trace_mode_ == TraceMode::kSampling) {
+    CHECK_PTHREAD_CALL(pthread_create, (&sampling_pthread_, nullptr, &RunSamplingThread,
+        reinterpret_cast<void*>(the_trace->interval_us_)), "Sampling profiler thread");
+  } else {
+    runtime->GetInstrumentation()->AddListener(the_trace,
+                                               instrumentation::Instrumentation::kMethodEntered |
+                                               instrumentation::Instrumentation::kMethodExited |
+                                               instrumentation::Instrumentation::kMethodUnwind);
+    // TODO: In full-PIC mode, we don't need to fully deopt.
+    runtime->GetInstrumentation()->EnableMethodTracing();
+  }
+
+  runtime->GetThreadList()->ResumeAll();
+
+  // Can't call this when holding the mutator lock.
+  if (enable_stats) {
+    runtime->SetStatsEnabled(true);
+  }
+}
+
 TracingMode Trace::GetMethodTracingMode() {
   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
-  if (the_trace_ == NULL) {
+  if (the_trace_ == nullptr) {
     return kTracingInactive;
   } else {
     switch (the_trace_->trace_mode_) {
@@ -476,13 +592,26 @@
   }
 }
 
-Trace::Trace(File* trace_file, int buffer_size, int flags, TraceMode trace_mode)
-    : trace_file_(trace_file), buf_(new uint8_t[buffer_size]()), flags_(flags),
-      trace_mode_(trace_mode), clock_source_(default_clock_source_),
-      buffer_size_(buffer_size), start_time_(MicroTime()),
-      clock_overhead_ns_(GetClockOverheadNanoSeconds()), cur_offset_(0), overflow_(false) {
-  // Set up the beginning of the trace.
+static constexpr size_t kStreamingBufferSize = 16 * KB;
+
+Trace::Trace(File* trace_file, const char* trace_name, int buffer_size, int flags,
+             TraceOutputMode output_mode, TraceMode trace_mode)
+    : trace_file_(trace_file),
+      buf_(new uint8_t[output_mode == TraceOutputMode::kStreaming ?
+          kStreamingBufferSize :
+          buffer_size]()),
+      flags_(flags), trace_output_mode_(output_mode), trace_mode_(trace_mode),
+      clock_source_(default_clock_source_),
+      buffer_size_(output_mode == TraceOutputMode::kStreaming ?
+          kStreamingBufferSize :
+          buffer_size),
+      start_time_(MicroTime()), clock_overhead_ns_(GetClockOverheadNanoSeconds()), cur_offset_(0),
+      overflow_(false), interval_us_(0), streaming_lock_(nullptr) {
   uint16_t trace_version = GetTraceVersion(clock_source_);
+  if (output_mode == TraceOutputMode::kStreaming) {
+    trace_version |= 0xF0U;
+  }
+  // Set up the beginning of the trace.
   memset(buf_.get(), 0, kTraceHeaderLength);
   Append4LE(buf_.get(), kTraceMagicValue);
   Append2LE(buf_.get() + 4, trace_version);
@@ -495,6 +624,16 @@
 
   // Update current offset.
   cur_offset_.StoreRelaxed(kTraceHeaderLength);
+
+  if (output_mode == TraceOutputMode::kStreaming) {
+    streaming_file_name_ = trace_name;
+    streaming_lock_ = new Mutex("tracing lock");
+    seen_threads_.reset(new ThreadIDBitSet());
+  }
+}
+
+Trace::~Trace() {
+  delete streaming_lock_;
 }
 
 static void DumpBuf(uint8_t* buf, size_t buf_size, TraceClockSource clock_source)
@@ -511,14 +650,38 @@
   }
 }
 
-void Trace::FinishTracing() {
-  // Compute elapsed time.
-  uint64_t elapsed = MicroTime() - start_time_;
+static void GetVisitedMethodsFromBitSets(
+    const std::map<mirror::DexCache*, DexIndexBitSet*>& seen_methods,
+    std::set<mirror::ArtMethod*>* visited_methods) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  for (auto& e : seen_methods) {
+    DexIndexBitSet* bit_set = e.second;
+    for (uint32_t i = 0; i < bit_set->size(); ++i) {
+      if ((*bit_set)[i]) {
+        visited_methods->insert(e.first->GetResolvedMethod(i));
+      }
+    }
+  }
+}
 
-  size_t final_offset = cur_offset_.LoadRelaxed();
+void Trace::FinishTracing() {
+  size_t final_offset = 0;
 
   std::set<mirror::ArtMethod*> visited_methods;
-  GetVisitedMethods(final_offset, &visited_methods);
+  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
+    // Write the secondary file with all the method names.
+    GetVisitedMethodsFromBitSets(seen_methods_, &visited_methods);
+
+    // Clean up.
+    for (auto& e : seen_methods_) {
+      delete e.second;
+    }
+  } else {
+    final_offset = cur_offset_.LoadRelaxed();
+    GetVisitedMethods(final_offset, &visited_methods);
+  }
+
+  // Compute elapsed time.
+  uint64_t elapsed = MicroTime() - start_time_;
 
   std::ostringstream os;
 
@@ -535,8 +698,10 @@
     os << StringPrintf("clock=wall\n");
   }
   os << StringPrintf("elapsed-time-usec=%" PRIu64 "\n", elapsed);
-  size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
-  os << StringPrintf("num-method-calls=%zd\n", num_records);
+  if (trace_output_mode_ != TraceOutputMode::kStreaming) {
+    size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
+    os << StringPrintf("num-method-calls=%zd\n", num_records);
+  }
   os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead_ns_);
   os << StringPrintf("vm=art\n");
   os << StringPrintf("pid=%d\n", getpid());
@@ -550,27 +715,44 @@
   os << StringPrintf("%cmethods\n", kTraceTokenChar);
   DumpMethodList(os, visited_methods);
   os << StringPrintf("%cend\n", kTraceTokenChar);
-
   std::string header(os.str());
-  if (trace_file_.get() == NULL) {
-    iovec iov[2];
-    iov[0].iov_base = reinterpret_cast<void*>(const_cast<char*>(header.c_str()));
-    iov[0].iov_len = header.length();
-    iov[1].iov_base = buf_.get();
-    iov[1].iov_len = final_offset;
-    Dbg::DdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
-    const bool kDumpTraceInfo = false;
-    if (kDumpTraceInfo) {
-      LOG(INFO) << "Trace sent:\n" << header;
-      DumpBuf(buf_.get(), final_offset, clock_source_);
+
+  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
+    File file;
+    if (!file.Open(streaming_file_name_ + ".sec", O_CREAT | O_WRONLY)) {
+      LOG(WARNING) << "Could not open secondary trace file!";
+      return;
     }
-  } else {
-    if (!trace_file_->WriteFully(header.c_str(), header.length()) ||
-        !trace_file_->WriteFully(buf_.get(), final_offset)) {
+    if (!file.WriteFully(header.c_str(), header.length())) {
+      file.Erase();
       std::string detail(StringPrintf("Trace data write failed: %s", strerror(errno)));
       PLOG(ERROR) << detail;
       ThrowRuntimeException("%s", detail.c_str());
     }
+    if (file.FlushCloseOrErase() != 0) {
+      PLOG(ERROR) << "Could not write secondary file";
+    }
+  } else {
+    if (trace_file_.get() == nullptr) {
+      iovec iov[2];
+      iov[0].iov_base = reinterpret_cast<void*>(const_cast<char*>(header.c_str()));
+      iov[0].iov_len = header.length();
+      iov[1].iov_base = buf_.get();
+      iov[1].iov_len = final_offset;
+      Dbg::DdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
+      const bool kDumpTraceInfo = false;
+      if (kDumpTraceInfo) {
+        LOG(INFO) << "Trace sent:\n" << header;
+        DumpBuf(buf_.get(), final_offset, clock_source_);
+      }
+    } else {
+      if (!trace_file_->WriteFully(header.c_str(), header.length()) ||
+          !trace_file_->WriteFully(buf_.get(), final_offset)) {
+        std::string detail(StringPrintf("Trace data write failed: %s", strerror(errno)));
+        PLOG(ERROR) << detail;
+        ThrowRuntimeException("%s", detail.c_str());
+      }
+    }
   }
 }
 
@@ -654,20 +836,76 @@
   }
 }
 
+bool Trace::RegisterMethod(mirror::ArtMethod* method) {
+  mirror::DexCache* dex_cache = method->GetDexCache();
+  if (dex_cache->GetResolvedMethod(method->GetDexMethodIndex()) != method) {
+    DCHECK(dex_cache->GetResolvedMethod(method->GetDexMethodIndex()) == nullptr);
+    dex_cache->SetResolvedMethod(method->GetDexMethodIndex(), method);
+  }
+  if (seen_methods_.find(dex_cache) == seen_methods_.end()) {
+    seen_methods_.insert(std::make_pair(dex_cache, new DexIndexBitSet()));
+  }
+  DexIndexBitSet* bit_set = seen_methods_.find(dex_cache)->second;
+  if (!(*bit_set)[method->GetDexMethodIndex()]) {
+    bit_set->set(method->GetDexMethodIndex());
+    return true;
+  }
+  return false;
+}
+
+bool Trace::RegisterThread(Thread* thread) {
+  pid_t tid = thread->GetTid();
+  CHECK_LT(0U, static_cast<uint32_t>(tid));
+  CHECK_LT(static_cast<uint32_t>(tid), 65536U);
+
+  if (!(*seen_threads_)[tid]) {
+    seen_threads_->set(tid);
+    return true;
+  }
+  return false;
+}
+
+static std::string GetMethodLine(mirror::ArtMethod* method)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return StringPrintf("%p\t%s\t%s\t%s\t%s\n", method,
+      PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
+      method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
+}
+
+void Trace::WriteToBuf(const uint8_t* src, size_t src_size) {
+  int32_t old_offset = cur_offset_.LoadRelaxed();
+  int32_t new_offset = old_offset + static_cast<int32_t>(src_size);
+  if (new_offset > buffer_size_) {
+    // Flush buffer.
+    if (!trace_file_->WriteFully(buf_.get(), old_offset)) {
+      PLOG(WARNING) << "Failed streaming a tracing event.";
+    }
+    old_offset = 0;
+    new_offset = static_cast<int32_t>(src_size);
+  }
+  cur_offset_.StoreRelease(new_offset);
+  // Fill in data.
+  memcpy(buf_.get() + old_offset, src, src_size);
+}
+
 void Trace::LogMethodTraceEvent(Thread* thread, mirror::ArtMethod* method,
                                 instrumentation::Instrumentation::InstrumentationEvent event,
                                 uint32_t thread_clock_diff, uint32_t wall_clock_diff) {
   // Advance cur_offset_ atomically.
   int32_t new_offset;
-  int32_t old_offset;
-  do {
-    old_offset = cur_offset_.LoadRelaxed();
-    new_offset = old_offset + GetRecordSize(clock_source_);
-    if (new_offset > buffer_size_) {
-      overflow_ = true;
-      return;
-    }
-  } while (!cur_offset_.CompareExchangeWeakSequentiallyConsistent(old_offset, new_offset));
+  int32_t old_offset = 0;
+
+  // We do a busy loop here trying to acquire the next offset.
+  if (trace_output_mode_ != TraceOutputMode::kStreaming) {
+    do {
+      old_offset = cur_offset_.LoadRelaxed();
+      new_offset = old_offset + GetRecordSize(clock_source_);
+      if (new_offset > buffer_size_) {
+        overflow_ = true;
+        return;
+      }
+    } while (!cur_offset_.CompareExchangeWeakSequentiallyConsistent(old_offset, new_offset));
+  }
 
   TraceAction action = kTraceMethodEnter;
   switch (event) {
@@ -687,7 +925,15 @@
   uint32_t method_value = EncodeTraceMethodAndAction(method, action);
 
   // Write data
-  uint8_t* ptr = buf_.get() + old_offset;
+  uint8_t* ptr;
+  static constexpr size_t kPacketSize = 14U;  // The maximum size of data in a packet.
+  uint8_t stack_buf[kPacketSize];             // Space to store a packet when in streaming mode.
+  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
+    ptr = stack_buf;
+  } else {
+    ptr = buf_.get() + old_offset;
+  }
+
   Append2LE(ptr, thread->GetTid());
   Append4LE(ptr + 2, method_value);
   ptr += 6;
@@ -699,6 +945,34 @@
   if (UseWallClock()) {
     Append4LE(ptr, wall_clock_diff);
   }
+  static_assert(kPacketSize == 2 + 4 + 4 + 4, "Packet size incorrect.");
+
+  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
+    MutexLock mu(Thread::Current(), *streaming_lock_);  // To serialize writing.
+    if (RegisterMethod(method)) {
+      // Write a special block with the name.
+      std::string method_line(GetMethodLine(method));
+      uint8_t buf2[5];
+      Append2LE(buf2, 0);
+      buf2[2] = kOpNewMethod;
+      Append2LE(buf2 + 3, static_cast<uint16_t>(method_line.length()));
+      WriteToBuf(buf2, sizeof(buf2));
+      WriteToBuf(reinterpret_cast<const uint8_t*>(method_line.c_str()), method_line.length());
+    }
+    if (RegisterThread(thread)) {
+      // It might be better to postpone this. Threads might not have received names...
+      std::string thread_name;
+      thread->GetThreadName(thread_name);
+      uint8_t buf2[7];
+      Append2LE(buf2, 0);
+      buf2[2] = kOpNewThread;
+      Append2LE(buf2 + 3, static_cast<uint16_t>(thread->GetTid()));
+      Append2LE(buf2 + 5, static_cast<uint16_t>(thread_name.length()));
+      WriteToBuf(buf2, sizeof(buf2));
+      WriteToBuf(reinterpret_cast<const uint8_t*>(thread_name.c_str()), thread_name.length());
+    }
+    WriteToBuf(stack_buf, sizeof(stack_buf));
+  }
 }
 
 void Trace::GetVisitedMethods(size_t buf_size,
@@ -716,9 +990,7 @@
 
 void Trace::DumpMethodList(std::ostream& os, const std::set<mirror::ArtMethod*>& visited_methods) {
   for (const auto& method : visited_methods) {
-    os << StringPrintf("%p\t%s\t%s\t%s\t%s\n", method,
-        PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
-        method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
+    os << GetMethodLine(method);
   }
 }
 
@@ -750,4 +1022,16 @@
   }
 }
 
+Trace::TraceOutputMode Trace::GetOutputMode() {
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  CHECK(the_trace_ != nullptr) << "Trace output mode requested, but no trace currently running";
+  return the_trace_->trace_output_mode_;
+}
+
+Trace::TraceMode Trace::GetMode() {
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  CHECK(the_trace_ != nullptr) << "Trace mode requested, but no trace currently running";
+  return the_trace_->trace_mode_;
+}
+
 }  // namespace art
diff --git a/runtime/trace.h b/runtime/trace.h
index d8bd428..b8329ff 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_TRACE_H_
 #define ART_RUNTIME_TRACE_H_
 
+#include <bitset>
+#include <map>
 #include <memory>
 #include <ostream>
 #include <set>
@@ -34,11 +36,15 @@
 
 namespace mirror {
   class ArtMethod;
+  class DexCache;
 }  // namespace mirror
 
 class ArtField;
 class Thread;
 
+using DexIndexBitSet = std::bitset<65536>;
+using ThreadIDBitSet = std::bitset<65536>;
+
 enum TracingMode {
   kTracingInactive,
   kMethodTracingActive,
@@ -53,7 +59,8 @@
 
   enum class TraceOutputMode {
     kFile,
-    kDDMS
+    kDDMS,
+    kStreaming
   };
 
   enum class TraceMode {
@@ -61,6 +68,8 @@
     kSampling
   };
 
+  ~Trace();
+
   static void SetDefaultClockSource(TraceClockSource clock_source);
 
   static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
@@ -69,7 +78,16 @@
                      Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_,
                      Locks::trace_lock_);
+  static void Pause() LOCKS_EXCLUDED(Locks::trace_lock_, Locks::thread_list_lock_);
+  static void Resume() LOCKS_EXCLUDED(Locks::trace_lock_);
+
+  // Stop tracing. This will finish the trace and write it to file/send it via DDMS.
   static void Stop()
+        LOCKS_EXCLUDED(Locks::mutator_lock_,
+                       Locks::thread_list_lock_,
+                       Locks::trace_lock_);
+  // Abort tracing. This will just stop tracing and *not* write/send the collected data.
+  static void Abort()
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
                      Locks::trace_lock_);
@@ -116,19 +134,25 @@
   // Save id and name of a thread before it exits.
   static void StoreExitingThreadInfo(Thread* thread);
 
+  static TraceOutputMode GetOutputMode() LOCKS_EXCLUDED(Locks::trace_lock_);
+  static TraceMode GetMode() LOCKS_EXCLUDED(Locks::trace_lock_);
+
  private:
-  explicit Trace(File* trace_file, int buffer_size, int flags, TraceMode trace_mode);
+  Trace(File* trace_file, const char* trace_name, int buffer_size, int flags,
+        TraceOutputMode output_mode, TraceMode trace_mode);
 
   // The sampling interval in microseconds is passed as an argument.
   static void* RunSamplingThread(void* arg) LOCKS_EXCLUDED(Locks::trace_lock_);
 
+  static void StopTracing(bool finish_tracing, bool flush_file);
   void FinishTracing() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint32_t* wall_clock_diff);
 
   void LogMethodTraceEvent(Thread* thread, mirror::ArtMethod* method,
                            instrumentation::Instrumentation::InstrumentationEvent event,
-                           uint32_t thread_clock_diff, uint32_t wall_clock_diff);
+                           uint32_t thread_clock_diff, uint32_t wall_clock_diff)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Methods to output traced methods and threads.
   void GetVisitedMethods(size_t end_offset, std::set<mirror::ArtMethod*>* visited_methods);
@@ -136,6 +160,18 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DumpThreadList(std::ostream& os) LOCKS_EXCLUDED(Locks::thread_list_lock_);
 
+  // Methods to register seen entitites in streaming mode. The methods return true if the entity
+  // is newly discovered.
+  bool RegisterMethod(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(streaming_lock_);
+  bool RegisterThread(Thread* thread)
+      EXCLUSIVE_LOCKS_REQUIRED(streaming_lock_);
+
+  // Copy a temporary buffer to the main buffer. Used for streaming. Exposed here for lock
+  // annotation.
+  void WriteToBuf(const uint8_t* src, size_t src_size)
+      EXCLUSIVE_LOCKS_REQUIRED(streaming_lock_);
+
   // Singleton instance of the Trace or NULL when no method tracing is active.
   static Trace* volatile the_trace_ GUARDED_BY(Locks::trace_lock_);
 
@@ -157,7 +193,10 @@
   // Flags enabling extra tracing of things such as alloc counts.
   const int flags_;
 
-  // True if traceview should sample instead of instrumenting method entry/exit.
+  // The kind of output for this tracing.
+  const TraceOutputMode trace_output_mode_;
+
+  // The tracing method.
   const TraceMode trace_mode_;
 
   const TraceClockSource clock_source_;
@@ -180,6 +219,15 @@
   // Map of thread ids and names that have already exited.
   SafeMap<pid_t, std::string> exited_threads_;
 
+  // Sampling profiler sampling interval.
+  int interval_us_;
+
+  // Streaming mode data.
+  std::string streaming_file_name_;
+  Mutex* streaming_lock_;
+  std::map<mirror::DexCache*, DexIndexBitSet*> seen_methods_;
+  std::unique_ptr<ThreadIDBitSet> seen_threads_;
+
   DISALLOW_COPY_AND_ASSIGN(Trace);
 };
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index f13da8b..a303aa4 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1440,32 +1440,58 @@
   }
 }
 
-std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
+static std::string GetDalvikCacheImpl(const char* subdir,
+                                      const bool create_if_absent,
+                                      const bool abort_on_error) {
   CHECK(subdir != nullptr);
   const char* android_data = GetAndroidData();
   const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   const std::string dalvik_cache = dalvik_cache_root + subdir;
-  if (create_if_absent && !OS::DirectoryExists(dalvik_cache.c_str())) {
+  if (!OS::DirectoryExists(dalvik_cache.c_str())) {
+    if (!create_if_absent) {
+      // TODO: Check callers. Traditional behavior is to not to abort, even when abort_on_error.
+      return "";
+    }
+
     // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
-    if (strcmp(android_data, "/data") != 0) {
-      int result = mkdir(dalvik_cache_root.c_str(), 0700);
-      if (result != 0 && errno != EEXIST) {
-        PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache_root;
-        return "";
+    if (strcmp(android_data, "/data") == 0) {
+      if (abort_on_error) {
+        LOG(FATAL) << "Failed to find dalvik-cache directory " << dalvik_cache
+                   << ", cannot create /data dalvik-cache.";
+        UNREACHABLE();
       }
-      result = mkdir(dalvik_cache.c_str(), 0700);
-      if (result != 0) {
+      return "";
+    }
+
+    int result = mkdir(dalvik_cache_root.c_str(), 0700);
+    if (result != 0 && errno != EEXIST) {
+      if (abort_on_error) {
+        PLOG(FATAL) << "Failed to create dalvik-cache root directory " << dalvik_cache_root;
+        UNREACHABLE();
+      }
+      return "";
+    }
+
+    result = mkdir(dalvik_cache.c_str(), 0700);
+    if (result != 0) {
+      if (abort_on_error) {
         PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache;
-        return "";
+        UNREACHABLE();
       }
-    } else {
-      LOG(FATAL) << "Failed to find dalvik-cache directory " << dalvik_cache;
       return "";
     }
   }
   return dalvik_cache;
 }
 
+std::string GetDalvikCache(const char* subdir, const bool create_if_absent) {
+  return GetDalvikCacheImpl(subdir, create_if_absent, false);
+}
+
+std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
+  return GetDalvikCacheImpl(subdir, create_if_absent, true);
+}
+
 bool GetDalvikCacheFilename(const char* location, const char* cache_location,
                             std::string* filename, std::string* error_msg) {
   if (location[0] != '/') {
diff --git a/runtime/utils.h b/runtime/utils.h
index 6dee5fe..6708c67 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -511,6 +511,9 @@
 // Find $ANDROID_DATA, /data, or return nullptr.
 const char* GetAndroidDataSafe(std::string* error_msg);
 
+// Returns the dalvik-cache location, with subdir appended. Returns the empty string if the cache
+// could not be found (or created).
+std::string GetDalvikCache(const char* subdir, bool create_if_absent = true);
 // Returns the dalvik-cache location, or dies trying. subdir will be
 // appended to the cache location.
 std::string GetDalvikCacheOrDie(const char* subdir, bool create_if_absent = true);
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index aa7bc64..6ccbd13 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -367,6 +367,15 @@
                GetDalvikCacheFilenameOrDie("/system/framework/boot.oat", "/foo").c_str());
 }
 
+TEST_F(UtilsTest, GetDalvikCache) {
+  EXPECT_STREQ("", GetDalvikCache("should-not-exist123", false).c_str());
+
+  EXPECT_STREQ((android_data_ + "/dalvik-cache/.").c_str(), GetDalvikCache(".", false).c_str());
+  EXPECT_STREQ((android_data_ + "/dalvik-cache/should-not-be-there").c_str(),
+               GetDalvikCache("should-not-be-there", true).c_str());
+}
+
+
 TEST_F(UtilsTest, GetSystemImageFilename) {
   EXPECT_STREQ("/system/framework/arm/boot.art",
                GetSystemImageFilename("/system/framework/boot.art", kArm).c_str());
diff --git a/tools/stream-trace-converter.py b/tools/stream-trace-converter.py
new file mode 100755
index 0000000..951b05b
--- /dev/null
+++ b/tools/stream-trace-converter.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script that parses a trace filed produced in streaming mode. The file is broken up into
+   a header and body part, which, when concatenated, make up a non-streaming trace file that
+   can be used with traceview."""
+
+import sys
+
+class MyException(Exception):
+  pass
+
+class BufferUnderrun(Exception):
+  pass
+
+def ReadShortLE(f):
+  byte1 = f.read(1)
+  if not byte1:
+    raise BufferUnderrun()
+  byte2 = f.read(1)
+  if not byte2:
+    raise BufferUnderrun()
+  return ord(byte1) + (ord(byte2) << 8);
+
+def WriteShortLE(f, val):
+  bytes = [ (val & 0xFF), ((val >> 8) & 0xFF) ]
+  asbytearray = bytearray(bytes)
+  f.write(asbytearray)
+
+def ReadIntLE(f):
+  byte1 = f.read(1)
+  if not byte1:
+    raise BufferUnderrun()
+  byte2 = f.read(1)
+  if not byte2:
+    raise BufferUnderrun()
+  byte3 = f.read(1)
+  if not byte3:
+    raise BufferUnderrun()
+  byte4 = f.read(1)
+  if not byte4:
+    raise BufferUnderrun()
+  return ord(byte1) + (ord(byte2) << 8) + (ord(byte3) << 16) + (ord(byte4) << 24);
+
+def WriteIntLE(f, val):
+  bytes = [ (val & 0xFF), ((val >> 8) & 0xFF), ((val >> 16) & 0xFF), ((val >> 24) & 0xFF) ]
+  asbytearray = bytearray(bytes)
+  f.write(asbytearray)
+
+def Copy(input, output, length):
+  buf = input.read(length)
+  if len(buf) != length:
+    raise BufferUnderrun()
+  output.write(buf)
+
+class Rewriter:
+
+  def PrintHeader(self, header):
+    header.write('*version\n');
+    header.write('3\n');
+    header.write('data-file-overflow=false\n');
+    header.write('clock=dual\n');
+    header.write('vm=art\n');
+
+  def ProcessDataHeader(self, input, body):
+    magic = ReadIntLE(input)
+    if magic != 0x574f4c53:
+      raise MyException("Magic wrong")
+
+    WriteIntLE(body, magic)
+
+    version = ReadShortLE(input)
+    if (version & 0xf0) != 0xf0:
+      raise MyException("Does not seem to be a streaming trace: %d." % version)
+    version = version ^ 0xf0
+
+    if version != 3:
+      raise MyException("Only support version 3")
+
+    WriteShortLE(body, version)
+
+    # read offset
+    offsetToData = ReadShortLE(input) - 16
+    WriteShortLE(body, offsetToData + 16)
+
+    # copy startWhen
+    Copy(input, body, 8)
+
+    if version == 1:
+      self._mRecordSize = 9;
+    elif version == 2:
+      self._mRecordSize = 10;
+    else:
+      self._mRecordSize = ReadShortLE(input)
+      WriteShortLE(body, self._mRecordSize)
+      offsetToData -= 2;
+
+    # Skip over offsetToData bytes
+    Copy(input, body, offsetToData)
+
+  def ProcessMethod(self, input):
+    stringLength = ReadShortLE(input)
+    str = input.read(stringLength)
+    self._methods.append(str)
+    print 'New method: %s' % str
+
+  def ProcessThread(self, input):
+    tid = ReadShortLE(input)
+    stringLength = ReadShortLE(input)
+    str = input.read(stringLength)
+    self._threads.append('%d\t%s\n' % (tid, str))
+    print 'New thread: %d/%s' % (tid, str)
+
+  def ProcessSpecial(self, input):
+    code = ord(input.read(1))
+    if code == 1:
+      self.ProcessMethod(input)
+    elif code == 2:
+      self.ProcessThread(input)
+    else:
+      raise MyException("Unknown special!")
+
+  def Process(self, input, body):
+    try:
+      while True:
+        threadId = ReadShortLE(input)
+        if threadId == 0:
+          self.ProcessSpecial(input)
+        else:
+          # Regular package, just copy
+          WriteShortLE(body, threadId)
+          Copy(input, body, self._mRecordSize - 2)
+    except BufferUnderrun:
+      print 'Buffer underrun, file was probably truncated. Results should still be usable.'
+
+  def Finalize(self, header):
+    header.write('*threads\n')
+    for t in self._threads:
+      header.write(t)
+    header.write('*methods\n')
+    for m in self._methods:
+      header.write(m)
+    header.write('*end\n')
+
+  def ProcessFile(self, filename):
+    input = open(filename, 'rb')                     # Input file
+    header = open(filename + '.header', 'w')         # Header part
+    body = open(filename + '.body', 'wb')            # Body part
+
+    self.PrintHeader(header)
+
+    self.ProcessDataHeader(input, body)
+
+    self._methods = []
+    self._threads = []
+    self.Process(input, body)
+
+    self.Finalize(header)
+
+    input.close()
+    header.close()
+    body.close()
+
+def main():
+  Rewriter().ProcessFile(sys.argv[1])
+  header_name = sys.argv[1] + '.header'
+  body_name = sys.argv[1] + '.body'
+  print 'Results have been written to %s and %s.' % (header_name, body_name)
+  print 'Concatenate the files to get a result usable with traceview.'
+  sys.exit(0)
+
+if __name__ == '__main__':
+  main()
\ No newline at end of file