Merge "Add niwa@ to squashfs_utils/OWNERS"
diff --git a/ext4_utils/OWNERS b/ext4_utils/OWNERS
new file mode 100644
index 0000000..a3f5c12
--- /dev/null
+++ b/ext4_utils/OWNERS
@@ -0,0 +1,2 @@
+dvander@google.com
+drosen@google.com
diff --git a/f2fs_utils/OWNERS b/f2fs_utils/OWNERS
new file mode 100644
index 0000000..1c863c5
--- /dev/null
+++ b/f2fs_utils/OWNERS
@@ -0,0 +1,3 @@
+jaegeuk@google.com
+daehojeong@google.com
+drosen@google.com
diff --git a/profcollectd/README.md b/profcollectd/README.md
new file mode 100644
index 0000000..dba75a2
--- /dev/null
+++ b/profcollectd/README.md
@@ -0,0 +1,110 @@
+# Profcollect
+
+Profcollect is a system daemon that facilitates sampling profile collection and reporting for native
+platform applications.
+
+Profcollect can only be enabled on `userdebug` or `eng` builds.
+
+## Supported Platforms
+
+Currently Profcollect only supports collecting profiles from Coresight ETM enabled ARM devices.
+
+Instructions to enable Coresight ETM can be found in the
+[simpleperf manual](https://android.googlesource.com/platform/system/extras/+/refs/heads/master/simpleperf/doc/collect_etm_data_for_autofdo.md).
+
+## Usage
+
+Profcollect has two components: `profcollectd`, the system daemon, and `profcollectctl`, the command
+line interface.
+
+### Collection
+
+`profcollectd` can be started from `adb` directly (under root), or automatically on system boot by
+setting system property through:
+
+```
+adb shell device_config put profcollect_native_boot enabled true
+```
+
+Profcollect collects profiles periodically, as well as through triggers like app launch events. Only
+a percentage of these events result in a profile collection, to avoid using too many resources. The
+rates are controlled by the following configurations:
+
+| Event      | Config                 |
+|------------|------------------------|
+| Periodic   | collection\_interval   |
+| App launch | applaunch\_trace\_freq |
+
+Setting the frequency value to `0` disables collection for the corresponding event.
+
+### Processing
+
+The raw tracing data needs to be combined with the original binary to create the AutoFDO branch
+list. This is a costly process, thus it is done separately from the profile collection. Profcollect
+attempts to process all the traces when the device is idle and connected to a power supply. It can
+also be initiated by running:
+
+```
+adb shell profcollectctl process
+```
+
+### Reporting
+
+#### Manual
+
+After actively using the device for a period of time, the device should have gathered enough data to
+generate a good quality PGO profile that represents typical system usage. Run the following command
+to create a profile report:
+
+```
+$ adb shell profcollectctl report
+Creating profile report
+Report created at: 12345678-0000-abcd-8000-12345678abcd
+```
+
+You can then fetch the report by running (under root):
+
+```
+adb pull /data/misc/profcollectd/report/12345678-0000-abcd-8000-12345678abcd.zip
+```
+
+#### Automated Uploading to Server
+
+*In development*
+
+### Post Processing
+
+For each trace file, run:
+
+```
+simpleperf inject \
+    -i {TRACE_FILE_NAME} \
+    -o {OUTPUT_FILE_NAME}.data \
+    --binary {BINARY_NAME} \
+    --symdir out/target/product/{PRODUCT_NAME}/symbols
+```
+
+Afterwards, run [AutoFDO](https://github.com/google/autofdo) to generate Clang PGO profiles:
+
+```
+create_llvm_prof \
+    --profiler text \
+    --binary={BINARY_PATH} \
+    --profile={INPUT_FILE_NAME} \
+    --out={OUTPUT_FILE_NAME}.profdata
+```
+
+Finally, merge all the PGO profiles into one profile:
+
+```
+find {INPUT_DIR} -name '*.profdata' > proflist
+prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-profdata merge \
+    --binary \
+    --sample \
+    --input-files proflist \
+    --output merged.profdata
+```
+
+More profile data usually generates better quality profiles. You may combine data from multiple
+devices running the same build to improve profile quality, and/or reduce the performance impact for
+each device (by reducing collection frequency).
diff --git a/simpleperf/Android.bp b/simpleperf/Android.bp
index 4b9dd99..d91e1e8 100644
--- a/simpleperf/Android.bp
+++ b/simpleperf/Android.bp
@@ -167,6 +167,9 @@
                 "libc++fs",
                 "libdexfile_support",
             ],
+            runtime_libs: [
+                "libdexfile", // libdexfile_support dependency
+            ],
         },
         host: {
             static_libs: [
@@ -287,6 +290,7 @@
         linux: {
             // See note for libdexfile_static in simpleperf_ndk.
             static_libs: ["libdexfile_support"],
+            runtime_libs: ["libdexfile"], // libdexfile_support dependency
         },
     },
 }
@@ -336,6 +340,16 @@
     visibility: ["//system/extras/profcollectd:__subpackages__"],
 }
 
+rust_test {
+    name: "libsimpleperf_profcollect_bindgen_test",
+    srcs: [":libsimpleperf_profcollect_bindgen"],
+    crate_name: "simpleperf_profcollect_bindgen_test",
+    test_suites: ["general-tests"],
+    auto_gen_config: true,
+    clippy_lints: "none",
+    lints: "none",
+}
+
 // simpleperf released in ndk
 cc_binary {
     name: "simpleperf_ndk",
diff --git a/simpleperf/README.md b/simpleperf/README.md
index ae30480..c1d2368 100644
--- a/simpleperf/README.md
+++ b/simpleperf/README.md
@@ -17,8 +17,74 @@
 Once you have the build id (a 7-digit number) and the build is complete, run the
 update script from within the `system/extras/simpleperf` directory:
 ```
-./scripts/update.py --bid 1234567 -vv
+$ ./scripts/update.py --build 1234567
 ```
 
 This will create a new change that you can `repo upload`, then approve and
 submit as normal.
+
+For testing, I usually only run python host tests as below:
+```
+$ ./scripts/test/test.py --only-host-test
+```
+
+To test all scripts, please use python 3.8+ and install below packages:
+```
+$ pip install bokeh jinja2 pandas protobuf textable
+```
+
+## Updating the prebuilts in prebuilts/simpleperf
+
+Download ndk branch.
+```
+$ repo init -u persistent-https://android.git.corp.google.com/platform/manifest -b master-ndk
+$ repo sync
+```
+
+In prebuilts/simpleperf, run `update.py`:
+```
+$ ./update.py --build <bid>
+```
+
+Then manually edit `ChangeLog`.
+This will create a new change that you can `repo upload`, then approve and submit as normal.
+
+For testing, we need to test if the scripts run on darwin/linux/windows for different android
+versions. I usually split it to four parts:
+
+1. Test on android emulators running on linux x86_64 host, for android version N/O/P/Q/R/S/current.
+
+```
+$ ./test/test.py -d <devices> -r 3
+```
+
+The scripts support android >= N. But it's easier to test old versions on emulators. So I only test
+android N on emulators.
+
+Currently, the tests have problems in clean up. So tests on emulator may fail and take too long to
+run. And there are a few known failed cases. Hopefully they will be fixed soon.
+
+2. Test on android devices connected to linux x86_64 host, for android version O/P/Q/R/S/current.
+
+```
+$ ./test/test.py -d <devices> -r 3
+```
+
+3. Test on an android device connected to darwin x86_64 host, for one of android version O/P/Q/R/S/current.
+
+```
+$ ./test/test.py -d <devices> -r 1
+```
+
+4. Test on an android device connected to windows x86_64 host, for one of android version O/P/Q/R/S/current.
+
+```
+$ ./test/test.py -d <devices> -r 1
+```
+
+To check simpleperf contents released in ndk, we can build ndk package.
+```
+$ <top_dir>/ndk/checkbuild.py --package --system linux --module simpleperf
+```
+
+The ndk package is generated in `out/` directory.
diff --git a/simpleperf/SampleDisplayer.h b/simpleperf/SampleDisplayer.h
index cc27a46..05724a0 100644
--- a/simpleperf/SampleDisplayer.h
+++ b/simpleperf/SampleDisplayer.h
@@ -20,6 +20,7 @@
 #include <inttypes.h>
 
 #include <functional>
+#include <optional>
 #include <string>
 
 #include <android-base/logging.h>
@@ -199,6 +200,9 @@
  public:
   void SetInfo(const InfoT* info) { info_ = info; }
   void SetReportFormat(bool report_csv) { report_csv_ = report_csv; }
+  void SetFilterFunction(const std::function<bool(const EntryT*, const InfoT*)>& filter) {
+    filter_func_ = filter;  // PrintSample() skips samples for which the filter returns false.
+  }
 
   void AddDisplayFunction(const std::string& name, display_sample_func_t func) {
     Item item;
@@ -226,6 +230,9 @@
     if (report_csv_) {
       return;
     }
+    if (filter_func_ && !filter_func_.value()(sample, info_)) {
+      return;
+    }
     for (auto& item : display_v_) {
       std::string data =
           (item.func != nullptr) ? item.func(sample) : item.func_with_info(sample, info_);
@@ -249,6 +256,9 @@
   }
 
   void PrintSample(FILE* fp, const EntryT* sample) {
+    if (filter_func_ && !filter_func_.value()(sample, info_)) {
+      return;
+    }
     for (size_t i = 0; i < display_v_.size(); ++i) {
       auto& item = display_v_[i];
       std::string data =
@@ -277,6 +287,7 @@
   const InfoT* info_;
   std::vector<Item> display_v_;
   std::vector<exclusive_display_sample_func_t> exclusive_display_v_;
+  std::optional<std::function<bool(const EntryT*, const InfoT*)>> filter_func_;
   bool report_csv_ = false;
 };
 
diff --git a/simpleperf/cmd_kmem.cpp b/simpleperf/cmd_kmem.cpp
index 9860396..10eb5c9 100644
--- a/simpleperf/cmd_kmem.cpp
+++ b/simpleperf/cmd_kmem.cpp
@@ -301,7 +301,7 @@
         print_callgraph_(false),
         callgraph_show_callee_(false),
         record_filename_("perf.data"),
-        record_file_arch_(GetBuildArch()) {}
+        record_file_arch_(GetTargetArch()) {}
 
   bool Run(const std::vector<std::string>& args);
 
diff --git a/simpleperf/cmd_list.cpp b/simpleperf/cmd_list.cpp
index 4966b9c..4953d9f 100644
--- a/simpleperf/cmd_list.cpp
+++ b/simpleperf/cmd_list.cpp
@@ -51,8 +51,8 @@
     return IsEventAttrSupported(attr, event_type.name) ? EventTypeStatus::SUPPORTED
                                                        : EventTypeStatus::NOT_SUPPORTED;
   }
-  if (event_type.limited_arch == "arm" && GetBuildArch() != ARCH_ARM &&
-      GetBuildArch() != ARCH_ARM64) {
+  if (event_type.limited_arch == "arm" && GetTargetArch() != ARCH_ARM &&
+      GetTargetArch() != ARCH_ARM64) {
     return EventTypeStatus::NOT_SUPPORTED;
   }
   // Because the kernel may not check whether the raw event is supported by the cpu pmu.
@@ -87,7 +87,7 @@
 static void PrintEventTypesOfType(const std::string& type_name, const std::string& type_desc,
                                   const std::function<bool(const EventType&)>& is_type_fn) {
   printf("List of %s:\n", type_desc.c_str());
-  if (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64) {
+  if (GetTargetArch() == ARCH_ARM || GetTargetArch() == ARCH_ARM64) {
     if (type_name == "raw") {
       printf(
           // clang-format off
diff --git a/simpleperf/cmd_monitor.cpp b/simpleperf/cmd_monitor.cpp
index 50f055e..45e918a 100644
--- a/simpleperf/cmd_monitor.cpp
+++ b/simpleperf/cmd_monitor.cpp
@@ -427,7 +427,7 @@
   }
 
   if (fp_callchain_sampling_) {
-    if (GetBuildArch() == ARCH_ARM) {
+    if (GetTargetArch() == ARCH_ARM) {
       LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
                    << "consider using `-g` option or profiling on aarch64 architecture.";
     }
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index a70a586..11947e5 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -500,8 +500,17 @@
 
   // 2. Add default event type.
   if (event_selection_set_.empty()) {
+    std::string event_type = default_measured_event_type;
+    if (GetTargetArch() == ARCH_X86_32 || GetTargetArch() == ARCH_X86_64) {
+      // Emulators may not support hardware events. So switch to cpu-clock when cpu-cycles isn't
+      // available.
+      if (!IsHardwareEventSupported()) {
+        event_type = "cpu-clock";
+        LOG(INFO) << "Hardware events are not available, switch to cpu-clock.";
+      }
+    }
     size_t group_id;
-    if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
+    if (!event_selection_set_.AddEventType(event_type, &group_id)) {
       return false;
     }
     if (sample_speed_) {
@@ -1110,7 +1119,7 @@
   }
 
   if (fp_callchain_sampling_) {
-    if (GetBuildArch() == ARCH_ARM) {
+    if (GetTargetArch() == ARCH_ARM) {
       LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
                    << "consider using `-g` option or profiling on aarch64 architecture.";
     }
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 350a24f..9908c0b 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -184,16 +184,16 @@
   TEST_REQUIRE_HW_COUNTER();
   OMIT_TEST_ON_NON_NATIVE_ABIS();
   size_t event_number;
-  if (GetBuildArch() == ARCH_ARM64 || GetBuildArch() == ARCH_ARM) {
+  if (GetTargetArch() == ARCH_ARM64 || GetTargetArch() == ARCH_ARM) {
     // As in D5.10.2 of the ARMv8 manual, ARM defines the event number space for PMU. part of the
     // space is for common event numbers (which will stay the same for all ARM chips), part of the
     // space is for implementation defined events. Here 0x08 is a common event for instructions.
     event_number = 0x08;
-  } else if (GetBuildArch() == ARCH_X86_32 || GetBuildArch() == ARCH_X86_64) {
+  } else if (GetTargetArch() == ARCH_X86_32 || GetTargetArch() == ARCH_X86_64) {
     // As in volume 3 chapter 19 of the Intel manual, 0x00c0 is the event number for instruction.
     event_number = 0x00c0;
   } else {
-    GTEST_LOG_(INFO) << "Omit arch " << GetBuildArch();
+    GTEST_LOG_(INFO) << "Omit arch " << GetTargetArch();
     return;
   }
   std::string event_name = android::base::StringPrintf("r%zx", event_number);
@@ -230,7 +230,7 @@
 }
 
 TEST(record_cmd, fp_callchain_sampling_warning_on_arm) {
-  if (GetBuildArch() != ARCH_ARM) {
+  if (GetTargetArch() != ARCH_ARM) {
     GTEST_LOG_(INFO) << "This test does nothing as it only tests on arm arch.";
     return;
   }
@@ -1018,12 +1018,12 @@
   TEST_REQUIRE_PMU_COUNTER();
   TEST_REQUIRE_HW_COUNTER();
   std::string event_string;
-  if (GetBuildArch() == ARCH_X86_64) {
+  if (GetTargetArch() == ARCH_X86_64) {
     event_string = "cpu/cpu-cycles/";
-  } else if (GetBuildArch() == ARCH_ARM64) {
+  } else if (GetTargetArch() == ARCH_ARM64) {
     event_string = "armv8_pmuv3/cpu_cycles/";
   } else {
-    GTEST_LOG_(INFO) << "Omit arch " << GetBuildArch();
+    GTEST_LOG_(INFO) << "Omit arch " << GetTargetArch();
     return;
   }
   TEST_IN_ROOT(ASSERT_TRUE(RunRecordCmd({"-e", event_string})));
diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp
index 399e4d8..0480383 100644
--- a/simpleperf/cmd_report.cpp
+++ b/simpleperf/cmd_report.cpp
@@ -389,7 +389,7 @@
 "--no-demangle         Don't demangle symbol names.\n"
 "--no-show-ip          Don't show vaddr in file for unknown symbols.\n"
 "-o report_file_name   Set report file name, default is stdout.\n"
-"--percent-limit <percent>  Set min percentage shown when printing call graph.\n"
+"--percent-limit <percent>  Set min percentage in report entries and call graphs.\n"
 "--pids pid1,pid2,...  Report only for selected pids.\n"
 "--raw-period          Report period count instead of period percentage.\n"
 "--sort key1,key2,...  Select keys used to sort and print the report. The\n"
@@ -418,14 +418,14 @@
                 // clang-format on
                 ),
         record_filename_("perf.data"),
-        record_file_arch_(GetBuildArch()),
+        record_file_arch_(GetTargetArch()),
         use_branch_address_(false),
         system_wide_collection_(false),
         accumulate_callchain_(false),
         print_callgraph_(false),
         callgraph_show_callee_(false),
         callgraph_max_stack_(UINT32_MAX),
-        callgraph_percent_limit_(0),
+        percent_limit_(0),
         raw_period_(false),
         brief_callgraph_(true),
         trace_offcpu_(false),
@@ -466,7 +466,7 @@
   bool print_callgraph_;
   bool callgraph_show_callee_;
   uint32_t callgraph_max_stack_;
-  double callgraph_percent_limit_;
+  double percent_limit_;
   bool raw_period_;
   bool brief_callgraph_;
   bool trace_offcpu_;
@@ -597,7 +597,7 @@
   }
 
   options.PullStringValue("-o", &report_filename_);
-  if (!options.PullDoubleValue("--percent-limit", &callgraph_percent_limit_, 0)) {
+  if (!options.PullDoubleValue("--percent-limit", &percent_limit_, 0)) {
     return false;
   }
 
@@ -729,12 +729,19 @@
       if (has_vaddr_in_file_key) {
         displayer.AddExclusiveDisplayFunction(ReportCmdCallgraphDisplayerWithVaddrInFile());
       } else {
-        displayer.AddExclusiveDisplayFunction(ReportCmdCallgraphDisplayer(
-            callgraph_max_stack_, callgraph_percent_limit_, brief_callgraph_));
+        displayer.AddExclusiveDisplayFunction(
+            ReportCmdCallgraphDisplayer(callgraph_max_stack_, percent_limit_, brief_callgraph_));
       }
     }
   }
 
+  if (percent_limit_ != 0.0) {
+    displayer.SetFilterFunction([this](const SampleEntry* sample, const SampleTree* sample_tree) {
+      uint64_t total_period = sample->period + sample->accumulated_period;
+      return total_period >= sample_tree->total_period * percent_limit_ / 100.0;
+    });
+  }
+
   sample_tree_builder_options_.comparator = comparator;
   sample_tree_builder_options_.thread_tree = &thread_tree_;
 
diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp
index fb79c93..10ec2d7 100644
--- a/simpleperf/cmd_report_test.cpp
+++ b/simpleperf/cmd_report_test.cpp
@@ -443,6 +443,17 @@
   ASSERT_NE(content.find("89.03"), std::string::npos);
 }
 
+TEST_F(ReportCommandTest, percent_limit_option) {
+  Report(PERF_DATA);  // Baseline without --percent-limit: both entries are expected.
+  ASSERT_TRUE(success);
+  ASSERT_NE(content.find("7.70%"), std::string::npos);
+  ASSERT_NE(content.find("3.23%"), std::string::npos);
+  Report(PERF_DATA, {"--percent-limit", "3.24"});  // Limit just above 3.23% drops that entry.
+  ASSERT_TRUE(success);
+  ASSERT_NE(content.find("7.70%"), std::string::npos);
+  ASSERT_EQ(content.find("3.23%"), std::string::npos);
+}
+
 TEST_F(ReportCommandTest, kallsyms_option) {
   Report(PERF_DATA, {"--kallsyms", GetTestData("kallsyms")});
   ASSERT_TRUE(success);
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index 6a4fb1a..2479d8d 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -272,7 +272,7 @@
     event_name = it->second.first;
     rate_desc = it->second.second;
   }
-  if (event_name.empty() && (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64)) {
+  if (event_name.empty() && (GetTargetArch() == ARCH_ARM || GetTargetArch() == ARCH_ARM64)) {
     if (auto it = ARM_EVENT_RATE_MAP.find(miss_event_name); it != ARM_EVENT_RATE_MAP.end()) {
       event_name = it->second.first;
       rate_desc = it->second.second;
diff --git a/simpleperf/cmd_stat_test.cpp b/simpleperf/cmd_stat_test.cpp
index 3e68ac6..f9fad56 100644
--- a/simpleperf/cmd_stat_test.cpp
+++ b/simpleperf/cmd_stat_test.cpp
@@ -59,16 +59,16 @@
   TEST_REQUIRE_HW_COUNTER();
   OMIT_TEST_ON_NON_NATIVE_ABIS();
   size_t event_number;
-  if (GetBuildArch() == ARCH_ARM64 || GetBuildArch() == ARCH_ARM) {
+  if (GetTargetArch() == ARCH_ARM64 || GetTargetArch() == ARCH_ARM) {
     // As in D5.10.2 of the ARMv8 manual, ARM defines the event number space for PMU. part of the
     // space is for common event numbers (which will stay the same for all ARM chips), part of the
     // space is for implementation defined events. Here 0x08 is a common event for instructions.
     event_number = 0x08;
-  } else if (GetBuildArch() == ARCH_X86_32 || GetBuildArch() == ARCH_X86_64) {
+  } else if (GetTargetArch() == ARCH_X86_32 || GetTargetArch() == ARCH_X86_64) {
     // As in volume 3 chapter 19 of the Intel manual, 0x00c0 is the event number for instruction.
     event_number = 0x00c0;
   } else {
-    GTEST_LOG_(INFO) << "Omit arch " << GetBuildArch();
+    GTEST_LOG_(INFO) << "Omit arch " << GetTargetArch();
     return;
   }
   std::string event_name = android::base::StringPrintf("r%zx", event_number);
@@ -79,12 +79,12 @@
   TEST_REQUIRE_PMU_COUNTER();
   TEST_REQUIRE_HW_COUNTER();
   std::string event_string;
-  if (GetBuildArch() == ARCH_X86_64) {
+  if (GetTargetArch() == ARCH_X86_64) {
     event_string = "cpu/instructions/";
-  } else if (GetBuildArch() == ARCH_ARM64) {
+  } else if (GetTargetArch() == ARCH_ARM64) {
     event_string = "armv8_pmuv3/inst_retired/";
   } else {
-    GTEST_LOG_(INFO) << "Omit arch " << GetBuildArch();
+    GTEST_LOG_(INFO) << "Omit arch " << GetTargetArch();
     return;
   }
   TEST_IN_ROOT(ASSERT_TRUE(StatCmd()->Run({"-a", "-e", event_string, "sleep", "1"})));
diff --git a/simpleperf/doc/README.md b/simpleperf/doc/README.md
index 4bd0a57..39207c6 100644
--- a/simpleperf/doc/README.md
+++ b/simpleperf/doc/README.md
@@ -180,7 +180,7 @@
 
 ```sh
 # Collect binaries needed by perf.data in binary_cache/.
-$ python binary_cache_builder.py -lib NATIVE_LIB_DIR,...
+$ ./binary_cache_builder.py -lib NATIVE_LIB_DIR,...
 ```
 
 The NATIVE_LIB_DIRs passed in -lib option are the directories containing unstripped native
@@ -188,11 +188,11 @@
 in binary_cache/ for use when reporting.
 
 ```sh
-$ python report.py --symfs binary_cache
+$ ./report.py --symfs binary_cache
 
 # report_html.py searches binary_cache/ automatically, so you don't need to
 # pass it any argument.
-$ python report_html.py
+$ ./report_html.py
 ```
 
 ### Fix broken callchain stopped at C functions
@@ -215,7 +215,7 @@
 To use app_profiler.py:
 
 ```sh
-$ python app_profiler.py -lib <unstripped_dir>
+$ ./app_profiler.py -lib <unstripped_dir>
 ```
 
 ### Show annotated source code and disassembly
diff --git a/simpleperf/doc/android_application_profiling.md b/simpleperf/doc/android_application_profiling.md
index f0e06c0..a16f633 100644
--- a/simpleperf/doc/android_application_profiling.md
+++ b/simpleperf/doc/android_application_profiling.md
@@ -145,7 +145,7 @@
 # Android >= P.
 # -a option selects the Activity to profile.
 # -lib option gives the directory to find debug native libraries.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative --compile_java_code \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative --compile_java_code \
     -a .MixActivity -lib path_of_SimpleperfExampleWithNative
 ```
 
@@ -157,7 +157,7 @@
 
 ```sh
 # Report perf.data in stdio interface.
-$ python report.py
+$ ./report.py
 Cmdline: /data/data/com.example.simpleperf.simpleperfexamplewithnative/simpleperf record ...
 Arch: arm64
 Event: task-clock:u (type 1, config 1)
@@ -175,10 +175,10 @@
 
 ```sh
 # Report perf.data in html interface.
-$ python report_html.py
+$ ./report_html.py
 
 # Add source code and disassembly. Change the path of source_dirs if it not correct.
-$ python report_html.py --add_source_code --source_dirs path_of_SimpleperfExampleWithNative \
+$ ./report_html.py --add_source_code --source_dirs path_of_SimpleperfExampleWithNative \
       --add_disassembly
 ```
 
@@ -191,22 +191,22 @@
 
 ```sh
 # Record dwarf based call graphs: add "-g" in the -r option.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative \
         -r "-e task-clock:u -f 1000 --duration 10 -g" -lib path_of_SimpleperfExampleWithNative
 
 # Record stack frame based call graphs: add "--call-graph fp" in the -r option.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative \
         -r "-e task-clock:u -f 1000 --duration 10 --call-graph fp" \
         -lib path_of_SimpleperfExampleWithNative
 
 # Report call graphs in stdio interface.
-$ python report.py -g
+$ ./report.py -g
 
 # Report call graphs in python Tk interface.
-$ python report.py -g --gui
+$ ./report.py -g --gui
 
 # Report call graphs in html interface.
-$ python report_html.py
+$ ./report_html.py
 
 # Report call graphs in flamegraphs.
 # On Windows, use inferno.bat instead of ./inferno.sh.
@@ -220,7 +220,7 @@
 and disassembly annotation. It is the recommended way to show reports.
 
 ```sh
-$ python report_html.py
+$ ./report_html.py
 ```
 
 ## Show flamegraph
@@ -239,7 +239,7 @@
 
 ```sh
 $ git clone https://github.com/brendangregg/FlameGraph.git
-$ python report_sample.py --symfs binary_cache >out.perf
+$ ./report_sample.py --symfs binary_cache >out.perf
 $ FlameGraph/stackcollapse-perf.pl out.perf >out.folded
 $ FlameGraph/flamegraph.pl out.folded >a.svg
 ```
@@ -262,7 +262,7 @@
 First check if trace-offcpu feature is supported on the device.
 
 ```sh
-$ python run_simpleperf_on_device.py list --show-features
+$ ./run_simpleperf_on_device.py list --show-features
 dwarf-based-call-graph
 trace-offcpu
 ```
@@ -270,10 +270,10 @@
 If trace-offcpu is supported, it will be shown in the feature list. Then we can try it.
 
 ```sh
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity \
     -r "-g -e task-clock:u -f 1000 --duration 10 --trace-offcpu" \
     -lib path_of_SimpleperfExampleWithNative
-$ python report_html.py --add_disassembly --add_source_code \
+$ ./report_html.py --add_disassembly --add_source_code \
     --source_dirs path_of_SimpleperfExampleWithNative
 ```
 
@@ -283,12 +283,12 @@
 
 ```sh
 # Start simpleperf recording, then start the Activity to profile.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .MainActivity
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .MainActivity
 
 # We can also start the Activity on the device manually.
 # 1. Make sure the application isn't running or one of the recent apps.
 # 2. Start simpleperf recording.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative
 # 3. Start the app manually on the device.
 ```
 
diff --git a/simpleperf/doc/android_platform_profiling.md b/simpleperf/doc/android_platform_profiling.md
index 21cf0f4..b199f2b 100644
--- a/simpleperf/doc/android_platform_profiling.md
+++ b/simpleperf/doc/android_platform_profiling.md
@@ -21,10 +21,10 @@
 ```sh
 # Record surfaceflinger process for 10 seconds with dwarf based call graph. More examples are in
 # scripts reference in the doc.
-$ python app_profiler.py -np surfaceflinger -r "-g --duration 10"
+$ ./app_profiler.py -np surfaceflinger -r "-g --duration 10"
 
 # Generate html report.
-$ python report_html.py
+$ ./report_html.py
 ```
 
 4. Since Android >= O has symbols for system libraries on device, we don't need to use unstripped
@@ -33,14 +33,14 @@
 
 ```sh
 # Doing recording with app_profiler.py or simpleperf on device, and generates perf.data on host.
-$ python app_profiler.py -np surfaceflinger -r "--call-graph fp --duration 10"
+$ ./app_profiler.py -np surfaceflinger -r "--call-graph fp --duration 10"
 
 # Collect unstripped binaries from $ANDROID_PRODUCT_OUT/symbols to binary_cache/.
-$ python binary_cache_builder.py -lib $ANDROID_PRODUCT_OUT/symbols
+$ ./binary_cache_builder.py -lib $ANDROID_PRODUCT_OUT/symbols
 
 # Report source code and disassembly. Disassembling all binaries is slow, so it's better to add
 # --binary_filter option to only disassemble selected binaries.
-$ python report_html.py --add_source_code --source_dirs $ANDROID_BUILD_TOP --add_disassembly \
+$ ./report_html.py --add_source_code --source_dirs $ANDROID_BUILD_TOP --add_disassembly \
   --binary_filter surfaceflinger.so
 ```
 
diff --git a/simpleperf/doc/executable_commands_reference.md b/simpleperf/doc/executable_commands_reference.md
index 6e696d4..5775eb1 100644
--- a/simpleperf/doc/executable_commands_reference.md
+++ b/simpleperf/doc/executable_commands_reference.md
@@ -502,7 +502,7 @@
 $ simpleperf record -g -p 11904 --duration 10 --trace-offcpu
 
 # Record with --trace-offcpu using app_profiler.py.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity \
     -r "-g -e task-clock:u -f 1000 --duration 10 --trace-offcpu"
 ```
 
@@ -510,9 +510,9 @@
 First we record without --trace-offcpu.
 
 ```sh
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity
 
-$ python report_html.py --add_disassembly --add_source_code --source_dirs ../demo
+$ ./report_html.py --add_disassembly --add_source_code --source_dirs ../demo
 ```
 
 The result is [here](./without_trace_offcpu.html).
@@ -520,10 +520,10 @@
 But if we add --trace-offcpu, the result changes.
 
 ```sh
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity \
     -r "-g -e task-clock:u --trace-offcpu -f 1000 --duration 10"
 
-$ python report_html.py --add_disassembly --add_source_code --source_dirs ../demo
+$ ./report_html.py --add_disassembly --add_source_code --source_dirs ../demo
 ```
 
 The result is [here](./trace_offcpu.html).
diff --git a/simpleperf/doc/scripts_reference.md b/simpleperf/doc/scripts_reference.md
index 734c56d..cfeb22f 100644
--- a/simpleperf/doc/scripts_reference.md
+++ b/simpleperf/doc/scripts_reference.md
@@ -25,34 +25,34 @@
 
 ```sh
 # Record an Android application.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative
 
 # Record an Android application with Java code compiled into native instructions.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative --compile_java_code
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative --compile_java_code
 
 # Record the launch of an Activity of an Android application.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .SleepActivity
 
 # Record a native process.
-$ python app_profiler.py -np surfaceflinger
+$ ./app_profiler.py -np surfaceflinger
 
 # Record a native process given its pid.
-$ python app_profiler.py --pid 11324
+$ ./app_profiler.py --pid 11324
 
 # Record a command.
-$ python app_profiler.py -cmd \
+$ ./app_profiler.py -cmd \
     "dex2oat --dex-file=/data/local/tmp/app-profiling.apk --oat-file=/data/local/tmp/a.oat"
 
 # Record an Android application, and use -r to send custom options to the record command.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative \
     -r "-e cpu-clock -g --duration 30"
 
 # Record both on CPU time and off CPU time.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative \
     -r "-e task-clock -g -f 1000 --duration 10 --trace-offcpu"
 
 # Save profiling data in a custom file (like perf_custom.data) instead of perf.data.
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -o perf_custom.data
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -o perf_custom.data
 ```
 
 ### Profile from launch of an application
@@ -64,7 +64,7 @@
 command with --app, then start the app. Below is an example.
 
 ```sh
-$ python run_simpleperf_on_device.py record
+$ ./run_simpleperf_on_device.py record
     --app com.example.simpleperf.simpleperfexamplewithnative \
     -g --duration 1 -o /data/local/tmp/perf.data
 # Start the app manually or using the `am` command.
@@ -74,7 +74,7 @@
 after recording has started.
 
 ```sh
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .MainActivity
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative -a .MainActivity
 ```
 
 ## api_profiler.py
@@ -91,11 +91,11 @@
 Below is an example.
 
 ```sh
-$ python run_simpleperf_without_usb_connection.py start \
+$ ./run_simpleperf_without_usb_connection.py start \
     -p com.example.simpleperf.simpleperfexamplewithnative
 # After the command finishes successfully, unplug the USB cable, run the
 # SimpleperfExampleWithNative app. After a few seconds, plug in the USB cable.
-$ python run_simpleperf_without_usb_connection.py stop
+$ ./run_simpleperf_without_usb_connection.py stop
 # It may take a while to stop recording. After that, the profiling data is collected in perf.data
 # on host.
 ```
@@ -117,11 +117,11 @@
 
 ```sh
 # Generate binary_cache for perf.data, by pulling binaries from the device.
-$ python binary_cache_builder.py
+$ ./binary_cache_builder.py
 
 # Generate binary_cache, by pulling binaries from the device and finding binaries in
 # SimpleperfExampleWithNative.
-$ python binary_cache_builder.py -lib path_of_SimpleperfExampleWithNative
+$ ./binary_cache_builder.py -lib path_of_SimpleperfExampleWithNative
 ```
 
 ## run_simpleperf_on_device.py
@@ -136,10 +136,10 @@
 
 ```sh
 # Report call graph
-$ python report.py -g
+$ ./report.py -g
 
 # Report call graph in a GUI window implemented by Python Tk.
-$ python report.py -g --gui
+$ ./report.py -g --gui
 ```
 
 ## report_html.py
@@ -152,27 +152,27 @@
 
 ```sh
 # Generate chart statistics, sample table and flamegraphs, based on perf.data.
-$ python report_html.py
+$ ./report_html.py
 
 # Add source code.
-$ python report_html.py --add_source_code --source_dirs path_of_SimpleperfExampleWithNative
+$ ./report_html.py --add_source_code --source_dirs path_of_SimpleperfExampleWithNative
 
 # Add disassembly.
-$ python report_html.py --add_disassembly
+$ ./report_html.py --add_disassembly
 
 # Adding disassembly for all binaries can cost a lot of time. So we can choose to only add
 # disassembly for selected binaries.
-$ python report_html.py --add_disassembly --binary_filter libgame.so
+$ ./report_html.py --add_disassembly --binary_filter libgame.so
 
 # report_html.py accepts more than one recording data file.
-$ python report_html.py -i perf1.data perf2.data
+$ ./report_html.py -i perf1.data perf2.data
 ```
 
 Below is an example of generating html profiling results for SimpleperfExampleWithNative.
 
 ```sh
-$ python app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative
-$ python report_html.py --add_source_code --source_dirs path_of_SimpleperfExampleWithNative \
+$ ./app_profiler.py -p com.example.simpleperf.simpleperfexamplewithnative
+$ ./report_html.py --add_source_code --source_dirs path_of_SimpleperfExampleWithNative \
     --add_disassembly
 ```
 
@@ -219,7 +219,7 @@
 
 ```sh
 # Convert perf.data in the current directory to pprof.proto format.
-$ python pprof_proto_generator.py
+$ ./pprof_proto_generator.py
 # Show report in pdf format.
 $ pprof -pdf pprof.profile
 
@@ -236,7 +236,7 @@
 
 ```sh
 # Convert perf.data in the current directory to a format used by FlameGraph.
-$ python report_sample.py --symfs binary_cache >out.perf
+$ ./report_sample.py --symfs binary_cache >out.perf
 $ git clone https://github.com/brendangregg/FlameGraph.git
 $ FlameGraph/stackcollapse-perf.pl out.perf >out.folded
 $ FlameGraph/flamegraph.pl out.folded >a.svg
diff --git a/simpleperf/dso.cpp b/simpleperf/dso.cpp
index 8d00238..815e8cb 100644
--- a/simpleperf/dso.cpp
+++ b/simpleperf/dso.cpp
@@ -229,6 +229,20 @@
   }
 }
 
+std::string_view Symbol::FunctionName() const {
+  // A name with signature looks like "void ctep.v(cteo, ctgc, ctbn)".
+  const std::string_view full_name = DemangledName();
+  const auto paren_pos = full_name.find('(');
+  if (paren_pos == std::string_view::npos) {
+    // No parameter list, so the demangled name is returned unchanged.
+    return full_name;
+  }
+  // Cut the parameter list first, then whatever precedes the last remaining space.
+  const std::string_view head = full_name.substr(0, paren_pos);
+  const auto space_pos = head.rfind(' ');
+  return space_pos == std::string_view::npos ? head : head.substr(space_pos + 1);
+}
+
 static bool CompareSymbolToAddr(const Symbol& s, uint64_t addr) {
   return s.addr < addr;
 }
diff --git a/simpleperf/dso.h b/simpleperf/dso.h
index 4bac3e0..866aadf 100644
--- a/simpleperf/dso.h
+++ b/simpleperf/dso.h
@@ -66,6 +66,8 @@
 
   const char* DemangledName() const;
   void SetDemangledName(std::string_view name) const;
+  // Return function name without signature.
+  std::string_view FunctionName() const;
 
   bool HasDumpId() const { return dump_id_ != UINT_MAX; }
 
diff --git a/simpleperf/dso_test.cpp b/simpleperf/dso_test.cpp
index 02958ce..1338c1c 100644
--- a/simpleperf/dso_test.cpp
+++ b/simpleperf/dso_test.cpp
@@ -301,3 +301,12 @@
   ASSERT_EQ(0x12345678, dso->IpToVaddrInFile(0x12345678, 0x0, 0x0));
   ASSERT_EQ(0x12345678, dso->IpToVaddrInFile(0x12345678, 0xe9201000, 0xa5000));
 }
+
+TEST(dso, FunctionName) {
+  Symbol symbol = Symbol("void ctep.v(cteo, ctgc, ctbn)", 0x0, 0x1);
+  ASSERT_EQ(symbol.FunctionName(), "ctep.v");
+  symbol = Symbol("ctep.v(cteo, ctgc, ctbn)", 0x0, 0x1);
+  ASSERT_EQ(symbol.FunctionName(), "ctep.v");
+  symbol = Symbol("ctep.v", 0x0, 0x1);
+  ASSERT_EQ(symbol.FunctionName(), "ctep.v");
+}
diff --git a/simpleperf/environment.cpp b/simpleperf/environment.cpp
index 01c9bc7..64c5708 100644
--- a/simpleperf/environment.cpp
+++ b/simpleperf/environment.cpp
@@ -426,14 +426,14 @@
   utsname uname_buf;
   if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
     PLOG(WARNING) << "uname() failed";
-    return GetBuildArch();
+    return GetTargetArch();
   }
   ArchType arch = GetArchType(uname_buf.machine);
 #endif
   if (arch != ARCH_UNSUPPORTED) {
     return arch;
   }
-  return GetBuildArch();
+  return GetTargetArch();
 }
 
 void PrepareVdsoFile() {
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index 82d41fd..1c9f532 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -58,7 +58,7 @@
   perf_event_attr attr = CreateDefaultPerfEventAttr(*type);
   attr.sample_type |= PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER;
   attr.exclude_callchain_user = 1;
-  attr.sample_regs_user = GetSupportedRegMask(GetBuildArch());
+  attr.sample_regs_user = GetSupportedRegMask(GetTargetArch());
   attr.sample_stack_user = 8192;
   return IsEventAttrSupported(attr, type->name);
 }
@@ -145,6 +145,15 @@
   return IsEventAttrSupported(attr, type->name);
 }
 
+bool IsHardwareEventSupported() {
+  // Probe for hardware event support using the "cpu-cycles" event.
+  if (const EventType* event_type = FindEventTypeByName("cpu-cycles"); event_type != nullptr) {
+    perf_event_attr probe_attr = CreateDefaultPerfEventAttr(*event_type);
+    return IsEventAttrSupported(probe_attr, event_type->name);
+  }
+  return false;
+}
+
 std::string AddrFilter::ToString() const {
   switch (type) {
     case FILE_RANGE:
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
index 7d84b62..52b010f 100644
--- a/simpleperf/event_selection_set.h
+++ b/simpleperf/event_selection_set.h
@@ -222,6 +222,7 @@
 bool IsDumpingRegsForTracepointEventsSupported();
 bool IsSettingClockIdSupported();
 bool IsMmap2Supported();
+bool IsHardwareEventSupported();
 
 }  // namespace simpleperf
 
diff --git a/simpleperf/perf_regs.h b/simpleperf/perf_regs.h
index 17e28a6..569a469 100644
--- a/simpleperf/perf_regs.h
+++ b/simpleperf/perf_regs.h
@@ -45,7 +45,7 @@
   ARCH_UNSUPPORTED,
 };
 
-constexpr ArchType GetBuildArch() {
+constexpr ArchType GetTargetArch() {
 #if defined(__i386__)
   return ARCH_X86_32;
 #elif defined(__x86_64__)
diff --git a/simpleperf/record_test.cpp b/simpleperf/record_test.cpp
index 6d65a98..2c39a70 100644
--- a/simpleperf/record_test.cpp
+++ b/simpleperf/record_test.cpp
@@ -124,7 +124,7 @@
   SampleRecord r(event_attr, 0, 1, 2, 3, 4, 5, 6, {1, 5, 0, PERF_CONTEXT_USER, 6, 0}, {}, 0);
   r.header.misc = PERF_RECORD_MISC_KERNEL;
   r.AdjustCallChainGeneratedByKernel();
-  uint64_t adjustValue = (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64) ? 2 : 1;
+  uint64_t adjustValue = (GetTargetArch() == ARCH_ARM || GetTargetArch() == ARCH_ARM64) ? 2 : 1;
   SampleRecord expected(event_attr, 0, 1, 2, 3, 4, 5, 6,
                         {1, 5 - adjustValue, PERF_CONTEXT_KERNEL, PERF_CONTEXT_USER,
                          6 - adjustValue, PERF_CONTEXT_USER},
diff --git a/simpleperf/report_utils.cpp b/simpleperf/report_utils.cpp
index a946800..0cc1191 100644
--- a/simpleperf/report_utils.cpp
+++ b/simpleperf/report_utils.cpp
@@ -172,11 +172,12 @@
   CollectJavaMethods();
   for (size_t i = 0; i < callchain.size();) {
     auto& entry = callchain[i];
-    if (entry.dso->IsForJavaMethod() && entry.dso->type() == DSO_ELF_FILE) {
+    if (entry.execution_type == CallChainExecutionType::JIT_JVM_METHOD) {
       // This is a JIT java method, merge it with the interpreted java method having the same
       // name if possible. Otherwise, merge it with other JIT java methods having the same name
       // by assigning a common dso_name.
-      if (auto it = java_method_map_.find(entry.symbol->Name()); it != java_method_map_.end()) {
+      if (auto it = java_method_map_.find(std::string(entry.symbol->FunctionName()));
+          it != java_method_map_.end()) {
         entry.dso = it->second.dso;
         entry.symbol = it->second.symbol;
         // Not enough info to map an offset in a JIT method to an offset in a dex file. So just
@@ -214,13 +215,29 @@
   }
 }
 
+static bool IsJavaEntry(const CallChainReportEntry& entry) {
+  // JVM frames are Java; a native frame is Java only if its dso is a compiled Java file.
+  switch (entry.execution_type) {
+    case CallChainExecutionType::JIT_JVM_METHOD:
+    case CallChainExecutionType::INTERPRETED_JVM_METHOD:
+      return true;
+    case CallChainExecutionType::NATIVE_METHOD: {
+      const std::string& dso_path = entry.dso->Path();
+      return android::base::EndsWith(dso_path, ".odex") ||
+             android::base::EndsWith(dso_path, ".oat") ||
+             android::base::EndsWith(dso_path, ".dex");
+    }
+    default:
+      return false;
+  }
+}
+
 void CallChainReportBuilder::DeObfuscateJavaMethods(std::vector<CallChainReportEntry>& callchain) {
   for (auto& entry : callchain) {
-    if (entry.execution_type != CallChainExecutionType::JIT_JVM_METHOD &&
-        entry.execution_type != CallChainExecutionType::INTERPRETED_JVM_METHOD) {
+    if (!IsJavaEntry(entry)) {
       continue;
     }
-    std::string_view name = entry.symbol->DemangledName();
+    std::string_view name = entry.symbol->FunctionName();
     if (auto split_pos = name.rfind('.'); split_pos != name.npos) {
       std::string obfuscated_classname(name.substr(0, split_pos));
       if (auto it = proguard_class_map_.find(obfuscated_classname);
diff --git a/simpleperf/report_utils_test.cpp b/simpleperf/report_utils_test.cpp
index cc24635..1745a85 100644
--- a/simpleperf/report_utils_test.cpp
+++ b/simpleperf/report_utils_test.cpp
@@ -35,48 +35,36 @@
     thread = thread_tree.FindThread(1);
 
     // Add symbol info for the native library.
-    FileFeature file;
-    file.path = fake_native_lib_path;
-    file.type = DSO_ELF_FILE;
-    file.min_vaddr = file.file_offset_of_min_vaddr = 0;
-    file.symbols = {
-        Symbol("native_func1", 0x0, 0x100),
-        Symbol("art_jni_trampoline", 0x100, 0x100),
-    };
-    thread_tree.AddDsoInfo(file);
+    SetSymbols(fake_native_lib_path, DSO_ELF_FILE,
+               {
+                   Symbol("native_func1", 0x0, 0x100),
+                   Symbol("art_jni_trampoline", 0x100, 0x100),
+               });
 
     // Add symbol info for the interpreter library.
-    file.path = fake_interpreter_path;
-    file.type = DSO_ELF_FILE;
-    file.min_vaddr = file.file_offset_of_min_vaddr = 0;
-    file.symbols = {
-        Symbol("art_func1", 0x0, 0x100),
-        Symbol("art_func2", 0x100, 0x100),
-        Symbol("_ZN3artL13Method_invokeEP7_JNIEnvP8_jobjectS3_P13_jobjectArray", 0x200, 0x100),
-    };
-    thread_tree.AddDsoInfo(file);
+    SetSymbols(
+        fake_interpreter_path, DSO_ELF_FILE,
+        {
+            Symbol("art_func1", 0x0, 0x100),
+            Symbol("art_func2", 0x100, 0x100),
+            Symbol("_ZN3artL13Method_invokeEP7_JNIEnvP8_jobjectS3_P13_jobjectArray", 0x200, 0x100),
+        });
 
     // Add symbol info for the dex file.
-    file.path = fake_dex_file_path;
-    file.type = DSO_DEX_FILE;
-    file.min_vaddr = file.file_offset_of_min_vaddr = 0;
-    file.symbols = {
-        Symbol("java_method1", 0x0, 0x100),
-        Symbol("java_method2", 0x100, 0x100),
-        Symbol("obfuscated_class.obfuscated_java_method", 0x200, 0x100),
-    };
-    thread_tree.AddDsoInfo(file);
+    SetSymbols(fake_dex_file_path, DSO_DEX_FILE,
+               {
+                   Symbol("java_method1", 0x0, 0x100),
+                   Symbol("java_method2", 0x100, 0x100),
+                   Symbol("obfuscated_class.obfuscated_java_method", 0x200, 0x100),
+               });
 
     // Add symbol info for the jit cache.
-    file.path = fake_jit_cache_path;
-    file.type = DSO_ELF_FILE;
-    file.min_vaddr = file.file_offset_of_min_vaddr = 0;
-    file.symbols = {
-        Symbol("java_method2", 0x3000, 0x100),
-        Symbol("java_method3", 0x3100, 0x100),
-        Symbol("obfuscated_class.obfuscated_java_method2", 0x3200, 0x100),
-    };
-    thread_tree.AddDsoInfo(file);
+    SetSymbols(fake_jit_cache_path, DSO_ELF_FILE,
+               {
+                   Symbol("java_method2", 0x3000, 0x100),
+                   Symbol("java_method3", 0x3100, 0x100),
+                   Symbol("obfuscated_class.obfuscated_java_method2", 0x3200, 0x100),
+               });
 
     // Add map layout for libraries used in the thread:
     // 0x0000 - 0x1000 is mapped to the native library.
@@ -90,6 +78,15 @@
                              map_flags::PROT_JIT_SYMFILE_MAP);
   }
 
+  void SetSymbols(const std::string& path, DsoType dso_type, const std::vector<Symbol>& symbols) {
+    FileFeature file;
+    file.path = path;
+    file.type = dso_type;
+    file.min_vaddr = file.file_offset_of_min_vaddr = 0;
+    file.symbols = symbols;
+    thread_tree.AddDsoInfo(file);
+  }
+
   ThreadTree thread_tree;
   const ThreadEntry* thread;
   const std::string fake_native_lib_path = "fake_dir/fake_native_lib.so";
@@ -310,3 +307,108 @@
   ASSERT_EQ(entries[1].vaddr_in_file, 0x3200);
   ASSERT_EQ(entries[1].execution_type, CallChainExecutionType::JIT_JVM_METHOD);
 }
+
+TEST_F(CallChainReportBuilderTest, add_proguard_mapping_file_for_jit_method_with_signature) {
+  std::vector<uint64_t> fake_ips = {
+      0x3200,  // 3200,  // void ctep.v(cteo, ctgc, ctbn)
+  };
+  SetSymbols(fake_jit_cache_path, DSO_ELF_FILE,
+             {Symbol("void ctep.v(cteo, ctgc, ctbn)", 0x3200, 0x100)});
+  CallChainReportBuilder builder(thread_tree);
+  TemporaryFile tmpfile;
+  close(tmpfile.release());
+  ASSERT_TRUE(android::base::WriteStringToFile(
+      "android.support.v4.app.RemoteActionCompatParcelizer -> ctep:\n"
+      "    13:13:androidx.core.app.RemoteActionCompat read(androidx.versionedparcelable.Versioned"
+      "Parcel) -> v\n",
+      tmpfile.path));
+  builder.AddProguardMappingFile(tmpfile.path);
+  std::vector<CallChainReportEntry> entries = builder.Build(thread, fake_ips, 0);
+  ASSERT_EQ(entries.size(), 1);
+  ASSERT_EQ(entries[0].ip, 0x3200);
+  ASSERT_STREQ(entries[0].symbol->DemangledName(),
+               "android.support.v4.app.RemoteActionCompatParcelizer.read");
+  ASSERT_EQ(entries[0].dso->Path(), fake_jit_cache_path);
+  ASSERT_EQ(entries[0].vaddr_in_file, 0x3200);
+  ASSERT_EQ(entries[0].execution_type, CallChainExecutionType::JIT_JVM_METHOD);
+}
+
+TEST_F(CallChainReportBuilderTest,
+       add_proguard_mapping_file_for_compiled_java_method_with_signature) {
+  TemporaryFile tmpfile;
+  close(tmpfile.release());
+  ASSERT_TRUE(android::base::WriteStringToFile(
+      "android.support.v4.app.RemoteActionCompatParcelizer -> ctep:\n"
+      "    13:13:androidx.core.app.RemoteActionCompat read(androidx.versionedparcelable.Versioned"
+      "Parcel) -> v\n",
+      tmpfile.path));
+
+  for (const char* suffix : {".odex", ".oat", ".dex"}) {
+    std::string compiled_java_path = "compiled_java" + std::string(suffix);
+    SetSymbols(compiled_java_path, DSO_ELF_FILE,
+               {Symbol("void ctep.v(cteo, ctgc, ctbn)", 0x0, 0x100)});
+    thread_tree.AddThreadMap(1, 1, 0x4000, 0x1000, 0x0, compiled_java_path);
+    std::vector<uint64_t> fake_ips = {
+        0x4000,  // 4000,  // void ctep.v(cteo, ctgc, ctbn)
+    };
+
+    CallChainReportBuilder builder(thread_tree);
+    builder.AddProguardMappingFile(tmpfile.path);
+    std::vector<CallChainReportEntry> entries = builder.Build(thread, fake_ips, 0);
+    ASSERT_EQ(entries.size(), 1);
+    ASSERT_EQ(entries[0].ip, 0x4000);
+    ASSERT_STREQ(entries[0].symbol->DemangledName(),
+                 "android.support.v4.app.RemoteActionCompatParcelizer.read");
+    ASSERT_EQ(entries[0].dso->Path(), compiled_java_path);
+    ASSERT_EQ(entries[0].vaddr_in_file, 0x0);
+    ASSERT_EQ(entries[0].execution_type, CallChainExecutionType::NATIVE_METHOD);
+  }
+}
+
+TEST_F(CallChainReportBuilderTest, convert_jit_frame_for_jit_method_with_signature) {
+  std::vector<uint64_t> fake_ips = {
+      0x2200,  // 2200,  // ctep.v
+      0x3200,  // 3200,  // void ctep.v(cteo, ctgc, ctbn)
+  };
+  SetSymbols(fake_dex_file_path, DSO_DEX_FILE, {Symbol("ctep.v", 0x200, 0x100)});
+  SetSymbols(fake_jit_cache_path, DSO_ELF_FILE,
+             {Symbol("void ctep.v(cteo, ctgc, ctbn)", 0x3200, 0x100)});
+  CallChainReportBuilder builder(thread_tree);
+  // Test if we can convert jit method with signature.
+  std::vector<CallChainReportEntry> entries = builder.Build(thread, fake_ips, 0);
+  ASSERT_EQ(entries.size(), 2);
+  ASSERT_EQ(entries[0].ip, 0x2200);
+  ASSERT_STREQ(entries[0].symbol->DemangledName(), "ctep.v");
+  ASSERT_EQ(entries[0].dso->Path(), fake_dex_file_path);
+  ASSERT_EQ(entries[0].vaddr_in_file, 0x200);
+  ASSERT_EQ(entries[0].execution_type, CallChainExecutionType::INTERPRETED_JVM_METHOD);
+  ASSERT_EQ(entries[1].ip, 0x3200);
+  ASSERT_STREQ(entries[1].symbol->DemangledName(), "ctep.v");
+  ASSERT_EQ(entries[1].dso->Path(), fake_dex_file_path);
+  ASSERT_EQ(entries[1].vaddr_in_file, 0x200);
+  ASSERT_EQ(entries[1].execution_type, CallChainExecutionType::JIT_JVM_METHOD);
+
+  // Test adding proguard mapping file.
+  TemporaryFile tmpfile;
+  close(tmpfile.release());
+  ASSERT_TRUE(android::base::WriteStringToFile(
+      "android.support.v4.app.RemoteActionCompatParcelizer -> ctep:\n"
+      "    13:13:androidx.core.app.RemoteActionCompat read(androidx.versionedparcelable.Versioned"
+      "Parcel) -> v\n",
+      tmpfile.path));
+  builder.AddProguardMappingFile(tmpfile.path);
+  entries = builder.Build(thread, fake_ips, 0);
+  ASSERT_EQ(entries.size(), 2);
+  ASSERT_EQ(entries[0].ip, 0x2200);
+  ASSERT_STREQ(entries[0].symbol->DemangledName(),
+               "android.support.v4.app.RemoteActionCompatParcelizer.read");
+  ASSERT_EQ(entries[0].dso->Path(), fake_dex_file_path);
+  ASSERT_EQ(entries[0].vaddr_in_file, 0x200);
+  ASSERT_EQ(entries[0].execution_type, CallChainExecutionType::INTERPRETED_JVM_METHOD);
+  ASSERT_EQ(entries[1].ip, 0x3200);
+  ASSERT_STREQ(entries[1].symbol->DemangledName(),
+               "android.support.v4.app.RemoteActionCompatParcelizer.read");
+  ASSERT_EQ(entries[1].dso->Path(), fake_dex_file_path);
+  ASSERT_EQ(entries[1].vaddr_in_file, 0x200);
+  ASSERT_EQ(entries[1].execution_type, CallChainExecutionType::JIT_JVM_METHOD);
+}
diff --git a/simpleperf/scripts/annotate.py b/simpleperf/scripts/annotate.py
index 7f2392e..27d67a8 100755
--- a/simpleperf/scripts/annotate.py
+++ b/simpleperf/scripts/annotate.py
@@ -18,16 +18,15 @@
 """annotate.py: annotate source files based on perf.data.
 """
 
-
-import argparse
+import logging
 import os
 import os.path
 import shutil
 
 from simpleperf_report_lib import ReportLib
 from simpleperf_utils import (
-    Addr2Nearestline, BinaryFinder, extant_dir, flatten_arg_list, is_windows, log_exit, log_info,
-    log_warning, ReadElf, SourceFileSearcher)
+    Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir, flatten_arg_list, is_windows,
+    log_exit, ReadElf, SourceFileSearcher)
 
 
 class SourceLine(object):
@@ -386,7 +385,7 @@
         for key in self.file_periods:
             from_path = key
             if not os.path.isfile(from_path):
-                log_warning("can't find source file for path %s" % from_path)
+                logging.warning("can't find source file for path %s" % from_path)
                 continue
             if from_path.startswith('/'):
                 to_path = os.path.join(dest_dir, from_path[1:])
@@ -406,7 +405,7 @@
           3. For each line not hitting the same line as functions, show
              line periods.
         """
-        log_info('annotate file %s' % from_path)
+        logging.info('annotate file %s' % from_path)
         with open(from_path, 'r') as rf:
             lines = rf.readlines()
 
@@ -446,7 +445,7 @@
 
 
 def main():
-    parser = argparse.ArgumentParser(description="""
+    parser = BaseArgumentParser(description="""
         Annotate source files based on profiling data. It reads line information from binary_cache
         generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
         files in annotated_files directory.""")
@@ -478,7 +477,7 @@
 
     annotator = SourceFileAnnotator(config)
     annotator.annotate()
-    log_info('annotate finish successfully, please check result in annotated_files/.')
+    logging.info('annotate finish successfully, please check result in annotated_files/.')
 
 
 if __name__ == '__main__':
diff --git a/simpleperf/scripts/api_profiler.py b/simpleperf/scripts/api_profiler.py
index d39b8e3..18b4bda 100755
--- a/simpleperf/scripts/api_profiler.py
+++ b/simpleperf/scripts/api_profiler.py
@@ -26,14 +26,14 @@
     4. Run `api_profiler.py collect` to collect recording data on host.
 """
 
-from __future__ import print_function
-import argparse
+import logging
 import os
 import os.path
 import shutil
 import zipfile
 
-from simpleperf_utils import AdbHelper, get_target_binary_path, log_exit, log_info, remove
+from simpleperf_utils import (AdbHelper, BaseArgumentParser,
+                              get_target_binary_path, log_exit, remove)
 
 
 def prepare_recording(args):
@@ -84,24 +84,17 @@
     zip_file_path = os.path.join(args.out_dir, 'simpleperf_data.zip')
     with zipfile.ZipFile(zip_file_path, 'r') as zip_fh:
         names = zip_fh.namelist()
-        log_info('There are %d recording data files.' % len(names))
+        logging.info('There are %d recording data files.' % len(names))
         for name in names:
-            log_info('recording file: %s' % os.path.join(args.out_dir, name))
+            logging.info('recording file: %s' % os.path.join(args.out_dir, name))
             zip_fh.extract(name, args.out_dir)
     remove(zip_file_path)
 
 
-class ArgumentHelpFormatter(argparse.ArgumentDefaultsHelpFormatter,
-                            argparse.RawDescriptionHelpFormatter):
-    pass
-
-
 def main():
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=ArgumentHelpFormatter)
+    parser = BaseArgumentParser(description=__doc__)
     subparsers = parser.add_subparsers()
-    prepare_parser = subparsers.add_parser('prepare', help='Prepare recording on device.',
-                                           formatter_class=ArgumentHelpFormatter)
+    prepare_parser = subparsers.add_parser('prepare', help='Prepare recording on device.')
     prepare_parser.add_argument('--max-sample-rate', nargs=1, type=int, default=[100000], help="""
                                 Set max sample rate (only on Android >= Q).""")
     prepare_parser.add_argument('--max-cpu-percent', nargs=1, type=int, default=[25], help="""
@@ -111,8 +104,7 @@
                                 Set max kernel buffer size for recording (only on Android >= Q).
                                 """)
     prepare_parser.set_defaults(func=prepare_recording)
-    collect_parser = subparsers.add_parser('collect', help='Collect recording data.',
-                                           formatter_class=ArgumentHelpFormatter)
+    collect_parser = subparsers.add_parser('collect', help='Collect recording data.')
     collect_parser.add_argument('-p', '--app', nargs=1, required=True, help="""
                                 The app package name of the app profiled.""")
     collect_parser.add_argument('-o', '--out-dir', default='simpleperf_data', help="""
diff --git a/simpleperf/scripts/app_profiler.py b/simpleperf/scripts/app_profiler.py
index 38b3602..808e3e9 100755
--- a/simpleperf/scripts/app_profiler.py
+++ b/simpleperf/scripts/app_profiler.py
@@ -21,20 +21,22 @@
     and pulls profiling data and related binaries on host.
 """
 
-from __future__ import print_function
-import argparse
+import logging
 import os
 import os.path
+import re
 import subprocess
 import sys
 import time
 
 from simpleperf_utils import (
-    AdbHelper, bytes_to_str, extant_dir, get_script_dir, get_target_binary_path, log_debug,
-    log_info, log_exit, ReadElf, remove, set_log_level, str_to_bytes)
+    AdbHelper, BaseArgumentParser, bytes_to_str, extant_dir, get_script_dir, get_target_binary_path,
+    log_exit, ReadElf, remove, str_to_bytes)
 
 NATIVE_LIBS_DIR_ON_DEVICE = '/data/local/tmp/native_libs/'
 
+SHELL_PS_UID_PATTERN = re.compile(r'USER.*\nu(\d+)_.*')
+
 
 class HostElfEntry(object):
     """Represent a native lib on host in NativeLibDownloader."""
@@ -201,14 +203,14 @@
         self.record_subproc = None
 
     def profile(self):
-        log_info('prepare profiling')
+        logging.info('prepare profiling')
         self.prepare()
-        log_info('start profiling')
+        logging.info('start profiling')
         self.start()
         self.wait_profiling()
-        log_info('collect profiling data')
+        logging.info('collect profiling data')
         self.collect_profiling_data()
-        log_info('profiling is finished.')
+        logging.info('profiling is finished.')
 
     def prepare(self):
         """Prepare recording. """
@@ -239,7 +241,7 @@
         args += ['--log', self.args.log]
         args += target_args
         adb_args = [self.adb.adb_path, 'shell'] + args
-        log_info('run adb cmd: %s' % adb_args)
+        logging.info('run adb cmd: %s' % adb_args)
         self.record_subproc = subprocess.Popen(adb_args)
 
     def wait_profiling(self):
@@ -253,7 +255,7 @@
             # Don't check return value of record_subproc. Because record_subproc also
             # receives Ctrl-C, and always returns non-zero.
             returncode = 0
-        log_debug('profiling result [%s]' % (returncode == 0))
+        logging.debug('profiling result [%s]' % (returncode == 0))
         if returncode != 0:
             log_exit('Failed to record profiling data.')
 
@@ -310,15 +312,34 @@
                 pid = self.find_app_process()
                 if not pid:
                     break
+                count += 1
+                if count >= 5:
+                    logging.info('unable to kill %s, skipping...' % self.args.app)
+                    break
                 # When testing on Android N, `am force-stop` sometimes can't kill
                 # com.example.simpleperf.simpleperfexampleofkotlin. So use kill when this happens.
-                count += 1
                 if count >= 3:
                     self.run_in_app_dir(['kill', '-9', str(pid)])
 
     def find_app_process(self):
-        result, output = self.adb.run_and_return_output(['shell', 'pidof', self.args.app])
-        return int(output) if result else None
+        """Return the pid of the app process owned by the current user, or None."""
+        result, pidof_output = self.adb.run_and_return_output(
+            ['shell', 'pidof', self.args.app])
+        if not result:
+            return None
+        result, current_user = self.adb.run_and_return_output(
+            ['shell', 'am', 'get-current-user'])
+        if not result:
+            return None
+        current_user = current_user.strip()
+        for pid in pidof_output.split():
+            result, ps_output = self.adb.run_and_return_output(
+                ['shell', 'ps', '-p', pid, '-o', 'USER'])
+            # Skip a pid when ps fails or its USER is not "u<id>_...", instead of crashing.
+            match = SHELL_PS_UID_PATTERN.search(ps_output) if result else None
+            if match and match.group(1) == current_user:
+                return int(pid)
+        return None
 
     def run_in_app_dir(self, args):
         if self.is_root_device:
@@ -357,7 +378,7 @@
     """Profile a native program."""
 
     def start(self):
-        log_info('Waiting for native process %s' % self.args.native_program)
+        logging.info('Waiting for native process %s' % self.args.native_program)
         while True:
             (result, pid) = self.adb.run_and_return_output(['shell', 'pidof',
                                                             self.args.native_program])
@@ -398,8 +419,7 @@
 
 
 def main():
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser = BaseArgumentParser(description=__doc__)
 
     target_group = parser.add_argument_group(title='Select profiling target'
                                              ).add_mutually_exclusive_group(required=True)
@@ -470,15 +490,12 @@
                              help="""Force adb to run in non root mode. By default, app_profiler.py
                                      will try to switch to root mode to be able to profile released
                                      Android apps.""")
-    other_group.add_argument(
-        '--log', choices=['debug', 'info', 'warning'], default='info', help='set log level')
 
     def check_args(args):
         if (not args.app) and (args.compile_java_code or args.activity or args.test):
             log_exit('--compile_java_code, -a, -t can only be used when profiling an Android app.')
 
     args = parser.parse_args()
-    set_log_level(args.log)
     check_args(args)
     if args.app:
         profiler = AppProfiler(args)
diff --git a/simpleperf/scripts/bin/android/arm/simpleperf b/simpleperf/scripts/bin/android/arm/simpleperf
index 81927ac..5be3dc1 100755
--- a/simpleperf/scripts/bin/android/arm/simpleperf
+++ b/simpleperf/scripts/bin/android/arm/simpleperf
Binary files differ
diff --git a/simpleperf/scripts/bin/android/arm64/simpleperf b/simpleperf/scripts/bin/android/arm64/simpleperf
index f915514..63fe413 100755
--- a/simpleperf/scripts/bin/android/arm64/simpleperf
+++ b/simpleperf/scripts/bin/android/arm64/simpleperf
Binary files differ
diff --git a/simpleperf/scripts/bin/android/x86/simpleperf b/simpleperf/scripts/bin/android/x86/simpleperf
index 850e75c..156c5e0 100755
--- a/simpleperf/scripts/bin/android/x86/simpleperf
+++ b/simpleperf/scripts/bin/android/x86/simpleperf
Binary files differ
diff --git a/simpleperf/scripts/bin/android/x86_64/simpleperf b/simpleperf/scripts/bin/android/x86_64/simpleperf
index e23b9e1..f671b68 100755
--- a/simpleperf/scripts/bin/android/x86_64/simpleperf
+++ b/simpleperf/scripts/bin/android/x86_64/simpleperf
Binary files differ
diff --git a/simpleperf/scripts/bin/darwin/x86_64/libsimpleperf_report.dylib b/simpleperf/scripts/bin/darwin/x86_64/libsimpleperf_report.dylib
index 276de8c..6b217c3 100755
--- a/simpleperf/scripts/bin/darwin/x86_64/libsimpleperf_report.dylib
+++ b/simpleperf/scripts/bin/darwin/x86_64/libsimpleperf_report.dylib
Binary files differ
diff --git a/simpleperf/scripts/bin/darwin/x86_64/simpleperf b/simpleperf/scripts/bin/darwin/x86_64/simpleperf
index b17efa5..56c5df8 100755
--- a/simpleperf/scripts/bin/darwin/x86_64/simpleperf
+++ b/simpleperf/scripts/bin/darwin/x86_64/simpleperf
Binary files differ
diff --git a/simpleperf/scripts/bin/linux/x86_64/libsimpleperf_report.so b/simpleperf/scripts/bin/linux/x86_64/libsimpleperf_report.so
index 6131000..9ec685a 100755
--- a/simpleperf/scripts/bin/linux/x86_64/libsimpleperf_report.so
+++ b/simpleperf/scripts/bin/linux/x86_64/libsimpleperf_report.so
Binary files differ
diff --git a/simpleperf/scripts/bin/linux/x86_64/simpleperf b/simpleperf/scripts/bin/linux/x86_64/simpleperf
index 15a3b4d..08deae9 100755
--- a/simpleperf/scripts/bin/linux/x86_64/simpleperf
+++ b/simpleperf/scripts/bin/linux/x86_64/simpleperf
Binary files differ
diff --git a/simpleperf/scripts/bin/windows/x86_64/libsimpleperf_report.dll b/simpleperf/scripts/bin/windows/x86_64/libsimpleperf_report.dll
index 9fef11d..f2bdea8 100755
--- a/simpleperf/scripts/bin/windows/x86_64/libsimpleperf_report.dll
+++ b/simpleperf/scripts/bin/windows/x86_64/libsimpleperf_report.dll
Binary files differ
diff --git a/simpleperf/scripts/bin/windows/x86_64/simpleperf.exe b/simpleperf/scripts/bin/windows/x86_64/simpleperf.exe
index d44b082..a7bc8db 100755
--- a/simpleperf/scripts/bin/windows/x86_64/simpleperf.exe
+++ b/simpleperf/scripts/bin/windows/x86_64/simpleperf.exe
Binary files differ
diff --git a/simpleperf/scripts/binary_cache_builder.py b/simpleperf/scripts/binary_cache_builder.py
index 362b894..4322e2c 100755
--- a/simpleperf/scripts/binary_cache_builder.py
+++ b/simpleperf/scripts/binary_cache_builder.py
@@ -19,9 +19,8 @@
     it, and put them in binary_cache.
 """
 
-from __future__ import print_function
-import argparse
 from dataclasses import dataclass
+import logging
 import os
 import os.path
 from pathlib import Path
@@ -29,8 +28,9 @@
 from typing import List, Optional, Union
 
 from simpleperf_report_lib import ReportLib
-from simpleperf_utils import (AdbHelper, extant_dir, extant_file, flatten_arg_list, log_info,
-                              log_warning, ReadElf, set_log_level, str_to_bytes)
+from simpleperf_utils import (
+    AdbHelper, BaseArgumentParser, extant_dir, extant_file, flatten_arg_list,
+    ReadElf, str_to_bytes)
 
 
 def is_jit_symfile(dso_name):
@@ -130,7 +130,7 @@
         target_dir = os.path.dirname(target_file)
         if not os.path.isdir(target_dir):
             os.makedirs(target_dir)
-        log_info('copy to binary_cache: %s to %s' % (from_path, target_file))
+        logging.info('copy to binary_cache: %s to %s' % (from_path, target_file))
         shutil.copy(from_path, target_file)
 
     def _need_to_copy(self, source_file, target_file, expected_build_id):
@@ -178,10 +178,10 @@
                 os.makedirs(target_dir)
             if os.path.isfile(binary_cache_file):
                 os.remove(binary_cache_file)
-            log_info('pull file to binary_cache: %s to %s' % (binary, binary_cache_file))
+            logging.info('pull file to binary_cache: %s to %s' % (binary, binary_cache_file))
             self._pull_file_from_device(binary, binary_cache_file)
         else:
-            log_info('use current file in binary_cache: %s' % binary_cache_file)
+            logging.info('use current file in binary_cache: %s' % binary_cache_file)
 
     def _read_build_id(self, file_path):
         """read build id of a binary on host."""
@@ -197,7 +197,7 @@
                 self.adb.run(['pull', '/data/local/tmp/' + filename, host_path])):
             self.adb.run(['shell', 'rm', '/data/local/tmp/' + filename])
             return True
-        log_warning('failed to pull %s from device' % device_path)
+        logging.warning('failed to pull %s from device' % device_path)
         return False
 
     def _pull_kernel_symbols(self):
@@ -225,7 +225,7 @@
 
 
 def main():
-    parser = argparse.ArgumentParser(description="""
+    parser = BaseArgumentParser(description="""
         Pull binaries needed by perf.data from device to binary_cache directory.""")
     parser.add_argument('-i', '--perf_data_path', default='perf.data', type=extant_file, help="""
         The path of profiling data.""")
@@ -234,10 +234,7 @@
     parser.add_argument('--disable_adb_root', action='store_true', help="""
         Force adb to run in non root mode.""")
     parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
-    parser.add_argument(
-        '--log', choices=['debug', 'info', 'warning'], default='info', help='set log level')
     args = parser.parse_args()
-    set_log_level(args.log)
     ndk_path = None if not args.ndk_path else args.ndk_path[0]
     builder = BinaryCacheBuilder(ndk_path, args.disable_adb_root)
     symfs_dirs = flatten_arg_list(args.native_lib_dir)
diff --git a/simpleperf/scripts/debug_unwind_reporter.py b/simpleperf/scripts/debug_unwind_reporter.py
index 238c9ac..5c6a0cd 100755
--- a/simpleperf/scripts/debug_unwind_reporter.py
+++ b/simpleperf/scripts/debug_unwind_reporter.py
@@ -37,7 +37,7 @@
 
 import argparse
 from collections import Counter, defaultdict
-from simpleperf_utils import ArgParseFormatter
+from simpleperf_utils import BaseArgumentParser
 from texttable import Texttable
 from typing import Dict, Iterator, List
 
@@ -229,7 +229,7 @@
 
 
 def get_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description=__doc__, formatter_class=ArgParseFormatter)
+    parser = BaseArgumentParser(description=__doc__)
     parser.add_argument('-i', '--input-file', required=True,
                         help='report file generated by debug-unwind cmd')
     parser.add_argument(
diff --git a/simpleperf/scripts/inferno/inferno.py b/simpleperf/scripts/inferno/inferno.py
index 3195494..0d0bfc3 100755
--- a/simpleperf/scripts/inferno/inferno.py
+++ b/simpleperf/scripts/inferno/inferno.py
@@ -32,6 +32,7 @@
 
 import argparse
 import datetime
+import logging
 import os
 import subprocess
 import sys
@@ -41,7 +42,7 @@
 SCRIPTS_PATH = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(SCRIPTS_PATH)
 from simpleperf_report_lib import ReportLib
-from simpleperf_utils import log_exit, log_fatal, log_info, AdbHelper, open_report_in_browser
+from simpleperf_utils import log_exit, log_fatal, AdbHelper, open_report_in_browser
 
 from data_types import Process
 from svg_renderer import get_proper_scaled_time_string, render_svg
@@ -78,10 +79,10 @@
             record_arg_str += "-c %s -e %s " % (num_events, event_name)
         else:
             log_exit("Event format string of -e option cann't be recognized.")
-        log_info("Using event sampling (-c %s -e %s)." % (num_events, event_name))
+        logging.info("Using event sampling (-c %s -e %s)." % (num_events, event_name))
     else:
         record_arg_str += "-f %d " % args.sample_frequency
-        log_info("Using frequency sampling (-f %d)." % args.sample_frequency)
+        logging.info("Using frequency sampling (-f %d)." % args.sample_frequency)
     record_arg_str += "--duration %d " % args.capture_duration
     app_profiler_args += ["-r", record_arg_str]
     returncode = subprocess.call(app_profiler_args)
@@ -149,7 +150,7 @@
         min_event_count = thread.num_events * args.min_callchain_percentage * 0.01
         thread.flamegraph.trim_callchain(min_event_count, args.max_callchain_depth)
 
-    log_info("Parsed %s callchains." % process.num_samples)
+    logging.info("Parsed %s callchains." % process.num_samples)
 
 
 def get_local_asset_content(local_path):
@@ -330,7 +331,7 @@
             process.name = 'system_wide'
         else:
             process.name = args.app or args.native_program or ('Process %d' % args.pid)
-        log_info("Starting data collection stage for '%s'." % process.name)
+        logging.info("Starting data collection stage for '%s'." % process.name)
         if not collect_data(args):
             log_exit("Unable to collect data.")
         if process.pid == 0:
@@ -365,7 +366,7 @@
             log_fatal("Recursion limit exceeded (%s), try --max_callchain_depth." % r)
         raise r
 
-    log_info("Flamegraph generated at '%s'." % report_path)
+    logging.info("Flamegraph generated at '%s'." % report_path)
 
 
 if __name__ == "__main__":
diff --git a/simpleperf/scripts/pprof_proto_generator.py b/simpleperf/scripts/pprof_proto_generator.py
index 103e38c..20f6198 100755
--- a/simpleperf/scripts/pprof_proto_generator.py
+++ b/simpleperf/scripts/pprof_proto_generator.py
@@ -19,18 +19,18 @@
     used by pprof.
 
   Example:
-    python app_profiler.py
-    python pprof_proto_generator.py
+    ./app_profiler.py
+    ./pprof_proto_generator.py
     pprof -text pprof.profile
 """
 
-import argparse
+import logging
 import os
 import os.path
 
 from simpleperf_report_lib import ReportLib
-from simpleperf_utils import (Addr2Nearestline, BinaryFinder, extant_dir,
-                              flatten_arg_list, log_info, log_exit, ReadElf, ToolFinder)
+from simpleperf_utils import (Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir,
+                              flatten_arg_list, log_exit, ReadElf, ToolFinder)
 try:
     import profile_pb2
 except ImportError:
@@ -107,7 +107,8 @@
         for i in range(len(sample.value)):
             print('%svalue[%d] = %d' % (space, i, sample.value[i]))
         for i in range(len(sample.label)):
-            print('%slabel[%d] = ', (space, i))
+          print('%slabel[%d] = %s:%s' % (space, i, self.string(sample.label[i].key),
+                                         self.string(sample.label[i].str)))
 
     def show_location_id(self, location_id, space=''):
         location = self.profile.location[location_id - 1]
@@ -162,11 +163,19 @@
         return self.string_table[string_id]
 
 
+class Label(object):
+    def __init__(self, key_id: int, str_id: int):
+      # See profile.Label.key
+      self.key_id = key_id
+      # See profile.Label.str
+      self.str_id = str_id
+
 class Sample(object):
 
     def __init__(self):
         self.location_ids = []
         self.values = {}
+        self.labels = []
 
     def add_location_id(self, location_id):
         self.location_ids.append(location_id)
@@ -311,6 +320,9 @@
             sample = Sample()
             sample.add_value(sample_type_id, 1)
             sample.add_value(sample_type_id + 1, report_sample.period)
+            label = Label(self.get_string_id("thread"),
+                          self.get_string_id(report_sample.thread_comm))
+            sample.labels.append(label)
             if self._filter_symbol(symbol):
                 location_id = self.get_location_id(report_sample.ip, symbol)
                 sample.add_location_id(location_id)
@@ -479,10 +491,10 @@
     def gen_source_lines(self, jobs: int):
         # 1. Create Addr2line instance
         if not self.config.get('binary_cache_dir'):
-            log_info("Can't generate line information because binary_cache is missing.")
+            logging.info("Can't generate line information because binary_cache is missing.")
             return
         if not ToolFinder.find_tool_path('llvm-symbolizer', self.config['ndk_path']):
-            log_info("Can't generate line information because can't find llvm-symbolizer.")
+            logging.info("Can't generate line information because can't find llvm-symbolizer.")
             return
         # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
         # pass binary_cache_dir to BinaryFinder.
@@ -515,14 +527,18 @@
             sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
             if not sources:
                 continue
-            for (source_id, source) in enumerate(sources):
+            for i, source in enumerate(sources):
                 source_file, source_line, function_name = source
-                function_id = self.get_function_id(function_name, dso_name, 0)
+                if i == 0:
+                    # Don't override original function name from report library, which is more
+                    # accurate when proguard mapping file is given.
+                    function_id = location.lines[0].function_id
+                    # Clear default line info.
+                    location.lines.clear()
+                else:
+                    function_id = self.get_function_id(function_name, dso_name, 0)
                 if function_id == 0:
                     continue
-                if source_id == 0:
-                    # Clear default line info
-                    location.lines = []
                 location.lines.append(self.add_line(source_file, source_line, function_id))
 
         for function in self.function_list:
@@ -554,6 +570,11 @@
             values[sample_type_id] = sample.values[sample_type_id]
         profile_sample.value.extend(values)
 
+        for l in sample.labels:
+          label = profile_sample.label.add()
+          label.key = l.key_id
+          label.str = l.str_id
+
     def gen_profile_mapping(self, mapping):
         profile_mapping = self.profile.mapping.add()
         profile_mapping.id = mapping.id
@@ -591,7 +612,7 @@
 
 
 def main():
-    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
+    parser = BaseArgumentParser(description='Generate pprof profile data in pprof.profile.')
     parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
     parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
         Set profiling data file to report. Default is perf.data""")
diff --git a/simpleperf/scripts/report.py b/simpleperf/scripts/report.py
index e42160c..973d93e 100755
--- a/simpleperf/scripts/report.py
+++ b/simpleperf/scripts/report.py
@@ -24,6 +24,7 @@
 generate report file, and display it.
 """
 
+import logging
 import os
 import os.path
 import re
@@ -184,7 +185,7 @@
                 last_node = node
 
     if has_skipped_callgraph:
-        log_warning('some callgraphs are skipped in brief callgraph mode')
+        logging.warning('some callgraphs are skipped in brief callgraph mode')
 
     return event_reports
 
diff --git a/simpleperf/scripts/report_html.py b/simpleperf/scripts/report_html.py
index 66d167b..36b87b7 100755
--- a/simpleperf/scripts/report_html.py
+++ b/simpleperf/scripts/report_html.py
@@ -22,6 +22,7 @@
 from dataclasses import dataclass
 import datetime
 import json
+import logging
 import os
 from pathlib import Path
 import sys
@@ -29,7 +30,7 @@
 
 from simpleperf_report_lib import ReportLib, SymbolStruct
 from simpleperf_utils import (
-    Addr2Nearestline, ArgParseFormatter, BinaryFinder, get_script_dir, log_exit, log_info, Objdump,
+    Addr2Nearestline, BaseArgumentParser, BinaryFinder, get_script_dir, log_exit, Objdump,
     open_report_in_browser, ReadElf, SourceFileSearcher)
 
 MAX_CALLSTACK_LENGTH = 750
@@ -801,7 +802,7 @@
                 dso_info = objdump.get_dso_info(lib.name, lib.build_id)
                 if not dso_info:
                     continue
-                log_info('Disassemble %s' % dso_info[0])
+                logging.info('Disassemble %s' % dso_info[0])
                 futures: List[Future] = []
                 for function in functions:
                     futures.append(
@@ -955,8 +956,7 @@
 
 
 def get_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        description='report profiling data', formatter_class=ArgParseFormatter)
+    parser = BaseArgumentParser(description='report profiling data')
     parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
                         Set profiling data file to report.""")
     parser.add_argument('-o', '--report_path', default='report.html', help='Set output html file')
@@ -1041,7 +1041,7 @@
 
     if not args.no_browser:
         open_report_in_browser(args.report_path)
-    log_info("Report generated at '%s'." % args.report_path)
+    logging.info("Report generated at '%s'." % args.report_path)
 
 
 if __name__ == '__main__':
diff --git a/simpleperf/scripts/report_sample.py b/simpleperf/scripts/report_sample.py
index d05f1f7..db74214 100755
--- a/simpleperf/scripts/report_sample.py
+++ b/simpleperf/scripts/report_sample.py
@@ -19,15 +19,18 @@
 """
 
 from __future__ import print_function
-import argparse
 from simpleperf_report_lib import ReportLib
+from simpleperf_utils import BaseArgumentParser
+from typing import List
 
 
-def report_sample(record_file, symfs_dir, kallsyms_file, show_tracing_data):
+def report_sample(record_file, symfs_dir, kallsyms_file, show_tracing_data, proguard_mapping_file : List[str] = None):
     """ read record_file, and print each sample"""
     lib = ReportLib()
 
     lib.ShowIpForUnknownSymbol()
+    for file_path in proguard_mapping_file or []:
+      lib.AddProguardMappingFile(file_path)
     if symfs_dir is not None:
         lib.SetSymfs(symfs_dir)
     if record_file is not None:
@@ -44,15 +47,15 @@
         symbol = lib.GetSymbolOfCurrentSample()
         callchain = lib.GetCallChainOfCurrentSample()
 
-        sec = sample.time / 1000000000
-        usec = (sample.time - sec * 1000000000) / 1000
-        print('%s\t%d [%03d] %d.%06d:\t\t%d %s:' % (sample.thread_comm,
-                                                    sample.tid, sample.cpu, sec,
-                                                    usec, sample.period, event.name))
-        print('%16x\t%s (%s)' % (sample.ip, symbol.symbol_name, symbol.dso_name))
+        sec = sample.time // 1000000000
+        usec = (sample.time - sec * 1000000000) // 1000
+        print('%s\t%d/%d [%03d] %d.%06d: %d %s:' % (sample.thread_comm,
+                                                       sample.pid, sample.tid, sample.cpu, sec,
+                                                       usec, sample.period, event.name))
+        print('%16x %s (%s)' % (sample.ip, symbol.symbol_name, symbol.dso_name))
         for i in range(callchain.nr):
             entry = callchain.entries[i]
-            print('%16x\t%s (%s)' % (entry.ip, entry.symbol.symbol_name, entry.symbol.dso_name))
+            print('%16x %s (%s)' % (entry.ip, entry.symbol.symbol_name, entry.symbol.dso_name))
         if show_tracing_data:
             data = lib.GetTracingDataOfCurrentSample()
             if data:
@@ -63,15 +66,18 @@
 
 
 def main():
-    parser = argparse.ArgumentParser(description='Report samples in perf.data.')
+    parser = BaseArgumentParser(description='Report samples in perf.data.')
     parser.add_argument('--symfs',
                         help='Set the path to find binaries with symbols and debug info.')
     parser.add_argument('--kallsyms', help='Set the path to find kernel symbols.')
     parser.add_argument('record_file', nargs='?', default='perf.data',
                         help='Default is perf.data.')
     parser.add_argument('--show_tracing_data', action='store_true', help='print tracing data.')
+    parser.add_argument(
+        '--proguard-mapping-file', nargs='+',
+        help='Add proguard mapping file to de-obfuscate symbols')
     args = parser.parse_args()
-    report_sample(args.record_file, args.symfs, args.kallsyms, args.show_tracing_data)
+    report_sample(args.record_file, args.symfs, args.kallsyms, args.show_tracing_data, args.proguard_mapping_file)
 
 
 if __name__ == '__main__':
diff --git a/simpleperf/scripts/run_simpleperf_on_device.py b/simpleperf/scripts/run_simpleperf_on_device.py
index 4cd167c..39e0d91 100755
--- a/simpleperf/scripts/run_simpleperf_on_device.py
+++ b/simpleperf/scripts/run_simpleperf_on_device.py
@@ -21,11 +21,11 @@
 """
 import subprocess
 import sys
-from simpleperf_utils import AdbHelper, disable_debug_log, get_target_binary_path
+from simpleperf_utils import AdbHelper, get_target_binary_path, Log
 
 
 def main():
-    disable_debug_log()
+    Log.init()
     adb = AdbHelper()
     device_arch = adb.get_device_arch()
     simpleperf_binary = get_target_binary_path(device_arch, 'simpleperf')
diff --git a/simpleperf/scripts/run_simpleperf_without_usb_connection.py b/simpleperf/scripts/run_simpleperf_without_usb_connection.py
index 19700ee..8beddcd 100755
--- a/simpleperf/scripts/run_simpleperf_without_usb_connection.py
+++ b/simpleperf/scripts/run_simpleperf_without_usb_connection.py
@@ -26,13 +26,12 @@
     during profiling time, simpleperf only records the first running.
 """
 
-from __future__ import print_function
-import argparse
+import logging
 import subprocess
 import sys
 import time
 
-from simpleperf_utils import AdbHelper, get_target_binary_path, log_warning
+from simpleperf_utils import AdbHelper, BaseArgumentParser, get_target_binary_path
 
 
 def start_recording(args):
@@ -65,7 +64,7 @@
     adb = AdbHelper()
     result = adb.run(['shell', 'pidof', 'simpleperf'])
     if not result:
-        log_warning('No simpleperf process on device. The recording has ended.')
+        logging.warning('No simpleperf process on device. The recording has ended.')
     else:
         adb.run(['shell', 'pkill', '-l', '2', 'simpleperf'])
         print('Waiting for simpleperf process to finish...')
@@ -77,8 +76,7 @@
 
 
 def main():
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser = BaseArgumentParser(description=__doc__)
     subparsers = parser.add_subparsers()
     start_parser = subparsers.add_parser('start', help='Start recording.')
     start_parser.add_argument('-r', '--record_options',
diff --git a/simpleperf/scripts/simpleperf_utils.py b/simpleperf/scripts/simpleperf_utils.py
index 466c448..648ddd1 100644
--- a/simpleperf/scripts/simpleperf_utils.py
+++ b/simpleperf/scripts/simpleperf_utils.py
@@ -33,6 +33,9 @@
 from typing import Dict, Iterator, List, Optional, Set, Union
 
 
+NDK_ERROR_MESSAGE = "Please install the Android NDK (https://developer.android.com/studio/projects/install-ndk), then set NDK path with --ndk_path option."
+
+
 def get_script_dir() -> str:
     return os.path.dirname(os.path.realpath(__file__))
 
@@ -57,42 +60,6 @@
     return sys.version_info >= (3, 0)
 
 
-def log_debug(msg: str):
-    logging.debug(msg)
-
-
-def log_info(msg: str):
-    logging.info(msg)
-
-
-def log_warning(msg: str):
-    logging.warning(msg)
-
-
-def log_fatal(msg: str):
-    raise Exception(msg)
-
-
-def log_exit(msg: str):
-    sys.exit(msg)
-
-
-def disable_debug_log():
-    logging.getLogger().setLevel(logging.WARN)
-
-
-def set_log_level(level_name: str):
-    if level_name == 'debug':
-        level = logging.DEBUG
-    elif level_name == 'info':
-        level = logging.INFO
-    elif level_name == 'warning':
-        level = logging.WARNING
-    else:
-        log_fatal('unknown log level: %s' % level_name)
-    logging.getLogger().setLevel(level)
-
-
 def str_to_bytes(str_value: str) -> bytes:
     if not is_python3():
         return str_value
@@ -326,7 +293,7 @@
     def run_and_return_output(self, adb_args: List[str], log_output: bool = False,
                               log_stderr: bool = False) -> Tuple[bool, str]:
         adb_args = [self.adb_path] + adb_args
-        log_debug('run adb cmd: %s' % adb_args)
+        logging.debug('run adb cmd: %s' % adb_args)
         env = None
         if self.serial_number:
             env = os.environ.copy()
@@ -339,10 +306,10 @@
         returncode = subproc.returncode
         result = (returncode == 0)
         if log_output and stdout_data:
-            log_debug(stdout_data)
+            logging.debug(stdout_data)
         if log_stderr and stderr_data:
-            log_warning(stderr_data)
-        log_debug('run adb cmd: %s  [result %s]' % (adb_args, result))
+            logging.warning(stderr_data)
+        logging.debug('run adb cmd: %s  [result %s]' % (adb_args, result))
         return (result, stdout_data)
 
     def check_run(self, adb_args: List[str], log_output: bool = False):
@@ -361,7 +328,7 @@
             return
         if 'root' not in stdoutdata:
             return
-        log_info('unroot adb')
+        logging.info('unroot adb')
         self.run(['unroot'])
         self.run(['wait-for-device'])
         time.sleep(1)
@@ -574,7 +541,7 @@
             binary_finder: BinaryFinder, with_function_name: bool):
         self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path)
         if not self.symbolizer_path:
-            log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.")
+            log_exit("Can't find llvm-symbolizer. " + NDK_ERROR_MESSAGE)
         self.readelf = ReadElf(ndk_path)
         self.dso_map: Dict[str, Addr2Nearestline.Dso] = {}  # map from dso_path to Dso.
         self.binary_finder = binary_finder
@@ -600,11 +567,11 @@
         real_path = self.binary_finder.find_binary(dso_path, dso.build_id)
         if not real_path:
             if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']:
-                log_debug("Can't find dso %s" % dso_path)
+                logging.debug("Can't find dso %s" % dso_path)
             return
 
         if not self._check_debug_line_section(real_path):
-            log_debug("file %s doesn't contain .debug_line section." % real_path)
+            logging.debug("file %s doesn't contain .debug_line section." % real_path)
             return
 
         addr_step = self._get_addr_step(real_path)
@@ -830,7 +797,7 @@
             if not objdump_path:
                 objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch)
             if not objdump_path:
-                log_exit("Can't find llvm-objdump. Please set ndk path with --ndk_path option.")
+                log_exit("Can't find llvm-objdump. " + NDK_ERROR_MESSAGE)
             self.objdump_paths[arch] = objdump_path
 
         # 3. Run objdump.
@@ -867,7 +834,7 @@
     def __init__(self, ndk_path: Optional[str]):
         self.readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path)
         if not self.readelf_path:
-            log_exit("Can't find llvm-readelf. Please set ndk path with --ndk_path option.")
+            log_exit("Can't find llvm-readelf. " + NDK_ERROR_MESSAGE)
 
     @staticmethod
     def is_elf_file(path: Union[Path, str]) -> bool:
@@ -968,9 +935,53 @@
     return path
 
 
+def log_fatal(msg: str):
+    raise Exception(msg)
+
+
+def log_exit(msg: str):
+    sys.exit(msg)
+
+
+class LogFormatter(logging.Formatter):
+    """ Use custom logging format. """
+
+    def __init__(self):
+        super().__init__('%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) %(message)s')
+
+    def formatTime(self, record, datefmt):
+        return super().formatTime(record, '%H:%M:%S') + ',%03d' % record.msecs
+
+
+class Log:
+    initialized = False
+
+    @classmethod
+    def init(cls, log_level: str = 'info'):
+        assert not cls.initialized
+        cls.initialized = True
+        cls.logger = logging.root
+        cls.logger.setLevel(log_level.upper())
+        handler = logging.StreamHandler()
+        handler.setFormatter(LogFormatter())
+        cls.logger.addHandler(handler)
+
+
 class ArgParseFormatter(
         argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
     pass
 
 
-logging.getLogger().setLevel(logging.DEBUG)
+class BaseArgumentParser(argparse.ArgumentParser):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs, formatter_class=ArgParseFormatter)
+
+    def parse_known_args(self, *args, **kwargs):
+        self.add_argument(
+            '--log', choices=['debug', 'info', 'warning'],
+            default='info', help='set log level')
+        namespace, left_args = super().parse_known_args(*args, **kwargs)
+
+        if not Log.initialized:
+            Log.init(namespace.log)
+        return namespace, left_args
diff --git a/simpleperf/scripts/test/api_profiler_test.py b/simpleperf/scripts/test/api_profiler_test.py
index 037b8fc..81a3e5a 100644
--- a/simpleperf/scripts/test/api_profiler_test.py
+++ b/simpleperf/scripts/test/api_profiler_test.py
@@ -14,10 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import os
 import time
 
-from simpleperf_utils import log_info, remove
+from simpleperf_utils import remove
 from . test_utils import TestBase, TestHelper
 
 
@@ -25,7 +26,7 @@
     def run_api_test(self, package_name, apk_name, expected_reports, min_android_version):
         adb = TestHelper.adb
         if TestHelper.android_version < ord(min_android_version) - ord('L') + 5:
-            log_info('skip this test on Android < %s.' % min_android_version)
+            logging.info('skip this test on Android < %s.' % min_android_version)
             return
         # step 1: Prepare profiling.
         self.run_cmd(['api_profiler.py', 'prepare'])
diff --git a/simpleperf/scripts/test/do_test.py b/simpleperf/scripts/test/do_test.py
index f27c0cb..17ff349 100755
--- a/simpleperf/scripts/test/do_test.py
+++ b/simpleperf/scripts/test/do_test.py
@@ -39,7 +39,7 @@
 from typing import List, Optional
 import unittest
 
-from simpleperf_utils import extant_dir, log_exit, remove, ArgParseFormatter
+from simpleperf_utils import BaseArgumentParser, extant_dir, log_exit, remove
 
 from . api_profiler_test import *
 from . annotate_test import *
@@ -61,7 +61,7 @@
 
 
 def get_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description=__doc__, formatter_class=ArgParseFormatter)
+    parser = BaseArgumentParser(description=__doc__)
     parser.add_argument('--browser', action='store_true', help='open report html file in browser.')
     parser.add_argument(
         '-d', '--device', nargs='+',
diff --git a/simpleperf/scripts/test/pprof_proto_generator_test.py b/simpleperf/scripts/test/pprof_proto_generator_test.py
index 51d8d8b..72089f7 100644
--- a/simpleperf/scripts/test/pprof_proto_generator_test.py
+++ b/simpleperf/scripts/test/pprof_proto_generator_test.py
@@ -19,7 +19,7 @@
 from typing import List, Optional
 
 from binary_cache_builder import BinaryCacheBuilder
-from pprof_proto_generator import load_pprof_profile
+from pprof_proto_generator import load_pprof_profile, PprofProfileGenerator
 from . test_utils import TestBase, TestHelper
 
 
@@ -172,3 +172,44 @@
                     self.assertEqual(function.start_line, check_item.func_start_line)
                     break
             self.assertTrue(found, check_item)
+
+    def test_function_name_not_changed_by_line_info(self):
+        """ Adding line info shouldn't override function names from report library, which are more
+            accurate when proguard mapping file is given.
+        """
+        testdata_file = TestHelper.testdata_path('runtest_two_functions_arm64_perf.data')
+
+        # Build binary_cache.
+        binary_cache_builder = BinaryCacheBuilder(TestHelper.ndk_path, False)
+        binary_cache_builder.build_binary_cache(testdata_file, [TestHelper.testdata_dir])
+
+        # Read recording file.
+        config = {'ndk_path': None, 'max_chain_length': 1000000, 'proguard_mapping_file': None}
+        generator = PprofProfileGenerator(config)
+        generator.load_record_file(testdata_file)
+
+        # Change function name.
+        sample = generator.sample_list[0]
+        self.assertGreaterEqual(len(sample.location_ids), 1)
+        location = generator.location_list[sample.location_ids[0] - 1]
+        self.assertGreaterEqual(len(location.lines), 1)
+        function = generator.get_function(location.lines[0].function_id)
+        function_name = generator.get_string(function.name_id)
+        self.assertEqual(function_name, 'Function1()')
+        location.lines[0].function_id = generator.get_function_id(
+            'NewFunction1()', generator.get_string(function.dso_name_id), function.vaddr_in_dso)
+
+        # Add line info.
+        generator.gen_source_lines(1)
+
+        # Check function name and line info.
+        sample = generator.sample_list[0]
+        self.assertGreaterEqual(len(sample.location_ids), 1)
+        location = generator.location_list[sample.location_ids[0] - 1]
+        self.assertGreaterEqual(len(location.lines), 1)
+        function = generator.get_function(location.lines[0].function_id)
+        function_name = generator.get_string(function.name_id)
+        self.assertEqual(function_name, 'NewFunction1()')
+        self.assertNotEqual(function.source_filename_id, 0)
+        source_filename = generator.get_string(function.source_filename_id)
+        self.assertIn('two_functions.cpp', source_filename)
diff --git a/simpleperf/scripts/update.py b/simpleperf/scripts/update.py
index d27fff3..cc30283 100755
--- a/simpleperf/scripts/update.py
+++ b/simpleperf/scripts/update.py
@@ -39,7 +39,7 @@
     InstallEntry('MODULES-IN-system-extras-simpleperf',
                  'simpleperf/android/arm64/simpleperf_ndk64',
                  'android/arm64/simpleperf'),
-    InstallEntry('MODULES-IN-system-extras-simpleperf',
+    InstallEntry('MODULES-IN-system-extras-simpleperf_arm',
                  'simpleperf/android/arm/simpleperf_ndk',
                  'android/arm/simpleperf'),
     InstallEntry('MODULES-IN-system-extras-simpleperf_x86',
diff --git a/simpleperf/test_util.cpp b/simpleperf/test_util.cpp
index 547eaf9..594f349 100644
--- a/simpleperf/test_util.cpp
+++ b/simpleperf/test_util.cpp
@@ -35,11 +35,11 @@
     pclose(fp);
     std::string s = buf;
     in_native_abi = 1;
-    if (GetBuildArch() == ARCH_X86_32 || GetBuildArch() == ARCH_X86_64) {
+    if (GetTargetArch() == ARCH_X86_32 || GetTargetArch() == ARCH_X86_64) {
       if (s.find("86") == std::string::npos) {
         in_native_abi = 0;
       }
-    } else if (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64) {
+    } else if (GetTargetArch() == ARCH_ARM || GetTargetArch() == ARCH_ARM64) {
       if (s.find("arm") == std::string::npos && s.find("aarch64") == std::string::npos) {
         in_native_abi = 0;
       }