#!/usr/bin/env python3
"""Read a simpleperf profiling data file (perf.data), collect the binaries
needed by it, and put them in binary_cache.
"""
from collections import defaultdict
import logging
import os
import os.path
from pathlib import Path
import shutil
import sys
from typing import Dict, List, Optional, Tuple, Union
from simpleperf_report_lib import ReportLib
from simpleperf_utils import (
AdbHelper, BaseArgumentParser, extant_dir, extant_file, flatten_arg_list,
ReadElf, str_to_bytes)
def is_jit_symfile(dso_name: str) -> bool:
    """Return True if the last '/'-separated component of dso_name starts with 'TemporaryFile'.

    Callers use this to skip such entries when collecting binaries; judging by
    the function name they are symbol files for JIT-compiled code.
    """
    return dso_name.rsplit('/', 1)[-1].startswith('TemporaryFile')
class BinaryCache:
    """Maps binaries recorded in profiling data to file paths under a cache directory."""

    def __init__(self, binary_dir: Path):
        # Root directory of the cache; every returned path is located under it.
        self.binary_dir = binary_dir

    def get_path_in_cache(self, device_path: str, build_id: str) -> Path:
        """Given a binary path recorded in the profiling data, return its path in the cache.

        device_path: '/'-separated path of the binary as recorded on device.
        build_id: build id of the binary; may be empty/None when unavailable.
        """
        if build_id:
            filename = device_path.split('/')[-1]
            # Add build id to make the filename unique. The first two characters
            # are dropped (presumably a '0x' prefix -- confirm against the caller).
            return self.binary_dir / build_id[2:] / filename

        # For elf file without build id, we can only follow its path on device. Otherwise,
        # simpleperf can't find it. However, we don't prefer this way. Because:
        # 1) It doesn't work for native libs loaded directly from apk
        #    (android:extractNativeLibs="false").
        # 2) It may exceed path limit on windows.
        #
        # Strip every leading '/', not just one: a remaining leading separator would
        # make os.path.join() discard binary_dir and return a path outside the cache.
        relative_path = device_path.lstrip('/').replace('/', os.sep)
        return Path(os.path.join(self.binary_dir, relative_path))
class BinarySource:
    """ Source to find debug binaries. """

    def __init__(self, readelf: ReadElf):
        # Helper used to read build ids from candidate files.
        self.readelf = readelf

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """ Put the binaries needed by the profiling data into binary_cache.

        binaries: maps from binary path to its build_id in the profiling data.
        binary_cache: the cache receiving the binaries.

        Subclasses must override this; the base implementation always raises.
        """
        # NotImplementedError is still an Exception, so callers catching
        # Exception keep working.
        raise NotImplementedError('not implemented')

    def read_build_id(self, path: Path):
        """Return whatever self.readelf.get_build_id() reports for path."""
        return self.readelf.get_build_id(path)
class BinarySourceFromDevice(BinarySource):
    """ Pull binaries from device. """

    def __init__(self, readelf: ReadElf, disable_adb_root: bool):
        # The base initializer stores readelf, which read_build_id() depends on.
        super().__init__(readelf)
        self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root)

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """Pull each binary in `binaries`, then the kernel symbols, into binary_cache.

        Does nothing when no device is available.
        """
        if not self.adb.is_device_available():
            return
        for path, build_id in binaries.items():
            self.collect_binary(path, build_id, binary_cache)
        self.pull_kernel_symbols(binary_cache.binary_dir / 'kallsyms')

    def collect_binary(self, path: str, build_id: str, binary_cache: BinaryCache):
        """Pull one binary into the cache, skipping paths that can't name a device file."""
        if not path.startswith('/') or path == "//anon" or path.startswith("/dev/"):
            # [kernel.kallsyms] or unknown, or something we can't find binary.
            return
        binary_cache_file = binary_cache.get_path_in_cache(path, build_id)
        self.check_and_pull_binary(path, build_id, binary_cache_file)

    def check_and_pull_binary(self, path: str, expected_build_id: str, binary_cache_file: Path):
        """If the binary_cache_file exists and has the expected_build_id, there
        is no need to pull the binary from device. Otherwise, pull it.
        """
        if binary_cache_file.is_file() and (
            not expected_build_id or expected_build_id == self.read_build_id(binary_cache_file)
        ):
            logging.info('use current file in binary_cache: %s', binary_cache_file)
            return

        logging.info('pull file to binary_cache: %s to %s', path, binary_cache_file)
        target_dir = binary_cache_file.parent
        try:
            os.makedirs(target_dir, exist_ok=True)
            if binary_cache_file.is_file():
                # A file with a mismatching build id is stale; remove it so a
                # failed pull can't leave the wrong binary in the cache.
                binary_cache_file.unlink()
            success = self.pull_file_from_device(path, binary_cache_file)
        except FileNotFoundError:
            # It happens on windows when the filename or extension is too long.
            success = False
        if not success:
            logging.warning('failed to pull %s from device', path)

    def pull_file_from_device(self, device_path: str, host_path: Path) -> bool:
        """Pull device_path to host_path; return True on success.

        NOTE(review): self.adb.run() is assumed to take an adb argument list and
        return a truthy value on success -- confirm against AdbHelper.
        """
        if self.adb.run(['pull', device_path, str(host_path)]):
            return True
        # On non-root devices, we can't pull /data/app/XXX/base.odex directly.
        # Instead, we can first copy the file to /data/local/tmp, then pull it.
        filename = device_path[device_path.rfind('/')+1:]
        tmp_path = '/data/local/tmp/' + filename
        if not self.adb.run(['shell', 'cp', device_path, '/data/local/tmp']):
            return False
        pulled = bool(self.adb.run(['pull', tmp_path, str(host_path)]))
        # Remove the temporary copy whether or not the pull worked, so a failed
        # pull doesn't leave files behind on the device.
        self.adb.run(['shell', 'rm', tmp_path])
        return pulled

    def pull_kernel_symbols(self, file_path: Path):
        """Replace file_path with the device's /proc/kallsyms, if root is available."""
        if file_path.is_file():
            file_path.unlink()
        if self.adb.switch_to_root():
            self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict'])
            self.adb.run(['pull', '/proc/kallsyms', str(file_path)])
class BinarySourceFromLibDirs(BinarySource):
    """ Collect binaries from lib dirs. """

    def __init__(self, readelf: ReadElf, lib_dirs: List[Path]):
        # The base initializer stores readelf, which read_build_id() depends on.
        super().__init__(readelf)
        self.lib_dirs = lib_dirs
        # The fields below are filled in by collect_binaries().
        # filename -> [(device path, build id), ...]
        self.filename_map: Optional[Dict[str, List[Tuple[str, str]]]] = None
        # build id -> device path
        self.build_id_map: Optional[Dict[str, str]] = None
        self.binary_cache: Optional[BinaryCache] = None

    def collect_binaries(self, binaries: Dict[str, str], binary_cache: BinaryCache):
        """Copy files in lib_dirs that match an entry of `binaries` into binary_cache."""
        # The search methods look candidates up in these maps, so build them first.
        self.create_filename_map(binaries)
        self.create_build_id_map(binaries)
        self.binary_cache = binary_cache

        # Search all files in lib_dirs, and copy matching files to build_cache.
        for lib_dir in self.lib_dirs:
            if self.is_platform_symbols_dir(lib_dir):
                self.search_platform_symbols_dir(lib_dir)
            else:
                self.search_dir(lib_dir)

    def create_filename_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from filename to binaries having the name. """
        self.filename_map = defaultdict(list)
        for path, build_id in binaries.items():
            filename = path[path.rfind('/') + 1:]
            self.filename_map[filename].append((path, build_id))

    def create_build_id_map(self, binaries: Dict[str, str]):
        """ Create a map mapping from build id to binary path. """
        # Binaries without a build id are left out; if two paths share a build
        # id, the later one wins.
        self.build_id_map = {
            build_id: path for path, build_id in binaries.items() if build_id}

    def is_platform_symbols_dir(self, lib_dir: Path):
        """ Check if lib_dir points to $ANDROID_PRODUCT_OUT/symbols. """
        if lib_dir.name != 'symbols':
            return False
        return any(entry.name == 'system' for entry in lib_dir.iterdir())

    def search_platform_symbols_dir(self, lib_dir: Path):
        """ Platform symbols dir contains too many binaries. Reading build ids for
        all of them takes a long time. So we only read build ids for binaries
        having names exist in filename_map.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                binaries = self.filename_map.get(filename)
                if not binaries:
                    continue
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                for path, expected_build_id in binaries:
                    if expected_build_id == build_id:
                        self.copy_to_binary_cache(file_path, build_id, path)

    def search_dir(self, lib_dir: Path):
        """ For a normal lib dir, it's unlikely to contain many binaries. So we can read
        build ids for all binaries in it. But users may give debug binaries with a name
        different from the one recorded in the profiling data. So we should only rely
        on build id if it is available.
        """
        for root, _, files in os.walk(lib_dir):
            for filename in files:
                file_path = Path(os.path.join(root, filename))
                build_id = self.read_build_id(file_path)
                if build_id:
                    # For elf file with build id, use build id to match.
                    device_path = self.build_id_map.get(build_id)
                    if device_path:
                        self.copy_to_binary_cache(file_path, build_id, device_path)
                elif self.readelf.is_elf_file(file_path):
                    # For elf file without build id, use filename to match.
                    for path, expected_build_id in self.filename_map.get(filename, []):
                        if not expected_build_id:
                            self.copy_to_binary_cache(file_path, '', path)

    def copy_to_binary_cache(
            self, from_path: Path, expected_build_id: str, device_path: str):
        """Copy from_path to the cache location of device_path, unless the cached file is better."""
        to_path = self.binary_cache.get_path_in_cache(device_path, expected_build_id)
        if not self.need_to_copy(from_path, to_path, expected_build_id):
            # The existing file in binary_cache can provide more information, so no need to copy.
            return
        os.makedirs(to_path.parent, exist_ok=True)
        logging.info('copy to binary_cache: %s to %s', from_path, to_path)
        shutil.copy(from_path, to_path)

    def need_to_copy(self, from_path: Path, to_path: Path, expected_build_id: str):
        """Return True if to_path is missing, has another build id, or is more stripped."""
        if not to_path.is_file() or self.read_build_id(to_path) != expected_build_id:
            return True
        return self.get_file_stripped_level(from_path) < self.get_file_stripped_level(to_path)

    def get_file_stripped_level(self, path: Path) -> int:
        """Return stripped level of an ELF file. Larger value means more stripped."""
        sections = self.readelf.get_sections(path)
        if '.debug_line' in sections:
            return 0
        if '.symtab' in sections:
            return 1
        return 2
class BinaryCacheBuilder:
    """Collect all binaries needed by the profiling data in binary_cache."""

    def __init__(self, ndk_path: Optional[str], disable_adb_root: bool):
        self.readelf = ReadElf(ndk_path)
        self.device_source = BinarySourceFromDevice(self.readelf, disable_adb_root)
        self.binary_cache_dir = Path('binary_cache')
        self.binary_cache = BinaryCache(self.binary_cache_dir)
        # Maps binary path -> build id; filled by collect_used_binaries().
        self.binaries: Dict[str, str] = {}

    def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]) -> bool:
        """Build the cache for perf_data_path; return False if a symfs dir is missing.

        Order matters: the set of used binaries must be known before either
        source is searched, and the build id list is written last so that it
        covers every file placed in the cache.
        """
        # create_build_id_list() writes into this directory, so it must exist.
        self.binary_cache_dir.mkdir(exist_ok=True)
        self.collect_used_binaries(perf_data_path)
        if not self.copy_binaries_from_symfs_dirs(symfs_dirs):
            return False
        self.pull_binaries_from_device()
        self.create_build_id_list()
        return True

    def collect_used_binaries(self, perf_data_path):
        """Read the profiling data, collect all used binaries and their build id(if available)."""
        # A dict mapping from binary name to build_id
        binaries = {}
        lib = ReportLib()
        lib.SetRecordFile(perf_data_path)
        try:
            while lib.GetNextSample() is not None:
                symbols = [lib.GetSymbolOfCurrentSample()]
                callchain = lib.GetCallChainOfCurrentSample()
                # NOTE(review): assumes the callchain exposes `nr` entries via
                # `entries[i].symbol` -- confirm against simpleperf_report_lib.
                for i in range(callchain.nr):
                    symbols.append(callchain.entries[i].symbol)

                for symbol in symbols:
                    dso_name = symbol.dso_name
                    if is_jit_symfile(dso_name):
                        continue
                    name = 'vmlinux' if dso_name == '[kernel.kallsyms]' else dso_name
                    # Check the stored key (not dso_name) so the kernel's build
                    # id is not looked up again for every sample.
                    if name not in binaries:
                        binaries[name] = lib.GetBuildIdForPath(dso_name)
        finally:
            lib.Close()
        self.binaries = binaries

    @staticmethod
    def _resolve_lib_dirs(symfs_dirs: List[Union[str, Path]]) -> Optional[List[Path]]:
        """Convert symfs_dirs to Paths; return None (after logging) if one is not a directory."""
        lib_dirs: List[Path] = []
        for symfs_dir in symfs_dirs:
            lib_dir = Path(symfs_dir)
            if not lib_dir.is_dir():
                logging.error("can't find dir %s", lib_dir)
                return None
            lib_dirs.append(lib_dir)
        return lib_dirs

    def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[str, Path]]) -> bool:
        """Copy matching binaries from symfs_dirs into the cache.

        Returns False if any given directory doesn't exist, True otherwise
        (including when symfs_dirs is empty or None).
        """
        if symfs_dirs:
            lib_dirs = self._resolve_lib_dirs(symfs_dirs)
            if lib_dirs is None:
                return False
            lib_dir_source = BinarySourceFromLibDirs(self.readelf, lib_dirs)
            lib_dir_source.collect_binaries(self.binaries, self.binary_cache)
        return True

    def pull_binaries_from_device(self):
        """Let the device source pull the used binaries into the cache."""
        self.device_source.collect_binaries(self.binaries, self.binary_cache)

    def create_build_id_list(self):
        """ Create build_id_list. So report scripts can find a binary by its build_id instead of
        path.
        """
        build_id_list_path = self.binary_cache_dir / 'build_id_list'
        # Write in binary mode to avoid "\r\n" problem on windows, which can confuse simpleperf.
        with open(build_id_list_path, 'wb') as fh:
            for root, _, files in os.walk(self.binary_cache_dir):
                for filename in files:
                    path = Path(os.path.join(root, filename))
                    build_id = self.readelf.get_build_id(path)
                    if build_id:
                        relative_path = path.relative_to(self.binary_cache_dir)
                        line = f'{build_id}={relative_path}\n'
                        fh.write(str_to_bytes(line))

    def find_path_in_cache(self, device_path: str) -> Optional[Path]:
        """Return the cache path for device_path, using its recorded build id if any."""
        build_id = self.binaries.get(device_path)
        return self.binary_cache.get_path_in_cache(device_path, build_id)
def main() -> bool:
    """Parse command line arguments and build the binary cache.

    Returns the result of BinaryCacheBuilder.build_binary_cache().
    """
    parser = BaseArgumentParser(description="""
        Pull binaries needed by perf.data from device to binary_cache directory.""")
    # Default to 'perf.data' in the current directory: an empty default can
    # never name an existing file.
    # NOTE(review): assumes extant_file rejects paths that are not files -- confirm.
    parser.add_argument('-i', '--perf_data_path', default='perf.data', type=extant_file, help="""
        The path of profiling data.""")
    parser.add_argument('-lib', '--native_lib_dir', type=extant_dir, nargs='+', help="""
        Path to find debug version of native shared libraries used in the app.""", action='append')
    parser.add_argument('--disable_adb_root', action='store_true', help="""
        Force adb to run in non root mode.""")
    parser.add_argument('--ndk_path', nargs=1, help='Find tools in the ndk path.')
    args = parser.parse_args()
    # nargs=1 yields a one-element list; unwrap it.
    ndk_path = None if not args.ndk_path else args.ndk_path[0]
    builder = BinaryCacheBuilder(ndk_path, args.disable_adb_root)
    # -lib combines nargs='+' with action='append', giving a list of lists.
    symfs_dirs = flatten_arg_list(args.native_lib_dir)
    return builder.build_binary_cache(args.perf_data_path, symfs_dirs)
if __name__ == '__main__':
    # Exit status 0 when main() reports success, 1 otherwise.
    sys.exit(0 if main() else 1)