| #!/usr/bin/python |
| # |
| # lockstat Trace and display lock contention stats |
| # |
| # USAGE: lockstat |
| |
| # Licensed under the Apache License, Version 2.0 (the "License") |
| # 28-Jul-2017 Gisle Dankel Created this. |
| |
| from bcc import BPF |
| from ctypes import c_int |
| from time import sleep |
| from datetime import datetime |
| import argparse |
| import subprocess |
| import os |
| |
| # One Lock object per TGID and uaddr. |
| class Lock(object): |
| def __init__(self): |
| self.contention_count = 0 |
| self.elapsed_blocked = 0 |
| self.thread_count = 0 |
| self.last_stack_syms = [] |
| |
| def update(self, count, block_time, last_stack_syms): |
| self.contention_count += count |
| self.elapsed_blocked += block_time |
| self.thread_count += 1 |
| self.last_stack_syms = last_stack_syms |
| |
| def run_command_get_pid(command): |
| p = subprocess.Popen(command.split()) |
| return p.pid |
| |
| examples = """ |
| EXAMPLES: |
| |
| ./lockstat |
| Trace calls to sys_futex and display contented locks every 5 seconds |
| for all processes running on the system |
| ./lockstat -p <pid> |
| Trace only for the specified pid and display contended locks |
| every 5 seconds |
| ./lockstat -p <pid> -t |
| Trace for a specified pid and print a message on each entry and exit to |
| sys_futex until interrupted or killed |
| ./lockstat -p <pid> 10 |
| Trace the specified pid and show a message every 10 seconds |
| ./lockstat -c <command> 1 30 |
| Run the specified command and display contended locks every 1 second |
| for a total of 30 times |
| """ |
| |
| description = """ |
| Trace kernel futex events. |
| These often occur because of lock contention, e.g. involving a pthread_mutex. |
| This script resemblers the following SystemTap example: |
| https://sourceware.org/systemtap/SystemTap_Beginners_Guide/futexcontentionsect.html |
| """ |
| |
| parser = argparse.ArgumentParser(description=description, |
| formatter_class=argparse.RawDescriptionHelpFormatter, |
| epilog=examples) |
| parser.add_argument("-p", "--pid", type=int, default=-1, |
| help="the PID to trace; if not specified, trace all") |
| parser.add_argument("-t", "--trace", action="store_true", |
| help="print trace messages for each futex enter/exit") |
| parser.add_argument("interval", nargs="?", default=5, type=int, |
| help="interval in seconds to print summary") |
| parser.add_argument("count", nargs="?", type=int, |
| help="number of times to print the report before exiting") |
| parser.add_argument("-c", "--command", |
| help="execute and trace the specified command") |
| |
| args = parser.parse_args() |
| |
| pid = args.pid |
| command = args.command |
| interval = args.interval |
| num_prints = args.count |
| trace_all = args.trace |
| |
| if command is not None: |
| print("Executing '%s' and tracing the resulting process." % command) |
| pid = run_command_get_pid(command) |
| |
| bpf_source = """ |
| #include <uapi/linux/futex.h> |
| #include <uapi/linux/ptrace.h> |
| #include <linux/sched.h> |
| #include <linux/time.h> |
| |
| struct comm_t { |
| char name[TASK_COMM_LEN]; |
| }; |
| |
| struct lock_key_t { |
| u64 uaddr; |
| u32 pid; |
| u32 tgid; |
| }; |
| |
| struct lock_info_t { |
| u64 elapsed_blocked; |
| u64 contention_count; |
| u64 sid; |
| }; |
| |
| BPF_HASH(pid_lock, u32, u64); |
| BPF_HASH(pid_blocktime, u32, u64); |
| BPF_HASH(tgid_comm, u32, struct comm_t); |
| BPF_HASH(lock_stats, struct lock_key_t, struct lock_info_t, 1000000); |
| BPF_STACK_TRACE(stack_traces, 16384); |
| |
| static inline int update_stats(u32 pid, u32 tgid, u64 uaddr, u64 block_time, u64 sid) { |
| struct lock_key_t key = {}; |
| struct lock_info_t zero = {}; |
| struct lock_info_t *info; |
| |
| key.pid = pid; |
| key.tgid = tgid; |
| key.uaddr = uaddr; |
| info = lock_stats.lookup_or_init(&key, &zero); |
| info->elapsed_blocked += block_time; |
| info->contention_count++; |
| info->sid = sid; |
| |
| if (0 == tgid_comm.lookup(&tgid)) { |
| struct comm_t comm; |
| bpf_get_current_comm(&comm.name, sizeof(comm.name)); |
| tgid_comm.update(&tgid, &comm); |
| } |
| return 0; |
| } |
| |
| // FIXME: Should attach to sys_enter_futex and sys_exit_futex tracepoints here, |
| // but that does not currently work |
| int sys_futex_enter(struct pt_regs *ctx, u32 *uaddr, int op, u32 val, |
| struct timespec *utime, u32 *uaddr2, u32 val3) { |
| int cmd = op & FUTEX_CMD_MASK; |
| if (cmd != FUTEX_WAIT) |
| return 0; |
| |
| u64 pid_tgid = bpf_get_current_pid_tgid(); |
| u32 pid = pid_tgid; |
| u32 tgid = pid_tgid >> 32; |
| |
| if (!(THREAD_FILTER)) |
| return 0; |
| |
| u64 timestamp = bpf_ktime_get_ns(); |
| u64 uaddr64 = (u64) uaddr; |
| pid_lock.update(&pid, &uaddr64); |
| pid_blocktime.update(&pid, ×tamp); |
| |
| if (SHOULD_PRINT) |
| bpf_trace_printk("enter sys_futex, pid = %u, uaddr = %x, " |
| "cmd = %u\\n", pid, uaddr64, cmd); |
| return 0; |
| } |
| |
| int sys_futex_exit(struct pt_regs *ctx) { |
| u64 pid_tgid = bpf_get_current_pid_tgid(); |
| u32 pid = pid_tgid; |
| u32 tgid = pid_tgid >> 32; |
| if (!(THREAD_FILTER)) |
| return 0; |
| |
| u64 *blocktime = pid_blocktime.lookup(&pid); |
| u64 *uaddr = pid_lock.lookup(&pid); |
| u64 timestamp = bpf_ktime_get_ns(); |
| u64 elapsed; |
| u64 sid; |
| |
| if (blocktime == 0 || uaddr == 0) |
| return 0; // not FUTEX_WAIT, or (less likely) missed futex_enter |
| |
| elapsed = timestamp - *blocktime; |
| |
| sid = stack_traces.get_stackid(ctx, BPF_F_USER_STACK); |
| update_stats(pid, tgid, *uaddr, elapsed, sid); |
| pid_lock.delete(&pid); |
| pid_blocktime.delete(&pid); |
| |
| if (SHOULD_PRINT) { |
| bpf_trace_printk("exit sys_futex, uaddr = %x, elapsed = %uns\\n", |
| uaddr == 0 ? 0 : *uaddr, elapsed); |
| } |
| return 0; |
| } |
| |
| """ |
| |
| bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0") |
| |
| thread_filter = '1' |
| if pid != -1: |
| print("Tracing pid %d, Ctrl+C to quit." % pid) |
| # 'tgid' in kernel space is what people thin of as 'pid' in userspace |
| thread_filter = "tgid == %d" % pid |
| else: |
| print("Tracing all processes, Ctrl+C to quit.") |
| |
| bpf_source = bpf_source.replace("THREAD_FILTER", thread_filter) |
| |
| bpf_program = BPF(text=bpf_source) |
| bpf_program.attach_kprobe(event="SyS_futex", fn_name="sys_futex_enter") |
| bpf_program.attach_kretprobe(event="SyS_futex", fn_name="sys_futex_exit") |
| |
| def get_syms(stack, pid): |
| global bpf_program |
| syms = [] |
| for addr in stack: |
| s = bpf_program.sym(addr, pid, show_offset=True) |
| syms.append(s) |
| return syms |
| |
| def print_syms(syms): |
| print("=========") |
| for f in syms: |
| print(f) |
| print("=========") |
| |
| def is_android_monitor_lock(syms): |
| for s in syms: |
| if 'art::Monitor::Lock' in s: |
| return True |
| return False |
| |
| def disp_stack(stack, pid): |
| for addr in stack: |
| s = bpf_program.sym(addr, pid, show_offset=True) |
| |
| def create_tgid_stats(): |
| global bpf_program |
| stats = bpf_program["lock_stats"] |
| res = {} |
| stack_traces = bpf_program['stack_traces'] |
| for key, val in stats.items(): |
| # Only display Android monitor locks |
| if val.sid >= 0: |
| ust = stack_traces.walk(val.sid) |
| syms = get_syms(ust, key.pid) |
| if not is_android_monitor_lock(syms): |
| continue |
| else: |
| continue |
| |
| if not key.tgid in res: |
| res[key.tgid] = {} |
| if not key.uaddr in res[key.tgid]: |
| res[key.tgid][key.uaddr] = Lock() |
| |
| lock = res[key.tgid][key.uaddr] |
| lock.update(val.contention_count, val.elapsed_blocked, syms) |
| return res |
| |
| def print_comm_stats(stats): |
| if stats == {}: |
| return |
| |
| comms = bpf_program["tgid_comm"] |
| print("\n%s:" % (datetime.now().strftime("%H:%M:%S"))) |
| for tgid, locks in stats.items(): |
| comm = comms[c_int(tgid)].name |
| print("\n %s (%d):" % (comm, tgid)) |
| sorted_locks = sorted(locks.items(), |
| key=lambda x: x[1].elapsed_blocked, |
| reverse=True) |
| for addr, stats in sorted_locks: |
| print(" %x: %dms (%d contentions involving %d threads, avg %dus)" % |
| (addr, stats.elapsed_blocked / 1000000, |
| stats.contention_count, stats.thread_count, |
| stats.elapsed_blocked / stats.contention_count / 1000)) |
| |
| # No use of displaying lock stacks since we're only |
| # filtering for Android monitor locks. |
| # |
| # print("Last stack for this lock:") |
| # print_syms(stats.last_stack_syms) |
| |
| count_so_far = 0 |
| while True: |
| if trace_all: |
| print(bpf_program.trace_fields()) |
| else: |
| try: |
| sleep(interval) |
| except KeyboardInterrupt: |
| exit() |
| print_comm_stats(create_tgid_stats()) |
| count_so_far += 1 |
| bpf_program['tgid_comm'].clear() |
| bpf_program['lock_stats'].clear() |
| bpf_program['pid_lock'].clear() |
| bpf_program['pid_blocktime'].clear() |
| |
| if num_prints is not None and count_so_far >= num_prints: |
| exit() |