|  | #!/usr/bin/python | 
|  | # | 
|  | # syscount   Summarize syscall counts and latencies. | 
|  | # | 
# USAGE: syscount [-p PID] [-i INTERVAL] [-d DURATION] [-T TOP] [-x] [-e ERRNO] [-L] [-m] [-P] [-l]
|  | # | 
|  | # Copyright 2017, Sasha Goldshtein. | 
|  | # Licensed under the Apache License, Version 2.0 (the "License") | 
|  | # | 
|  | # 15-Feb-2017   Sasha Goldshtein    Created this. | 
|  |  | 
|  | from time import sleep, strftime | 
|  | import argparse | 
|  | import errno | 
|  | import itertools | 
|  | import sys | 
|  | import signal | 
|  | from bcc import BPF | 
|  | from bcc.utils import printb | 
|  | from bcc.syscall import syscall_name, syscalls | 
|  |  | 
# The padding variant of zip is called izip_longest on Python 2 and
# zip_longest on Python 3; expose it under a single name for both.
izip_longest = (itertools.izip_longest if sys.version_info.major < 3
                else itertools.zip_longest)
|  |  | 
|  | # signal handler | 
def signal_ignore(signal, frame):
    """Signal handler that ignores the signal and only prints a newline.

    The main loop installs it for SIGINT after the first Ctrl+C, so a
    further Ctrl+C is swallowed instead of raising KeyboardInterrupt.

    Both parameters are the standard handler arguments (signal number and
    current stack frame); neither is used.
    """
    # print("") rather than print(): without the print_function future
    # import, Python 2 would treat print() as printing an empty tuple "()".
    print("")
|  |  | 
def handle_errno(errstr):
    """Convert a command-line errno specification to a positive integer.

    Used as the argparse ``type=`` converter for -e/--errno.

    errstr: either a decimal number (its sign is ignored, so "-2" and "2"
            are equivalent) or a symbolic name from the errno module such
            as "EPERM".

    Returns the errno value as a non-negative int.
    Raises argparse.ArgumentTypeError if errstr is neither a number nor the
    name of an integer constant in the errno module.
    """
    try:
        return abs(int(errstr))
    except ValueError:
        pass

    # The errno module also exposes non-integer attributes (for example the
    # "errorcode" dict or "__name__"); accept integer constants only, so a
    # bad name is reported here instead of failing later in "%d" formatting.
    code = getattr(errno, errstr, None)
    if isinstance(code, int) and not isinstance(code, bool):
        return code
    raise argparse.ArgumentTypeError("couldn't map %s to an errno" % errstr)
|  |  | 
|  |  | 
# Command-line interface.
parser = argparse.ArgumentParser(
    description="Summarize syscall counts and latencies.",
)
parser.add_argument(
    "-p", "--pid", type=int,
    help="trace only this pid",
)
parser.add_argument(
    "-i", "--interval", type=int,
    help="print summary at this interval (seconds)",
)
parser.add_argument(
    "-d", "--duration", type=int,
    help="total duration of trace, in seconds",
)
parser.add_argument(
    "-T", "--top", type=int, default=10,
    help="print only the top syscalls by count or latency",
)
parser.add_argument(
    "-x", "--failures", action="store_true",
    help="trace only failed syscalls (return < 0)",
)
parser.add_argument(
    "-e", "--errno", type=handle_errno,
    help="trace only syscalls that return this error (numeric or EPERM, etc.)",
)
parser.add_argument(
    "-L", "--latency", action="store_true",
    help="collect syscall latency",
)
parser.add_argument(
    "-m", "--milliseconds", action="store_true",
    help="display latency in milliseconds (default: microseconds)",
)
parser.add_argument(
    "-P", "--process", action="store_true",
    help="count by process and not by syscall",
)
parser.add_argument(
    "-l", "--list", action="store_true",
    help="print list of recognized syscalls and exit",
)
# Hidden option: not shown in --help.
parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS)
args = parser.parse_args()

# Interval fallback chain: the explicit interval, else the total duration
# (a single summary at the end), else a very large value so the only
# summary is the one printed on Ctrl+C.
args.interval = args.interval or args.duration or 99999999
|  |  | 
# -l/--list: print the syscall names known to bcc, sorted, four per row,
# then exit without tracing.
if args.list:
    names = iter(sorted(syscalls.values()))
    # Passing the same iterator four times makes every row consume four
    # consecutive names; a short final row is padded with None.
    for row in izip_longest(names, names, names, names):
        cells = ["%-20s" % name for name in row if name is not None]
        print("   ".join(cells))
    sys.exit(0)
|  |  | 
# BPF program source, compiled by bcc.  Its behaviour is selected by
# preprocessor macros that the script prepends as "#define" lines:
#   LATENCY       - also hook sys_enter and accumulate per-key total time
#   FILTER_PID    - only count syscalls made by this process id
#   FILTER_FAILED - only count syscalls whose return value is negative
#   FILTER_ERRNO  - only count syscalls that return -FILTER_ERRNO
#   BY_PROCESS    - key the "data" map by process id instead of syscall id
#
# The counters live in a hash map shared by every CPU, so they are updated
# with lock_xadd (atomic add); a plain "++" is a read-modify-write that can
# lose updates when two CPUs hit the same key at the same time.
text = """
#ifdef LATENCY
struct data_t {
    u64 count;
    u64 total_ns;
};

BPF_HASH(start, u64, u64);
BPF_HASH(data, u32, struct data_t);
#else
BPF_HASH(data, u32, u64);
#endif

#ifdef LATENCY
TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

#ifdef FILTER_PID
    if (pid_tgid >> 32 != FILTER_PID)
        return 0;
#endif

    u64 t = bpf_ktime_get_ns();
    start.update(&pid_tgid, &t);
    return 0;
}
#endif

TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

#ifdef FILTER_PID
    if (pid_tgid >> 32 != FILTER_PID)
        return 0;
#endif

#ifdef FILTER_FAILED
    if (args->ret >= 0)
        return 0;
#endif

#ifdef FILTER_ERRNO
    if (args->ret != -FILTER_ERRNO)
        return 0;
#endif

#ifdef BY_PROCESS
    u32 key = pid_tgid >> 32;
#else
    u32 key = args->id;
#endif

#ifdef LATENCY
    struct data_t *val, zero = {};
    u64 *start_ns = start.lookup(&pid_tgid);
    if (!start_ns)
        return 0;

    val = data.lookup_or_try_init(&key, &zero);
    if (val) {
        lock_xadd(&val->count, 1);
        lock_xadd(&val->total_ns, bpf_ktime_get_ns() - *start_ns);
    }
#else
    u64 *val, zero = 0;
    val = data.lookup_or_try_init(&key, &zero);
    if (val) {
        lock_xadd(val, 1);
    }
#endif
    return 0;
}
"""
|  |  | 
# Turn the selected command-line options into preprocessor definitions by
# prepending "#define" lines to the BPF program source.
if args.pid:
    text = ("#define FILTER_PID %d\n" % args.pid) + text
if args.failures:
    text = "#define FILTER_FAILED\n" + text
if args.errno:
    text = "#define FILTER_ERRNO %d\n" % abs(args.errno) + text
if args.latency:
    text = "#define LATENCY\n" + text
if args.process:
    text = "#define BY_PROCESS\n" + text
if args.ebpf:
    # Hidden --ebpf option: dump the generated program instead of loading it.
    print(text)
    # sys.exit() instead of the builtin exit(): the latter is an interactive
    # helper injected by the site module and is not meant for programs.
    sys.exit()

# Compile the program and attach its tracepoint probes.
bpf = BPF(text=text)
|  |  | 
def print_stats():
    """Print one summary, with latency columns when -L/--latency is set."""
    report = print_latency_stats if args.latency else print_count_stats
    report()
|  |  | 
# Heading of the first output column: per-process or per-syscall grouping.
if args.process:
    agg_colname = "PID    COMM"
else:
    agg_colname = "SYSCALL"

# Heading of the latency column, matching the unit chosen with -m.
if args.milliseconds:
    time_colname = "TIME (ms)"
else:
    time_colname = "TIME (us)"
|  |  | 
def comm_for_pid(pid):
    """Return the contents of /proc/<pid>/comm as stripped bytes.

    pid: integer process id.

    Returns b"[unknown]" if the file cannot be read for any reason.
    """
    try:
        # The context manager closes the file promptly instead of leaving
        # that to garbage collection.
        with open("/proc/%d/comm" % pid, "rb") as comm_file:
            return comm_file.read().strip()
    except Exception:
        # Deliberately best effort: a missing name must not abort the report.
        return b"[unknown]"
|  |  | 
def agg_colval(key):
    """Return the first-column text (bytes) for a key of the "data" map.

    With -P/--process the key is a process id and the result is the pid
    followed by the process's command name; otherwise the key is a syscall
    number and the result is whatever syscall_name() returns for it.
    """
    if not args.process:
        return syscall_name(key.value)
    pid = key.value
    return b"%-6d %-15s" % (pid, comm_for_pid(pid))
|  |  | 
def print_count_stats():
    """Print the top entries of the "data" map by count, then reset the map.

    Output is a timestamp line, a header line, at most args.top rows sorted
    by descending count, and a blank line.
    """
    data = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s" % (agg_colname, "COUNT"))
    # The 0xFFFFFFFF key happens occasionally and is not wanted.  Drop it
    # before truncating so it cannot use up one of the args.top slots.
    rows = [(k, v) for k, v in data.items() if k.value != 0xFFFFFFFF]
    rows.sort(key=lambda kv: kv[1].value, reverse=True)
    for k, v in rows[:args.top]:
        printb(b"%-22s %8d" % (agg_colval(k), v.value))
    print("")
    # Start the next interval from zero.
    data.clear()
|  |  | 
def print_latency_stats():
    """Print the top entries of the "data" map by total time, then reset it.

    Output is a timestamp line, a header line, at most args.top rows sorted
    by descending total latency (each with its count and total time), and a
    blank line.  Time is shown in milliseconds with -m, else microseconds.
    """
    data = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s %16s" % (agg_colname, "COUNT", time_colname))

    # Unit-dependent pieces are the same for every row; compute them once.
    row_fmt = b"%-22s %8d " + (b"%16.6f" if args.milliseconds else b"%16.3f")
    ns_per_unit = 1e6 if args.milliseconds else 1e3

    # The 0xFFFFFFFF key happens occasionally and is not wanted.  Drop it
    # before truncating so it cannot use up one of the args.top slots.
    rows = [(k, v) for k, v in data.items() if k.value != 0xFFFFFFFF]
    rows.sort(key=lambda kv: kv[1].total_ns, reverse=True)
    for k, v in rows[:args.top]:
        printb(row_fmt % (agg_colval(k), v.count, v.total_ns / ns_per_unit))
    print("")
    # Start the next interval from zero.
    data.clear()
|  |  | 
# Main loop: sleep for one interval, print a summary, and repeat until the
# requested duration has elapsed or the user presses Ctrl+C.
print("Tracing %ssyscalls, printing top %d... Ctrl+C to quit." %
      ("failed " if args.failures else "", args.top))
exiting = 0 if args.interval else 1
seconds = 0
while True:
    try:
        sleep(args.interval)
        seconds += args.interval
    except KeyboardInterrupt:
        exiting = 1
        # Swallow any further Ctrl+C so the final summary below is not
        # interrupted.
        signal.signal(signal.SIGINT, signal_ignore)
    if args.duration and seconds >= args.duration:
        exiting = 1

    print_stats()

    if exiting:
        print("Detaching...")
        # sys.exit() instead of the builtin exit(): the latter is an
        # interactive helper injected by the site module.
        sys.exit()