blob: b262aac57c534e26bcd8f5a211cbbbf3939da33e [file] [log] [blame]
#!/usr/bin/env python
# @lint-avoid-python-3-compatibility-imports
#
# funclatency Time functions and print latency as a histogram.
# For Linux, uses BCC, eBPF.
#
# USAGE: funclatency [-h] [-p PID] [-i INTERVAL] [-T] [-u] [-m] [-F] [-r] [-v]
# pattern
#
# Run "funclatency -h" for full usage.
#
# The pattern is a string with optional '*' wildcards, similar to file
# globbing. If you'd prefer to use regular expressions, use the -r option.
#
# Without the '-l' option, only the innermost calls will be recorded.
# Use '-l LEVEL' to record the outermost n levels of nested/recursive functions.
#
# Copyright (c) 2015 Brendan Gregg.
# Copyright (c) 2021 Chenyue Zhou.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 20-Sep-2015 Brendan Gregg Created this.
# 06-Oct-2016 Sasha Goldshtein Added user function support.
# 14-Apr-2021 Chenyue Zhou Added nested or recursive function support.
from __future__ import print_function
from bcc import BPF
from time import sleep, strftime
import argparse
import signal
# arguments
examples = """examples:
./funclatency do_sys_open # time the do_sys_open() kernel function
./funclatency c:read # time the read() C library function
./funclatency -u vfs_read # time vfs_read(), in microseconds
./funclatency -m do_nanosleep # time do_nanosleep(), in milliseconds
./funclatency -i 2 -d 10 c:open # output every 2 seconds, for duration 10s
./funclatency -mTi 5 vfs_read # output every 5 seconds, with timestamps
./funclatency -p 181 vfs_read # time process 181 only
./funclatency 'vfs_fstat*' # time both vfs_fstat() and vfs_fstatat()
./funclatency 'c:*printf' # time the *printf family of functions
./funclatency -F 'vfs_r*' # show one histogram per matched function
"""
parser = argparse.ArgumentParser(
description="Time functions and print latency as a histogram",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-p", "--pid", type=int,
help="trace this PID only")
parser.add_argument("-i", "--interval", type=int,
help="summary interval, in seconds")
parser.add_argument("-d", "--duration", type=int,
help="total duration of trace, in seconds")
parser.add_argument("-T", "--timestamp", action="store_true",
help="include timestamp on output")
parser.add_argument("-u", "--microseconds", action="store_true",
help="microsecond histogram")
parser.add_argument("-m", "--milliseconds", action="store_true",
help="millisecond histogram")
parser.add_argument("-F", "--function", action="store_true",
help="show a separate histogram per function")
parser.add_argument("-r", "--regexp", action="store_true",
help="use regular expressions. Default is \"*\" wildcards only.")
parser.add_argument("-l", "--level", type=int,
help="set the level of nested or recursive functions")
parser.add_argument("-v", "--verbose", action="store_true",
help="print the BPF program (for debugging purposes)")
parser.add_argument("pattern",
help="search expression for functions")
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
args = parser.parse_args()
if args.duration and not args.interval:
args.interval = args.duration
if not args.interval:
args.interval = 99999999
def bail(error):
print("Error: " + error)
exit(1)
parts = args.pattern.split(':')
if len(parts) == 1:
library = None
pattern = args.pattern
elif len(parts) == 2:
library = parts[0]
libpath = BPF.find_library(library) or BPF.find_exe(library)
if not libpath:
bail("can't resolve library %s" % library)
library = libpath
pattern = parts[1]
else:
bail("unrecognized pattern format '%s'" % args.pattern)
if not args.regexp:
pattern = pattern.replace('*', '.*')
pattern = '^' + pattern + '$'
# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
typedef struct ip_pid {
u64 ip;
u64 pid;
} ip_pid_t;
typedef struct hist_key {
ip_pid_t key;
u64 slot;
} hist_key_t;
TYPEDEF
BPF_ARRAY(avg, u64, 2);
STORAGE
FUNCTION
int trace_func_entry(struct pt_regs *ctx)
{
u64 pid_tgid = bpf_get_current_pid_tgid();
u32 pid = pid_tgid;
u32 tgid = pid_tgid >> 32;
u64 ts = bpf_ktime_get_ns();
FILTER
ENTRYSTORE
return 0;
}
int trace_func_return(struct pt_regs *ctx)
{
u64 *tsp, delta;
u64 pid_tgid = bpf_get_current_pid_tgid();
u32 pid = pid_tgid;
u32 tgid = pid_tgid >> 32;
// calculate delta time
CALCULATE
u32 lat = 0;
u32 cnt = 1;
avg.atomic_increment(lat, delta);
avg.atomic_increment(cnt);
FACTOR
// store as histogram
STORE
return 0;
}
"""
# do we need to store the IP and pid for each invocation?
need_key = args.function or (library and not args.pid)
# code substitutions
if args.pid:
bpf_text = bpf_text.replace('FILTER',
'if (tgid != %d) { return 0; }' % args.pid)
else:
bpf_text = bpf_text.replace('FILTER', '')
if args.milliseconds:
bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
label = "msecs"
elif args.microseconds:
bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
label = "usecs"
else:
bpf_text = bpf_text.replace('FACTOR', '')
label = "nsecs"
if need_key:
pid = '-1' if not library else 'tgid'
if args.level and args.level > 1:
bpf_text = bpf_text.replace('TYPEDEF',
"""
#define STACK_DEPTH %s
typedef struct {
u64 ip;
u64 start_ts;
} func_cache_t;
/* LIFO */
typedef struct {
u32 head;
func_cache_t cache[STACK_DEPTH];
} func_stack_t;
""" % args.level)
bpf_text = bpf_text.replace('STORAGE',
"""
BPF_HASH(func_stack, u32, func_stack_t);
BPF_HISTOGRAM(dist, hist_key_t);
""")
bpf_text = bpf_text.replace('FUNCTION',
"""
static inline int stack_pop(func_stack_t *stack, func_cache_t *cache) {
if (stack->head <= 0) {
return -1;
}
u32 index = --stack->head;
if (index < STACK_DEPTH) {
/* bound check */
cache->ip = stack->cache[index].ip;
cache->start_ts = stack->cache[index].start_ts;
}
return 0;
}
static inline int stack_push(func_stack_t *stack, func_cache_t *cache) {
u32 index = stack->head;
if (index > STACK_DEPTH - 1) {
/* bound check */
return -1;
}
stack->head++;
stack->cache[index].ip = cache->ip;
stack->cache[index].start_ts = cache->start_ts;
return 0;
}
""")
bpf_text = bpf_text.replace('ENTRYSTORE',
"""
u64 ip = PT_REGS_IP(ctx);
func_cache_t cache = {
.ip = ip,
.start_ts = ts,
};
func_stack_t *stack = func_stack.lookup(&pid);
if (!stack) {
func_stack_t new_stack = {
.head = 0,
};
if (!stack_push(&new_stack, &cache)) {
func_stack.update(&pid, &new_stack);
}
return 0;
}
if (!stack_push(stack, &cache)) {
func_stack.update(&pid, stack);
}
""")
bpf_text = bpf_text.replace('CALCULATE',
"""
u64 ip, start_ts;
func_stack_t *stack = func_stack.lookup(&pid);
if (!stack) {
/* miss start */
return 0;
}
func_cache_t cache = {};
if (stack_pop(stack, &cache)) {
func_stack.delete(&pid);
return 0;
}
ip = cache.ip;
start_ts = cache.start_ts;
delta = bpf_ktime_get_ns() - start_ts;
""")
bpf_text = bpf_text.replace('STORE',
"""
hist_key_t key;
key.key.ip = ip;
key.key.pid = %s;
key.slot = bpf_log2l(delta);
dist.atomic_increment(key);
if (stack->head == 0) {
/* empty */
func_stack.delete(&pid);
}
""" % pid)
else:
bpf_text = bpf_text.replace('STORAGE', 'BPF_HASH(ipaddr, u32);\n'\
'BPF_HISTOGRAM(dist, hist_key_t);\n'\
'BPF_HASH(start, u32);')
# stash the IP on entry, as on return it's kretprobe_trampoline:
bpf_text = bpf_text.replace('ENTRYSTORE',
'u64 ip = PT_REGS_IP(ctx); ipaddr.update(&pid, &ip);'\
' start.update(&pid, &ts);')
bpf_text = bpf_text.replace('STORE',
"""
u64 ip, *ipp = ipaddr.lookup(&pid);
if (ipp) {
ip = *ipp;
hist_key_t key;
key.key.ip = ip;
key.key.pid = %s;
key.slot = bpf_log2l(delta);
dist.atomic_increment(key);
ipaddr.delete(&pid);
}
""" % pid)
else:
bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);\n'\
'BPF_HASH(start, u32);')
bpf_text = bpf_text.replace('ENTRYSTORE', 'start.update(&pid, &ts);')
bpf_text = bpf_text.replace('STORE',
'dist.atomic_increment(bpf_log2l(delta));')
bpf_text = bpf_text.replace('TYPEDEF', '')
bpf_text = bpf_text.replace('FUNCTION', '')
bpf_text = bpf_text.replace('CALCULATE',
"""
tsp = start.lookup(&pid);
if (tsp == 0) {
return 0; // missed start
}
delta = bpf_ktime_get_ns() - *tsp;
start.delete(&pid);
""")
if args.verbose or args.ebpf:
print(bpf_text)
if args.ebpf:
exit()
# signal handler
def signal_ignore(signal, frame):
print()
# load BPF program
b = BPF(text=bpf_text)
# attach probes
if not library:
b.attach_kprobe(event_re=pattern, fn_name="trace_func_entry")
b.attach_kretprobe(event_re=pattern, fn_name="trace_func_return")
matched = b.num_open_kprobes()
else:
b.attach_uprobe(name=library, sym_re=pattern, fn_name="trace_func_entry",
pid=args.pid or -1)
b.attach_uretprobe(name=library, sym_re=pattern,
fn_name="trace_func_return", pid=args.pid or -1)
matched = b.num_open_uprobes()
if matched == 0:
print("0 functions matched by \"%s\". Exiting." % args.pattern)
exit()
# header
print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." %
(matched / 2, args.pattern))
# output
def print_section(key):
if not library:
return BPF.sym(key[0], -1).decode('utf-8', 'replace')
else:
return "%s [%d]" % (BPF.sym(key[0], key[1]).decode('utf-8', 'replace'), key[1])
exiting = 0 if args.interval else 1
seconds = 0
dist = b.get_table("dist")
while (1):
try:
sleep(args.interval)
seconds += args.interval
except KeyboardInterrupt:
exiting = 1
# as cleanup can take many seconds, trap Ctrl-C:
signal.signal(signal.SIGINT, signal_ignore)
if args.duration and seconds >= args.duration:
exiting = 1
print()
if args.timestamp:
print("%-8s\n" % strftime("%H:%M:%S"), end="")
if need_key:
dist.print_log2_hist(label, "Function", section_print_fn=print_section,
bucket_fn=lambda k: (k.ip, k.pid))
else:
dist.print_log2_hist(label)
total = b['avg'][0].value
counts = b['avg'][1].value
if counts > 0:
if label == 'msecs':
total /= 1000000
elif label == 'usecs':
total /= 1000
avg = total/counts
print("\navg = %ld %s, total: %ld %s, count: %ld\n" %(total/counts, label, total, label, counts))
dist.clear()
b['avg'].clear()
if exiting:
print("Detaching...")
exit()