| // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) |
| // Copyright (c) 2020 Anton Protopopov |
| // |
| // Based on syscount(8) from BCC by Sasha Goldshtein |
| #include <unistd.h> |
| #include <signal.h> |
| #include <fcntl.h> |
| #include <time.h> |
| #include <argp.h> |
| #include <bpf/bpf.h> |
| #include "syscount.h" |
| #include "syscount.skel.h" |
| #include "errno_helpers.h" |
| #include "syscall_helpers.h" |
| #include "trace_helpers.h" |
| |
| /* This structure extends data_t by adding a key item which should be sorted |
| * together with the count and total_ns fields */ |
| struct data_ext_t { |
| __u64 count; |
| __u64 total_ns; |
| char comm[TASK_COMM_LEN]; |
| __u32 key; |
| }; |
| |
| |
| #define warn(...) fprintf(stderr, __VA_ARGS__) |
| |
| const char *argp_program_version = "syscount 0.1"; |
| const char *argp_program_bug_address = |
| "https://github.com/iovisor/bcc/tree/master/libbpf-tools"; |
| static const char argp_program_doc[] = |
| "\nsyscount: summarize syscall counts and latencies\n" |
| "\n" |
| "EXAMPLES:\n" |
| " syscount # print top 10 syscalls by count every second\n" |
| " syscount -p $(pidof dd) # look only at a particular process\n" |
| " syscount -L # measure and sort output by latency\n" |
| " syscount -P # group statistics by pid, not by syscall\n" |
| " syscount -x -i 5 # count only failed syscalls\n" |
| " syscount -e ENOENT -i 5 # count only syscalls failed with a given errno" |
| ; |
| |
| static const struct argp_option opts[] = { |
| { "verbose", 'v', NULL, 0, "Verbose debug output" }, |
| { "pid", 'p', "PID", 0, "Process PID to trace" }, |
| { "interval", 'i', "INTERVAL", 0, "Print summary at this interval" |
| " (seconds), 0 for infinite wait (default)" }, |
| { "duration", 'd', "DURATION", 0, "Total tracing duration (seconds)" }, |
| { "top", 'T', "TOP", 0, "Print only the top syscalls (default 10)" }, |
| { "failures", 'x', NULL, 0, "Trace only failed syscalls" }, |
| { "latency", 'L', NULL, 0, "Collect syscall latency" }, |
| { "milliseconds", 'm', NULL, 0, "Display latency in milliseconds" |
| " (default: microseconds)" }, |
| { "process", 'P', NULL, 0, "Count by process and not by syscall" }, |
| { "errno", 'e', "ERRNO", 0, "Trace only syscalls that return this error" |
| "(numeric or EPERM, etc.)" }, |
| { "list", 'l', NULL, 0, "Print list of recognized syscalls and exit" }, |
| { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, |
| {}, |
| }; |
| |
| static struct env { |
| bool list_syscalls; |
| bool milliseconds; |
| bool failures; |
| bool verbose; |
| bool latency; |
| bool process; |
| int filter_errno; |
| int interval; |
| int duration; |
| int top; |
| pid_t pid; |
| } env = { |
| .top = 10, |
| }; |
| |
| static int get_int(const char *arg, int *ret, int min, int max) |
| { |
| char *end; |
| long val; |
| |
| errno = 0; |
| val = strtol(arg, &end, 10); |
| if (errno) { |
| warn("strtol: %s: %s\n", arg, strerror(errno)); |
| return -1; |
| } else if (end == arg || val < min || val > max) { |
| return -1; |
| } |
| if (ret) |
| *ret = val; |
| return 0; |
| } |
| |
| static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) |
| { |
| if (level == LIBBPF_DEBUG && !env.verbose) |
| return 0; |
| |
| return vfprintf(stderr, format, args); |
| } |
| |
| static int compar_count(const void *dx, const void *dy) |
| { |
| __u64 x = ((struct data_ext_t *) dx)->count; |
| __u64 y = ((struct data_ext_t *) dy)->count; |
| return x > y ? -1 : !(x == y); |
| } |
| |
| static int compar_latency(const void *dx, const void *dy) |
| { |
| __u64 x = ((struct data_ext_t *) dx)->total_ns; |
| __u64 y = ((struct data_ext_t *) dy)->total_ns; |
| return x > y ? -1 : !(x == y); |
| } |
| |
| static const char *agg_col(struct data_ext_t *val, char *buf, size_t size) |
| { |
| if (env.process) { |
| snprintf(buf, size, "%-6u %-15s", val->key, val->comm); |
| } else { |
| syscall_name(val->key, buf, size); |
| } |
| return buf; |
| } |
| |
| static const char *agg_colname(void) |
| { |
| return (env.process) ? "PID COMM" : "SYSCALL"; |
| } |
| |
| static const char *time_colname(void) |
| { |
| return (env.milliseconds) ? "TIME (ms)" : "TIME (us)"; |
| } |
| |
| static void print_latency_header(void) |
| { |
| printf("%-22s %8s %16s\n", agg_colname(), "COUNT", time_colname()); |
| } |
| |
| static void print_count_header(void) |
| { |
| printf("%-22s %8s\n", agg_colname(), "COUNT"); |
| } |
| |
| static void print_latency(struct data_ext_t *vals, size_t count) |
| { |
| double div = env.milliseconds ? 1000000.0 : 1000.0; |
| char buf[2 * TASK_COMM_LEN]; |
| int i; |
| |
| print_latency_header(); |
| for (i = 0; i < count && i < env.top; i++) |
| printf("%-22s %8llu %16.3lf\n", |
| agg_col(&vals[i], buf, sizeof(buf)), |
| vals[i].count, vals[i].total_ns / div); |
| printf("\n"); |
| } |
| |
| static void print_count(struct data_ext_t *vals, size_t count) |
| { |
| char buf[2 * TASK_COMM_LEN]; |
| int i; |
| |
| print_count_header(); |
| for (i = 0; i < count && i < env.top; i++) |
| printf("%-22s %8llu\n", |
| agg_col(&vals[i], buf, sizeof(buf)), vals[i].count); |
| printf("\n"); |
| } |
| |
| static void print_timestamp() |
| { |
| time_t now = time(NULL); |
| struct tm tm; |
| |
| if (localtime_r(&now, &tm)) |
| printf("[%02d:%02d:%02d]\n", tm.tm_hour, tm.tm_min, tm.tm_sec); |
| else |
| warn("localtime_r: %s", strerror(errno)); |
| } |
| |
| static bool batch_map_ops = true; /* hope for the best */ |
| |
| static bool read_vals_batch(int fd, struct data_ext_t *vals, __u32 *count) |
| { |
| struct data_t orig_vals[*count]; |
| void *in = NULL, *out; |
| __u32 i, n, n_read = 0; |
| __u32 keys[*count]; |
| int err = 0; |
| |
| while (n_read < *count && !err) { |
| n = *count - n_read; |
| err = bpf_map_lookup_and_delete_batch(fd, &in, &out, |
| keys + n_read, orig_vals + n_read, &n, NULL); |
| if (err && errno != ENOENT) { |
| /* we want to propagate EINVAL upper, so that |
| * the batch_map_ops flag is set to false */ |
| if (errno != EINVAL) |
| warn("bpf_map_lookup_and_delete_batch: %s\n", |
| strerror(-err)); |
| return false; |
| } |
| n_read += n; |
| in = out; |
| } |
| |
| for (i = 0; i < n_read; i++) { |
| vals[i].count = orig_vals[i].count; |
| vals[i].total_ns = orig_vals[i].total_ns; |
| vals[i].key = keys[i]; |
| strncpy(vals[i].comm, orig_vals[i].comm, TASK_COMM_LEN); |
| } |
| |
| *count = n_read; |
| return true; |
| } |
| |
| static bool read_vals(int fd, struct data_ext_t *vals, __u32 *count) |
| { |
| __u32 keys[MAX_ENTRIES]; |
| struct data_t val; |
| __u32 key = -1; |
| __u32 next_key; |
| int i = 0, j; |
| int err; |
| |
| if (batch_map_ops) { |
| bool ok = read_vals_batch(fd, vals, count); |
| if (!ok && errno == EINVAL) { |
| /* fall back to a racy variant */ |
| batch_map_ops = false; |
| } else { |
| return ok; |
| } |
| } |
| |
| if (!vals || !count || !*count) |
| return true; |
| |
| for (key = -1; i < *count; ) { |
| err = bpf_map_get_next_key(fd, &key, &next_key); |
| if (err && errno != ENOENT) { |
| warn("failed to get next key: %s\n", strerror(errno)); |
| return false; |
| } else if (err) { |
| break; |
| } |
| key = keys[i++] = next_key; |
| } |
| |
| for (j = 0; j < i; j++) { |
| err = bpf_map_lookup_elem(fd, &keys[j], &val); |
| if (err && errno != ENOENT) { |
| warn("failed to lookup element: %s\n", strerror(errno)); |
| return false; |
| } |
| vals[j].count = val.count; |
| vals[j].total_ns = val.total_ns; |
| vals[j].key = keys[j]; |
| memcpy(vals[j].comm, val.comm, TASK_COMM_LEN); |
| } |
| |
| /* There is a race here: system calls which are represented by keys |
| * above and happened between lookup and delete will be ignored. This |
| * will be fixed in future by using bpf_map_lookup_and_delete_batch, |
| * but this function is too fresh to use it in bcc. */ |
| |
| for (j = 0; j < i; j++) { |
| err = bpf_map_delete_elem(fd, &keys[j]); |
| if (err) { |
| warn("failed to delete element: %s\n", strerror(errno)); |
| return false; |
| } |
| } |
| |
| *count = i; |
| return true; |
| } |
| |
| static error_t parse_arg(int key, char *arg, struct argp_state *state) |
| { |
| int number; |
| int err; |
| |
| switch (key) { |
| case 'h': |
| argp_state_help(state, stderr, ARGP_HELP_STD_HELP); |
| break; |
| case 'v': |
| env.verbose = true; |
| break; |
| case 'x': |
| env.failures = true; |
| break; |
| case 'L': |
| env.latency = true; |
| break; |
| case 'm': |
| env.milliseconds = true; |
| break; |
| case 'P': |
| env.process = true; |
| break; |
| case 'p': |
| err = get_int(arg, &env.pid, 1, INT_MAX); |
| if (err) { |
| warn("invalid PID: %s\n", arg); |
| argp_usage(state); |
| } |
| break; |
| case 'i': |
| err = get_int(arg, &env.interval, 0, INT_MAX); |
| if (err) { |
| warn("invalid INTERVAL: %s\n", arg); |
| argp_usage(state); |
| } |
| break; |
| case 'd': |
| err = get_int(arg, &env.duration, 1, INT_MAX); |
| if (err) { |
| warn("invalid DURATION: %s\n", arg); |
| argp_usage(state); |
| } |
| break; |
| case 'T': |
| err = get_int(arg, &env.top, 1, INT_MAX); |
| if (err) { |
| warn("invalid TOP: %s\n", arg); |
| argp_usage(state); |
| } |
| break; |
| case 'e': |
| err = get_int(arg, &number, 1, INT_MAX); |
| if (err) { |
| number = errno_by_name(arg); |
| if (number < 0) { |
| warn("invalid errno: %s (bad, or can't " |
| "parse dynamically; consider using " |
| "numeric value and/or installing the " |
| "errno program from moreutils)\n", arg); |
| argp_usage(state); |
| } |
| } |
| env.filter_errno = number; |
| break; |
| case 'l': |
| env.list_syscalls = true; |
| break; |
| default: |
| return ARGP_ERR_UNKNOWN; |
| } |
| return 0; |
| } |
| |
| static volatile sig_atomic_t hang_on = 1; |
| |
| void sig_int(int signo) |
| { |
| hang_on = 0; |
| } |
| |
| int main(int argc, char **argv) |
| { |
| void (*print)(struct data_ext_t *, size_t); |
| int (*compar)(const void *, const void *); |
| static const struct argp argp = { |
| .options = opts, |
| .parser = parse_arg, |
| .doc = argp_program_doc, |
| }; |
| struct data_ext_t vals[MAX_ENTRIES]; |
| struct syscount_bpf *obj; |
| int seconds = 0; |
| __u32 count; |
| int err; |
| |
| init_syscall_names(); |
| |
| err = argp_parse(&argp, argc, argv, 0, NULL, NULL); |
| if (err) |
| goto free_names; |
| |
| if (env.list_syscalls) { |
| list_syscalls(); |
| goto free_names; |
| } |
| |
| libbpf_set_strict_mode(LIBBPF_STRICT_ALL); |
| libbpf_set_print(libbpf_print_fn); |
| |
| obj = syscount_bpf__open(); |
| if (!obj) { |
| warn("failed to open BPF object\n"); |
| err = 1; |
| goto free_names; |
| } |
| |
| if (env.pid) |
| obj->rodata->filter_pid = env.pid; |
| if (env.failures) |
| obj->rodata->filter_failed = true; |
| if (env.latency) |
| obj->rodata->measure_latency = true; |
| if (env.process) |
| obj->rodata->count_by_process = true; |
| if (env.filter_errno) |
| obj->rodata->filter_errno = env.filter_errno; |
| |
| err = syscount_bpf__load(obj); |
| if (err) { |
| warn("failed to load BPF object: %s\n", strerror(-err)); |
| goto cleanup_obj; |
| } |
| |
| obj->links.sys_exit = bpf_program__attach(obj->progs.sys_exit); |
| if (!obj->links.sys_exit) { |
| err = -errno; |
| warn("failed to attach sys_exit program: %s\n", strerror(-err)); |
| goto cleanup_obj; |
| } |
| if (env.latency) { |
| obj->links.sys_enter = bpf_program__attach(obj->progs.sys_enter); |
| if (!obj->links.sys_enter) { |
| err = -errno; |
| warn("failed to attach sys_enter programs: %s\n", |
| strerror(-err)); |
| goto cleanup_obj; |
| } |
| } |
| |
| if (signal(SIGINT, sig_int) == SIG_ERR) { |
| warn("can't set signal handler: %s\n", strerror(errno)); |
| goto cleanup_obj; |
| } |
| |
| compar = env.latency ? compar_latency : compar_count; |
| print = env.latency ? print_latency : print_count; |
| |
| printf("Tracing syscalls, printing top %d... Ctrl+C to quit.\n", env.top); |
| while (hang_on) { |
| sleep(env.interval ?: 1); |
| if (env.duration) { |
| seconds += env.interval ?: 1; |
| if (seconds >= env.duration) |
| hang_on = 0; |
| } |
| if (hang_on && !env.interval) |
| continue; |
| |
| count = MAX_ENTRIES; |
| if (!read_vals(bpf_map__fd(obj->maps.data), vals, &count)) |
| break; |
| if (!count) |
| continue; |
| |
| qsort(vals, count, sizeof(vals[0]), compar); |
| print_timestamp(); |
| print(vals, count); |
| } |
| |
| cleanup_obj: |
| syscount_bpf__destroy(obj); |
| free_names: |
| free_syscall_names(); |
| |
| return err != 0; |
| } |