| #!/usr/bin/python | 
 | # @lint-avoid-python-3-compatibility-imports | 
 | # | 
 | # dirtop  file reads and writes by directory. | 
 | #          For Linux, uses BCC, eBPF. | 
 | # | 
 | # USAGE: dirtop.py -d 'directory1,directory2' [-h] [-C] [-r MAXROWS] [interval] [count] | 
 | # | 
# This uses in-kernel eBPF maps to store per-directory summaries for efficiency.
 | # | 
 | # Copyright 2016 Netflix, Inc. | 
 | # Licensed under the Apache License, Version 2.0 (the "License") | 
 | # | 
 | # 13-Mar-2020   Erwan Velu      Created dirtop from filetop | 
 | # 06-Feb-2016   Brendan Gregg   Created filetop. | 
 |  | 
 | from __future__ import print_function | 
 | from bcc import BPF | 
 | from time import sleep, strftime | 
 | import argparse | 
 | import os | 
 | import stat | 
 | from subprocess import call | 
 |  | 
 | # arguments | 
 | examples = """examples: | 
 |     ./dirtop -d '/hdfs/uuid/*/yarn'       # directory I/O top, 1 second refresh | 
 |     ./dirtop -d '/hdfs/uuid/*/yarn' -C    # don't clear the screen | 
 |     ./dirtop -d '/hdfs/uuid/*/yarn' 5     # 5 second summaries | 
 |     ./dirtop -d '/hdfs/uuid/*/yarn' 5 10  # 5 second summaries, 10 times only | 
    ./dirtop -d '/hdfs/uuid/*/yarn,/hdfs/uuid/*/data' # watch two sets of directories
 | """ | 
 | parser = argparse.ArgumentParser( | 
 |     description="File reads and writes by process", | 
 |     formatter_class=argparse.RawDescriptionHelpFormatter, | 
 |     epilog=examples) | 
 | parser.add_argument("-C", "--noclear", action="store_true", | 
 |                     help="don't clear the screen") | 
 | parser.add_argument("-r", "--maxrows", default=20, | 
 |                     help="maximum rows to print, default 20") | 
 | parser.add_argument("-s", "--sort", default="all", | 
 |                     choices=["all", "reads", "writes", "rbytes", "wbytes"], | 
 |                     help="sort column, default all") | 
 | parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid", | 
 |                     help="trace this PID only") | 
 | parser.add_argument("interval", nargs="?", default=1, | 
 |                     help="output interval, in seconds") | 
 | parser.add_argument("count", nargs="?", default=99999999, | 
 |                     help="number of outputs") | 
 | parser.add_argument("--ebpf", action="store_true", | 
 |                     help=argparse.SUPPRESS) | 
 | parser.add_argument("-d", "--root-directories", type=str, required=True, dest="rootdirs", | 
 |                     help="select the directories to observe, separated by commas") | 
 | args = parser.parse_args() | 
 | interval = int(args.interval) | 
 | countdown = int(args.count) | 
 | maxrows = int(args.maxrows) | 
clear = not args.noclear
 | debug = 0 | 
 |  | 
 | # linux stats | 
 | loadavg = "/proc/loadavg" | 
 |  | 
 | # define BPF program | 
 | bpf_text = """ | 
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
 |  | 
 | // the key for the output summary | 
 | struct info_t { | 
 |     unsigned long inode_id; | 
 | }; | 
 |  | 
 | // the value of the output summary | 
 | struct val_t { | 
 |     u64 reads; | 
 |     u64 writes; | 
 |     u64 rbytes; | 
 |     u64 wbytes; | 
 | }; | 
 |  | 
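// the aggregation map, maintained in kernel space: info_t key -> val_t counters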
 | BPF_HASH(counts, struct info_t, struct val_t); | 
 |  | 
 | static int do_entry(struct pt_regs *ctx, struct file *file, | 
 |     char __user *buf, size_t count, int is_read) | 
 | { | 
 |     u32 tgid = bpf_get_current_pid_tgid() >> 32; | 
 |     if (TGID_FILTER) | 
 |         return 0; | 
 |  | 
 |     // The directory inodes we look at | 
    u32 dir_ids[INODES_NUMBER] = DIRECTORY_INODES;
 |     struct info_t info = {.inode_id = 0}; | 
 |     struct dentry *pde = file->f_path.dentry; | 
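    // Walk up the dentry chain (bounded: the eBPF verifier rejects
    // unbounded loops)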
    for (int i = 0; i < 50; i++) {
        // If there is no parent, we've reached the root
        if (!pde->d_parent) {
            break;
        }
        pde = pde->d_parent;
        // Is this file under one of the directories we are looking for?
        for (int dir_id = 0; dir_id < INODES_NUMBER; dir_id++) {
 |             if (pde->d_inode->i_ino == dir_ids[dir_id]) { | 
 |                 // Yes, let's export the top directory inode | 
 |                 info.inode_id = pde->d_inode->i_ino; | 
 |                 break; | 
 |             } | 
 |         } | 
 |     } | 
    // If we didn't find any match, bail out
 |     if (info.inode_id == 0) { | 
 |         return 0; | 
 |     } | 
 |  | 
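    // update the in-kernel counters; lookup_or_try_init() returns NULL
    // when the map is full, in which case this event is dropped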
 |     struct val_t *valp, zero = {}; | 
 |     valp = counts.lookup_or_try_init(&info, &zero); | 
 |     if (valp) { | 
 |         if (is_read) { | 
 |             valp->reads++; | 
 |             valp->rbytes += count; | 
 |         } else { | 
 |             valp->writes++; | 
 |             valp->wbytes += count; | 
 |         } | 
 |     } | 
 |     return 0; | 
 | } | 
 |  | 
 | int trace_read_entry(struct pt_regs *ctx, struct file *file, | 
 |     char __user *buf, size_t count) | 
 | { | 
 |     return do_entry(ctx, file, buf, count, 1); | 
 | } | 
 |  | 
 | int trace_write_entry(struct pt_regs *ctx, struct file *file, | 
 |     char __user *buf, size_t count) | 
 | { | 
 |     return do_entry(ctx, file, buf, count, 0); | 
 | } | 
 |  | 
 | """ | 
 |  | 
 |  | 
 | def get_searched_ids(root_directories): | 
 |     """Export the inode numbers of the selected directories.""" | 
 |     from glob import glob | 
 |     inode_to_path = {} | 
 |     inodes = "{" | 
 |     total_dirs = 0 | 
 |     for root_directory in root_directories.split(','): | 
        try:
            searched_dirs = glob(root_directory, recursive=True)
        except TypeError:
            # glob() has no 'recursive' keyword before Python 3.5
            searched_dirs = glob(root_directory)
 |         if not searched_dirs: | 
 |             continue | 
 |  | 
 |         for mydir in searched_dirs: | 
            total_dirs += 1
            # the eBPF program cannot handle more than 15 directories
            if total_dirs > 15:
                print('15 directories limit reached')
 |                 break | 
 |             inode_id = os.lstat(mydir)[stat.ST_INO] | 
 |             if inode_id in inode_to_path: | 
 |                 if inode_to_path[inode_id] == mydir: | 
 |                     print('Skipping {} as already considered'.format(mydir)) | 
 |             else: | 
 |                 inodes = "{},{}".format(inodes, inode_id) | 
 |                 inode_to_path[inode_id] = mydir | 
 |                 print('Considering {} with inode_id {}'.format(mydir, inode_id)) | 
 |  | 
 |     inodes = inodes + '}' | 
 |     if len(inode_to_path) == 0: | 
 |         print('Cannot find any valid directory') | 
 |         exit() | 
 |     return inodes.replace('{,', '{'), inode_to_path | 
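# For illustration (hypothetical inode numbers): get_searched_ids('/tmp,/var/log')
# could return ('{1048578,2097154}', {1048578: '/tmp', 2097154: '/var/log'});
# the first element is substituted verbatim into the BPF program below.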
 |  | 
 |  | 
 | if args.tgid: | 
 |     bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid) | 
 | else: | 
 |     bpf_text = bpf_text.replace('TGID_FILTER', '0') | 
 |  | 
 | inodes, inodes_to_path = get_searched_ids(args.rootdirs) | 
 | bpf_text = bpf_text.replace("DIRECTORY_INODES", inodes) | 
 | bpf_text = bpf_text.replace( | 
 |     "INODES_NUMBER", '{}'.format(len(inodes.split(',')))) | 
 |  | 
 | if debug or args.ebpf: | 
 |     print(bpf_text) | 
 |     if args.ebpf: | 
 |         exit() | 
 |  | 
 | # initialize BPF | 
 | b = BPF(text=bpf_text) | 
 | b.attach_kprobe(event="vfs_read", fn_name="trace_read_entry") | 
 | b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry") | 
 |  | 
 | print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval) | 
 |  | 
 |  | 
 | def sort_fn(counts): | 
 |     """Define how to sort the columns""" | 
 |     if args.sort == "all": | 
 |         return (counts[1].rbytes + counts[1].wbytes + counts[1].reads + counts[1].writes) | 
 |     else: | 
 |         return getattr(counts[1], args.sort) | 
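# e.g. '-s wbytes' ranks directories by bytes written during the interval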
 |  | 
 |  | 
 | # output | 
 | exiting = 0 | 
 | while 1: | 
 |     try: | 
 |         sleep(interval) | 
 |     except KeyboardInterrupt: | 
 |         exiting = 1 | 
 |  | 
 |     # header | 
 |     if clear: | 
 |         call("clear") | 
 |     else: | 
 |         print() | 
 |     with open(loadavg) as stats: | 
 |         print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read())) | 
 |  | 
 |     print("%-6s %-6s %-8s %-8s %s" % | 
 |           ("READS", "WRITES", "R_Kb", "W_Kb", "PATH")) | 
    # by-directory output
 |     counts = b.get_table("counts") | 
 |     line = 0 | 
 |     reads = {} | 
 |     writes = {} | 
 |     reads_Kb = {} | 
 |     writes_Kb = {} | 
 |     for k, v in reversed(sorted(counts.items(), | 
 |                                 key=sort_fn)): | 
 |         # If it's the first time we see this inode | 
 |         if k.inode_id not in reads: | 
 |             # let's create a new entry | 
 |             reads[k.inode_id] = v.reads | 
 |             writes[k.inode_id] = v.writes | 
 |             reads_Kb[k.inode_id] = v.rbytes / 1024 | 
 |             writes_Kb[k.inode_id] = v.wbytes / 1024 | 
 |         else: | 
            # otherwise, add the current performance metrics
            # to the previous ones
 |             reads[k.inode_id] += v.reads | 
 |             writes[k.inode_id] += v.writes | 
 |             reads_Kb[k.inode_id] += v.rbytes / 1024 | 
 |             writes_Kb[k.inode_id] += v.wbytes / 1024 | 
 |  | 
 |     for node_id in reads: | 
 |         print("%-6d %-6d %-8d %-8d %s" % | 
 |               (reads[node_id], writes[node_id], reads_Kb[node_id], writes_Kb[node_id], inodes_to_path[node_id])) | 
 |         line += 1 | 
 |         if line >= maxrows: | 
 |             break | 
 |  | 
 |     counts.clear() | 
 |  | 
 |     countdown -= 1 | 
 |     if exiting or countdown == 0: | 
 |         print("Detaching...") | 
 |         exit() |