| #!/usr/bin/python | 
 | # @lint-avoid-python-3-compatibility-imports | 
 | # | 
 | # dirtop  file reads and writes by directory. | 
 | #          For Linux, uses BCC, eBPF. | 
 | # | 
 | # USAGE: dirtop.py -d 'directory1,directory2' [-h] [-C] [-r MAXROWS] [interval] [count] | 
 | # | 
# This uses in-kernel eBPF maps to store per-directory summaries for efficiency.
 | # | 
 | # Copyright 2016 Netflix, Inc. | 
 | # Licensed under the Apache License, Version 2.0 (the "License") | 
 | # | 
 | # 13-Mar-2020   Erwan Velu      Created dirtop from filetop | 
 | # 06-Feb-2016   Brendan Gregg   Created filetop. | 
 |  | 
 | from __future__ import print_function | 
 | from bcc import BPF | 
 | from time import sleep, strftime | 
 | import argparse | 
 | import os | 
 | import stat | 
 | from subprocess import call | 
 |  | 
 | # arguments | 
 | examples = """examples: | 
 |     ./dirtop -d '/hdfs/uuid/*/yarn'       # directory I/O top, 1 second refresh | 
 |     ./dirtop -d '/hdfs/uuid/*/yarn' -C    # don't clear the screen | 
 |     ./dirtop -d '/hdfs/uuid/*/yarn' 5     # 5 second summaries | 
 |     ./dirtop -d '/hdfs/uuid/*/yarn' 5 10  # 5 second summaries, 10 times only | 
    ./dirtop -d '/hdfs/uuid/*/yarn,/hdfs/uuid/*/data' # watch two sets of directories
 | """ | 
 | parser = argparse.ArgumentParser( | 
 |     description="File reads and writes by process", | 
 |     formatter_class=argparse.RawDescriptionHelpFormatter, | 
 |     epilog=examples) | 
 | parser.add_argument("-C", "--noclear", action="store_true", | 
 |                     help="don't clear the screen") | 
 | parser.add_argument("-r", "--maxrows", default=20, | 
 |                     help="maximum rows to print, default 20") | 
 | parser.add_argument("-s", "--sort", default="all", | 
 |                     choices=["all", "reads", "writes", "rbytes", "wbytes"], | 
 |                     help="sort column, default all") | 
 | parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid", | 
 |                     help="trace this PID only") | 
 | parser.add_argument("interval", nargs="?", default=1, | 
 |                     help="output interval, in seconds") | 
 | parser.add_argument("count", nargs="?", default=99999999, | 
 |                     help="number of outputs") | 
 | parser.add_argument("--ebpf", action="store_true", | 
 |                     help=argparse.SUPPRESS) | 
 | parser.add_argument("-d", "--root-directories", type=str, required=True, dest="rootdirs", | 
 |                     help="select the directories to observe, separated by commas") | 
 | args = parser.parse_args() | 
 | interval = int(args.interval) | 
 | countdown = int(args.count) | 
 | maxrows = int(args.maxrows) | 
clear = not args.noclear
 | debug = 0 | 
 |  | 
 | # linux stats | 
 | loadavg = "/proc/loadavg" | 
 |  | 
 | # define BPF program | 
 | bpf_text = """ | 
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
 |  | 
 | // the key for the output summary | 
 | struct info_t { | 
 |     unsigned long inode_id; | 
 | }; | 
 |  | 
 | // the value of the output summary | 
 | struct val_t { | 
 |     u64 reads; | 
 |     u64 writes; | 
 |     u64 rbytes; | 
 |     u64 wbytes; | 
 | }; | 
 |  | 
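// the aggregation map, maintained in kernel space: info_t key -> val_t counters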
 | BPF_HASH(counts, struct info_t, struct val_t); | 
 |  | 
 | static int do_entry(struct pt_regs *ctx, struct file *file, | 
 |     char __user *buf, size_t count, int is_read) | 
 | { | 
 |     u32 tgid = bpf_get_current_pid_tgid() >> 32; | 
 |     if (TGID_FILTER) | 
 |         return 0; | 
 |  | 
 |     // The directory inodes we look at | 
    u32 dir_ids[INODES_NUMBER] = DIRECTORY_INODES;
 |     struct info_t info = {.inode_id = 0}; | 
 |     struct dentry *pde = file->f_path.dentry; | 
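    // Walk up the dentry chain (bounded: the eBPF verifier rejects
    // unbounded loops)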
    for (int i = 0; i < 50; i++) {
        // If there is no parent, we've reached the root
        if (!pde->d_parent) {
            break;
        }
        pde = pde->d_parent;
        // Is this file under one of the directories we are looking for?
        for (int dir_id = 0; dir_id < INODES_NUMBER; dir_id++) {
 |             if (pde->d_inode->i_ino == dir_ids[dir_id]) { | 
 |                 // Yes, let's export the top directory inode | 
 |                 info.inode_id = pde->d_inode->i_ino; | 
 |                 break; | 
 |             } | 
 |         } | 
 |     } | 
    // If we didn't find any match, bail out
 |     if (info.inode_id == 0) { | 
 |         return 0; | 
 |     } | 
 |  | 
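    // update the in-kernel counters; lookup_or_try_init() returns NULL
    // when the map is full, in which case this event is dropped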
 |     struct val_t *valp, zero = {}; | 
 |     valp = counts.lookup_or_try_init(&info, &zero); | 
 |     if (valp) { | 
 |         if (is_read) { | 
 |             valp->reads++; | 
 |             valp->rbytes += count; | 
 |         } else { | 
 |             valp->writes++; | 
 |             valp->wbytes += count; | 
 |         } | 
 |     } | 
 |     return 0; | 
 | } | 
 |  | 
 | int trace_read_entry(struct pt_regs *ctx, struct file *file, | 
 |     char __user *buf, size_t count) | 
 | { | 
 |     return do_entry(ctx, file, buf, count, 1); | 
 | } | 
 |  | 
 | int trace_write_entry(struct pt_regs *ctx, struct file *file, | 
 |     char __user *buf, size_t count) | 
 | { | 
 |     return do_entry(ctx, file, buf, count, 0); | 
 | } | 
 |  | 
 | """ | 
 |  | 
 |  | 
 | def get_searched_ids(root_directories): | 
 |     """Export the inode numbers of the selected directories.""" | 
 |     from glob import glob | 
 |     inode_to_path = {} | 
 |     inodes = "{" | 
 |     total_dirs = 0 | 
 |     for root_directory in root_directories.split(','): | 
        try:
            searched_dirs = glob(root_directory, recursive=True)
        except TypeError:
            # glob() has no 'recursive' keyword before Python 3.5
            searched_dirs = glob(root_directory)
 |         if not searched_dirs: | 
 |             continue | 
 |  | 
 |         for mydir in searched_dirs: | 
            total_dirs += 1
            # the eBPF program cannot handle more than 15 directories
            if total_dirs > 15:
                print('15 directories limit reached')
 |                 break | 
 |             inode_id = os.lstat(mydir)[stat.ST_INO] | 
 |             if inode_id in inode_to_path: | 
 |                 if inode_to_path[inode_id] == mydir: | 
 |                     print('Skipping {} as already considered'.format(mydir)) | 
 |             else: | 
 |                 inodes = "{},{}".format(inodes, inode_id) | 
 |                 inode_to_path[inode_id] = mydir | 
 |                 print('Considering {} with inode_id {}'.format(mydir, inode_id)) | 
 |  | 
 |     inodes = inodes + '}' | 
 |     if len(inode_to_path) == 0: | 
 |         print('Cannot find any valid directory') | 
 |         exit() | 
 |     return inodes.replace('{,', '{'), inode_to_path | 
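# For illustration (hypothetical inode numbers): get_searched_ids('/tmp,/var/log')
# could return ('{1048578,2097154}', {1048578: '/tmp', 2097154: '/var/log'});
# the first element is substituted verbatim into the BPF program below.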
 |  | 
 |  | 
 | if args.tgid: | 
 |     bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid) | 
 | else: | 
 |     bpf_text = bpf_text.replace('TGID_FILTER', '0') | 
 |  | 
 | inodes, inodes_to_path = get_searched_ids(args.rootdirs) | 
 | bpf_text = bpf_text.replace("DIRECTORY_INODES", inodes) | 
 | bpf_text = bpf_text.replace( | 
 |     "INODES_NUMBER", '{}'.format(len(inodes.split(',')))) | 
 |  | 
 | if debug or args.ebpf: | 
 |     print(bpf_text) | 
 |     if args.ebpf: | 
 |         exit() | 
 |  | 
 | # initialize BPF | 
 | b = BPF(text=bpf_text) | 
 | b.attach_kprobe(event="vfs_read", fn_name="trace_read_entry") | 
 | b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry") | 
 |  | 
 | print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval) | 
 |  | 
 |  | 
 | def sort_fn(counts): | 
 |     """Define how to sort the columns""" | 
 |     if args.sort == "all": | 
 |         return (counts[1].rbytes + counts[1].wbytes + counts[1].reads + counts[1].writes) | 
 |     else: | 
 |         return getattr(counts[1], args.sort) | 
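# e.g. '-s wbytes' ranks directories by bytes written during the interval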
 |  | 
 |  | 
 | # output | 
 | exiting = 0 | 
 | while 1: | 
 |     try: | 
 |         sleep(interval) | 
 |     except KeyboardInterrupt: | 
 |         exiting = 1 | 
 |  | 
 |     # header | 
 |     if clear: | 
 |         call("clear") | 
 |     else: | 
 |         print() | 
 |     with open(loadavg) as stats: | 
 |         print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read())) | 
 |  | 
 |     print("%-6s %-6s %-8s %-8s %s" % | 
 |           ("READS", "WRITES", "R_Kb", "W_Kb", "PATH")) | 
    # by-directory output
 |     counts = b.get_table("counts") | 
 |     line = 0 | 
 |     reads = {} | 
 |     writes = {} | 
 |     reads_Kb = {} | 
 |     writes_Kb = {} | 
 |     for k, v in reversed(sorted(counts.items(), | 
 |                                 key=sort_fn)): | 
 |         # If it's the first time we see this inode | 
 |         if k.inode_id not in reads: | 
 |             # let's create a new entry | 
 |             reads[k.inode_id] = v.reads | 
 |             writes[k.inode_id] = v.writes | 
 |             reads_Kb[k.inode_id] = v.rbytes / 1024 | 
 |             writes_Kb[k.inode_id] = v.wbytes / 1024 | 
 |         else: | 
            # otherwise, add the current performance metrics
            # to the previous ones
 |             reads[k.inode_id] += v.reads | 
 |             writes[k.inode_id] += v.writes | 
 |             reads_Kb[k.inode_id] += v.rbytes / 1024 | 
 |             writes_Kb[k.inode_id] += v.wbytes / 1024 | 
 |  | 
 |     for node_id in reads: | 
 |         print("%-6d %-6d %-8d %-8d %s" % | 
 |               (reads[node_id], writes[node_id], reads_Kb[node_id], writes_Kb[node_id], inodes_to_path[node_id])) | 
 |         line += 1 | 
 |         if line >= maxrows: | 
 |             break | 
 |  | 
 |     counts.clear() | 
 |  | 
 |     countdown -= 1 | 
 |     if exiting or countdown == 0: | 
 |         print("Detaching...") | 
 |         exit() |