tools/runqlen.py - platform/external/bcc - Git at Google

 #!/usr/bin/env python
 # @lint-avoid-python-3-compatibility-imports
 #
 # runqlen    Summarize scheduler run queue length as a histogram.
 #            For Linux, uses BCC, eBPF.
 #
 # This counts the length of the run queue, excluding the currently running
 # thread, and shows it as a histogram.
 #
 # Also answers run queue occupancy.
 #
 # USAGE: runqlen [-h] [-T] [-O] [-C] [interval] [count]
 #
 # REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
 # a version of this tool that may work on Linux 4.6 - 4.8.
 #
 # Copyright 2016 Netflix, Inc.
 # Licensed under the Apache License, Version 2.0 (the "License")
 #
 # 12-Dec-2016   Brendan Gregg   Created this.

 from __future__ import print_function
 from bcc import BPF, PerfType, PerfSWConfig
 from time import sleep, strftime
 from tempfile import NamedTemporaryFile
 from os import open, close, dup, unlink, O_WRONLY
 import argparse

 # arguments
 examples = """examples:
     ./runqlen            # summarize run queue length as a histogram
     ./runqlen 1 10       # print 1 second summaries, 10 times
     ./runqlen -T 1       # 1s summaries and timestamps
     ./runqlen -O         # report run queue occupancy
     ./runqlen -C         # show each CPU separately
 """
 parser = argparse.ArgumentParser(
     description="Summarize scheduler run queue length as a histogram",
     formatter_class=argparse.RawDescriptionHelpFormatter,
     epilog=examples)
 parser.add_argument("-T", "--timestamp", action="store_true",
     help="include timestamp on output")
 parser.add_argument("-O", "--runqocc", action="store_true",
     help="report run queue occupancy")
 parser.add_argument("-C", "--cpus", action="store_true",
     help="print output for each CPU separately")
 # interval and count are parsed as integers so that a non-numeric value is
 # rejected with a usage error at startup (instead of a ValueError traceback
 # later on), and so that an interval of 0 is falsy in the "exiting" check
 # made before the output loop.
 parser.add_argument("interval", nargs="?", default=99999999, type=int,
     help="output interval, in seconds")
 parser.add_argument("count", nargs="?", default=99999999, type=int,
     help="number of outputs")
 # --ebpf prints the generated BPF program and exits; hidden from --help.
 parser.add_argument("--ebpf", action="store_true",
     help=argparse.SUPPRESS)
 args = parser.parse_args()
 # number of summaries still to print; decremented once per output
 countdown = int(args.count)
 # set to 1 to print the generated BPF program before loading it
 debug = 0
 # passed as sample_freq when the perf event is attached
 frequency = 99

 # Linux 4.15 introduced a new field runnable_weight
 # in linux_src:kernel/sched/sched.h as
 #     struct cfs_rq {
 #         struct load_weight load;
 #         unsigned long runnable_weight;
 #         unsigned int nr_running, h_nr_running;
 #         ......
 #     }
 # and this tool needs to read nr_running to get the
 # run queue length.
 #
 # The commit which introduces cfs_rq->runnable_weight
 # field also introduces the field sched_entity->runnable_weight
 # where sched_entity is defined in linux_src:include/linux/sched.h.
 #
 # To cope with pre-4.15 and 4.15/post-4.15 releases,
 # we run a simple BPF program to detect whether
 # field sched_entity->runnable_weight exists. The existence of
 # this field should imply the existence of cfs_rq->runnable_weight.
 #
 # This will need maintenance as the relationship between these
 # two fields may change in the future.
 #
 def check_runnable_weight_field():
     """Report whether sched_entity->runnable_weight compiles on this system.

     A tiny BPF program that reads ``entity->runnable_weight`` is compiled
     with BPF(). While it compiles, fd 2 (stderr) is pointed at a temporary
     file so that llvm errors are not printed on the screen; stderr is put
     back and the temporary file removed before returning.

     Returns:
         True if the check program compiled, False if BPF() raised.
     """
     # Local import: only dup() is imported at file level. dup2() targets
     # fd 2 explicitly instead of relying on "lowest free fd" allocation, and
     # (Python 3.4+) leaves the restored stderr inheritable, unlike dup().
     from os import dup2

     # Define the bpf program for checking purpose
     bpf_check_text = """
 #include <linux/sched.h>
 unsigned long dummy(struct sched_entity *entity)
 {
     return entity->runnable_weight;
 }
 """

     # Get a temporary file name
     tmp_file = NamedTemporaryFile(delete=False)
     tmp_file.close()

     try:
         # Keep a copy of the real stderr so it can be restored afterwards
         old_stderr = dup(2)
         try:
             fd = open(tmp_file.name, O_WRONLY)
             try:
                 # Redirect stderr (fd = 2) into the temporary file.
                 # This will avoid printing llvm errors on the screen
                 dup2(fd, 2)
                 try:
                     BPF(text=bpf_check_text)
                     success_compile = True
                 except Exception:
                     # Any compile/load failure means "field not present";
                     # KeyboardInterrupt/SystemExit are left to propagate.
                     success_compile = False
             finally:
                 # Restore the old stderr even if something unexpected
                 # was raised
                 dup2(old_stderr, 2)
                 close(fd)
         finally:
             close(old_stderr)
     finally:
         # remove the temporary file
         unlink(tmp_file.name)
     return success_compile


 # define BPF program
 #
 # This is a C template rather than the final program: the placeholder
 # tokens below are filled in with str.replace() further down in this
 # script, before the text is passed to BPF().
 #   RUNNABLE_WEIGHT_FIELD  replaced by "unsigned long runnable_weight;" when
 #                          check_runnable_weight_field() returns True,
 #                          otherwise by an empty string
 #   STORAGE                replaced by the BPF_HISTOGRAM declaration of
 #                          "dist" (keyed by cpu_key_t with -C, else by
 #                          unsigned int)
 #   STORE                  replaced by the statement(s) that increment
 #                          "dist" for the sampled length
 # do_perf_event() is the function attached to the perf event below.
 bpf_text = """
 #include <uapi/linux/ptrace.h>
 #include <linux/sched.h>

 // Declare enough of cfs_rq to find nr_running, since we can't #import the
 // header. This will need maintenance. It is from kernel/sched/sched.h:
 struct cfs_rq_partial {
     struct load_weight load;
     RUNNABLE_WEIGHT_FIELD
     unsigned int nr_running, h_nr_running;
 };

 typedef struct cpu_key {
     int cpu;
     unsigned int slot;
 } cpu_key_t;
 STORAGE

 int do_perf_event()
 {
     unsigned int len = 0;
     pid_t pid = 0;
     struct task_struct *task = NULL;
     struct cfs_rq_partial *my_q = NULL;

     // Fetch the run queue length from task->se.cfs_rq->nr_running. This is an
     // unstable interface and may need maintenance. Perhaps a future version
     // of BPF will support task_rq(p) or something similar as a more reliable
     // interface.
     task = (struct task_struct *)bpf_get_current_task();
     my_q = (struct cfs_rq_partial *)task->se.cfs_rq;
     len = my_q->nr_running;

     // Calculate run queue length by subtracting the currently running task,
     // if present. len 0 == idle, len 1 == one running task.
     if (len > 0)
         len--;

     STORE

     return 0;
 }
 """

 # code substitutions
 # Choose the histogram declaration and the update statement: keyed by
 # (cpu, slot) when per-CPU output was requested, by the length alone
 # otherwise.
 if args.cpus:
     storage_decl = 'BPF_HISTOGRAM(dist, cpu_key_t);'
     store_stmt = ('cpu_key_t key = {.slot = len}; '
                   'key.cpu = bpf_get_smp_processor_id(); '
                   'dist.increment(key);')
 else:
     storage_decl = 'BPF_HISTOGRAM(dist, unsigned int);'
     store_stmt = 'dist.increment(len);'
 bpf_text = bpf_text.replace('STORAGE', storage_decl)
 bpf_text = bpf_text.replace('STORE', store_stmt)

 # Add the runnable_weight member to cfs_rq_partial only when the probe
 # compile says the field exists.
 weight_decl = ''
 if check_runnable_weight_field():
     weight_decl = 'unsigned long runnable_weight;'
 bpf_text = bpf_text.replace('RUNNABLE_WEIGHT_FIELD', weight_decl)

 # Show the generated program when debugging; --ebpf prints it and stops.
 if debug or args.ebpf:
     print(bpf_text)
 if args.ebpf:
     exit()

 # initialize BPF & perf_events
 b = BPF(text=bpf_text)
 perf_event_opts = dict(
     ev_type=PerfType.SOFTWARE,
     ev_config=PerfSWConfig.CPU_CLOCK,
     fn_name="do_perf_event",
     sample_period=0,
     sample_freq=frequency)
 b.attach_perf_event(**perf_event_opts)

 print("Sampling run queue length... Hit Ctrl-C to end.")

 # output
 # Exit after the first report when args.interval is falsy.
 exiting = 0 if args.interval else 1
 dist = b.get_table("dist")
 while (1):
     try:
         sleep(int(args.interval))
     except KeyboardInterrupt:
         # Ctrl-C: still print what was collected, then exit below
         exiting = 1

     print()
     if args.timestamp:
         print("%-8s\n" % strftime("%H:%M:%S"), end="")

     if args.runqocc:
         # Read the table once per report. The perf event stays attached
         # while we print, so reading it twice could yield a CPU key in the
         # second pass that the first pass never saw (a KeyError in the
         # per-CPU summary).
         snapshot = list(dist.items())

         if args.cpus:
             # run queue occupancy, per-CPU summary
             idle = {}
             queued = {}
             for k, v in snapshot:
                 # slot 0 == nothing waiting behind the running task
                 if k.slot == 0:
                     idle[k.cpu] = idle.get(k.cpu, 0) + v.value
                 else:
                     queued[k.cpu] = queued.get(k.cpu, 0) + v.value

             # Report every CPU from 0 to the highest one sampled; CPUs
             # without samples (and an empty table) print as 0.00%.
             seen_cpus = set(idle) | set(queued)
             cpumax = max(seen_cpus) if seen_cpus else 0
             for c in range(0, cpumax + 1):
                 idle_c = idle.get(c, 0)
                 queued_c = queued.get(c, 0)
                 samples = idle_c + queued_c
                 if samples:
                     runqocc = float(queued_c) / samples
                 else:
                     runqocc = 0
                 print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc))

         else:
             # run queue occupancy, system-wide summary
             idle = 0
             queued = 0
             for k, v in snapshot:
                 if k.value == 0:
                     idle += v.value
                 else:
                     queued += v.value
             samples = idle + queued
             if samples:
                 runqocc = float(queued) / samples
             else:
                 runqocc = 0
             print("runqocc: %0.2f%%" % (100 * runqocc))

     else:
         # run queue length histograms
         dist.print_linear_hist("runqlen", "cpu")

     # start the next interval from an empty table
     dist.clear()

     countdown -= 1
     if exiting or countdown == 0:
         exit()
	#!/usr/bin/env python
	# @lint-avoid-python-3-compatibility-imports
	#
	# runqlen Summarize scheduler run queue length as a histogram.
	# For Linux, uses BCC, eBPF.
	#
	# This counts the length of the run queue, excluding the currently running
	# thread, and shows it as a histogram.
	#
	# Also answers run queue occupancy.
	#
	# USAGE: runqlen [-h] [-T] [-O] [-C] [interval] [count]
	#
	# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
	# a version of this tool that may work on Linux 4.6 - 4.8.
	#
	# Copyright 2016 Netflix, Inc.
	# Licensed under the Apache License, Version 2.0 (the "License")
	#
	# 12-Dec-2016 Brendan Gregg Created this.

	from __future__ import print_function
	from bcc import BPF, PerfType, PerfSWConfig
	from time import sleep, strftime
	from tempfile import NamedTemporaryFile
	from os import open, close, dup, unlink, O_WRONLY
	import argparse

	# arguments
	examples = """examples:
	./runqlen # summarize run queue length as a histogram
	./runqlen 1 10 # print 1 second summaries, 10 times
	./runqlen -T 1 # 1s summaries and timestamps
	./runqlen -O # report run queue occupancy
	./runqlen -C # show each CPU separately
	"""
	parser = argparse.ArgumentParser(
	    description="Summarize scheduler run queue length as a histogram",
	    formatter_class=argparse.RawDescriptionHelpFormatter,
	    epilog=examples)
	parser.add_argument("-T", "--timestamp", action="store_true",
	    help="include timestamp on output")
	parser.add_argument("-O", "--runqocc", action="store_true",
	    help="report run queue occupancy")
	parser.add_argument("-C", "--cpus", action="store_true",
	    help="print output for each CPU separately")
	# interval and count are parsed as integers so that a non-numeric value is
	# rejected with a usage error at startup (instead of a ValueError traceback
	# later on), and so that an interval of 0 is falsy in the "exiting" check
	# made before the output loop.
	parser.add_argument("interval", nargs="?", default=99999999, type=int,
	    help="output interval, in seconds")
	parser.add_argument("count", nargs="?", default=99999999, type=int,
	    help="number of outputs")
	# --ebpf prints the generated BPF program and exits; hidden from --help.
	parser.add_argument("--ebpf", action="store_true",
	    help=argparse.SUPPRESS)
	args = parser.parse_args()
	# number of summaries still to print; decremented once per output
	countdown = int(args.count)
	# set to 1 to print the generated BPF program before loading it
	debug = 0
	# passed as sample_freq when the perf event is attached
	frequency = 99

	# Linux 4.15 introduced a new field runnable_weight
	# in linux_src:kernel/sched/sched.h as
	# struct cfs_rq {
	# struct load_weight load;
	# unsigned long runnable_weight;
	# unsigned int nr_running, h_nr_running;
	# ......
	# }
	# and this tool needs to read nr_running to get the
	# run queue length.
	#
	# The commit which introduces cfs_rq->runnable_weight
	# field also introduces the field sched_entity->runnable_weight
	# where sched_entity is defined in linux_src:include/linux/sched.h.
	#
	# To cope with pre-4.15 and 4.15/post-4.15 releases,
	# we run a simple BPF program to detect whether
	# field sched_entity->runnable_weight exists. The existence of
	# this field should imply the existence of cfs_rq->runnable_weight.
	#
	# This will need maintenance as the relationship between these
	# two fields may change in the future.
	#
	def check_runnable_weight_field():
	    """Report whether sched_entity->runnable_weight compiles on this system.

	    A tiny BPF program that reads ``entity->runnable_weight`` is compiled
	    with BPF(). While it compiles, fd 2 (stderr) is pointed at a temporary
	    file so that llvm errors are not printed on the screen; stderr is put
	    back and the temporary file removed before returning.

	    Returns:
	        True if the check program compiled, False if BPF() raised.
	    """
	    # Local import: only dup() is imported at file level. dup2() targets
	    # fd 2 explicitly instead of relying on "lowest free fd" allocation, and
	    # (Python 3.4+) leaves the restored stderr inheritable, unlike dup().
	    from os import dup2

	    # Define the bpf program for checking purpose
	    bpf_check_text = """
	#include <linux/sched.h>
	unsigned long dummy(struct sched_entity *entity)
	{
	return entity->runnable_weight;
	}
	"""

	    # Get a temporary file name
	    tmp_file = NamedTemporaryFile(delete=False)
	    tmp_file.close()

	    try:
	        # Keep a copy of the real stderr so it can be restored afterwards
	        old_stderr = dup(2)
	        try:
	            fd = open(tmp_file.name, O_WRONLY)
	            try:
	                # Redirect stderr (fd = 2) into the temporary file.
	                # This will avoid printing llvm errors on the screen
	                dup2(fd, 2)
	                try:
	                    BPF(text=bpf_check_text)
	                    success_compile = True
	                except Exception:
	                    # Any compile/load failure means "field not present";
	                    # KeyboardInterrupt/SystemExit are left to propagate.
	                    success_compile = False
	            finally:
	                # Restore the old stderr even if something unexpected
	                # was raised
	                dup2(old_stderr, 2)
	                close(fd)
	        finally:
	            close(old_stderr)
	    finally:
	        # remove the temporary file
	        unlink(tmp_file.name)
	    return success_compile


	# define BPF program
	#
	# This is a C template rather than the final program: the placeholder
	# tokens below are filled in with str.replace() further down in this
	# script, before the text is passed to BPF().
	#   RUNNABLE_WEIGHT_FIELD  replaced by "unsigned long runnable_weight;" when
	#                          check_runnable_weight_field() returns True,
	#                          otherwise by an empty string
	#   STORAGE                replaced by the BPF_HISTOGRAM declaration of
	#                          "dist" (keyed by cpu_key_t with -C, else by
	#                          unsigned int)
	#   STORE                  replaced by the statement(s) that increment
	#                          "dist" for the sampled length
	# do_perf_event() is the function attached to the perf event below.
	bpf_text = """
	#include <uapi/linux/ptrace.h>
	#include <linux/sched.h>

	// Declare enough of cfs_rq to find nr_running, since we can't #import the
	// header. This will need maintenance. It is from kernel/sched/sched.h:
	struct cfs_rq_partial {
	struct load_weight load;
	RUNNABLE_WEIGHT_FIELD
	unsigned int nr_running, h_nr_running;
	};

	typedef struct cpu_key {
	int cpu;
	unsigned int slot;
	} cpu_key_t;
	STORAGE

	int do_perf_event()
	{
	unsigned int len = 0;
	pid_t pid = 0;
	struct task_struct *task = NULL;
	struct cfs_rq_partial *my_q = NULL;

	// Fetch the run queue length from task->se.cfs_rq->nr_running. This is an
	// unstable interface and may need maintenance. Perhaps a future version
	// of BPF will support task_rq(p) or something similar as a more reliable
	// interface.
	task = (struct task_struct *)bpf_get_current_task();
	my_q = (struct cfs_rq_partial *)task->se.cfs_rq;
	len = my_q->nr_running;

	// Calculate run queue length by subtracting the currently running task,
	// if present. len 0 == idle, len 1 == one running task.
	if (len > 0)
	len--;

	STORE

	return 0;
	}
	"""

	# code substitutions
	# Choose the histogram declaration and the update statement: keyed by
	# (cpu, slot) when per-CPU output was requested, by the length alone
	# otherwise.
	if args.cpus:
	    storage_decl = 'BPF_HISTOGRAM(dist, cpu_key_t);'
	    store_stmt = ('cpu_key_t key = {.slot = len}; '
	                  'key.cpu = bpf_get_smp_processor_id(); '
	                  'dist.increment(key);')
	else:
	    storage_decl = 'BPF_HISTOGRAM(dist, unsigned int);'
	    store_stmt = 'dist.increment(len);'
	bpf_text = bpf_text.replace('STORAGE', storage_decl)
	bpf_text = bpf_text.replace('STORE', store_stmt)

	# Add the runnable_weight member to cfs_rq_partial only when the probe
	# compile says the field exists.
	weight_decl = ''
	if check_runnable_weight_field():
	    weight_decl = 'unsigned long runnable_weight;'
	bpf_text = bpf_text.replace('RUNNABLE_WEIGHT_FIELD', weight_decl)

	# Show the generated program when debugging; --ebpf prints it and stops.
	if debug or args.ebpf:
	    print(bpf_text)
	if args.ebpf:
	    exit()

	# initialize BPF & perf_events
	b = BPF(text=bpf_text)
	b.attach_perf_event(ev_type=PerfType.SOFTWARE,
	    ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event",
	    sample_period=0, sample_freq=frequency)

	print("Sampling run queue length... Hit Ctrl-C to end.")

	# output
	# Exit after the first report when args.interval is falsy.
	exiting = 0 if args.interval else 1
	dist = b.get_table("dist")
	while (1):
	    try:
	        sleep(int(args.interval))
	    except KeyboardInterrupt:
	        # Ctrl-C: still print what was collected, then exit below
	        exiting = 1

	    print()
	    if args.timestamp:
	        print("%-8s\n" % strftime("%H:%M:%S"), end="")

	    if args.runqocc:
	        # Read the table once per report. The perf event stays attached
	        # while we print, so reading it twice could yield a CPU key in the
	        # second pass that the first pass never saw (a KeyError in the
	        # per-CPU summary).
	        snapshot = list(dist.items())

	        if args.cpus:
	            # run queue occupancy, per-CPU summary
	            idle = {}
	            queued = {}
	            for k, v in snapshot:
	                # slot 0 == nothing waiting behind the running task
	                if k.slot == 0:
	                    idle[k.cpu] = idle.get(k.cpu, 0) + v.value
	                else:
	                    queued[k.cpu] = queued.get(k.cpu, 0) + v.value

	            # Report every CPU from 0 to the highest one sampled; CPUs
	            # without samples (and an empty table) print as 0.00%.
	            seen_cpus = set(idle) | set(queued)
	            cpumax = max(seen_cpus) if seen_cpus else 0
	            for c in range(0, cpumax + 1):
	                idle_c = idle.get(c, 0)
	                queued_c = queued.get(c, 0)
	                samples = idle_c + queued_c
	                if samples:
	                    runqocc = float(queued_c) / samples
	                else:
	                    runqocc = 0
	                print("runqocc, CPU %-3d %6.2f%%" % (c, 100 * runqocc))

	        else:
	            # run queue occupancy, system-wide summary
	            idle = 0
	            queued = 0
	            for k, v in snapshot:
	                if k.value == 0:
	                    idle += v.value
	                else:
	                    queued += v.value
	            samples = idle + queued
	            if samples:
	                runqocc = float(queued) / samples
	            else:
	                runqocc = 0
	            print("runqocc: %0.2f%%" % (100 * runqocc))

	    else:
	        # run queue length histograms
	        dist.print_linear_hist("runqlen", "cpu")

	    # start the next interval from an empty table
	    dist.clear()

	    countdown -= 1
	    if exiting or countdown == 0:
	        exit()