Netqtop 3037 (#3048)
The tool netqtop uses the tracepoints NET_DEV_START_XMIT and NETIF_RECEIVE_SKB to intercept every transmitted and received packet; as a result, a considerable performance degradation is expected.
Details for some performance evaluation can be found at
https://github.com/iovisor/bcc/pull/3048
diff --git a/README.md b/README.md
index 5834062..d54e384 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,7 @@
- tools/[memleak](tools/memleak.py): Display outstanding memory allocations to find memory leaks. [Examples](tools/memleak_example.txt).
- tools/[mountsnoop](tools/mountsnoop.py): Trace mount and umount syscalls system-wide. [Examples](tools/mountsnoop_example.txt).
- tools/[mysqld_qslower](tools/mysqld_qslower.py): Trace MySQL server queries slower than a threshold. [Examples](tools/mysqld_qslower_example.txt).
+- tools/[netqtop](tools/netqtop.py) tools/[netqtop.c](tools/netqtop.c): Trace and display packets distribution on NIC queues. [Examples](tools/netqtop_example.txt).
- tools/[nfsslower](tools/nfsslower.py): Trace slow NFS operations. [Examples](tools/nfsslower_example.txt).
- tools/[nfsdist](tools/nfsdist.py): Summarize NFS operation latency distribution as a histogram. [Examples](tools/nfsdist_example.txt).
- tools/[offcputime](tools/offcputime.py): Summarize off-CPU time by kernel stack trace. [Examples](tools/offcputime_example.txt).
diff --git a/man/man8/netqtop.8 b/man/man8/netqtop.8
new file mode 100644
index 0000000..bfa34d1
--- /dev/null
+++ b/man/man8/netqtop.8
@@ -0,0 +1,56 @@
+.TH netqtop 8 "2020-07-30" "USER COMMANDS"
+.SH NAME
+netqtop \- Summarize PPS, BPS, average size of packets and packet counts ordered by packet sizes
+on each queue of a network interface.
+.SH SYNOPSIS
+.B netqtop [\-n nic] [\-i interval] [\-t throughput]
+.SH DESCRIPTION
+netqtop accounts statistics of both transmitted and received packets on each queue of
+a specified network interface to help developers check if its traffic load is balanced.
+The result is displayed as a table with columns of PPS, BPS, average size and
+packet counts in range [0, 64), [64, 512), [512, 2048), [2048, 16K), [16K, 64K).
+This is printed every given interval (default 1) in seconds.
+
+The tool uses the net:net_dev_start_xmit and net:netif_receive_skb kernel tracepoints.
+Since it uses tracepoints, the tool only works on Linux 4.7+.
+
+netqtop introduces significant overhead while network traffic is large. See OVERHEAD
+section below.
+
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-n NIC
+Specify the network interface card
+.TP
+\-i INTERVAL
+Print results every INTERVAL seconds.
+The default value is 1.
+.TP
+\-t THROUGHPUT
+Print BPS and PPS of each queue.
+.SH EXAMPLES
+.TP
+Account statistics of eth0 and output every 2 seconds:
+#
+.B netqtop -n eth0 -i 2
+.SH OVERHEAD
+In a performance test, netqtop introduced an overhead of up to 30% PPS drop
+when the printing interval was set to 1 second. So be mindful of potential packet drop
+when using this tool.
+
+It also increases ping-pong latency by about 1 usec.
+.SH SOURCE
+This is from bcc
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a netqtop_example.txt file containing
+example usage, output and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development
+.SH AUTHOR
+Yolandajn
diff --git a/tools/netqtop.c b/tools/netqtop.c
new file mode 100644
index 0000000..52605dd
--- /dev/null
+++ b/tools/netqtop.c
@@ -0,0 +1,113 @@
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#if IFNAMSIZ != 16
+#error "IFNAMSIZ != 16 is not supported"
+#endif
+#define MAX_QUEUE_NUM 1024
+
+/**
+* This union is used to store the name of the specified interface
+* and read it as two different data types
+*/
+union name_buf{
+ char name[IFNAMSIZ];
+ struct {
+ u64 hi;
+ u64 lo;
+ }name_int;
+};
+
+/* data retrieved in tracepoints */
+struct queue_data{
+ u64 total_pkt_len;
+ u32 num_pkt;
+ u32 size_64B;
+ u32 size_512B;
+ u32 size_2K;
+ u32 size_16K;
+ u32 size_64K;
+};
+
+/* array of length 1 for device name */
+BPF_ARRAY(name_map, union name_buf, 1);
+/* table for transmit & receive packets */
+BPF_HASH(tx_q, u16, struct queue_data, MAX_QUEUE_NUM);
+BPF_HASH(rx_q, u16, struct queue_data, MAX_QUEUE_NUM);
+
+static inline int name_filter(struct sk_buff* skb){
+ /* get device name from skb */
+ union name_buf real_devname;
+ struct net_device *dev;
+ bpf_probe_read(&dev, sizeof(skb->dev), ((char *)skb + offsetof(struct sk_buff, dev)));
+ bpf_probe_read(&real_devname, IFNAMSIZ, dev->name);
+
+ int key=0;
+ union name_buf *leaf = name_map.lookup(&key);
+ if(!leaf){
+ return 0;
+ }
+ if((leaf->name_int).hi != real_devname.name_int.hi || (leaf->name_int).lo != real_devname.name_int.lo){
+ return 0;
+ }
+
+ return 1;
+}
+
+static void updata_data(struct queue_data *data, u64 len){
+ data->total_pkt_len += len;
+ data->num_pkt ++;
+ if(len / 64 == 0){
+ data->size_64B ++;
+ }
+ else if(len / 512 == 0){
+ data->size_512B ++;
+ }
+ else if(len / 2048 == 0){
+ data->size_2K ++;
+ }
+ else if(len / 16384 == 0){
+ data->size_16K ++;
+ }
+ else if(len / 65536 == 0){
+ data->size_64K ++;
+ }
+}
+
+TRACEPOINT_PROBE(net, net_dev_start_xmit){
+ /* read device name */
+ struct sk_buff* skb = (struct sk_buff*)args->skbaddr;
+ if(!name_filter(skb)){
+ return 0;
+ }
+
+ /* update table */
+ u16 qid = skb->queue_mapping;
+ struct queue_data newdata;
+ __builtin_memset(&newdata, 0, sizeof(newdata));
+ struct queue_data *data = tx_q.lookup_or_try_init(&qid, &newdata);
+ if(!data){
+ return 0;
+ }
+ updata_data(data, skb->len);
+
+ return 0;
+}
+
+TRACEPOINT_PROBE(net, netif_receive_skb){
+ struct sk_buff* skb = (struct sk_buff*)args->skbaddr;
+ if(!name_filter(skb)){
+ return 0;
+ }
+
+ u16 qid = skb->queue_mapping;
+ struct queue_data newdata;
+ __builtin_memset(&newdata, 0, sizeof(newdata));
+ struct queue_data *data = rx_q.lookup_or_try_init(&qid, &newdata);
+ if(!data){
+ return 0;
+ }
+ updata_data(data, skb->len);
+
+ return 0;
+}
diff --git a/tools/netqtop.py b/tools/netqtop.py
new file mode 100755
index 0000000..e2823ac
--- /dev/null
+++ b/tools/netqtop.py
@@ -0,0 +1,218 @@
+#!/usr/bin/python
+
+from bcc import BPF
+from ctypes import *
+import argparse
+import os
+from time import sleep,time,localtime,asctime
+import types
+
+# pre defines -------------------------------
+ROOT_PATH = "/sys/class/net"
+IFNAMSIZ = 16
+COL_WIDTH = 10
+MAX_QUEUE_NUM = 1024
+EBPF_FILE = "netqtop.c"
+
+# structure for network interface name array
+class Devname(Structure):
+ _fields_=[
+ ('name', c_char*IFNAMSIZ)
+ ]
+
+################## printer for results ###################
+def to_str(num):
+ s = ""
+ if num > 1000000:
+ return str(round(num/(1024*1024.0), 2)) + 'M'
+ elif num > 1000:
+ return str(round(num/1024.0, 2)) + 'K'
+ else:
+ if type(num) == types.FloatType:
+ return str(round(num, 2))
+ else:
+ return str(num)
+
+def print_table(table, qnum):
+ global print_interval
+
+ # ---- print headers ----------------
+ headers = [
+ "QueueID",
+ "avg_size",
+ "[0, 64)",
+ "[64, 512)",
+ "[512, 2K)",
+ "[2K, 16K)",
+ "[16K, 64K)"
+ ]
+ if args.throughput:
+ headers.append("BPS")
+ headers.append("PPS")
+
+ for hd in headers:
+ print(hd.center(COL_WIDTH)),
+ print
+
+ # ------- calculates --------------
+ qids=[]
+ tBPS = 0
+ tPPS = 0
+ tAVG = 0
+ tGroup = [0,0,0,0,0]
+ tpkt = 0
+ tlen = 0
+ for k, v in table.items():
+ qids += [k.value]
+ tlen += v.total_pkt_len
+ tpkt += v.num_pkt
+ tGroup[0] += v.size_64B
+ tGroup[1] += v.size_512B
+ tGroup[2] += v.size_2K
+ tGroup[3] += v.size_16K
+ tGroup[4] += v.size_64K
+ tBPS = tlen / print_interval
+ tPPS = tpkt / print_interval
+ if tpkt != 0:
+ tAVG = tlen / tpkt
+
+ # -------- print table --------------
+ for k in range(qnum):
+ if k in qids:
+ item = table[c_ushort(k)]
+ data = [
+ k,
+ item.total_pkt_len,
+ item.num_pkt,
+ item.size_64B,
+ item.size_512B,
+ item.size_2K,
+ item.size_16K,
+ item.size_64K
+ ]
+ else:
+ data = [k,0,0,0,0,0,0,0]
+
+ # print a line per queue
+ avg = 0
+ if data[2] != 0:
+ avg = data[1] / data[2]
+ print("%5d %11s %10s %10s %10s %10s %10s" % (
+ data[0],
+ to_str(avg),
+ to_str(data[3]),
+ to_str(data[4]),
+ to_str(data[5]),
+ to_str(data[6]),
+ to_str(data[7])
+ )),
+ if args.throughput:
+ BPS = data[1] / print_interval
+ PPS = data[2] / print_interval
+ print("%10s %10s" % (
+ to_str(BPS),
+ to_str(PPS)
+ ))
+ else:
+ print
+
+ # ------- print total --------------
+ print(" Total %10s %10s %10s %10s %10s %10s" % (
+ to_str(tAVG),
+ to_str(tGroup[0]),
+ to_str(tGroup[1]),
+ to_str(tGroup[2]),
+ to_str(tGroup[3]),
+ to_str(tGroup[4])
+ )),
+
+ if args.throughput:
+ print("%10s %10s" % (
+ to_str(tBPS),
+ to_str(tPPS)
+ ))
+ else:
+ print
+
+
+def print_result(b):
+ # --------- print tx queues ---------------
+ print(asctime(localtime(time())))
+ print("TX")
+ table = b['tx_q']
+ print_table(table, tx_num)
+ b['tx_q'].clear()
+
+ # --------- print rx queues ---------------
+ print("")
+ print("RX")
+ table = b['rx_q']
+ print_table(table, rx_num)
+ b['rx_q'].clear()
+ if args.throughput:
+ print("-"*95)
+ else:
+ print("-"*76)
+
+############## specify network interface #################
+parser = argparse.ArgumentParser(description="")
+parser.add_argument("--name", "-n", type=str, default="")
+parser.add_argument("--interval", "-i", type=float, default=1)
+parser.add_argument("--throughput", "-t", action="store_true")
+parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS)
+args = parser.parse_args()
+
+if args.ebpf:
+ with open(EBPF_FILE) as fileobj:
+ progtxt = fileobj.read()
+ print(progtxt)
+ exit()
+
+if args.name == "":
+ print ("Please specify a network interface.")
+ exit()
+else:
+ dev_name = args.name
+
+if len(dev_name) > IFNAMSIZ-1:
+ print ("NIC name too long")
+ exit()
+
+print_interval = args.interval + 0.0
+if print_interval == 0:
+ print "print interval must be non-zero"
+ exit()
+
+################ get number of queues #####################
+tx_num = 0
+rx_num = 0
+path = ROOT_PATH + "/" + dev_name + "/queues"
+if not os.path.exists(path):
+ print "Net interface", dev_name, "does not exits."
+ exit()
+
+list = os.listdir(path)
+for s in list:
+ if s[0] == 'r':
+ rx_num += 1
+ if s[0] == 't':
+ tx_num += 1
+
+if tx_num > MAX_QUEUE_NUM or rx_num > MAX_QUEUE_NUM:
+ print "number of queues over 1024 is not supported."
+ exit()
+
+################## start tracing ##################
+b = BPF(src_file = EBPF_FILE)
+# --------- set hash array --------
+devname_map = b['name_map']
+_name = Devname()
+_name.name = dev_name
+devname_map[0] = _name
+
+while 1:
+ try:
+ sleep(print_interval)
+ print_result(b)
+ except KeyboardInterrupt:
+ exit()
diff --git a/tools/netqtop_example.txt b/tools/netqtop_example.txt
new file mode 100644
index 0000000..443cfb7
--- /dev/null
+++ b/tools/netqtop_example.txt
@@ -0,0 +1,190 @@
+Demonstrations of netqtop.
+
+
+netqtop traces the kernel tracepoints net:net_dev_start_xmit and net:netif_receive_skb,
+which fire on packet transmit and receive at the data link layer. The tool
+not only traces every packet via a specified network interface, but also accounts
+the PPS, BPS and average size of packets as well as packet amounts (categorized by
+size range) on sending and receiving direction respectively. Results are printed
+as tables, which can be used to understand traffic load allocation on each queue
+of interested network interface to see if it is balanced. And the overall performance
+is provided at the bottom.
+
+For example, suppose you want to know current traffic on lo, and print result
+every second:
+# ./netqtop.py -n lo -i 1
+Thu Sep 10 11:28:39 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 88 0 9 0 0 0
+ Total 88 0 9 0 0 0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 74 4 5 0 0 0
+ Total 74 4 5 0 0 0
+----------------------------------------------------------------------------
+Thu Sep 10 11:28:40 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 233 0 3 1 0 0
+ Total 233 0 3 1 0 0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 219 2 1 1 0 0
+ Total 219 2 1 1 0 0
+----------------------------------------------------------------------------
+
+or you can just use the default mode
+# ./netqtop.py -n lo
+Thu Sep 10 11:27:45 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 92 0 7 0 0 0
+ Total 92 0 7 0 0 0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 78 3 4 0 0 0
+ Total 78 3 4 0 0 0
+----------------------------------------------------------------------------
+Thu Sep 10 11:27:46 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 179 0 5 1 0 0
+ Total 179 0 5 1 0 0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 165 3 2 1 0 0
+ Total 165 3 2 1 0 0
+----------------------------------------------------------------------------
+
+This NIC only has 1 queue.
+If you want the tool to print results after a longer interval, specify seconds with -i:
+# ./netqtop.py -n lo -i 3
+Thu Sep 10 11:31:26 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 85 0 11 0 0 0
+ Total 85 0 11 0 0 0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 71 5 6 0 0 0
+ Total 71 5 6 0 0 0
+----------------------------------------------------------------------------
+Thu Sep 10 11:31:29 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 153 0 7 1 0 0
+ Total 153 0 7 1 0 0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K)
+ 0 139 4 3 1 0 0
+ Total 139 4 3 1 0 0
+----------------------------------------------------------------------------
+
+To see PPS and BPS of each queue, use -t:
+# ./netqtop.py -n lo -i 1 -t
+Thu Sep 10 11:37:02 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS
+ 0 114 0 10 0 0 0 1.11K 10.0
+ Total 114 0 10 0 0 0 1.11K 10.0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS
+ 0 100 4 6 0 0 0 1000.0 10.0
+ Total 100 4 6 0 0 0 1000.0 10.0
+-----------------------------------------------------------------------------------------------
+Thu Sep 10 11:37:03 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS
+ 0 271 0 3 1 0 0 1.06K 4.0
+ Total 271 0 3 1 0 0 1.06K 4.0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS
+ 0 257 2 1 1 0 0 1.0K 4.0
+ Total 257 2 1 1 0 0 1.0K 4.0
+-----------------------------------------------------------------------------------------------
+
+When filtering multi-queue NICs, you do not need to specify the number of queues,
+the tool calculates it for you:
+# ./netqtop.py -n eth0 -t
+Thu Sep 10 11:39:21 2020
+TX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS
+ 0 0 0 0 0 0 0 0.0 0.0
+ 1 0 0 0 0 0 0 0.0 0.0
+ 2 0 0 0 0 0 0 0.0 0.0
+ 3 0 0 0 0 0 0 0.0 0.0
+ 4 0 0 0 0 0 0 0.0 0.0
+ 5 0 0 0 0 0 0 0.0 0.0
+ 6 0 0 0 0 0 0 0.0 0.0
+ 7 0 0 0 0 0 0 0.0 0.0
+ 8 54 2 0 0 0 0 108.0 2.0
+ 9 161 0 9 0 0 0 1.42K 9.0
+ 10 0 0 0 0 0 0 0.0 0.0
+ 11 0 0 0 0 0 0 0.0 0.0
+ 12 0 0 0 0 0 0 0.0 0.0
+ 13 0 0 0 0 0 0 0.0 0.0
+ 14 0 0 0 0 0 0 0.0 0.0
+ 15 0 0 0 0 0 0 0.0 0.0
+ 16 0 0 0 0 0 0 0.0 0.0
+ 17 0 0 0 0 0 0 0.0 0.0
+ 18 0 0 0 0 0 0 0.0 0.0
+ 19 0 0 0 0 0 0 0.0 0.0
+ 20 0 0 0 0 0 0 0.0 0.0
+ 21 0 0 0 0 0 0 0.0 0.0
+ 22 0 0 0 0 0 0 0.0 0.0
+ 23 0 0 0 0 0 0 0.0 0.0
+ 24 0 0 0 0 0 0 0.0 0.0
+ 25 0 0 0 0 0 0 0.0 0.0
+ 26 0 0 0 0 0 0 0.0 0.0
+ 27 0 0 0 0 0 0 0.0 0.0
+ 28 0 0 0 0 0 0 0.0 0.0
+ 29 0 0 0 0 0 0 0.0 0.0
+ 30 0 0 0 0 0 0 0.0 0.0
+ 31 0 0 0 0 0 0 0.0 0.0
+ Total 141 2 9 0 0 0 1.52K 11.0
+
+RX
+ QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS
+ 0 127 3 9 0 0 0 1.5K 12.0
+ 1 0 0 0 0 0 0 0.0 0.0
+ 2 0 0 0 0 0 0 0.0 0.0
+ 3 0 0 0 0 0 0 0.0 0.0
+ 4 0 0 0 0 0 0 0.0 0.0
+ 5 0 0 0 0 0 0 0.0 0.0
+ 6 0 0 0 0 0 0 0.0 0.0
+ 7 0 0 0 0 0 0 0.0 0.0
+ 8 0 0 0 0 0 0 0.0 0.0
+ 9 0 0 0 0 0 0 0.0 0.0
+ 10 0 0 0 0 0 0 0.0 0.0
+ 11 0 0 0 0 0 0 0.0 0.0
+ 12 0 0 0 0 0 0 0.0 0.0
+ 13 0 0 0 0 0 0 0.0 0.0
+ 14 0 0 0 0 0 0 0.0 0.0
+ 15 0 0 0 0 0 0 0.0 0.0
+ 16 0 0 0 0 0 0 0.0 0.0
+ 17 0 0 0 0 0 0 0.0 0.0
+ 18 0 0 0 0 0 0 0.0 0.0
+ 19 0 0 0 0 0 0 0.0 0.0
+ 20 0 0 0 0 0 0 0.0 0.0
+ 21 0 0 0 0 0 0 0.0 0.0
+ 22 0 0 0 0 0 0 0.0 0.0
+ 23 0 0 0 0 0 0 0.0 0.0
+ 24 0 0 0 0 0 0 0.0 0.0
+ 25 0 0 0 0 0 0 0.0 0.0
+ 26 0 0 0 0 0 0 0.0 0.0
+ 27 0 0 0 0 0 0 0.0 0.0
+ 28 0 0 0 0 0 0 0.0 0.0
+ 29 0 0 0 0 0 0 0.0 0.0
+ 30 0 0 0 0 0 0 0.0 0.0
+ 31 0 0 0 0 0 0 0.0 0.0
+ Total 127 3 9 0 0 0 1.5K 12.0
+-----------------------------------------------------------------------------------------------
\ No newline at end of file