Netqtop 3037 (#3048)

The tool netqtop uses the tracepoints NET_DEV_START_XMIT and NETIF_RECEIVE_SKB to intercept every transmitted and received packet; as a result, considerable performance degradation is expected.
Details for some performance evaluation can be found at
   https://github.com/iovisor/bcc/pull/3048
diff --git a/README.md b/README.md
index 5834062..d54e384 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,7 @@
 - tools/[memleak](tools/memleak.py): Display outstanding memory allocations to find memory leaks. [Examples](tools/memleak_example.txt).
 - tools/[mountsnoop](tools/mountsnoop.py): Trace mount and umount syscalls system-wide. [Examples](tools/mountsnoop_example.txt).
 - tools/[mysqld_qslower](tools/mysqld_qslower.py): Trace MySQL server queries slower than a threshold. [Examples](tools/mysqld_qslower_example.txt).
+- tools/[netqtop](tools/netqtop.py) tools/[netqtop.c](tools/netqtop.c): Trace and display packets distribution on NIC queues. [Examples](tools/netqtop_example.txt).
 - tools/[nfsslower](tools/nfsslower.py): Trace slow NFS operations. [Examples](tools/nfsslower_example.txt).
 - tools/[nfsdist](tools/nfsdist.py): Summarize NFS operation latency distribution as a histogram. [Examples](tools/nfsdist_example.txt).
 - tools/[offcputime](tools/offcputime.py): Summarize off-CPU time by kernel stack trace. [Examples](tools/offcputime_example.txt).
diff --git a/man/man8/netqtop.8 b/man/man8/netqtop.8
new file mode 100644
index 0000000..bfa34d1
--- /dev/null
+++ b/man/man8/netqtop.8
@@ -0,0 +1,56 @@
+.TH netqtop 8 "2020-07-30" "USER COMMANDS"
+.SH NAME
+netqtop \- Summarize PPS, BPS, average size of packets and packet counts ordered by packet sizes 
+on each queue of a network interface.
+.SH SYNOPSIS
+.B netqtop [\-n nic] [\-i interval] [\-t throughput]
+.SH DESCRIPTION
+netqtop collects statistics of both transmitted and received packets on each queue of 
+a specified network interface, to help developers check whether its traffic load is balanced. 
+The result is displayed as a table with columns of PPS, BPS, average size and 
+packet counts in the ranges [0, 64), [64, 512), [512, 2048), [2048, 16K), [16K, 64K). 
+This is printed every given interval (default 1) in seconds.
+
+The tool uses the net:net_dev_start_xmit and net:netif_receive_skb kernel tracepoints. 
+Since it uses tracepoints, the tool only works on Linux 4.7+.
+
+netqtop introduces significant overhead while network traffic is large. See OVERHEAD 
+section below.
+
+.SH REQUIREMENTS
+CONFIG_BPF and bcc
+.SH OPTIONS
+.TP
+\-n NIC
+Specify the network interface card
+.TP
+\-i INTERVAL
+Print results every INTERVAL seconds.
+The default value is 1.
+.TP
+\-t THROUGHPUT
+Print BPS and PPS of each queue.
+.SH EXAMPLES
+.TP
+Account statistics of eth0 and output every 1 second:
+#
+.B netqtop -n eth0 -i 1
+.SH OVERHEAD
+In performance tests, netqtop introduces an overhead of up to a 30% PPS drop 
+when the printing interval is set to 1 second. So be mindful of potential packet drop 
+when using this tool.
+
+It also increases ping-pong latency by about 1 usec. 
+.SH SOURCE
+This is from bcc
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a netqtop_example.txt file containing
+example usage, output and commentary for this tool.
+.SH OS 
+Linux
+.SH STABILITY
+Unstable - in development
+.SH AUTHOR
+Yolandajn
diff --git a/tools/netqtop.c b/tools/netqtop.c
new file mode 100644
index 0000000..52605dd
--- /dev/null
+++ b/tools/netqtop.c
@@ -0,0 +1,113 @@
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#if IFNAMSIZ != 16 
+#error "IFNAMSIZ != 16 is not supported"
+#endif
+#define MAX_QUEUE_NUM 1024
+
+/**
+* This union is used to store the name of the specified interface
+* and read it as two different data types
+*/
+union name_buf{
+    char name[IFNAMSIZ];
+    struct {
+        u64 hi;
+        u64 lo;
+    }name_int;
+};
+
+/* data retrieved in tracepoints */
+struct queue_data{
+    u64 total_pkt_len;
+    u32 num_pkt;
+    u32 size_64B;
+    u32 size_512B;
+    u32 size_2K;
+    u32 size_16K;
+    u32 size_64K;
+};
+
+/* array of length 1 for device name */
+BPF_ARRAY(name_map, union name_buf, 1);
+/* table for transmit & receive packets */
+BPF_HASH(tx_q, u16, struct queue_data, MAX_QUEUE_NUM);
+BPF_HASH(rx_q, u16, struct queue_data, MAX_QUEUE_NUM);
+
+static inline int name_filter(struct sk_buff* skb){
+    /* get device name from skb */
+    union name_buf real_devname;
+    struct net_device *dev;
+    bpf_probe_read(&dev, sizeof(skb->dev), ((char *)skb + offsetof(struct sk_buff, dev)));
+    bpf_probe_read(&real_devname, IFNAMSIZ, dev->name);
+
+    int key=0;
+    union name_buf *leaf = name_map.lookup(&key);
+    if(!leaf){
+        return 0;
+    }
+    if((leaf->name_int).hi != real_devname.name_int.hi || (leaf->name_int).lo != real_devname.name_int.lo){
+        return 0;
+    }
+
+    return 1;
+}
+
+static void updata_data(struct queue_data *data, u64 len){
+    data->total_pkt_len += len;
+    data->num_pkt ++;
+    if(len / 64 == 0){
+        data->size_64B ++;
+    }
+    else if(len / 512 == 0){
+        data->size_512B ++;
+    }
+    else if(len / 2048 == 0){
+        data->size_2K ++;
+    }
+    else if(len / 16384 == 0){
+        data->size_16K ++;
+    }
+    else if(len / 65536 == 0){
+        data->size_64K ++;
+    }
+}
+
+TRACEPOINT_PROBE(net, net_dev_start_xmit){
+    /* read device name */
+    struct sk_buff* skb = (struct sk_buff*)args->skbaddr;
+    if(!name_filter(skb)){
+        return 0;
+    }
+
+    /* update table */
+    u16 qid = skb->queue_mapping;
+    struct queue_data newdata;
+    __builtin_memset(&newdata, 0, sizeof(newdata));
+    struct queue_data *data = tx_q.lookup_or_try_init(&qid, &newdata);
+    if(!data){
+        return 0;
+    }
+    updata_data(data, skb->len);
+    
+    return 0;
+}
+
+TRACEPOINT_PROBE(net, netif_receive_skb){
+    struct sk_buff* skb = (struct sk_buff*)args->skbaddr;
+    if(!name_filter(skb)){
+        return 0;
+    }
+
+    u16 qid = skb->queue_mapping;
+    struct queue_data newdata;
+    __builtin_memset(&newdata, 0, sizeof(newdata));
+    struct queue_data *data = rx_q.lookup_or_try_init(&qid, &newdata);
+    if(!data){
+        return 0;
+    }
+    updata_data(data, skb->len);
+    
+    return 0;
+}
diff --git a/tools/netqtop.py b/tools/netqtop.py
new file mode 100755
index 0000000..e2823ac
--- /dev/null
+++ b/tools/netqtop.py
@@ -0,0 +1,218 @@
+#!/usr/bin/python
+
+from bcc import BPF
+from ctypes import *
+import argparse
+import os
+from time import sleep,time,localtime,asctime
+import types
+
+# pre defines -------------------------------
+ROOT_PATH = "/sys/class/net"
+IFNAMSIZ = 16
+COL_WIDTH = 10
+MAX_QUEUE_NUM = 1024
+EBPF_FILE = "netqtop.c"
+
+# structure for network interface name array
+class Devname(Structure):
+    _fields_=[
+        ('name', c_char*IFNAMSIZ)
+    ]
+
+################## printer for results ###################
+def to_str(num):
+    s = ""
+    if num > 1000000:
+        return str(round(num/(1024*1024.0), 2)) + 'M'
+    elif num > 1000:
+        return str(round(num/1024.0, 2)) + 'K'
+    else:
+        if type(num) == types.FloatType:
+            return str(round(num, 2))
+        else:
+            return str(num)
+
+def print_table(table, qnum):
+    global print_interval
+
+    # ---- print headers ----------------
+    headers = [
+		"QueueID", 
+		"avg_size", 
+		"[0, 64)", 
+		"[64, 512)", 
+		"[512, 2K)", 
+		"[2K, 16K)",
+		"[16K, 64K)"
+	]
+    if args.throughput:
+        headers.append("BPS")
+        headers.append("PPS")
+
+    for hd in headers:
+		print(hd.center(COL_WIDTH)),
+    print
+
+    # ------- calculates --------------
+    qids=[]
+    tBPS = 0
+    tPPS = 0
+    tAVG = 0
+    tGroup = [0,0,0,0,0]
+    tpkt = 0
+    tlen = 0
+    for k, v in table.items():
+        qids += [k.value]
+        tlen += v.total_pkt_len
+        tpkt += v.num_pkt
+        tGroup[0] += v.size_64B
+        tGroup[1] += v.size_512B
+        tGroup[2] += v.size_2K
+        tGroup[3] += v.size_16K
+        tGroup[4] += v.size_64K
+    tBPS = tlen / print_interval
+    tPPS = tpkt / print_interval
+    if tpkt != 0:
+        tAVG = tlen / tpkt
+
+    # -------- print table --------------
+    for k in range(qnum):
+        if k in qids:
+            item = table[c_ushort(k)]
+            data = [
+                k,
+                item.total_pkt_len,
+                item.num_pkt,
+                item.size_64B,
+                item.size_512B,
+                item.size_2K,
+                item.size_16K,
+                item.size_64K
+            ]
+        else:
+            data = [k,0,0,0,0,0,0,0]
+        
+        # print a line per queue
+        avg = 0
+        if data[2] != 0:
+            avg = data[1] / data[2]
+        print("%5d %11s %10s %10s %10s %10s %10s" % (
+            data[0],
+            to_str(avg),
+            to_str(data[3]),
+            to_str(data[4]),
+            to_str(data[5]),
+            to_str(data[6]),
+            to_str(data[7])
+        )),
+        if args.throughput:
+            BPS = data[1] / print_interval
+            PPS = data[2] / print_interval
+            print("%10s %10s" % (
+                to_str(BPS),
+                to_str(PPS)
+            ))
+        else:
+            print
+    
+    # ------- print total --------------
+    print(" Total %10s %10s %10s %10s %10s %10s" % (
+        to_str(tAVG),
+        to_str(tGroup[0]),
+        to_str(tGroup[1]),
+        to_str(tGroup[2]),
+        to_str(tGroup[3]),
+        to_str(tGroup[4])
+    )),
+
+    if args.throughput:
+        print("%10s %10s" % (
+            to_str(tBPS),
+            to_str(tPPS)
+        ))
+    else:
+        print
+
+
+def print_result(b):
+    # --------- print tx queues ---------------
+    print(asctime(localtime(time())))
+    print("TX")
+    table = b['tx_q']
+    print_table(table, tx_num)
+    b['tx_q'].clear()
+
+    # --------- print rx queues ---------------
+    print("")
+    print("RX")
+    table = b['rx_q']
+    print_table(table, rx_num)
+    b['rx_q'].clear()
+    if args.throughput:
+        print("-"*95)
+    else:
+        print("-"*76)
+
+############## specify network interface #################
+parser = argparse.ArgumentParser(description="")
+parser.add_argument("--name", "-n", type=str, default="")
+parser.add_argument("--interval", "-i", type=float, default=1)
+parser.add_argument("--throughput", "-t", action="store_true")
+parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS)
+args = parser.parse_args()
+
+if args.ebpf:
+    with open(EBPF_FILE) as fileobj:
+        progtxt = fileobj.read()
+        print(progtxt)
+    exit()
+
+if args.name == "":
+	print ("Please specify a network interface.")
+	exit()
+else:
+	dev_name = args.name
+
+if len(dev_name) > IFNAMSIZ-1:
+    print ("NIC name too long")
+    exit()
+
+print_interval = args.interval + 0.0
+if print_interval == 0:
+    print "print interval must be non-zero"
+    exit()
+
+################ get number of queues #####################
+tx_num = 0
+rx_num = 0
+path = ROOT_PATH + "/" + dev_name + "/queues"
+if not os.path.exists(path):
+	print "Net interface", dev_name, "does not exits."
+	exit()
+
+list = os.listdir(path)
+for s in list:
+    if s[0] == 'r':
+        rx_num += 1
+    if s[0] == 't':
+        tx_num += 1
+
+if tx_num > MAX_QUEUE_NUM or rx_num > MAX_QUEUE_NUM:
+    print "number of queues over 1024 is not supported."
+    exit()
+
+################## start tracing ##################
+b = BPF(src_file = EBPF_FILE)
+# --------- set hash array --------
+devname_map = b['name_map']
+_name = Devname()
+_name.name = dev_name
+devname_map[0] = _name
+
+while 1:
+    try:
+        sleep(print_interval)
+        print_result(b)
+    except KeyboardInterrupt:
+        exit()
diff --git a/tools/netqtop_example.txt b/tools/netqtop_example.txt
new file mode 100644
index 0000000..443cfb7
--- /dev/null
+++ b/tools/netqtop_example.txt
@@ -0,0 +1,190 @@
+Demonstrations of netqtop.
+
+
+netqtop traces the kernel tracepoints net:net_dev_start_xmit (packet transmit) 
+and net:netif_receive_skb (packet receive) on the data link layer. The tool 
+not only traces every packet via a specified network interface, but also accounts 
+the PPS, BPS and average size of packets as well as packet amounts (categorized by 
+size range) on sending and receiving direction respectively. Results are printed 
+as tables, which can be used to understand traffic load allocation on each queue 
+of interested network interface to see if it is balanced. And the overall performance 
+is provided at the bottom.
+
+For example, suppose you want to know current traffic on lo, and print result 
+every second:
+# ./netqtop.py -n lo -i 1
+Thu Sep 10 11:28:39 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          88          0          9          0          0          0
+ Total         88          0          9          0          0          0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          74          4          5          0          0          0
+ Total         74          4          5          0          0          0
+----------------------------------------------------------------------------
+Thu Sep 10 11:28:40 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         233          0          3          1          0          0
+ Total        233          0          3          1          0          0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         219          2          1          1          0          0
+ Total        219          2          1          1          0          0
+----------------------------------------------------------------------------
+
+or you can just use the default mode
+# ./netqtop.py -n lo
+Thu Sep 10 11:27:45 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          92          0          7          0          0          0
+ Total         92          0          7          0          0          0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          78          3          4          0          0          0
+ Total         78          3          4          0          0          0
+----------------------------------------------------------------------------
+Thu Sep 10 11:27:46 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         179          0          5          1          0          0
+ Total        179          0          5          1          0          0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         165          3          2          1          0          0
+ Total        165          3          2          1          0          0
+----------------------------------------------------------------------------
+
+This NIC only has 1 queue.
+If you want the tool to print results after a longer interval, specify seconds with -i:
+# ./netqtop.py -n lo -i 3
+Thu Sep 10 11:31:26 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          85          0         11          0          0          0
+ Total         85          0         11          0          0          0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          71          5          6          0          0          0
+ Total         71          5          6          0          0          0
+----------------------------------------------------------------------------
+Thu Sep 10 11:31:29 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         153          0          7          1          0          0
+ Total        153          0          7          1          0          0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         139          4          3          1          0          0
+ Total        139          4          3          1          0          0
+----------------------------------------------------------------------------
+
+To see PPS and BPS of each queue, use -t:
+# ./netqtop.py -n lo -i 1 -t
+Thu Sep 10 11:37:02 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)    BPS        PPS    
+    0         114          0         10          0          0          0      1.11K       10.0
+ Total        114          0         10          0          0          0      1.11K       10.0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)    BPS        PPS    
+    0         100          4          6          0          0          0     1000.0       10.0
+ Total        100          4          6          0          0          0     1000.0       10.0
+-----------------------------------------------------------------------------------------------
+Thu Sep 10 11:37:03 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)    BPS        PPS    
+    0         271          0          3          1          0          0      1.06K        4.0
+ Total        271          0          3          1          0          0      1.06K        4.0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)    BPS        PPS    
+    0         257          2          1          1          0          0       1.0K        4.0
+ Total        257          2          1          1          0          0       1.0K        4.0
+-----------------------------------------------------------------------------------------------
+
+When filtering multi-queue NICs, you do not need to specify the number of queues, 
+the tool calculates it for you:
+# ./netqtop.py -n eth0 -t
+Thu Sep 10 11:39:21 2020
+TX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)    BPS        PPS    
+    0           0          0          0          0          0          0        0.0        0.0
+    1           0          0          0          0          0          0        0.0        0.0
+    2           0          0          0          0          0          0        0.0        0.0
+    3           0          0          0          0          0          0        0.0        0.0
+    4           0          0          0          0          0          0        0.0        0.0
+    5           0          0          0          0          0          0        0.0        0.0
+    6           0          0          0          0          0          0        0.0        0.0
+    7           0          0          0          0          0          0        0.0        0.0
+    8          54          2          0          0          0          0      108.0        2.0
+    9         161          0          9          0          0          0      1.42K        9.0
+   10           0          0          0          0          0          0        0.0        0.0
+   11           0          0          0          0          0          0        0.0        0.0
+   12           0          0          0          0          0          0        0.0        0.0
+   13           0          0          0          0          0          0        0.0        0.0
+   14           0          0          0          0          0          0        0.0        0.0
+   15           0          0          0          0          0          0        0.0        0.0
+   16           0          0          0          0          0          0        0.0        0.0
+   17           0          0          0          0          0          0        0.0        0.0
+   18           0          0          0          0          0          0        0.0        0.0
+   19           0          0          0          0          0          0        0.0        0.0
+   20           0          0          0          0          0          0        0.0        0.0
+   21           0          0          0          0          0          0        0.0        0.0
+   22           0          0          0          0          0          0        0.0        0.0
+   23           0          0          0          0          0          0        0.0        0.0
+   24           0          0          0          0          0          0        0.0        0.0
+   25           0          0          0          0          0          0        0.0        0.0
+   26           0          0          0          0          0          0        0.0        0.0
+   27           0          0          0          0          0          0        0.0        0.0
+   28           0          0          0          0          0          0        0.0        0.0
+   29           0          0          0          0          0          0        0.0        0.0
+   30           0          0          0          0          0          0        0.0        0.0
+   31           0          0          0          0          0          0        0.0        0.0
+ Total        141          2          9          0          0          0      1.52K       11.0
+
+RX
+ QueueID    avg_size   [0, 64)   [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)    BPS        PPS    
+    0         127          3          9          0          0          0       1.5K       12.0
+    1           0          0          0          0          0          0        0.0        0.0
+    2           0          0          0          0          0          0        0.0        0.0
+    3           0          0          0          0          0          0        0.0        0.0
+    4           0          0          0          0          0          0        0.0        0.0
+    5           0          0          0          0          0          0        0.0        0.0
+    6           0          0          0          0          0          0        0.0        0.0
+    7           0          0          0          0          0          0        0.0        0.0
+    8           0          0          0          0          0          0        0.0        0.0
+    9           0          0          0          0          0          0        0.0        0.0
+   10           0          0          0          0          0          0        0.0        0.0
+   11           0          0          0          0          0          0        0.0        0.0
+   12           0          0          0          0          0          0        0.0        0.0
+   13           0          0          0          0          0          0        0.0        0.0
+   14           0          0          0          0          0          0        0.0        0.0
+   15           0          0          0          0          0          0        0.0        0.0
+   16           0          0          0          0          0          0        0.0        0.0
+   17           0          0          0          0          0          0        0.0        0.0
+   18           0          0          0          0          0          0        0.0        0.0
+   19           0          0          0          0          0          0        0.0        0.0
+   20           0          0          0          0          0          0        0.0        0.0
+   21           0          0          0          0          0          0        0.0        0.0
+   22           0          0          0          0          0          0        0.0        0.0
+   23           0          0          0          0          0          0        0.0        0.0
+   24           0          0          0          0          0          0        0.0        0.0
+   25           0          0          0          0          0          0        0.0        0.0
+   26           0          0          0          0          0          0        0.0        0.0
+   27           0          0          0          0          0          0        0.0        0.0
+   28           0          0          0          0          0          0        0.0        0.0
+   29           0          0          0          0          0          0        0.0        0.0
+   30           0          0          0          0          0          0        0.0        0.0
+   31           0          0          0          0          0          0        0.0        0.0
+ Total        127          3          9          0          0          0       1.5K       12.0
+-----------------------------------------------------------------------------------------------
\ No newline at end of file