tools/runqlen: Set the size of BPF map object according to the number of CPUs

When runqlen.py is run with the -C option to show a separate histogram
for each CPU, it only outputs information for the first 64 CPUs on
machines with a large number of CPUs (e.g. an AMD Milan system with 256
CPUs).

This is because the BPF_HISTOGRAM declaration in the code does not
specify a size for the BPF map object, so the default size of 64 is
used. Fix this by setting the size to the current number of online
CPUs, which is passed to the BPF program as the MAX_CPUS macro.

Signed-off-by: Zhiyong Ye <yezhiyong@bytedance.com>
diff --git a/tools/runqlen.py b/tools/runqlen.py
index c77947a..03cf389 100755
--- a/tools/runqlen.py
+++ b/tools/runqlen.py
@@ -20,7 +20,7 @@
 # 12-Dec-2016   Brendan Gregg   Created this.
 
 from __future__ import print_function
-from bcc import BPF, PerfType, PerfSWConfig
+from bcc import BPF, PerfType, PerfSWConfig, utils
 from time import sleep, strftime
 from tempfile import NamedTemporaryFile
 from os import open, close, dup, unlink, O_WRONLY
@@ -163,7 +163,7 @@
 # code substitutions
 if args.cpus:
     bpf_text = bpf_text.replace('STORAGE',
-        'BPF_HISTOGRAM(dist, cpu_key_t);')
+        'BPF_HISTOGRAM(dist, cpu_key_t, MAX_CPUS);')
     bpf_text = bpf_text.replace('STORE', 'cpu_key_t key = {.slot = len}; ' +
         'key.cpu = bpf_get_smp_processor_id(); ' +
         'dist.increment(key);')
@@ -182,8 +182,10 @@
     if args.ebpf:
         exit()
 
+num_cpus = len(utils.get_online_cpus())
+
 # initialize BPF & perf_events
-b = BPF(text=bpf_text)
+b = BPF(text=bpf_text, cflags=['-DMAX_CPUS=%s' % str(num_cpus)])
 b.attach_perf_event(ev_type=PerfType.SOFTWARE,
     ev_config=PerfSWConfig.CPU_CLOCK, fn_name="do_perf_event",
     sample_period=0, sample_freq=frequency)