blob: 27cc83cdda30ef6645a6827490e826a447d7b6bc [file]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2009 Paul Mackerras <paulus@samba.org>
* Copyright (c) 2014-2022 Linux Test Project
*/
/*
* Here's a little test program that checks whether software counters
* (specifically, the task clock counter) work correctly when they're in
* a group with hardware counters.
*
* What it does is to create several groups, each with one hardware
* counter, counting instructions, plus a task clock counter. It first
* determines the number N of available hardware counters by opening
* instruction counters until one of them gets multiplexed, and then
* creates N+4 groups to force them to be multiplexed (on s390 a single
* group with the dedicated instruction counter is used instead). It
* also creates an overall task clock counter.
*
* Then it spins for a while, and then stops all the counters and reads
* them. It takes the total of the task clock counters in the groups and
* computes the ratio of that total to the overall execution time from
* the overall task clock counter.
*
* That ratio should be equal to the number of actual hardware counters
* that can count instructions. If the task clock counters in the groups
* don't stop when their group gets taken off the PMU, the ratio will
* instead be close to N+4. The program will declare that the test fails
* if the ratio is greater than N (actually, N + 0.005 to allow for FP
* rounding errors and RT throttling overhead).
*/
#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include "config.h"
#include "tst_test.h"
#include "lapi/cpuset.h"
#include "lapi/syscalls.h"
#include "perf_event_open.h"
#define MAX_CTRS 1000
/*
 * Buffer layout used when read()ing an event that was opened with
 * read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 *               PERF_FORMAT_TOTAL_TIME_RUNNING.
 */
struct read_format {
	/* Raw counter value. */
	unsigned long long value;
	/* Present because PERF_FORMAT_TOTAL_TIME_ENABLED is requested. */
	unsigned long long time_enabled;
	/* Present because PERF_FORMAT_TOTAL_TIME_RUNNING is requested. */
	unsigned long long time_running;
};
/* Filled in by the "v" entry of the test's option table. */
static char *verbose;

/* Number of counter groups opened by setup() and walked by verify()/cleanup(). */
static int ntotal;
/* Number of hardware counters the test assumes; upper bound for the ratio. */
static int nhw;

/* Overall (ungrouped) task clock counter. */
static int tsk0 = -1;
/* Per-group hardware (instructions) counter, the group leader. */
static int hwfd[MAX_CTRS];
/* Per-group task clock counter, attached to hwfd[] of the same index. */
static int tskfd[MAX_CTRS];

/* Raised by the SIGALRM handler to end the calibration loop. */
static volatile int work_done;
/* Number of spin-loop rounds counted by bench_work(); scales do_work(). */
static unsigned int est_loops;
/*
 * Issue prctl(state) for the calling task and abort the test with TBROK
 * if it fails. Callers in this file pass PR_TASK_PERF_EVENTS_ENABLE or
 * PR_TASK_PERF_EVENTS_DISABLE.
 */
static void all_counters_set(int state)
{
	int rc = prctl(state);

	if (rc != -1)
		return;

	tst_brk(TBROK | TERRNO, "prctl(%d) failed", state);
}
/*
 * SIGALRM handler installed by bench_work(): raises the flag that ends
 * the calibration loop. The signal number itself is not used.
 */
static void
alarm_handler(int sig LTP_ATTRIBUTE_UNUSED)
{
	work_done = 1;
}
static void bench_work(int time_ms)
{
unsigned int i;
struct itimerval val;
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_handler = alarm_handler;
sa.sa_flags = SA_RESETHAND;
SAFE_SIGACTION(SIGALRM, &sa, NULL);
work_done = 0;
memset(&val, 0, sizeof(val));
val.it_value.tv_sec = time_ms / 1000;
val.it_value.tv_usec = (time_ms % 1000) * 1000;
if (setitimer(ITIMER_REAL, &val, NULL))
tst_brk(TBROK | TERRNO, "setitimer");
while (!work_done) {
for (i = 0; i < 100000; ++i)
asm volatile (""::"g" (i));
est_loops++;
}
tst_res(TINFO, "bench_work estimated loops = %u in %d ms", est_loops, time_ms);
}
/*
 * Burn CPU for mult times the amount of work measured by bench_work():
 * runs mult * est_loops rounds of the same 100000-iteration spin loop.
 * The product is evaluated before being widened to unsigned long.
 */
static void do_work(int mult)
{
	const unsigned long rounds = mult * est_loops;
	unsigned long done = 0, spin;

	while (done < rounds) {
		/* Empty asm keeps the compiler from optimizing the loop away. */
		for (spin = 0; spin < 100000; spin++)
			asm volatile (""::"g" (spin));
		done++;
	}
}
#ifndef __s390__
/*
 * Estimate how many hardware counters can count instructions at once.
 *
 * Instruction-counting events are opened one after another. After each
 * new event all of the task's counters are enabled, some work is done
 * and the newest event is read twice. As soon as its time_enabled and
 * time_running deltas differ, the event is being multiplexed, and the
 * index of that event (i.e. the number of events opened before it) is
 * taken as the number of available hardware counters.
 *
 * Returns that number, or 0 if no multiplexing was observed within
 * MAX_CTRS events. Every event opened here is closed before returning.
 *
 * NOTE(review): perf_event_open() is the helper from perf_event_open.h;
 * its failure handling is not visible here. A failed open is caught at
 * the latest by the read() checks below.
 */
static int count_hardware_counters(void)
{
	struct perf_event_attr hw_event;
	int i, nopened = 0, hwctrs = 0;
	int fdarry[MAX_CTRS];
	struct read_format buf, buf2, diff;

	memset(&hw_event, 0, sizeof(struct perf_event_attr));

	hw_event.type = PERF_TYPE_HARDWARE;
	hw_event.size = sizeof(struct perf_event_attr);
	hw_event.disabled = 1;
	hw_event.config = PERF_COUNT_HW_INSTRUCTIONS;
	hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
		PERF_FORMAT_TOTAL_TIME_RUNNING;

	for (i = 0; i < MAX_CTRS; i++) {
		fdarry[i] = perf_event_open(&hw_event, 0, -1, -1, 0);
		/* Remember how many slots hold a descriptor to be closed. */
		nopened = i + 1;

		all_counters_set(PR_TASK_PERF_EVENTS_ENABLE);
		do_work(1);
		if (read(fdarry[i], &buf, sizeof(buf)) != sizeof(buf))
			tst_brk(TBROK | TERRNO, "error reading counter(s) #1");

		do_work(1);
		all_counters_set(PR_TASK_PERF_EVENTS_DISABLE);
		if (read(fdarry[i], &buf2, sizeof(buf2)) != sizeof(buf2))
			tst_brk(TBROK | TERRNO, "error reading counter(s) #2");

		diff.value = buf2.value - buf.value;
		diff.time_enabled = buf2.time_enabled - buf.time_enabled;
		diff.time_running = buf2.time_running - buf.time_running;

		tst_res(TINFO, "[%d] value:%llu time_enabled:%llu "
			"time_running:%llu", i, diff.value,
			diff.time_enabled, diff.time_running);

		/*
		 * Normally time_enabled and time_running are the same value.
		 * But if more events are started than available counter slots
		 * on the PMU, then multiplexing happens and events run only
		 * part of the time, so the two values differ. Hence, if the
		 * time_enabled and time_running deltas between the two reads
		 * are not equal, PMU multiplexing has started and the number
		 * of events opened before this one is the number of available
		 * hardware counters.
		 */
		if (diff.time_enabled != diff.time_running) {
			hwctrs = i;
			break;
		}
	}

	/*
	 * Close everything that was opened, including the case where the
	 * loop ran to MAX_CTRS without detecting multiplexing.
	 */
	for (i = 0; i < nopened; i++)
		SAFE_CLOSE(fdarry[i]);

	return hwctrs;
}
#endif /* __s390__ */
/*
 * Pin the calling task to the CPU it is currently running on.
 *
 * Compiles to a no-op when HAVE_SCHED_GETCPU is not defined. Any failure
 * aborts the test with TBROK.
 */
static void bind_to_current_cpu(void)
{
#ifdef HAVE_SCHED_GETCPU
	int cpu = sched_getcpu();
	size_t mask_size;
	cpu_set_t *mask;

	if (cpu == -1)
		tst_brk(TBROK | TERRNO, "sched_getcpu() failed");

	/* Size the set so that it can hold bit number 'cpu'. */
	mask = CPU_ALLOC(cpu + 1);
	if (!mask)
		tst_brk(TBROK | TERRNO, "CPU_ALLOC() failed");

	mask_size = CPU_ALLOC_SIZE(cpu + 1);
	CPU_ZERO_S(mask_size, mask);
	/*
	 * Dynamically sized sets must be accessed with the _S macros;
	 * plain CPU_SET() assumes a fixed-size cpu_set_t.
	 */
	CPU_SET_S(cpu, mask_size, mask);

	if (sched_setaffinity(0, mask_size, mask) == -1)
		tst_brk(TBROK | TERRNO, "sched_setaffinity() failed");

	CPU_FREE(mask);
#endif
}
/*
 * Test setup:
 *  - switch to SCHED_FIFO priority 1,
 *  - mark every descriptor slot as unused (-1),
 *  - calibrate the busy loop for 500 ms,
 *  - bail out with TCONF if the kernel has no perf_event support,
 *  - pin the task to its current CPU,
 *  - determine nhw/ntotal,
 *  - open the overall task clock counter plus ntotal groups, each made
 *    of one instructions counter (leader) and one task clock counter.
 */
static void setup(void)
{
	int i;
	struct perf_event_attr tsk_event, hw_event;
	struct sched_param sparam = {.sched_priority = 1};

	if (sched_setscheduler(0, SCHED_FIFO, &sparam)) {
		tst_brk(TBROK | TERRNO,
			"sched_setscheduler(0, SCHED_FIFO, ...) failed");
	}

	for (i = 0; i < MAX_CTRS; i++) {
		hwfd[i] = -1;
		tskfd[i] = -1;
	}

	bench_work(500);

	/*
	 * According to perf_event_open's manpage, the official way of
	 * knowing if perf_event_open() support is enabled is checking for
	 * the existence of the file /proc/sys/kernel/perf_event_paranoid.
	 */
	if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1)
		tst_brk(TCONF, "Kernel doesn't have perf_event support");

	bind_to_current_cpu();

#ifdef __s390__
	/*
	 * On s390 the "time_enabled" and "time_running" values are always the
	 * same, therefore count_hardware_counters() does not work.
	 *
	 * There are distinct/dedicated counters that can be used independently.
	 * Use the dedicated counter for instructions here.
	 */
	ntotal = nhw = 1;
#else
	nhw = count_hardware_counters();

	/*
	 * hwfd[], tskfd[] and the value arrays in verify() hold MAX_CTRS
	 * entries. Check before ntotal is set so that cleanup(), which
	 * walks ntotal entries, stays within bounds as well.
	 */
	if (nhw > MAX_CTRS - 4) {
		tst_brk(TBROK,
			"detected %d hardware counters, at most %d are supported",
			nhw, MAX_CTRS - 4);
	}

	ntotal = nhw + 4;
#endif

	memset(&hw_event, 0, sizeof(struct perf_event_attr));
	memset(&tsk_event, 0, sizeof(struct perf_event_attr));

	tsk_event.type = PERF_TYPE_SOFTWARE;
	tsk_event.size = sizeof(struct perf_event_attr);
	tsk_event.disabled = 1;
	tsk_event.config = PERF_COUNT_SW_TASK_CLOCK;

	hw_event.type = PERF_TYPE_HARDWARE;
	hw_event.size = sizeof(struct perf_event_attr);
	hw_event.disabled = 1;
	hw_event.config = PERF_COUNT_HW_INSTRUCTIONS;

	/* The overall task clock counter is opened disabled. */
	tsk0 = perf_event_open(&tsk_event, 0, -1, -1, 0);

	/*
	 * The per-group task clock counters are opened with disabled = 0
	 * and attached to their (disabled) hardware leader via group_fd.
	 */
	tsk_event.disabled = 0;
	for (i = 0; i < ntotal; ++i) {
		hwfd[i] = perf_event_open(&hw_event, 0, -1, -1, 0);
		tskfd[i] = perf_event_open(&tsk_event, 0, -1, hwfd[i], 0);
	}
}
/*
 * Close every counter descriptor that setup() managed to open: for each
 * of the ntotal groups the hardware counter and then its task clock
 * counter, and finally the overall task clock counter. Slots still
 * holding -1 are skipped.
 */
static void cleanup(void)
{
	int grp = 0;

	while (grp < ntotal) {
		if (hwfd[grp] != -1)
			SAFE_CLOSE(hwfd[grp]);

		if (tskfd[grp] != -1)
			SAFE_CLOSE(tskfd[grp]);

		grp++;
	}

	if (tsk0 != -1)
		SAFE_CLOSE(tsk0);
}
/*
 * Test body.
 *
 * Enables all counters of the task, burns CPU, stops the grouped
 * counters and then all counters, and drops back to SCHED_OTHER. It then
 * reads the overall task clock (vt0) and, per group, the task clock and
 * the instruction count. The test passes if
 *
 *     sum(group task clocks) / vt0 <= nhw + 0.005
 *
 * and fails otherwise. With -v the individual counter values are printed.
 */
static void verify(void)
{
	unsigned long long vt0, vt[MAX_CTRS], vh[MAX_CTRS];
	unsigned long long vtsum = 0, vhsum = 0;
	int i;
	double ratio;
	struct sched_param sparam = {.sched_priority = 0};

	all_counters_set(PR_TASK_PERF_EVENTS_ENABLE);

	do_work(8);

	/*
	 * Stop groups with hw counters first before tsk0.
	 *
	 * The ioctl argument is a flags word (PERF_IOC_FLAG_GROUP), so it
	 * is passed explicitly as 0 instead of being left indeterminate.
	 */
	for (i = 0; i < ntotal; i++) {
		if (ioctl(hwfd[i], PERF_EVENT_IOC_DISABLE, 0) == -1 ||
		    ioctl(tskfd[i], PERF_EVENT_IOC_DISABLE, 0) == -1) {
			tst_brk(TBROK | TERRNO,
				"ioctl(PERF_EVENT_IOC_DISABLE) failed");
		}
	}

	all_counters_set(PR_TASK_PERF_EVENTS_DISABLE);

	if (sched_setscheduler(0, SCHED_OTHER, &sparam)) {
		tst_brk(TBROK | TERRNO,
			"sched_setscheduler(0, SCHED_OTHER, ...) failed");
	}

	if (read(tsk0, &vt0, sizeof(vt0)) != sizeof(vt0))
		tst_brk(TBROK | TERRNO, "error reading task clock counter");

	for (i = 0; i < ntotal; ++i) {
		if (read(tskfd[i], &vt[i], sizeof(vt[i])) != sizeof(vt[i]) ||
		    read(hwfd[i], &vh[i], sizeof(vh[i])) != sizeof(vh[i]))
			tst_brk(TBROK | TERRNO, "error reading counter(s)");
		vtsum += vt[i];
		vhsum += vh[i];
	}

	tst_res(TINFO, "nhw: %d, overall task clock: %llu", nhw, vt0);
	tst_res(TINFO, "hw sum: %llu, task clock sum: %llu", vhsum, vtsum);

	if (verbose) {
		tst_res(TINFO, "hw counters:");
		for (i = 0; i < ntotal; ++i)
			tst_res(TINFO, " %llu", vh[i]);
		tst_res(TINFO, "task clock counters:");
		for (i = 0; i < ntotal; ++i)
			tst_res(TINFO, " %llu", vt[i]);
	}

	/*
	 * If the grouped task clocks only advance while their group is on
	 * the PMU, their sum is about nhw times the overall task clock.
	 */
	ratio = (double)vtsum / vt0;
	tst_res(TINFO, "ratio: %lf", ratio);
	if (ratio > nhw + 0.005) {
		tst_res(TFAIL, "test failed (ratio was greater than %d)", nhw);
	} else {
		tst_res(TPASS, "test passed");
	}
}
/*
 * Test descriptor: wires up setup(), verify() and cleanup(), requests
 * root, sets the timeout value to 72 and registers the -v option, which
 * is stored in 'verbose'.
 */
static struct tst_test test = {
	.needs_root = 1,
	.timeout = 72,
	.options = (struct tst_option[]) {
		{"v", &verbose, "Verbose output"},
		{},
	},
	.setup = setup,
	.test_all = verify,
	.cleanup = cleanup
};