// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2019 The Linux Foundation. All rights reserved.
*/
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/cpuhotplug.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
#include <linux/percpu.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <asm/local64.h>
#define L2_COUNTERS_BUG "[l2 counters error]: "
/*
 * Event IDs are encoded as 0xXXX:
 *
 * bit  0     : counter group (0 = transaction, 1 = tenure).
 * bits [4:1] : counter serial number (0 to 8).
 * bits [9:5] : bit position of the counter's enable bit in its
 *              config register.
 */
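/*
 * For example, decoding L2_EVENT_MID_RANGE_OCCUR_CNTR (0x0cf) with the
 * masks defined below:
 *   group      = 0x0cf & EVENT_GROUP_MASK              = 1 (tenure group)
 *   counter id = (0x0cf & ID_MASK) >> ID_SHIFT         = 7 (MID_OCCURRENCE_CNTR)
 *   enable bit = (0x0cf & REGBIT_MASK) >> REGBIT_SHIFT = 6 (MID_RANGE_OCCURRENCE_CNTR_ENABLE)
 */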
#define L2_EVENT_CYCLE_CNTR 0x000
#define L2_EVENT_DDR_WR_CNTR 0x022
#define L2_EVENT_DDR_RD_CNTR 0x044
#define L2_EVENT_SNP_RD_CNTR 0x066
#define L2_EVENT_ACP_WR_CNTR 0x088
#define L2_EVENT_TENURE_CNTR 0x26b
#define L2_EVENT_LOW_RANGE_OCCUR_CNTR 0x04d
#define L2_EVENT_MID_RANGE_OCCUR_CNTR 0x0cf
#define L2_EVENT_HIGH_RANGE_OCCUR_CNTR 0x151
#define EVENT_GROUP_MASK 0x1
#define REGBIT_MASK 0x3e0
#define ID_MASK 0x1e
#define TRANSACTION_CNTRS_GROUP_ID 0x0
#define TENURE_CNTRS_GROUP_ID 0x1
#define ID_SHIFT 0x1
#define REGBIT_SHIFT 0x5
#define TXN_CONFIG_REG_OFFSET 0x54c
#define OVERFLOW_REG_OFFSET 0x560
#define CNTR_SET_VAL_REG_OFFSET 0x55c
#define TXN_CYCLE_CNTR_DATA 0x634
#define TXN_DDR_WR_CNTR_DATA 0x638
#define TXN_DDR_RD_CNTR_DATA 0x63c
#define TXN_SNP_RD_CNTR_DATA 0x640
#define TXN_ACP_WR_CNTR_DATA 0x644
#define TENURE_CONFIG_REG_OFFSET 0x52c
#define LOW_RANGE_OCCURRENCE_CNTR_DATA 0x53c
#define MID_RANGE_OCCURRENCE_CNTR_DATA 0x540
#define HIGH_RANGE_OCCURRENCE_CNTR_DATA 0x544
#define LPM_TENURE_CNTR_DATA 0x548
#define LOW_RANGE_TENURE_VAL 0x534
#define MID_RANGE_TENURE_VAL 0x538
#define TENURE_ENABLE_ALL 0x880444
#define TENURE_CNTR_ENABLE 19
#define LOW_RANGE_OCCURRENCE_CNTR_ENABLE 2
#define MID_RANGE_OCCURRENCE_CNTR_ENABLE 6
#define HIGH_RANGE_OCCURRENCE_CNTR_ENABLE 10
#define OCCURRENCE_CNTR_ENABLE_MASK (BIT(2) | BIT(6) | BIT(10))
#define LPM_MODE_TENURE_CNTR_RESET 12
#define LOW_RANGE_OCCURRENCE_CNTR_RESET 0
#define MID_RANGE_OCCURRENCE_CNTR_RESET 4
#define HIGH_RANGE_OCCURRENCE_CNTR_RESET 8
/* Txn reset/set/overflow bit offsets */
#define TXN_RESET_BIT 5
#define TXN_RESET_ALL_CNTR 0x000003e0
#define TXN_RESET_ALL_CNTR_OVSR_BIT 0x007c0000
#define TENURE_RESET_ALL_CNTR 0x00001111
#define TENURE_RESET_OVERFLOW_ALL_CNTR 0x00002888
#define TXN_SET_BIT 13
#define TXN_OVERFLOW_RESET_BIT 18
#define LOW_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET 3
#define MID_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET 7
#define HIGH_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET 11
#define LPM_MODE_TENURE_CNTR_OVERFLOW_RESET 13
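/*
 * Counter indices. The enum values match the serial-number field
 * (bits [4:1]) of the event IDs above and are used directly as counter
 * indices by l2_cache_get_event_idx().
 */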
enum counter_index {
CLUSTER_CYCLE_COUNTER,
DDR_WR_CNTR,
DDR_RD_CNTR,
SNP_RD_CNTR,
ACP_WR_CNTR,
LPM_TENURE_CNTR,
LOW_OCCURRENCE_CNTR,
MID_OCCURRENCE_CNTR,
HIGH_OCCURRENCE_CNTR,
MAX_L2_CNTRS
};
/*
 * Each cluster has its own PMU (counters) and is associated with one or
 * more CPUs. This structure represents one of the hardware PMUs.
 */
struct cluster_pmu {
struct device dev;
struct list_head next;
struct perf_event *events[MAX_L2_CNTRS];
void __iomem *reg_addr;
struct l2cache_pmu *l2cache_pmu;
DECLARE_BITMAP(used_counters, MAX_L2_CNTRS);
int irq;
int cluster_id;
/* The CPU that is used for collecting events on this cluster */
int on_cpu;
/* All the CPUs associated with this cluster */
cpumask_t cluster_cpus;
spinlock_t pmu_lock;
};
/*
* Aggregate PMU. Implements the core pmu functions and manages
* the hardware PMUs.
*/
struct l2cache_pmu {
struct hlist_node node;
u32 num_pmus;
struct pmu pmu;
int num_counters;
cpumask_t cpumask;
struct platform_device *pdev;
struct cluster_pmu * __percpu *pmu_cluster;
struct list_head clusters;
};
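/* Cluster (0 or 1) whose tenure threshold registers the sysfs attributes act on */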
static unsigned int which_cluster_tenure = 1;
static u32 l2_counter_present_mask;
#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu))
#define to_cluster_device(d) container_of(d, struct cluster_pmu, dev)
static inline struct cluster_pmu *get_cluster_pmu(
struct l2cache_pmu *l2cache_pmu, int cpu)
{
return *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu);
}
static inline u32 cluster_tenure_counter_read(struct cluster_pmu *cluster,
u32 idx)
{
u32 val = 0;
switch (idx) {
case LOW_RANGE_OCCURRENCE_CNTR_ENABLE:
val = readl_relaxed(cluster->reg_addr +
LOW_RANGE_OCCURRENCE_CNTR_DATA);
break;
case MID_RANGE_OCCURRENCE_CNTR_ENABLE:
val = readl_relaxed(cluster->reg_addr +
MID_RANGE_OCCURRENCE_CNTR_DATA);
break;
case HIGH_RANGE_OCCURRENCE_CNTR_ENABLE:
val = readl_relaxed(cluster->reg_addr +
HIGH_RANGE_OCCURRENCE_CNTR_DATA);
break;
default:
pr_crit(L2_COUNTERS_BUG "Invalid index %u in %s\n", idx, __func__);
}
return val;
}
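/*
 * Transaction counter data registers are contiguous 32-bit registers
 * starting at TXN_CYCLE_CNTR_DATA, so the counter index doubles as a
 * word offset. Tenure-group counters live at scattered offsets and are
 * read via cluster_tenure_counter_read().
 */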
static inline u32 cluster_pmu_counter_get_value(struct cluster_pmu *cluster,
u32 idx, u32 event_grp)
{
if (event_grp == TENURE_CNTRS_GROUP_ID)
return cluster_tenure_counter_read(cluster, idx);
return readl_relaxed(cluster->reg_addr +
TXN_CYCLE_CNTR_DATA + (4 * idx));
}
static inline u32 cluster_txn_config_read(struct cluster_pmu *cluster)
{
return readl_relaxed(cluster->reg_addr + TXN_CONFIG_REG_OFFSET);
}
static inline void cluster_txn_config_write(struct cluster_pmu *cluster,
u32 val)
{
writel_relaxed(val, cluster->reg_addr + TXN_CONFIG_REG_OFFSET);
}
static inline u32 cluster_tenure_config_read(struct cluster_pmu *cluster)
{
return readl_relaxed(cluster->reg_addr + TENURE_CONFIG_REG_OFFSET);
}
static inline void cluster_tenure_config_write(struct cluster_pmu *cluster,
u32 val)
{
writel_relaxed(val, cluster->reg_addr + TENURE_CONFIG_REG_OFFSET);
}
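/*
 * Counters and their overflow flags are reset by pulsing (setting and
 * then clearing) the corresponding bit in the txn/tenure config register.
 */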
static void cluster_txn_cntr_reset(struct cluster_pmu *cluster, u32 idx)
{
cluster_txn_config_write(cluster, cluster_txn_config_read(cluster)
| BIT(idx));
cluster_txn_config_write(cluster, cluster_txn_config_read(cluster)
& ~BIT(idx));
}
static void cluster_pmu_reset(struct cluster_pmu *cluster)
{
cluster_txn_config_write(cluster, cluster_txn_config_read(cluster)
| TXN_RESET_ALL_CNTR);
cluster_txn_config_write(cluster, cluster_txn_config_read(cluster)
& ~TXN_RESET_ALL_CNTR);
cluster_txn_config_write(cluster, cluster_txn_config_read(cluster)
| TXN_RESET_ALL_CNTR_OVSR_BIT);
cluster_txn_config_write(cluster, cluster_txn_config_read(cluster)
& ~TXN_RESET_ALL_CNTR_OVSR_BIT);
cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster)
| TENURE_RESET_ALL_CNTR);
cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster)
& ~TENURE_RESET_ALL_CNTR);
cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster)
| TENURE_RESET_OVERFLOW_ALL_CNTR);
cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster)
& ~TENURE_RESET_OVERFLOW_ALL_CNTR);
}
static void cluster_tenure_counter_reset(struct cluster_pmu *cluster, u32 idx)
{
cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster)
| BIT(idx));
cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster)
& ~BIT(idx));
}
static inline void cluster_tenure_counter_enable(struct cluster_pmu *cluster,
u32 idx)
{
u32 val;
val = cluster_tenure_config_read(cluster);
/* Already enabled */
if (val & BIT(idx))
return;
switch (idx) {
case LOW_RANGE_OCCURRENCE_CNTR_ENABLE:
cluster_tenure_counter_reset(cluster,
LOW_RANGE_OCCURRENCE_CNTR_RESET);
break;
case MID_RANGE_OCCURRENCE_CNTR_ENABLE:
cluster_tenure_counter_reset(cluster,
MID_RANGE_OCCURRENCE_CNTR_RESET);
break;
case HIGH_RANGE_OCCURRENCE_CNTR_ENABLE:
cluster_tenure_counter_reset(cluster,
HIGH_RANGE_OCCURRENCE_CNTR_RESET);
break;
default:
pr_crit(L2_COUNTERS_BUG "Invalid index %u in %s\n", idx, __func__);
return;
}
if (!(val & BIT(TENURE_CNTR_ENABLE))) {
cluster_tenure_counter_reset(cluster,
LPM_MODE_TENURE_CNTR_RESET);
/*
 * Enable the tenure counter whenever any occurrence counter is
 * enabled, since the occurrence counters do not increment unless
 * the tenure counter is running.
 */
cluster_tenure_config_write(cluster,
cluster_tenure_config_read(cluster)
| BIT(TENURE_CNTR_ENABLE));
}
cluster_tenure_config_write(cluster,
cluster_tenure_config_read(cluster) | BIT(idx));
}
static inline void cluster_txn_counter_enable(struct cluster_pmu *cluster,
u32 idx)
{
u32 val;
val = cluster_txn_config_read(cluster);
if (val & BIT(idx))
return;
cluster_txn_cntr_reset(cluster, TXN_RESET_BIT + idx);
cluster_txn_config_write(cluster, cluster_txn_config_read(cluster)
| BIT(idx));
}
static inline void cluster_tenure_counter_disable(struct cluster_pmu *cluster,
u32 idx)
{
u32 val;
cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster)
& ~BIT(idx));
val = cluster_tenure_config_read(cluster);
if (!(val & OCCURRENCE_CNTR_ENABLE_MASK))
cluster_tenure_config_write(cluster, val &
~BIT(TENURE_CNTR_ENABLE));
}
static inline void cluster_txn_counter_disable(struct cluster_pmu *cluster,
u32 idx)
{
cluster_txn_config_write(cluster,
cluster_txn_config_read(cluster) & ~BIT(idx));
}
static inline u32 cluster_reg_read(struct cluster_pmu *cluster, u32 offset)
{
return readl_relaxed(cluster->reg_addr + offset);
}
static inline void cluster_tenure_cntr_reset_ovsr(struct cluster_pmu *cluster,
u32 event_idx)
{
switch (event_idx) {
case LPM_TENURE_CNTR:
cluster_tenure_counter_reset(cluster,
LPM_MODE_TENURE_CNTR_OVERFLOW_RESET);
break;
case LOW_RANGE_OCCURRENCE_CNTR_ENABLE:
cluster_tenure_counter_reset(cluster,
LOW_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET);
break;
case MID_RANGE_OCCURRENCE_CNTR_ENABLE:
cluster_tenure_counter_reset(cluster,
MID_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET);
break;
case HIGH_RANGE_OCCURRENCE_CNTR_ENABLE:
cluster_tenure_counter_reset(cluster,
HIGH_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET);
break;
default:
pr_crit(L2_COUNTERS_BUG "Invalid index %u in %s\n", event_idx, __func__);
}
}
static inline void cluster_pmu_reset_ovsr(struct cluster_pmu *cluster,
u32 config_base)
{
u32 event_idx;
u32 event_grp;
event_idx = (config_base & REGBIT_MASK) >> REGBIT_SHIFT;
event_grp = config_base & EVENT_GROUP_MASK;
if (event_grp == TENURE_CNTRS_GROUP_ID)
cluster_tenure_cntr_reset_ovsr(cluster, event_idx);
else
cluster_txn_cntr_reset(cluster,
TXN_OVERFLOW_RESET_BIT + event_idx);
}
static inline bool cluster_pmu_has_overflowed(u32 ovsr)
{
return !!(ovsr & l2_counter_present_mask);
}
static inline bool cluster_pmu_counter_has_overflowed(u32 ovsr, u32 idx)
{
return !!(ovsr & BIT(idx));
}
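/*
 * Fold the current hardware count into event->count. All counters are
 * 32 bits wide; when called from the overflow path (ovsr set) the
 * counter is credited up to 0xffffffff and prev_count is reset to 0 so
 * that the next update accounts for the post-wrap value.
 */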
static void l2_cache_event_update(struct perf_event *event, u32 ovsr)
{
struct hw_perf_event *hwc = &event->hw;
u64 delta, prev, now;
u32 event_idx = hwc->config_base;
u32 event_grp;
struct cluster_pmu *cluster = event->pmu_private;
prev = local64_read(&hwc->prev_count);
if (ovsr) {
now = 0xffffffff;
goto out;
}
event_idx = (hwc->config_base & REGBIT_MASK) >> REGBIT_SHIFT;
event_grp = hwc->config_base & EVENT_GROUP_MASK;
do {
prev = local64_read(&hwc->prev_count);
now = cluster_pmu_counter_get_value(cluster, event_idx,
event_grp);
} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
/* All are 32-bit counters */
out:
delta = now - prev;
delta &= 0xffffffff;
local64_add(delta, &event->count);
if (ovsr)
local64_set(&hwc->prev_count, 0);
}
static int l2_cache_get_event_idx(struct cluster_pmu *cluster,
struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx;
int num_ctrs = cluster->l2cache_pmu->num_counters;
idx = (hwc->config_base & ID_MASK) >> ID_SHIFT;
if (idx >= num_ctrs)
return -EINVAL;
if (test_bit(idx, cluster->used_counters))
return -EAGAIN;
set_bit(idx, cluster->used_counters);
return idx;
}
static void l2_cache_clear_event_idx(struct cluster_pmu *cluster,
struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
clear_bit(idx, cluster->used_counters);
}
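/* Overflow interrupt: update every overflowed counter's event and clear its overflow bit */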
static irqreturn_t l2_cache_handle_irq(int irq_num, void *data)
{
struct cluster_pmu *cluster = data;
int num_counters = cluster->l2cache_pmu->num_counters;
u32 ovsr;
int idx;
u32 config_base;
ovsr = cluster_reg_read(cluster, OVERFLOW_REG_OFFSET);
if (!cluster_pmu_has_overflowed(ovsr))
return IRQ_NONE;
/*
 * LPM tenure counter overflow is a special case. It should never
 * happen, but reset its overflow bit anyway to be safe. The
 * hardware is expected to handle tenure counter overflow and its
 * range classification; even if it does not, the worst case is a
 * single spurious count added to one of the low/mid/high
 * occurrence counters, which is rare enough to ignore.
 */
if (ovsr & BIT(LPM_TENURE_CNTR))
cluster_tenure_cntr_reset_ovsr(cluster, LPM_TENURE_CNTR);
spin_lock(&cluster->pmu_lock);
for_each_set_bit(idx, cluster->used_counters, num_counters) {
struct perf_event *event = cluster->events[idx];
struct hw_perf_event *hwc;
if (WARN_ON_ONCE(!event))
continue;
if (!cluster_pmu_counter_has_overflowed(ovsr, idx))
continue;
l2_cache_event_update(event, 1);
hwc = &event->hw;
config_base = hwc->config_base;
cluster_pmu_reset_ovsr(cluster, config_base);
}
spin_unlock(&cluster->pmu_lock);
return IRQ_HANDLED;
}
static int l2_cache_event_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct cluster_pmu *cluster;
struct l2cache_pmu *l2cache_pmu;
if (event->attr.type != event->pmu->type)
return -ENOENT;
l2cache_pmu = to_l2cache_pmu(event->pmu);
if (hwc->sample_period) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
"Sampling not supported\n");
return -EOPNOTSUPP;
}
if (event->cpu < 0) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
"Per-task mode not supported\n");
return -EOPNOTSUPP;
}
/* We cannot filter accurately so we just don't allow it. */
if (event->attr.exclude_user || event->attr.exclude_kernel ||
event->attr.exclude_hv || event->attr.exclude_idle) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
"Can't exclude execution levels\n");
return -EOPNOTSUPP;
}
cluster = get_cluster_pmu(l2cache_pmu, event->cpu);
if (!cluster) {
/* CPU has not been initialised */
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
"CPU%d not associated with L2 cluster\n", event->cpu);
return -EINVAL;
}
/* Ensure all events in a group are on the same cpu */
if ((event->group_leader != event) &&
(cluster->on_cpu != event->group_leader->cpu)) {
dev_dbg_ratelimited(&l2cache_pmu->pdev->dev,
"Can't create group on CPUs %d and %d",
event->cpu, event->group_leader->cpu);
return -EINVAL;
}
hwc->idx = -1;
hwc->config_base = event->attr.config;
event->readable_on_cpus = CPU_MASK_ALL;
event->pmu_private = cluster;
/*
 * Override event->cpu with the cluster's reader CPU, since events
 * may be opened for a CPU that is currently offline.
 */
event->cpu = cluster->on_cpu;
return 0;
}
static void l2_cache_event_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct cluster_pmu *cluster = event->pmu_private;
int event_idx;
hwc->state = 0;
event_idx = (hwc->config_base & REGBIT_MASK) >> REGBIT_SHIFT;
if ((hwc->config_base & EVENT_GROUP_MASK) == TENURE_CNTRS_GROUP_ID) {
cluster_tenure_counter_enable(cluster, event_idx);
return;
}
cluster_txn_counter_enable(cluster, event_idx);
}
static void l2_cache_event_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct cluster_pmu *cluster = event->pmu_private;
int event_idx;
u32 ovsr;
if (hwc->state & PERF_HES_STOPPED)
return;
event_idx = (hwc->config_base & REGBIT_MASK) >> REGBIT_SHIFT;
if ((hwc->config_base & EVENT_GROUP_MASK) == TENURE_CNTRS_GROUP_ID)
cluster_tenure_counter_disable(cluster, event_idx);
else
cluster_txn_counter_disable(cluster, event_idx);
ovsr = cluster_reg_read(cluster, OVERFLOW_REG_OFFSET);
if (cluster_pmu_counter_has_overflowed(ovsr, event_idx)) {
l2_cache_event_update(event, 1);
cluster_pmu_reset_ovsr(cluster, hwc->config_base);
}
if (flags & PERF_EF_UPDATE)
l2_cache_event_update(event, 0);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
static int l2_cache_event_add(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct cluster_pmu *cluster = event->pmu_private;
int idx;
idx = l2_cache_get_event_idx(cluster, event);
if (idx < 0)
return idx;
hwc->idx = idx;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
cluster->events[idx] = event;
local64_set(&hwc->prev_count, 0);
if (flags & PERF_EF_START)
l2_cache_event_start(event, flags);
/* Propagate changes to the userspace mapping. */
perf_event_update_userpage(event);
return 0;
}
static void l2_cache_event_del(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
unsigned long intr_flag;
struct cluster_pmu *cluster = event->pmu_private;
/*
 * We could race with the overflow interrupt of this event, so
 * take the cluster lock while tearing it down.
 */
spin_lock_irqsave(&cluster->pmu_lock, intr_flag);
l2_cache_event_stop(event, flags | PERF_EF_UPDATE);
l2_cache_clear_event_idx(cluster, event);
cluster->events[idx] = NULL;
hwc->idx = -1;
spin_unlock_irqrestore(&cluster->pmu_lock, intr_flag);
perf_event_update_userpage(event);
}
static void l2_cache_event_read(struct perf_event *event)
{
l2_cache_event_update(event, 0);
}
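/*
 * sysfs controls: low_tenure_threshold and mid_tenure_threshold program
 * the LOW/MID_RANGE_TENURE_VAL registers of the cluster selected by
 * which_cluster_tenure.
 */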
static ssize_t low_tenure_threshold_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));
struct cluster_pmu *cluster = NULL;
u32 val;
int ret;
ret = kstrtouint(buf, 0, &val);
if (ret < 0)
return ret;
if (val == 0 || val > INT_MAX)
return -EINVAL;
list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
if (cluster->cluster_id == which_cluster_tenure)
writel_relaxed(val,
cluster->reg_addr + LOW_RANGE_TENURE_VAL);
}
return count;
}
static ssize_t low_tenure_threshold_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));
struct cluster_pmu *cluster = NULL;
u32 val = 0;
list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
if (cluster->cluster_id == which_cluster_tenure)
val = cluster_reg_read(cluster, LOW_RANGE_TENURE_VAL);
}
return snprintf(buf, PAGE_SIZE, "0x%x\n", val);
}
static ssize_t mid_tenure_threshold_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));
struct cluster_pmu *cluster = NULL;
u32 val;
int ret;
ret = kstrtouint(buf, 0, &val);
if (ret < 0)
return ret;
if (val == 0 || val > INT_MAX)
return -EINVAL;
list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
if (cluster->cluster_id == which_cluster_tenure)
writel_relaxed(val,
cluster->reg_addr + MID_RANGE_TENURE_VAL);
}
return count;
}
static ssize_t mid_tenure_threshold_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));
struct cluster_pmu *cluster = NULL;
u32 val = 0;
list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
if (cluster->cluster_id == which_cluster_tenure)
val = cluster_reg_read(cluster, MID_RANGE_TENURE_VAL);
}
return snprintf(buf, PAGE_SIZE, "0x%x\n", val);
}
static ssize_t which_cluster_tenure_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return snprintf(buf, PAGE_SIZE, "0x%x\n", which_cluster_tenure);
}
static ssize_t which_cluster_tenure_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
unsigned int val;
int ret;
ret = kstrtouint(buf, 0, &val);
if (ret < 0)
return ret;
/* Only clusters 0 and 1 are valid; reject before updating the global */
if (val > 1)
return -EINVAL;
which_cluster_tenure = val;
return count;
}
static struct device_attribute mid_tenure_threshold_attr =
__ATTR(mid_tenure_threshold, 0644,
mid_tenure_threshold_show,
mid_tenure_threshold_store);
static struct attribute *mid_tenure_threshold_attrs[] = {
&mid_tenure_threshold_attr.attr,
NULL,
};
static struct attribute_group mid_tenure_threshold_group = {
.attrs = mid_tenure_threshold_attrs,
};
static struct device_attribute low_tenure_threshold_attr =
__ATTR(low_tenure_threshold, 0644,
low_tenure_threshold_show,
low_tenure_threshold_store);
static struct attribute *low_tenure_threshold_attrs[] = {
&low_tenure_threshold_attr.attr,
NULL,
};
static struct attribute_group low_tenure_threshold_group = {
.attrs = low_tenure_threshold_attrs,
};
static struct device_attribute which_cluster_tenure_attr =
__ATTR(which_cluster_tenure, 0644,
which_cluster_tenure_show,
which_cluster_tenure_store);
static struct attribute *which_cluster_tenure_attrs[] = {
&which_cluster_tenure_attr.attr,
NULL,
};
static struct attribute_group which_cluster_tenure_group = {
.attrs = which_cluster_tenure_attrs,
};
static ssize_t l2_cache_pmu_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev));
return cpumap_print_to_pagebuf(true, buf, &l2cache_pmu->cpumask);
}
static struct device_attribute l2_cache_pmu_cpumask_attr =
__ATTR(cpumask, 0444, l2_cache_pmu_cpumask_show, NULL);
static struct attribute *l2_cache_pmu_cpumask_attrs[] = {
&l2_cache_pmu_cpumask_attr.attr,
NULL,
};
static struct attribute_group l2_cache_pmu_cpumask_group = {
.attrs = l2_cache_pmu_cpumask_attrs,
};
PMU_FORMAT_ATTR(event, "config:0-9");
static struct attribute *l2_cache_pmu_formats[] = {
&format_attr_event.attr,
NULL,
};
static struct attribute_group l2_cache_pmu_format_group = {
.name = "format",
.attrs = l2_cache_pmu_formats,
};
static ssize_t l2cache_pmu_event_show(struct device *dev,
struct device_attribute *attr, char *page)
{
struct perf_pmu_events_attr *pmu_attr;
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
return snprintf(page, PAGE_SIZE, "event=0x%02llx\n", pmu_attr->id);
}
#define L2CACHE_EVENT_ATTR(_name, _id) \
(&((struct perf_pmu_events_attr[]) { \
{ .attr = __ATTR(_name, 0444, l2cache_pmu_event_show, NULL), \
.id = _id, } \
})[0].attr.attr)
static struct attribute *l2_cache_pmu_events[] = {
L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLE_CNTR),
L2CACHE_EVENT_ATTR(ddr_write, L2_EVENT_DDR_WR_CNTR),
L2CACHE_EVENT_ATTR(ddr_read, L2_EVENT_DDR_RD_CNTR),
L2CACHE_EVENT_ATTR(snoop_read, L2_EVENT_SNP_RD_CNTR),
L2CACHE_EVENT_ATTR(acp_write, L2_EVENT_ACP_WR_CNTR),
L2CACHE_EVENT_ATTR(low_range_occur, L2_EVENT_LOW_RANGE_OCCUR_CNTR),
L2CACHE_EVENT_ATTR(mid_range_occur, L2_EVENT_MID_RANGE_OCCUR_CNTR),
L2CACHE_EVENT_ATTR(high_range_occur, L2_EVENT_HIGH_RANGE_OCCUR_CNTR),
NULL
};
static struct attribute_group l2_cache_pmu_events_group = {
.name = "events",
.attrs = l2_cache_pmu_events,
};
static const struct attribute_group *l2_cache_pmu_attr_grps[] = {
&l2_cache_pmu_format_group,
&l2_cache_pmu_cpumask_group,
&l2_cache_pmu_events_group,
&mid_tenure_threshold_group,
&low_tenure_threshold_group,
&which_cluster_tenure_group,
NULL,
};
static struct cluster_pmu *l2_cache_associate_cpu_with_cluster(
struct l2cache_pmu *l2cache_pmu, int cpu)
{
u64 mpidr;
int cpu_cluster_id;
struct cluster_pmu *cluster = NULL;
/*
* This assumes that the cluster_id is in MPIDR[aff1] for
* single-threaded cores, and MPIDR[aff2] for multi-threaded
* cores. This logic will have to be updated if this changes.
*/
mpidr = read_cpuid_mpidr();
if (mpidr & MPIDR_MT_BITMASK)
cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
else
cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
list_for_each_entry(cluster, &l2cache_pmu->clusters, next) {
if (cluster->cluster_id != cpu_cluster_id)
continue;
dev_info(&l2cache_pmu->pdev->dev,
"CPU%d associated with cluster %d\n", cpu,
cluster->cluster_id);
cpumask_set_cpu(cpu, &cluster->cluster_cpus);
*per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster;
break;
}
return cluster;
}
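/*
 * First-online-CPU setup: reset every cluster's counters, enable its
 * overflow interrupt and make @cpu the reader CPU for all clusters
 * until CPUs from the other clusters come online.
 */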
static void clusters_initialization(struct l2cache_pmu *l2cache_pmu,
unsigned int cpu)
{
struct cluster_pmu *temp_cluster = NULL;
list_for_each_entry(temp_cluster, &l2cache_pmu->clusters, next) {
cluster_pmu_reset(temp_cluster);
enable_irq(temp_cluster->irq);
temp_cluster->on_cpu = cpu;
}
}
static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
{
struct cluster_pmu *cluster;
struct l2cache_pmu *l2cache_pmu;
cpumask_t cluster_online_cpus;
if (!node)
goto out;
l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
cluster = get_cluster_pmu(l2cache_pmu, cpu);
if (!cluster) {
/* First time this CPU has come online */
cluster = l2_cache_associate_cpu_with_cluster(l2cache_pmu, cpu);
if (!cluster) {
/* Only if broken firmware doesn't list every cluster */
WARN_ONCE(1, "No L2 cache cluster for CPU%d\n", cpu);
goto out;
}
}
/*
 * If another CPU is already managing this cluster and it belongs
 * to this cluster's CPU set, there is nothing to do. Otherwise
 * take over as the reader CPU below.
 */
if (cluster->on_cpu != -1) {
cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
get_cpu_mask(cluster->on_cpu));
if (cpumask_test_cpu(cluster->on_cpu, &cluster_online_cpus))
goto out;
} else {
clusters_initialization(l2cache_pmu, cpu);
cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
goto out;
}
cluster->on_cpu = cpu;
cpumask_set_cpu(cpu, &l2cache_pmu->cpumask);
out:
return 0;
}
static void disable_clusters_interrupt(struct l2cache_pmu *l2cache_pmu)
{
struct cluster_pmu *temp_cluster = NULL;
list_for_each_entry(temp_cluster, &l2cache_pmu->clusters, next)
disable_irq(temp_cluster->irq);
}
static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
struct cluster_pmu *cluster;
struct l2cache_pmu *l2cache_pmu;
cpumask_t cluster_online_cpus;
unsigned int target;
struct cluster_pmu *temp_cluster = NULL;
if (!node)
goto out;
l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node);
cluster = get_cluster_pmu(l2cache_pmu, cpu);
if (!cluster)
goto out;
/* If this CPU is not managing the cluster, we're done */
if (cluster->on_cpu != cpu)
goto out;
/* Give up ownership of cluster */
cpumask_clear_cpu(cpu, &l2cache_pmu->cpumask);
cluster->on_cpu = -1;
/* Try to find another online CPU in this cluster */
cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus,
cpu_online_mask);
target = cpumask_any_but(&cluster_online_cpus, cpu);
if (target >= nr_cpu_ids) {
cpumask_and(&cluster_online_cpus, &l2cache_pmu->cpumask,
cpu_online_mask);
target = cpumask_first(&cluster_online_cpus);
if (target >= nr_cpu_ids) {
disable_clusters_interrupt(l2cache_pmu);
goto out;
}
}
cluster->on_cpu = target;
if (cpumask_first(&l2cache_pmu->cpumask) >= nr_cpu_ids) {
list_for_each_entry(temp_cluster,
&l2cache_pmu->clusters, next) {
if (temp_cluster->cluster_id != cluster->cluster_id)
temp_cluster->on_cpu = target;
}
}
perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target);
cpumask_set_cpu(target, &l2cache_pmu->cpumask);
out:
return 0;
}
static void l2_cache_pmu_dev_release(struct device *dev)
{
struct cluster_pmu *cluster = to_cluster_device(dev);
kfree(cluster);
}
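/*
 * One devicetree child node per cluster, carrying a "cluster-id"
 * property, a reg entry for the counter block and the overflow
 * interrupt.
 */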
static int l2_cache_pmu_probe_cluster(struct device *parent,
struct device_node *cn, void *data)
{
struct l2cache_pmu *l2cache_pmu = data;
struct cluster_pmu *cluster;
u32 fw_cluster_id;
struct resource res;
int ret;
int irq;
cluster = kzalloc(sizeof(*cluster), GFP_KERNEL);
if (!cluster)
return -ENOMEM;
cluster->dev.parent = parent;
cluster->dev.of_node = cn;
cluster->dev.release = l2_cache_pmu_dev_release;
dev_set_name(&cluster->dev, "%s:%s", dev_name(parent), cn->name);
ret = device_register(&cluster->dev);
if (ret) {
pr_err(L2_COUNTERS_BUG
"failed to register l2 cache pmu device\n");
goto err_put_dev;
}
ret = of_property_read_u32(cn, "cluster-id", &fw_cluster_id);
if (ret) {
pr_err(L2_COUNTERS_BUG "Missing cluster-id.\n");
goto err_unregister;
}
ret = of_address_to_resource(cn, 0, &res);
if (ret) {
pr_err(L2_COUNTERS_BUG "not able to find the resource\n");
goto err_unregister;
}
cluster->reg_addr = devm_ioremap_resource(&cluster->dev, &res);
if (IS_ERR(cluster->reg_addr)) {
ret = PTR_ERR(cluster->reg_addr);
pr_err(L2_COUNTERS_BUG "not able to remap the resource\n");
goto err_unregister;
}
INIT_LIST_HEAD(&cluster->next);
cluster->cluster_id = fw_cluster_id;
cluster->l2cache_pmu = l2cache_pmu;
irq = of_irq_get(cn, 0);
if (irq < 0) {
ret = irq;
pr_err(L2_COUNTERS_BUG
"Failed to get valid IRQ for cluster %u\n",
fw_cluster_id);
goto err_unregister;
}
irq_set_status_flags(irq, IRQ_NOAUTOEN);
cluster->irq = irq;
cluster->on_cpu = -1;
ret = devm_request_irq(&cluster->dev, irq, l2_cache_handle_irq,
IRQF_NOBALANCING | IRQF_NO_THREAD,
"l2-cache-pmu", cluster);
if (ret) {
pr_err(L2_COUNTERS_BUG
"Unable to request IRQ%d for L2 PMU counters\n", irq);
goto err_unregister;
}
pr_info(L2_COUNTERS_BUG
"Registered L2 cache PMU cluster %u\n", fw_cluster_id);
spin_lock_init(&cluster->pmu_lock);
list_add(&cluster->next, &l2cache_pmu->clusters);
l2cache_pmu->num_pmus++;
return 0;
err_unregister:
device_unregister(&cluster->dev);
return ret;
err_put_dev:
put_device(&cluster->dev);
return ret;
}
static int l2_cache_pmu_probe(struct platform_device *pdev)
{
int err;
struct l2cache_pmu *l2cache_pmu;
struct device_node *pn = pdev->dev.of_node;
struct device_node *cn;
l2cache_pmu =
devm_kzalloc(&pdev->dev, sizeof(*l2cache_pmu), GFP_KERNEL);
if (!l2cache_pmu)
return -ENOMEM;
INIT_LIST_HEAD(&l2cache_pmu->clusters);
platform_set_drvdata(pdev, l2cache_pmu);
l2cache_pmu->pmu = (struct pmu) {
.name = "l2cache_counters",
.task_ctx_nr = perf_invalid_context,
.event_init = l2_cache_event_init,
.add = l2_cache_event_add,
.del = l2_cache_event_del,
.start = l2_cache_event_start,
.stop = l2_cache_event_stop,
.read = l2_cache_event_read,
.attr_groups = l2_cache_pmu_attr_grps,
};
l2cache_pmu->num_counters = MAX_L2_CNTRS;
l2cache_pmu->pdev = pdev;
l2cache_pmu->pmu_cluster = devm_alloc_percpu(&pdev->dev,
struct cluster_pmu *);
if (!l2cache_pmu->pmu_cluster)
return -ENOMEM;
l2_counter_present_mask = GENMASK(l2cache_pmu->num_counters - 1, 0);
cpumask_clear(&l2cache_pmu->cpumask);
for_each_available_child_of_node(pn, cn) {
err = l2_cache_pmu_probe_cluster(&pdev->dev, cn, l2cache_pmu);
if (err < 0) {
of_node_put(cn);
dev_err(&pdev->dev,
"Failed to probe L2 cache PMU cluster\n");
return err;
}
}
if (l2cache_pmu->num_pmus == 0) {
dev_err(&pdev->dev, "No hardware L2 cache PMUs found\n");
return -ENODEV;
}
err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
&l2cache_pmu->node);
if (err) {
dev_err(&pdev->dev, "Error %d registering hotplug\n", err);
return err;
}
err = perf_pmu_register(&l2cache_pmu->pmu, l2cache_pmu->pmu.name, -1);
if (err) {
dev_err(&pdev->dev, "Error %d registering L2 cache PMU\n", err);
goto out_unregister;
}
dev_info(&pdev->dev, "Registered L2 cache PMU using %d HW PMUs\n",
l2cache_pmu->num_pmus);
return 0;
out_unregister:
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
&l2cache_pmu->node);
return err;
}
static int l2cache_pmu_unregister_device(struct device *dev, void *data)
{
device_unregister(dev);
return 0;
}
static int l2_cache_pmu_remove(struct platform_device *pdev)
{
struct l2cache_pmu *l2cache_pmu = platform_get_drvdata(pdev);
int ret;
perf_pmu_unregister(&l2cache_pmu->pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
&l2cache_pmu->node);
ret = device_for_each_child(&pdev->dev, NULL,
l2cache_pmu_unregister_device);
if (ret)
dev_warn(&pdev->dev,
"can't remove cluster pmu device: %d\n", ret);
return ret;
}
static const struct of_device_id l2_cache_pmu_of_match[] = {
{ .compatible = "qcom,l2cache-pmu" },
{},
};
static struct platform_driver l2_cache_pmu_driver = {
.driver = {
.name = "l2cache-pmu",
.of_match_table = l2_cache_pmu_of_match,
},
.probe = l2_cache_pmu_probe,
.remove = l2_cache_pmu_remove,
};
static int __init register_l2_cache_pmu_driver(void)
{
int err;
err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
"AP_PERF_ARM_QCOM_L2_ONLINE",
l2cache_pmu_online_cpu,
l2cache_pmu_offline_cpu);
if (err)
return err;
return platform_driver_register(&l2_cache_pmu_driver);
}
device_initcall(register_l2_cache_pmu_driver);