| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Copyright (c) 2019 The Linux Foundation. All rights reserved. |
| */ |
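| |
| /* |
| * Perf events driver for the "qcom,l2cache-pmu" L2 cache counters. Each |
| * CPU cluster exposes a set of transaction counters (cycles, DDR |
| * read/write, snoop read, ACP write) plus LPM tenure/occurrence counters, |
| * all accessed through the memory-mapped config and data registers |
| * defined below. |
| * |
| * Events are exposed through the usual perf sysfs interface under the |
| * "l2cache_counters" PMU, e.g. (illustrative invocation only): |
| * |
| *   perf stat -a -e l2cache_counters/ddr_read/ sleep 1 |
| */ |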
| |
| #include <linux/bitops.h> |
| #include <linux/bug.h> |
| #include <linux/cpuhotplug.h> |
| #include <linux/cpumask.h> |
| #include <linux/device.h> |
| #include <linux/errno.h> |
| #include <linux/interrupt.h> |
| #include <linux/irq.h> |
| #include <linux/kernel.h> |
| #include <linux/list.h> |
| #include <linux/of.h> |
| #include <linux/of_irq.h> |
| #include <linux/of_address.h> |
| #include <linux/percpu.h> |
| #include <linux/perf_event.h> |
| #include <linux/platform_device.h> |
| #include <linux/smp.h> |
| #include <linux/spinlock.h> |
| #include <linux/sysfs.h> |
| #include <linux/types.h> |
| |
| #include <asm/local64.h> |
| |
| #define L2_COUNTERS_BUG "[l2 counters error]: " |
| /* |
| * Event IDs are encoded as 0xXXX: |
| * |
| * bit 0 (lsb): event group (transaction or tenure counters). |
| * bits 1-4:    counter index, 0 through 8. |
| * bits 5-9:    bit position of the counter enable bit in the |
| *              corresponding config register. |
| */ |
| #define L2_EVENT_CYCLE_CNTR 0x000 |
| #define L2_EVENT_DDR_WR_CNTR 0x022 |
| #define L2_EVENT_DDR_RD_CNTR 0x044 |
| #define L2_EVENT_SNP_RD_CNTR 0x066 |
| #define L2_EVENT_ACP_WR_CNTR 0x088 |
| |
| #define L2_EVENT_TENURE_CNTR 0x26b |
| #define L2_EVENT_LOW_RANGE_OCCUR_CNTR 0x04d |
| #define L2_EVENT_MID_RANGE_OCCUR_CNTR 0x0cf |
| #define L2_EVENT_HIGH_RANGE_OCCUR_CNTR 0x151 |
| |
| #define EVENT_GROUP_MASK 0x1 |
| #define REGBIT_MASK 0x3e0 |
| #define ID_MASK 0x1e |
| |
| #define TRANSACTION_CNTRS_GROUP_ID 0x0 |
| #define TENURE_CNTRS_GROUP_ID 0x1 |
| #define ID_SHIFT 0x1 |
| #define REGBIT_SHIFT 0x5 |
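| |
| /* |
| * Example decode (using the masks above): L2_EVENT_DDR_WR_CNTR = 0x022 |
| * has group 0 (transaction), counter index (0x022 & ID_MASK) >> ID_SHIFT |
| * = 1, and enable bit (0x022 & REGBIT_MASK) >> REGBIT_SHIFT = 1 in the |
| * transaction config register. |
| */ |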
| |
| #define TXN_CONFIG_REG_OFFSET 0x54c |
| #define OVERFLOW_REG_OFFSET 0x560 |
| #define CNTR_SET_VAL_REG_OFFSET 0x55c |
| #define TXN_CYCLE_CNTR_DATA 0x634 |
| #define TXN_DDR_WR_CNTR_DATA 0x638 |
| #define TXN_DDR_RD_CNTR_DATA 0x63c |
| #define TXN_SNP_RD_CNTR_DATA 0x640 |
| #define TXN_ACP_WR_CNTR_DATA 0x644 |
| |
| #define TENURE_CONFIG_REG_OFFSET 0x52c |
| #define LOW_RANGE_OCCURRENCE_CNTR_DATA 0x53c |
| #define MID_RANGE_OCCURRENCE_CNTR_DATA 0x540 |
| #define HIGH_RANGE_OCCURRENCE_CNTR_DATA 0x544 |
| #define LPM_TENURE_CNTR_DATA 0x548 |
| #define LOW_RANGE_TENURE_VAL 0x534 |
| #define MID_RANGE_TENURE_VAL 0x538 |
| |
| #define TENURE_ENABLE_ALL 0x880444 |
| #define TENURE_CNTR_ENABLE 19 |
| #define LOW_RANGE_OCCURRENCE_CNTR_ENABLE 2 |
| #define MID_RANGE_OCCURRENCE_CNTR_ENABLE 6 |
| #define HIGH_RANGE_OCCURRENCE_CNTR_ENABLE 10 |
| #define OCCURRENCE_CNTR_ENABLE_MASK (BIT(2) | BIT(6) | BIT(10)) |
| |
| #define LPM_MODE_TENURE_CNTR_RESET 12 |
| #define LOW_RANGE_OCCURRENCE_CNTR_RESET 0 |
| #define MID_RANGE_OCCURRENCE_CNTR_RESET 4 |
| #define HIGH_RANGE_OCCURRENCE_CNTR_RESET 8 |
| |
| /* Txn reset/set/overflow bit offsets */ |
| #define TXN_RESET_BIT 5 |
| #define TXN_RESET_ALL_CNTR 0x000003e0 |
| #define TXN_RESET_ALL_CNTR_OVSR_BIT 0x007c0000 |
| #define TENURE_RESET_ALL_CNTR 0x00001111 |
| #define TENURE_RESET_OVERFLOW_ALL_CNTR 0x00002888 |
| |
| #define TXN_SET_BIT 13 |
| #define TXN_OVERFLOW_RESET_BIT 18 |
| |
| #define LOW_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET 3 |
| #define MID_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET 7 |
| #define HIGH_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET 11 |
| #define LPM_MODE_TENURE_CNTR_OVERFLOW_RESET 13 |
| |
| enum counter_index { |
| CLUSTER_CYCLE_COUNTER, |
| DDR_WR_CNTR, |
| DDR_RD_CNTR, |
| SNP_RD_CNTR, |
| ACP_WR_CNTR, |
| LPM_TENURE_CNTR, |
| LOW_OCCURRENCE_CNTR, |
| MID_OCCURRENCE_CNTR, |
| HIGH_OCCURRENCE_CNTR, |
| MAX_L2_CNTRS |
| }; |
| |
| /* |
| * Each cluster has its own PMU (counters) and is associated with one or |
| * more CPUs. This structure represents one of the hardware PMUs. |
| */ |
| struct cluster_pmu { |
| struct device dev; |
| struct list_head next; |
| struct perf_event *events[MAX_L2_CNTRS]; |
| void __iomem *reg_addr; |
| struct l2cache_pmu *l2cache_pmu; |
| DECLARE_BITMAP(used_counters, MAX_L2_CNTRS); |
| int irq; |
| int cluster_id; |
| /* The CPU that is used for collecting events on this cluster */ |
| int on_cpu; |
| /* All the CPUs associated with this cluster */ |
| cpumask_t cluster_cpus; |
| spinlock_t pmu_lock; |
| }; |
| |
| /* |
| * Aggregate PMU. Implements the core pmu functions and manages |
| * the hardware PMUs. |
| */ |
| struct l2cache_pmu { |
| struct hlist_node node; |
| u32 num_pmus; |
| struct pmu pmu; |
| int num_counters; |
| cpumask_t cpumask; |
| struct platform_device *pdev; |
| struct cluster_pmu * __percpu *pmu_cluster; |
| struct list_head clusters; |
| }; |
| |
| static unsigned int which_cluster_tenure = 1; |
| static u32 l2_counter_present_mask; |
| |
| #define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu)) |
| #define to_cluster_device(d) container_of(d, struct cluster_pmu, dev) |
| |
| static inline struct cluster_pmu *get_cluster_pmu( |
| struct l2cache_pmu *l2cache_pmu, int cpu) |
| { |
| return *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu); |
| } |
| |
| static inline u32 cluster_tenure_counter_read(struct cluster_pmu *cluster, |
| u32 idx) |
| { |
| u32 val = 0; |
| |
| switch (idx) { |
| case LOW_RANGE_OCCURRENCE_CNTR_ENABLE: |
| val = readl_relaxed(cluster->reg_addr + |
| LOW_RANGE_OCCURRENCE_CNTR_DATA); |
| break; |
| |
| case MID_RANGE_OCCURRENCE_CNTR_ENABLE: |
| val = readl_relaxed(cluster->reg_addr + |
| MID_RANGE_OCCURRENCE_CNTR_DATA); |
| break; |
| |
| case HIGH_RANGE_OCCURRENCE_CNTR_ENABLE: |
| val = readl_relaxed(cluster->reg_addr + |
| HIGH_RANGE_OCCURRENCE_CNTR_DATA); |
| break; |
| |
| default: |
| pr_crit(L2_COUNTERS_BUG |
| "Invalid index, during %s\n", __func__); |
| } |
| |
| return val; |
| } |
| |
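| /* |
| * Transaction counter data registers are contiguous 32-bit registers |
| * starting at TXN_CYCLE_CNTR_DATA, so a transaction counter is read at |
| * TXN_CYCLE_CNTR_DATA + 4 * idx; tenure-group counters live at their own |
| * offsets and are read via cluster_tenure_counter_read(). |
| */ |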
| static inline u32 cluster_pmu_counter_get_value(struct cluster_pmu *cluster, |
| u32 idx, u32 event_grp) |
| { |
| if (event_grp == TENURE_CNTRS_GROUP_ID) |
| return cluster_tenure_counter_read(cluster, idx); |
| |
| return readl_relaxed(cluster->reg_addr + |
| TXN_CYCLE_CNTR_DATA + (4 * idx)); |
| } |
| |
| static inline u32 cluster_txn_config_read(struct cluster_pmu *cluster) |
| { |
| return readl_relaxed(cluster->reg_addr + TXN_CONFIG_REG_OFFSET); |
| } |
| |
| static inline void cluster_txn_config_write(struct cluster_pmu *cluster, |
| u32 val) |
| { |
| writel_relaxed(val, cluster->reg_addr + TXN_CONFIG_REG_OFFSET); |
| } |
| |
| static inline u32 cluster_tenure_config_read(struct cluster_pmu *cluster) |
| { |
| return readl_relaxed(cluster->reg_addr + TENURE_CONFIG_REG_OFFSET); |
| } |
| |
| static inline void cluster_tenure_config_write(struct cluster_pmu *cluster, |
| u32 val) |
| { |
| writel_relaxed(val, cluster->reg_addr + TENURE_CONFIG_REG_OFFSET); |
| } |
| |
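| /* |
| * Counter reset and overflow-clear bits are pulsed: the bit is set in the |
| * config register and then cleared again to complete the reset. |
| */ |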
| static void cluster_txn_cntr_reset(struct cluster_pmu *cluster, u32 idx) |
| { |
| cluster_txn_config_write(cluster, cluster_txn_config_read(cluster) |
| | BIT(idx)); |
| cluster_txn_config_write(cluster, cluster_txn_config_read(cluster) |
| & ~BIT(idx)); |
| } |
| |
| static void cluster_pmu_reset(struct cluster_pmu *cluster) |
| { |
| cluster_txn_config_write(cluster, cluster_txn_config_read(cluster) |
| | TXN_RESET_ALL_CNTR); |
| cluster_txn_config_write(cluster, cluster_txn_config_read(cluster) |
| & ~TXN_RESET_ALL_CNTR); |
| cluster_txn_config_write(cluster, cluster_txn_config_read(cluster) |
| | TXN_RESET_ALL_CNTR_OVSR_BIT); |
| cluster_txn_config_write(cluster, cluster_txn_config_read(cluster) |
| & ~TXN_RESET_ALL_CNTR_OVSR_BIT); |
| cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster) |
| | TENURE_RESET_ALL_CNTR); |
| cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster) |
| & ~TENURE_RESET_ALL_CNTR); |
| cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster) |
| | TENURE_RESET_OVERFLOW_ALL_CNTR); |
| cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster) |
| & ~TENURE_RESET_OVERFLOW_ALL_CNTR); |
| } |
| |
| static void cluster_tenure_counter_reset(struct cluster_pmu *cluster, u32 idx) |
| { |
| cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster) |
| | BIT(idx)); |
| cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster) |
| & ~BIT(idx)); |
| } |
| |
| static inline void cluster_tenure_counter_enable(struct cluster_pmu *cluster, |
| u32 idx) |
| { |
| u32 val; |
| |
| val = cluster_tenure_config_read(cluster); |
| /* Already enabled */ |
| if (val & BIT(idx)) |
| return; |
| |
| switch (idx) { |
| case LOW_RANGE_OCCURRENCE_CNTR_ENABLE: |
| cluster_tenure_counter_reset(cluster, |
| LOW_RANGE_OCCURRENCE_CNTR_RESET); |
| break; |
| |
| case MID_RANGE_OCCURRENCE_CNTR_ENABLE: |
| cluster_tenure_counter_reset(cluster, |
| MID_RANGE_OCCURRENCE_CNTR_RESET); |
| break; |
| |
| case HIGH_RANGE_OCCURRENCE_CNTR_ENABLE: |
| cluster_tenure_counter_reset(cluster, |
| HIGH_RANGE_OCCURRENCE_CNTR_RESET); |
| break; |
| |
| default: |
| pr_crit(L2_COUNTERS_BUG |
| "Invalid index, during %s\n", __func__); |
| return; |
| } |
| |
| if (!(val & BIT(TENURE_CNTR_ENABLE))) { |
| cluster_tenure_counter_reset(cluster, |
| LPM_MODE_TENURE_CNTR_RESET); |
| /* |
| * Enable the tenure counter as part of enabling any occurrence |
| * counter, since the occurrence counters do not increment unless |
| * the tenure counter is enabled. |
| */ |
| cluster_tenure_config_write(cluster, |
| cluster_tenure_config_read(cluster) |
| | BIT(TENURE_CNTR_ENABLE)); |
| } |
| |
| cluster_tenure_config_write(cluster, |
| cluster_tenure_config_read(cluster) | BIT(idx)); |
| } |
| |
| static inline void cluster_txn_counter_enable(struct cluster_pmu *cluster, |
| u32 idx) |
| { |
| u32 val; |
| |
| val = cluster_txn_config_read(cluster); |
| if (val & BIT(idx)) |
| return; |
| |
| cluster_txn_cntr_reset(cluster, TXN_RESET_BIT + idx); |
| cluster_txn_config_write(cluster, cluster_txn_config_read(cluster) |
| | BIT(idx)); |
| } |
| |
| static inline void cluster_tenure_counter_disable(struct cluster_pmu *cluster, |
| u32 idx) |
| { |
| u32 val; |
| |
| cluster_tenure_config_write(cluster, cluster_tenure_config_read(cluster) |
| & ~BIT(idx)); |
| val = cluster_tenure_config_read(cluster); |
| if (!(val & OCCURRENCE_CNTR_ENABLE_MASK)) |
| cluster_tenure_config_write(cluster, val & |
| ~BIT(TENURE_CNTR_ENABLE)); |
| } |
| |
| static inline void cluster_txn_counter_disable(struct cluster_pmu *cluster, |
| u32 idx) |
| { |
| cluster_txn_config_write(cluster, |
| cluster_txn_config_read(cluster) & ~BIT(idx)); |
| } |
| |
| static inline u32 cluster_reg_read(struct cluster_pmu *cluster, u32 offset) |
| { |
| return readl_relaxed(cluster->reg_addr + offset); |
| } |
| |
| static inline void cluster_tenure_cntr_reset_ovsr(struct cluster_pmu *cluster, |
| u32 event_idx) |
| { |
| switch (event_idx) { |
| case LPM_TENURE_CNTR: |
| cluster_tenure_counter_reset(cluster, |
| LPM_MODE_TENURE_CNTR_OVERFLOW_RESET); |
| break; |
| |
| case LOW_RANGE_OCCURRENCE_CNTR_ENABLE: |
| cluster_tenure_counter_reset(cluster, |
| LOW_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET); |
| break; |
| |
| case MID_RANGE_OCCURRENCE_CNTR_ENABLE: |
| cluster_tenure_counter_reset(cluster, |
| MID_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET); |
| break; |
| |
| case HIGH_RANGE_OCCURRENCE_CNTR_ENABLE: |
| cluster_tenure_counter_reset(cluster, |
| HIGH_RANGE_OCCURRENCE_CNTR_OVERFLOW_RESET); |
| break; |
| |
| default: |
| pr_crit(L2_COUNTERS_BUG |
| "Invalid index, during %s\n", __func__); |
| } |
| } |
| |
| static inline void cluster_pmu_reset_ovsr(struct cluster_pmu *cluster, |
| u32 config_base) |
| { |
| u32 event_idx; |
| u32 event_grp; |
| |
| event_idx = (config_base & REGBIT_MASK) >> REGBIT_SHIFT; |
| event_grp = config_base & EVENT_GROUP_MASK; |
| |
| if (event_grp == TENURE_CNTRS_GROUP_ID) |
| cluster_tenure_cntr_reset_ovsr(cluster, event_idx); |
| else |
| cluster_txn_cntr_reset(cluster, |
| TXN_OVERFLOW_RESET_BIT + event_idx); |
| } |
| |
| static inline bool cluster_pmu_has_overflowed(u32 ovsr) |
| { |
| return !!(ovsr & l2_counter_present_mask); |
| } |
| |
| static inline bool cluster_pmu_counter_has_overflowed(u32 ovsr, u32 idx) |
| { |
| return !!(ovsr & BIT(idx)); |
| } |
| |
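| /* |
| * Fold the hardware counter value into event->count. All counters are |
| * 32 bits wide; prev_count is updated with a cmpxchg loop so concurrent |
| * readers cannot double-account a delta. On overflow (ovsr set) the |
| * remaining delta up to 0xffffffff is accounted and prev_count is reset |
| * to 0, matching the hardware overflow reset done by the caller. |
| */ |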
| static void l2_cache_event_update(struct perf_event *event, u32 ovsr) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| u64 delta, prev, now; |
| u32 event_idx; |
| u32 event_grp; |
| struct cluster_pmu *cluster = event->pmu_private; |
| |
| prev = local64_read(&hwc->prev_count); |
| if (ovsr) { |
| now = 0xffffffff; |
| goto out; |
| } |
| |
| event_idx = (hwc->config_base & REGBIT_MASK) >> REGBIT_SHIFT; |
| event_grp = hwc->config_base & EVENT_GROUP_MASK; |
| do { |
| prev = local64_read(&hwc->prev_count); |
| now = cluster_pmu_counter_get_value(cluster, event_idx, |
| event_grp); |
| } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); |
| |
| /* All are 32-bit counters */ |
| out: |
| delta = now - prev; |
| delta &= 0xffffffff; |
| |
| local64_add(delta, &event->count); |
| if (ovsr) |
| local64_set(&hwc->prev_count, 0); |
| } |
| |
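| /* |
| * Counters are not allocated dynamically: each event encodes its fixed |
| * counter index in config_base (ID field), so claiming a counter is just |
| * marking that index as used. |
| */ |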
| static int l2_cache_get_event_idx(struct cluster_pmu *cluster, |
| struct perf_event *event) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| int idx; |
| int num_ctrs = cluster->l2cache_pmu->num_counters; |
| |
| idx = (hwc->config_base & ID_MASK) >> ID_SHIFT; |
| if (idx >= num_ctrs) |
| return -EINVAL; |
| |
| if (test_bit(idx, cluster->used_counters)) |
| return -EAGAIN; |
| |
| set_bit(idx, cluster->used_counters); |
| return idx; |
| } |
| |
| static void l2_cache_clear_event_idx(struct cluster_pmu *cluster, |
| struct perf_event *event) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| int idx = hwc->idx; |
| |
| clear_bit(idx, cluster->used_counters); |
| } |
| |
| static irqreturn_t l2_cache_handle_irq(int irq_num, void *data) |
| { |
| struct cluster_pmu *cluster = data; |
| int num_counters = cluster->l2cache_pmu->num_counters; |
| u32 ovsr; |
| int idx; |
| u32 config_base; |
| |
| ovsr = cluster_reg_read(cluster, OVERFLOW_REG_OFFSET); |
| if (!cluster_pmu_has_overflowed(ovsr)) |
| return IRQ_NONE; |
| |
| /* |
| * An LPM tenure counter overflow is a special case: it should never |
| * happen, but reset its overflow bit anyway. The hardware is expected |
| * to handle the tenure counter overflow and its range classification; |
| * even if it does not, at worst one extra count is added erroneously |
| * to one of the low/mid/high occurrence counters, which is rare enough |
| * to ignore. |
| */ |
| if (ovsr & BIT(LPM_TENURE_CNTR)) |
| cluster_tenure_cntr_reset_ovsr(cluster, LPM_TENURE_CNTR); |
| |
| spin_lock(&cluster->pmu_lock); |
| for_each_set_bit(idx, cluster->used_counters, num_counters) { |
| struct perf_event *event = cluster->events[idx]; |
| struct hw_perf_event *hwc; |
| |
| if (WARN_ON_ONCE(!event)) |
| continue; |
| |
| if (!cluster_pmu_counter_has_overflowed(ovsr, idx)) |
| continue; |
| |
| l2_cache_event_update(event, 1); |
| hwc = &event->hw; |
| config_base = hwc->config_base; |
| cluster_pmu_reset_ovsr(cluster, config_base); |
| } |
| spin_unlock(&cluster->pmu_lock); |
| return IRQ_HANDLED; |
| } |
| |
| static int l2_cache_event_init(struct perf_event *event) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| struct cluster_pmu *cluster; |
| struct l2cache_pmu *l2cache_pmu; |
| |
| if (event->attr.type != event->pmu->type) |
| return -ENOENT; |
| |
| l2cache_pmu = to_l2cache_pmu(event->pmu); |
| |
| if (hwc->sample_period) { |
| dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, |
| "Sampling not supported\n"); |
| return -EOPNOTSUPP; |
| } |
| |
| if (event->cpu < 0) { |
| dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, |
| "Per-task mode not supported\n"); |
| return -EOPNOTSUPP; |
| } |
| |
| /* We cannot filter accurately, so we just don't allow it. */ |
| if (event->attr.exclude_user || event->attr.exclude_kernel || |
| event->attr.exclude_hv || event->attr.exclude_idle) { |
| dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, |
| "Can't exclude execution levels\n"); |
| return -EOPNOTSUPP; |
| } |
| |
| cluster = get_cluster_pmu(l2cache_pmu, event->cpu); |
| if (!cluster) { |
| /* CPU has not been initialised */ |
| dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, |
| "CPU%d not associated with L2 cluster\n", event->cpu); |
| return -EINVAL; |
| } |
| |
| /* Ensure all events in a group are on the same CPU */ |
| if ((event->group_leader != event) && |
| (cluster->on_cpu != event->group_leader->cpu)) { |
| dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, |
| "Can't create group on CPUs %d and %d", |
| event->cpu, event->group_leader->cpu); |
| return -EINVAL; |
| } |
| |
| hwc->idx = -1; |
| hwc->config_base = event->attr.config; |
| event->readable_on_cpus = CPU_MASK_ALL; |
| event->pmu_private = cluster; |
| |
| /* |
| * Override event->cpu with the CPU managing this cluster, since events |
| * may be opened even when the requested event->cpu is offline. |
| */ |
| event->cpu = cluster->on_cpu; |
| return 0; |
| } |
| |
| static void l2_cache_event_start(struct perf_event *event, int flags) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| struct cluster_pmu *cluster = event->pmu_private; |
| int event_idx; |
| |
| hwc->state = 0; |
| event_idx = (hwc->config_base & REGBIT_MASK) >> REGBIT_SHIFT; |
| if ((hwc->config_base & EVENT_GROUP_MASK) == TENURE_CNTRS_GROUP_ID) { |
| cluster_tenure_counter_enable(cluster, event_idx); |
| return; |
| } |
| |
| cluster_txn_counter_enable(cluster, event_idx); |
| } |
| |
| static void l2_cache_event_stop(struct perf_event *event, int flags) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| struct cluster_pmu *cluster = event->pmu_private; |
| int event_idx; |
| u32 ovsr; |
| |
| if (hwc->state & PERF_HES_STOPPED) |
| return; |
| |
| event_idx = (hwc->config_base & REGBIT_MASK) >> REGBIT_SHIFT; |
| if ((hwc->config_base & EVENT_GROUP_MASK) == TENURE_CNTRS_GROUP_ID) |
| cluster_tenure_counter_disable(cluster, event_idx); |
| else |
| cluster_txn_counter_disable(cluster, event_idx); |
| |
| ovsr = cluster_reg_read(cluster, OVERFLOW_REG_OFFSET); |
| if (cluster_pmu_counter_has_overflowed(ovsr, event_idx)) { |
| l2_cache_event_update(event, 1); |
| cluster_pmu_reset_ovsr(cluster, hwc->config_base); |
| } |
| |
| if (flags & PERF_EF_UPDATE) |
| l2_cache_event_update(event, 0); |
| |
| hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; |
| } |
| |
| static int l2_cache_event_add(struct perf_event *event, int flags) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| struct cluster_pmu *cluster = event->pmu_private; |
| int idx; |
| |
| idx = l2_cache_get_event_idx(cluster, event); |
| if (idx < 0) |
| return idx; |
| |
| hwc->idx = idx; |
| hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; |
| cluster->events[idx] = event; |
| local64_set(&hwc->prev_count, 0); |
| |
| if (flags & PERF_EF_START) |
| l2_cache_event_start(event, flags); |
| |
| /* Propagate changes to the userspace mapping. */ |
| perf_event_update_userpage(event); |
| |
| return 0; |
| } |
| |
| static void l2_cache_event_del(struct perf_event *event, int flags) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| int idx = hwc->idx; |
| unsigned long intr_flag; |
| struct cluster_pmu *cluster = event->pmu_private; |
| |
| /* |
| * We could race with the overflow interrupt for this event, so take |
| * the cluster lock to be safe. |
| */ |
| spin_lock_irqsave(&cluster->pmu_lock, intr_flag); |
| l2_cache_event_stop(event, flags | PERF_EF_UPDATE); |
| l2_cache_clear_event_idx(cluster, event); |
| cluster->events[idx] = NULL; |
| hwc->idx = -1; |
| spin_unlock_irqrestore(&cluster->pmu_lock, intr_flag); |
| |
| perf_event_update_userpage(event); |
| } |
| |
| static void l2_cache_event_read(struct perf_event *event) |
| { |
| l2_cache_event_update(event, 0); |
| } |
| |
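| /* |
| * Sysfs controls for the tenure counters: low_tenure_threshold and |
| * mid_tenure_threshold program the LOW_RANGE_TENURE_VAL and |
| * MID_RANGE_TENURE_VAL registers used by the low/mid/high range |
| * occurrence counters, and which_cluster_tenure (0 or 1) selects the |
| * cluster whose registers are programmed and reported. |
| */ |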
| static ssize_t low_tenure_threshold_store(struct device *dev, |
| struct device_attribute *attr, const char *buf, size_t count) |
| { |
| struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev)); |
| struct cluster_pmu *cluster = NULL; |
| u32 val; |
| int ret; |
| |
| ret = kstrtouint(buf, 0, &val); |
| if (ret < 0) |
| return ret; |
| |
| if (val == 0 || val > INT_MAX) |
| return -EINVAL; |
| |
| list_for_each_entry(cluster, &l2cache_pmu->clusters, next) { |
| if (cluster->cluster_id == which_cluster_tenure) |
| writel_relaxed(val, |
| cluster->reg_addr + LOW_RANGE_TENURE_VAL); |
| } |
| |
| return count; |
| } |
| |
| static ssize_t low_tenure_threshold_show(struct device *dev, |
| struct device_attribute *attr, char *buf) |
| { |
| struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev)); |
| struct cluster_pmu *cluster = NULL; |
| u32 val = 0; |
| |
| list_for_each_entry(cluster, &l2cache_pmu->clusters, next) { |
| if (cluster->cluster_id == which_cluster_tenure) |
| val = cluster_reg_read(cluster, LOW_RANGE_TENURE_VAL); |
| } |
| |
| return snprintf(buf, PAGE_SIZE, "0x%x\n", val); |
| } |
| |
| static ssize_t mid_tenure_threshold_store(struct device *dev, |
| struct device_attribute *attr, const char *buf, size_t count) |
| { |
| struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev)); |
| struct cluster_pmu *cluster = NULL; |
| u32 val; |
| int ret; |
| |
| ret = kstrtouint(buf, 0, &val); |
| if (ret < 0) |
| return ret; |
| |
| if (val == 0 || val > INT_MAX) |
| return -EINVAL; |
| |
| list_for_each_entry(cluster, &l2cache_pmu->clusters, next) { |
| if (cluster->cluster_id == which_cluster_tenure) |
| writel_relaxed(val, |
| cluster->reg_addr + MID_RANGE_TENURE_VAL); |
| } |
| |
| return count; |
| } |
| |
| static ssize_t mid_tenure_threshold_show(struct device *dev, |
| struct device_attribute *attr, char *buf) |
| { |
| struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev)); |
| struct cluster_pmu *cluster = NULL; |
| u32 val = 0; |
| |
| list_for_each_entry(cluster, &l2cache_pmu->clusters, next) { |
| if (cluster->cluster_id == which_cluster_tenure) |
| val = cluster_reg_read(cluster, MID_RANGE_TENURE_VAL); |
| } |
| |
| return snprintf(buf, PAGE_SIZE, "0x%x\n", val); |
| } |
| |
| static ssize_t which_cluster_tenure_show(struct device *dev, |
| struct device_attribute *attr, char *buf) |
| { |
| return snprintf(buf, PAGE_SIZE, "0x%x\n", which_cluster_tenure); |
| } |
| |
| static ssize_t which_cluster_tenure_store(struct device *dev, |
| struct device_attribute *attr, const char *buf, size_t count) |
| { |
| unsigned int val; |
| int ret; |
| |
| ret = kstrtouint(buf, 0, &val); |
| if (ret < 0) |
| return ret; |
| |
| if (val > 1) |
| return -EINVAL; |
| |
| which_cluster_tenure = val; |
| return count; |
| } |
| |
| static struct device_attribute mid_tenure_threshold_attr = |
| __ATTR(mid_tenure_threshold, 0644, |
| mid_tenure_threshold_show, |
| mid_tenure_threshold_store); |
| |
| static struct attribute *mid_tenure_threshold_attrs[] = { |
| &mid_tenure_threshold_attr.attr, |
| NULL, |
| }; |
| |
| static struct attribute_group mid_tenure_threshold_group = { |
| .attrs = mid_tenure_threshold_attrs, |
| }; |
| |
| static struct device_attribute low_tenure_threshold_attr = |
| __ATTR(low_tenure_threshold, 0644, |
| low_tenure_threshold_show, |
| low_tenure_threshold_store); |
| |
| static struct attribute *low_tenure_threshold_attrs[] = { |
| &low_tenure_threshold_attr.attr, |
| NULL, |
| }; |
| |
| static struct attribute_group low_tenure_threshold_group = { |
| .attrs = low_tenure_threshold_attrs, |
| }; |
| |
| static struct device_attribute which_cluster_tenure_attr = |
| __ATTR(which_cluster_tenure, 0644, |
| which_cluster_tenure_show, |
| which_cluster_tenure_store); |
| |
| static struct attribute *which_cluster_tenure_attrs[] = { |
| &which_cluster_tenure_attr.attr, |
| NULL, |
| }; |
| |
| static struct attribute_group which_cluster_tenure_group = { |
| .attrs = which_cluster_tenure_attrs, |
| }; |
| |
| static ssize_t l2_cache_pmu_cpumask_show(struct device *dev, |
| struct device_attribute *attr, char *buf) |
| { |
| struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev)); |
| |
| return cpumap_print_to_pagebuf(true, buf, &l2cache_pmu->cpumask); |
| } |
| |
| static struct device_attribute l2_cache_pmu_cpumask_attr = |
| __ATTR(cpumask, 0444, l2_cache_pmu_cpumask_show, NULL); |
| |
| |
| static struct attribute *l2_cache_pmu_cpumask_attrs[] = { |
| &l2_cache_pmu_cpumask_attr.attr, |
| NULL, |
| }; |
| |
| static struct attribute_group l2_cache_pmu_cpumask_group = { |
| .attrs = l2_cache_pmu_cpumask_attrs, |
| }; |
| |
| PMU_FORMAT_ATTR(event, "config:0-9"); |
| static struct attribute *l2_cache_pmu_formats[] = { |
| &format_attr_event.attr, |
| NULL, |
| }; |
| |
| static struct attribute_group l2_cache_pmu_format_group = { |
| .name = "format", |
| .attrs = l2_cache_pmu_formats, |
| }; |
| |
| static ssize_t l2cache_pmu_event_show(struct device *dev, |
| struct device_attribute *attr, char *page) |
| { |
| struct perf_pmu_events_attr *pmu_attr; |
| |
| pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); |
| return snprintf(page, PAGE_SIZE, "event=0x%02llx\n", pmu_attr->id); |
| } |
| |
| #define L2CACHE_EVENT_ATTR(_name, _id) \ |
| (&((struct perf_pmu_events_attr[]) { \ |
| { .attr = __ATTR(_name, 0444, l2cache_pmu_event_show, NULL), \ |
| .id = _id, } \ |
| })[0].attr.attr) |
| |
| static struct attribute *l2_cache_pmu_events[] = { |
| L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLE_CNTR), |
| L2CACHE_EVENT_ATTR(ddr_write, L2_EVENT_DDR_WR_CNTR), |
| L2CACHE_EVENT_ATTR(ddr_read, L2_EVENT_DDR_RD_CNTR), |
| L2CACHE_EVENT_ATTR(snoop_read, L2_EVENT_SNP_RD_CNTR), |
| L2CACHE_EVENT_ATTR(acp_write, L2_EVENT_ACP_WR_CNTR), |
| L2CACHE_EVENT_ATTR(low_range_occur, L2_EVENT_LOW_RANGE_OCCUR_CNTR), |
| L2CACHE_EVENT_ATTR(mid_range_occur, L2_EVENT_MID_RANGE_OCCUR_CNTR), |
| L2CACHE_EVENT_ATTR(high_range_occur, L2_EVENT_HIGH_RANGE_OCCUR_CNTR), |
| NULL |
| }; |
| |
| static struct attribute_group l2_cache_pmu_events_group = { |
| .name = "events", |
| .attrs = l2_cache_pmu_events, |
| }; |
| |
| static const struct attribute_group *l2_cache_pmu_attr_grps[] = { |
| &l2_cache_pmu_format_group, |
| &l2_cache_pmu_cpumask_group, |
| &l2_cache_pmu_events_group, |
| &mid_tenure_threshold_group, |
| &low_tenure_threshold_group, |
| &which_cluster_tenure_group, |
| NULL, |
| }; |
| |
| static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( |
| struct l2cache_pmu *l2cache_pmu, int cpu) |
| { |
| u64 mpidr; |
| int cpu_cluster_id; |
| struct cluster_pmu *cluster = NULL; |
| |
| /* |
| * This assumes that the cluster_id is in MPIDR[aff1] for |
| * single-threaded cores, and MPIDR[aff2] for multi-threaded |
| * cores. This logic will have to be updated if this changes. |
| */ |
| mpidr = read_cpuid_mpidr(); |
| if (mpidr & MPIDR_MT_BITMASK) |
| cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); |
| else |
| cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); |
| |
| list_for_each_entry(cluster, &l2cache_pmu->clusters, next) { |
| if (cluster->cluster_id != cpu_cluster_id) |
| continue; |
| |
| dev_info(&l2cache_pmu->pdev->dev, |
| "CPU%d associated with cluster %d\n", cpu, |
| cluster->cluster_id); |
| cpumask_set_cpu(cpu, &cluster->cluster_cpus); |
| *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster; |
| break; |
| } |
| |
| return cluster; |
| } |
| |
| static void clusters_initialization(struct l2cache_pmu *l2cache_pmu, |
| unsigned int cpu) |
| { |
| struct cluster_pmu *temp_cluster = NULL; |
| |
| list_for_each_entry(temp_cluster, &l2cache_pmu->clusters, next) { |
| cluster_pmu_reset(temp_cluster); |
| enable_irq(temp_cluster->irq); |
| temp_cluster->on_cpu = cpu; |
| } |
| } |
| |
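| /* |
| * CPU hotplug online callback: associate the CPU with its cluster the |
| * first time it comes up. The first CPU to come online resets all |
| * clusters, enables their IRQs and becomes the event reader for them; |
| * later CPUs take over a cluster only when its current reader is not one |
| * of that cluster's own CPUs. |
| */ |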
| static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) |
| { |
| struct cluster_pmu *cluster; |
| struct l2cache_pmu *l2cache_pmu; |
| cpumask_t cluster_online_cpus; |
| |
| if (!node) |
| goto out; |
| |
| l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node); |
| cluster = get_cluster_pmu(l2cache_pmu, cpu); |
| if (!cluster) { |
| /* First time this CPU has come online */ |
| cluster = l2_cache_associate_cpu_with_cluster(l2cache_pmu, cpu); |
| if (!cluster) { |
| /* Only if broken firmware doesn't list every cluster */ |
| WARN_ONCE(1, "No L2 cache cluster for CPU%d\n", cpu); |
| goto out; |
| } |
| } |
| |
| /* |
| * If some CPU is already managing this cluster, leave it in charge |
| * as long as it actually belongs to this cluster's CPUs. |
| */ |
| if (cluster->on_cpu != -1) { |
| cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus, |
| get_cpu_mask(cluster->on_cpu)); |
| if (cpumask_test_cpu(cluster->on_cpu, &cluster_online_cpus)) |
| goto out; |
| } else { |
| clusters_initialization(l2cache_pmu, cpu); |
| cpumask_set_cpu(cpu, &l2cache_pmu->cpumask); |
| goto out; |
| } |
| |
| cluster->on_cpu = cpu; |
| cpumask_set_cpu(cpu, &l2cache_pmu->cpumask); |
| |
| out: |
| return 0; |
| } |
| |
| static void disable_clusters_interrupt(struct l2cache_pmu *l2cache_pmu) |
| { |
| struct cluster_pmu *temp_cluster = NULL; |
| |
| list_for_each_entry(temp_cluster, &l2cache_pmu->clusters, next) |
| disable_irq(temp_cluster->irq); |
| } |
| |
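| /* |
| * CPU hotplug offline callback: if the CPU going down is a cluster's |
| * event reader, migrate the perf context to another online CPU |
| * (preferring one in the same cluster), or disable the cluster |
| * interrupts when no online CPU remains. |
| */ |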
| static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) |
| { |
| struct cluster_pmu *cluster; |
| struct l2cache_pmu *l2cache_pmu; |
| cpumask_t cluster_online_cpus; |
| unsigned int target; |
| struct cluster_pmu *temp_cluster = NULL; |
| |
| if (!node) |
| goto out; |
| |
| l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node); |
| cluster = get_cluster_pmu(l2cache_pmu, cpu); |
| if (!cluster) |
| goto out; |
| |
| /* If this CPU is not managing the cluster, we're done */ |
| if (cluster->on_cpu != cpu) |
| goto out; |
| |
| /* Give up ownership of cluster */ |
| cpumask_clear_cpu(cpu, &l2cache_pmu->cpumask); |
| cluster->on_cpu = -1; |
| |
| /* Pick any other CPU of this cluster that is still online */ |
| cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus, |
| cpu_online_mask); |
| target = cpumask_any_but(&cluster_online_cpus, cpu); |
| if (target >= nr_cpu_ids) { |
| cpumask_and(&cluster_online_cpus, &l2cache_pmu->cpumask, |
| cpu_online_mask); |
| target = cpumask_first(&cluster_online_cpus); |
| if (target >= nr_cpu_ids) { |
| disable_clusters_interrupt(l2cache_pmu); |
| goto out; |
| } |
| } |
| |
| cluster->on_cpu = target; |
| if (cpumask_first(&l2cache_pmu->cpumask) >= nr_cpu_ids) { |
| list_for_each_entry(temp_cluster, |
| &l2cache_pmu->clusters, next) { |
| if (temp_cluster->cluster_id != cluster->cluster_id) |
| temp_cluster->on_cpu = target; |
| } |
| } |
| |
| perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target); |
| cpumask_set_cpu(target, &l2cache_pmu->cpumask); |
| |
| out: |
| return 0; |
| } |
| |
| static void l2_cache_pmu_dev_release(struct device *dev) |
| { |
| struct cluster_pmu *cluster = to_cluster_device(dev); |
| |
| kfree(cluster); |
| } |
| |
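| /* |
| * Each child node of the "qcom,l2cache-pmu" device describes one cluster |
| * and must carry a "cluster-id" property, a register region and an |
| * overflow interrupt. A minimal sketch (property values are placeholders, |
| * not taken from any real platform): |
| * |
| *   l2-cache-pmu { |
| *       compatible = "qcom,l2cache-pmu"; |
| *       cluster0 { |
| *           cluster-id = <0>; |
| *           reg = <0x0 0x1000>; |
| *           interrupts = <0 1 4>; |
| *       }; |
| *   }; |
| */ |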
| static int l2_cache_pmu_probe_cluster(struct device *parent, |
| struct device_node *cn, void *data) |
| { |
| struct l2cache_pmu *l2cache_pmu = data; |
| struct cluster_pmu *cluster; |
| u32 fw_cluster_id; |
| struct resource res; |
| int ret; |
| int irq; |
| |
| cluster = kzalloc(sizeof(*cluster), GFP_KERNEL); |
| if (!cluster) { |
| ret = -ENOMEM; |
| return ret; |
| } |
| |
| cluster->dev.parent = parent; |
| cluster->dev.of_node = cn; |
| cluster->dev.release = l2_cache_pmu_dev_release; |
| dev_set_name(&cluster->dev, "%s:%s", dev_name(parent), cn->name); |
| |
| ret = device_register(&cluster->dev); |
| if (ret) { |
| pr_err(L2_COUNTERS_BUG |
| "failed to register l2 cache pmu device\n"); |
| goto err_put_dev; |
| } |
| |
| ret = of_property_read_u32(cn, "cluster-id", &fw_cluster_id); |
| if (ret) { |
| pr_err(L2_COUNTERS_BUG "Missing cluster-id.\n"); |
| goto err_put_dev; |
| } |
| |
| ret = of_address_to_resource(cn, 0, &res); |
| if (ret) { |
| pr_err(L2_COUNTERS_BUG "not able to find the resource\n"); |
| goto err_put_dev; |
| } |
| |
| cluster->reg_addr = devm_ioremap_resource(&cluster->dev, &res); |
| if (IS_ERR(cluster->reg_addr)) { |
| ret = PTR_ERR(cluster->reg_addr); |
| pr_err(L2_COUNTERS_BUG "not able to remap the resource\n"); |
| goto err_put_dev; |
| } |
| |
| INIT_LIST_HEAD(&cluster->next); |
| cluster->cluster_id = fw_cluster_id; |
| cluster->l2cache_pmu = l2cache_pmu; |
| |
| irq = of_irq_get(cn, 0); |
| if (irq < 0) { |
| pr_err(L2_COUNTERS_BUG |
| "Failed to get valid irq for cluster %u\n", |
| fw_cluster_id); |
| ret = irq; |
| goto err_put_dev; |
| } |
| |
| irq_set_status_flags(irq, IRQ_NOAUTOEN); |
| cluster->irq = irq; |
| cluster->on_cpu = -1; |
| |
| ret = devm_request_irq(&cluster->dev, irq, l2_cache_handle_irq, |
| IRQF_NOBALANCING | IRQF_NO_THREAD, |
| "l2-cache-pmu", cluster); |
| if (ret) { |
| pr_err(L2_COUNTERS_BUG |
| "Unable to request IRQ%d for L2 PMU counters\n", irq); |
| goto err_put_dev; |
| } |
| |
| pr_info(L2_COUNTERS_BUG |
| "Registered L2 cache PMU cluster %u\n", fw_cluster_id); |
| |
| spin_lock_init(&cluster->pmu_lock); |
| list_add(&cluster->next, &l2cache_pmu->clusters); |
| l2cache_pmu->num_pmus++; |
| |
| return 0; |
| |
| err_put_dev: |
| put_device(&cluster->dev); |
| return ret; |
| } |
| |
| static int l2_cache_pmu_probe(struct platform_device *pdev) |
| { |
| int err; |
| struct l2cache_pmu *l2cache_pmu; |
| struct device_node *pn = pdev->dev.of_node; |
| struct device_node *cn; |
| |
| l2cache_pmu = |
| devm_kzalloc(&pdev->dev, sizeof(*l2cache_pmu), GFP_KERNEL); |
| if (!l2cache_pmu) |
| return -ENOMEM; |
| |
| INIT_LIST_HEAD(&l2cache_pmu->clusters); |
| platform_set_drvdata(pdev, l2cache_pmu); |
| l2cache_pmu->pmu = (struct pmu) { |
| .name = "l2cache_counters", |
| .task_ctx_nr = perf_invalid_context, |
| .event_init = l2_cache_event_init, |
| .add = l2_cache_event_add, |
| .del = l2_cache_event_del, |
| .start = l2_cache_event_start, |
| .stop = l2_cache_event_stop, |
| .read = l2_cache_event_read, |
| .attr_groups = l2_cache_pmu_attr_grps, |
| }; |
| |
| l2cache_pmu->num_counters = MAX_L2_CNTRS; |
| l2cache_pmu->pdev = pdev; |
| l2cache_pmu->pmu_cluster = devm_alloc_percpu(&pdev->dev, |
| struct cluster_pmu *); |
| if (!l2cache_pmu->pmu_cluster) |
| return -ENOMEM; |
| |
| l2_counter_present_mask = GENMASK(l2cache_pmu->num_counters - 1, 0); |
| cpumask_clear(&l2cache_pmu->cpumask); |
| |
| for_each_available_child_of_node(pn, cn) { |
| err = l2_cache_pmu_probe_cluster(&pdev->dev, cn, l2cache_pmu); |
| if (err < 0) { |
| of_node_put(cn); |
| dev_err(&pdev->dev, |
| "No hardware L2 cache PMUs found\n"); |
| return err; |
| } |
| } |
| |
| if (l2cache_pmu->num_pmus == 0) { |
| dev_err(&pdev->dev, "No hardware L2 cache PMUs found\n"); |
| return -ENODEV; |
| } |
| |
| err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, |
| &l2cache_pmu->node); |
| if (err) { |
| dev_err(&pdev->dev, "Error %d registering hotplug\n", err); |
| return err; |
| } |
| |
| err = perf_pmu_register(&l2cache_pmu->pmu, l2cache_pmu->pmu.name, -1); |
| if (err) { |
| dev_err(&pdev->dev, "Error %d registering L2 cache PMU\n", err); |
| goto out_unregister; |
| } |
| |
| dev_info(&pdev->dev, "Registered L2 cache PMU using %d HW PMUs\n", |
| l2cache_pmu->num_pmus); |
| |
| return 0; |
| |
| out_unregister: |
| cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, |
| &l2cache_pmu->node); |
| return err; |
| } |
| |
| static int l2cache_pmu_unregister_device(struct device *dev, void *data) |
| { |
| device_unregister(dev); |
| return 0; |
| } |
| |
| static int l2_cache_pmu_remove(struct platform_device *pdev) |
| { |
| struct l2cache_pmu *l2cache_pmu = platform_get_drvdata(pdev); |
| int ret; |
| |
| perf_pmu_unregister(&l2cache_pmu->pmu); |
| cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, |
| &l2cache_pmu->node); |
| |
| ret = device_for_each_child(&pdev->dev, NULL, |
| l2cache_pmu_unregister_device); |
| if (ret) |
| dev_warn(&pdev->dev, |
| "can't remove cluster pmu device: %d\n", ret); |
| return ret; |
| } |
| |
| static const struct of_device_id l2_cache_pmu_of_match[] = { |
| { .compatible = "qcom,l2cache-pmu" }, |
| {}, |
| }; |
| |
| static struct platform_driver l2_cache_pmu_driver = { |
| .driver = { |
| .name = "l2cache-pmu", |
| .of_match_table = l2_cache_pmu_of_match, |
| }, |
| .probe = l2_cache_pmu_probe, |
| .remove = l2_cache_pmu_remove, |
| }; |
| |
| static int __init register_l2_cache_pmu_driver(void) |
| { |
| int err; |
| |
| err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, |
| "AP_PERF_ARM_QCOM_L2_ONLINE", |
| l2cache_pmu_online_cpu, |
| l2cache_pmu_offline_cpu); |
| if (err) |
| return err; |
| |
| return platform_driver_register(&l2_cache_pmu_driver); |
| } |
| device_initcall(register_l2_cache_pmu_driver); |