blob: 9499c1fb6276d25ab46e887b490a0d1d210bf3ef [file] [log] [blame]
/*
* drivers/misc/tegra-profiler/armv7_pmu.c
*
* Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <asm/cputype.h>
#include <asm/pmu.h>
#include <linux/tegra_profiler.h>
#include "armv7_pmu.h"
#include "quadd.h"
#include "debug.h"
/* Global driver state: detected CPU arch, event map, counter bookkeeping. */
static struct armv7_pmu_ctx pmu_ctx;
/* Per-CPU previous raw counter values, used by pmu_read() to report deltas. */
DEFINE_PER_CPU(u32[QUADD_MAX_PMU_COUNTERS], pmu_prev_val);
/*
 * Cortex-A9 mapping from generic quadd event ids to ARMv7 hardware event
 * numbers.  Entries not listed default to 0; unsupported events are marked
 * explicitly.  Note both L1 read and write misses map to the same refill
 * event — see armv7_pmu_adjust_value().
 */
static unsigned quadd_armv7_a9_events_map[QUADD_EVENT_TYPE_MAX] = {
[QUADD_EVENT_TYPE_INSTRUCTIONS] =
QUADD_ARMV7_A9_HW_EVENT_INST_OUT_OF_RENAME_STAGE,
[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
QUADD_ARMV7_HW_EVENT_PC_WRITE,
[QUADD_EVENT_TYPE_BRANCH_MISSES] =
QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
[QUADD_EVENT_TYPE_BUS_CYCLES] =
QUADD_ARMV7_HW_EVENT_CLOCK_CYCLES,
[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
QUADD_ARMV7_HW_EVENT_DCACHE_REFILL,
[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
QUADD_ARMV7_UNSUPPORTED_EVENT,
[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
QUADD_ARMV7_UNSUPPORTED_EVENT,
[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
QUADD_ARMV7_UNSUPPORTED_EVENT,
};
/*
 * Cortex-A15 mapping from generic quadd event ids to ARMv7 hardware event
 * numbers.  Unlike the A9 map, the A15 has distinct read/write refill
 * events for L1 and L2 data caches.
 */
static unsigned quadd_armv7_a15_events_map[QUADD_EVENT_TYPE_MAX] = {
[QUADD_EVENT_TYPE_INSTRUCTIONS] =
QUADD_ARMV7_HW_EVENT_INSTR_EXECUTED,
[QUADD_EVENT_TYPE_BRANCH_INSTRUCTIONS] =
QUADD_ARMV7_A15_HW_EVENT_SPEC_PC_WRITE,
[QUADD_EVENT_TYPE_BRANCH_MISSES] =
QUADD_ARMV7_HW_EVENT_PC_BRANCH_MIS_PRED,
[QUADD_EVENT_TYPE_BUS_CYCLES] = QUADD_ARMV7_HW_EVENT_BUS_CYCLES,
[QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES] =
QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_READ_REFILL,
[QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES] =
QUADD_ARMV7_A15_HW_EVENT_L1_DCACHE_WRITE_REFILL,
[QUADD_EVENT_TYPE_L1_ICACHE_MISSES] =
QUADD_ARMV7_HW_EVENT_IFETCH_MISS,
[QUADD_EVENT_TYPE_L2_DCACHE_READ_MISSES] =
QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_READ_REFILL,
[QUADD_EVENT_TYPE_L2_DCACHE_WRITE_MISSES] =
QUADD_ARMV7_A15_HW_EVENT_L2_DCACHE_WRITE_REFILL,
[QUADD_EVENT_TYPE_L2_ICACHE_MISSES] =
QUADD_ARMV7_UNSUPPORTED_EVENT,
};
/* Read the PMU control register (PMCR, cp15 c9/c12/0). */
static u32 armv7_pmu_pmnc_read(void)
{
u32 val;
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
return val;
}
/*
 * Write the PMU control register (PMCR, cp15 c9/c12/0), masking off
 * bits that are not valid to write.
 */
static void armv7_pmu_pmnc_write(u32 val)
{
val &= QUADD_ARMV7_PMNC_MASK;
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
}
/*
 * Enable one counter via the counter-enable-set register
 * (cp15 c9/c12/1).  The cycle counter has its own dedicated bit;
 * event counters are enabled by bit position.
 */
static void armv7_pmu_pmnc_enable_counter(int index)
{
	u32 bit = (index == QUADD_ARMV7_CYCLE_COUNTER) ?
		QUADD_ARMV7_CCNT : (1U << index);

	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (bit));
}
/*
 * Select which event counter subsequent counter/event-type register
 * accesses refer to (counter-selection register, cp15 c9/c12/5).
 */
static void armv7_pmu_select_counter(unsigned int idx)
{
	u32 sel = idx & QUADD_ARMV7_SELECT_MASK;

	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel));
}
/*
 * Compensate for Cortex-A8/A9 L1 data-cache counters, which do not
 * differentiate between read and write accesses/misses: when both the
 * read-miss and write-miss events are in use, the raw value is halved
 * so the two reported values roughly sum to the true total.
 */
static u32 armv7_pmu_adjust_value(u32 value, int event_id)
{
	if (!pmu_ctx.l1_cache_rw)
		return value;

	if (pmu_ctx.arch != QUADD_ARM_CPU_TYPE_CORTEX_A8 &&
	    pmu_ctx.arch != QUADD_ARM_CPU_TYPE_CORTEX_A9)
		return value;

	if (event_id == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES ||
	    event_id == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
		return value / 2;

	return value;
}
/*
 * Read the current raw value of a counter: the cycle counter directly
 * (PMCCNTR, cp15 c9/c13/0), or an event counter via select-then-read
 * (cp15 c9/c13/2).
 */
static u32 armv7_pmu_read_counter(int idx)
{
u32 val = 0;
if (idx == QUADD_ARMV7_CYCLE_COUNTER) {
/* Cycle count register (PMCCNTR) reading */
asm volatile ("MRC p15, 0, %0, c9, c13, 0" : "=r"(val));
} else {
/* counter selection*/
armv7_pmu_select_counter(idx);
/* event count register reading */
asm volatile ("MRC p15, 0, %0, c9, c13, 2" : "=r"(val));
}
return val;
}
/*
 * Write a raw value to a counter (currently unused): the cycle counter
 * directly, or an event counter via select-then-write.
 */
static __attribute__((unused)) void armv7_pmu_write_counter(int idx, u32 value)
{
if (idx == QUADD_ARMV7_CYCLE_COUNTER) {
/* Cycle count register (PMCCNTR) writing */
asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
} else {
/* counter selection*/
armv7_pmu_select_counter(idx);
/* event count register writing */
asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
}
}
/*
 * Program the event type for the currently selected counter
 * (event-type register, cp15 c9/c13/1); caller must have selected
 * a counter via armv7_pmu_select_counter() first.
 */
static void armv7_pmu_event_select(u32 event)
{
event &= QUADD_ARMV7_EVTSEL_MASK;
asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (event));
}
/*
 * Enable the overflow interrupt for one counter
 * (interrupt-enable-set register, cp15 c9/c14/1).  Currently unused.
 */
static __attribute__((unused)) void armv7_pmnc_enable_interrupt(int idx)
{
	u32 bit = (idx == QUADD_ARMV7_CYCLE_COUNTER) ?
		QUADD_ARMV7_CCNT : (1U << idx);

	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (bit));
}
/*
 * Disable the overflow interrupt for one counter
 * (interrupt-enable-clear register, cp15 c9/c14/2).  Currently unused.
 */
static __attribute__((unused)) void armv7_pmnc_disable_interrupt(int idx)
{
	u32 bit = (idx == QUADD_ARMV7_CYCLE_COUNTER) ?
		QUADD_ARMV7_CCNT : (1U << idx);

	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (bit));
}
/*
 * Disable overflow interrupts for the cycle counter and every event
 * counter this CPU implements (per pmu_ctx.counters_mask).
 */
static void armv7_pmnc_disable_all_interrupts(void)
{
u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
}
/*
 * Clear pending overflow flags (overflow-status register, cp15 c9/c12/3)
 * for the cycle counter and all implemented event counters.
 */
static void armv7_pmnc_reset_overflow_flags(void)
{
u32 val = QUADD_ARMV7_CCNT | pmu_ctx.counters_mask;
asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
}
/* Bind hardware event number @event to event counter @idx. */
static inline void select_event(unsigned int idx, unsigned int event)
{
/* counter selection */
armv7_pmu_select_counter(idx);
armv7_pmu_event_select(event);
}
/*
 * Globally stop the PMU by clearing the enable bit in the control
 * register; skip the write when the PMU is already disabled.
 */
static inline void disable_all_counters(void)
{
	u32 pmnc = armv7_pmu_pmnc_read();

	if (pmnc & QUADD_ARMV7_PMNC_E)
		armv7_pmu_pmnc_write(pmnc & ~QUADD_ARMV7_PMNC_E);
}
/*
 * Globally start the PMU: set the enable (E) and export (X) bits in
 * the control register via a read-modify-write.
 */
static inline void enable_all_counters(void)
{
	armv7_pmu_pmnc_write(armv7_pmu_pmnc_read() |
			     QUADD_ARMV7_PMNC_E | QUADD_ARMV7_PMNC_X);
}
/*
 * Put the PMU in a known state before programming events: clear any
 * stale overflow flags and mask all overflow interrupts (this driver
 * polls counters rather than using interrupts).
 */
static inline void quadd_init_pmu(void)
{
armv7_pmnc_reset_overflow_flags();
armv7_pmnc_disable_all_interrupts();
}
/*
 * Zero all counters: the P bit resets the event counters and the
 * C bit resets the cycle counter.
 */
static inline void reset_all_counters(void)
{
	armv7_pmu_pmnc_write(armv7_pmu_pmnc_read() |
			     QUADD_ARMV7_PMNC_P | QUADD_ARMV7_PMNC_C);
}
static int pmu_enable(void)
{
return 0;
}
static void pmu_disable(void)
{
}
/*
 * Program and start the PMU on the current CPU.
 *
 * Sequence matters: counters are globally disabled before programming,
 * overflow state/interrupts are reset, each requested event is bound to
 * its counter and individually enabled, then everything is zeroed and
 * the PMU is globally enabled.  Per-CPU previous values are cleared so
 * the first pmu_read() delta starts from zero.
 */
static void pmu_start(void)
{
int i, idx;
u32 event;
u32 *prevp = __get_cpu_var(pmu_prev_val);
disable_all_counters();
quadd_init_pmu();
for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
prevp[i] = 0;
event = pmu_event->hw_value;
idx = pmu_event->counter_idx;
/* The cycle counter is hard-wired; only event counters need an event. */
if (idx != QUADD_ARMV7_CYCLE_COUNTER)
select_event(idx, event);
armv7_pmu_pmnc_enable_counter(idx);
}
reset_all_counters();
enable_all_counters();
qm_debug_start_source(QUADD_EVENT_SOURCE_PMU);
}
/* Stop the PMU on the current CPU: zero the counters, then disable globally. */
static void pmu_stop(void)
{
reset_all_counters();
disable_all_counters();
qm_debug_stop_source(QUADD_EVENT_SOURCE_PMU);
}
/*
 * Sample all configured counters into @events.
 *
 * For each event the raw counter is read, adjusted for the A8/A9 L1
 * read/write quirk, and reported together with the value from the
 * previous read (per-CPU pmu_prev_val) so callers can compute deltas.
 * Returns the number of events filled in, or 0 if set_events() was
 * never successfully called.  @events must have room for
 * pmu_ctx.nr_used_counters entries — not checked here.
 */
static int __maybe_unused pmu_read(struct event_data *events)
{
int idx, i;
u32 val;
u32 *prevp = __get_cpu_var(pmu_prev_val);
if (pmu_ctx.nr_used_counters == 0) {
pr_warn_once("error: counters were not initialized\n");
return 0;
}
for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];
idx = pmu_event->counter_idx;
val = armv7_pmu_read_counter(idx);
val = armv7_pmu_adjust_value(val, pmu_event->quadd_event_id);
events[i].event_source = QUADD_EVENT_SOURCE_PMU;
events[i].event_id = pmu_event->quadd_event_id;
events[i].val = val;
events[i].prev_val = prevp[i];
prevp[i] = val;
qm_debug_read_counter(events[i].event_id, events[i].prev_val,
events[i].val);
}
return pmu_ctx.nr_used_counters;
}
/*
 * Emulated counter read used when QUADD_USE_EMULATE_COUNTERS is set:
 * produces a deterministic sawtooth (100..200 step 5) instead of
 * touching hardware.
 *
 * Fix: the original stored the previous counter *value* (prevp[i])
 * into event_id and never filled event_source/prev_val or updated
 * prevp[i], so emulated samples did not follow the pmu_read()
 * contract.  Fill all fields the same way pmu_read() does.
 */
static int __maybe_unused pmu_read_emulate(struct event_data *events)
{
	int i;
	static u32 val = 100;
	u32 *prevp = __get_cpu_var(pmu_prev_val);

	for (i = 0; i < pmu_ctx.nr_used_counters; i++) {
		if (val > 200)
			val = 100;

		events[i].event_source = QUADD_EVENT_SOURCE_PMU;
		events[i].event_id = pmu_ctx.pmu_events[i].quadd_event_id;
		events[i].val = val;
		events[i].prev_val = prevp[i];
		prevp[i] = val;

		val += 5;
	}

	return pmu_ctx.nr_used_counters;
}
/*
 * Configure the set of events to profile.
 *
 * @events: array of generic quadd event ids (may be NULL)
 * @size:   number of entries in @events
 *
 * Each requested event is mapped to a hardware event number via the
 * per-CPU event map and assigned a counter index; the CPU-cycles event
 * always uses the dedicated cycle counter.  Returns 0 on success,
 * -ENOSPC if too many events, -ENODEV if no event map is configured,
 * -EINVAL on an out-of-range event id.
 *
 * Fix: the bounds check used 'events[i] > QUADD_EVENT_TYPE_MAX', which
 * let an id equal to QUADD_EVENT_TYPE_MAX index one past the end of
 * current_map[QUADD_EVENT_TYPE_MAX]; negative ids were not rejected at
 * all.  Both are now refused with -EINVAL.
 */
static int set_events(int *events, int size)
{
	int i, nr_l1_r = 0, nr_l1_w = 0, curr_idx = 0;

	pmu_ctx.l1_cache_rw = 0;
	pmu_ctx.nr_used_counters = 0;

	if (!events || size == 0)
		return 0;

	if (size > QUADD_MAX_PMU_COUNTERS) {
		pr_err("Too many events (> %d)\n", QUADD_MAX_PMU_COUNTERS);
		return -ENOSPC;
	}

	if (!pmu_ctx.current_map) {
		pr_err("Invalid current_map\n");
		return -ENODEV;
	}

	for (i = 0; i < size; i++) {
		struct quadd_pmu_event_info *pmu_event = &pmu_ctx.pmu_events[i];

		/* Valid ids are 0 .. QUADD_EVENT_TYPE_MAX-1 (map bounds). */
		if (events[i] < 0 || events[i] >= QUADD_EVENT_TYPE_MAX) {
			pr_err("Error event: %d\n", events[i]);
			return -EINVAL;
		}

		if (curr_idx >= pmu_ctx.nr_counters) {
			pr_err("Too many events (> %d)\n",
			       pmu_ctx.nr_counters);
			return -ENOSPC;
		}

		if (events[i] == QUADD_EVENT_TYPE_CPU_CYCLES) {
			/* Cycle counting uses the dedicated cycle counter. */
			pmu_event->hw_value = QUADD_ARMV7_CPU_CYCLE_EVENT;
			pmu_event->counter_idx = QUADD_ARMV7_CYCLE_COUNTER;
		} else {
			pmu_event->hw_value = pmu_ctx.current_map[events[i]];
			pmu_event->counter_idx = curr_idx++;
		}
		pmu_event->quadd_event_id = events[i];

		/* Track L1 read/write pairs for the A8/A9 halving quirk. */
		if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_READ_MISSES)
			nr_l1_r++;
		else if (events[i] == QUADD_EVENT_TYPE_L1_DCACHE_WRITE_MISSES)
			nr_l1_w++;

		pr_info("Event has been added: id/pmu value: %s/%#x\n",
			quadd_get_event_str(events[i]),
			pmu_event->hw_value);
	}
	pmu_ctx.nr_used_counters = size;

	if (nr_l1_r > 0 && nr_l1_w > 0)
		pmu_ctx.l1_cache_rw = 1;

	return 0;
}
/*
 * Fill @events with every generic event id this CPU supports and
 * return the count.  @events must have room for QUADD_EVENT_TYPE_MAX
 * entries.
 *
 * Fix: guard against pmu_ctx.current_map being NULL (it is left NULL
 * for unrecognized CPUs in quadd_armv7_pmu_init(); set_events() already
 * checks the same pointer) — report zero supported events instead of
 * dereferencing NULL.
 */
static int get_supported_events(int *events)
{
	int i, nr_events = 0;

	if (!pmu_ctx.current_map)
		return 0;

	for (i = 0; i < QUADD_EVENT_TYPE_MAX; i++) {
		if (pmu_ctx.current_map[i] != QUADD_ARMV7_UNSUPPORTED_EVENT)
			events[nr_events++] = i;
	}
	return nr_events;
}
/*
 * Event-source vtable exported via quadd_armv7_pmu_init().  With
 * QUADD_USE_EMULATE_COUNTERS defined the hardware read path is
 * replaced by the emulated one.
 */
static struct quadd_event_source_interface pmu_armv7_int = {
.enable = pmu_enable,
.disable = pmu_disable,
.start = pmu_start,
.stop = pmu_stop,
#ifndef QUADD_USE_EMULATE_COUNTERS
.read = pmu_read,
#else
.read = pmu_read_emulate,
#endif
.set_events = set_events,
.get_supported_events = get_supported_events,
};
/*
 * Detect the CPU and initialize the PMU backend.
 *
 * Reads the CPU id register: implementer is bits [31:24]; the part
 * number field is extracted with a 0xFFF0 mask (assumes the
 * QUADD_ARM_CPU_PART_NUMBER_* constants use the same encoding —
 * confirm against armv7_pmu.h).  Returns the event-source interface
 * for supported ARM Cortex-A9/A15 parts, or NULL otherwise.
 */
struct quadd_event_source_interface *quadd_armv7_pmu_init(void)
{
struct quadd_event_source_interface *pmu = NULL;
unsigned long cpu_id, cpu_implementer, part_number;
cpu_id = read_cpuid_id();
cpu_implementer = cpu_id >> 24;
part_number = cpu_id & 0xFFF0;
if (cpu_implementer == QUADD_ARM_CPU_IMPLEMENTER) {
switch (part_number) {
case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A9:
pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A9;
/* NOTE(review): strcpy assumes arch_name is large enough — verify size */
strcpy(pmu_ctx.arch_name, "Cortex A9");
pmu_ctx.nr_counters = 6;
pmu_ctx.counters_mask =
QUADD_ARMV7_COUNTERS_MASK_CORTEX_A9;
pmu_ctx.current_map = quadd_armv7_a9_events_map;
pmu = &pmu_armv7_int;
break;
case QUADD_ARM_CPU_PART_NUMBER_CORTEX_A15:
pmu_ctx.arch = QUADD_ARM_CPU_TYPE_CORTEX_A15;
strcpy(pmu_ctx.arch_name, "Cortex A15");
pmu_ctx.nr_counters = 6;
pmu_ctx.counters_mask =
QUADD_ARMV7_COUNTERS_MASK_CORTEX_A15;
pmu_ctx.current_map = quadd_armv7_a15_events_map;
pmu = &pmu_armv7_int;
break;
default:
/* Unknown part: no counters, no event map, interface stays NULL. */
pmu_ctx.arch = QUADD_ARM_CPU_TYPE_UNKNOWN;
strcpy(pmu_ctx.arch_name, "Unknown");
pmu_ctx.nr_counters = 0;
pmu_ctx.current_map = NULL;
break;
}
}
pr_info("arch: %s, number of counters: %d\n",
pmu_ctx.arch_name, pmu_ctx.nr_counters);
return pmu;
}