| /* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 and |
| * only version 2 as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| */ |
| |
| #include <linux/kernel.h> |
| #include <linux/edac.h> |
| #include <linux/interrupt.h> |
| #include <linux/of_device.h> |
| #include <linux/platform_device.h> |
| #include <linux/perf_event.h> |
| #include <linux/smp.h> |
| #include <linux/cpu.h> |
| #include <linux/cpu_pm.h> |
| #include <linux/of_irq.h> |
| #include <linux/spinlock.h> |
| #include <linux/slab.h> |
| #include <linux/workqueue.h> |
| #include <linux/percpu.h> |
| #include <linux/msm_rtb.h> |
| |
| #include <asm/cputype.h> |
| #include <asm/esr.h> |
| |
| #include "edac_core.h" |
| |
/*
 * Field extractors for the 64-bit memory error syndrome registers
 * (CPUMERRSR_EL1 / L2MERRSR_EL1) of each supported core type.
 *
 * Common layout used below: bit 63 = fatal, bits [47:40] = "other" error
 * count, bits [39:32] = repeat count, bit 31 = valid, RAM identifier from
 * bit 24 upwards, a way/bank/cpu field at bit 18, and an index/address in
 * the low bits.
 *
 * All single-bit masks are built from unsigned 64-bit constants.  A plain
 * "1 << 31" is a negative int; when combined with a u64 operand it is
 * sign-extended to 0xffffffff80000000, which made the *_VALID() tests fire
 * on any of bits 31..63 (fatal flag, error counters) rather than on the
 * valid bit alone.  "1LL << 63" likewise overflows a signed type.
 */
#define A53_CPUMERRSR_FATAL(a)		((a) & (1ULL << 63))
#define A53_CPUMERRSR_OTHER(a)		(((a) >> 40) & 0xff)
#define A53_CPUMERRSR_REPT(a)		(((a) >> 32) & 0xff)
#define A53_CPUMERRSR_VALID(a)		((a) & (1ULL << 31))
#define A53_CPUMERRSR_RAMID(a)		(((a) >> 24) & 0x7f)
#define A53_CPUMERRSR_CPUID(a)		(((a) >> 18) & 0x07)
#define A53_CPUMERRSR_ADDR(a)		((a) & 0xfff)

#define A53_L2MERRSR_FATAL(a)		((a) & (1ULL << 63))
#define A53_L2MERRSR_OTHER(a)		(((a) >> 40) & 0xff)
#define A53_L2MERRSR_REPT(a)		(((a) >> 32) & 0xff)
#define A53_L2MERRSR_VALID(a)		((a) & (1ULL << 31))
#define A53_L2MERRSR_RAMID(a)		(((a) >> 24) & 0x7f)
#define A53_L2MERRSR_CPUID(a)		(((a) >> 18) & 0x0f)
#define A53_L2MERRSR_INDEX(a)		(((a) >> 3) & 0x3fff)

#define A57_CPUMERRSR_FATAL(a)		((a) & (1ULL << 63))
#define A57_CPUMERRSR_OTHER(a)		(((a) >> 40) & 0xff)
#define A57_CPUMERRSR_REPT(a)		(((a) >> 32) & 0xff)
#define A57_CPUMERRSR_VALID(a)		((a) & (1ULL << 31))
#define A57_CPUMERRSR_RAMID(a)		(((a) >> 24) & 0x7f)
#define A57_CPUMERRSR_BANK(a)		(((a) >> 18) & 0x1f)
#define A57_CPUMERRSR_INDEX(a)		((a) & 0x1ffff)

#define A57_L2MERRSR_FATAL(a)		((a) & (1ULL << 63))
#define A57_L2MERRSR_OTHER(a)		(((a) >> 40) & 0xff)
#define A57_L2MERRSR_REPT(a)		(((a) >> 32) & 0xff)
#define A57_L2MERRSR_VALID(a)		((a) & (1ULL << 31))
#define A57_L2MERRSR_RAMID(a)		(((a) >> 24) & 0x7f)
#define A57_L2MERRSR_CPUID(a)		(((a) >> 18) & 0x0f)
#define A57_L2MERRSR_INDEX(a)		((a) & 0x1ffff)

/* Kryo2xx Gold uses 6-bit counters and a single-bit RAM identifier. */
#define KRYO2XX_GOLD_L2MERRSR_FATAL(a)	((a) & (1ULL << 63))
#define KRYO2XX_GOLD_L2MERRSR_OTHER(a)	(((a) >> 40) & 0x3f)
#define KRYO2XX_GOLD_L2MERRSR_REPT(a)	(((a) >> 32) & 0x3f)
#define KRYO2XX_GOLD_L2MERRSR_VALID(a)	((a) & (1ULL << 31))
#define KRYO2XX_GOLD_L2MERRSR_RAMID(a)	((a) & (1ULL << 24))
#define KRYO2XX_GOLD_L2MERRSR_WAY(a)	(((a) >> 18) & 0x0f)
#define KRYO2XX_GOLD_L2MERRSR_INDEX(a)	(((a) >> 3) & 0x3fff)
| |
/*
 * L2ECTLR_EL1 error flags.  The internal-error bit is cleared by
 * arm64_erp_local_handler(); the external-error bit is tested and cleared
 * by arm64_ext_local_handler().
 */
#define L2ECTLR_INT_ERR (1 << 30)
#define L2ECTLR_EXT_ERR (1 << 29)

/*
 * ESR_EL1 decoding used when dumping register state: ESR_L2_DBE() selects
 * the SError syndrome that the dump routines report as "Double bit error on
 * dirty L2 cacheline".
 */
#define ESR_SERROR(a) ((a) >> ESR_ELx_EC_SHIFT == ESR_ELx_EC_SERROR)
#define ESR_VALID(a) ((a) & BIT(24))
#define ESR_L2_DBE(a) (ESR_SERROR(a) && ESR_VALID(a) && \
	(((a) & 0x00C00003) == 0x1))

/* Offset of the imprecise error register from the mapped CCI base. */
#define CCI_IMPRECISEERROR_REG 0x10

/* Block numbers passed as block_nr to the EDAC ce/ue handlers. */
#define L1_CACHE 0
#define L2_CACHE 1
#define CCI 2

/* Indices into the errors[] table; order must match that table. */
#define A53_L1_CE 0
#define A53_L1_UE 1
#define A53_L2_CE 2
#define A53_L2_UE 3
#define A57_L1_CE 4
#define A57_L1_UE 5
#define A57_L2_CE 6
#define A57_L2_UE 7
#define L2_EXT_UE 8
#define CCI_UE 9
#define KRYO2XX_SILVER_L1_CE 10
#define KRYO2XX_SILVER_L1_UE 11
#define KRYO2XX_SILVER_L2_CE 12
#define KRYO2XX_SILVER_L2_UE 13
#define KRYO2XX_GOLD_L2_CE 14
#define KRYO2XX_GOLD_L2_UE 15
| |
/* Build-time policy: value copied into edev_ctl->panic_on_ue at probe. */
#ifdef CONFIG_EDAC_CORTEX_ARM64_PANIC_ON_UE
#define ARM64_ERP_PANIC_ON_UE 1
#else
#define ARM64_ERP_PANIC_ON_UE 0
#endif

/*
 * Correctable-error policy: the Kconfig option only selects the default;
 * the value is also exposed as a module parameter (permissions 0, so it is
 * not visible in sysfs) and is copied into edev_ctl->panic_on_ce at probe.
 */
#ifdef CONFIG_EDAC_CORTEX_ARM64_PANIC_ON_CE
static int panic_on_ce = 1;
#else
static int panic_on_ce;
#endif
module_param(panic_on_ce, int, 0);

/* Prefix handed to edac_printk() for every message from this driver. */
#define EDAC_CPU "arm64"
| |
/* Severity of a reported cache error; also the index into err_name[]. */
enum error_type {
	SBE = 0,	/* single-bit error */
	DBE = 1,	/* double-bit error */
};

/* Human-readable severity names, indexed by enum error_type. */
const char *err_name[] = {
	[SBE] = "Single-bit",
	[DBE] = "Double-bit",
};
| |
/* Per-device driver state, allocated in arm64_cpu_erp_probe(). */
struct erp_drvdata {
	/* EDAC device through which CE/UE events are reported. */
	struct edac_device_ctl_info *edev_ctl;
	/* Mapping of the optional "cci" memory resource. */
	void __iomem *cci_base;
	/* CPU PM notifier (arm64_pmu_cpu_pm_notify). */
	struct notifier_block nb_pm;
	/* CPU hotplug notifier (arm64_edac_pmu_cpu_notify). */
	struct notifier_block nb_cpu;
	/* Panic notifier (arm64_erp_panic_notify). */
	struct notifier_block nb_panic;
	struct work_struct work;
	/* One perf counter per CPU, created by create_sbe_counter(). */
	struct perf_event *memerr_counters[NR_CPUS];
};

/*
 * Driver data consulted by arm64_check_cache_ecc(); stays NULL until probe
 * has completed successfully.
 */
static struct erp_drvdata *panic_handler_drvdata;
| |
/*
 * Context handed to the per-CPU syndrome parsers.  @err carries the severity
 * assumed by the caller; a parser raises it to DBE when the syndrome
 * register has its fatal bit set.
 */
struct erp_local_data {
	struct erp_drvdata *drv;
	enum error_type err;
};
| |
/* Raw PMU event number used as perf config by create_sbe_counter(). */
#define MEM_ERROR_EVENT 0x1A

/*
 * One entry of the errors[] table: the message to report and the EDAC
 * callback (correctable or uncorrectable handler) to report it with.
 */
struct errors_edac {
	const char * const msg;
	void (*func)(struct edac_device_ctl_info *edac_dev,
		     int inst_nr, int block_nr, const char *msg);
};
| |
| static const struct errors_edac errors[] = { |
| {"A53 L1 Correctable Error", edac_device_handle_ce }, |
| {"A53 L1 Uncorrectable Error", edac_device_handle_ue }, |
| {"A53 L2 Correctable Error", edac_device_handle_ce }, |
| {"A53 L2 Uncorrectable Error", edac_device_handle_ue }, |
| {"A57 L1 Correctable Error", edac_device_handle_ce }, |
| {"A57 L1 Uncorrectable Error", edac_device_handle_ue }, |
| {"A57 L2 Correctable Error", edac_device_handle_ce }, |
| {"A57 L2 Uncorrectable Error", edac_device_handle_ue }, |
| {"L2 External Error", edac_device_handle_ue }, |
| {"CCI Error", edac_device_handle_ue }, |
| {"Kryo2xx Silver L1 Correctable Error", edac_device_handle_ce }, |
| {"Kryo2xx Silver L1 Uncorrectable Error", edac_device_handle_ue }, |
| {"Kryo2xx Silver L2 Correctable Error", edac_device_handle_ce }, |
| {"Kryo2xx Silver L2 Uncorrectable Error", edac_device_handle_ue }, |
| {"Kryo2xx Gold L2 Correctable Error", edac_device_handle_ce }, |
| {"Kryo2xx Gold L2 Uncorrectable Error", edac_device_handle_ue }, |
| }; |
| |
/*
 * Accessors for the implementation-defined error registers and ESR_EL1.
 *
 * The reads are "asm volatile": these registers are updated by hardware and
 * are written by this driver between reads (e.g. the L2ECTLR
 * read-modify-write that follows syndrome parsing).  A non-volatile asm
 * with only output operands may be merged with an earlier identical one,
 * moved, or dropped by the compiler, which would hand back a stale value.
 * The writes have no outputs and are therefore already treated as volatile;
 * the qualifier is spelled out for symmetry.
 */
#define read_l2merrsr_el1 ({					\
	u64 __val;						\
	asm volatile("mrs %0, s3_1_c15_c2_3" : "=r" (__val));	\
	__val;							\
})

#define read_l2ectlr_el1 ({					\
	u32 __val;						\
	asm volatile("mrs %0, s3_1_c11_c0_3" : "=r" (__val));	\
	__val;							\
})

#define read_cpumerrsr_el1 ({					\
	u64 __val;						\
	asm volatile("mrs %0, s3_1_c15_c2_2" : "=r" (__val));	\
	__val;							\
})

#define read_esr_el1 ({						\
	u64 __val;						\
	asm volatile("mrs %0, esr_el1" : "=r" (__val));		\
	__val;							\
})

#define write_l2merrsr_el1(val) ({				\
	asm volatile("msr s3_1_c15_c2_3, %0" : : "r" (val));	\
})

#define write_l2ectlr_el1(val) ({				\
	asm volatile("msr s3_1_c11_c0_3, %0" : : "r" (val));	\
})

#define write_cpumerrsr_el1(val) ({				\
	asm volatile("msr s3_1_c15_c2_2, %0" : : "r" (val));	\
})
| |
| static void kryo2xx_print_error_state_regs(void) |
| { |
| u64 l2merrsr; |
| u64 cpumerrsr; |
| u32 esr_el1; |
| u32 l2ectlr; |
| |
| cpumerrsr = read_cpumerrsr_el1; |
| l2merrsr = read_l2merrsr_el1; |
| esr_el1 = read_esr_el1; |
| l2ectlr = read_l2ectlr_el1; |
| |
| /* store data in uncached rtb logs */ |
| uncached_logk_pc(LOGK_READL, __builtin_return_address(0), |
| (void *)cpumerrsr); |
| uncached_logk_pc(LOGK_READL, __builtin_return_address(0), |
| (void *)l2merrsr); |
| uncached_logk_pc(LOGK_READL, __builtin_return_address(0), |
| (void *)((u64)esr_el1)); |
| uncached_logk_pc(LOGK_READL, __builtin_return_address(0), |
| (void *)((u64)l2ectlr)); |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "CPUMERRSR value = %#llx\n", |
| cpumerrsr); |
| edac_printk(KERN_CRIT, EDAC_CPU, "L2MERRSR value = %#llx\n", l2merrsr); |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "ESR value = %#x\n", esr_el1); |
| edac_printk(KERN_CRIT, EDAC_CPU, "L2ECTLR value = %#x\n", l2ectlr); |
| if (ESR_L2_DBE(esr_el1)) |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "Double bit error on dirty L2 cacheline\n"); |
| } |
| |
| static void kryo2xx_gold_print_error_state_regs(void) |
| { |
| u64 l2merrsr; |
| u32 esr_el1; |
| u32 l2ectlr; |
| |
| l2merrsr = read_l2merrsr_el1; |
| esr_el1 = read_esr_el1; |
| l2ectlr = read_l2ectlr_el1; |
| |
| uncached_logk_pc(LOGK_READL, __builtin_return_address(0), |
| (void *)l2merrsr); |
| uncached_logk_pc(LOGK_READL, __builtin_return_address(0), |
| (void *)((u64)esr_el1)); |
| uncached_logk_pc(LOGK_READL, __builtin_return_address(0), |
| (void *)((u64)l2ectlr)); |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "L2MERRSR value = %#llx\n", l2merrsr); |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "ESR value = %#x\n", esr_el1); |
| edac_printk(KERN_CRIT, EDAC_CPU, "L2ECTLR value = %#x\n", l2ectlr); |
| if (ESR_L2_DBE(esr_el1)) |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "Double bit error on dirty L2 cacheline\n"); |
| } |
| |
| static void kryo2xx_silver_parse_cpumerrsr(struct erp_local_data *ed) |
| { |
| u64 cpumerrsr; |
| int cpuid; |
| |
| cpumerrsr = read_cpumerrsr_el1; |
| |
| if (!A53_CPUMERRSR_VALID(cpumerrsr)) |
| return; |
| |
| if (A53_CPUMERRSR_FATAL(cpumerrsr)) |
| ed->err = DBE; |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "Kryo2xx Silver CPU%d L1 %s Error detected\n", |
| smp_processor_id(), err_name[ed->err]); |
| |
| kryo2xx_print_error_state_regs(); |
| if (ed->err == DBE) |
| edac_printk(KERN_CRIT, EDAC_CPU, "Fatal error\n"); |
| |
| cpuid = A53_CPUMERRSR_CPUID(cpumerrsr); |
| |
| switch (A53_CPUMERRSR_RAMID(cpumerrsr)) { |
| case 0x0: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Instruction tag RAM way is %d\n", cpuid); |
| break; |
| case 0x1: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Instruction data RAM bank is %d\n", cpuid); |
| break; |
| case 0x8: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Data tag RAM cpu %d way is %d\n", |
| cpuid / 4, cpuid % 4); |
| break; |
| case 0x9: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Data data RAM cpu %d way is %d\n", |
| cpuid / 4, cpuid % 4); |
| break; |
| case 0xA: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Data dirty RAM cpu %d way is %d\n", |
| cpuid / 4, cpuid % 4); |
| break; |
| case 0x18: |
| edac_printk(KERN_CRIT, EDAC_CPU, "TLB RAM way is %d\n", cpuid); |
| break; |
| default: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "Error in unknown RAM ID: %d\n", |
| (int) A53_CPUMERRSR_RAMID(cpumerrsr)); |
| break; |
| } |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "Repeated error count: %d\n", |
| (int) A53_CPUMERRSR_REPT(cpumerrsr)); |
| edac_printk(KERN_CRIT, EDAC_CPU, "Other error count: %d\n", |
| (int) A53_CPUMERRSR_OTHER(cpumerrsr)); |
| |
| if (ed->err == SBE) |
| errors[KRYO2XX_SILVER_L1_CE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L1_CACHE, errors[KRYO2XX_SILVER_L1_CE].msg); |
| else if (ed->err == DBE) |
| errors[KRYO2XX_SILVER_L1_UE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L1_CACHE, errors[KRYO2XX_SILVER_L1_UE].msg); |
| write_cpumerrsr_el1(0); |
| } |
| |
| static void kryo2xx_silver_parse_l2merrsr(struct erp_local_data *ed) |
| { |
| u64 l2merrsr; |
| u32 l2ectlr; |
| int cpuid; |
| |
| l2merrsr = read_l2merrsr_el1; |
| l2ectlr = read_l2ectlr_el1; |
| |
| if (!A53_L2MERRSR_VALID(l2merrsr)) |
| return; |
| |
| if (A53_L2MERRSR_FATAL(l2merrsr)) |
| ed->err = DBE; |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "Kyro2xx Silver L2 %s Error detected\n", |
| err_name[ed->err]); |
| kryo2xx_print_error_state_regs(); |
| if (ed->err == DBE) |
| edac_printk(KERN_CRIT, EDAC_CPU, "Fatal error\n"); |
| |
| cpuid = A53_L2MERRSR_CPUID(l2merrsr); |
| |
| switch (A53_L2MERRSR_RAMID(l2merrsr)) { |
| case 0x10: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L2 tag RAM way is %d\n", cpuid); |
| break; |
| case 0x11: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L2 data RAM bank is %d\n", cpuid); |
| break; |
| case 0x12: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "SCU snoop filter RAM cpu %d way is %d\n", |
| cpuid / 4, cpuid % 4); |
| break; |
| default: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "Error in unknown RAM ID: %d\n", |
| (int) A53_L2MERRSR_RAMID(l2merrsr)); |
| break; |
| } |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "Repeated error count: %d\n", |
| (int) A53_L2MERRSR_REPT(l2merrsr)); |
| edac_printk(KERN_CRIT, EDAC_CPU, "Other error count: %d\n", |
| (int) A53_L2MERRSR_OTHER(l2merrsr)); |
| |
| if (ed->err == SBE) |
| errors[KRYO2XX_SILVER_L2_CE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L2_CACHE, errors[KRYO2XX_SILVER_L2_CE].msg); |
| else if (ed->err == DBE) |
| errors[KRYO2XX_SILVER_L2_UE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L2_CACHE, errors[KRYO2XX_SILVER_L2_UE].msg); |
| write_l2merrsr_el1(0); |
| } |
| |
| |
| static void ca57_parse_cpumerrsr(struct erp_local_data *ed) |
| { |
| u64 cpumerrsr; |
| int bank; |
| |
| cpumerrsr = read_cpumerrsr_el1; |
| |
| if (!A57_CPUMERRSR_VALID(cpumerrsr)) |
| return; |
| |
| if (A57_CPUMERRSR_FATAL(cpumerrsr)) |
| ed->err = DBE; |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "Cortex A57 CPU%d L1 %s Error detected\n", |
| smp_processor_id(), err_name[ed->err]); |
| kryo2xx_print_error_state_regs(); |
| if (ed->err == DBE) |
| edac_printk(KERN_CRIT, EDAC_CPU, "Fatal error\n"); |
| |
| bank = A57_CPUMERRSR_BANK(cpumerrsr); |
| |
| switch (A57_CPUMERRSR_RAMID(cpumerrsr)) { |
| case 0x0: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Instruction tag RAM bank %d\n", bank); |
| break; |
| case 0x1: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Instruction data RAM bank %d\n", bank); |
| break; |
| case 0x8: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Data tag RAM bank %d\n", bank); |
| break; |
| case 0x9: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L1 Data data RAM bank %d\n", bank); |
| break; |
| case 0x18: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "TLB RAM bank %d\n", bank); |
| break; |
| default: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "Error in unknown RAM ID: %d\n", |
| (int) A57_CPUMERRSR_RAMID(cpumerrsr)); |
| break; |
| } |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "Repeated error count: %d\n", |
| (int) A57_CPUMERRSR_REPT(cpumerrsr)); |
| edac_printk(KERN_CRIT, EDAC_CPU, "Other error count: %d\n", |
| (int) A57_CPUMERRSR_OTHER(cpumerrsr)); |
| |
| if (ed->err == SBE) |
| errors[A57_L1_CE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L1_CACHE, errors[A57_L1_CE].msg); |
| else if (ed->err == DBE) |
| errors[A57_L1_UE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L1_CACHE, errors[A57_L1_UE].msg); |
| write_cpumerrsr_el1(0); |
| } |
| |
| static void ca57_parse_l2merrsr(struct erp_local_data *ed) |
| { |
| u64 l2merrsr; |
| u32 l2ectlr; |
| int cpuid; |
| |
| l2merrsr = read_l2merrsr_el1; |
| l2ectlr = read_l2ectlr_el1; |
| |
| if (!A57_L2MERRSR_VALID(l2merrsr)) |
| return; |
| |
| if (A57_L2MERRSR_FATAL(l2merrsr)) |
| ed->err = DBE; |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "CortexA57 L2 %s Error detected\n", |
| err_name[ed->err]); |
| kryo2xx_print_error_state_regs(); |
| if (ed->err == DBE) |
| edac_printk(KERN_CRIT, EDAC_CPU, "Fatal error\n"); |
| |
| cpuid = A57_L2MERRSR_CPUID(l2merrsr); |
| |
| switch (A57_L2MERRSR_RAMID(l2merrsr)) { |
| case 0x10: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L2 tag RAM cpu %d way is %d\n", |
| cpuid / 2, cpuid % 2); |
| break; |
| case 0x11: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L2 data RAM cpu %d bank is %d\n", |
| cpuid / 2, cpuid % 2); |
| break; |
| case 0x12: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "SCU snoop tag RAM bank is %d\n", cpuid); |
| break; |
| case 0x14: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L2 dirty RAM cpu %d bank is %d\n", |
| cpuid / 2, cpuid % 2); |
| break; |
| case 0x18: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L2 inclusion PF RAM bank is %d\n", cpuid); |
| break; |
| default: |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "Error in unknown RAM ID: %d\n", |
| (int) A57_L2MERRSR_RAMID(l2merrsr)); |
| break; |
| } |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "Repeated error count: %d\n", |
| (int) A57_L2MERRSR_REPT(l2merrsr)); |
| edac_printk(KERN_CRIT, EDAC_CPU, "Other error count: %d\n", |
| (int) A57_L2MERRSR_OTHER(l2merrsr)); |
| |
| if (ed->err == SBE) { |
| errors[A57_L2_CE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L2_CACHE, errors[A57_L2_CE].msg); |
| } else if (ed->err == DBE) { |
| errors[A57_L2_UE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L2_CACHE, errors[A57_L2_UE].msg); |
| } |
| write_l2merrsr_el1(0); |
| } |
| |
| |
| static void kryo2xx_gold_parse_l2merrsr(struct erp_local_data *ed) |
| { |
| u64 l2merrsr; |
| int ramid, way; |
| |
| l2merrsr = read_l2merrsr_el1; |
| |
| if (!KRYO2XX_GOLD_L2MERRSR_VALID(l2merrsr)) |
| return; |
| |
| if (KRYO2XX_GOLD_L2MERRSR_FATAL(l2merrsr)) |
| ed->err = DBE; |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "Kryo2xx Gold L2 %s Error detected\n", |
| err_name[ed->err]); |
| kryo2xx_gold_print_error_state_regs(); |
| if (ed->err == DBE) |
| edac_printk(KERN_CRIT, EDAC_CPU, "Fatal error\n"); |
| |
| way = KRYO2XX_GOLD_L2MERRSR_WAY(l2merrsr); |
| ramid = KRYO2XX_GOLD_L2MERRSR_RAMID(l2merrsr); |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L2 %s RAM error in way 0x%02x, index 0x%04x\n", |
| ramid ? "data" : "tag", |
| (int) KRYO2XX_GOLD_L2MERRSR_WAY(l2merrsr), |
| (int) KRYO2XX_GOLD_L2MERRSR_INDEX(l2merrsr)); |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "Repeated error count: %d\n", |
| (int) KRYO2XX_GOLD_L2MERRSR_REPT(l2merrsr)); |
| edac_printk(KERN_CRIT, EDAC_CPU, "Other error count: %d\n", |
| (int) KRYO2XX_GOLD_L2MERRSR_OTHER(l2merrsr)); |
| |
| if (ed->err == SBE) { |
| errors[KRYO2XX_GOLD_L2_CE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L2_CACHE, errors[KRYO2XX_GOLD_L2_CE].msg); |
| } else if (ed->err == DBE) { |
| errors[KRYO2XX_GOLD_L2_UE].func(ed->drv->edev_ctl, smp_processor_id(), |
| L2_CACHE, errors[KRYO2XX_GOLD_L2_UE].msg); |
| } |
| write_l2merrsr_el1(0); |
| } |
| |
/*
 * local_handler_lock is held around syndrome parsing and reporting in the
 * per-CPU handlers and in check_sbe_event().  l2ectlr_lock is taken inside
 * it around the L2ECTLR read-modify-write.
 */
static DEFINE_SPINLOCK(local_handler_lock);
static DEFINE_SPINLOCK(l2ectlr_lock);
| |
| static void arm64_erp_local_handler(void *info) |
| { |
| struct erp_local_data *errdata = info; |
| unsigned int cpuid = read_cpuid_id(); |
| unsigned int partnum = read_cpuid_part_number(); |
| unsigned long flags, flags2; |
| u32 l2ectlr; |
| |
| spin_lock_irqsave(&local_handler_lock, flags); |
| edac_printk(KERN_CRIT, EDAC_CPU, "%s error information from CPU %d, MIDR=%#08x:\n", |
| err_name[errdata->err], raw_smp_processor_id(), cpuid); |
| |
| switch (partnum) { |
| case ARM_CPU_PART_CORTEX_A53: |
| case ARM_CPU_PART_KRYO2XX_SILVER: |
| kryo2xx_silver_parse_cpumerrsr(errdata); |
| kryo2xx_silver_parse_l2merrsr(errdata); |
| break; |
| |
| case ARM_CPU_PART_CORTEX_A72: |
| case ARM_CPU_PART_CORTEX_A57: |
| ca57_parse_cpumerrsr(errdata); |
| ca57_parse_l2merrsr(errdata); |
| break; |
| |
| case ARM_CPU_PART_KRYO2XX_GOLD: |
| kryo2xx_gold_parse_l2merrsr(errdata); |
| break; |
| |
| default: |
| edac_printk(KERN_CRIT, EDAC_CPU, "Unknown CPU Part Number in MIDR: %#04x (%#08x)\n", |
| partnum, cpuid); |
| }; |
| |
| /* Acklowledge internal error in L2ECTLR */ |
| spin_lock_irqsave(&l2ectlr_lock, flags2); |
| |
| l2ectlr = read_l2ectlr_el1; |
| |
| if (l2ectlr & L2ECTLR_INT_ERR) { |
| l2ectlr &= ~L2ECTLR_INT_ERR; |
| write_l2ectlr_el1(l2ectlr); |
| } |
| |
| spin_unlock_irqrestore(&l2ectlr_lock, flags2); |
| spin_unlock_irqrestore(&local_handler_lock, flags); |
| } |
| |
| static irqreturn_t arm64_dbe_handler(int irq, void *drvdata) |
| { |
| struct erp_local_data errdata; |
| |
| errdata.drv = drvdata; |
| errdata.err = DBE; |
| edac_printk(KERN_CRIT, EDAC_CPU, "ARM64 CPU ERP: Double-bit error interrupt received!\n"); |
| |
| on_each_cpu(arm64_erp_local_handler, &errdata, 1); |
| |
| return IRQ_HANDLED; |
| } |
| |
| static void arm64_ext_local_handler(void *info) |
| { |
| struct erp_drvdata *drv = info; |
| unsigned long flags, flags2; |
| u32 l2ectlr; |
| |
| spin_lock_irqsave(&local_handler_lock, flags); |
| |
| /* TODO: Shared locking for L2ECTLR access */ |
| spin_lock_irqsave(&l2ectlr_lock, flags2); |
| |
| l2ectlr = read_l2ectlr_el1; |
| |
| if (l2ectlr & L2ECTLR_EXT_ERR) { |
| edac_printk(KERN_CRIT, EDAC_CPU, |
| "L2 external error detected by CPU%d\n", |
| smp_processor_id()); |
| |
| errors[L2_EXT_UE].func(drv->edev_ctl, smp_processor_id(), |
| L2_CACHE, errors[L2_EXT_UE].msg); |
| |
| l2ectlr &= ~L2ECTLR_EXT_ERR; |
| write_l2ectlr_el1(l2ectlr); |
| } |
| |
| spin_unlock_irqrestore(&l2ectlr_lock, flags2); |
| spin_unlock_irqrestore(&local_handler_lock, flags); |
| } |
| |
/*
 * External error interrupt: have every online CPU check its own L2ECTLR.
 *
 * @irq: interrupt number (unused).
 * @drvdata: the struct erp_drvdata registered with the interrupt; passed
 *           through unchanged to arm64_ext_local_handler().
 */
static irqreturn_t arm64_ext_handler(int irq, void *drvdata)
{
	edac_printk(KERN_CRIT, EDAC_CPU, "External error interrupt received!\n");

	/* Last argument (wait) is 1: return only after all CPUs have run. */
	on_each_cpu(arm64_ext_local_handler, drvdata, 1);

	return IRQ_HANDLED;
}
| |
| static irqreturn_t arm64_cci_handler(int irq, void *drvdata) |
| { |
| struct erp_drvdata *drv = drvdata; |
| u32 cci_err_reg; |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "CCI error interrupt received!\n"); |
| |
| if (drv->cci_base) { |
| cci_err_reg = readl_relaxed(drv->cci_base + |
| CCI_IMPRECISEERROR_REG); |
| |
| edac_printk(KERN_CRIT, EDAC_CPU, "CCI imprecise error register: %#08x.\n", |
| cci_err_reg); |
| |
| /* This register has write-clear semantics */ |
| writel_relaxed(cci_err_reg, drv->cci_base + |
| CCI_IMPRECISEERROR_REG); |
| |
| /* Ensure error bits cleared before exiting ISR */ |
| mb(); |
| } else { |
| edac_printk(KERN_CRIT, EDAC_CPU, "CCI registers not available.\n"); |
| } |
| |
| errors[CCI_UE].func(drv->edev_ctl, 0, CCI, errors[CCI_UE].msg); |
| |
| return IRQ_HANDLED; |
| } |
| #ifndef CONFIG_EDAC_CORTEX_ARM64_DBE_IRQ_ONLY |
| static void arm64_sbe_handler(struct perf_event *event, |
| struct perf_sample_data *data, |
| struct pt_regs *regs) |
| { |
| struct erp_local_data errdata; |
| int cpu = raw_smp_processor_id(); |
| |
| errdata.drv = event->overflow_handler_context; |
| errdata.err = SBE; |
| edac_printk(KERN_CRIT, EDAC_CPU, "ARM64 CPU ERP: Single-bit error interrupt received on CPU %d!\n", |
| cpu); |
| arm64_erp_local_handler(&errdata); |
| } |
| #endif |
| |
| static int request_erp_irq(struct platform_device *pdev, const char *propname, |
| const char *desc, irq_handler_t handler, |
| void *ed) |
| { |
| int rc; |
| struct resource *r; |
| |
| r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, propname); |
| |
| if (!r) { |
| pr_err("ARM64 CPU ERP: Could not find <%s> IRQ property. Proceeding anyway.\n", |
| propname); |
| return -EINVAL; |
| } |
| |
| rc = devm_request_threaded_irq(&pdev->dev, r->start, NULL, |
| handler, |
| IRQF_ONESHOT | IRQF_TRIGGER_RISING, |
| desc, |
| ed); |
| |
| if (rc) { |
| pr_err("ARM64 CPU ERP: Failed to request IRQ %d: %d (%s / %s). Proceeding anyway.\n", |
| (int) r->start, rc, propname, desc); |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |
| |
| static void check_sbe_event(struct erp_drvdata *drv) |
| { |
| unsigned int partnum = read_cpuid_part_number(); |
| struct erp_local_data errdata; |
| unsigned long flags; |
| |
| errdata.drv = drv; |
| errdata.err = SBE; |
| |
| spin_lock_irqsave(&local_handler_lock, flags); |
| switch (partnum) { |
| case ARM_CPU_PART_CORTEX_A53: |
| case ARM_CPU_PART_KRYO2XX_SILVER: |
| kryo2xx_silver_parse_cpumerrsr(&errdata); |
| kryo2xx_silver_parse_l2merrsr(&errdata); |
| break; |
| |
| case ARM_CPU_PART_CORTEX_A72: |
| case ARM_CPU_PART_CORTEX_A57: |
| ca57_parse_cpumerrsr(&errdata); |
| ca57_parse_l2merrsr(&errdata); |
| break; |
| |
| case ARM_CPU_PART_KRYO2XX_GOLD: |
| kryo2xx_gold_parse_l2merrsr(&errdata); |
| break; |
| }; |
| spin_unlock_irqrestore(&local_handler_lock, flags); |
| } |
| |
| #ifdef CONFIG_EDAC_CORTEX_ARM64_DBE_IRQ_ONLY |
| static void create_sbe_counter(int cpu, void *info) |
| { } |
| #else |
| static void create_sbe_counter(int cpu, void *info) |
| { |
| struct erp_drvdata *drv = info; |
| struct perf_event *event = drv->memerr_counters[cpu]; |
| struct perf_event_attr attr = { |
| .pinned = 1, |
| .disabled = 0, /* 0 will enable the counter upon creation */ |
| .sample_period = 1, /* 1 will set the counter to max int */ |
| .type = PERF_TYPE_RAW, |
| .config = MEM_ERROR_EVENT, |
| .size = sizeof(struct perf_event_attr), |
| }; |
| |
| if (event) |
| return; |
| |
| /* Fails if cpu is not online */ |
| event = perf_event_create_kernel_counter(&attr, cpu, NULL, |
| arm64_sbe_handler, |
| drv); |
| if (IS_ERR(event)) { |
| pr_err("PERF Event creation failed on cpu %d ptr_err %ld\n", |
| cpu, PTR_ERR(event)); |
| return; |
| } |
| drv->memerr_counters[cpu] = event; |
| } |
| #endif |
| |
| static int arm64_pmu_cpu_pm_notify(struct notifier_block *self, |
| unsigned long action, void *v) |
| { |
| struct erp_drvdata *drv = container_of(self, struct erp_drvdata, nb_pm); |
| |
| switch (action) { |
| case CPU_PM_EXIT: |
| check_sbe_event(drv); |
| break; |
| } |
| |
| return NOTIFY_OK; |
| } |
| |
| static int arm64_edac_pmu_cpu_notify(struct notifier_block *self, |
| unsigned long action, void *hcpu) |
| { |
| struct erp_drvdata *drv = container_of(self, struct erp_drvdata, |
| nb_cpu); |
| unsigned long cpu = (unsigned long)hcpu; |
| |
| switch (action & ~CPU_TASKS_FROZEN) { |
| case CPU_ONLINE: |
| create_sbe_counter(cpu, drv); |
| break; |
| }; |
| |
| return NOTIFY_OK; |
| } |
| |
| #ifndef CONFIG_EDAC_CORTEX_ARM64_DBE_IRQ_ONLY |
| void arm64_check_cache_ecc(void *info) |
| { |
| if (panic_handler_drvdata) |
| check_sbe_event(panic_handler_drvdata); |
| } |
| #else |
| static inline void arm64_check_cache_ecc(void *info) {} |
| #endif |
| |
/*
 * Panic notifier: run one last cache error check on the panicking CPU.
 * All arguments are unused.  Always returns NOTIFY_OK.
 */
static int arm64_erp_panic_notify(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	arm64_check_cache_ecc(NULL);

	return NOTIFY_OK;
}
| |
| static void arm64_monitor_cache_errors(struct edac_device_ctl_info *edev) |
| { |
| struct cpumask cluster_mask, old_mask; |
| int cpu; |
| |
| cpumask_clear(&cluster_mask); |
| cpumask_clear(&old_mask); |
| |
| for_each_possible_cpu(cpu) { |
| cpumask_copy(&cluster_mask, topology_core_cpumask(cpu)); |
| if (cpumask_equal(&cluster_mask, &old_mask)) |
| continue; |
| cpumask_copy(&old_mask, &cluster_mask); |
| smp_call_function_any(&cluster_mask, |
| arm64_check_cache_ecc, NULL, 0); |
| } |
| } |
| |
| static int arm64_cpu_erp_probe(struct platform_device *pdev) |
| { |
| struct device *dev = &pdev->dev; |
| struct erp_drvdata *drv; |
| struct resource *r; |
| int cpu; |
| u32 poll_msec; |
| |
| int rc, fail = 0; |
| |
| drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL); |
| |
| if (!drv) |
| return -ENOMEM; |
| |
| drv->edev_ctl = edac_device_alloc_ctl_info(0, "cpu", |
| num_possible_cpus(), "L", 3, 1, NULL, 0, |
| edac_device_alloc_index()); |
| |
| if (!drv->edev_ctl) |
| return -ENOMEM; |
| |
| rc = of_property_read_u32(pdev->dev.of_node, "poll-delay-ms", |
| &poll_msec); |
| if (!rc && !IS_ENABLED(CONFIG_EDAC_CORTEX_ARM64_DBE_IRQ_ONLY)) { |
| drv->edev_ctl->edac_check = arm64_monitor_cache_errors; |
| drv->edev_ctl->poll_msec = poll_msec; |
| drv->edev_ctl->defer_work = 1; |
| } |
| drv->edev_ctl->dev = dev; |
| drv->edev_ctl->mod_name = dev_name(dev); |
| drv->edev_ctl->dev_name = dev_name(dev); |
| drv->edev_ctl->ctl_name = "cache"; |
| drv->edev_ctl->panic_on_ce = panic_on_ce; |
| drv->edev_ctl->panic_on_ue = ARM64_ERP_PANIC_ON_UE; |
| |
| rc = edac_device_add_device(drv->edev_ctl); |
| if (rc) |
| goto out_mem; |
| |
| r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cci"); |
| if (r) |
| drv->cci_base = devm_ioremap_resource(dev, r); |
| |
| if (request_erp_irq(pdev, "pri-dbe-irq", "ARM64 primary DBE IRQ", |
| arm64_dbe_handler, drv)) |
| fail++; |
| |
| if (request_erp_irq(pdev, "sec-dbe-irq", "ARM64 secondary DBE IRQ", |
| arm64_dbe_handler, drv)) |
| fail++; |
| |
| if (request_erp_irq(pdev, "pri-ext-irq", "ARM64 primary ext IRQ", |
| arm64_ext_handler, drv)) |
| fail++; |
| |
| if (request_erp_irq(pdev, "sec-ext-irq", "ARM64 secondary ext IRQ", |
| arm64_ext_handler, drv)) |
| fail++; |
| |
| /* |
| * We still try to register a handler for CCI errors even if we don't |
| * have access to cci_base, but error reporting becomes best-effort in |
| * that case. |
| */ |
| if (request_erp_irq(pdev, "cci-irq", "CCI error IRQ", |
| arm64_cci_handler, drv)) |
| fail++; |
| |
| if (IS_ENABLED(CONFIG_EDAC_CORTEX_ARM64_DBE_IRQ_ONLY)) { |
| pr_err("ARM64 CPU ERP: SBE detection is disabled.\n"); |
| goto out_irq; |
| } |
| |
| drv->nb_pm.notifier_call = arm64_pmu_cpu_pm_notify; |
| cpu_pm_register_notifier(&(drv->nb_pm)); |
| drv->nb_panic.notifier_call = arm64_erp_panic_notify; |
| atomic_notifier_chain_register(&panic_notifier_list, |
| &drv->nb_panic); |
| drv->nb_cpu.notifier_call = arm64_edac_pmu_cpu_notify; |
| register_cpu_notifier(&drv->nb_cpu); |
| get_online_cpus(); |
| for_each_online_cpu(cpu) |
| create_sbe_counter(cpu, drv); |
| put_online_cpus(); |
| |
| out_irq: |
| if (fail == of_irq_count(dev->of_node)) { |
| pr_err("ARM64 CPU ERP: Could not request any IRQs. Giving up.\n"); |
| rc = -ENODEV; |
| goto out_dev; |
| } |
| |
| panic_handler_drvdata = drv; |
| |
| return 0; |
| |
| out_dev: |
| edac_device_del_device(dev); |
| out_mem: |
| edac_device_free_ctl_info(drv->edev_ctl); |
| return rc; |
| } |
| |
/* Device-tree compatible strings handled by this driver. */
static const struct of_device_id arm64_cpu_erp_match_table[] = {
	{ .compatible = "arm,arm64-cpu-erp" },
	{ }
};

/* Platform driver; only a probe callback is provided (no remove). */
static struct platform_driver arm64_cpu_erp_driver = {
	.probe = arm64_cpu_erp_probe,
	.driver = {
		.name = "arm64_cpu_cache_erp",
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(arm64_cpu_erp_match_table),
	},
};

/* Register the platform driver at device_initcall_sync time. */
static int __init arm64_cpu_erp_init(void)
{
	return platform_driver_register(&arm64_cpu_erp_driver);
}
device_initcall_sync(arm64_cpu_erp_init);