Tegra186: save/restore BL31 context to/from TZDRAM

This patch adds support to save the BL31 state to the TZDRAM
before entering system suspend. The TZRAM loses state during
system suspend and so we need to copy the entire BL31 code to
TZDRAM before entering the state.

In order to restore the state on exiting system suspend, a new
CPU reset handler is implemented which gets copied to TZDRAM
during boot. TO keep things simple we use this same reset handler
for booting secondary CPUs too.

Change-Id: I770f799c255d22279b5cdb9b4d587d3a4c54fad7
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
diff --git a/plat/nvidia/tegra/include/drivers/memctrl_v2.h b/plat/nvidia/tegra/include/drivers/memctrl_v2.h
index fe7f7a0..04c0e8d 100644
--- a/plat/nvidia/tegra/include/drivers/memctrl_v2.h
+++ b/plat/nvidia/tegra/include/drivers/memctrl_v2.h
@@ -31,7 +31,6 @@
 #ifndef __MEMCTRLV2_H__
 #define __MEMCTRLV2_H__
 
-#include <mmio.h>
 #include <tegra_def.h>
 
 /*******************************************************************************
@@ -297,6 +296,8 @@
 #define MC_TXN_OVERRIDE_CGID_TAG_ADR		3
 #define MC_TXN_OVERRIDE_CGID_TAG_MASK		3
 
+#ifndef __ASSEMBLY__
+
 /*******************************************************************************
  * Structure to hold the transaction override settings to use to override
  * client inputs
@@ -347,6 +348,8 @@
 			.override_enable = OVERRIDE_ ## access \
 		}
 
+#endif /* __ASSMEBLY__ */
+
 /*******************************************************************************
  * TZDRAM carveout configuration registers
  ******************************************************************************/
@@ -545,6 +548,10 @@
 #define  MC_SMMU_CLIENT_CONFIG5_APEDMAW_UNORDERED	(0 << 0)
 #define  MC_SMMU_CLIENT_CONFIG5_APEDMAW_MASK	(1 << 0)
 
+#ifndef __ASSEMBLY__
+
+#include <mmio.h>
+
 static inline uint32_t tegra_mc_read_32(uint32_t off)
 {
 	return mmio_read_32(TEGRA_MC_BASE + off);
@@ -601,5 +608,6 @@
 			MC_TXN_OVERRIDE_CONFIG_AXID_OVERRIDE_CGID | \
 			MC_TXN_OVERRIDE_CONFIG_AXID_OVERRIDE_SO_DEV_CGID_SO_DEV_CLIENT); \
 	}
+#endif /* __ASSMEBLY__ */
 
 #endif /* __MEMCTRLV2_H__ */
diff --git a/plat/nvidia/tegra/include/drivers/smmu.h b/plat/nvidia/tegra/include/drivers/smmu.h
index bb08a55..0867c11 100644
--- a/plat/nvidia/tegra/include/drivers/smmu.h
+++ b/plat/nvidia/tegra/include/drivers/smmu.h
@@ -627,6 +627,6 @@
 }
 
 void tegra_smmu_init(void);
-void tegra_smmu_save_context(void);
+void tegra_smmu_save_context(uint64_t smmu_ctx_addr);
 
 #endif /*__SMMU_H */
diff --git a/plat/nvidia/tegra/include/tegra_private.h b/plat/nvidia/tegra/include/tegra_private.h
index 012bfd7..39006f6 100644
--- a/plat/nvidia/tegra/include/tegra_private.h
+++ b/plat/nvidia/tegra/include/tegra_private.h
@@ -119,4 +119,7 @@
 /* Declarations for tegra_delay_timer.c */
 void tegra_delay_timer_init(void);
 
+void tegra_secure_entrypoint(void);
+void tegra186_cpu_reset_handler(void);
+
 #endif /* __TEGRA_PRIVATE_H__ */
diff --git a/plat/nvidia/tegra/soc/t186/drivers/smmu/smmu.c b/plat/nvidia/tegra/soc/t186/drivers/smmu/smmu.c
index 2940f58..7f45895 100644
--- a/plat/nvidia/tegra/soc/t186/drivers/smmu/smmu.c
+++ b/plat/nvidia/tegra/soc/t186/drivers/smmu/smmu.c
@@ -31,7 +31,10 @@
 #include <assert.h>
 #include <debug.h>
 #include <memctrl_v2.h>
+#include <platform_def.h>
 #include <smmu.h>
+#include <string.h>
+#include <tegra_private.h>
 
 typedef struct smmu_regs {
 	uint32_t reg;
@@ -133,7 +136,7 @@
 		.val = 0xFFFFFFFF, \
 	}
 
-static smmu_regs_t smmu_ctx_regs[] = {
+static __attribute__((aligned(16))) smmu_regs_t smmu_ctx_regs[] = {
 	_START_OF_TABLE_,
 	mc_make_sid_security_cfg(SCEW),
 	mc_make_sid_security_cfg(AFIR),
@@ -421,12 +424,15 @@
 };
 
 /*
- * Save SMMU settings before "System Suspend"
+ * Save SMMU settings before "System Suspend" to TZDRAM
  */
-void tegra_smmu_save_context(void)
+void tegra_smmu_save_context(uint64_t smmu_ctx_addr)
 {
 	uint32_t i;
 #if DEBUG
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	uint64_t tzdram_base = params_from_bl2->tzdram_base;
+	uint64_t tzdram_end = tzdram_base + params_from_bl2->tzdram_size;
 	uint32_t reg_id1, pgshift, cb_size;
 
 	/* sanity check SMMU settings c*/
@@ -438,6 +444,8 @@
 	assert(!((pgshift != PGSHIFT) || (cb_size != CB_SIZE)));
 #endif
 
+	assert((smmu_ctx_addr >= tzdram_base) && (smmu_ctx_addr <= tzdram_end));
+
 	/* index of _END_OF_TABLE_ */
 	smmu_ctx_regs[0].val = ARRAY_SIZE(smmu_ctx_regs) - 1;
 
@@ -445,11 +453,15 @@
 	for (i = 1; i < ARRAY_SIZE(smmu_ctx_regs) - 1; i++)
 		smmu_ctx_regs[i].val = mmio_read_32(smmu_ctx_regs[i].reg);
 
+	/* Save SMMU config settings */
+	memcpy16((void *)(uintptr_t)smmu_ctx_addr, (void *)smmu_ctx_regs,
+		 sizeof(smmu_ctx_regs));
+
 	/* save the SMMU table address */
 	mmio_write_32(TEGRA_SCRATCH_BASE + SECURE_SCRATCH_RSV11_LO,
-		(uint32_t)(unsigned long)smmu_ctx_regs);
+		(uint32_t)smmu_ctx_addr);
 	mmio_write_32(TEGRA_SCRATCH_BASE + SECURE_SCRATCH_RSV11_HI,
-		(uint32_t)(((unsigned long)smmu_ctx_regs) >> 32));
+		(uint32_t)(smmu_ctx_addr >> 32));
 }
 
 /*
diff --git a/plat/nvidia/tegra/soc/t186/plat_psci_handlers.c b/plat/nvidia/tegra/soc/t186/plat_psci_handlers.c
index 7e35cc6..7c6868c 100644
--- a/plat/nvidia/tegra/soc/t186/plat_psci_handlers.c
+++ b/plat/nvidia/tegra/soc/t186/plat_psci_handlers.c
@@ -39,10 +39,17 @@
 #include <mce.h>
 #include <psci.h>
 #include <smmu.h>
+#include <string.h>
 #include <t18x_ari.h>
 #include <tegra_private.h>
 
 extern void prepare_cpu_pwr_dwn(void);
+extern void tegra186_cpu_reset_handler(void);
+extern uint32_t __tegra186_cpu_reset_handler_data,
+		__tegra186_cpu_reset_handler_end;
+
+/* TZDRAM offset for saving SMMU context */
+#define TEGRA186_SMMU_CTX_OFFSET	16
 
 /* state id mask */
 #define TEGRA186_STATE_ID_MASK		0xF
@@ -50,7 +57,7 @@
 #define TEGRA186_WAKE_TIME_MASK		0xFFFFFF
 #define TEGRA186_WAKE_TIME_SHIFT	4
 /* context size to save during system suspend */
-#define TEGRA186_SE_CONTEXT_SIZE		3
+#define TEGRA186_SE_CONTEXT_SIZE	3
 
 static uint32_t se_regs[TEGRA186_SE_CONTEXT_SIZE];
 static unsigned int wake_time[PLATFORM_CORE_COUNT];
@@ -98,6 +105,8 @@
 	int impl = (read_midr() >> MIDR_IMPL_SHIFT) & MIDR_IMPL_MASK;
 	cpu_context_t *ctx = cm_get_context(NON_SECURE);
 	gp_regs_t *gp_regs = get_gpregs_ctx(ctx);
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	uint64_t smmu_ctx_base;
 	uint32_t val;
 
 	assert(ctx);
@@ -147,8 +156,12 @@
 		val = mmio_read_32(TEGRA_MISC_BASE + MISCREG_PFCFG);
 		mmio_write_32(TEGRA_SCRATCH_BASE + SECURE_SCRATCH_RSV6, val);
 
-		/* save SMMU context */
-		tegra_smmu_save_context();
+		/* save SMMU context to TZDRAM */
+		smmu_ctx_base = params_from_bl2->tzdram_base +
+			((uintptr_t)&__tegra186_cpu_reset_handler_data -
+			 (uintptr_t)tegra186_cpu_reset_handler) +
+			TEGRA186_SMMU_CTX_OFFSET;
+		tegra_smmu_save_context((uintptr_t)smmu_ctx_base);
 
 		/* Prepare for system suspend */
 		write_ctx_reg(gp_regs, CTX_GPREG_X4, 1);
@@ -157,7 +170,7 @@
 		(void)mce_command_handler(MCE_CMD_UPDATE_CSTATE_INFO,
 			TEGRA_ARI_CLUSTER_CC7, 0, TEGRA_ARI_SYSTEM_SC7);
 
-		/* Enter system suspend state */
+		/* Instruct the MCE to enter system suspend state */
 		(void)mce_command_handler(MCE_CMD_ENTER_CSTATE,
 			TEGRA_ARI_CORE_C7, MCE_CORE_SLEEP_TIME_INFINITE, 0);
 
@@ -169,6 +182,31 @@
 	return PSCI_E_SUCCESS;
 }
 
+int tegra_soc_pwr_domain_power_down_wfi(const psci_power_state_t *target_state)
+{
+	const plat_local_state_t *pwr_domain_state =
+		target_state->pwr_domain_state;
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	unsigned int stateid_afflvl2 = pwr_domain_state[PLAT_MAX_PWR_LVL] &
+		TEGRA186_STATE_ID_MASK;
+	uint32_t val;
+
+	if (stateid_afflvl2 == PSTATE_ID_SOC_POWERDN) {
+		/*
+		 * The TZRAM loses power when we enter system suspend. To
+		 * allow graceful exit from system suspend, we need to copy
+		 * BL3-1 over to TZDRAM.
+		 */
+		val = params_from_bl2->tzdram_base +
+			((uintptr_t)&__tegra186_cpu_reset_handler_end -
+			 (uintptr_t)tegra186_cpu_reset_handler);
+		memcpy16((void *)(uintptr_t)val, (void *)(uintptr_t)BL31_BASE,
+			 (uintptr_t)&__BL31_END__ - (uintptr_t)BL31_BASE);
+	}
+
+	return PSCI_E_SUCCESS;
+}
+
 int tegra_soc_pwr_domain_on(u_register_t mpidr)
 {
 	int target_cpu = mpidr & MPIDR_CPU_MASK;
@@ -191,6 +229,8 @@
 int tegra_soc_pwr_domain_on_finish(const psci_power_state_t *target_state)
 {
 	int state_id = target_state->pwr_domain_state[PLAT_MAX_PWR_LVL];
+	cpu_context_t *ctx = cm_get_context(NON_SECURE);
+	gp_regs_t *gp_regs = get_gpregs_ctx(ctx);
 
 	/*
 	 * Check if we are exiting from deep sleep and restore SE
@@ -206,6 +246,17 @@
 
 		/* Init SMMU */
 		tegra_smmu_init();
+
+		/*
+		 * Reset power state info for the last core doing SC7 entry and exit,
+		 * we set deepest power state as CC7 and SC7 for SC7 entry which
+		 * may not be requested by non-secure SW which controls idle states.
+		 */
+		write_ctx_reg(gp_regs, CTX_GPREG_X4, 0);
+		write_ctx_reg(gp_regs, CTX_GPREG_X5, 0);
+		write_ctx_reg(gp_regs, CTX_GPREG_X6, 1);
+		(void)mce_command_handler(MCE_CMD_UPDATE_CSTATE_INFO,
+			TEGRA_ARI_CLUSTER_CC7, 0, TEGRA_ARI_SYSTEM_SC1);
 	}
 
 	return PSCI_E_SUCCESS;
diff --git a/plat/nvidia/tegra/soc/t186/plat_secondary.c b/plat/nvidia/tegra/soc/t186/plat_secondary.c
index df80289..406c1e0 100644
--- a/plat/nvidia/tegra/soc/t186/plat_secondary.c
+++ b/plat/nvidia/tegra/soc/t186/plat_secondary.c
@@ -28,10 +28,13 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <arch_helpers.h>
 #include <debug.h>
 #include <mce.h>
 #include <mmio.h>
+#include <string.h>
 #include <tegra_def.h>
+#include <tegra_private.h>
 
 #define MISCREG_CPU_RESET_VECTOR	0x2000
 #define MISCREG_AA64_RST_LOW		0x2004
@@ -42,7 +45,8 @@
 
 #define CPU_RESET_MODE_AA64		1
 
-extern void tegra_secure_entrypoint(void);
+extern uint64_t tegra_bl31_phys_base;
+extern uint64_t __tegra186_cpu_reset_handler_end;
 
 /*******************************************************************************
  * Setup secondary CPU vectors
@@ -50,12 +54,31 @@
 void plat_secondary_setup(void)
 {
 	uint32_t addr_low, addr_high;
-	uint64_t reset_addr = (uint64_t)tegra_secure_entrypoint;
+	plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	uint64_t cpu_reset_handler_base;
 
 	INFO("Setting up secondary CPU boot\n");
 
-	addr_low = (uint32_t)reset_addr | CPU_RESET_MODE_AA64;
-	addr_high = (uint32_t)((reset_addr >> 32) & 0x7ff);
+	if ((tegra_bl31_phys_base >= TEGRA_TZRAM_BASE) &&
+	    (tegra_bl31_phys_base <= (TEGRA_TZRAM_BASE + TEGRA_TZRAM_SIZE))) {
+
+		/*
+		 * The BL31 code resides in the TZSRAM which loses state
+		 * when we enter System Suspend. Copy the wakeup trampoline
+		 * code to TZDRAM to help us exit from System Suspend.
+		 */
+		cpu_reset_handler_base = params_from_bl2->tzdram_base;
+		memcpy16((void *)((uintptr_t)cpu_reset_handler_base),
+			 (void *)(uintptr_t)tegra186_cpu_reset_handler,
+			 (uintptr_t)&__tegra186_cpu_reset_handler_end -
+			 (uintptr_t)tegra186_cpu_reset_handler);
+
+	} else {
+		cpu_reset_handler_base = (uintptr_t)tegra_secure_entrypoint;
+	}
+
+	addr_low = (uint32_t)cpu_reset_handler_base | CPU_RESET_MODE_AA64;
+	addr_high = (uint32_t)((cpu_reset_handler_base >> 32) & 0x7ff);
 
 	/* write lower 32 bits first, then the upper 11 bits */
 	mmio_write_32(TEGRA_MISC_BASE + MISCREG_AA64_RST_LOW, addr_low);
diff --git a/plat/nvidia/tegra/soc/t186/plat_trampoline.S b/plat/nvidia/tegra/soc/t186/plat_trampoline.S
new file mode 100644
index 0000000..5e0a9d7
--- /dev/null
+++ b/plat/nvidia/tegra/soc/t186/plat_trampoline.S
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <memctrl_v2.h>
+#include <tegra_def.h>
+
+#define TEGRA186_SMMU_CTX_SIZE		0x420
+
+	.align 4
+	.globl	tegra186_cpu_reset_handler
+
+/* CPU reset handler routine */
+func tegra186_cpu_reset_handler
+	/*
+	 * The Memory Controller loses state during System Suspend. We
+	 * use this information to decide if the reset handler is running
+	 * after a System Suspend. Resume from system suspend requires
+	 * restoring the entire state from TZDRAM to TZRAM.
+	 */
+	mov	x1, #TEGRA_MC_BASE
+	ldr	w0, [x1, #MC_SECURITY_CFG3_0]
+	lsl	x0, x0, #32
+	ldr	w0, [x1, #MC_SECURITY_CFG0_0]
+	adr	x1, tegra186_cpu_reset_handler
+	cmp	x0, x1
+	beq	boot_cpu
+
+	/* resume from system suspend */
+	mov	x0, #BL31_BASE
+	adr	x1, __tegra186_cpu_reset_handler_end
+	adr	x2, __tegra186_cpu_reset_handler_data
+	ldr	x2, [x2, #8]
+
+	/* memcpy16 */
+m_loop16:
+	cmp	x2, #16
+	b.lt	m_loop1
+	ldp	x3, x4, [x1], #16
+	stp	x3, x4, [x0], #16
+	sub	x2, x2, #16
+	b	m_loop16
+	/* copy byte per byte */
+m_loop1:
+	cbz	x2, boot_cpu
+	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	m_loop1
+
+boot_cpu:
+	adr	x0, __tegra186_cpu_reset_handler_data
+	ldr	x0, [x0]
+	br	x0
+endfunc tegra186_cpu_reset_handler
+
+	/*
+	 * Tegra186 reset data (offset 0x0 - 0x430)
+	 *
+	 * 0x000: secure world's entrypoint
+	 * 0x008: BL31 size (RO + RW)
+	 * 0x00C: SMMU context start
+	 * 0x42C: SMMU context end
+	 */
+
+	.align 4
+	.type	__tegra186_cpu_reset_handler_data, %object
+	.globl	__tegra186_cpu_reset_handler_data
+__tegra186_cpu_reset_handler_data:
+	.quad	tegra_secure_entrypoint
+	.quad	__BL31_END__ - BL31_BASE
+	.rept	TEGRA186_SMMU_CTX_SIZE
+	.quad	0
+	.endr
+	.size	__tegra186_cpu_reset_handler_data, \
+		. - __tegra186_cpu_reset_handler_data
+
+	.align 4
+	.globl	__tegra186_cpu_reset_handler_end
+__tegra186_cpu_reset_handler_end:
diff --git a/plat/nvidia/tegra/soc/t186/platform_t186.mk b/plat/nvidia/tegra/soc/t186/platform_t186.mk
index b8eaa7a..0387a0a 100644
--- a/plat/nvidia/tegra/soc/t186/platform_t186.mk
+++ b/plat/nvidia/tegra/soc/t186/platform_t186.mk
@@ -29,9 +29,6 @@
 #
 
 # platform configs
-ENABLE_NS_L2_CPUECTRL_RW_ACCESS		:= 1
-$(eval $(call add_define,ENABLE_NS_L2_CPUECTRL_RW_ACCESS))
-
 ENABLE_ROC_FOR_ORDERING_CLIENT_REQUESTS	:= 1
 $(eval $(call add_define,ENABLE_ROC_FOR_ORDERING_CLIENT_REQUESTS))
 
@@ -45,10 +42,10 @@
 PLATFORM_MAX_CPUS_PER_CLUSTER		:= 4
 $(eval $(call add_define,PLATFORM_MAX_CPUS_PER_CLUSTER))
 
-MAX_XLAT_TABLES				:= 16
+MAX_XLAT_TABLES				:= 20
 $(eval $(call add_define,MAX_XLAT_TABLES))
 
-MAX_MMAP_REGIONS			:= 16
+MAX_MMAP_REGIONS			:= 20
 $(eval $(call add_define,MAX_MMAP_REGIONS))
 
 # platform files
@@ -65,4 +62,5 @@
 				${SOC_DIR}/plat_psci_handlers.c		\
 				${SOC_DIR}/plat_setup.c			\
 				${SOC_DIR}/plat_secondary.c		\
-				${SOC_DIR}/plat_sip_calls.c
+				${SOC_DIR}/plat_sip_calls.c		\
+				${SOC_DIR}/plat_trampoline.S