AArch32: Add `memcpy4` function in assembly

At present the `el3_entrypoint_common` macro uses `memcpy`
function defined in lib/stdlib/mem.c file, to copy data
from ROM to RAM for BL1. Depending on the compiler being
used the stack could potentially be used, in `memcpy`,
for storing the local variables. Since the stack is
initialized much later in `el3_entrypoint_common` it
may result in unknown behaviour.

This patch adds `memcpy4` function definition in assembly so
that it can be used before the stack is initialized and it
also replaces `memcpy` by `memcpy4` in `el3_entrypoint_common`
macro, to copy data from ROM to RAM for BL1.

Change-Id: I3357a0e8095f05f71bbbf0b185585d9499bfd5e0
diff --git a/include/common/aarch32/el3_common_macros.S b/include/common/aarch32/el3_common_macros.S
index a572ef9..50ce952 100644
--- a/include/common/aarch32/el3_common_macros.S
+++ b/include/common/aarch32/el3_common_macros.S
@@ -261,7 +261,7 @@
 		ldr	r0, =__DATA_RAM_START__
 		ldr	r1, =__DATA_ROM_START__
 		ldr	r2, =__DATA_SIZE__
-		bl	memcpy
+		bl	memcpy4
 #endif
 	.endif /* _init_c_runtime */
 
diff --git a/lib/aarch32/misc_helpers.S b/lib/aarch32/misc_helpers.S
index fd7c6dd..bf4084a 100644
--- a/lib/aarch32/misc_helpers.S
+++ b/lib/aarch32/misc_helpers.S
@@ -34,6 +34,7 @@
 
 	.globl	smc
 	.globl	zeromem
+	.globl	memcpy4
 	.globl	disable_mmu_icache_secure
 	.globl	disable_mmu_secure
 
@@ -73,6 +74,40 @@
 	bx	lr
 endfunc zeromem
 
+/* --------------------------------------------------------------------------
+ * void memcpy4(void *dest, const void *src, unsigned int length)
+ *
+ * Copy length bytes from memory area src to memory area dest.
+ * The memory areas should not overlap.
+ * Destination and source addresses must be 4-byte aligned.
+ * --------------------------------------------------------------------------
+ */
+func memcpy4
+#if ASM_ASSERTION
+	orr	r3, r0, r1
+	tst	r3, #0x3
+	ASM_ASSERT(eq)
+#endif
+/* copy 4 bytes at a time */
+m_loop4:
+	cmp	r2, #4
+	blt	m_loop1
+	ldr	r3, [r1], #4
+	str	r3, [r0], #4
+	sub	r2, r2, #4
+	b	m_loop4
+/* copy byte per byte */
+m_loop1:
+	cmp	r2,#0
+	beq	m_end
+	ldrb	r3, [r1], #1
+	strb	r3, [r0], #1
+	subs	r2, r2, #1
+	bne	m_loop1
+m_end:
+	bx	lr
+endfunc memcpy4
+
 /* ---------------------------------------------------------------------------
  * Disable the MMU in Secure State
  * ---------------------------------------------------------------------------