bionic: arm64: generic: strcmp: align to 64B cache line

Align strcmp to 64B. This will ensure the preformance critical
loop is within one 64B cache line.

Change-Id: I88eef2f12b2a6442cacec9cdbdffbf17293e7d32
Signed-off-by: Yuanyuan Zhong <zyy@motorola.com>
Reviewed-on: https://gerrit.mot.com/902536
SME-Granted: SME Approvals Granted
SLTApproved: Slta Waiver <sltawvr@motorola.com>
Tested-by: Jira Key <jirakey@motorola.com>
Reviewed-by: Yi-Wei Zhao <gbjc64@motorola.com>
Reviewed-by: Igor Kovalenko <igork@motorola.com>
Submit-Approved: Jira Key <jirakey@motorola.com>
diff --git a/libc/arch-arm64/generic/bionic/strcmp.S b/libc/arch-arm64/generic/bionic/strcmp.S
index 3cce478..271452d 100644
--- a/libc/arch-arm64/generic/bionic/strcmp.S
+++ b/libc/arch-arm64/generic/bionic/strcmp.S
@@ -57,6 +57,7 @@
 
 	/* Start of performance-critical section  -- one 64B cache line.  */
 ENTRY(strcmp)
+.p2align  6
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7