ART: Fix stub_test inline assembly for ARM64
The rebased Clang resolves register constraints differently from
before, so the inline assembly can no longer rely on particular
registers holding the operands. Change the inline assembly to spill
the argument registers (x0-x7) around the call, reload call
parameters explicitly from the stack, and declare the full set of
clobbered general-purpose and FP registers.
Bug: 15393246
Change-Id: Iea2c2693f0a035db541ff91f796ee5f997a2ced7
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 44edd4b..0b7f268c 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -125,27 +125,24 @@
: ); // clobber.
#elif defined(__aarch64__)
__asm__ __volatile__(
- // Spill space for d8 - d15
+ // Spill x0-x7 which we say we don't clobber. May contain args.
"sub sp, sp, #64\n\t"
".cfi_adjust_cfa_offset 64\n\t"
- "stp d8, d9, [sp]\n\t"
- "stp d10, d11, [sp, #16]\n\t"
- "stp d12, d13, [sp, #32]\n\t"
- "stp d14, d15, [sp, #48]\n\t"
+ "stp x0, x1, [sp]\n\t"
+ "stp x2, x3, [sp, #16]\n\t"
+ "stp x4, x5, [sp, #32]\n\t"
+ "stp x6, x7, [sp, #48]\n\t"
- "sub sp, sp, #48\n\t" // Reserve stack space, 16B aligned
- ".cfi_adjust_cfa_offset 48\n\t"
- "stp %[referrer], x1, [sp]\n\t"// referrer, x1
- "stp x2, x3, [sp, #16]\n\t" // Save x2, x3
- "stp x18, x30, [sp, #32]\n\t" // Save x18(xSELF), xLR
+ "sub sp, sp, #16\n\t" // Reserve stack space, 16B aligned
+ ".cfi_adjust_cfa_offset 16\n\t"
+ "str %[referrer], [sp]\n\t" // referrer
// Push everything on the stack, so we don't rely on the order. What a mess. :-(
"sub sp, sp, #48\n\t"
".cfi_adjust_cfa_offset 48\n\t"
- "str %[arg0], [sp]\n\t"
- "str %[arg1], [sp, #8]\n\t"
- "str %[arg2], [sp, #16]\n\t"
- "str %[code], [sp, #24]\n\t"
+ // All things are "r" constraints, so direct str/stp should work.
+ "stp %[arg0], %[arg1], [sp]\n\t"
+ "stp %[arg2], %[code], [sp, #16]\n\t"
"str %[self], [sp, #32]\n\t"
// Now we definitely have x0-x3 free, use it to garble d8 - d15
@@ -169,17 +166,18 @@
"add x0, x0, 1\n\t"
"fmov d15, x0\n\t"
- // Load call params
- "ldr x0, [sp]\n\t"
- "ldr x1, [sp, #8]\n\t"
- "ldr x2, [sp, #16]\n\t"
- "ldr x3, [sp, #24]\n\t"
+ // Load call params into the right registers.
+ "ldp x0, x1, [sp]\n\t"
+ "ldp x2, x3, [sp, #16]\n\t"
"ldr x18, [sp, #32]\n\t"
"add sp, sp, #48\n\t"
".cfi_adjust_cfa_offset -48\n\t"
"blr x3\n\t" // Call the stub
+ "mov x8, x0\n\t" // Store result
+ "add sp, sp, #16\n\t" // Drop the quick "frame"
+ ".cfi_adjust_cfa_offset -16\n\t"
// Test d8 - d15. We can use x1 and x2.
"movk x1, #0xfad0\n\t"
@@ -225,31 +223,25 @@
"cmp x1, x2\n\t"
"b.ne 1f\n\t"
- "mov x2, #0\n\t"
- "str x2, %[fpr_result]\n\t"
+ "mov x9, #0\n\t" // Use x9 as flag, in clobber list
// Finish up.
"2:\n\t"
- "ldp x1, x2, [sp, #8]\n\t" // Restore x1, x2
- "ldp x3, x18, [sp, #24]\n\t" // Restore x3, xSELF
- "ldr x30, [sp, #40]\n\t" // Restore xLR
- "add sp, sp, #48\n\t" // Free stack space
- ".cfi_adjust_cfa_offset -48\n\t"
- "mov %[result], x0\n\t" // Save the result
-
- "ldp d8, d9, [sp]\n\t" // Restore d8 - d15
- "ldp d10, d11, [sp, #16]\n\t"
- "ldp d12, d13, [sp, #32]\n\t"
- "ldp d14, d15, [sp, #48]\n\t"
- "add sp, sp, #64\n\t"
+ "ldp x0, x1, [sp]\n\t" // Restore stuff not named clobbered, may contain fpr_result
+ "ldp x2, x3, [sp, #16]\n\t"
+ "ldp x4, x5, [sp, #32]\n\t"
+ "ldp x6, x7, [sp, #48]\n\t"
+ "add sp, sp, #64\n\t" // Free stack space, now sp as on entry
".cfi_adjust_cfa_offset -64\n\t"
+ "str x9, %[fpr_result]\n\t" // Store the FPR comparison result
+ "mov %[result], x8\n\t" // Store the call result
+
"b 3f\n\t" // Goto end
// Failed fpr verification.
"1:\n\t"
- "mov x2, #1\n\t"
- "str x2, %[fpr_result]\n\t"
+ "mov x9, #1\n\t"
"b 2b\n\t" // Goto finish-up
// End
@@ -258,7 +250,12 @@
// Use the result from r0
: [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
[referrer] "r"(referrer), [fpr_result] "m" (fpr_result)
- : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"); // clobber.
+ : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20",
+ "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x30",
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"); // clobber.
#elif defined(__x86_64__)
// Note: Uses the native convention
// TODO: Set the thread?
@@ -351,29 +348,25 @@
: ); // clobber.
#elif defined(__aarch64__)
__asm__ __volatile__(
- // Spill space for d8 - d15
+ // Spill x0-x7 which we say we don't clobber. May contain args.
"sub sp, sp, #64\n\t"
".cfi_adjust_cfa_offset 64\n\t"
- "stp d8, d9, [sp]\n\t"
- "stp d10, d11, [sp, #16]\n\t"
- "stp d12, d13, [sp, #32]\n\t"
- "stp d14, d15, [sp, #48]\n\t"
+ "stp x0, x1, [sp]\n\t"
+ "stp x2, x3, [sp, #16]\n\t"
+ "stp x4, x5, [sp, #32]\n\t"
+ "stp x6, x7, [sp, #48]\n\t"
- "sub sp, sp, #48\n\t" // Reserve stack space, 16B aligned
- ".cfi_adjust_cfa_offset 48\n\t"
- "stp %[referrer], x1, [sp]\n\t"// referrer, x1
- "stp x2, x3, [sp, #16]\n\t" // Save x2, x3
- "stp x18, x30, [sp, #32]\n\t" // Save x18(xSELF), xLR
+ "sub sp, sp, #16\n\t" // Reserve stack space, 16B aligned
+ ".cfi_adjust_cfa_offset 16\n\t"
+ "str %[referrer], [sp]\n\t" // referrer
// Push everything on the stack, so we don't rely on the order. What a mess. :-(
"sub sp, sp, #48\n\t"
".cfi_adjust_cfa_offset 48\n\t"
- "str %[arg0], [sp]\n\t"
- "str %[arg1], [sp, #8]\n\t"
- "str %[arg2], [sp, #16]\n\t"
- "str %[code], [sp, #24]\n\t"
- "str %[self], [sp, #32]\n\t"
- "str %[hidden], [sp, #40]\n\t"
+ // All things are "r" constraints, so direct str/stp should work.
+ "stp %[arg0], %[arg1], [sp]\n\t"
+ "stp %[arg2], %[code], [sp, #16]\n\t"
+ "stp %[self], %[hidden], [sp, #32]\n\t"
// Now we definitely have x0-x3 free, use it to garble d8 - d15
"movk x0, #0xfad0\n\t"
@@ -396,18 +389,17 @@
"add x0, x0, 1\n\t"
"fmov d15, x0\n\t"
- // Load call params
- "ldr x0, [sp]\n\t"
- "ldr x1, [sp, #8]\n\t"
- "ldr x2, [sp, #16]\n\t"
- "ldr x3, [sp, #24]\n\t"
- "ldr x18, [sp, #32]\n\t"
- "ldr x12, [sp, #40]\n\t"
+ // Load call params into the right registers.
+ "ldp x0, x1, [sp]\n\t"
+ "ldp x2, x3, [sp, #16]\n\t"
+ "ldp x18, x12, [sp, #32]\n\t"
"add sp, sp, #48\n\t"
".cfi_adjust_cfa_offset -48\n\t"
-
"blr x3\n\t" // Call the stub
+ "mov x8, x0\n\t" // Store result
+ "add sp, sp, #16\n\t" // Drop the quick "frame"
+ ".cfi_adjust_cfa_offset -16\n\t"
// Test d8 - d15. We can use x1 and x2.
"movk x1, #0xfad0\n\t"
@@ -453,38 +445,39 @@
"cmp x1, x2\n\t"
"b.ne 1f\n\t"
- "mov %[fpr_result], #0\n\t"
+ "mov x9, #0\n\t" // Use x9 as flag, in clobber list
// Finish up.
"2:\n\t"
- "ldp x1, x2, [sp, #8]\n\t" // Restore x1, x2
- "ldp x3, x18, [sp, #24]\n\t" // Restore x3, xSELF
- "ldr x30, [sp, #40]\n\t" // Restore xLR
- "add sp, sp, #48\n\t" // Free stack space
- ".cfi_adjust_cfa_offset -48\n\t"
- "mov %[result], x0\n\t" // Save the result
-
- "ldp d8, d9, [sp]\n\t" // Restore d8 - d15
- "ldp d10, d11, [sp, #16]\n\t"
- "ldp d12, d13, [sp, #32]\n\t"
- "ldp d14, d15, [sp, #48]\n\t"
- "add sp, sp, #64\n\t"
+ "ldp x0, x1, [sp]\n\t" // Restore stuff not named clobbered, may contain fpr_result
+ "ldp x2, x3, [sp, #16]\n\t"
+ "ldp x4, x5, [sp, #32]\n\t"
+ "ldp x6, x7, [sp, #48]\n\t"
+ "add sp, sp, #64\n\t" // Free stack space, now sp as on entry
".cfi_adjust_cfa_offset -64\n\t"
+ "str x9, %[fpr_result]\n\t" // Store the FPR comparison result
+ "mov %[result], x8\n\t" // Store the call result
+
"b 3f\n\t" // Goto end
// Failed fpr verification.
"1:\n\t"
- "mov %[fpr_result], #1\n\t"
+ "mov x9, #1\n\t"
"b 2b\n\t" // Goto finish-up
// End
"3:\n\t"
- : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
- // Use the result from r0
+ : [result] "=r" (result)
+ // Use the result from r0
: [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
- [referrer] "r"(referrer), [hidden] "r"(hidden)
- : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"); // clobber.
+ [referrer] "r"(referrer), [hidden] "r"(hidden), [fpr_result] "m" (fpr_result)
+ : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20",
+ "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x30",
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"); // clobber.
#elif defined(__x86_64__)
// Note: Uses the native convention
// TODO: Set the thread?