Optimizing String.Equals as an intrinsic (ARM64)
The fifth implementation of String.Equals. I added an intrinsic
in ARM64 which is similar to the original java implementation
of String.equals: an instanceof check, null check, length check, and
reference equality check followed by a loop comparing strings four
characters at a time starting at the beginning of the string.
Interesting Benchmarking Values:
64 Bit Nexus 9:
Intrinsic Short (1-5 Character) Strings: 40 ns
Original Short (1-5 Character) Strings: 80 ns
Intrinsic Very Long (1000+ Character) Strings: 1556 ns
Original Very Long (1000+ Character) Strings: 4554 ns
Intrinsic Non-String Argument: 15 ns
Original Non-String Argument: 62 ns
Bug: 21481923
Change-Id: If37b399614c2250f52ac709a3b50c356419ca88a
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 5631373..a5332ea 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1055,6 +1055,102 @@
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Temporary registers to store lengths of strings and for calculations.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
+ vixl::MacroAssembler* masm = GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register str = WRegisterFrom(locations->InAt(0));
+ Register arg = WRegisterFrom(locations->InAt(1));
+ Register out = XRegisterFrom(locations->Out());
+
+ UseScratchRegisterScope scratch_scope(masm);
+ Register temp = scratch_scope.AcquireW();
+ Register temp1 = WRegisterFrom(locations->GetTemp(0));
+ Register temp2 = WRegisterFrom(locations->GetTemp(1));
+
+ vixl::Label loop;
+ vixl::Label end;
+ vixl::Label return_true;
+ vixl::Label return_false;
+
+ // Get offsets of count, value, and class fields within a string object.
+ const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+ const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // Check if input is null, return false if it is.
+ __ Cbz(arg, &return_false);
+
+ // Reference equality check, return true if same reference.
+ __ Cmp(str, arg);
+ __ B(&return_true, eq);
+
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ Ldr(temp, MemOperand(str.X(), class_offset));
+ __ Ldr(temp1, MemOperand(arg.X(), class_offset));
+ __ Cmp(temp, temp1);
+ __ B(&return_false, ne);
+
+ // Load lengths of this and argument strings.
+ __ Ldr(temp, MemOperand(str.X(), count_offset));
+ __ Ldr(temp1, MemOperand(arg.X(), count_offset));
+ // Check if lengths are equal, return false if they're not.
+ __ Cmp(temp, temp1);
+ __ B(&return_false, ne);
+ // Store offset of string value in preparation for comparison loop
+ __ Mov(temp1, value_offset);
+ // Return true if both strings are empty.
+ __ Cbz(temp, &return_true);
+
+ // Assertions that must hold in order to compare strings 4 characters at a time.
+ DCHECK_ALIGNED(value_offset, 8);
+ static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+ temp1 = temp1.X();
+ temp2 = temp2.X();
+
+ // Loop to compare strings 4 characters at a time starting at the beginning of the string.
+ // Ok to do this because strings are zero-padded to be 8-byte aligned.
+ __ Bind(&loop);
+ __ Ldr(out, MemOperand(str.X(), temp1));
+ __ Ldr(temp2, MemOperand(arg.X(), temp1));
+ __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
+ __ Cmp(out, temp2);
+ __ B(&return_false, ne);
+ __ Sub(temp, temp, Operand(4), SetFlags);
+ __ B(&loop, gt);
+
+ // Return true and exit the function.
+ // If loop does not result in returning false, we return true.
+ __ Bind(&return_true);
+ __ Mov(out, 1);
+ __ B(&end);
+
+ // Return false and exit the function.
+ __ Bind(&return_false);
+ __ Mov(out, 0);
+ __ Bind(&end);
+}
+
static void GenerateVisitStringIndexOf(HInvoke* invoke,
vixl::MacroAssembler* masm,
CodeGeneratorARM64* codegen,
@@ -1229,7 +1325,6 @@
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
#undef UNIMPLEMENTED_INTRINSIC