Revert^2 "x86_64: Implement VarHandle.get{,Acquire,Opaque,Volatile} for byte array views."
This reverts commit 6620caa89a4691bc5cbdbc5af7599b17fd8896f5.
Reason for revert: Relanding the original change after fixing a linker error:
ld.lld: error: undefined symbol: art::ObjPtr<art::mirror::Class>
art::GetClassRoot<(art::ReadBarrierOption)0>(art::ClassRoot)
The error was caused by a missing header that contains the `inline`
definition of the above function. Whether the error manifests depends
on the compiler: if it declines to inline the function somewhere, that
translation unit emits an out-of-line copy of the definition which
resolves the reference, and there is no linker error.
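For illustration, a minimal sketch of the failure mode (the file and
function names below are hypothetical, not the actual ART headers):

  // foo.h - declaration only.
  int Foo();

  // foo-inl.h - the sole definition, marked `inline`, so a translation
  // unit that includes it emits at most a discardable COMDAT copy.
  inline int Foo() { return 42; }

  // user.cc - includes only foo.h; the call compiles to an external
  // reference to Foo(). The link succeeds only if another translation
  // unit that includes foo-inl.h emits an out-of-line copy, i.e. only
  // if its compiler declined to inline every call.
  #include "foo.h"
  int Use() { return Foo(); }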
Bug: 71781600
Test: Manually marked the function as always-inline, verified that the
error reproduces without the header, and that including it fixes the link.
Change-Id: Ibcea2c3fc81ea75b8e6e6517d9ce872e79eda0d6
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 0584dc1..dae2ae2 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -18,6 +18,7 @@
#include "arch/x86_64/jni_frame_x86_64.h"
#include "art_method-inl.h"
+#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
@@ -1286,6 +1287,18 @@
}
}
+void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
+ if (GetCompilerOptions().IsBootImage()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
+ // Emit the instruction whose trailing 32-bit constant gets patched; the
+ // label below must be bound at its end (see the comment after this
+ // function about kLabelPositionToLiteralOffsetAdjustment).
+ __ leal(reg, Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
+ boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
+ __ Bind(&boot_image_type_patches_.back().label);
+ } else {
+ uint32_t boot_image_offset = GetBootImageOffset(class_root);
+ LoadBootImageAddress(reg, boot_image_offset);
+ }
+}
+
// The label points to the end of the "movl" or another instruction but the literal offset
// for method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
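For context, the adjustment is consumed when the recorded labels are
turned into linker patches; roughly (paraphrasing the existing pattern
in this file, not quoting it verbatim):

  for (const PatchInfo<Label>& info : boot_image_type_patches_) {
    // The label was bound at the end of the patched instruction, so
    // the embedded 32-bit constant starts 4 bytes before it.
    uint32_t literal_offset =
        info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    // ... record a linker patch at literal_offset ...
  }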
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index db0b9d7..3e601bb 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -488,6 +488,7 @@
void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
+ void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);
void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index dcdab10..a7c5639 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -4457,7 +4457,7 @@
codegen->GetCompilerOptions().IsBootImage() ||
!Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
- size_t can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
+ bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
vixl::aarch64::Label* slow_path_label =
can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
__ Cmp(temp2, static_cast<uint16_t>(primitive_type));
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 53e3886..a3ad409 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -4217,7 +4217,7 @@
codegen->GetCompilerOptions().IsBootImage() ||
!Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
- size_t can_be_view =
+ bool can_be_view =
((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
boot_image_available;
vixl32::Label* slow_path_label =
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ca319ef..0abcbc7 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -184,25 +184,43 @@
locations->SetOut(Location::SameAsFirstInput());
}
-static void GenReverseBytes(LocationSummary* locations,
- DataType::Type size,
- X86_64Assembler* assembler) {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
- switch (size) {
+static void GenReverseBytes(Location out,
+ DataType::Type type,
+ X86_64Assembler* assembler,
+ CpuRegister temp = CpuRegister(kNoRegister)) {
+ switch (type) {
case DataType::Type::kInt16:
// TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
- __ bswapl(out);
- __ sarl(out, Immediate(16));
+ __ bswapl(out.AsRegister<CpuRegister>());
+ __ sarl(out.AsRegister<CpuRegister>(), Immediate(16));
+ break;
+ case DataType::Type::kUint16:
+ // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+ __ bswapl(out.AsRegister<CpuRegister>());
+ __ shrl(out.AsRegister<CpuRegister>(), Immediate(16));
break;
case DataType::Type::kInt32:
- __ bswapl(out);
+ case DataType::Type::kUint32:
+ __ bswapl(out.AsRegister<CpuRegister>());
break;
case DataType::Type::kInt64:
- __ bswapq(out);
+ case DataType::Type::kUint64:
+ __ bswapq(out.AsRegister<CpuRegister>());
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_NE(temp.AsRegister(), kNoRegister);
+ __ movd(temp, out.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
+ __ bswapl(temp);
+ __ movd(out.AsFpuRegister<XmmRegister>(), temp, /*is64bit=*/ false);
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_NE(temp.AsRegister(), kNoRegister);
+ __ movd(temp, out.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
+ __ bswapq(temp);
+ __ movd(out.AsFpuRegister<XmmRegister>(), temp, /*is64bit=*/ true);
break;
default:
- LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+ LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
UNREACHABLE();
}
}
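The new kFloat32/kFloat64 cases round-trip the value through a
general-purpose temp because bswap only operates on integer registers.
A stand-alone C++ analog of the kFloat32 sequence (illustration only;
SwapFloatBytes is a hypothetical name, not part of this change):

  #include <bit>      // std::bit_cast, C++20
  #include <cstdint>

  float SwapFloatBytes(float value) {
    uint32_t bits = std::bit_cast<uint32_t>(value);  // movd temp, xmm
    bits = __builtin_bswap32(bits);                  // bswapl temp
    return std::bit_cast<float>(bits);               // movd xmm, temp
  }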
@@ -212,7 +230,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
- GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
+ GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt32, GetAssembler());
}
void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
@@ -220,7 +238,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
- GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
+ GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt64, GetAssembler());
}
void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
@@ -228,7 +246,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
- GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
+ GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt16, GetAssembler());
}
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -3202,9 +3220,45 @@
__ imulq(y);
}
+class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
+ public:
+ explicit VarHandleSlowPathX86_64(HInvoke* invoke)
+ : IntrinsicSlowPathX86_64(invoke) {
+ }
+
+ Label* GetByteArrayViewCheckLabel() {
+ return &byte_array_view_check_label_;
+ }
+
+ Label* GetNativeByteOrderLabel() {
+ return &native_byte_order_label_;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ if (GetByteArrayViewCheckLabel()->IsLinked()) {
+ EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
+ }
+ IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
+ }
+
+ private:
+ HInvoke* GetInvoke() const {
+ return GetInstruction()->AsInvoke();
+ }
+
+ mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
+ return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
+ }
+
+ void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
+
+ Label byte_array_view_check_label_;
+ Label native_byte_order_label_;
+};
+
// Generate subtype check without read barriers.
static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
- SlowPathCode* slow_path,
+ VarHandleSlowPathX86_64* slow_path,
CpuRegister object,
CpuRegister temp,
Address type_address,
@@ -3244,7 +3298,7 @@
// check without read barrier, so it can have false negatives which we handle in the slow path.
static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
- SlowPathCode* slow_path,
+ VarHandleSlowPathX86_64* slow_path,
DataType::Type type) {
X86_64Assembler* assembler = codegen->GetAssembler();
@@ -3296,7 +3350,7 @@
static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
- SlowPathCode* slow_path) {
+ VarHandleSlowPathX86_64* slow_path) {
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -3312,7 +3366,7 @@
static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
- SlowPathCode* slow_path) {
+ VarHandleSlowPathX86_64* slow_path) {
VarHandleOptimizations optimizations(invoke);
X86_64Assembler* assembler = codegen->GetAssembler();
@@ -3348,7 +3402,7 @@
static void GenerateVarHandleArrayChecks(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
- SlowPathCode* slow_path) {
+ VarHandleSlowPathX86_64* slow_path) {
VarHandleOptimizations optimizations(invoke);
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -3405,10 +3459,25 @@
__ testl(temp, temp);
__ j(kZero, slow_path->GetEntryLabel());
- // TODO: handle byte array views. Currently the check below always fails for them, so they fall
- // back to slow path.
+ // Check that the array component type matches the primitive type.
+ Label* slow_path_label;
+ if (primitive_type == Primitive::kPrimNot) {
+ slow_path_label = slow_path->GetEntryLabel();
+ } else {
+ // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
+ // we shall check for a byte array view in the slow path.
+ // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
+ // so we cannot emit that if we're JITting without boot image.
+ bool boot_image_available =
+ codegen->GetCompilerOptions().IsBootImage() ||
+ !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
+ DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
+ bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
+ slow_path_label =
+ can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
+ }
__ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, slow_path_label);
// Check for array index out of bounds.
__ cmpl(index, Address(object, array_length_offset.Int32Value()));
@@ -3417,7 +3486,7 @@
static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
- SlowPathCode* slow_path) {
+ VarHandleSlowPathX86_64* slow_path) {
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
if (expected_coordinates_count == 0u) {
GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
@@ -3429,11 +3498,11 @@
}
}
-static SlowPathCode* GenerateVarHandleChecks(HInvoke* invoke,
- CodeGeneratorX86_64* codegen,
- DataType::Type type) {
- SlowPathCode* slow_path =
- new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
+static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
+ CodeGeneratorX86_64* codegen,
+ DataType::Type type) {
+ VarHandleSlowPathX86_64* slow_path =
+ new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
codegen->AddSlowPath(slow_path);
GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
@@ -3579,16 +3648,22 @@
}
}
-static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+static void GenerateVarHandleGet(HInvoke* invoke,
+ CodeGeneratorX86_64* codegen,
+ bool byte_swap = false) {
DataType::Type type = invoke->GetType();
DCHECK_NE(type, DataType::Type::kVoid);
LocationSummary* locations = invoke->GetLocations();
X86_64Assembler* assembler = codegen->GetAssembler();
- SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
VarHandleTarget target = GetVarHandleTarget(invoke);
- GenerateVarHandleTarget(invoke, target, codegen);
+ VarHandleSlowPathX86_64* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
// Load the value from the field
Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
@@ -3603,11 +3678,18 @@
__ movl(out.AsRegister<CpuRegister>(), src);
__ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
}
+ DCHECK(!byte_swap);
} else {
codegen->LoadFromMemoryNoReference(type, out, src);
+ if (byte_swap) {
+ CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ GenReverseBytes(out, type, assembler, temp);
+ }
}
- __ Bind(slow_path->GetExitLabel());
+ if (!byte_swap) {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
@@ -3665,7 +3747,7 @@
uint32_t value_index = invoke->GetNumberOfArguments() - 1;
DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
- SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
+ VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
VarHandleTarget target = GetVarHandleTarget(invoke);
GenerateVarHandleTarget(invoke, target, codegen);
@@ -3792,7 +3874,7 @@
uint32_t new_value_index = number_of_arguments - 1;
DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
- SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+ VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
VarHandleTarget target = GetVarHandleTarget(invoke);
GenerateVarHandleTarget(invoke, target, codegen);
@@ -3927,7 +4009,7 @@
uint32_t value_index = number_of_arguments - 1;
DataType::Type type = invoke->GetType();
- SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+ VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
VarHandleTarget target = GetVarHandleTarget(invoke);
GenerateVarHandleTarget(invoke, target, codegen);
@@ -4103,7 +4185,7 @@
uint32_t value_index = number_of_arguments - 1;
DataType::Type type = invoke->GetType();
- SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+ VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
VarHandleTarget target = GetVarHandleTarget(invoke);
GenerateVarHandleTarget(invoke, target, codegen);
@@ -4260,7 +4342,7 @@
uint32_t value_index = number_of_arguments - 1;
DataType::Type type = invoke->GetType();
- SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+ VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
VarHandleTarget target = GetVarHandleTarget(invoke);
GenerateVarHandleTarget(invoke, target, codegen);
@@ -4481,6 +4563,68 @@
/*need_any_any_barrier=*/ false);
}
+void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
+ DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
+ X86_64Assembler* assembler = codegen->GetAssembler();
+
+ HInvoke* invoke = GetInvoke();
+ LocationSummary* locations = invoke->GetLocations();
+ mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
+ DataType::Type value_type =
+ GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
+ DCHECK_NE(value_type, DataType::Type::kReference);
+ size_t size = DataType::Size(value_type);
+ DCHECK_GT(size, 1u);
+
+ CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
+ CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1).AsRegister<CpuRegister>();
+
+ MemberOffset class_offset = mirror::Object::ClassOffset();
+ MemberOffset array_length_offset = mirror::Array::LengthOffset();
+ MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
+ MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+
+ __ Bind(GetByteArrayViewCheckLabel());
+
+ // The main path checked that the coordinateType0 is an array class that matches
+ // the class of the actual coordinate argument but it does not match the value type.
+ // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
+ codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
+ assembler->MaybePoisonHeapReference(temp);
+ __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
+ __ j(kNotEqual, GetEntryLabel());
+
+ // Check for array index out of bounds.
+ __ movl(temp, Address(object, array_length_offset.Int32Value()));
+ // SUB sets flags in the same way as CMP.
+ __ subl(temp, index);
+ __ j(kBelowEqual, GetEntryLabel());
+ // The difference between index and array length must be enough for the `value_type` size.
+ __ cmpl(temp, Immediate(size));
+ __ j(kBelow, GetEntryLabel());
+
+ // Construct the target.
+ __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
+
+ // Alignment check. For unaligned access, go to the runtime.
+ DCHECK(IsPowerOfTwo(size));
+ __ testl(CpuRegister(target.offset), Immediate(size - 1u));
+ __ j(kNotZero, GetEntryLabel());
+
+ // Byte order check. For native byte order return to the main path.
+ __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
+ __ j(kNotEqual, GetNativeByteOrderLabel());
+
+ DCHECK(access_mode_template == mirror::VarHandle::AccessModeTemplate::kGet);
+ GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
+
+ __ jmp(GetExitLabel());
+}
+
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
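As a closing illustration, the checks that EmitByteArrayViewCode emits
before taking the fast path can be summarized in C++ (a hedged sketch;
PassesByteArrayViewChecks is a hypothetical helper, not ART code):

  #include <cstdint>

  bool PassesByteArrayViewChecks(uint32_t array_length, uint32_t index,
                                 uint32_t offset, uint32_t size) {
    // subl sets flags like cmpl: kBelowEqual <=> array_length <= index.
    if (array_length <= index) return false;         // index out of bounds
    // The remaining bytes must cover the full value_type size.
    if (array_length - index < size) return false;   // j(kBelow, entry)
    // size is a power of two, so the low bits of the offset test
    // alignment, as `testl offset, size - 1` does above.
    if ((offset & (size - 1u)) != 0u) return false;  // unaligned: runtime
    return true;  // proceed: native byte order or byte-swapped access
  }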