Revert^2 "x86_64: Implement VarHandle.get{,Acquire,Opaque,Volatile} for byte array views."

This reverts commit 6620caa89a4691bc5cbdbc5af7599b17fd8896f5.

Reason for revert: Relanding the original change after fixing a linker error:
  ld.lld: error: undefined symbol: art::ObjPtr<art::mirror::Class>
       art::GetClassRoot<(art::ReadBarrierOption)0>(art::ClassRoot)

  The error was caused by a missing header that contains an `inline`
  definition of the function above. Whether the error occurs depends on
  the compiler: if it chooses not to inline the function somewhere, an
  out-of-line copy of the definition is emitted into that object file and
  there is no linker error.
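
  As a rough illustration of the failure mode, here is a minimal sketch
  with hypothetical names (answer.h, answer-inl.h, GetAnswer) standing in
  for class_root.h, class_root-inl.h and GetClassRoot:

    // answer.h - declares the function (analogous to class_root.h).
    int GetAnswer();

    // answer-inl.h - holds the inline definition (analogous to class_root-inl.h).
    #include "answer.h"
    inline int GetAnswer() { return 42; }

    // caller.cc - calls the function but forgets to include answer-inl.h.
    #include "answer.h"
    int Use() { return GetAnswer(); }  // Compiles to an external call.

  If every translation unit that does include answer-inl.h inlines its
  calls, no out-of-line copy of GetAnswer() survives into any object file
  and the link fails with "undefined symbol"; if any of them keeps an
  out-of-line (weak) copy, the reference from caller.cc resolves and the
  error silently disappears.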

Bug: 71781600
Test: Manually marked the function as always inline, verified that the
  error reproduces, and that including the header fixes it.
Change-Id: Ibcea2c3fc81ea75b8e6e6517d9ce872e79eda0d6

diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 0584dc1..dae2ae2 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -18,6 +18,7 @@
 
 #include "arch/x86_64/jni_frame_x86_64.h"
 #include "art_method-inl.h"
+#include "class_root-inl.h"
 #include "class_table.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
@@ -1286,6 +1287,18 @@
   }
 }
 
+void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
+  if (GetCompilerOptions().IsBootImage()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
+    boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
+    __ Bind(&boot_image_type_patches_.back().label);
+  } else {
+    uint32_t boot_image_offset = GetBootImageOffset(class_root);
+    LoadBootImageAddress(reg, boot_image_offset);
+  }
+}
+
 // The label points to the end of the "movl" or another instruction but the literal offset
 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index db0b9d7..3e601bb 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -488,6 +488,7 @@
 
   void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
   void LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke);
+  void LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root);
 
   void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index dcdab10..a7c5639 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -4457,7 +4457,7 @@
         codegen->GetCompilerOptions().IsBootImage() ||
         !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
     DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
-    size_t can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
+    bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
     vixl::aarch64::Label* slow_path_label =
         can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
     __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 53e3886..a3ad409 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -4217,7 +4217,7 @@
       codegen->GetCompilerOptions().IsBootImage() ||
       !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
   DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
-  size_t can_be_view =
+  bool can_be_view =
       ((value_type != DataType::Type::kReference) && (DataType::Size(value_type) != 1u)) &&
       boot_image_available;
   vixl32::Label* slow_path_label =
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ca319ef..0abcbc7 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -184,25 +184,43 @@
   locations->SetOut(Location::SameAsFirstInput());
 }
 
-static void GenReverseBytes(LocationSummary* locations,
-                            DataType::Type size,
-                            X86_64Assembler* assembler) {
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
-  switch (size) {
+static void GenReverseBytes(Location out,
+                            DataType::Type type,
+                            X86_64Assembler* assembler,
+                            CpuRegister temp = CpuRegister(kNoRegister)) {
+  switch (type) {
     case DataType::Type::kInt16:
       // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
-      __ bswapl(out);
-      __ sarl(out, Immediate(16));
+      __ bswapl(out.AsRegister<CpuRegister>());
+      __ sarl(out.AsRegister<CpuRegister>(), Immediate(16));
+      break;
+    case DataType::Type::kUint16:
+      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+      __ bswapl(out.AsRegister<CpuRegister>());
+      __ shrl(out.AsRegister<CpuRegister>(), Immediate(16));
       break;
     case DataType::Type::kInt32:
-      __ bswapl(out);
+    case DataType::Type::kUint32:
+      __ bswapl(out.AsRegister<CpuRegister>());
       break;
     case DataType::Type::kInt64:
-      __ bswapq(out);
+    case DataType::Type::kUint64:
+      __ bswapq(out.AsRegister<CpuRegister>());
+      break;
+    case DataType::Type::kFloat32:
+      DCHECK_NE(temp.AsRegister(), kNoRegister);
+      __ movd(temp, out.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
+      __ bswapl(temp);
+      __ movd(out.AsFpuRegister<XmmRegister>(), temp, /*is64bit=*/ false);
+      break;
+    case DataType::Type::kFloat64:
+      DCHECK_NE(temp.AsRegister(), kNoRegister);
+      __ movd(temp, out.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
+      __ bswapq(temp);
+      __ movd(out.AsFpuRegister<XmmRegister>(), temp, /*is64bit=*/ true);
       break;
     default:
-      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
       UNREACHABLE();
   }
 }
@@ -212,7 +230,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
+  GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt32, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
@@ -220,7 +238,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
+  GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt64, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
@@ -228,7 +246,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
+  GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt16, GetAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -3202,9 +3220,45 @@
   __ imulq(y);
 }
 
+class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
+ public:
+  explicit VarHandleSlowPathX86_64(HInvoke* invoke)
+      : IntrinsicSlowPathX86_64(invoke) {
+  }
+
+  Label* GetByteArrayViewCheckLabel() {
+    return &byte_array_view_check_label_;
+  }
+
+  Label* GetNativeByteOrderLabel() {
+    return &native_byte_order_label_;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) override {
+    if (GetByteArrayViewCheckLabel()->IsLinked()) {
+      EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
+    }
+    IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
+  }
+
+ private:
+  HInvoke* GetInvoke() const {
+    return GetInstruction()->AsInvoke();
+  }
+
+  mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
+    return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
+  }
+
+  void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
+
+  Label byte_array_view_check_label_;
+  Label native_byte_order_label_;
+};
+
 // Generate subtype check without read barriers.
 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
-                                                    SlowPathCode* slow_path,
+                                                    VarHandleSlowPathX86_64* slow_path,
                                                     CpuRegister object,
                                                     CpuRegister temp,
                                                     Address type_address,
@@ -3244,7 +3298,7 @@
 // check without read barrier, so it can have false negatives which we handle in the slow path.
 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
                                                         CodeGeneratorX86_64* codegen,
-                                                        SlowPathCode* slow_path,
+                                                        VarHandleSlowPathX86_64* slow_path,
                                                         DataType::Type type) {
   X86_64Assembler* assembler = codegen->GetAssembler();
 
@@ -3296,7 +3350,7 @@
 
 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
                                               CodeGeneratorX86_64* codegen,
-                                              SlowPathCode* slow_path) {
+                                              VarHandleSlowPathX86_64* slow_path) {
   X86_64Assembler* assembler = codegen->GetAssembler();
 
   LocationSummary* locations = invoke->GetLocations();
@@ -3312,7 +3366,7 @@
 
 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
                                                  CodeGeneratorX86_64* codegen,
-                                                 SlowPathCode* slow_path) {
+                                                 VarHandleSlowPathX86_64* slow_path) {
   VarHandleOptimizations optimizations(invoke);
   X86_64Assembler* assembler = codegen->GetAssembler();
 
@@ -3348,7 +3402,7 @@
 
 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
                                          CodeGeneratorX86_64* codegen,
-                                         SlowPathCode* slow_path) {
+                                         VarHandleSlowPathX86_64* slow_path) {
   VarHandleOptimizations optimizations(invoke);
   X86_64Assembler* assembler = codegen->GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
@@ -3405,10 +3459,25 @@
   __ testl(temp, temp);
   __ j(kZero, slow_path->GetEntryLabel());
 
-  // TODO: handle byte array views. Currently the check below always fails for them, so they fall
-  // back to slow path.
+  // Check that the array component type matches the primitive type.
+  Label* slow_path_label;
+  if (primitive_type == Primitive::kPrimNot) {
+    slow_path_label = slow_path->GetEntryLabel();
+  } else {
+    // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
+    // we shall check for a byte array view in the slow path.
+    // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
+    // so we cannot emit that if we're JITting without boot image.
+    bool boot_image_available =
+        codegen->GetCompilerOptions().IsBootImage() ||
+        !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
+    DCHECK(boot_image_available || codegen->GetCompilerOptions().IsJitCompiler());
+    bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
+    slow_path_label =
+        can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
+  }
   __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
-  __ j(kNotEqual, slow_path->GetEntryLabel());
+  __ j(kNotEqual, slow_path_label);
 
   // Check for array index out of bounds.
   __ cmpl(index, Address(object, array_length_offset.Int32Value()));
@@ -3417,7 +3486,7 @@
 
 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
                                               CodeGeneratorX86_64* codegen,
-                                              SlowPathCode* slow_path) {
+                                              VarHandleSlowPathX86_64* slow_path) {
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
   if (expected_coordinates_count == 0u) {
     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
@@ -3429,11 +3498,11 @@
   }
 }
 
-static SlowPathCode* GenerateVarHandleChecks(HInvoke* invoke,
-                                             CodeGeneratorX86_64* codegen,
-                                             DataType::Type type) {
-  SlowPathCode* slow_path =
-      new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
+static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
+                                                        CodeGeneratorX86_64* codegen,
+                                                        DataType::Type type) {
+  VarHandleSlowPathX86_64* slow_path =
+      new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
   codegen->AddSlowPath(slow_path);
 
   GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
@@ -3579,16 +3648,22 @@
   }
 }
 
-static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+static void GenerateVarHandleGet(HInvoke* invoke,
+                                 CodeGeneratorX86_64* codegen,
+                                 bool byte_swap = false) {
   DataType::Type type = invoke->GetType();
   DCHECK_NE(type, DataType::Type::kVoid);
 
   LocationSummary* locations = invoke->GetLocations();
   X86_64Assembler* assembler = codegen->GetAssembler();
 
-  SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
   VarHandleTarget target = GetVarHandleTarget(invoke);
-  GenerateVarHandleTarget(invoke, target, codegen);
+  VarHandleSlowPathX86_64* slow_path = nullptr;
+  if (!byte_swap) {
+    slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+    GenerateVarHandleTarget(invoke, target, codegen);
+    __ Bind(slow_path->GetNativeByteOrderLabel());
+  }
 
   // Load the value from the field
   Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
@@ -3603,11 +3678,18 @@
       __ movl(out.AsRegister<CpuRegister>(), src);
       __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
     }
+    DCHECK(!byte_swap);
   } else {
     codegen->LoadFromMemoryNoReference(type, out, src);
+    if (byte_swap) {
+      CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+      GenReverseBytes(out, type, assembler, temp);
+    }
   }
 
-  __ Bind(slow_path->GetExitLabel());
+  if (!byte_swap) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
@@ -3665,7 +3747,7 @@
   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
 
-  SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
+  VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
   VarHandleTarget target = GetVarHandleTarget(invoke);
   GenerateVarHandleTarget(invoke, target, codegen);
 
@@ -3792,7 +3874,7 @@
   uint32_t new_value_index = number_of_arguments - 1;
   DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
 
-  SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+  VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
   VarHandleTarget target = GetVarHandleTarget(invoke);
   GenerateVarHandleTarget(invoke, target, codegen);
 
@@ -3927,7 +4009,7 @@
   uint32_t value_index = number_of_arguments - 1;
   DataType::Type type = invoke->GetType();
 
-  SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+  VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
   VarHandleTarget target = GetVarHandleTarget(invoke);
   GenerateVarHandleTarget(invoke, target, codegen);
 
@@ -4103,7 +4185,7 @@
   uint32_t value_index = number_of_arguments - 1;
   DataType::Type type = invoke->GetType();
 
-  SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+  VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
   VarHandleTarget target = GetVarHandleTarget(invoke);
   GenerateVarHandleTarget(invoke, target, codegen);
 
@@ -4260,7 +4342,7 @@
   uint32_t value_index = number_of_arguments - 1;
   DataType::Type type = invoke->GetType();
 
-  SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+  VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
   VarHandleTarget target = GetVarHandleTarget(invoke);
   GenerateVarHandleTarget(invoke, target, codegen);
 
@@ -4481,6 +4563,68 @@
                                    /*need_any_any_barrier=*/ false);
 }
 
+void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
+  DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
+  X86_64Assembler* assembler = codegen->GetAssembler();
+
+  HInvoke* invoke = GetInvoke();
+  LocationSummary* locations = invoke->GetLocations();
+  mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
+  DataType::Type value_type =
+      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
+  DCHECK_NE(value_type, DataType::Type::kReference);
+  size_t size = DataType::Size(value_type);
+  DCHECK_GT(size, 1u);
+
+  CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
+  CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1).AsRegister<CpuRegister>();
+
+  MemberOffset class_offset = mirror::Object::ClassOffset();
+  MemberOffset array_length_offset = mirror::Array::LengthOffset();
+  MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
+  MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
+
+  VarHandleTarget target = GetVarHandleTarget(invoke);
+
+  __ Bind(GetByteArrayViewCheckLabel());
+
+  // The main path checked that the coordinateType0 is an array class that matches
+  // the class of the actual coordinate argument but it does not match the value type.
+  // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
+  codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
+  assembler->MaybePoisonHeapReference(temp);
+  __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
+  __ j(kNotEqual, GetEntryLabel());
+
+  // Check for array index out of bounds.
+  __ movl(temp, Address(object, array_length_offset.Int32Value()));
+  // SUB sets flags in the same way as CMP.
+  __ subl(temp, index);
+  __ j(kBelowEqual, GetEntryLabel());
+  // The difference between index and array length must be enough for the `value_type` size.
+  __ cmpl(temp, Immediate(size));
+  __ j(kBelow, GetEntryLabel());
+
+  // Construct the target.
+  __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
+
+  // Alignment check. For unaligned access, go to the runtime.
+  DCHECK(IsPowerOfTwo(size));
+  __ testl(CpuRegister(target.offset), Immediate(size - 1u));
+  __ j(kNotZero, GetEntryLabel());
+
+  // Byte order check. For native byte order return to the main path.
+  __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
+  __ j(kNotEqual, GetNativeByteOrderLabel());
+
+  DCHECK(access_mode_template == mirror::VarHandle::AccessModeTemplate::kGet);
+  GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
+
+  __ jmp(GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)