x86-64 Baker's read barrier fast path implementation.
Introduce an x86-64 fast path implementation in Optimizing
for Baker's read barriers (for both heap reference loads and
GC root loads). The marking phase of the read barrier is
performed by a slow path, invoking the runtime entry point
artReadBarrierMark.
Other read barrier algorithms continue to use the original
slow path based implementation, which has been renamed as
GenerateReadBarrierSlow/GenerateReadBarrierForRootSlow.
Bug: 12687968
Change-Id: I9329293ddca7f9bcb512132bde6675aa202b98b2
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 1fee192..2c5fbc7 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -456,6 +456,56 @@
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
+ public:
+ ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
+ : instruction_(instruction), out_(out), obj_(obj) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+ x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location obj_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
+};
+
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
public:
@@ -477,7 +527,7 @@
// reference load to be instrumented, e.g.:
//
// __ movl(out, Address(out, offset));
- // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
//
// In that case, we have lost the information about the original
// object, and the emitted read barrier cannot work properly.
@@ -493,7 +543,9 @@
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
DCHECK(!instruction_->IsInvoke() ||
(instruction_->IsInvokeStaticOrDirect() &&
- instruction_->GetLocations()->Intrinsified()));
+ instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -634,13 +686,17 @@
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
- : instruction_(instruction), out_(out), root_(root) {}
+ : instruction_(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -731,7 +787,7 @@
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
// temp = thread->string_init_entrypoint
__ gs()->movl(temp.AsRegister<CpuRegister>(),
- Address::Absolute(invoke->GetStringInitOffset(), true));
+ Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
break;
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
@@ -748,7 +804,7 @@
pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
invoke->GetDexCacheArrayOffset());
__ movq(temp.AsRegister<CpuRegister>(),
- Address::Absolute(kDummy32BitOffset, false /* no_rip */));
+ Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
// Bind the label at the end of the "movl" insn.
__ Bind(&pc_relative_dex_cache_patches_.back().label);
break;
@@ -907,7 +963,7 @@
uint32_t dex_pc,
SlowPathCode* slow_path) {
ValidateInvokeRuntime(instruction, slow_path);
- __ gs()->call(Address::Absolute(entry_point_offset, true));
+ __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
RecordPcInfo(instruction, dex_pc, slow_path);
}
@@ -1939,7 +1995,7 @@
}
void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
- GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+ codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
}
void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -2667,7 +2723,8 @@
} else {
DCHECK(in.GetConstant()->IsIntConstant());
__ movl(out.AsRegister<CpuRegister>(),
- Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+ Immediate(static_cast<uint16_t>(
+ in.GetConstant()->AsIntConstant()->GetValue())));
}
break;
@@ -2911,7 +2968,8 @@
__ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ addss(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ codegen_->LiteralFloatAddress(
+ second.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(second.IsStackSlot());
__ addss(first.AsFpuRegister<XmmRegister>(),
@@ -2925,7 +2983,8 @@
__ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ addsd(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ codegen_->LiteralDoubleAddress(
+ second.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(second.IsDoubleStackSlot());
__ addsd(first.AsFpuRegister<XmmRegister>(),
@@ -3000,7 +3059,8 @@
__ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ subss(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ codegen_->LiteralFloatAddress(
+ second.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(second.IsStackSlot());
__ subss(first.AsFpuRegister<XmmRegister>(),
@@ -3014,7 +3074,8 @@
__ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ subsd(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ codegen_->LiteralDoubleAddress(
+ second.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(second.IsDoubleStackSlot());
__ subsd(first.AsFpuRegister<XmmRegister>(),
@@ -3121,7 +3182,8 @@
__ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ mulss(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ codegen_->LiteralFloatAddress(
+ second.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(second.IsStackSlot());
__ mulss(first.AsFpuRegister<XmmRegister>(),
@@ -3136,7 +3198,8 @@
__ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ mulsd(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ codegen_->LiteralDoubleAddress(
+ second.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(second.IsDoubleStackSlot());
__ mulsd(first.AsFpuRegister<XmmRegister>(),
@@ -3542,7 +3605,8 @@
__ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ divss(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ codegen_->LiteralFloatAddress(
+ second.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(second.IsStackSlot());
__ divss(first.AsFpuRegister<XmmRegister>(),
@@ -3556,7 +3620,8 @@
__ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
} else if (second.IsConstant()) {
__ divsd(first.AsFpuRegister<XmmRegister>(),
- codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ codegen_->LiteralDoubleAddress(
+ second.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(second.IsDoubleStackSlot());
__ divsd(first.AsFpuRegister<XmmRegister>(),
@@ -3960,10 +4025,10 @@
LOG(FATAL) << "Unimplemented";
}
-void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
/*
* According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
- * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+ * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
* For those cases, all we need to ensure is that there is a scheduling barrier in place.
*/
switch (kind) {
@@ -4003,6 +4068,11 @@
Location::RequiresRegister(),
object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
@@ -4038,12 +4108,36 @@
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
+ case Primitive::kPrimInt: {
__ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
break;
}
+ case Primitive::kPrimNot: {
+ // /* HeapReference<Object> */ out = *(base + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
__ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
break;
@@ -4064,14 +4158,20 @@
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
-
- if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ if (field_type == Primitive::kPrimNot) {
+ // Potential implicit null checks, in the case of reference
+ // fields, are handled in the previous switch statement.
+ } else {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
- if (field_type == Primitive::kPrimNot) {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+ if (is_volatile) {
+ if (field_type == Primitive::kPrimNot) {
+ // Memory barriers, in the case of references, are also handled
+ // in the previous switch statement.
+ } else {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
}
}
@@ -4125,7 +4225,7 @@
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
bool maybe_record_implicit_null_check_done = false;
@@ -4231,7 +4331,7 @@
}
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
}
@@ -4408,6 +4508,11 @@
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
@@ -4415,12 +4520,13 @@
Location obj_loc = locations->InAt(0);
CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location index = locations->InAt(1);
- Primitive::Type type = instruction->GetType();
+ Location out_loc = locations->Out();
+ Primitive::Type type = instruction->GetType();
switch (type) {
case Primitive::kPrimBoolean: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movzxb(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4432,7 +4538,7 @@
case Primitive::kPrimByte: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movsxb(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
@@ -4444,7 +4550,7 @@
case Primitive::kPrimShort: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movsxw(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4456,7 +4562,7 @@
case Primitive::kPrimChar: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movzxw(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
@@ -4466,13 +4572,9 @@
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ case Primitive::kPrimInt: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movl(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4482,9 +4584,46 @@
break;
}
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ } else {
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
+ if (index.IsConstant()) {
+ uint32_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ movl(out, Address(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+ __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(
+ instruction, out_loc, out_loc, obj_loc, data_offset, index);
+ }
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (index.IsConstant()) {
__ movq(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4496,7 +4635,7 @@
case Primitive::kPrimFloat: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
if (index.IsConstant()) {
__ movss(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
@@ -4508,7 +4647,7 @@
case Primitive::kPrimDouble: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
if (index.IsConstant()) {
__ movsd(out, Address(obj,
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
@@ -4522,20 +4661,12 @@
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
if (type == Primitive::kPrimNot) {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Location out = locations->Out();
- if (index.IsConstant()) {
- uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
- } else {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
- }
+ // Potential implicit null checks, in the case of reference
+ // arrays, are handled in the previous switch statement.
+ } else {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -4659,12 +4790,12 @@
// __ movl(temp2, temp);
// // /* HeapReference<Class> */ temp = temp->component_type_
// __ movl(temp, Address(temp, component_offset));
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp_loc, temp_loc, temp2_loc, component_offset);
//
// // /* HeapReference<Class> */ temp2 = register_value->klass_
// __ movl(temp2, Address(register_value, class_offset));
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
//
// __ cmpl(temp, temp2);
@@ -4890,8 +5021,8 @@
__ testl(value, value);
__ j(kEqual, &is_null);
}
- __ gs()->movq(card, Address::Absolute(
- Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
+ __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
+ /* no_rip */ true));
__ movq(temp, object);
__ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
__ movb(Address(temp, card, TIMES_1, 0), card);
@@ -4950,8 +5081,9 @@
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- __ gs()->cmpw(Address::Absolute(
- Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
+ __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
+ /* no_rip */ true),
+ Immediate(0));
if (successor == nullptr) {
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
@@ -5175,7 +5307,7 @@
Immediate(mirror::Class::kStatusInitialized));
__ j(kLess, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
- // No need for memory fence, thanks to the X86_64 memory model.
+ // No need for memory fence, thanks to the x86-64 memory model.
}
void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -5206,32 +5338,16 @@
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ leaq(out, Address(current_method, declaring_class_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ movl(out, Address(current_method, declaring_class_offset));
- }
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
} else {
// /* GcRoot<mirror::Class>[] */ out =
// current_method.ptr_sized_fields_->dex_cache_resolved_types_
__ movq(out, Address(current_method,
ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &out[type_index]
- __ leaq(out, Address(out, cache_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = out[type_index]
- __ movl(out, Address(out, cache_offset));
- }
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
@@ -5284,30 +5400,14 @@
CpuRegister out = out_loc.AsRegister<CpuRegister>();
CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ leaq(out, Address(current_method, declaring_class_offset));
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ movl(out, Address(current_method, declaring_class_offset));
- }
-
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
// /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
__ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::String>* */ out = &out[string_index]
- __ leaq(out, Address(out, cache_offset));
- // /* mirror::String* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::String> */ out = out[string_index]
- __ movl(out, Address(out, cache_offset));
- }
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ GenerateGcRootFieldLoad(
+ load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
if (!load->IsInDexCache()) {
SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
@@ -5319,7 +5419,8 @@
}
static Address GetExceptionTlsAddress() {
- return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true);
+ return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
+ /* no_rip */ true);
}
void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
@@ -5355,6 +5456,14 @@
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+ return kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5380,21 +5489,22 @@
locations->SetOut(Location::RequiresRegister());
// When read barriers are enabled, we need a temporary register for
// some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location cls = locations->InAt(1);
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
+ Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(0) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5410,10 +5520,9 @@
}
// /* HeapReference<Class> */ out = obj->klass_
- __ movl(out, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
if (cls.IsRegister()) {
__ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -5439,17 +5548,8 @@
// object to avoid doing a comparison we know will fail.
NearLabel loop, success;
__ Bind(&loop);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ movl(out, Address(out, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5478,17 +5578,8 @@
__ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
}
__ j(kEqual, &success);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ movl(out, Address(out, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
__ testl(out, out);
__ j(kNotEqual, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
@@ -5512,17 +5603,8 @@
}
__ j(kEqual, &exact_check);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- __ movl(temp, out);
- }
// /* HeapReference<Class> */ out = out->component_type_
- __ movl(out, Address(out, component_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5566,6 +5648,13 @@
// HInstanceOf instruction (following the runtime calling
// convention), which might be cluttered by the potential first
// read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
/* is_fatal */ false);
@@ -5618,27 +5707,27 @@
locations->AddTemp(Location::RequiresRegister());
// When read barriers are enabled, we need an additional temporary
// register for some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+ Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(1) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
bool is_type_check_slow_path_fatal =
(type_check_kind == TypeCheckKind::kExactCheck ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5650,7 +5739,7 @@
is_type_check_slow_path_fatal);
codegen_->AddSlowPath(type_check_slow_path);
- NearLabel done;
+ Label done;
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
__ testl(obj, obj);
@@ -5658,8 +5747,7 @@
}
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -5681,18 +5769,8 @@
// object to avoid doing a comparison we know will fail.
NearLabel loop, compare_classes;
__ Bind(&loop);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// to the `compare_classes` label to compare it with the checked
@@ -5705,8 +5783,7 @@
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -5732,18 +5809,8 @@
}
__ j(kEqual, &done);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// back at the beginning of the loop.
@@ -5755,8 +5822,7 @@
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5773,19 +5839,8 @@
__ j(kEqual, &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
- __ movl(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->component_type_
- __ movl(temp, Address(temp, component_offset));
- codegen_->MaybeGenerateReadBarrier(
- instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
// If the component type is not null (i.e. the object is indeed
// an array), jump to label `check_non_primitive_component_type`
@@ -5799,8 +5854,7 @@
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -5808,8 +5862,7 @@
__ j(kEqual, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- __ movl(temp, Address(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5826,6 +5879,13 @@
// instruction (following the runtime calling convention), which
// might be cluttered by the potential first read barrier
// emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
__ jmp(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5969,14 +6029,227 @@
}
}
-void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp) {
+ CpuRegister out_reg = out.AsRegister<CpuRegister>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ movl(temp.AsRegister<CpuRegister>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ movl(out_reg, Address(out_reg, offset));
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ movl(out_reg, Address(out_reg, offset));
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp) {
+ CpuRegister out_reg = out.AsRegister<CpuRegister>();
+ CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ movl(out_reg, Address(obj_reg, offset));
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ movl(out_reg, Address(obj_reg, offset));
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ CpuRegister obj,
+ uint32_t offset) {
+ CpuRegister root_reg = root.AsRegister<CpuRegister>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+ // Baker's read barrier are used:
+ //
+ // root = obj.field;
+ // if (Thread::Current()->GetIsGcMarking()) {
+ // root = ReadBarrier::Mark(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ movl(root_reg, Address(obj, offset));
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path used to mark the GC root `root`.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
+ codegen_->AddSlowPath(slow_path);
+
+ __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
+ /* no_rip */ true),
+ Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ leaq(root_reg, Address(obj, offset));
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ movl(root_reg, Address(obj, offset));
+ }
+}
+
+void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Address src(obj, offset);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ Address src = index.IsConstant() ?
+ Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
+ Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
+ GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
+}
+
+void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ const Address& src,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as:
+ // - it implements the load-load fence using a data dependency on
+ // the high-bits of rb_state, which are expected to be all zeroes;
+ // - it performs additional checks that we do not do here for
+ // performance reasons.
+
+ CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
+ CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ movl(temp_reg, Address(obj, monitor_offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+ // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+ __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
+ __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
+ static_assert(
+ LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+ "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86-64 memory model.
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // The actual reference load.
+ // /* HeapReference<Object> */ ref = *src
+ __ movl(ref_reg, src);
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path used to mark the object `ref` when it is gray.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the reference load.
+ //
// If heap poisoning is enabled, the unpoisoning of the loaded
// reference will be carried out by the runtime within the slow
// path.
@@ -5990,57 +6263,41 @@
ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
AddSlowPath(slow_path);
- // TODO: When read barrier has a fast path, add it here.
- /* Currently the read barrier call is inserted after the original load.
- * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
- * original load. This load-load ordering is required by the read barrier.
- * The fast path/slow path (for Baker's algorithm) should look like:
- *
- * bool isGray = obj.LockWord & kReadBarrierMask;
- * lfence; // load fence or artificial data dependence to prevent load-load reordering
- * ref = obj.field; // this is the original load
- * if (isGray) {
- * ref = Mark(ref); // ideally the slow path just does Mark(ref)
- * }
- */
-
__ jmp(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
-void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
// If heap poisoning is enabled, unpoisoning will be taken care of
// by the runtime within the slow path.
- GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
} else if (kPoisonHeapReferences) {
__ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
}
}
-void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction,
- Location out,
- Location root) {
+void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
// Note that GC roots are not affected by heap poisoning, so we do
// not need to do anything special for this here.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
AddSlowPath(slow_path);
- // TODO: Implement a fast path for ReadBarrierForRoot, performing
- // the following operation (for Baker's algorithm):
- //
- // if (thread.tls32_.is_gc_marking) {
- // root = Mark(root);
- // }
-
__ jmp(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -6289,7 +6546,7 @@
// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
if (!trg.IsValid()) {
- DCHECK(type == Primitive::kPrimVoid);
+ DCHECK_EQ(type, Primitive::kPrimVoid);
return;
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 7351fed..dda9ea2 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -213,11 +213,44 @@
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleShift(HBinaryOperation* operation);
- void GenerateMemoryBarrier(MemBarrierKind kind);
+
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a read barrier.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a Baker's read barrier.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp);
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ CpuRegister obj,
+ uint32_t offset);
+
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void PushOntoFPStack(Location source, uint32_t temp_offset,
@@ -324,6 +357,8 @@
CpuRegister value,
bool value_can_be_null);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
// Helper method to move a value between two locations.
void Move(Location destination, Location source);
@@ -356,7 +391,26 @@
return isa_features_;
}
- // Generate a read barrier for a heap reference within `instruction`.
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ CpuRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ CpuRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
//
// A read barrier for an object reference read from the heap is
// implemented as a call to the artReadBarrierSlow runtime entry
@@ -373,23 +427,25 @@
// When `index` provided (i.e., when it is different from
// Location::NoLocation()), the offset value passed to
// artReadBarrierSlow is adjusted to take `index` into account.
- void GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
- // If read barriers are enabled, generate a read barrier for a heap reference.
- // If heap poisoning is enabled, also unpoison the reference in `out`.
- void MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
- // Generate a read barrier for a GC root within `instruction`.
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
//
// A read barrier for an object reference GC root is implemented as
// a call to the artReadBarrierForRootSlow runtime entry point,
@@ -399,7 +455,7 @@
//
// The `out` location contains the value returned by
// artReadBarrierForRootSlow.
- void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
int ConstantAreaStart() const {
return constant_area_start_;
@@ -424,6 +480,15 @@
HInstruction* instruction);
private:
+ // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+ // and GenerateArrayLoadWithBakerReadBarrier.
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ const Address& src,
+ Location temp,
+ bool needs_null_check);
+
struct PcRelativeDexCacheAccessInfo {
PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
: target_dex_file(dex_file), element_offset(element_off), label() { }
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ac9b245..ce737e3 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1917,16 +1917,30 @@
Location offset_loc = locations->InAt(2);
CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
Location output_loc = locations->Out();
- CpuRegister output = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister output = output_loc.AsRegister<CpuRegister>();
switch (type) {
case Primitive::kPrimInt:
- case Primitive::kPrimNot:
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
- if (type == Primitive::kPrimNot) {
- codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+ break;
+
+ case Primitive::kPrimNot: {
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateArrayLoadWithBakerReadBarrier(
+ invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+ } else {
+ __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ codegen->GenerateReadBarrierSlow(
+ invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
+ }
+ } else {
+ __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ __ MaybeUnpoisonHeapReference(output);
}
break;
+ }
case Primitive::kPrimLong:
__ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
@@ -1938,7 +1952,9 @@
}
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
bool can_call = kEmitCompilerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -1951,25 +1967,30 @@
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister());
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}