Merge "Replace memory barriers to better reflect Java needs."
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 799a742..bdedadb 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -440,19 +440,23 @@
/**
* @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
- * @details Without context sensitive analysis, the most conservative set of barriers
- * must be issued to ensure the Java Memory Model. Thus the recipe is as follows:
- * -# Use StoreStore barrier before volatile store.
- * -# Use StoreLoad barrier after volatile store.
- * -# Use LoadLoad and LoadStore barrier after each volatile load.
+ * @details We define the combined barrier types that are actually required
+ * by the Java Memory Model, rather than using exactly the terminology from
+ * the JSR-133 cookbook. These should, in many cases, be replaced by acquire/release
+ * primitives. Note that the JSR-133 cookbook generally does not deal with
+ * store atomicity issues, and the recipes there are not always entirely sufficient.
+ * The current recipe is as follows:
+ * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store.
+ * -# Use AnyAny barrier after volatile store. (StoreLoad is as expensive.)
+ * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load.
* -# Use StoreStore barrier after all stores but before return from any constructor whose
- * class has final fields.
+ * class has final fields.
*/
enum MemBarrierKind {
- kLoadStore,
- kLoadLoad,
+ kAnyStore,
+ kLoadAny,
kStoreStore,
- kStoreLoad
+ kAnyAny
};
std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 04d6898..01e17bf 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -218,7 +218,7 @@
LIR* success_target = NewLIR0(kPseudoTargetLabel);
lock_success_branch->target = success_target;
- GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadAny);
} else {
// Explicit null-check as slow-path is entered using an IT.
GenNullCheck(rs_r0, opt_flags);
@@ -240,7 +240,7 @@
LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
OpEndIT(it);
MarkSafepointPC(call_inst);
- GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadAny);
}
}
@@ -269,7 +269,7 @@
MarkPossibleNullPointerException(opt_flags);
LoadConstantNoClobber(rs_r3, 0);
LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
- GenMemBarrier(kStoreLoad);
+ GenMemBarrier(kAnyStore);
Store32Disp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
@@ -298,7 +298,7 @@
OpRegReg(kOpCmp, rs_r1, rs_r2);
LIR* it = OpIT(kCondEq, "EE");
- if (GenMemBarrier(kStoreLoad)) {
+ if (GenMemBarrier(kAnyStore)) {
UpdateIT(it, "TEE");
}
Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 95071d9..f4ea592 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -764,6 +764,7 @@
UNIMPLEMENTED(FATAL) << "Should not be called.";
}
+// Generate a CAS with memory_order_seq_cst semantics.
bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
DCHECK_EQ(cu_->instruction_set, kThumb2);
// Unused - RegLocation rl_src_unsafe = info->args[0];
@@ -818,8 +819,8 @@
}
}
- // Release store semantics, get the barrier out of the way. TODO: revisit
- GenMemBarrier(kStoreLoad);
+ // Prevent reordering with prior memory operations.
+ GenMemBarrier(kAnyStore);
RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
RegLocation rl_new_value;
@@ -908,6 +909,9 @@
FreeTemp(rl_expected.reg); // Now unneeded.
}
+ // Prevent reordering with subsequent memory operations.
+ GenMemBarrier(kLoadAny);
+
// result := (tmp1 != 0) ? 0 : 1;
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
@@ -987,10 +991,10 @@
int dmb_flavor;
// TODO: revisit Arm barrier kinds
switch (barrier_kind) {
- case kLoadStore: dmb_flavor = kISH; break;
- case kLoadLoad: dmb_flavor = kISH; break;
+ case kAnyStore: dmb_flavor = kISH; break;
+ case kLoadAny: dmb_flavor = kISH; break;
case kStoreStore: dmb_flavor = kISHST; break;
- case kStoreLoad: dmb_flavor = kISH; break;
+ case kAnyAny: dmb_flavor = kISH; break;
default:
LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
dmb_flavor = kSY; // quiet gcc.
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 2d5e291..9cbf7b8 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -986,10 +986,7 @@
}
if (UNLIKELY(is_volatile == kVolatile)) {
- // Without context sensitive analysis, we must issue the most conservative barriers.
- // In this case, either a load or store may follow so we issue both barriers.
- GenMemBarrier(kLoadLoad);
- GenMemBarrier(kLoadStore);
+ GenMemBarrier(kLoadAny);
}
return load;
@@ -1091,8 +1088,8 @@
LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
OpSize size, VolatileKind is_volatile) {
if (UNLIKELY(is_volatile == kVolatile)) {
- // There might have been a store before this volatile one so insert StoreStore barrier.
- GenMemBarrier(kStoreStore);
+ // Ensure that prior accesses become visible to other threads first.
+ GenMemBarrier(kAnyStore);
}
LIR* store;
@@ -1135,8 +1132,9 @@
}
if (UNLIKELY(is_volatile == kVolatile)) {
- // A load might follow the volatile store so insert a StoreLoad barrier.
- GenMemBarrier(kStoreLoad);
+ // Preserve order with respect to any subsequent volatile loads.
+ // We need StoreLoad, but that generally requires the most expensive barrier.
+ GenMemBarrier(kAnyAny);
}
return store;
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 7490646..ef4c0de 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -376,6 +376,7 @@
kST = 0xe,
kISH = 0xb,
kISHST = 0xa,
+ kISHLD = 0x9,
kNSH = 0x7,
kNSHST = 0x6
};
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 56dcbe5..d24f419 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -228,7 +228,7 @@
LIR* success_target = NewLIR0(kPseudoTargetLabel);
lock_success_branch->target = success_target;
- GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadAny);
}
/*
@@ -258,7 +258,7 @@
Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
MarkPossibleNullPointerException(opt_flags);
LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
- GenMemBarrier(kStoreLoad);
+ GenMemBarrier(kAnyStore);
Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_wzr);
LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index a7ca685..f7aa39f 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -740,10 +740,16 @@
int dmb_flavor;
// TODO: revisit Arm barrier kinds
switch (barrier_kind) {
- case kLoadStore: dmb_flavor = kISH; break;
- case kLoadLoad: dmb_flavor = kISH; break;
+ case kAnyStore: dmb_flavor = kISH; break;
+ case kLoadAny: dmb_flavor = kISH; break;
+ // We conjecture that kISHLD is insufficient. It is documented
+ // to provide LoadLoad | LoadStore ordering. But if this were used
+ // to implement volatile loads, we suspect that the lack of store
+ // atomicity on ARM would cause us to allow incorrect results for
+ // the canonical IRIW example. But we're not sure.
+ // We should be using acquire loads instead.
case kStoreStore: dmb_flavor = kISHST; break;
- case kStoreLoad: dmb_flavor = kISH; break;
+ case kAnyAny: dmb_flavor = kISH; break;
default:
LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
dmb_flavor = kSY; // quiet gcc.
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 22a4ec4..097fcdc 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -1145,10 +1145,8 @@
LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);
if (UNLIKELY(is_volatile == kVolatile)) {
- // Without context sensitive analysis, we must issue the most conservative barriers.
- // In this case, either a load or store may follow so we issue both barriers.
- GenMemBarrier(kLoadLoad);
- GenMemBarrier(kLoadStore);
+ // TODO: This should generate an acquire load instead of the barrier.
+ GenMemBarrier(kLoadAny);
}
return load;
@@ -1232,9 +1230,10 @@
LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
OpSize size, VolatileKind is_volatile) {
+ // TODO: This should generate a release store and no barriers.
if (UNLIKELY(is_volatile == kVolatile)) {
- // There might have been a store before this volatile one so insert StoreStore barrier.
- GenMemBarrier(kStoreStore);
+ // Ensure that prior accesses become visible to other threads first.
+ GenMemBarrier(kAnyStore);
}
// StoreBaseDisp() will emit correct insn for atomic store on arm64
@@ -1243,8 +1242,9 @@
LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);
if (UNLIKELY(is_volatile == kVolatile)) {
- // A load might follow the volatile store so insert a StoreLoad barrier.
- GenMemBarrier(kStoreLoad);
+ // Preserve order with respect to any subsequent volatile loads.
+ // We need StoreLoad, but that generally requires the most expensive barrier.
+ GenMemBarrier(kAnyAny);
}
return store;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 6dc019a..c266a3c 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -629,8 +629,12 @@
field_info.StorageIndex(), r_base));
FreeTemp(r_tmp);
- // Ensure load of status and load of value don't re-order.
- GenMemBarrier(kLoadLoad);
+ // Ensure load of status and store of value don't re-order.
+ // TODO: Presumably the actual value store is control-dependent on the status load,
+ // and will thus not be reordered in any case, since stores are never speculated.
+ // Does later code "know" that the class is now initialized? If so, we still
+ // need the barrier to guard later static loads.
+ GenMemBarrier(kLoadAny);
}
FreeTemp(r_method);
}
@@ -723,7 +727,7 @@
FreeTemp(r_tmp);
// Ensure load of status and load of value don't re-order.
- GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadAny);
}
FreeTemp(r_method);
}
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 6c0dfe8..56986b4 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1711,10 +1711,7 @@
}
if (is_volatile) {
- // Without context sensitive analysis, we must issue the most conservative barriers.
- // In this case, either a load or store may follow so we issue both barriers.
- GenMemBarrier(kLoadLoad);
- GenMemBarrier(kLoadStore);
+ GenMemBarrier(kLoadAny);
}
if (is_long) {
@@ -1737,8 +1734,7 @@
rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3]
RegLocation rl_src_value = info->args[4]; // value to store
if (is_volatile || is_ordered) {
- // There might have been a store before this volatile one so insert StoreStore barrier.
- GenMemBarrier(kStoreStore);
+ GenMemBarrier(kAnyStore);
}
RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
@@ -1767,8 +1763,9 @@
FreeTemp(rl_offset.reg);
if (is_volatile) {
- // A load might follow the volatile store so insert a StoreLoad barrier.
- GenMemBarrier(kStoreLoad);
+ // Prevent reordering with a subsequent volatile load.
+ // May also be needed to address store atomicity issues.
+ GenMemBarrier(kAnyAny);
}
if (is_object) {
MarkGCCard(rl_value.reg, rl_object.reg);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 129a696..75d3c5d 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -563,10 +563,7 @@
load = LoadBaseDispBody(r_base, displacement, r_dest, size);
if (UNLIKELY(is_volatile == kVolatile)) {
- // Without context sensitive analysis, we must issue the most conservative barriers.
- // In this case, either a load or store may follow so we issue both barriers.
- GenMemBarrier(kLoadLoad);
- GenMemBarrier(kLoadStore);
+ GenMemBarrier(kLoadAny);
}
return load;
@@ -658,8 +655,8 @@
OpSize size, VolatileKind is_volatile) {
if (is_volatile == kVolatile) {
DCHECK(size != k64 && size != kDouble);
- // There might have been a store before this volatile one so insert StoreStore barrier.
- GenMemBarrier(kStoreStore);
+ // Ensure that prior accesses become visible to other threads first.
+ GenMemBarrier(kAnyStore);
}
// TODO: base this on target.
@@ -670,8 +667,9 @@
store = StoreBaseDispBody(r_base, displacement, r_src, size);
if (UNLIKELY(is_volatile == kVolatile)) {
- // A load might follow the volatile store so insert a StoreLoad barrier.
- GenMemBarrier(kStoreLoad);
+ // Preserve order with respect to any subsequent volatile loads.
+ // We need StoreLoad, but that generally requires the most expensive barrier.
+ GenMemBarrier(kAnyAny);
}
return store;
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index f1166f6..4ecc5d8 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -861,7 +861,7 @@
// After a store we need to insert barrier in case of potential load. Since the
// locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
- GenMemBarrier(kStoreLoad);
+ GenMemBarrier(kAnyAny);
FreeTemp(rs_r0q);
} else if (is_long) {
@@ -913,10 +913,11 @@
}
NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
- // After a store we need to insert barrier in case of potential load. Since the
- // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
- GenMemBarrier(kStoreLoad);
-
+ // After a store we need to insert barrier to prevent reordering with either
+ // earlier or later memory accesses. Since
+ // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
+ // and it will be associated with the cmpxchg instruction, preventing both.
+ GenMemBarrier(kAnyAny);
if (push_si) {
FreeTemp(rs_rSI);
@@ -954,9 +955,11 @@
LoadValueDirect(rl_src_expected, rs_r0);
NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
- // After a store we need to insert barrier in case of potential load. Since the
- // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
- GenMemBarrier(kStoreLoad);
+ // After a store we need to insert barrier to prevent reordering with either
+ // earlier or later memory accesses. Since
+ // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
+ // and it will be associated with the cmpxchg instruction, preventing both.
+ GenMemBarrier(kAnyAny);
FreeTemp(rs_r0);
}
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 1ebbbbd..4310525 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -582,11 +582,11 @@
bool ret = false;
/*
- * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
- * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
- * to ensure is that there is a scheduling barrier in place.
+ * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
+ * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
+ * For those cases, all we need to ensure is that there is a scheduling barrier in place.
*/
- if (barrier_kind == kStoreLoad) {
+ if (barrier_kind == kAnyAny) {
// If no LIR exists already that can be used a barrier, then generate an mfence.
if (mem_barrier == nullptr) {
mem_barrier = NewLIR0(kX86Mfence);
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 5c7c91b..045e58e 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -762,10 +762,7 @@
size);
if (UNLIKELY(is_volatile == kVolatile)) {
- // Without context sensitive analysis, we must issue the most conservative barriers.
- // In this case, either a load or store may follow so we issue both barriers.
- GenMemBarrier(kLoadLoad);
- GenMemBarrier(kLoadStore);
+ GenMemBarrier(kLoadAny); // Only a scheduling barrier.
}
return load;
@@ -863,8 +860,7 @@
LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src, OpSize size,
VolatileKind is_volatile) {
if (UNLIKELY(is_volatile == kVolatile)) {
- // There might have been a store before this volatile one so insert StoreStore barrier.
- GenMemBarrier(kStoreStore);
+ GenMemBarrier(kAnyStore); // Only a scheduling barrier.
}
// StoreBaseDisp() will emit correct insn for atomic store on x86
@@ -873,8 +869,9 @@
LIR* store = StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
if (UNLIKELY(is_volatile == kVolatile)) {
- // A load might follow the volatile store so insert a StoreLoad barrier.
- GenMemBarrier(kStoreLoad);
+ // A volatile load might follow the volatile store so insert a StoreLoad barrier.
+ // This does require a fence, even on x86.
+ GenMemBarrier(kAnyAny);
}
return store;
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index f8dca66..902f8dd 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -1648,7 +1648,7 @@
field_value = SignOrZeroExtendCat1Types(field_value, field_jty);
if (is_volatile) {
- irb_.CreateMemoryBarrier(art::kLoadLoad);
+ irb_.CreateMemoryBarrier(art::kLoadAny);
}
}
@@ -1702,7 +1702,7 @@
DCHECK_GE(field_offset.Int32Value(), 0);
if (is_volatile) {
- irb_.CreateMemoryBarrier(art::kStoreStore);
+ irb_.CreateMemoryBarrier(art::kAnyStore);
}
llvm::PointerType* field_type =
@@ -1717,7 +1717,7 @@
irb_.CreateStore(new_value, field_addr, kTBAAHeapInstance, field_jty);
if (is_volatile) {
- irb_.CreateMemoryBarrier(art::kLoadLoad);
+ irb_.CreateMemoryBarrier(art::kAnyAny);
}
if (field_jty == kObject) { // If put an object, mark the GC card table.
@@ -1870,7 +1870,7 @@
phi->addIncoming(loaded_storage_object_addr, block_after_load_static);
// Ensure load of status and load of value don't re-order.
- irb_.CreateMemoryBarrier(art::kLoadLoad);
+ irb_.CreateMemoryBarrier(art::kLoadAny);
return phi;
}
@@ -1948,7 +1948,7 @@
static_field_value = SignOrZeroExtendCat1Types(static_field_value, field_jty);
if (is_volatile) {
- irb_.CreateMemoryBarrier(art::kLoadLoad);
+ irb_.CreateMemoryBarrier(art::kLoadAny);
}
}
@@ -2025,7 +2025,7 @@
}
if (is_volatile) {
- irb_.CreateMemoryBarrier(art::kStoreStore);
+ irb_.CreateMemoryBarrier(art::kAnyStore);
}
llvm::Value* static_field_offset_value = irb_.getPtrEquivInt(field_offset.Int32Value());
@@ -2038,7 +2038,7 @@
irb_.CreateStore(new_value, static_field_addr, kTBAAHeapStatic, field_jty);
if (is_volatile) {
- irb_.CreateMemoryBarrier(art::kStoreLoad);
+ irb_.CreateMemoryBarrier(art::kAnyAny);
}
if (field_jty == kObject) { // If put an object, mark the GC card table.