ART: Fix MonitorExit code on ARM

We do not emit memory barriers on non-SMP systems. On ARM, however, some
code sequences execute conditionally under an IT instruction, and whether
a barrier is emitted changes how many instructions that IT has to cover.
The IT guide therefore differs between SMP and non-SMP builds.
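
(Background on the guide: it lists, for each conditional instruction after
the first one, whether it runs on the base condition ('T') or its inverse
('E'); guide and condition are folded into a 4-bit mask. Below is a minimal
sketch of that encoding, mirroring the OpIT/UpdateIT helpers in int_arm.cc;
the EncodeItMask name is illustrative only, not part of the change.)

    // Illustrative only: how a guide string plus base condition become the
    // 4-bit Thumb-2 IT mask, mirroring OpIT/UpdateIT in int_arm.cc.
    #include <cstring>

    static int EncodeItMask(int arm_cond, const char* guide) {
      int cond_bit = arm_cond & 1;   // low bit of the base condition
      int alt_bit = cond_bit ^ 1;    // inverted bit selects the 'E' slots
      int mask3 = 0, mask2 = 0, mask1 = 0;
      int len = static_cast<int>(std::strlen(guide));  // at most 3 chars
      switch (len) {  // Note: case fallthroughs intentional.
        case 3: mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
        case 2: mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
        case 1: mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
                break;
        case 0: break;
      }
      // A trailing one bit marks where the guide ends.
      return (mask3 << 3) | (mask2 << 2) | (mask1 << 1) | (1 << (3 - len));
    }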

To handle this cleanly, change the API so that GenMemBarrier returns
whether it actually generated an instruction. The ARM backend queries the
result and updates any dependent IT accordingly.
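
On ARM the call site then starts from the guide that assumes no barrier
and only widens the IT when one was actually emitted; a minimal sketch of
the pattern (it mirrors the MonitorExit change in call_arm.cc below):

    LIR* it = OpIT(kCondEq, "EE");    // guide without a slot for the barrier
    if (GenMemBarrier(kStoreLoad)) {  // true iff a dmb was actually emitted
      UpdateIT(it, "TEE");            // widen the guide to cover the dmb
    }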

Raise a build-system error if TARGET_CPU_SMP is not set to true or false.

Fix runtime/Android.mk to work with the new multilib host build.

Bug: 14989275
Change-Id: I9e611b770e8a1cd4ca19367d7dae0573ec08dc61
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 188ddb5..a0e9df1 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -249,7 +249,11 @@
 ifeq ($(TARGET_CPU_SMP),true)
   ART_TARGET_CFLAGS += -DANDROID_SMP=1
 else
-  ART_TARGET_CFLAGS += -DANDROID_SMP=0
+  ifeq ($(TARGET_CPU_SMP),false)
+    ART_TARGET_CFLAGS += -DANDROID_SMP=0
+  else
+    $(error TARGET_CPU_SMP must be (true|false), found $(TARGET_CPU_SMP))
+  endif
 endif
 ART_TARGET_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
 
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 435242a..041c1aa 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -295,8 +295,11 @@
     LoadConstantNoClobber(rs_r3, 0);
     // Is lock unheld on lock or held by us (==thread_id) on unlock?
     OpRegReg(kOpCmp, rs_r1, rs_r2);
-    LIR* it = OpIT(kCondEq, "TEE");
-    GenMemBarrier(kStoreLoad);
+
+    LIR* it = OpIT(kCondEq, "EE");
+    if (GenMemBarrier(kStoreLoad)) {
+      UpdateIT(it, "TEE");
+    }
     Store32Disp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
     // Go expensive route - UnlockObjectFromCode(obj);
     LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 6696cf7..1ee59c6 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -141,7 +141,7 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
-    void GenMemBarrier(MemBarrierKind barrier_kind);
+    bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     void GenMonitorExit(int opt_flags, RegLocation rl_src);
     void GenMoveException(RegLocation rl_dest);
@@ -160,6 +160,7 @@
     LIR* OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target);
     LIR* OpFpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpIT(ConditionCode cond, const char* guide);
+    void UpdateIT(LIR* it, const char* new_guide);
     void OpEndIT(LIR* it);
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
     LIR* OpPcRelLoad(RegStorage reg, LIR* target);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 2e0e559..2d4834c 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -67,6 +67,34 @@
   return NewLIR2(kThumb2It, code, mask);
 }
 
+void ArmMir2Lir::UpdateIT(LIR* it, const char* new_guide) {
+  int mask;
+  int mask3 = 0;
+  int mask2 = 0;
+  int mask1 = 0;
+  ArmConditionCode code = static_cast<ArmConditionCode>(it->operands[0]);
+  int cond_bit = code & 1;
+  int alt_bit = cond_bit ^ 1;
+
+  // Note: case fallthroughs intentional
+  switch (strlen(new_guide)) {
+    case 3:
+      mask1 = (new_guide[2] == 'T') ? cond_bit : alt_bit;
+    case 2:
+      mask2 = (new_guide[1] == 'T') ? cond_bit : alt_bit;
+    case 1:
+      mask3 = (new_guide[0] == 'T') ? cond_bit : alt_bit;
+      break;
+    case 0:
+      break;
+    default:
+      LOG(FATAL) << "OAT: bad case in UpdateIT";
+  }
+  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
+      (1 << (3 - strlen(new_guide)));
+  it->operands[1] = mask;
+}
+
 void ArmMir2Lir::OpEndIT(LIR* it) {
   // TODO: use the 'it' pointer to do some checks with the LIR, for example
   //       we could check that the number of instructions matches the mask
@@ -934,7 +962,7 @@
   return OpCondBranch(c_code, target);
 }
 
-void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
   // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
   LIR* barrier = last_lir_insn_;
@@ -952,15 +980,21 @@
       break;
   }
 
+  bool ret = false;
+
   // If the same barrier already exists, don't generate another.
   if (barrier == nullptr
       || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) {
     barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
+    ret = true;
   }
 
   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
   DCHECK(!barrier->flags.use_def_invalid);
   barrier->u.m.def_mask = ENCODE_ALL;
+  return ret;
+#else
+  return false;
 #endif
 }
 
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 51e97cd..1bcf19b 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -278,6 +278,7 @@
     MarkPossibleNullPointerException(opt_flags);
     LoadConstantNoClobber(rs_x3, 0);
     LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_x1, rs_x2, NULL);
+    GenMemBarrier(kStoreLoad);
     Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
     LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
 
@@ -295,7 +296,6 @@
 
     LIR* success_target = NewLIR0(kPseudoTargetLabel);
     unlock_success_branch->target = success_target;
-    GenMemBarrier(kStoreLoad);
   } else {
     // Explicit null-check as slow-path is entered using an IT.
     GenNullCheck(rs_x0, opt_flags);
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index af0029c..418a989 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -142,7 +142,7 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
-    void GenMemBarrier(MemBarrierKind barrier_kind);
+    bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     void GenMonitorExit(int opt_flags, RegLocation rl_src);
     void GenMoveException(RegLocation rl_dest);
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 0465249..f2a57e7 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -712,7 +712,7 @@
   return OpCondBranch(c_code, target);
 }
 
-void Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
   // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
   LIR* barrier = last_lir_insn_;
@@ -730,15 +730,21 @@
       break;
   }
 
+  bool ret = false;
+
   // If the same barrier already exists, don't generate another.
   if (barrier == nullptr
       || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
     barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
+    ret = true;
   }
 
   // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
   DCHECK(!barrier->flags.use_def_invalid);
   barrier->u.m.def_mask = ENCODE_ALL;
+  return ret;
+#else
+  return false;
 #endif
 }
 
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index b7ea34f..c5b40da 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -139,7 +139,7 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
-    void GenMemBarrier(MemBarrierKind barrier_kind);
+    bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
                                        int first_bit, int second_bit);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 2821209..35345e8 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -433,9 +433,12 @@
   FreeTemp(rs_rMIPS_ARG3);
 }
 
-void MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
   NewLIR1(kMipsSync, 0 /* Only stype currently supported */);
+  return true;
+#else
+  return false;
 #endif
 }
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 6a0f3b2..836d2ac 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1192,8 +1192,9 @@
      * barrier, then it will be used as such. Otherwise, a new LIR will be generated
      * that can keep the semantics.
      * @param barrier_kind The kind of memory barrier to generate.
+     * @return whether a new instruction was generated.
      */
-    virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
+    virtual bool GenMemBarrier(MemBarrierKind barrier_kind) = 0;
 
     virtual void GenMoveException(RegLocation rl_dest) = 0;
     virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 11e7ff9..ef8c33c 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -142,7 +142,7 @@
     void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
     void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
     void GenSelect(BasicBlock* bb, MIR* mir);
-    void GenMemBarrier(MemBarrierKind barrier_kind);
+    bool GenMemBarrier(MemBarrierKind barrier_kind);
     void GenMoveException(RegLocation rl_dest);
     void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit,
                                        int first_bit, int second_bit);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 2db9845..e3312a2 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -421,11 +421,12 @@
     return false;
 }
 
-void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
+bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
   // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
   LIR* mem_barrier = last_lir_insn_;
 
+  bool ret = false;
   /*
    * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
    * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
@@ -435,11 +436,13 @@
     // If no LIR exists already that can be used a barrier, then generate an mfence.
     if (mem_barrier == nullptr) {
       mem_barrier = NewLIR0(kX86Mfence);
+      ret = true;
     }
 
     // If last instruction does not provide full barrier, then insert an mfence.
     if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
       mem_barrier = NewLIR0(kX86Mfence);
+      ret = true;
     }
   }
 
@@ -451,6 +454,9 @@
     DCHECK(!mem_barrier->flags.use_def_invalid);
     mem_barrier->u.m.def_mask = ENCODE_ALL;
   }
+  return ret;
+#else
+  return false;
 #endif
 }
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 72f1774..052d12e 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -270,8 +270,7 @@
 	runtime_linux.cc \
 	thread_linux.cc
 
-ifeq ($(HOST_ARCH),x86)
-ifneq ($(BUILD_HOST_64bit),)
+ifeq ($(HOST_ARCH),x86_64)
 LIBART_HOST_SRC_FILES += \
 	arch/x86_64/context_x86_64.cc \
 	arch/x86_64/entrypoints_init_x86_64.cc \
@@ -282,6 +281,7 @@
 	arch/x86_64/fault_handler_x86_64.cc \
 	monitor_pool.cc
 else
+  ifeq ($(HOST_ARCH),x86)
 LIBART_HOST_SRC_FILES += \
 	arch/x86/context_x86.cc \
 	arch/x86/entrypoints_init_x86.cc \
@@ -290,10 +290,10 @@
 	arch/x86/quick_entrypoints_x86.S \
 	arch/x86/fault_handler_x86.cc \
 	arch/x86/thread_x86.cc
-endif
-else # HOST_ARCH != x86
+  else # HOST_ARCH != x86 && HOST_ARCH != x86_64
 $(error unsupported HOST_ARCH=$(HOST_ARCH))
-endif # HOST_ARCH != x86
+  endif
+endif
 
 
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \