MIPS: Implement heap poisoning in ART's Optimizing compiler.

This is in preparation for read barrier support.

Bug: 12687968

Test: test-art-host-gtest
Test: booted MIPS32R2 in QEMU
Test: test-art-target
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target (both MIPS64R6 and MIPS32R6)

Note: built with ART_HEAP_POISONING=true.

Change-Id: I0e6e04ff8de2fc8ca6126388409fa218e6920734
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index c9dde7c..791e632 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2073,6 +2073,11 @@
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
   }
+
+  if (type == Primitive::kPrimNot) {
+    Register out = locations->Out().AsRegister<Register>();
+    __ MaybeUnpoisonHeapReference(out);
+  }
 }
 
 void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) {
@@ -2200,7 +2205,31 @@
           DCHECK(!needs_write_barrier);
         } else {
           Register value = value_location.AsRegister<Register>();
-          __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          if (kPoisonHeapReferences && needs_write_barrier) {
+            // Note that a `value` statically known to be null never enters this
+            // block (`needs_write_barrier` is then false) and needs no poisoning;
+            // poisoning a run-time null is harmless, as negating zero gives zero.
+            DCHECK_EQ(value_type, Primitive::kPrimNot);
+            // Use Sw() instead of StoreToOffset() in order to be able to
+            // hold the poisoned reference in AT and thus avoid allocating
+            // yet another temporary register.
+            if (index.IsConstant()) {
+              if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
+                int16_t low = Low16Bits(data_offset);
+                uint32_t high = data_offset - low;
+                __ Addiu32(TMP, obj, high);
+                base_reg = TMP;
+                data_offset = low;
+              }
+            } else {
+              DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
+            }
+            __ PoisonHeapReference(AT, value);
+            __ Sw(AT, base_reg, data_offset);
+            null_checker();
+          } else {
+            __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          }
           if (needs_write_barrier) {
             DCHECK_EQ(value_type, Primitive::kPrimNot);
             codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
@@ -2208,6 +2237,8 @@
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
+        // Note: if heap poisoning is enabled, pAputObject takes care
+        // of poisoning the reference.
         codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
         CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
       }
@@ -2322,6 +2353,7 @@
   __ Beqz(obj, slow_path->GetExitLabel());
   // Compare the class of `obj` with `cls`.
   __ LoadFromOffset(kLoadWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
+  __ MaybeUnpoisonHeapReference(obj_cls);
   __ Bne(obj_cls, cls, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -4958,6 +4990,9 @@
         dst = locations->Out().AsRegister<Register>();
       }
       __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+      if (type == Primitive::kPrimNot) {
+        __ MaybeUnpoisonHeapReference(dst);
+      }
     } else {
       DCHECK(locations->Out().IsFpuRegister());
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
@@ -5016,6 +5051,7 @@
   StoreOperandType store_type = kStoreByte;
   bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
   auto null_checker = GetImplicitNullChecker(instruction);
 
   switch (type) {
@@ -5089,7 +5125,16 @@
       } else {
         src = value_location.AsRegister<Register>();
       }
-      __ StoreToOffset(store_type, src, obj, offset, null_checker);
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        // Note that a `value` statically known to be null never enters this
+        // block (`needs_write_barrier` is then false) and needs no poisoning;
+        // poisoning a run-time null is harmless, as negating zero gives zero.
+        DCHECK_EQ(type, Primitive::kPrimNot);
+        __ PoisonHeapReference(TMP, src);
+        __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+      } else {
+        __ StoreToOffset(store_type, src, obj, offset, null_checker);
+      }
     } else {
       FRegister src = value_location.AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
@@ -5101,7 +5146,7 @@
   }
 
   // TODO: memory barriers?
-  if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+  if (needs_write_barrier) {
     Register src = value_location.AsRegister<Register>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
@@ -5173,6 +5218,7 @@
 
   // Compare the class of `obj` with `cls`.
   __ LoadFromOffset(kLoadWord, out, obj, mirror::Object::ClassOffset().Int32Value());
+  __ MaybeUnpoisonHeapReference(out);
   if (instruction->IsExactCheck()) {
     // Classes must be equal for the instanceof to succeed.
     __ Xor(out, out, cls);
@@ -5239,6 +5285,14 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice: `temp` is only an
+  // intermediate/temporary reference, and the current concurrent
+  // copying collector keeps the from-space memory intact/accessible
+  // until the end of the marking phase (a guarantee that future
+  // versions of the collector may not provide).
+  __ MaybeUnpoisonHeapReference(temp);
   __ LoadFromOffset(kLoadWord, temp, temp,
       mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -5562,6 +5616,14 @@
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice: `temp` is only an
+  // intermediate/temporary reference, and the current concurrent
+  // copying collector keeps the from-space memory intact/accessible
+  // until the end of the marking phase (a guarantee that future
+  // versions of the collector may not provide).
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -5692,7 +5754,7 @@
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
       __ SetReorder(reordering);
       generate_null_check = true;
       break;
@@ -5837,7 +5899,7 @@
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
       __ SetReorder(reordering);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
       codegen_->AddSlowPath(slow_path);
@@ -6059,6 +6121,8 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
 }
@@ -6076,6 +6140,8 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5be0da4..817854b 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1653,6 +1653,11 @@
   if (!maybe_compressed_char_at) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
+
+  if (type == Primitive::kPrimNot) {
+    GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+    __ MaybeUnpoisonHeapReference(out);
+  }
 }
 
 void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) {
@@ -1740,16 +1745,49 @@
     case Primitive::kPrimNot: {
       if (!needs_runtime_call) {
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+        GpuRegister base_reg;
         GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
         if (index.IsConstant()) {
-          size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, value, obj, offset);
+          data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+          base_reg = obj;
         } else {
           DCHECK(index.IsRegister()) << index;
           __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
           __ Daddu(TMP, obj, TMP);
-          __ StoreToOffset(kStoreWord, value, TMP, data_offset);
+          base_reg = TMP;
+        }
+        if (kPoisonHeapReferences && needs_write_barrier) {
+          // Note that a `value` statically known to be null never enters this
+          // block (`needs_write_barrier` is then false) and needs no poisoning;
+          // poisoning a run-time null is harmless, as negating zero gives zero.
+          DCHECK_EQ(value_type, Primitive::kPrimNot);
+          // Use Sw() instead of StoreToOffset() in order to be able to
+          // hold the poisoned reference in AT and thus avoid allocating
+          // yet another temporary register.
+          if (index.IsConstant()) {
+            if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
+              int16_t low16 = Low16Bits(data_offset);
+              // For consistency with StoreToOffset() etc., treat data_offset as int32_t.
+              uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16;
+              int16_t upper16 = High16Bits(high48);
+              // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a
+              // compensatory 64KB added, which may push `high48` above 2GB and require
+              // the dahi instruction.
+              int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 1 : 0);
+              __ Daui(TMP, obj, upper16);
+              if (higher16 != 0) {
+                __ Dahi(TMP, higher16);
+              }
+              base_reg = TMP;
+              data_offset = low16;
+            }
+          } else {
+            DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
+          }
+          __ PoisonHeapReference(AT, value);
+          __ Sw(AT, base_reg, data_offset);
+        } else {
+          __ StoreToOffset(kStoreWord, value, base_reg, data_offset);
         }
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         if (needs_write_barrier) {
@@ -1758,6 +1796,8 @@
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
+        // Note: if heap poisoning is enabled, pAputObject takes care
+        // of poisoning the reference.
         codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
         CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
       }
@@ -1871,6 +1911,7 @@
   __ Beqzc(obj, slow_path->GetExitLabel());
   // Compare the class of `obj` with `cls`.
   __ LoadFromOffset(kLoadUnsignedWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
+  __ MaybeUnpoisonHeapReference(obj_cls);
   __ Bnec(obj_cls, cls, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -3086,6 +3127,7 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   LoadOperandType load_type = kLoadUnsignedByte;
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   switch (type) {
     case Primitive::kPrimBoolean:
       load_type = kLoadUnsignedByte;
@@ -3117,15 +3159,20 @@
   if (!Primitive::IsFloatingPointType(type)) {
     DCHECK(locations->Out().IsRegister());
     GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-    __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+    __ LoadFromOffset(load_type, dst, obj, offset);
   } else {
     DCHECK(locations->Out().IsFpuRegister());
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
-    __ LoadFpuFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+    __ LoadFpuFromOffset(load_type, dst, obj, offset);
   }
 
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // TODO: memory barrier?
+
+  if (type == Primitive::kPrimNot) {
+    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+    __ MaybeUnpoisonHeapReference(dst);
+  }
 }
 
 void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction,
@@ -3147,6 +3194,8 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   StoreOperandType store_type = kStoreByte;
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -3172,16 +3221,25 @@
   if (!Primitive::IsFloatingPointType(type)) {
     DCHECK(locations->InAt(1).IsRegister());
     GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
-    __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+    if (kPoisonHeapReferences && needs_write_barrier) {
+      // Note that a `value` statically known to be null never enters this
+      // block (`needs_write_barrier` is then false) and needs no poisoning;
+      // poisoning a run-time null is harmless, as negating zero gives zero.
+      DCHECK_EQ(type, Primitive::kPrimNot);
+      __ PoisonHeapReference(TMP, src);
+      __ StoreToOffset(store_type, TMP, obj, offset);
+    } else {
+      __ StoreToOffset(store_type, src, obj, offset);
+    }
   } else {
     DCHECK(locations->InAt(1).IsFpuRegister());
     FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>();
-    __ StoreFpuToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+    __ StoreFpuToOffset(store_type, src, obj, offset);
   }
 
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // TODO: memory barriers?
-  if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+  if (needs_write_barrier) {
     DCHECK(locations->InAt(1).IsRegister());
     GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
@@ -3247,6 +3305,7 @@
 
   // Compare the class of `obj` with `cls`.
   __ LoadFromOffset(kLoadUnsignedWord, out, obj, mirror::Object::ClassOffset().Int32Value());
+  __ MaybeUnpoisonHeapReference(out);
   if (instruction->IsExactCheck()) {
     // Classes must be equal for the instanceof to succeed.
     __ Xor(out, out, cls);
@@ -3325,6 +3384,14 @@
     __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice: `temp` is only an
+  // intermediate/temporary reference, and the current concurrent
+  // copying collector keeps the from-space memory intact/accessible
+  // until the end of the marking phase (a guarantee that future
+  // versions of the collector may not provide).
+  __ MaybeUnpoisonHeapReference(temp);
   __ LoadFromOffset(kLoadDoubleword, temp, temp,
       mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -3567,6 +3634,14 @@
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice: `temp` is only an
+  // intermediate/temporary reference, and the current concurrent
+  // copying collector keeps the from-space memory intact/accessible
+  // until the end of the marking phase (a guarantee that future
+  // versions of the collector may not provide).
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3666,8 +3741,8 @@
     case HLoadClass::LoadKind::kBssEntry: {
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
-      __ Lwu(out, AT, /* placeholder */ 0x5678);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
       generate_null_check = true;
       break;
     }
@@ -3773,8 +3848,8 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
-      __ Lwu(out, AT, /* placeholder */ 0x5678);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
       SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
       codegen_->AddSlowPath(slow_path);
       __ Beqzc(out, slow_path->GetEntryLabel());
@@ -3944,6 +4019,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
 }
@@ -3961,6 +4038,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 64a6840..21ed807 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1572,6 +1572,10 @@
       __ Lwr(trg, TMP, 0);
       __ Lwl(trg, TMP, 3);
     }
+
+    if (type == Primitive::kPrimNot) {
+      __ MaybeUnpoisonHeapReference(trg);
+    }
   }
 }
 
@@ -1663,6 +1667,11 @@
   if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
     Register value = locations->InAt(3).AsRegister<Register>();
 
+    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+      __ PoisonHeapReference(AT, value);
+      value = AT;
+    }
+
     if (is_R6) {
       __ Sw(value, TMP, 0);
     } else {
@@ -1852,13 +1861,23 @@
     codegen->MarkGCCard(base, value, value_can_be_null);
   }
 
+  MipsLabel loop_head, exit_loop;
+  __ Addu(TMP, base, offset_lo);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ PoisonHeapReference(expected);
+    // Do not poison `value`, if it is the same register as
+    // `expected`, which has just been poisoned.
+    if (value != expected) {
+      __ PoisonHeapReference(value);
+    }
+  }
+
   // do {
   //   tmp_value = [tmp_ptr] - expected;
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  MipsLabel loop_head, exit_loop;
-  __ Addu(TMP, base, offset_lo);
   __ Sync(0);
   __ Bind(&loop_head);
   if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
@@ -1868,8 +1887,8 @@
       __ LlR2(out, TMP);
     }
   } else {
-      LOG(FATAL) << "Unsupported op size " << type;
-      UNREACHABLE();
+    LOG(FATAL) << "Unsupported op size " << type;
+    UNREACHABLE();
   }
   __ Subu(out, out, expected);          // If we didn't get the 'expected'
   __ Sltiu(out, out, 1);                // value, set 'out' to false, and
@@ -1894,6 +1913,15 @@
                                 // cycle atomically then retry.
   __ Bind(&exit_loop);
   __ Sync(0);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ UnpoisonHeapReference(expected);
+    // Do not unpoison `value`, if it is the same register as
+    // `expected`, which has just been unpoisoned.
+    if (value != expected) {
+      __ UnpoisonHeapReference(value);
+    }
+  }
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 3888828..a28aac1 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1187,6 +1187,7 @@
 
     case Primitive::kPrimNot:
       __ Lwu(trg, TMP, 0);
+      __ MaybeUnpoisonHeapReference(trg);
       break;
 
     case Primitive::kPrimLong:
@@ -1285,7 +1286,12 @@
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      __ Sw(value, TMP, 0);
+      if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+        __ PoisonHeapReference(AT, value);
+        __ Sw(AT, TMP, 0);
+      } else {
+        __ Sw(value, TMP, 0);
+      }
       break;
 
     case Primitive::kPrimLong:
@@ -1454,13 +1460,23 @@
     codegen->MarkGCCard(base, value, value_can_be_null);
   }
 
+  Mips64Label loop_head, exit_loop;
+  __ Daddu(TMP, base, offset);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ PoisonHeapReference(expected);
+    // Do not poison `value`, if it is the same register as
+    // `expected`, which has just been poisoned.
+    if (value != expected) {
+      __ PoisonHeapReference(value);
+    }
+  }
+
   // do {
   //   tmp_value = [tmp_ptr] - expected;
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  Mips64Label loop_head, exit_loop;
-  __ Daddu(TMP, base, offset);
   __ Sync(0);
   __ Bind(&loop_head);
   if (type == Primitive::kPrimLong) {
@@ -1469,6 +1485,11 @@
     // Note: We will need a read barrier here, when read barrier
     // support is added to the MIPS64 back end.
     __ Ll(out, TMP);
+    if (type == Primitive::kPrimNot) {
+      // The LL instruction sign-extends the 32-bit value, but
+      // 32-bit references must be zero-extended. Zero-extend `out`.
+      __ Dext(out, out, 0, 32);
+    }
   }
   __ Dsubu(out, out, expected);         // If we didn't get the 'expected'
   __ Sltiu(out, out, 1);                // value, set 'out' to false, and
@@ -1487,6 +1508,15 @@
                                 // cycle atomically then retry.
   __ Bind(&exit_loop);
   __ Sync(0);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ UnpoisonHeapReference(expected);
+    // Do not unpoison `value`, if it is the same register as
+    // `expected`, which has just been unpoisoned.
+    if (value != expected) {
+      __ UnpoisonHeapReference(value);
+    }
+  }
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 5e83e82..2e2231b 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -3475,8 +3475,8 @@
   CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && unpoison_reference) {
-    Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister());
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dest.AsCoreRegister());
   }
 }
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 2fca185..47ddf25 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -727,6 +727,38 @@
   void Pop(Register rd);
   void PopAndReturn(Register rd, Register rt);
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `src` and store it in `dst`.
+  void PoisonHeapReference(Register dst, Register src) {
+    // dst = -src.
+    Subu(dst, ZERO, src);
+  }
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(Register reg) {
+    // reg = -reg.
+    PoisonHeapReference(reg, reg);
+  }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(Register reg) {
+    // reg = -reg.
+    Subu(reg, ZERO, reg);
+  }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
+
   void Bind(Label* label) OVERRIDE {
     Bind(down_cast<MipsLabel*>(label));
   }
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 998f2c7..0f86f88 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -488,6 +488,11 @@
   EmitI(0xf, rs, rt, imm16);
 }
 
+void Mips64Assembler::Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  EmitI(0x1d, rs, rt, imm16);
+}
+
 void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) {
   EmitI(1, rs, static_cast<GpuRegister>(6), imm16);
 }
@@ -2367,12 +2372,8 @@
   CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
   LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && unpoison_reference) {
-    // TODO: review
-    // Negate the 32-bit ref
-    Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
-    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
-    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32);
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dest.AsGpuRegister());
   }
 }
 
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index a0a1db6..ee15c6d 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -512,6 +512,7 @@
   void Ldpc(GpuRegister rs, uint32_t imm18);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
   void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sync(uint32_t stype);
@@ -654,6 +655,44 @@
   void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `src` and store it in `dst`.
+  void PoisonHeapReference(GpuRegister dst, GpuRegister src) {
+    // dst = -src.
+    // Negate the 32-bit ref.
+    Dsubu(dst, ZERO, src);
+    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64.
+    Dext(dst, dst, 0, 32);
+  }
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(GpuRegister reg) {
+    // reg = -reg.
+    PoisonHeapReference(reg, reg);
+  }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(GpuRegister reg) {
+    // reg = -reg.
+    // Negate the 32-bit ref.
+    Dsubu(reg, ZERO, reg);
+    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64.
+    Dext(reg, reg, 0, 32);
+  }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(GpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(GpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
+
   void Bind(Label* label) OVERRIDE {
     Bind(down_cast<Mips64Label*>(label));
   }
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 74b8f06..96a02c4 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -1269,6 +1269,24 @@
   DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lui, 16, "lui ${reg}, {imm}"), "lui");
 }
 
+TEST_F(AssemblerMIPS64Test, Daui) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  reg2_registers.erase(reg2_registers.begin());  // reg2 can't be ZERO, remove it.
+  std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits */ 16, /* as_uint */ true);
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int64_t imm : imms) {
+        __ Daui(*reg1, *reg2, imm);
+        expected << "daui $" << *reg1 << ", $" << *reg2 << ", " << imm << "\n";
+      }
+    }
+  }
+  DriverStr(expected.str(), "daui");
+}
+
 TEST_F(AssemblerMIPS64Test, Dahi) {
   DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dahi, 16, "dahi ${reg}, ${reg}, {imm}"), "dahi");
 }
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index 35f20fb..ef82bd2 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -70,14 +70,16 @@
 // Macros to poison (negate) the reference for heap poisoning.
 .macro POISON_HEAP_REF rRef
 #ifdef USE_HEAP_POISONING
-    subu \rRef, $zero, \rRef
+    dsubu \rRef, $zero, \rRef
+    dext  \rRef, \rRef, 0, 32
 #endif  // USE_HEAP_POISONING
 .endm
 
 // Macros to unpoison (negate) the reference for heap poisoning.
 .macro UNPOISON_HEAP_REF rRef
 #ifdef USE_HEAP_POISONING
-    subu \rRef, $zero, \rRef
+    dsubu \rRef, $zero, \rRef
+    dext  \rRef, \rRef, 0, 32
 #endif  // USE_HEAP_POISONING
 .endm