diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bf3ed14..5b395c8 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -636,6 +636,8 @@
         } else if (current->IsIntConstant()) {
           int32_t value = current->AsIntConstant()->GetValue();
           stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value);
+        } else if (current->IsNullConstant()) {
+          stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0);
         } else {
           DCHECK(current->IsFloatConstant());
           int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue());
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 0310877..f46a36d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -245,6 +245,32 @@
     return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
   }
 
+  static int32_t GetInt32ValueOf(HConstant* constant) {
+    // 32-bit payload of an int, null (encoded as 0) or float (raw bits) constant.
+    if (constant->IsNullConstant()) return 0;
+    if (constant->IsIntConstant()) {
+      return constant->AsIntConstant()->GetValue();
+    }
+    // Only float constants are expected past this point.
+    DCHECK(constant->IsFloatConstant());
+    return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+  }
+
+  static int64_t GetInt64ValueOf(HConstant* constant) {
+    if (constant->IsNullConstant()) return 0;  // Null is encoded as 0.
+    if (constant->IsIntConstant()) {
+      return constant->AsIntConstant()->GetValue();
+    }
+    if (constant->IsLongConstant()) {
+      return constant->AsLongConstant()->GetValue();
+    }
+    if (constant->IsFloatConstant()) {  // Raw 32 bits, widened as an int32_t.
+      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+    }
+    DCHECK(constant->IsDoubleConstant());  // Only doubles are expected past this point.
+    return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+  }
+
  protected:
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2a79f82..7b0231b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -779,8 +779,8 @@
 
   if (locations != nullptr && locations->Out().IsConstant()) {
     HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant()) {
-      int32_t value = const_to_move->AsIntConstant()->GetValue();
+    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+      int32_t value = GetInt32ValueOf(const_to_move);
       if (location.IsRegister()) {
         __ LoadImmediate(location.AsRegister<Register>(), value);
       } else {
@@ -947,8 +947,8 @@
         __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>()));
       } else {
         DCHECK(locations->InAt(1).IsConstant());
-        int32_t value =
-            locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+        HConstant* constant = locations->InAt(1).GetConstant();
+        int32_t value = CodeGenerator::GetInt32ValueOf(constant);
         ShifterOperand operand;
         if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) {
           __ cmp(left, operand);
@@ -1109,6 +1109,17 @@
   UNUSED(constant);
 }
 
+void LocationsBuilderARM::VisitNullConstant(HNullConstant* constant) {
+  // The output location of the null constant is the constant itself.
+  auto* summary = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  summary->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM::VisitNullConstant(HNullConstant* constant) {
+  UNUSED(constant);
+  // No code is emitted here: the constant will be generated at its use site.
+}
+
 void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -3399,9 +3410,9 @@
     }
   } else {
     DCHECK(source.IsConstant()) << source;
-    HInstruction* constant = source.GetConstant();
-    if (constant->IsIntConstant()) {
-      int32_t value = constant->AsIntConstant()->GetValue();
+    HConstant* constant = source.GetConstant();
+    if (constant->IsIntConstant() || constant->IsNullConstant()) {
+      int32_t value = CodeGenerator::GetInt32ValueOf(constant);
       if (destination.IsRegister()) {
         __ LoadImmediate(destination.AsRegister<Register>(), value);
       } else {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index ec716a4..8220207 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -491,18 +491,21 @@
   Primitive::Type type = instruction->GetType();
   DCHECK_NE(type, Primitive::kPrimVoid);
 
-  if (instruction->IsIntConstant() || instruction->IsLongConstant()) {
-    int64_t value = instruction->IsIntConstant() ? instruction->AsIntConstant()->GetValue()
-                                                 : instruction->AsLongConstant()->GetValue();
+  if (instruction->IsIntConstant()
+      || instruction->IsLongConstant()
+      || instruction->IsNullConstant()) {
+    int64_t value = GetInt64ValueOf(instruction->AsConstant());
     if (location.IsRegister()) {
       Register dst = RegisterFrom(location, type);
-      DCHECK((instruction->IsIntConstant() && dst.Is32Bits()) ||
+      DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) ||
              (instruction->IsLongConstant() && dst.Is64Bits()));
       __ Mov(dst, value);
     } else {
       DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
       UseScratchRegisterScope temps(GetVIXLAssembler());
-      Register temp = instruction->IsIntConstant() ? temps.AcquireW() : temps.AcquireX();
+      Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant())
+          ? temps.AcquireW()
+          : temps.AcquireX();
       __ Mov(temp, value);
       __ Str(temp, StackOperandFrom(location));
     }
@@ -643,10 +646,12 @@
 }
 
 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
-  if (constant->IsIntConstant() || constant->IsLongConstant()) {
-    __ Mov(Register(destination),
-           constant->IsIntConstant() ? constant->AsIntConstant()->GetValue()
-                                     : constant->AsLongConstant()->GetValue());
+  if (constant->IsIntConstant()) {
+    __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
+  } else if (constant->IsLongConstant()) {
+    __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
+  } else if (constant->IsNullConstant()) {
+    __ Mov(Register(destination), 0);
   } else if (constant->IsFloatConstant()) {
     __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
   } else {
@@ -660,6 +665,8 @@
   DCHECK(constant.IsConstant());
   HConstant* cst = constant.GetConstant();
   return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
+         // Null is mapped to a core W register, which we associate with kPrimInt.
+         (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
          (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
          (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
          (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
@@ -680,7 +687,9 @@
     if (unspecified_type) {
       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
       if (source.IsStackSlot() ||
-          (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) {
+          (src_cst != nullptr && (src_cst->IsIntConstant()
+                                  || src_cst->IsFloatConstant()
+                                  || src_cst->IsNullConstant()))) {
         // For stack slots and 32bit constants, a 64bit type is appropriate.
         type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
       } else {
@@ -726,7 +735,7 @@
       UseScratchRegisterScope temps(GetVIXLAssembler());
       HConstant* src_cst = source.GetConstant();
       CPURegister temp;
-      if (src_cst->IsIntConstant()) {
+      if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
         temp = temps.AcquireW();
       } else if (src_cst->IsLongConstant()) {
         temp = temps.AcquireX();
@@ -1770,6 +1779,16 @@
   UNUSED(constant);
 }
 
+void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
+  auto* summary = new (GetGraph()->GetArena()) LocationSummary(constant);  // Default call kind.
+  summary->SetOut(Location::ConstantLocation(constant));  // Output is the constant itself.
+}
+
+void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant) {
+  UNUSED(constant);
+  // No code is emitted here: the constant will be generated at its use site.
+}
+
 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1a95f41..8a73eb4 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -602,13 +602,7 @@
       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
     } else if (source.IsConstant()) {
       HConstant* constant = source.GetConstant();
-      int32_t value;
-      if (constant->IsIntConstant()) {
-        value = constant->AsIntConstant()->GetValue();
-      } else {
-        DCHECK(constant->IsFloatConstant());
-        value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
-      }
+      int32_t value = GetInt32ValueOf(constant);
       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
     } else {
       DCHECK(source.IsStackSlot());
@@ -674,8 +668,8 @@
 
   if (locations != nullptr && locations->Out().IsConstant()) {
     HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant()) {
-      Immediate imm(const_to_move->AsIntConstant()->GetValue());
+    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+      Immediate imm(GetInt32ValueOf(const_to_move));
       if (location.IsRegister()) {
         __ movl(location.AsRegister<Register>(), imm);
       } else if (location.IsStackSlot()) {
@@ -925,7 +919,7 @@
               locations->InAt(1).AsRegister<Register>());
     } else if (locations->InAt(1).IsConstant()) {
       HConstant* instruction = locations->InAt(1).GetConstant();
-      Immediate imm(instruction->AsIntConstant()->GetValue());
+      Immediate imm(CodeGenerator::GetInt32ValueOf(instruction));
       __ cmpl(locations->InAt(0).AsRegister<Register>(), imm);
     } else {
       __ cmpl(locations->InAt(0).AsRegister<Register>(),
@@ -994,6 +988,17 @@
   UNUSED(constant);
 }
 
+void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
+  // The output location of the null constant is the constant itself.
+  auto* summary = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  summary->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant) {
+  UNUSED(constant);
+  // No code is emitted here: the constant will be generated at its use site.
+}
+
 void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -3500,8 +3505,8 @@
     }
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
-    if (constant->IsIntConstant()) {
-      Immediate imm(constant->AsIntConstant()->GetValue());
+    if (constant->IsIntConstant() || constant->IsNullConstant()) {
+      Immediate imm(CodeGenerator::GetInt32ValueOf(constant));
       if (destination.IsRegister()) {
         __ movl(destination.AsRegister<Register>(), imm);
       } else {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 88f1753..f7ec67f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -607,13 +607,7 @@
                source.AsFpuRegister<XmmRegister>());
     } else if (source.IsConstant()) {
       HConstant* constant = source.GetConstant();
-      int32_t value;
-      if (constant->IsFloatConstant()) {
-        value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
-      } else {
-        DCHECK(constant->IsIntConstant());
-        value = constant->AsIntConstant()->GetValue();
-      }
+      int32_t value = GetInt32ValueOf(constant);
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
     } else {
       DCHECK(source.IsStackSlot()) << source;
@@ -657,8 +651,8 @@
 
   if (locations != nullptr && locations->Out().IsConstant()) {
     HConstant* const_to_move = locations->Out().GetConstant();
-    if (const_to_move->IsIntConstant()) {
-      Immediate imm(const_to_move->AsIntConstant()->GetValue());
+    if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+      Immediate imm(GetInt32ValueOf(const_to_move));
       if (location.IsRegister()) {
         __ movl(location.AsRegister<CpuRegister>(), imm);
       } else if (location.IsStackSlot()) {
@@ -814,7 +808,7 @@
       if (rhs.IsRegister()) {
         __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
       } else if (rhs.IsConstant()) {
-        int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue();
+        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
         if (constant == 0) {
           __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
         } else {
@@ -1034,6 +1028,17 @@
   UNUSED(constant);
 }
 
+void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
+  // The output location of the null constant is the constant itself.
+  auto* summary = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  summary->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant) {
+  UNUSED(constant);
+  // No code is emitted here: the constant will be generated at its use site.
+}
+
 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -3263,8 +3268,8 @@
     }
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
-    if (constant->IsIntConstant()) {
-      int32_t value = constant->AsIntConstant()->GetValue();
+    if (constant->IsIntConstant() || constant->IsNullConstant()) {
+      int32_t value = CodeGenerator::GetInt32ValueOf(constant);
       if (destination.IsRegister()) {
         if (value == 0) {
           __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 4ebb136..ef10428 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -285,6 +285,19 @@
   }
 }
 
+static Primitive::Type PrimitiveKind(Primitive::Type type) {
+  // Boolean, byte, short, char and int share the int kind; other types are their own kind.
+  const bool is_int_kind = type == Primitive::kPrimBoolean
+      || type == Primitive::kPrimByte
+      || type == Primitive::kPrimShort
+      || type == Primitive::kPrimChar
+      || type == Primitive::kPrimInt;
+  if (is_int_kind) {
+    return Primitive::kPrimInt;
+  }
+  return type;
+}
+
 void SSAChecker::VisitPhi(HPhi* phi) {
   VisitInstruction(phi);
 
@@ -321,18 +334,17 @@
       }
     }
   }
-}
-
-static Primitive::Type PrimitiveKind(Primitive::Type type) {
-  switch (type) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimInt:
-      return Primitive::kPrimInt;
-    default:
-      return type;
+  // Ensure that the inputs have the same primitive kind as the phi.
+  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+    HInstruction* input = phi->InputAt(i);
+    if (PrimitiveKind(input->GetType()) != PrimitiveKind(phi->GetType())) {
+        AddError(StringPrintf(
+            "Input %d at index %zu of phi %d from block %d does not have the "
+            "same type as the phi: %s versus %s",
+            input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(),
+            Primitive::PrettyDescriptor(input->GetType()),
+            Primitive::PrettyDescriptor(phi->GetType())));
+    }
   }
 }
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 835bca6..c592737 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -184,6 +184,10 @@
     output_ << " " << instruction->GetValue();
   }
 
+  void VisitPhi(HPhi* phi) OVERRIDE {
+    output_ << " " << phi->GetRegNumber();  // Print the phi's register number after its name.
+  }
+
   void PrintInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
     instruction->Accept(this);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index cd36598..4a574b0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -292,6 +292,15 @@
   return true;
 }
 
+HNullConstant* HGraph::GetNullConstant() {
+  // Created lazily, once per graph, ahead of the entry block's last instruction.
+  if (cached_null_constant_ != nullptr) return cached_null_constant_;
+  cached_null_constant_ = new (arena_) HNullConstant();
+  HInstruction* cursor = entry_block_->GetLastInstruction();
+  entry_block_->InsertInstructionBefore(cached_null_constant_, cursor);
+  return cached_null_constant_;
+}
+
 void HLoopInformation::Add(HBasicBlock* block) {
   blocks_.SetBit(block->GetBlockId());
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fd88e42..cebde3b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -34,6 +34,7 @@
 class HIntConstant;
 class HInvoke;
 class HGraphVisitor;
+class HNullConstant;
 class HPhi;
 class HSuspendCheck;
 class LiveInterval;
@@ -194,6 +195,8 @@
     return reverse_post_order_;
   }
 
+  HNullConstant* GetNullConstant();
+
  private:
   HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
   void VisitBlockForDominatorTree(HBasicBlock* block,
@@ -233,6 +236,9 @@
   // The current id to assign to a newly added instruction. See HInstruction.id_.
   int32_t current_instruction_id_;
 
+  // Cached null constant that might be created when building SSA form.
+  HNullConstant* cached_null_constant_;
+
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
   DISALLOW_COPY_AND_ASSIGN(HGraph);
 };
@@ -610,6 +616,7 @@
   M(NewInstance, Instruction)                                           \
   M(Not, UnaryOperation)                                                \
   M(NotEqual, Condition)                                                \
+  M(NullConstant, Instruction)                                          \
   M(NullCheck, Instruction)                                             \
   M(Or, BinaryOperation)                                                \
   M(ParallelMove, Instruction)                                          \
@@ -914,7 +921,10 @@
 
   // Does not apply for all instructions, but having this at top level greatly
   // simplifies the null check elimination.
-  virtual bool CanBeNull() const { return true; }
+  virtual bool CanBeNull() const {
+    DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types";
+    return true;
+  }
 
   virtual bool CanDoImplicitNullCheck() const { return false; }
 
@@ -1675,6 +1685,22 @@
   DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
 };
 
+class HNullConstant : public HConstant {  // The `null` reference, typed kPrimNot.
+ public:
+  HNullConstant() : HConstant(Primitive::kPrimNot) {}
+
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;  // No payload to compare.
+  }
+
+  size_t ComputeHashCode() const OVERRIDE { return 0; }  // Constant hash: no payload.
+
+  DECLARE_INSTRUCTION(NullConstant);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HNullConstant);
+};
+
 // Constants of the type int. Those can be from Dex instructions, or
 // synthesized (for example with the if-eqz instruction).
 class HIntConstant : public HConstant {
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index 7e274f6..fe23fcf 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -40,6 +40,7 @@
 // Re-compute and update the type of the instruction. Returns
 // whether or not the type was changed.
 bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
+  DCHECK(phi->IsLive());
   Primitive::Type existing = phi->GetType();
 
   Primitive::Type new_type = existing;
@@ -49,15 +50,20 @@
   }
   phi->SetType(new_type);
 
-  if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) {
+  if (new_type == Primitive::kPrimDouble
+      || new_type == Primitive::kPrimFloat
+      || new_type == Primitive::kPrimNot) {
     // If the phi is of floating point type, we need to update its inputs to that
     // type. For inputs that are phis, we need to recompute their types.
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
       HInstruction* input = phi->InputAt(i);
       if (input->GetType() != new_type) {
-        HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
+        HInstruction* equivalent = (new_type == Primitive::kPrimNot)
+            ? SsaBuilder::GetReferenceTypeEquivalent(input)
+            : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
         phi->ReplaceInput(equivalent, i);
         if (equivalent->IsPhi()) {
+          equivalent->AsPhi()->SetLive();
           AddToWorklist(equivalent->AsPhi());
         }
       }
@@ -78,15 +84,9 @@
   if (block->IsLoopHeader()) {
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
-      // Set the initial type for the phi. Use the non back edge input for reaching
-      // a fixed point faster.
-      Primitive::Type phi_type = phi->GetType();
-      // We merge with the existing type, that has been set by the SSA builder.
-      DCHECK(phi_type == Primitive::kPrimVoid
-          || phi_type == Primitive::kPrimFloat
-          || phi_type == Primitive::kPrimDouble);
-      phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType()));
-      AddToWorklist(phi);
+      if (phi->IsLive()) {
+        AddToWorklist(phi);
+      }
     }
   } else {
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
@@ -95,7 +95,10 @@
       // doing a reverse post-order visit, therefore either the phi users are
       // non-loop phi and will be visited later in the visit, or are loop-phis,
       // and they are already in the work list.
-      UpdateType(it.Current()->AsPhi());
+      HPhi* phi = it.Current()->AsPhi();
+      if (phi->IsLive()) {
+        UpdateType(phi);
+      }
     }
   }
 }
@@ -110,13 +113,14 @@
 }
 
 void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
+  DCHECK(instruction->IsLive());
   worklist_.Add(instruction);
 }
 
 void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
   for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->GetUser()->AsPhi();
-    if (phi != nullptr) {
+    if (phi != nullptr && phi->IsLive()) {
       AddToWorklist(phi);
     }
   }
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 24e6837..4f17b6f 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -18,10 +18,6 @@
 
 namespace art {
 
-// TODO: Only do the analysis on reference types. We currently have to handle
-// the `null` constant, that is represented as a `HIntConstant` and therefore
-// has the Primitive::kPrimInt type.
-
 void ReferenceTypePropagation::Run() {
   // Compute null status for instructions.
 
@@ -54,8 +50,10 @@
       // Set the initial type for the phi. Use the non back edge input for reaching
       // a fixed point faster.
       HPhi* phi = it.Current()->AsPhi();
-      AddToWorklist(phi);
-      phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
+      if (phi->GetType() == Primitive::kPrimNot) {
+        AddToWorklist(phi);
+        phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
+      }
     }
   } else {
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
@@ -64,7 +62,10 @@
       // doing a reverse post-order visit, therefore either the phi users are
       // non-loop phi and will be visited later in the visit, or are loop-phis,
       // and they are already in the work list.
-      UpdateNullability(it.Current()->AsPhi());
+      HPhi* phi = it.Current()->AsPhi();
+      if (phi->GetType() == Primitive::kPrimNot) {
+        UpdateNullability(phi);
+      }
     }
   }
 }
@@ -79,6 +80,7 @@
 }
 
 void ReferenceTypePropagation::AddToWorklist(HPhi* instruction) {
+  DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot);
   worklist_.Add(instruction);
 }
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index c9a21aa..3dc7505 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -42,20 +42,33 @@
     }
   }
 
-  // 3) Remove dead phis. This will remove phis that are only used by environments:
+  // 3) Mark dead phis. This will mark phis that are only used by environments:
   // at the DEX level, the type of these phis does not need to be consistent, but
   // our code generator will complain if the inputs of a phi do not have the same
-  // type (modulo the special case of `null`).
-  SsaDeadPhiElimination dead_phis(GetGraph());
-  dead_phis.Run();
+  // type. The marking allows the type propagation to know which phis it needs
+  // to handle. We mark but do not eliminate: the elimination will be done in
+  // step 5).
+  {
+    SsaDeadPhiElimination dead_phis(GetGraph());
+    dead_phis.MarkDeadPhis();
+  }
 
   // 4) Propagate types of phis. At this point, phis are typed void in the general
-  // case, or float or double when we created a floating-point equivalent. So we
+  // case, or float/double/reference when we created an equivalent phi. So we
   // need to propagate the types across phis to give them a correct type.
   PrimitiveTypePropagation type_propagation(GetGraph());
   type_propagation.Run();
 
-  // 5) Clear locals.
+  // 5) Step 4) changes inputs of phis which may lead to dead phis again. We re-run
+  // the algorithm and this time eliminate them.
+  // TODO: Make this work with debug info and reference liveness. We currently
+  // eagerly remove phis used in environments.
+  {
+    SsaDeadPhiElimination dead_phis(GetGraph());
+    dead_phis.Run();
+  }
+
+  // 6) Clear locals.
   // TODO: Move this to a dead code eliminator phase.
   for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
        !it.Done();
@@ -185,15 +198,24 @@
 
 /**
  * Because of Dex format, we might end up having the same phi being
- * used for non floating point operations and floating point operations. Because
- * we want the graph to be correctly typed (and thereafter avoid moves between
+ * used for non floating point operations and floating point / reference operations.
+ * Because we want the graph to be correctly typed (and thereafter avoid moves between
  * floating point registers and core registers), we need to create a copy of the
- * phi with a floating point type.
+ * phi with a floating point / reference type.
  */
-static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
-  // We place the floating point phi next to this phi.
+static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+  // We place the floating point / reference phi next to this phi.
   HInstruction* next = phi->GetNext();
-  if (next == nullptr || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())) {
+  if (next != nullptr
+      && next->AsPhi()->GetRegNumber() == phi->GetRegNumber()
+      && next->GetType() != type) {
+    // Move to the next phi to see if it is the one we are looking for.
+    next = next->GetNext();
+  }
+
+  if (next == nullptr
+      || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
+      || (next->GetType() != type)) {
     ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
     HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
@@ -223,7 +245,7 @@
   } else if (value->IsIntConstant()) {
     return GetFloatEquivalent(value->AsIntConstant());
   } else if (value->IsPhi()) {
-    return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type);
+    return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type);
   } else {
     // For other instructions, we assume the verifier has checked that the dex format is correctly
     // typed and the value in a dex register will not be used for both floating point and
@@ -234,12 +256,25 @@
   }
 }
 
+HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
+  // Int constant 0 becomes the graph's null constant; a phi gets a reference-typed equivalent.
+  if (!value->IsIntConstant()) {
+    DCHECK(value->IsPhi());
+    return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot);
+  }
+  DCHECK_EQ(value->AsIntConstant()->GetValue(), 0);
+  return value->GetBlock()->GetGraph()->GetNullConstant();
+}
+
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
   HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
-  if (load->GetType() != value->GetType()
-      && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) {
-    // If the operation requests a specific type, we make sure its input is of that type.
-    value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
+  // If the operation requests a specific type, we make sure its input is of that type.
+  if (load->GetType() != value->GetType()) {
+    if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
+      value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
+    } else if (load->GetType() == Primitive::kPrimNot) {
+      value = GetReferenceTypeEquivalent(value);
+    }
   }
   load->ReplaceWith(value);
   load->GetBlock()->RemoveInstruction(load);
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 2eec87b..148e959 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -58,6 +58,8 @@
                                                   HInstruction* instruction,
                                                   Primitive::Type type);
 
+  static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+
  private:
   // Locals for the current block being visited.
   HEnvironment* current_locals_;
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index fd30c1b..f66a1c8 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -19,6 +19,11 @@
 namespace art {
 
 void SsaDeadPhiElimination::Run() {
+  MarkDeadPhis();       // Work out which phis are live, starting from non-phi users.
+  EliminateDeadPhis();  // Then remove the phis that are not live.
+}
+
+void SsaDeadPhiElimination::MarkDeadPhis() {
   // Add to the worklist phis referenced by non-phi instructions.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
@@ -49,7 +54,9 @@
       }
     }
   }
+}
 
+void SsaDeadPhiElimination::EliminateDeadPhis() {
   // Remove phis that are not live. Visit in post order so that phis
   // that are not inputs of loop phis can be removed when they have
   // no users left (dead phis might use dead phis).
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
index b789971..88a5279 100644
--- a/compiler/optimizing/ssa_phi_elimination.h
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -34,6 +34,9 @@
 
   void Run() OVERRIDE;
 
+  void MarkDeadPhis();
+  void EliminateDeadPhis();
+
  private:
   GrowableArray<HPhi*> worklist_;
 
