Implement checkcast for optimizing.

- Ended up not using HTypeCheck because instanceof and checkcast
  require different logic for code generation.

- Fix an x86_64 assembler bug triggered by now enabling
  more methods to be compiled. Difficult to test today
  without b/18117217.

Change-Id: I3022e7ae03befb1d10bea9637ad21fadc430abe0
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 8418ab0..16b491d 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -704,6 +704,38 @@
   }
 }
 
+bool HGraphBuilder::BuildTypeCheck(const Instruction& instruction,
+                                   uint8_t destination,
+                                   uint8_t reference,
+                                   uint16_t type_index,
+                                   uint32_t dex_offset) {
+  bool type_known_final;
+  bool type_known_abstract;
+  bool is_referrers_class;
+  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
+      &type_known_final, &type_known_abstract, &is_referrers_class);
+  if (!can_access) {
+    return false;
+  }
+  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
+  HLoadClass* cls = new (arena_) HLoadClass(type_index, is_referrers_class, dex_offset);
+  current_block_->AddInstruction(cls);
+  // The class needs a temporary before being used by the type check.
+  Temporaries temps(graph_, 1);
+  temps.Add(cls);
+  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
+    current_block_->AddInstruction(
+        new (arena_) HInstanceOf(object, cls, type_known_final, dex_offset));
+    UpdateLocal(destination, current_block_->GetLastInstruction());
+  } else {
+    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
+    current_block_->AddInstruction(
+        new (arena_) HCheckCast(object, cls, type_known_final, dex_offset));
+  }
+  return true;
+}
+
 void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_offset) {
   if (target_offset <= 0) {
     // Unconditionally add a suspend check to backward branches. We can remove
@@ -1292,25 +1324,21 @@
     }
 
     case Instruction::INSTANCE_OF: {
+      uint8_t destination = instruction.VRegA_22c();
+      uint8_t reference = instruction.VRegB_22c();
       uint16_t type_index = instruction.VRegC_22c();
-      bool type_known_final;
-      bool type_known_abstract;
-      bool is_referrers_class;
-      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-          dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
-          &type_known_final, &type_known_abstract, &is_referrers_class);
-      if (!can_access) {
+      if (!BuildTypeCheck(instruction, destination, reference, type_index, dex_offset)) {
         return false;
       }
-      HInstruction* object = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimNot);
-      HLoadClass* cls = new (arena_) HLoadClass(type_index, is_referrers_class, dex_offset);
-      current_block_->AddInstruction(cls);
-      // The class needs a temporary before being used by the type check.
-      Temporaries temps(graph_, 1);
-      temps.Add(cls);
-      current_block_->AddInstruction(
-          new (arena_) HTypeCheck(object, cls, type_known_final, dex_offset));
-      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CHECK_CAST: {
+      uint8_t reference = instruction.VRegA_21c();
+      uint16_t type_index = instruction.VRegB_21c();
+      if (!BuildTypeCheck(instruction, -1, reference, type_index, dex_offset)) {
+        return false;
+      }
       break;
     }
 
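
Note: the two dex opcodes use different instruction formats, which is why
the INSTANCE_OF case decodes three vregs (format 22c) while CHECK_CAST
decodes two (format 21c) and passes a dummy destination (-1) that
BuildTypeCheck never reads on that path, since check-cast produces no value:

    instance-of vA, vB, type@CCCC   // 22c: vA = (vB instanceof CCCC) ? 1 : 0
    check-cast  vAA, type@BBBB      // 21c: throws unless vAA is null or an
                                    //      instance of BBBB; no result
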
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 09c9a51..9cf8305 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -173,6 +173,14 @@
                               uint32_t element_count,
                               uint32_t dex_offset);
 
+  // Builds an `HInstanceOf` or an `HCheckCast` instruction.
+  // Returns whether we succeeded in building the instruction.
+  bool BuildTypeCheck(const Instruction& instruction,
+                      uint8_t destination,
+                      uint8_t reference,
+                      uint16_t type_index,
+                      uint32_t dex_offset);
+
   ArenaAllocator* const arena_;
 
   // A list of the size of the dex code holding block information for
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 467c2a6..b0a56d5 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -269,13 +269,19 @@
 
 class TypeCheckSlowPathARM : public SlowPathCodeARM {
  public:
-  explicit TypeCheckSlowPathARM(HTypeCheck* instruction, Location object_class)
+  TypeCheckSlowPathARM(HInstruction* instruction,
+                       Location class_to_check,
+                       Location object_class,
+                       uint32_t dex_pc)
       : instruction_(instruction),
-        object_class_(object_class) {}
+        class_to_check_(class_to_check),
+        object_class_(object_class),
+        dex_pc_(dex_pc) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
@@ -284,7 +290,7 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    MoveOperands move1(locations->InAt(1),
+    MoveOperands move1(class_to_check_,
                        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                        nullptr);
     MoveOperands move2(object_class_,
@@ -295,17 +301,23 @@
     parallel_move.AddMove(&move2);
     arm_codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
 
-    arm_codegen->InvokeRuntime(
-        QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, instruction_->GetDexPc());
-    arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
+    if (instruction_->IsInstanceOf()) {
+      arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_);
+      arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
+    } else {
+      DCHECK(instruction_->IsCheckCast());
+      arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_);
+    }
 
     codegen->RestoreLiveRegisters(locations);
     __ b(GetExitLabel());
   }
 
  private:
-  HTypeCheck* const instruction_;
+  HInstruction* const instruction_;
+  const Location class_to_check_;
   const Location object_class_;
+  const uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM);
 };
@@ -2658,7 +2670,7 @@
       QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc());
 }
 
-void LocationsBuilderARM::VisitTypeCheck(HTypeCheck* instruction) {
+void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = instruction->IsClassFinal()
       ? LocationSummary::kNoCall
       : LocationSummary::kCallOnSlowPath;
@@ -2668,7 +2680,7 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM::VisitTypeCheck(HTypeCheck* instruction) {
+void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).As<Register>();
   Register cls = locations->InAt(1).As<Register>();
@@ -2693,7 +2705,7 @@
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
     slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-        instruction, Location::RegisterLocation(out));
+        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
     codegen_->AddSlowPath(slow_path);
     __ b(slow_path->GetEntryLabel(), NE);
     __ LoadImmediate(out, 1);
@@ -2707,5 +2719,34 @@
   __ Bind(&done);
 }
 
+void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).As<Register>();
+  Register cls = locations->InAt(1).As<Register>();
+  Register temp = locations->GetTemp(0).As<Register>();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+  SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
+      instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  // TODO: avoid this check if we know obj is not null.
+  __ cmp(obj, ShifterOperand(0));
+  __ b(slow_path->GetExitLabel(), EQ);
+  // Compare the class of `obj` with `cls`.
+  __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+  __ cmp(temp, ShifterOperand(cls));
+  __ b(slow_path->GetEntryLabel(), NE);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 }  // namespace arm
 }  // namespace art
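
For reference, the checkcast fast path emitted by VisitCheckCast above is
roughly the following; only a class mismatch reaches the runtime, while null
references and exact class matches stay on the fast path:

    cmp   obj, #0
    beq   exit                  @ null always passes checkcast
    ldr   temp, [obj, #class_offset]
    cmp   temp, cls
    bne   slow_path             @ classes differ: runtime pCheckCast decides
    exit:
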
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4dc836f..ac65c1d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -631,8 +631,10 @@
         codegen_(codegen) {}
 
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
+  M(CheckCast)                                             \
   M(ClinitCheck)                                           \
   M(DivZeroCheck)                                          \
+  M(InstanceOf)                                            \
   M(InvokeInterface)                                       \
   M(LoadClass)                                             \
   M(LoadException)                                         \
@@ -641,7 +643,6 @@
   M(StaticFieldGet)                                        \
   M(StaticFieldSet)                                        \
   M(Throw)                                                 \
-  M(TypeCheck)                                             \
   M(TypeConversion)                                        \
 
 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d66180b..aa609a6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -270,13 +270,19 @@
 
 class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
  public:
-  TypeCheckSlowPathX86(HTypeCheck* instruction, Location object_class)
+  TypeCheckSlowPathX86(HInstruction* instruction,
+                       Location class_to_check,
+                       Location object_class,
+                       uint32_t dex_pc)
       : instruction_(instruction),
-        object_class_(object_class) {}
+        class_to_check_(class_to_check),
+        object_class_(object_class),
+        dex_pc_(dex_pc) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
@@ -285,7 +291,7 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    MoveOperands move1(locations->InAt(1),
+    MoveOperands move1(class_to_check_,
                        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                        nullptr);
     MoveOperands move2(object_class_,
@@ -296,17 +302,27 @@
     parallel_move.AddMove(&move2);
     x86_codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
 
-    __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInstanceofNonTrivial)));
-    codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
-    x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
+    if (instruction_->IsInstanceOf()) {
+      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInstanceofNonTrivial)));
+    } else {
+      DCHECK(instruction_->IsCheckCast());
+      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pCheckCast)));
+    }
+
+    codegen->RecordPcInfo(instruction_, dex_pc_);
+    if (instruction_->IsInstanceOf()) {
+      x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
+    }
     codegen->RestoreLiveRegisters(locations);
 
     __ jmp(GetExitLabel());
   }
 
  private:
-  HTypeCheck* const instruction_;
+  HInstruction* const instruction_;
+  const Location class_to_check_;
   const Location object_class_;
+  const uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
 };
@@ -2753,7 +2769,7 @@
   codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
 }
 
-void LocationsBuilderX86::VisitTypeCheck(HTypeCheck* instruction) {
+void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = instruction->IsClassFinal()
       ? LocationSummary::kNoCall
       : LocationSummary::kCallOnSlowPath;
@@ -2763,7 +2779,7 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorX86::VisitTypeCheck(HTypeCheck* instruction) {
+void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).As<Register>();
   Location cls = locations->InAt(1);
@@ -2794,7 +2810,7 @@
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
     slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-        instruction, Location::RegisterLocation(out));
+        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
     codegen_->AddSlowPath(slow_path);
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ movl(out, Immediate(1));
@@ -2808,5 +2824,40 @@
   __ Bind(&done);
 }
 
+void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = locations->InAt(0).As<Register>();
+  Location cls = locations->InAt(1);
+  Register temp = locations->GetTemp(0).As<Register>();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
+      instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  // TODO: avoid this check if we know obj is not null.
+  __ testl(obj, obj);
+  __ j(kEqual, slow_path->GetExitLabel());
+  __ movl(temp, Address(obj, class_offset));
+
+  // Compare the class of `obj` with `cls`.
+  if (cls.IsRegister()) {
+    __ cmpl(temp, cls.As<Register>());
+  } else {
+    DCHECK(cls.IsStackSlot()) << cls;
+    __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
+  }
+
+  __ j(kNotEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 }  // namespace x86
 }  // namespace art
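
One deliberate difference from the ARM version: the class input is
Location::Any() instead of RequiresRegister(), presumably because x86's
cmpl can compare against a memory operand directly, so a class spilled to
a stack slot never needs reloading:

    // x86 / x86-64: the class may sit in a register or a stack slot.
    locations->SetInAt(1, Location::Any());
    // ARM: cmp only takes register operands, so a register is required.
    locations->SetInAt(1, Location::RequiresRegister());
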
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e09b6ca..bd1e4f4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -284,13 +284,19 @@
 
 class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  TypeCheckSlowPathX86_64(HTypeCheck* instruction, Location object_class)
+  TypeCheckSlowPathX86_64(HInstruction* instruction,
+                          Location class_to_check,
+                          Location object_class,
+                          uint32_t dex_pc)
       : instruction_(instruction),
-        object_class_(object_class) {}
+        class_to_check_(class_to_check),
+        object_class_(object_class),
+        dex_pc_(dex_pc) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
     CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
@@ -299,7 +305,7 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    MoveOperands move1(locations->InAt(1),
+    MoveOperands move1(class_to_check_,
                        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                        nullptr);
     MoveOperands move2(object_class_,
@@ -310,18 +316,29 @@
     parallel_move.AddMove(&move2);
     x64_codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
 
-    __ gs()->call(
-        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInstanceofNonTrivial), true));
-    codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
-    x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+    if (instruction_->IsInstanceOf()) {
+      __ gs()->call(
+          Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInstanceofNonTrivial), true));
+    } else {
+      DCHECK(instruction_->IsCheckCast());
+      __ gs()->call(
+          Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pCheckCast), true));
+    }
+    codegen->RecordPcInfo(instruction_, dex_pc_);
+
+    if (instruction_->IsInstanceOf()) {
+      x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+    }
 
     codegen->RestoreLiveRegisters(locations);
     __ jmp(GetExitLabel());
   }
 
  private:
-  HTypeCheck* const instruction_;
+  HInstruction* const instruction_;
+  const Location class_to_check_;
   const Location object_class_;
+  const uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
 };
@@ -2743,7 +2760,7 @@
   codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
 }
 
-void LocationsBuilderX86_64::VisitTypeCheck(HTypeCheck* instruction) {
+void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = instruction->IsClassFinal()
       ? LocationSummary::kNoCall
       : LocationSummary::kCallOnSlowPath;
@@ -2753,7 +2770,7 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorX86_64::VisitTypeCheck(HTypeCheck* instruction) {
+void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   CpuRegister obj = locations->InAt(0).As<CpuRegister>();
   Location cls = locations->InAt(1);
@@ -2783,7 +2800,7 @@
     // If the classes are not equal, we go into a slow path.
     DCHECK(locations->OnlyCallsOnSlowPath());
     slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-        instruction, Location::RegisterLocation(out.AsRegister()));
+        instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
     codegen_->AddSlowPath(slow_path);
     __ j(kNotEqual, slow_path->GetEntryLabel());
     __ movl(out, Immediate(1));
@@ -2797,5 +2814,39 @@
   __ Bind(&done);
 }
 
+void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  CpuRegister obj = locations->InAt(0).As<CpuRegister>();
+  Location cls = locations->InAt(1);
+  CpuRegister temp = locations->GetTemp(0).As<CpuRegister>();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
+      instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  // TODO: avoid this check if we know obj is not null.
+  __ testl(obj, obj);
+  __ j(kEqual, slow_path->GetExitLabel());
+  // Compare the class of `obj` with `cls`.
+  __ movl(temp, Address(obj, class_offset));
+  if (cls.IsRegister()) {
+    __ cmpl(temp, cls.As<CpuRegister>());
+  } else {
+    DCHECK(cls.IsStackSlot()) << cls;
+    __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
+  }
+  // Classes must be equal for the checkcast to succeed.
+  __ j(kNotEqual, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 }  // namespace x86_64
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2dab605..a3b9cb1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -479,6 +479,7 @@
   M(ArrayLength, Instruction)                                           \
   M(ArraySet, Instruction)                                              \
   M(BoundsCheck, Instruction)                                           \
+  M(CheckCast, Instruction)                                             \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
   M(Condition, BinaryOperation)                                         \
@@ -494,6 +495,7 @@
   M(If, Instruction)                                                    \
   M(InstanceFieldGet, Instruction)                                      \
   M(InstanceFieldSet, Instruction)                                      \
+  M(InstanceOf, Instruction)                                            \
   M(IntConstant, Constant)                                              \
   M(InvokeInterface, Invoke)                                            \
   M(InvokeStatic, Invoke)                                               \
@@ -525,7 +527,6 @@
   M(SuspendCheck, Instruction)                                          \
   M(Temporary, Instruction)                                             \
   M(Throw, Instruction)                                                 \
-  M(TypeCheck, Instruction)                                             \
   M(TypeConversion, Instruction)                                        \
 
 #define FOR_EACH_INSTRUCTION(M)                                         \
@@ -2351,12 +2352,12 @@
   DISALLOW_COPY_AND_ASSIGN(HThrow);
 };
 
-class HTypeCheck : public HExpression<2> {
+class HInstanceOf : public HExpression<2> {
  public:
-  explicit HTypeCheck(HInstruction* object,
-                      HLoadClass* constant,
-                      bool class_is_final,
-                      uint32_t dex_pc)
+  HInstanceOf(HInstruction* object,
+              HLoadClass* constant,
+              bool class_is_final,
+              uint32_t dex_pc)
       : HExpression(Primitive::kPrimBoolean, SideEffects::None()),
         class_is_final_(class_is_final),
         dex_pc_(dex_pc) {
@@ -2366,13 +2367,11 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    UNUSED(other);
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
   bool NeedsEnvironment() const OVERRIDE {
-    // TODO: Can we debug when doing a runtime instanceof check?
     return false;
   }
 
@@ -2380,13 +2379,52 @@
 
   bool IsClassFinal() const { return class_is_final_; }
 
-  DECLARE_INSTRUCTION(TypeCheck);
+  DECLARE_INSTRUCTION(InstanceOf);
 
  private:
   const bool class_is_final_;
   const uint32_t dex_pc_;
 
-  DISALLOW_COPY_AND_ASSIGN(HTypeCheck);
+  DISALLOW_COPY_AND_ASSIGN(HInstanceOf);
+};
+
+class HCheckCast : public HTemplateInstruction<2> {
+ public:
+  HCheckCast(HInstruction* object,
+             HLoadClass* constant,
+             bool class_is_final,
+             uint32_t dex_pc)
+      : HTemplateInstruction(SideEffects::None()),
+        class_is_final_(class_is_final),
+        dex_pc_(dex_pc) {
+    SetRawInputAt(0, object);
+    SetRawInputAt(1, constant);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
+
+  bool NeedsEnvironment() const OVERRIDE {
+    // Instruction may throw a ClassCastException.
+    return true;
+  }
+
+  bool CanThrow() const OVERRIDE { return true; }
+
+  uint32_t GetDexPc() const { return dex_pc_; }
+
+  bool IsClassFinal() const { return class_is_final_; }
+
+  DECLARE_INSTRUCTION(CheckCast);
+
+ private:
+  const bool class_is_final_;
+  const uint32_t dex_pc_;
+
+  DISALLOW_COPY_AND_ASSIGN(HCheckCast);
 };
 
 
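Side by side, the split encodes the semantics directly in the IR:
HInstanceOf is an HExpression<2> yielding kPrimBoolean and cannot throw,
so NeedsEnvironment() returns false; HCheckCast produces no value, can
throw, and keeps an environment so a stack map exists for the exception.
A simplified, hypothetical sketch of why a pass would care:

    // An unused instance-of is dead code; a check-cast never is.
    if (instruction->IsInstanceOf() && !instruction->HasUses()) {
      instruction->GetBlock()->RemoveInstruction(instruction);  // safe
    }
    // For instruction->IsCheckCast(), CanThrow() is true: must be kept.
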
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 5b70658..bc54702 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -353,7 +353,7 @@
 
 void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(dst);
+  EmitRex64(dst, src);
   EmitUint8(0x63);
   EmitRegisterOperand(dst.LowBits(), src.LowBits());
 }
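
On the assembler fix: movsxd encodes the destination in ModRM.reg and the
source in ModRM.r/m, so the REX prefix needs REX.R derived from dst and
REX.B derived from src. The one-argument EmitRex64(dst) set only the B bit,
and from the wrong register. A sketch of the miscompile, assuming that
reading of EmitRex64:

    movsxd rax, r9d
      intended: 49 63 C1   // REX.W+B (r9 needs B), reg = rax, r/m = r9
      emitted:  48 63 C1   // REX.W only; decodes as movsxd rax, ecx

Any movsxd involving r8-r15 could silently use the wrong source or
destination register once more methods were compiled.
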
diff --git a/test/424-checkcast/expected.txt b/test/424-checkcast/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/424-checkcast/expected.txt
diff --git a/test/424-checkcast/info.txt b/test/424-checkcast/info.txt
new file mode 100644
index 0000000..b50b082
--- /dev/null
+++ b/test/424-checkcast/info.txt
@@ -0,0 +1 @@
+Simple tests for the checkcast opcode.
diff --git a/test/424-checkcast/src/Main.java b/test/424-checkcast/src/Main.java
new file mode 100644
index 0000000..791b166
--- /dev/null
+++ b/test/424-checkcast/src/Main.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static Object a;
+
+  public static Object $opt$CheckCastMain() {
+    return (Main)a;
+  }
+
+  public static Object $opt$CheckCastFinalClass() {
+    return (FinalClass)a;
+  }
+
+  public static void main(String[] args) {
+    $opt$TestMain();
+    $opt$TestFinalClass();
+  }
+
+  public static void $opt$TestMain() {
+    a = new Main();
+    $opt$CheckCastMain();
+
+    a = null;
+    $opt$CheckCastMain();
+
+    a = new MainChild();
+    $opt$CheckCastMain();
+
+    a = new Object();
+    try {
+      $opt$CheckCastMain();
+      throw new Error("Should have gotten a ClassCastException");
+    } catch (ClassCastException ex) {}
+  }
+
+  public static void $opt$TestFinalClass() {
+    a = new FinalClass();
+    $opt$CheckCastFinalClass();
+
+    a = null;
+    $opt$CheckCastFinalClass();
+
+    a = new Main();
+    try {
+      $opt$CheckCastFinalClass();
+      throw new Error("Should have gotten a ClassCastException");
+    } catch (ClassCastException ex) {}
+
+    a = new Object();
+    try {
+      $opt$CheckCastFinalClass();
+      throw new Error("Should have gotten a ClassCastException");
+    } catch (ClassCastException ex) {}
+  }
+
+  static class MainChild extends Main {}
+
+  static final class FinalClass {}
+}
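
The `a = null` cases in both helpers are deliberate: check-cast always
succeeds on a null reference, which is the path the generated code covers
by jumping straight to the slow path's exit label:

    a = null;
    $opt$CheckCastMain();   // (Main) null never throws
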
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index e7a0439..562ba59 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -444,6 +444,7 @@
   422-instanceof \
   422-type-conversion \
   423-invoke-interface \
+  424-checkcast \
   700-LoadArgRegs \
   701-easy-div-rem \
   702-LargeBranchOffset \