Type MoveOperands.

The ParallelMoveResolver implementation needs to know whether a move
is a 64-bit move or not, in order to handle swaps correctly.

Bug found by, and test case courtesy of, Serguei I. Katkov.

Change-Id: I9a0917a1cfed398c07e57ad6251aea8c9b0b8506
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8736374..f7fa5db 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -802,10 +802,15 @@
   }
 }
 
-void CodeGenerator::EmitParallelMoves(Location from1, Location to1, Location from2, Location to2) {
+void CodeGenerator::EmitParallelMoves(Location from1,
+                                      Location to1,
+                                      Primitive::Type type1,
+                                      Location from2,
+                                      Location to2,
+                                      Primitive::Type type2) {
   HParallelMove parallel_move(GetGraph()->GetArena());
-  parallel_move.AddMove(from1, to1, nullptr);
-  parallel_move.AddMove(from2, to2, nullptr);
+  parallel_move.AddMove(from1, to1, type1, nullptr);
+  parallel_move.AddMove(from2, to2, type2, nullptr);
   GetMoveResolver()->EmitNativeCode(&parallel_move);
 }
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b888aca..e536b2d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -244,7 +244,12 @@
   // of the architecture.
   static size_t GetCacheOffset(uint32_t index);
 
-  void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);
+  void EmitParallelMoves(Location from1,
+                         Location to1,
+                         Primitive::Type type1,
+                         Location from2,
+                         Location to2,
+                         Primitive::Type type2);
 
   static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
     // Check that null value is not represented as an integer constant.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 332c99a..ab025a8 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -141,8 +141,10 @@
     codegen->EmitParallelMoves(
         index_location_,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimInt,
         length_location_,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt);
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
   }
@@ -262,8 +264,10 @@
     codegen->EmitParallelMoves(
         class_to_check_,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
         object_class_,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       arm_codegen->InvokeRuntime(
@@ -750,8 +754,10 @@
       EmitParallelMoves(
           Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
           Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
+          Primitive::kPrimInt,
           Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
-          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
+          Primitive::kPrimInt);
     } else if (source.IsFpuRegister()) {
       UNIMPLEMENTED(FATAL);
     } else {
@@ -789,8 +795,10 @@
       EmitParallelMoves(
           Location::StackSlot(source.GetStackIndex()),
           Location::StackSlot(destination.GetStackIndex()),
+          Primitive::kPrimInt,
           Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
-          Location::StackSlot(destination.GetHighStackIndex(kArmWordSize)));
+          Location::StackSlot(destination.GetHighStackIndex(kArmWordSize)),
+          Primitive::kPrimInt);
     }
   }
 }
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a5ddd6b..e9f9d1a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -122,8 +122,8 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        index_location_, LocationFrom(calling_convention.GetRegisterAt(0)),
-        length_location_, LocationFrom(calling_convention.GetRegisterAt(1)));
+        index_location_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
+        length_location_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
     arm64_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
@@ -322,8 +322,8 @@
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
     codegen->EmitParallelMoves(
-        class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)),
-        object_class_, LocationFrom(calling_convention.GetRegisterAt(1)));
+        class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
+        object_class_, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 38f9ef8..f6b0cf3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -113,8 +113,10 @@
     x86_codegen->EmitParallelMoves(
         index_location_,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimInt,
         length_location_,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt);
     __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds)));
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
   }
@@ -266,8 +268,10 @@
     x86_codegen->EmitParallelMoves(
         class_to_check_,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
         object_class_,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize,
@@ -655,8 +659,10 @@
       EmitParallelMoves(
           Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
           Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
+          Primitive::kPrimInt,
           Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
-          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
+          Primitive::kPrimInt);
     } else if (source.IsFpuRegister()) {
       LOG(FATAL) << "Unimplemented";
     } else {
@@ -699,8 +705,10 @@
       EmitParallelMoves(
           Location::StackSlot(source.GetStackIndex()),
           Location::StackSlot(destination.GetStackIndex()),
+          Primitive::kPrimInt,
           Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
-          Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)));
+          Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)),
+          Primitive::kPrimInt);
     }
   }
 }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 7a928d4..652cca4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -161,8 +161,10 @@
     codegen->EmitParallelMoves(
         index_location_,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimInt,
         length_location_,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt);
     __ gs()->call(Address::Absolute(
         QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowArrayBounds), true));
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
@@ -285,8 +287,10 @@
     codegen->EmitParallelMoves(
         class_to_check_,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
         object_class_,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
       __ gs()->call(
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 94e27e9..9a6062f 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -94,7 +94,7 @@
     Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
     Location actual_loc = locations->InAt(i);
 
-    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
   }
 
   codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d1176c4..d3a4e6c 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -103,7 +103,7 @@
     Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
     Location actual_loc = locations->InAt(i);
 
-    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
   }
 
   codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index aec2d19..3c7a266 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -128,7 +128,7 @@
     Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
     Location actual_loc = locations->InAt(i);
 
-    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
   }
 
   codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index cbf94f0..d9a1c31 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -120,7 +120,7 @@
     Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
     Location actual_loc = locations->InAt(i);
 
-    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+    parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
   }
 
   codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 5f50494..a2179fa 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -3418,8 +3418,11 @@
 
 class MoveOperands : public ArenaObject<kArenaAllocMisc> {
  public:
-  MoveOperands(Location source, Location destination, HInstruction* instruction)
-      : source_(source), destination_(destination), instruction_(instruction) {}
+  MoveOperands(Location source,
+               Location destination,
+               Primitive::Type type,
+               HInstruction* instruction)
+      : source_(source), destination_(destination), type_(type), instruction_(instruction) {}
 
   Location GetSource() const { return source_; }
   Location GetDestination() const { return destination_; }
@@ -3467,11 +3470,17 @@
     return source_.IsInvalid();
   }
 
+  bool Is64BitMove() const {
+    return Primitive::Is64BitType(type_);
+  }
+
   HInstruction* GetInstruction() const { return instruction_; }
 
  private:
   Location source_;
   Location destination_;
+  // The type this move is for.
+  Primitive::Type type_;
   // The instruction this move is assocatied with. Null when this move is
   // for moving an input in the expected locations of user (including a phi user).
   // This is only used in debug mode, to ensure we do not connect interval siblings
@@ -3486,7 +3495,10 @@
   explicit HParallelMove(ArenaAllocator* arena)
       : HTemplateInstruction(SideEffects::None()), moves_(arena, kDefaultNumberOfMoves) {}
 
-  void AddMove(Location source, Location destination, HInstruction* instruction) {
+  void AddMove(Location source,
+               Location destination,
+               Primitive::Type type,
+               HInstruction* instruction) {
     DCHECK(source.IsValid());
     DCHECK(destination.IsValid());
     if (kIsDebugBuild) {
@@ -3512,7 +3524,7 @@
             << "Same destination for two moves in a parallel move.";
       }
     }
-    moves_.Add(MoveOperands(source, destination, instruction));
+    moves_.Add(MoveOperands(source, destination, type, instruction));
   }
 
   MoveOperands* MoveOperandsAt(size_t index) const {
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 4936685..0c7f0da 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -189,9 +189,9 @@
       const MoveOperands& other_move = *moves_.Get(i);
       if (other_move.Blocks(destination)) {
         DCHECK(other_move.IsPending());
-        if (!destination.IsPair() && other_move.GetSource().IsPair()) {
-          // We swap pairs before swapping non-pairs. Go back from the
-          // cycle by returning the pair that must be swapped.
+        if (!move->Is64BitMove() && other_move.Is64BitMove()) {
+          // We swap 64bits moves before swapping 32bits moves. Go back from the
+          // cycle by returning the move that must be swapped.
           return moves_.Get(i);
         }
         do_swap = true;
@@ -216,7 +216,7 @@
         UpdateSourceOf(moves_.Get(i), swap_destination, source);
       }
     }
-    // If the swap was required because of a pair in the middle of a cycle,
+    // If the swap was required because of a 64bits move in the middle of a cycle,
     // we return the swapped move, so that the caller knows it needs to re-iterate
     // its dependency loop.
     return required_swap;
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 173cffc..36ce575 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -92,12 +92,18 @@
   // other moves to satisfy dependencies).
   //
   // Return whether another move in the dependency cycle needs to swap. This
-  // is to handle pair swaps, where we want the pair to swap first to avoid
-  // building pairs that are unexpected by the code generator. For example, if
-  // we were to swap R1 with R2, we would need to update all locations using
-  // R2 to R1. So a (R2,R3) pair register could become (R1,R3). We could make
-  // the code generator understand such pairs, but it's easier and cleaner to
-  // just not create such pairs and exchange pairs in priority.
+  // is to handle 64bits swaps:
+  // 1) In the case of register pairs, where we want the pair to swap first to avoid
+  //    building pairs that are unexpected by the code generator. For example, if
+  //    we were to swap R1 with R2, we would need to update all locations using
+  //    R2 to R1. So a (R2,R3) pair register could become (R1,R3). We could make
+  //    the code generator understand such pairs, but it's easier and cleaner to
+  //    just not create such pairs and exchange pairs in priority.
+  // 2) Even when the architecture does not have pairs, we must handle 64bits swaps
+  //    first. Consider the case: (R0->R1) (R1->S) (S->R0), where 'S' is a single
+  //    stack slot. If we end up swapping S and R0, S will only contain the low bits
+  //    of R0. If R0->R1 is for a 64bits instruction, R1 will therefore not contain
+  //    the right value.
   MoveOperands* PerformMove(size_t index);
 
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 5c502f7..95cca51 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -87,6 +87,7 @@
     moves->AddMove(
         Location::RegisterLocation(operands[i][0]),
         Location::RegisterLocation(operands[i][1]),
+        Primitive::kPrimInt,
         nullptr);
   }
   return moves;
@@ -145,10 +146,12 @@
   moves->AddMove(
       Location::ConstantLocation(new (&allocator) HIntConstant(0)),
       Location::RegisterLocation(0),
+      Primitive::kPrimInt,
       nullptr);
   moves->AddMove(
       Location::RegisterLocation(1),
       Location::RegisterLocation(2),
+      Primitive::kPrimInt,
       nullptr);
   resolver.EmitNativeCode(moves);
   ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str());
@@ -164,10 +167,12 @@
     moves->AddMove(
         Location::RegisterLocation(2),
         Location::RegisterLocation(4),
+        Primitive::kPrimInt,
         nullptr);
     moves->AddMove(
         Location::RegisterPairLocation(0, 1),
         Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str());
@@ -179,10 +184,12 @@
     moves->AddMove(
         Location::RegisterPairLocation(0, 1),
         Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
         nullptr);
     moves->AddMove(
         Location::RegisterLocation(2),
         Location::RegisterLocation(4),
+        Primitive::kPrimInt,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str());
@@ -194,10 +201,12 @@
     moves->AddMove(
         Location::RegisterPairLocation(0, 1),
         Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
         nullptr);
     moves->AddMove(
         Location::RegisterLocation(2),
         Location::RegisterLocation(0),
+        Primitive::kPrimInt,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
@@ -208,14 +217,17 @@
     moves->AddMove(
         Location::RegisterLocation(2),
         Location::RegisterLocation(7),
+        Primitive::kPrimInt,
         nullptr);
     moves->AddMove(
         Location::RegisterLocation(7),
         Location::RegisterLocation(1),
+        Primitive::kPrimInt,
         nullptr);
     moves->AddMove(
         Location::RegisterPairLocation(0, 1),
         Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
@@ -226,14 +238,17 @@
     moves->AddMove(
         Location::RegisterLocation(2),
         Location::RegisterLocation(7),
+        Primitive::kPrimInt,
         nullptr);
     moves->AddMove(
         Location::RegisterPairLocation(0, 1),
         Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
         nullptr);
     moves->AddMove(
         Location::RegisterLocation(7),
         Location::RegisterLocation(1),
+        Primitive::kPrimInt,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
@@ -244,14 +259,17 @@
     moves->AddMove(
         Location::RegisterPairLocation(0, 1),
         Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
         nullptr);
     moves->AddMove(
         Location::RegisterLocation(2),
         Location::RegisterLocation(7),
+        Primitive::kPrimInt,
         nullptr);
     moves->AddMove(
         Location::RegisterLocation(7),
         Location::RegisterLocation(1),
+        Primitive::kPrimInt,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
@@ -262,10 +280,12 @@
     moves->AddMove(
         Location::RegisterPairLocation(0, 1),
         Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
         nullptr);
     moves->AddMove(
         Location::RegisterPairLocation(2, 3),
         Location::RegisterPairLocation(0, 1),
+        Primitive::kPrimLong,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str());
@@ -276,10 +296,12 @@
     moves->AddMove(
         Location::RegisterPairLocation(2, 3),
         Location::RegisterPairLocation(0, 1),
+        Primitive::kPrimLong,
         nullptr);
     moves->AddMove(
         Location::RegisterPairLocation(0, 1),
         Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
@@ -292,18 +314,71 @@
     moves->AddMove(
         Location::RegisterLocation(10),
         Location::RegisterLocation(5),
+        Primitive::kPrimInt,
         nullptr);
     moves->AddMove(
         Location::RegisterPairLocation(4, 5),
         Location::DoubleStackSlot(32),
+        Primitive::kPrimLong,
         nullptr);
     moves->AddMove(
         Location::DoubleStackSlot(32),
         Location::RegisterPairLocation(10, 11),
+        Primitive::kPrimLong,
         nullptr);
     resolver.EmitNativeCode(moves);
     ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str());
   }
 }
 
+// Test that we do 64bits moves before 32bits moves.
+TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  {
+    TestParallelMoveResolver resolver(&allocator);
+    HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+    moves->AddMove(
+        Location::RegisterLocation(0),
+        Location::RegisterLocation(1),
+        Primitive::kPrimLong,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterLocation(1),
+        Location::StackSlot(48),
+        Primitive::kPrimInt,
+        nullptr);
+    moves->AddMove(
+        Location::StackSlot(48),
+        Location::RegisterLocation(0),
+        Primitive::kPrimInt,
+        nullptr);
+    resolver.EmitNativeCode(moves);
+    ASSERT_STREQ("(0 <-> 1) (48(sp) <-> 0)", resolver.GetMessage().c_str());
+  }
+
+  {
+    TestParallelMoveResolver resolver(&allocator);
+    HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+    moves->AddMove(
+        Location::RegisterPairLocation(0, 1),
+        Location::RegisterPairLocation(2, 3),
+        Primitive::kPrimLong,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterPairLocation(2, 3),
+        Location::DoubleStackSlot(32),
+        Primitive::kPrimLong,
+        nullptr);
+    moves->AddMove(
+        Location::DoubleStackSlot(32),
+        Location::RegisterPairLocation(0, 1),
+        Primitive::kPrimLong,
+        nullptr);
+    resolver.EmitNativeCode(moves);
+    ASSERT_STREQ("(2x32(sp) <-> 0,1) (2,3 <-> 2x32(sp))", resolver.GetMessage().c_str());
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 8f26328..e03f5c3 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1227,10 +1227,10 @@
       && codegen_->ShouldSplitLongMoves()
       // The parallel move resolver knows how to deal with long constants.
       && !source.IsConstant()) {
-    move->AddMove(source.ToLow(), destination.ToLow(), instruction);
-    move->AddMove(source.ToHigh(), destination.ToHigh(), nullptr);
+    move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction);
+    move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr);
   } else {
-    move->AddMove(source, destination, instruction);
+    move->AddMove(source, destination, type, instruction);
   }
 }