Make data dependency around HDeoptimize correct.

We use HDeoptimize in a few places, but when it comes to data
dependency we either:
- don't have any (BCE, CHA), in which case we should make sure no
  code that the deoptimzation guards moves before the HDeoptimize
- have one on the receiver (inline cache), in which case we can
  update the dominated users with the HDeoptimize to get the data
  dependency correct.

bug:35661819
bug:36371709
test: 644-checker-deopt

Change-Id: I30b750f97b656dede9e10e7e43ac02c8604d7b7a
(cherry picked from commit 6f8e2c9913b24f746a154dda700f609cee3095f9)
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 2f31853..476906a 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1619,8 +1619,8 @@
   void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
     HInstruction* suspend = loop->GetSuspendCheck();
     block->InsertInstructionBefore(condition, block->GetLastInstruction());
-    HDeoptimize* deoptimize =
-        new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+    HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+        GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc());
     block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
     if (suspend->HasEnvironment()) {
       deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
@@ -1632,8 +1632,8 @@
   void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) {
     HBasicBlock* block = bounds_check->GetBlock();
     block->InsertInstructionBefore(condition, bounds_check);
-    HDeoptimize* deoptimize =
-        new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc());
+    HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+        GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc());
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
   }
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index 4f160ae..048073e 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -202,8 +202,8 @@
     HInstruction* suspend = loop_info->GetSuspendCheck();
     // Need a new deoptimize instruction that copies the environment
     // of the suspend instruction for the loop.
-    HDeoptimize* deoptimize =
-        new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc());
+    HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+        GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
     pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
     deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
         suspend->GetEnvironment(), loop_info->GetHeader());
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 0dfae11..cc3c143 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -505,6 +505,10 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+    StartAttributeStream("kind") << deoptimize->GetKind();
+  }
+
 #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
   void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f733145..19f668d 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -742,7 +742,8 @@
       HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
   HInstruction* compare = new (graph_->GetArena()) HNotEqual(
       deopt_flag, graph_->GetIntConstant(0, dex_pc));
-  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc);
+  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
+      graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc);
 
   if (cursor != nullptr) {
     bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
@@ -806,9 +807,16 @@
   bb_cursor->InsertInstructionAfter(compare, load_class);
   if (with_deoptimization) {
     HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-        compare, invoke_instruction->GetDexPc());
+        graph_->GetArena(),
+        compare,
+        receiver,
+        HDeoptimize::Kind::kInline,
+        invoke_instruction->GetDexPc());
     bb_cursor->InsertInstructionAfter(deoptimize, compare);
     deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+    DCHECK_EQ(invoke_instruction->InputAt(0), receiver);
+    receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
+    deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
   }
   return compare;
 }
@@ -1083,13 +1091,19 @@
     CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
   } else {
     HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-        compare, invoke_instruction->GetDexPc());
+        graph_->GetArena(),
+        compare,
+        receiver,
+        HDeoptimize::Kind::kInline,
+        invoke_instruction->GetDexPc());
     bb_cursor->InsertInstructionAfter(deoptimize, compare);
     deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
     if (return_replacement != nullptr) {
       invoke_instruction->ReplaceWith(return_replacement);
     }
+    receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
     invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+    deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
   }
 
   // Run type propagation to get the guard typed.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 17421fc..60790e5 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -2132,6 +2132,9 @@
   if (cond->IsConstant()) {
     if (cond->AsIntConstant()->IsFalse()) {
       // Never deopt: instruction can be removed.
+      if (deoptimize->GuardsAnInput()) {
+        deoptimize->ReplaceWith(deoptimize->GuardedInput());
+      }
       deoptimize->GetBlock()->RemoveInstruction(deoptimize);
     } else {
       // Always deopt.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ec706e6..caada8b 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1088,6 +1088,19 @@
   DCHECK(env_uses_.empty());
 }
 
+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+  const HUseList<HInstruction*>& uses = GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    size_t index = it->GetIndex();
+    // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+    ++it;
+    if (dominator->StrictlyDominates(user)) {
+      user->ReplaceInput(replacement, index);
+    }
+  }
+}
+
 void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
   HUserRecord<HInstruction*> input_use = InputRecordAt(index);
   if (input_use.GetInstruction() == replacement) {
@@ -1323,6 +1336,18 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) {
+  switch (rhs) {
+    case HDeoptimize::Kind::kBCE:
+      return os << "bce";
+    case HDeoptimize::Kind::kInline:
+      return os << "inline";
+    default:
+      LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
   return this == instruction->GetPreviousDisregardingMoves();
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fb0c889..5f5a28c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2081,6 +2081,7 @@
   void SetLocations(LocationSummary* locations) { locations_ = locations; }
 
   void ReplaceWith(HInstruction* instruction);
+  void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
   void ReplaceInput(HInstruction* replacement, size_t index);
 
   // This is almost the same as doing `ReplaceWith()`. But in this helper, the
@@ -2944,28 +2945,97 @@
 };
 
 // Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize FINAL : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HVariableInputSizeInstruction {
  public:
-  // We set CanTriggerGC to prevent any intermediate address to be live
-  // at the point of the `HDeoptimize`.
-  HDeoptimize(HInstruction* cond, uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
+  enum class Kind {
+    kBCE,
+    kInline,
+    kLast = kInline
+  };
+
+  // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move
+  // across.
+  HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc)
+      : HVariableInputSizeInstruction(
+            SideEffects::All(),
+            dex_pc,
+            arena,
+            /* number_of_inputs */ 1,
+            kArenaAllocMisc) {
+    SetPackedFlag<kFieldCanBeMoved>(false);
+    SetPackedField<DeoptimizeKindField>(kind);
     SetRawInputAt(0, cond);
   }
 
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
+  // Use this constructor when the `HDeoptimize` guards an instruction, and any user
+  // that relies on the deoptimization to pass should have its input be the `HDeoptimize`
+  // instead of `guard`.
+  // We set CanTriggerGC to prevent any intermediate address to be live
+  // at the point of the `HDeoptimize`.
+  HDeoptimize(ArenaAllocator* arena,
+              HInstruction* cond,
+              HInstruction* guard,
+              Kind kind,
+              uint32_t dex_pc)
+      : HVariableInputSizeInstruction(
+            SideEffects::CanTriggerGC(),
+            dex_pc,
+            arena,
+            /* number_of_inputs */ 2,
+            kArenaAllocMisc) {
+    SetPackedFlag<kFieldCanBeMoved>(true);
+    SetPackedField<DeoptimizeKindField>(kind);
+    SetRawInputAt(0, cond);
+    SetRawInputAt(1, guard);
   }
+
+  bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); }
+
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind());
+  }
+
   bool NeedsEnvironment() const OVERRIDE { return true; }
+
   bool CanThrow() const OVERRIDE { return true; }
 
+  Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); }
+
+  Primitive::Type GetType() const OVERRIDE {
+    return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid;
+  }
+
+  bool GuardsAnInput() const {
+    return InputCount() == 2;
+  }
+
+  HInstruction* GuardedInput() const {
+    DCHECK(GuardsAnInput());
+    return InputAt(1);
+  }
+
+  void RemoveGuard() {
+    RemoveInputAt(1);
+  }
+
   DECLARE_INSTRUCTION(Deoptimize);
 
  private:
+  static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
+  static constexpr size_t kFieldDeoptimizeKindSize =
+      MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
+  static constexpr size_t kNumberOfDeoptimizePackedBits =
+      kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
+  static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
+
   DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
 };
 
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs);
+
 // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
 // The compiled code checks this flag value in a guard before devirtualized call and
 // if it's true, starts to do deoptimization.
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index efbaf6c..66bfea9 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -40,6 +40,14 @@
   check->ReplaceWith(check->InputAt(0));
 }
 
+void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) {
+  if (deoptimize->GuardsAnInput()) {
+    // Replace the uses with the actual guarded instruction.
+    deoptimize->ReplaceWith(deoptimize->GuardedInput());
+    deoptimize->RemoveGuard();
+  }
+}
+
 void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
   check->ReplaceWith(check->InputAt(0));
   if (check->IsStringCharAt()) {
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index c128227..7ffbe44 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -44,6 +44,7 @@
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
   bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const;
   bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 6e332ca..d5637b9 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -310,8 +310,8 @@
     BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti);
   } else {
     DCHECK(check->IsDeoptimize());
-    if (compare->IsEqual()) {
-      BoundTypeIn(receiver, check->GetBlock(), check, class_rti);
+    if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) {
+      check->SetReferenceTypeInfo(class_rti);
     }
   }
 }
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index 1916c73..a1016d1 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -189,13 +189,14 @@
   // Use HAboveOrEqual+HDeoptimize as the bounds check.
   HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
   block->AddInstruction(ae);
-  HInstruction* deoptimize = new(&allocator_) HDeoptimize(ae, /* dex_pc */ 0u);
+  HInstruction* deoptimize =
+      new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u);
   block->AddInstruction(deoptimize);
   HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
-                                                                  /* number_of_vregs */ 5,
-                                                                  /* method */ nullptr,
-                                                                  /* dex_pc */ 0u,
-                                                                  deoptimize);
+                                                                /* number_of_vregs */ 5,
+                                                                /* method */ nullptr,
+                                                                /* dex_pc */ 0u,
+                                                                deoptimize);
   deoptimize_env->CopyFrom(args);
   deoptimize->SetRawEnvironment(deoptimize_env);
   HInstruction* array_set =
diff --git a/test/644-checker-deopt/expected.txt b/test/644-checker-deopt/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/644-checker-deopt/expected.txt
diff --git a/test/644-checker-deopt/info.txt b/test/644-checker-deopt/info.txt
new file mode 100644
index 0000000..c5fb12c
--- /dev/null
+++ b/test/644-checker-deopt/info.txt
@@ -0,0 +1,2 @@
+Regression test for making sure HDeoptimize is executed before
+the code it should have prevented executing.
diff --git a/test/644-checker-deopt/profile b/test/644-checker-deopt/profile
new file mode 100644
index 0000000..cb261cc
--- /dev/null
+++ b/test/644-checker-deopt/profile
@@ -0,0 +1,2 @@
+LMain;->inlineMonomorphic(LMain;)I+LMain;
+LMain;->inlinePolymorphic(LMain;)I+LMain;,LSubMain;
diff --git a/test/644-checker-deopt/run b/test/644-checker-deopt/run
new file mode 100644
index 0000000..146e180
--- /dev/null
+++ b/test/644-checker-deopt/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exec ${RUN} $@ --profile -Xcompiler-option --compiler-filter=speed-profile
diff --git a/test/644-checker-deopt/src/Main.java b/test/644-checker-deopt/src/Main.java
new file mode 100644
index 0000000..17c80a6
--- /dev/null
+++ b/test/644-checker-deopt/src/Main.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (before)
+  /// CHECK:   <<Deopt:l\d+>> Deoptimize
+  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (after)
+  /// CHECK:   <<Deopt:l\d+>> Deoptimize
+  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
+
+  public static int inlineMonomorphic(Main a) {
+    if (a == null) {
+      return 42;
+    }
+    int i = 0;
+    while (i < 100) {
+      i += a.getValue();
+    }
+    return i;
+  }
+
+  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (before)
+  /// CHECK:   <<Deopt:l\d+>> Deoptimize
+  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (after)
+  /// CHECK:   <<Deopt:l\d+>> Deoptimize
+  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
+  public static int inlinePolymorphic(Main a) {
+    return a.getValue();
+  }
+
+  public int getValue() {
+    return value;
+  }
+
+  public static void main(String[] args) {
+    inlineMonomorphic(new Main());
+  }
+
+  int value = 1;
+}
+
+// Add a subclass of 'Main' to write the polymorphic inline cache in the profile.
+class SubMain extends Main {
+}
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 1a301b3..8aa0c55 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -362,9 +362,9 @@
         "variant": "interp-ac"
     },
     {
-        "tests": "638-checker-inline-caches",
-        "description": ["Disable 638-checker-inline-caches temporarily until a fix",
-                        "arrives."],
+        "tests": ["638-checker-inline-caches",
+                  "644-checker-deopt"],
+        "description": ["Disabled temporarily until a fix arrives."],
         "bug": "http://b/36371709"
     }
 ]