Enable support of VecLoad/VecStore in LSE

Changes:
- Enable VecLoad and VecStore support in LSE.
- This CL is based on Mingyao's CL: More general store elimination.
- The new gtest load_store_elimination_test is to test some corner cases
  where ArrayGet/ArraySet/VecLoad/VecStore are mixed and overlap.
- The new java 530-checker-lse-simd.

Test: test.py --host --optimizing --jit --gtest
Test: test.py --target --optimizing --jit
Test: run-gtests.sh
Test: load_store_elimination_test
Test: 530-checker-lse-simd
Test: ./art/test/run-test --optimizing --64 --gcstress 667-checker-simd-alignment
Test: m -j80 art-check-testing-apex-gen

Change-Id: I2d2024ec75a2aaef56b527db98abb40c5f16be79
diff --git a/build/apex/art_apex_test.py b/build/apex/art_apex_test.py
index 760f096..4062676 100755
--- a/build/apex/art_apex_test.py
+++ b/build/apex/art_apex_test.py
@@ -729,6 +729,7 @@
     self._checker.check_art_test_executable('linker_patch_test')
     self._checker.check_art_test_executable('live_interval_test')
     self._checker.check_art_test_executable('load_store_analysis_test')
+    self._checker.check_art_test_executable('load_store_elimination_test')
     self._checker.check_art_test_executable('loop_optimization_test')
     self._checker.check_art_test_executable('nodes_test')
     self._checker.check_art_test_executable('nodes_vector_test')
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 532f7d6..b93d473 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -410,6 +410,7 @@
         "jni/jni_cfi_test.cc",
         "optimizing/codegen_test.cc",
         "optimizing/load_store_analysis_test.cc",
+        "optimizing/load_store_elimination_test.cc",
         "optimizing/optimizing_cfi_test.cc",
         "optimizing/scheduler_test.cc",
     ],
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index b33d0f4..4c150da 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -23,8 +23,6 @@
 #include "load_store_analysis.h"
 #include "side_effects_analysis.h"
 
-#include <iostream>
-
 /**
  * The general algorithm of load-store elimination (LSE).
  * Load-store analysis in the previous pass collects a list of heap locations
@@ -64,8 +62,9 @@
  *   all the heap values, depending on the instruction's side effects.
  * - Finalizable objects are considered as persisting at method
  *   return/deoptimization.
- * - Currently this LSE algorithm doesn't handle SIMD graph, e.g. with VecLoad
- *   and VecStore instructions.
+ * - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
+ *   partial overlap access among ArrayGet/ArraySet/VecLoad/Store is seen as
+ *   alias and no load/store is eliminated in such case.
  * - Currently this LSE algorithm doesn't handle graph with try-catch, due to
  *   the special block merging structure.
  */
@@ -172,9 +171,7 @@
         DCHECK(substitute2->IsTypeConversion());
         continue;
       }
-      DCHECK(load2->IsInstanceFieldGet() ||
-             load2->IsStaticFieldGet() ||
-             load2->IsArrayGet());
+      DCHECK(IsLoad(load2));
       DCHECK(substitute2 != nullptr);
       if (substitute2 == substitute &&
           load2->GetType() == load->GetType() &&
@@ -204,9 +201,7 @@
         DCHECK(substitute_instructions_for_loads_[i]->IsTypeConversion());
         continue;
       }
-      DCHECK(load->IsInstanceFieldGet() ||
-             load->IsStaticFieldGet() ||
-             load->IsArrayGet());
+      DCHECK(IsLoad(load));
       HInstruction* substitute = substitute_instructions_for_loads_[i];
       DCHECK(substitute != nullptr);
       // We proactively retrieve the substitute for a removed load, so
@@ -224,7 +219,7 @@
       // We guarantee that type A stored as type B and then fetched out as
       // type C is the same as casting from type A to type C directly, since
       // type B and type C will have the same size which is guarenteed in
-      // HInstanceFieldGet/HStaticFieldGet/HArrayGet's SetType().
+      // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
       // So we only need one type conversion from type A to type C.
       HTypeConversion* type_conversion = AddTypeConversionIfNecessary(
           load, substitute, load->GetType());
@@ -240,7 +235,7 @@
 
     // At this point, stores in possibly_removed_stores_ can be safely removed.
     for (HInstruction* store : possibly_removed_stores_) {
-      DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
+      DCHECK(IsStore(store));
       store->GetBlock()->RemoveInstruction(store);
     }
 
@@ -261,26 +256,37 @@
   }
 
  private:
-  static bool IsLoad(HInstruction* instruction) {
+  static bool IsLoad(const HInstruction* instruction) {
     if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) {
       return false;
     }
     // Unresolved load is not treated as a load.
     return instruction->IsInstanceFieldGet() ||
         instruction->IsStaticFieldGet() ||
+        instruction->IsVecLoad() ||
         instruction->IsArrayGet();
   }
 
-  static bool IsStore(HInstruction* instruction) {
+  static bool IsStore(const HInstruction* instruction) {
     if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) {
       return false;
     }
     // Unresolved store is not treated as a store.
     return instruction->IsInstanceFieldSet() ||
         instruction->IsArraySet() ||
+        instruction->IsVecStore() ||
         instruction->IsStaticFieldSet();
   }
 
+  // Check if it is allowed to use default values for the specified load.
+  static bool IsDefaultAllowedForLoad(const HInstruction* load) {
+    DCHECK(IsLoad(load));
+    // Using defaults for VecLoads requires to create additional vector operations.
+    // As there are some issues with scheduling vector operations it is better to avoid creating
+    // them.
+    return !load->IsVecOperation();
+  }
+
   // Returns the real heap value by finding its substitute or by "peeling"
   // a store instruction.
   HInstruction* GetRealHeapValue(HInstruction* heap_value) {
@@ -298,6 +304,8 @@
       heap_value = heap_value->AsInstanceFieldSet()->GetValue();
     } else if (heap_value->IsStaticFieldSet()) {
       heap_value = heap_value->AsStaticFieldSet()->GetValue();
+    } else if (heap_value->IsVecStore()) {
+      heap_value = heap_value->AsVecStore()->GetValue();
     } else {
       DCHECK(heap_value->IsArraySet());
       heap_value = heap_value->AsArraySet()->GetValue();
@@ -553,10 +561,15 @@
         heap_values_for_[instruction->GetBlock()->GetBlockId()];
     HInstruction* heap_value = heap_values[idx];
     if (heap_value == kDefaultHeapValue) {
-      HInstruction* constant = GetDefaultValue(instruction->GetType());
-      AddRemovedLoad(instruction, constant);
-      heap_values[idx] = constant;
-      return;
+      if (IsDefaultAllowedForLoad(instruction)) {
+        HInstruction* constant = GetDefaultValue(instruction->GetType());
+        AddRemovedLoad(instruction, constant);
+        heap_values[idx] = constant;
+        return;
+      } else {
+        heap_values[idx] = kUnknownHeapValue;
+        heap_value = kUnknownHeapValue;
+      }
     }
     heap_value = GetRealHeapValue(heap_value);
     if (heap_value == kUnknownHeapValue) {
@@ -590,6 +603,35 @@
     return false;
   }
 
+  bool CanValueBeKeptIfSameAsNew(HInstruction* value,
+                                 HInstruction* new_value,
+                                 HInstruction* new_value_set_instr) {
+    // For field/array set location operations, if the value is the same as the new_value
+    // it can be kept even if aliasing happens. All aliased operations will access the same memory
+    // range.
+    // For vector values, this is not true. For example:
+    //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
+    //  VecStore array[i  ,i+1,i+2,i+3] = packed_data;
+    //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
+    //  VecLoad  vx = array[i,i+1,i+2,i+3];            <-- Cannot be eliminated because the value
+    //                                                     here is not packed_data anymore.
+    //
+    // TODO: to allow such 'same value' optimization on vector data,
+    // LSA needs to report more fine-grain MAY alias information:
+    // (1) May alias due to two vector data partial overlap.
+    //     e.g. a[i..i+3] and a[i+1,..,i+4].
+    // (2) May alias due to two vector data may complete overlap each other.
+    //     e.g. a[i..i+3] and b[i..i+3].
+    // (3) May alias but the exact relationship between two locations is unknown.
+    //     e.g. a[i..i+3] and b[j..j+3], where values of a,b,i,j are all unknown.
+    // This 'same value' optimization can apply only on case (2).
+    if (new_value_set_instr->IsVecOperation()) {
+      return false;
+    }
+
+    return Equal(value, new_value);
+  }
+
   void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
     DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
     DCHECK(!IsStore(value)) << value->DebugName();
@@ -636,23 +678,16 @@
 
     // This store may kill values in other heap locations due to aliasing.
     for (size_t i = 0; i < heap_values.size(); i++) {
-      if (i == idx) {
+      if (i == idx ||
+          heap_values[i] == kUnknownHeapValue ||
+          CanValueBeKeptIfSameAsNew(heap_values[i], value, instruction) ||
+          !heap_location_collector_.MayAlias(i, idx)) {
         continue;
       }
-      if (Equal(heap_values[i], value)) {
-        // Same value should be kept even if aliasing happens.
-        continue;
-      }
-      if (heap_values[i] == kUnknownHeapValue) {
-        // Value is already unknown, no need for aliasing check.
-        continue;
-      }
-      if (heap_location_collector_.MayAlias(i, idx)) {
-        // Kill heap locations that may alias and as a result if the heap value
-        // is a store, the store needs to be kept.
-        KeepIfIsStore(heap_values[i]);
-        heap_values[i] = kUnknownHeapValue;
-      }
+      // Kill heap locations that may alias and as a result if the heap value
+      // is a store, the store needs to be kept.
+      KeepIfIsStore(heap_values[i]);
+      heap_values[i] = kUnknownHeapValue;
     }
   }
 
@@ -689,7 +724,16 @@
 
   void VisitArraySet(HArraySet* instruction) override {
     size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
-    VisitSetLocation(instruction, idx, instruction->InputAt(2));
+    VisitSetLocation(instruction, idx, instruction->GetValue());
+  }
+
+  void VisitVecLoad(HVecLoad* instruction) override {
+    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
+  }
+
+  void VisitVecStore(HVecStore* instruction) override {
+    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
+    VisitSetLocation(instruction, idx, instruction->GetValue());
   }
 
   void VisitDeoptimize(HDeoptimize* instruction) override {
@@ -892,11 +936,6 @@
     return false;
   }
 
-  // TODO: analyze VecLoad/VecStore better.
-  if (graph_->HasSIMD()) {
-    return false;
-  }
-
   LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_);
   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     lse_visitor.VisitBasicBlock(block);
diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc
new file mode 100644
index 0000000..7380378
--- /dev/null
+++ b/compiler/optimizing/load_store_elimination_test.cc
@@ -0,0 +1,893 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tuple>
+
+#include "load_store_analysis.h"
+#include "load_store_elimination.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "side_effects_analysis.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class LoadStoreEliminationTest : public ImprovedOptimizingUnitTest {
+ public:
+  void PerformLSE() {
+    graph_->BuildDominatorTree();
+    SideEffectsAnalysis side_effects(graph_);
+    side_effects.Run();
+    LoadStoreAnalysis lsa(graph_);
+    lsa.Run();
+    LoadStoreElimination lse(graph_, side_effects, lsa, nullptr);
+    lse.Run();
+    EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks());
+  }
+
+  // Create instructions shared among tests.
+  void CreateEntryBlockInstructions() {
+    HInstruction* c1 = graph_->GetIntConstant(1);
+    HInstruction* c4 = graph_->GetIntConstant(4);
+    i_add1_ = new (GetAllocator()) HAdd(DataType::Type::kInt32, i_, c1);
+    i_add4_ = new (GetAllocator()) HAdd(DataType::Type::kInt32, i_, c4);
+    entry_block_->AddInstruction(i_add1_);
+    entry_block_->AddInstruction(i_add4_);
+    entry_block_->AddInstruction(new (GetAllocator()) HGoto());
+  }
+
+  // Create the major CFG used by tests:
+  //    entry
+  //      |
+  //  pre_header
+  //      |
+  //    loop[]
+  //      |
+  //   return
+  //      |
+  //     exit
+  void CreateTestControlFlowGraph() {
+    pre_header_ = new (GetAllocator()) HBasicBlock(graph_);
+    loop_ = new (GetAllocator()) HBasicBlock(graph_);
+
+    graph_->AddBlock(pre_header_);
+    graph_->AddBlock(loop_);
+
+    entry_block_->ReplaceSuccessor(return_block_, pre_header_);
+    pre_header_->AddSuccessor(loop_);
+    loop_->AddSuccessor(loop_);
+    loop_->AddSuccessor(return_block_);
+
+    HInstruction* c0 = graph_->GetIntConstant(0);
+    HInstruction* c1 = graph_->GetIntConstant(1);
+    HInstruction* c128 = graph_->GetIntConstant(128);
+
+    CreateEntryBlockInstructions();
+
+    // pre_header block
+    //   phi = 0;
+    phi_ = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
+    loop_->AddPhi(phi_);
+    pre_header_->AddInstruction(new (GetAllocator()) HGoto());
+    phi_->AddInput(c0);
+
+    // loop block:
+    //   suspend_check
+    //   phi++;
+    //   if (phi >= 128)
+    suspend_check_ = new (GetAllocator()) HSuspendCheck();
+    HInstruction* inc_phi = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi_, c1);
+    HInstruction* cmp = new (GetAllocator()) HGreaterThanOrEqual(phi_, c128);
+    HInstruction* hif = new (GetAllocator()) HIf(cmp);
+    loop_->AddInstruction(suspend_check_);
+    loop_->AddInstruction(inc_phi);
+    loop_->AddInstruction(cmp);
+    loop_->AddInstruction(hif);
+    phi_->AddInput(inc_phi);
+
+    CreateEnvForSuspendCheck();
+  }
+
+  void CreateEnvForSuspendCheck() {
+    ArenaVector<HInstruction*> current_locals({array_, i_, j_},
+                                              GetAllocator()->Adapter(kArenaAllocInstruction));
+    ManuallyBuildEnvFor(suspend_check_, &current_locals);
+  }
+
+  // Create the diamond-shaped CFG:
+  //      upper
+  //      /   \
+  //    left  right
+  //      \   /
+  //      down
+  //
+  // Return: the basic blocks forming the CFG in the following order {upper, left, right, down}.
+  std::tuple<HBasicBlock*, HBasicBlock*, HBasicBlock*, HBasicBlock*> CreateDiamondShapedCFG() {
+    CreateEntryBlockInstructions();
+
+    HBasicBlock* upper = new (GetAllocator()) HBasicBlock(graph_);
+    HBasicBlock* left = new (GetAllocator()) HBasicBlock(graph_);
+    HBasicBlock* right = new (GetAllocator()) HBasicBlock(graph_);
+
+    graph_->AddBlock(upper);
+    graph_->AddBlock(left);
+    graph_->AddBlock(right);
+
+    entry_block_->ReplaceSuccessor(return_block_, upper);
+    upper->AddSuccessor(left);
+    upper->AddSuccessor(right);
+    left->AddSuccessor(return_block_);
+    right->AddSuccessor(return_block_);
+
+    HInstruction* cmp = new (GetAllocator()) HGreaterThanOrEqual(i_, j_);
+    HInstruction* hif = new (GetAllocator()) HIf(cmp);
+    upper->AddInstruction(cmp);
+    upper->AddInstruction(hif);
+
+    left->AddInstruction(new (GetAllocator()) HGoto());
+    right->AddInstruction(new (GetAllocator()) HGoto());
+
+    return std::make_tuple(upper, left, right, return_block_);
+  }
+
+  // Add a HVecLoad instruction to the end of the provided basic block.
+  //
+  // Return: the created HVecLoad instruction.
+  HInstruction* AddVecLoad(HBasicBlock* block, HInstruction* array, HInstruction* index) {
+    DCHECK(block != nullptr);
+    DCHECK(array != nullptr);
+    DCHECK(index != nullptr);
+    HInstruction* vload = new (GetAllocator()) HVecLoad(
+        GetAllocator(),
+        array,
+        index,
+        DataType::Type::kInt32,
+        SideEffects::ArrayReadOfType(DataType::Type::kInt32),
+        4,
+        /*is_string_char_at*/ false,
+        kNoDexPc);
+    block->InsertInstructionBefore(vload, block->GetLastInstruction());
+    return vload;
+  }
+
+  // Add a HVecStore instruction to the end of the provided basic block.
+  // If no vdata is specified, generate HVecStore: array[index] = [1,1,1,1].
+  //
+  // Return: the created HVecStore instruction.
+  HInstruction* AddVecStore(HBasicBlock* block,
+                            HInstruction* array,
+                            HInstruction* index,
+                            HInstruction* vdata = nullptr) {
+    DCHECK(block != nullptr);
+    DCHECK(array != nullptr);
+    DCHECK(index != nullptr);
+    if (vdata == nullptr) {
+      HInstruction* c1 = graph_->GetIntConstant(1);
+      vdata = new (GetAllocator()) HVecReplicateScalar(GetAllocator(),
+                                                       c1,
+                                                       DataType::Type::kInt32,
+                                                       4,
+                                                       kNoDexPc);
+      block->InsertInstructionBefore(vdata, block->GetLastInstruction());
+    }
+    HInstruction* vstore = new (GetAllocator()) HVecStore(
+        GetAllocator(),
+        array,
+        index,
+        vdata,
+        DataType::Type::kInt32,
+        SideEffects::ArrayWriteOfType(DataType::Type::kInt32),
+        4,
+        kNoDexPc);
+    block->InsertInstructionBefore(vstore, block->GetLastInstruction());
+    return vstore;
+  }
+
+  // Add a HArrayGet instruction to the end of the provided basic block.
+  //
+  // Return: the created HArrayGet instruction.
+  HInstruction* AddArrayGet(HBasicBlock* block, HInstruction* array, HInstruction* index) {
+    DCHECK(block != nullptr);
+    DCHECK(array != nullptr);
+    DCHECK(index != nullptr);
+    HInstruction* get = new (GetAllocator()) HArrayGet(array, index, DataType::Type::kInt32, 0);
+    block->InsertInstructionBefore(get, block->GetLastInstruction());
+    return get;
+  }
+
+  // Add a HArraySet instruction to the end of the provided basic block.
+  // If no data is specified, generate HArraySet: array[index] = 1.
+  //
+  // Return: the created HArraySet instruction.
+  HInstruction* AddArraySet(HBasicBlock* block,
+                            HInstruction* array,
+                            HInstruction* index,
+                            HInstruction* data = nullptr) {
+    DCHECK(block != nullptr);
+    DCHECK(array != nullptr);
+    DCHECK(index != nullptr);
+    if (data == nullptr) {
+      data = graph_->GetIntConstant(1);
+    }
+    HInstruction* store = new (GetAllocator()) HArraySet(array,
+                                                         index,
+                                                         data,
+                                                         DataType::Type::kInt32,
+                                                         0);
+    block->InsertInstructionBefore(store, block->GetLastInstruction());
+    return store;
+  }
+
+  void CreateParameters() override {
+    parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+                                                               dex::TypeIndex(0),
+                                                               0,
+                                                               DataType::Type::kInt32));
+    array_ = parameters_.back();
+    parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+                                                               dex::TypeIndex(1),
+                                                               1,
+                                                               DataType::Type::kInt32));
+    i_ = parameters_.back();
+    parameters_.push_back(new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+                                                               dex::TypeIndex(1),
+                                                               2,
+                                                               DataType::Type::kInt32));
+    j_ = parameters_.back();
+  }
+
+  HBasicBlock* pre_header_;
+  HBasicBlock* loop_;
+
+  HInstruction* array_;
+  HInstruction* i_;
+  HInstruction* j_;
+  HInstruction* i_add1_;
+  HInstruction* i_add4_;
+  HInstruction* suspend_check_;
+
+  HPhi* phi_;
+};
+
+TEST_F(LoadStoreEliminationTest, ArrayGetSetElimination) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c1 = graph_->GetIntConstant(1);
+  HInstruction* c2 = graph_->GetIntConstant(2);
+  HInstruction* c3 = graph_->GetIntConstant(3);
+
+  // array[1] = 1;
+  // x = array[1];  <--- Remove.
+  // y = array[2];
+  // array[1] = 1;  <--- Remove, since it stores same value.
+  // array[i] = 3;  <--- MAY alias.
+  // array[1] = 1;  <--- Cannot remove, even if it stores the same value.
+  AddArraySet(entry_block_, array_, c1, c1);
+  HInstruction* load1 = AddArrayGet(entry_block_, array_, c1);
+  HInstruction* load2 = AddArrayGet(entry_block_, array_, c2);
+  HInstruction* store1 = AddArraySet(entry_block_, array_, c1, c1);
+  AddArraySet(entry_block_, array_, i_, c3);
+  HInstruction* store2 = AddArraySet(entry_block_, array_, c1, c1);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(load1));
+  ASSERT_FALSE(IsRemoved(load2));
+  ASSERT_TRUE(IsRemoved(store1));
+  ASSERT_FALSE(IsRemoved(store2));
+}
+
+TEST_F(LoadStoreEliminationTest, SameHeapValue1) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c1 = graph_->GetIntConstant(1);
+  HInstruction* c2 = graph_->GetIntConstant(2);
+
+  // Test LSE handling same value stores on array.
+  // array[1] = 1;
+  // array[2] = 1;
+  // array[1] = 1;  <--- Can remove.
+  // array[1] = 2;  <--- Can NOT remove.
+  AddArraySet(entry_block_, array_, c1, c1);
+  AddArraySet(entry_block_, array_, c2, c1);
+  HInstruction* store1 = AddArraySet(entry_block_, array_, c1, c1);
+  HInstruction* store2 = AddArraySet(entry_block_, array_, c1, c2);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(store1));
+  ASSERT_FALSE(IsRemoved(store2));
+}
+
+TEST_F(LoadStoreEliminationTest, SameHeapValue2) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  // Test LSE handling same value stores on vector.
+  // vdata = [0x1, 0x2, 0x3, 0x4, ...]
+  // VecStore array[i...] = vdata;
+  // VecStore array[j...] = vdata;  <--- MAY ALIAS.
+  // VecStore array[i...] = vdata;  <--- Cannot Remove, even if it's same value.
+  AddVecStore(entry_block_, array_, i_);
+  AddVecStore(entry_block_, array_, j_);
+  HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vstore));
+}
+
+TEST_F(LoadStoreEliminationTest, SameHeapValue3) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  // VecStore array[i...] = vdata;
+  // VecStore array[i+1...] = vdata;  <--- MAY alias due to partial overlap.
+  // VecStore array[i...] = vdata;    <--- Cannot remove, even if it's same value.
+  AddVecStore(entry_block_, array_, i_);
+  AddVecStore(entry_block_, array_, i_add1_);
+  HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vstore));
+}
+
+TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c1 = graph_->GetIntConstant(1);
+
+  // Test LSE handling array LSE when there is vector store in between.
+  // a[i] = 1;
+  // .. = a[i];                <-- Remove.
+  // a[i,i+1,i+2,i+3] = data;  <-- PARTIAL OVERLAP !
+  // .. = a[i];                <-- Cannot remove.
+  AddArraySet(entry_block_, array_, i_, c1);
+  HInstruction* load1 = AddArrayGet(entry_block_, array_, i_);
+  AddVecStore(entry_block_, array_, i_);
+  HInstruction* load2 = AddArrayGet(entry_block_, array_, i_);
+
+  // Test LSE handling vector load/store partial overlap.
+  // a[i,i+1,i+2,i+3] = data;
+  // a[i+4,i+5,i+6,i+7] = data;
+  // .. = a[i,i+1,i+2,i+3];
+  // .. = a[i+4,i+5,i+6,i+7];
+  // a[i+1,i+2,i+3,i+4] = data;  <-- PARTIAL OVERLAP !
+  // .. = a[i,i+1,i+2,i+3];
+  // .. = a[i+4,i+5,i+6,i+7];
+  AddVecStore(entry_block_, array_, i_);
+  AddVecStore(entry_block_, array_, i_add4_);
+  HInstruction* vload1 = AddVecLoad(entry_block_, array_, i_);
+  HInstruction* vload2 = AddVecLoad(entry_block_, array_, i_add4_);
+  AddVecStore(entry_block_, array_, i_add1_);
+  HInstruction* vload3 = AddVecLoad(entry_block_, array_, i_);
+  HInstruction* vload4 = AddVecLoad(entry_block_, array_, i_add4_);
+
+  // Test LSE handling vector LSE when there is array store in between.
+  // a[i,i+1,i+2,i+3] = data;
+  // a[i+1] = 1;                 <-- PARTIAL OVERLAP !
+  // .. = a[i,i+1,i+2,i+3];
+  AddVecStore(entry_block_, array_, i_);
+  AddArraySet(entry_block_, array_, i_, c1);
+  HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(load1));
+  ASSERT_FALSE(IsRemoved(load2));
+
+  ASSERT_TRUE(IsRemoved(vload1));
+  ASSERT_TRUE(IsRemoved(vload2));
+  ASSERT_FALSE(IsRemoved(vload3));
+  ASSERT_FALSE(IsRemoved(vload4));
+
+  ASSERT_FALSE(IsRemoved(vload5));
+}
+// function (int[] a, int j) {
+// a[j] = 1;
+// for (int i=0; i<128; i++) {
+//    /* doesn't do any write */
+// }
+// a[j] = 1;
+TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c1 = graph_->GetIntConstant(1);
+
+  // a[j] = 1
+  AddArraySet(pre_header_, array_, j_, c1);
+
+  // LOOP BODY:
+  // .. = a[i,i+1,i+2,i+3];
+  AddVecLoad(loop_, array_, phi_);
+
+  // a[j] = 1;
+  HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(array_set));
+}
+
+// function (int[] a, int j) {
+//   int[] b = new int[128];
+//   a[j] = 0;
+//   for (int phi=0; phi<128; phi++) {
+//     a[phi,phi+1,phi+2,phi+3] = [1,1,1,1];
+//     b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3];
+//   }
+//   a[j] = 0;
+// }
+TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction());
+  array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // a[j] = 0;
+  AddArraySet(pre_header_, array_, j_, c0);
+
+  // LOOP BODY:
+  // a[phi,phi+1,phi+2,phi+3] = [1,1,1,1];
+  // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3];
+  AddVecStore(loop_, array_, phi_);
+  HInstruction* vload = AddVecLoad(loop_, array_, phi_);
+  AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+
+  // a[j] = 0;
+  HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(vload));
+  ASSERT_FALSE(IsRemoved(a_set));  // Cannot remove due to write side-effect in the loop.
+}
+
+// function (int[] a, int j) {
+//   int[] b = new int[128];
+//   a[j] = 0;
+//   for (int phi=0; phi<128; phi++) {
+//     a[phi,phi+1,phi+2,phi+3] = [1,1,1,1];
+//     b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3];
+//   }
+//   x = a[j];
+// }
+TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction());
+  array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // a[j] = 0;
+  AddArraySet(pre_header_, array_, j_, c0);
+
+  // LOOP BODY:
+  // a[phi,phi+1,phi+2,phi+3] = [1,1,1,1];
+  // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3];
+  AddVecStore(loop_, array_, phi_);
+  HInstruction* vload = AddVecLoad(loop_, array_, phi_);
+  AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+
+  // x = a[j];
+  HInstruction* load = AddArrayGet(return_block_, array_, j_);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(vload));
+  ASSERT_FALSE(IsRemoved(load));  // Cannot remove due to write side-effect in the loop.
+}
+
+// Check that merging works correctly when there are VecStors in predecessors.
+//
+//                  vstore1: a[i,... i + 3] = [1,...1]
+//                       /          \
+//                      /            \
+// vstore2: a[i,... i + 3] = [1,...1]  vstore3: a[i+1, ... i + 4] = [1, ... 1]
+//                     \              /
+//                      \            /
+//                  vstore4: a[i,... i + 3] = [1,...1]
+//
+// Expected:
+//   'vstore2' is removed.
+//   'vstore3' is not removed.
+//   'vstore4' is not removed. Such cases are not supported at the moment.
+TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) {
+  InitGraph();
+
+  HBasicBlock* upper;
+  HBasicBlock* left;
+  HBasicBlock* right;
+  HBasicBlock* down;
+  std::tie(upper, left, right, down) = CreateDiamondShapedCFG();
+
+  // upper: a[i,... i + 3] = [1,...1]
+  HInstruction* vstore1 = AddVecStore(upper, array_, i_);
+  HInstruction* vdata = vstore1->InputAt(2);
+
+  // left: a[i,... i + 3] = [1,...1]
+  HInstruction* vstore2 = AddVecStore(left, array_, i_, vdata);
+
+  // right: a[i+1, ... i + 4] = [1, ... 1]
+  HInstruction* vstore3 = AddVecStore(right, array_, i_add1_, vdata);
+
+  // down: a[i,... i + 3] = [1,...1]
+  HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(vstore2));
+  ASSERT_FALSE(IsRemoved(vstore3));
+  ASSERT_FALSE(IsRemoved(vstore4));
+}
+
+// Check that merging works correctly when there are ArraySets in predecessors.
+//
+//          a[i] = 1
+//        /          \
+//       /            \
+// store1: a[i] = 1  store2: a[i+1] = 1
+//       \            /
+//        \          /
+//          store3: a[i] = 1
+//
+// Expected:
+//   'store1' is removed.
+//   'store2' is not removed.
+//   'store3' is removed.
+TEST_F(LoadStoreEliminationTest, MergePredecessorStores) {
+  InitGraph();
+
+  HBasicBlock* upper;
+  HBasicBlock* left;
+  HBasicBlock* right;
+  HBasicBlock* down;
+  std::tie(upper, left, right, down) = CreateDiamondShapedCFG();
+
+  // upper: a[i,... i + 3] = [1,...1]
+  AddArraySet(upper, array_, i_);
+
+  // left: a[i,... i + 3] = [1,...1]
+  HInstruction* store1 = AddArraySet(left, array_, i_);
+
+  // right: a[i+1, ... i + 4] = [1, ... 1]
+  HInstruction* store2 = AddArraySet(right, array_, i_add1_);
+
+  // down: a[i,... i + 3] = [1,...1]
+  HInstruction* store3 = AddArraySet(down, array_, i_);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(store1));
+  ASSERT_FALSE(IsRemoved(store2));
+  ASSERT_TRUE(IsRemoved(store3));
+}
+
+// Check that redundant VStore/VLoad are removed from a SIMD loop.
+//
+//  LOOP BODY
+//     vstore1: a[i,... i + 3] = [1,...1]
+//     vload:   x = a[i,... i + 3]
+//     vstore2: b[i,... i + 3] = x
+//     vstore3: a[i,... i + 3] = [1,...1]
+//
+// Expected:
+//   'vstore1' is not removed.
+//   'vload' is removed.
+//   'vstore3' is removed.
+TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction());
+  array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // LOOP BODY:
+  //    a[i,... i + 3] = [1,...1]
+  //    x = a[i,... i + 3]
+  //    b[i,... i + 3] = x
+  //    a[i,... i + 3] = [1,...1]
+  HInstruction* vstore1 = AddVecStore(loop_, array_a, phi_);
+  HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
+  AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+  HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2));
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vstore1));
+  ASSERT_TRUE(IsRemoved(vload));
+  ASSERT_TRUE(IsRemoved(vstore3));
+}
+
+// Loop write side effects invalidate all stores.
+// This causes stores after such loops not to be removed, even
+// their values are known.
+TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithSideEffects) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c2 = graph_->GetIntConstant(2);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  // array[0] = 2;
+  // loop:
+  //   b[i] = array[i]
+  // array[0] = 2
+  AddArraySet(entry_block_, array_, c0, c2);
+
+  HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction());
+  array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  HInstruction* load = AddArrayGet(loop_, array_, phi_);
+  AddArraySet(loop_, array_b, phi_, load);
+
+  HInstruction* store = AddArraySet(return_block_, array_, c0, c2);
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(store));
+}
+
+// As it is not allowed to use defaults for VecLoads, check if there is a new created array
+// a VecLoad used in a loop and after it is not replaced with a default.
+TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // LOOP BODY:
+  //    v = a[i,... i + 3]
+  // array[0,... 3] = v
+  HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
+  HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vload));
+  ASSERT_FALSE(IsRemoved(vstore));
+}
+
+// As it is not allowed to use defaults for VecLoads, check if there is a new created array
+// a VecLoad is not replaced with a default.
+TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // v = a[0,... 3]
+  // array[0,... 3] = v
+  HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
+  HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vload));
+  ASSERT_FALSE(IsRemoved(vstore));
+}
+
+// As it is allowed to use defaults for ordinary loads, check if there is a new created array
+// a load used in a loop and after it is replaced with a default.
+TEST_F(LoadStoreEliminationTest, LoadDefaultValueInLoopWithoutWriteSideEffects) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // LOOP BODY:
+  //    v = a[i]
+  // array[0] = v
+  HInstruction* load = AddArrayGet(loop_, array_a, phi_);
+  HInstruction* store = AddArraySet(return_block_, array_, c0, load);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(load));
+  ASSERT_FALSE(IsRemoved(store));
+}
+
+// As it is allowed to use defaults for ordinary loads, check if there is a new created array
+// a load is replaced with a default.
+TEST_F(LoadStoreEliminationTest, LoadDefaultValue) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // v = a[0]
+  // array[0] = v
+  HInstruction* load = AddArrayGet(pre_header_, array_a, c0);
+  HInstruction* store = AddArraySet(return_block_, array_, c0, load);
+
+  PerformLSE();
+
+  ASSERT_TRUE(IsRemoved(load));
+  ASSERT_FALSE(IsRemoved(store));
+}
+
+// As it is not allowed to use defaults for VecLoads but allowed for regular loads,
+// check if there is a new created array, a VecLoad and a load used in a loop and after it,
+// VecLoad is not replaced with a default but the load is.
+TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideEffects) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // LOOP BODY:
+  //    v = a[i,... i + 3]
+  //    v1 = a[i]
+  // array[0,... 3] = v
+  // array[0] = v1
+  HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
+  HInstruction* load = AddArrayGet(loop_, array_a, phi_);
+  HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+  HInstruction* store = AddArraySet(return_block_, array_, c0, load);
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vload));
+  ASSERT_TRUE(IsRemoved(load));
+  ASSERT_FALSE(IsRemoved(vstore));
+  ASSERT_FALSE(IsRemoved(store));
+}
+
+// As it is not allowed to use defaults for VecLoads but allowed for regular loads,
+// check if there is a new created array, a VecLoad and a load,
+// VecLoad is not replaced with a default but the load is.
+TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // v = a[0,... 3]
+  // v1 = a[0]
+  // array[0,... 3] = v
+  // array[0] = v1
+  HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
+  HInstruction* load = AddArrayGet(pre_header_, array_a, c0);
+  HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+  HInstruction* store = AddArraySet(return_block_, array_, c0, load);
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vload));
+  ASSERT_TRUE(IsRemoved(load));
+  ASSERT_FALSE(IsRemoved(vstore));
+  ASSERT_FALSE(IsRemoved(store));
+}
+
+// It is not allowed to use defaults for VecLoads. However it should not prevent from removing
+// loads getting the same value.
+// Check a load getting a known value is eliminated (a loop test case).
+TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSideEffects) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // LOOP BODY:
+  //    v = a[i,... i + 3]
+  //    v1 = a[i,... i + 3]
+  // array[0,... 3] = v
+  // array[128,... 131] = v1
+  HInstruction* vload1 = AddVecLoad(loop_, array_a, phi_);
+  HInstruction* vload2 = AddVecLoad(loop_, array_a, phi_);
+  HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
+  HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vload1));
+  ASSERT_TRUE(IsRemoved(vload2));
+  ASSERT_FALSE(IsRemoved(vstore1));
+  ASSERT_FALSE(IsRemoved(vstore2));
+}
+
+// It is not allowed to use defaults for VecLoads. However it should not prevent from removing
+// loads getting the same value.
+// Check a load getting a known value is eliminated.
+TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) {
+  InitGraph();
+  CreateTestControlFlowGraph();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c128 = graph_->GetIntConstant(128);
+
+  HInstruction* array_a = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
+  pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
+  array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
+
+  // v = a[0,... 3]
+  // v1 = a[0,... 3]
+  // array[0,... 3] = v
+  // array[128,... 131] = v1
+  HInstruction* vload1 = AddVecLoad(pre_header_, array_a, c0);
+  HInstruction* vload2 = AddVecLoad(pre_header_, array_a, c0);
+  HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
+  HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(vload1));
+  ASSERT_TRUE(IsRemoved(vload2));
+  ASSERT_FALSE(IsRemoved(vstore1));
+  ASSERT_FALSE(IsRemoved(vstore2));
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index efe4d6b..e817048 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -1155,6 +1155,8 @@
   // A store needs to stay in place.
   bool CanBeMoved() const override { return false; }
 
+  HInstruction* GetValue() const { return InputAt(2); }
+
   DECLARE_INSTRUCTION(VecStore);
 
  protected:
diff --git a/test/530-checker-lse-simd/expected.txt b/test/530-checker-lse-simd/expected.txt
new file mode 100644
index 0000000..03bb57b
--- /dev/null
+++ b/test/530-checker-lse-simd/expected.txt
@@ -0,0 +1,7 @@
+1
+3
+1
+2
+2
+1
+1
diff --git a/test/530-checker-lse-simd/info.txt b/test/530-checker-lse-simd/info.txt
new file mode 100644
index 0000000..61b4741
--- /dev/null
+++ b/test/530-checker-lse-simd/info.txt
@@ -0,0 +1 @@
+Checker test for testing load-store elimination in presence of VecLoad and VecStore.
diff --git a/test/530-checker-lse-simd/src/Main.java b/test/530-checker-lse-simd/src/Main.java
new file mode 100644
index 0000000..12dd297
--- /dev/null
+++ b/test/530-checker-lse-simd/src/Main.java
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Based on Linpack.matgen
+  // Load-store elimination did not work when a function had SIMD code.
+  // In the test below loop B is vectorized.
+  // Check that a redundant ArrayGet is eliminated in loop A.
+
+  /// CHECK-START: double Main.$noinline$vecgen(double[], double[], int) load_store_elimination (before)
+  /// CHECK:      Rem
+  /// CHECK-NEXT: TypeConversion
+  /// CHECK-NEXT: Sub
+  /// CHECK-NEXT: Mul
+  /// CHECK-NEXT: ArraySet
+  /// CHECK-NEXT: ArrayGet
+  /// CHECK-NEXT: LessThanOrEqual
+  /// CHECK-NEXT: Select
+  /// CHECK-NEXT: Add
+  /// CHECK-NEXT: Goto loop:{{B\d+}}
+
+  /// CHECK-START: double Main.$noinline$vecgen(double[], double[], int) load_store_elimination (after)
+  /// CHECK:      Rem
+  /// CHECK-NEXT: TypeConversion
+  /// CHECK-NEXT: Sub
+  /// CHECK-NEXT: Mul
+  /// CHECK-NEXT: ArraySet
+  /// CHECK-NEXT: LessThanOrEqual
+  /// CHECK-NEXT: Select
+  /// CHECK-NEXT: Add
+  /// CHECK-NEXT: Goto loop:{{B\d+}}
+  static double $noinline$vecgen(double a[], double b[], int n) {
+    double norma = 0.0;
+    int init = 1325;
+    // Loop A
+    for (int i = 0; i < n; ++i) {
+      init = 3125*init % 65536;
+      a[i] = (init - 32768.0)/16384.0;
+      norma = (a[i] > norma) ? a[i] : norma; // ArrayGet should be removed by LSE.
+    }
+
+    // Loop B
+    for (int i = 0; i < n; ++i) {
+      b[i] += a[i];
+    }
+
+    return norma;
+  }
+
+
+  static void test01() {
+    double a[] = new double[1024];
+    double norma = $noinline$vecgen(a, a, a.length);
+    System.out.println((int)norma);
+    System.out.println((int)a[1023]);
+  }
+
+  // Check LSE works when a function has SIMD code.
+  //
+  /// CHECK-START: double Main.$noinline$test02(double[], int) load_store_elimination (before)
+  /// CHECK:      BoundsCheck loop:none
+  /// CHECK-NEXT: ArrayGet
+  /// CHECK-NEXT: Mul
+  /// CHECK-NEXT: ArraySet
+  /// CHECK-NEXT: ArrayGet
+  /// CHECK-NEXT: ArrayLength
+  /// CHECK-NEXT: BelowOrEqual
+  //
+  /// CHECK:      ArrayGet loop:none
+  /// CHECK-NEXT: Return
+
+  /// CHECK-START: double Main.$noinline$test02(double[], int) load_store_elimination (after)
+  /// CHECK:      BoundsCheck loop:none
+  /// CHECK-NEXT: ArrayGet
+  /// CHECK-NEXT: Mul
+  /// CHECK-NEXT: ArraySet
+  /// CHECK-NEXT: ArrayLength
+  /// CHECK-NEXT: BelowOrEqual
+  //
+  /// CHECK:      ArrayGet loop:none
+  /// CHECK-NEXT: Return
+  static double $noinline$test02(double a[], int n) {
+    double b[] = new double[n];
+    a[0] = a[0] / 2;
+
+    double norma = a[0]; // ArrayGet should be removed by LSE.
+
+    // The following loop is vectorized.
+    for (int i = 0; i < 128; ++i) {
+      b[i] += a[i];
+    }
+
+    norma = a[0];
+    return norma;
+  }
+
+  static void test02() {
+    double a[] = new double[128];
+    java.util.Arrays.fill(a, 2.0);
+    double norma = $noinline$test02(a, a.length);
+    System.out.println((int)norma);
+  }
+
+  // Check LSE works when a function has SIMD code.
+  //
+  /// CHECK-START: double Main.$noinline$test03(int) load_store_elimination (before)
+  /// CHECK:      ArrayGet loop:none
+  /// CHECK-NEXT: Return
+
+  /// CHECK-START: double Main.$noinline$test03(int) load_store_elimination (after)
+  /// CHECK-NOT:  ArrayGet loop:none
+  static double $noinline$test03(int n) {
+    double a[] = new double[n];
+    double b[] = new double[n];
+
+    a[0] = 2.0;
+
+    // The following loop is vectorized.
+    for (int i = 0; i < 128; ++i) {
+      b[i] += a[i];
+    }
+
+    a[0] = 2.0;
+    return a[0]; // ArrayGet should be removed by LSE.
+  }
+
+  static void test03() {
+    double norma = $noinline$test03(128);
+    System.out.println((int)norma);
+  }
+
+  // Check LSE eliminates VecLoad.
+  //
+  /// CHECK-START-ARM64: double[] Main.$noinline$test04(int) load_store_elimination (before)
+  /// CHECK:             VecStore
+  /// CHECK-NEXT:        VecLoad
+  /// CHECK-NEXT:        VecAdd
+  /// CHECK-NEXT:        VecStore
+  /// CHECK-NEXT:        Add
+  /// CHECK-NEXT:        Goto loop:{{B\d+}}
+
+  /// CHECK-START-ARM64: double[] Main.$noinline$test04(int) load_store_elimination (after)
+  /// CHECK:             VecStore
+  /// CHECK-NEXT:        VecAdd
+  /// CHECK-NEXT:        VecStore
+  /// CHECK-NEXT:        Add
+  /// CHECK-NEXT:        Goto loop:{{B\d+}}
+  static double[] $noinline$test04(int n) {
+    double a[] = new double[n];
+    double b[] = new double[n];
+
+    // The following loop is vectorized.
+    for (int i = 0; i < n; ++i) {
+      a[i] = 1;
+      b[i] = a[i] + a[i]; // VecLoad should be removed by LSE.
+    }
+
+    return b;
+  }
+
+  static void test04() {
+    double norma = $noinline$test04(128)[0];
+    System.out.println((int)norma);
+  }
+
+  // Check LSE eliminates VecLoad.
+  //
+  /// CHECK-START-ARM64: double[] Main.$noinline$test05(int) load_store_elimination (before)
+  /// CHECK:             VecStore
+  /// CHECK-NEXT:        VecLoad
+  /// CHECK-NEXT:        VecStore
+  /// CHECK-NEXT:        VecStore
+  /// CHECK-NEXT:        Add
+  /// CHECK-NEXT:        Goto loop:{{B\d+}}
+
+  /// CHECK-START-ARM64: double[] Main.$noinline$test05(int) load_store_elimination (after)
+  /// CHECK:             VecStore
+  /// CHECK-NEXT:        VecStore
+  /// CHECK-NEXT:        Add
+  /// CHECK-NEXT:        Goto loop:{{B\d+}}
+  static double[] $noinline$test05(int n) {
+    double a[] = new double[n];
+    double b[] = new double[n];
+
+    // The following loop is vectorized.
+    for (int i = 0; i < n; ++i) {
+      a[i] = 1;
+      b[i] = a[i];
+      a[i] = 1;
+    }
+
+    return b;
+  }
+
+  static void test05() {
+    double norma = $noinline$test05(128)[0];
+    System.out.println((int)norma);
+  }
+
+  // Check LSE eliminates VecLoad and ArrayGet in case of singletons and default values.
+  //
+  /// CHECK-START-ARM64: double[] Main.$noinline$test06(int) load_store_elimination (before)
+  /// CHECK:             BoundsCheck loop:none
+  /// CHECK-NEXT:        ArrayGet
+  /// CHECK-NEXT:        Add
+  /// CHECK-NEXT:        ArrayLength
+  //
+  /// CHECK:             VecLoad loop:{{B\d+}}
+  /// CHECK-NEXT:        VecStore
+  /// CHECK-NEXT:        VecLoad
+  /// CHECK-NEXT:        VecLoad
+  /// CHECK-NEXT:        VecAdd
+  /// CHECK-NEXT:        VecAdd
+  /// CHECK-NEXT:        VecStore
+
+  /// CHECK-START-ARM64: double[] Main.$noinline$test06(int) load_store_elimination (after)
+  /// CHECK:             BoundsCheck loop:none
+  /// CHECK-NEXT:        Add
+  /// CHECK-NEXT:        ArrayLength
+  //
+  /// CHECK:             VecLoad loop:{{B\d+}}
+  /// CHECK-NEXT:        VecStore
+  /// CHECK-NEXT:        VecAdd
+  /// CHECK-NEXT:        VecAdd
+  /// CHECK-NEXT:        VecStore
+  static double[] $noinline$test06(int n) {
+    double a[] = new double[n];
+    double b[] = new double[n];
+
+    double r = a[0] + 1.0; // ArrayGet:a[0] is eliminated and default 0.0 is used.
+    // The following loop is vectorized.
+    for (int i = 0; i < n; ++i) {
+      b[i] = a[i]; // VecLoad:a[i] is not eliminated.
+      b[i] += a[i] + r; // VecLoad:a[i] and VecLoad:b[i] are eliminated.
+    }
+
+    return b;
+  }
+
+  static void test06() {
+    double norma = $noinline$test06(128)[0];
+    System.out.println((int)norma);
+  }
+
+  public static void main(String[] args) {
+    test01();
+    test02();
+    test03();
+    test04();
+    test05();
+    test06();
+  }
+}
+