Emit bit manipulation instructions for x86 and x86_64

This patch performs instruction simplification to
generate the andn, blsmsk and blsr instructions on
CPUs that have AVX2.

Test: test.py --host --64, test-art-host-gtest
Change-Id: Ie41a1b99ac2980f1e9f6a831a7d639bc3e248f0f
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/Android.bp b/compiler/Android.bp
index c2f8e3c..30a65b2 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -157,6 +157,8 @@
                 "optimizing/code_generator_x86.cc",
                 "optimizing/code_generator_vector_x86.cc",
                 "optimizing/intrinsics_x86.cc",
+                "optimizing/instruction_simplifier_x86_shared.cc",
+                "optimizing/instruction_simplifier_x86.cc",
                 "optimizing/pc_relative_fixups_x86.cc",
                 "optimizing/x86_memory_gen.cc",
                 "utils/x86/assembler_x86.cc",
@@ -168,6 +170,7 @@
             srcs: [
                 "jni/quick/x86_64/calling_convention_x86_64.cc",
                 "optimizing/intrinsics_x86_64.cc",
+                "optimizing/instruction_simplifier_x86_64.cc",
                 "optimizing/code_generator_x86_64.cc",
                 "optimizing/code_generator_vector_x86_64.cc",
                 "utils/x86_64/assembler_x86_64.cc",
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9f34a51..7dcf289 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -7424,6 +7424,61 @@
   }
 }
 
+void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+  DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location dest = locations->Out();
+  if (instruction->GetResultType() == DataType::Type::kInt32) {
+    __ andn(dest.AsRegister<Register>(),
+            first.AsRegister<Register>(),
+            second.AsRegister<Register>());
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+    __ andn(dest.AsRegisterPairLow<Register>(),
+            first.AsRegisterPairLow<Register>(),
+            second.AsRegisterPairLow<Register>());
+    __ andn(dest.AsRegisterPairHigh<Register>(),
+            first.AsRegisterPairHigh<Register>(),
+            second.AsRegisterPairHigh<Register>());
+  }
+}
+
+void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+  DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
+    HX86MaskOrResetLeastSetBit* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location src = locations->InAt(0);
+  Location dest = locations->Out();
+  DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
+  switch (instruction->GetOpKind()) {
+    case HInstruction::kAnd:
+      __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
+      break;
+    case HInstruction::kXor:
+      __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
+      break;
+    default:
+      LOG(FATAL) << "Unreachable";
+  }
+}
+
 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
 void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
 void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 93b0461..1e49403 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -159,6 +159,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -190,6 +191,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dac2dba..d825390 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6734,6 +6734,48 @@
   }
 }
 
+void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+  DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  // There is no immediate variant of negated bitwise and in X86.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+  DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location dest = locations->Out();
+  __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+}
+
+void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location src = locations->InAt(0);
+  Location dest = locations->Out();
+  switch (instruction->GetOpKind()) {
+    case HInstruction::kAnd:
+      __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
+      break;
+    case HInstruction::kXor:
+      __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
+      break;
+    default:
+      LOG(FATAL) << "Unreachable";
+  }
+}
+
 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1e71397..72c4fd4 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -161,6 +161,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
@@ -192,6 +193,7 @@
 
   FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
 
diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc
new file mode 100644
index 0000000..2d8f94a
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86.cc
@@ -0,0 +1,88 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_x86.h"
+#include "instruction_simplifier_x86_shared.h"
+#include "code_generator_x86.h"
+
+namespace art {
+
+namespace x86 {
+
+class InstructionSimplifierX86Visitor : public HGraphVisitor {
+ public:
+  InstructionSimplifierX86Visitor(HGraph* graph,
+                                  CodeGenerator* codegen,
+                                  OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorX86*>(codegen)),
+        stats_(stats) {}
+
+  void RecordSimplification() {
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
+  }
+
+  bool HasAVX2() {
+    return (codegen_->GetInstructionSetFeatures().HasAVX2());
+  }
+
+  void VisitBasicBlock(HBasicBlock* block) override {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInBlock()) {
+        instruction->Accept(this);
+      }
+    }
+  }
+
+  void VisitAnd(HAnd * instruction) override;
+  void VisitXor(HXor* instruction) override;
+
+ private:
+  CodeGeneratorX86* codegen_;
+  OptimizingCompilerStats* stats_;
+};
+
+
+void InstructionSimplifierX86Visitor::VisitAnd(HAnd* instruction) {
+  if (TryCombineAndNot(instruction)) {
+    RecordSimplification();
+  } else if (instruction->GetResultType() == DataType::Type::kInt32) {
+    if (TryGenerateResetLeastSetBit(instruction)) {
+      RecordSimplification();
+    }
+  }
+}
+
+void InstructionSimplifierX86Visitor::VisitXor(HXor* instruction) {
+  if (instruction->GetResultType() == DataType::Type::kInt32) {
+    if (TryGenerateMaskUptoLeastSetBit(instruction)) {
+      RecordSimplification();
+    }
+  }
+}
+
+bool InstructionSimplifierX86::Run() {
+  InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_);
+  if (visitor.HasAVX2()) {
+    visitor.VisitReversePostOrder();
+    return true;
+  }
+  return false;
+}
+
+}  // namespace x86
+}  // namespace art
+
diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h
new file mode 100644
index 0000000..6f10006
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86.h
@@ -0,0 +1,44 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+namespace x86 {
+
+class InstructionSimplifierX86 : public HOptimization {
+ public:
+  InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kInstructionSimplifierX86PassName, stats),
+        codegen_(codegen) {}
+
+  static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86";
+
+  bool Run() override;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
diff --git a/compiler/optimizing/instruction_simplifier_x86_64.cc b/compiler/optimizing/instruction_simplifier_x86_64.cc
new file mode 100644
index 0000000..56c6b41
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_64.cc
@@ -0,0 +1,82 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_x86_64.h"
+#include "instruction_simplifier_x86_shared.h"
+#include "code_generator_x86_64.h"
+
+namespace art {
+
+namespace x86_64 {
+
+class InstructionSimplifierX86_64Visitor : public HGraphVisitor {
+ public:
+  InstructionSimplifierX86_64Visitor(HGraph* graph,
+                                     CodeGenerator* codegen,
+                                     OptimizingCompilerStats* stats)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorX86_64*>(codegen)),
+        stats_(stats) {}
+
+  void RecordSimplification() {
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
+  }
+
+  bool HasAVX2() {
+    return codegen_->GetInstructionSetFeatures().HasAVX2();
+  }
+
+  void VisitBasicBlock(HBasicBlock* block) override {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (instruction->IsInBlock()) {
+        instruction->Accept(this);
+      }
+    }
+  }
+
+  void VisitAnd(HAnd* instruction) override;
+  void VisitXor(HXor* instruction) override;
+
+ private:
+  CodeGeneratorX86_64* codegen_;
+  OptimizingCompilerStats* stats_;
+};
+
+void InstructionSimplifierX86_64Visitor::VisitAnd(HAnd* instruction) {
+  if (TryCombineAndNot(instruction)) {
+    RecordSimplification();
+  } else if (TryGenerateResetLeastSetBit(instruction)) {
+    RecordSimplification();
+  }
+}
+
+
+void InstructionSimplifierX86_64Visitor::VisitXor(HXor* instruction) {
+  if (TryGenerateMaskUptoLeastSetBit(instruction)) {
+    RecordSimplification();
+  }
+}
+
+bool InstructionSimplifierX86_64::Run() {
+  InstructionSimplifierX86_64Visitor visitor(graph_, codegen_, stats_);
+  if (visitor.HasAVX2()) {
+    visitor.VisitReversePostOrder();
+    return true;
+  }
+  return false;
+}
+}  // namespace x86_64
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_x86_64.h b/compiler/optimizing/instruction_simplifier_x86_64.h
new file mode 100644
index 0000000..6cae24d
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_64.h
@@ -0,0 +1,48 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace x86_64 {
+
+class InstructionSimplifierX86_64 : public HOptimization {
+ public:
+  InstructionSimplifierX86_64(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kInstructionSimplifierX86_64PassName, stats),
+        codegen_(codegen) {}
+
+  static constexpr const char* kInstructionSimplifierX86_64PassName =
+      "instruction_simplifier_x86_64";
+
+  bool Run() override;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+
+
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.cc b/compiler/optimizing/instruction_simplifier_x86_shared.cc
new file mode 100644
index 0000000..2805abb
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.cc
@@ -0,0 +1,137 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_x86_shared.h"
+#include "nodes_x86.h"
+
+namespace art {
+
+bool TryCombineAndNot(HAnd* instruction) {
+  DataType::Type type = instruction->GetType();
+  if (!DataType::IsIntOrLongType(type)) {
+    return false;
+  }
+  // Replace code looking like
+  //    Not tmp, y
+  //    And dst, x, tmp
+  //  with
+  //    AndNot dst, x, y
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  // Perform simplification only when either left or right
+  // is Not. When both are Not, instruction should be simplified with
+  // DeMorgan's Laws.
+  if (left->IsNot() ^ right->IsNot()) {
+    bool left_is_not = left->IsNot();
+    HInstruction* other_ins = (left_is_not ? right : left);
+    HNot* not_ins = (left_is_not ? left : right)->AsNot();
+    // Only do the simplification if instruction has only one use
+    // and thus can be safely removed.
+    if (not_ins->HasOnlyOneNonEnvironmentUse()) {
+      ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator();
+      HX86AndNot* and_not = new (arena) HX86AndNot(type,
+                                                   not_ins->GetInput(),
+                                                   other_ins,
+                                                   instruction->GetDexPc());
+      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, and_not);
+      DCHECK(!not_ins->HasUses());
+      not_ins->GetBlock()->RemoveInstruction(not_ins);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool TryGenerateResetLeastSetBit(HAnd* instruction) {
+  DataType::Type type = instruction->GetType();
+  if (!DataType::IsIntOrLongType(type)) {
+    return false;
+  }
+  // Replace code looking like
+  //    Add tmp, x, -1 or Sub tmp, x, 1
+  //    And dest, x, tmp
+  //  with
+  //    MaskOrResetLeastSetBit dest, x
+  HInstruction* candidate = nullptr;
+  HInstruction* other = nullptr;
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if (AreLeastSetBitInputs(left, right)) {
+    candidate = left;
+    other = right;
+  } else if (AreLeastSetBitInputs(right, left)) {
+    candidate = right;
+    other = left;
+  }
+  if (candidate != nullptr && candidate->HasOnlyOneNonEnvironmentUse()) {
+    ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator();
+    HX86MaskOrResetLeastSetBit* lsb = new (arena) HX86MaskOrResetLeastSetBit(
+        type, HInstruction::kAnd, other, instruction->GetDexPc());
+    instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, lsb);
+    DCHECK(!candidate->HasUses());
+    candidate->GetBlock()->RemoveInstruction(candidate);
+    return true;
+  }
+  return false;
+}
+
+bool TryGenerateMaskUptoLeastSetBit(HXor* instruction) {
+  DataType::Type type = instruction->GetType();
+  if (!DataType::IsIntOrLongType(type)) {
+    return false;
+  }
+  // Replace code looking like
+  //    Add tmp, x, -1 or Sub tmp, x, 1
+  //    Xor dest, x, tmp
+  //  with
+  //    MaskOrResetLeastSetBit dest, x
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  HInstruction* other = nullptr;
+  HInstruction* candidate = nullptr;
+  if (AreLeastSetBitInputs(left, right)) {
+    candidate = left;
+    other = right;
+  } else if (AreLeastSetBitInputs(right, left)) {
+    candidate = right;
+    other = left;
+  }
+  if (candidate != nullptr && candidate->HasOnlyOneNonEnvironmentUse()) {
+    ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator();
+    HX86MaskOrResetLeastSetBit* lsb = new (arena) HX86MaskOrResetLeastSetBit(
+        type, HInstruction::kXor, other, instruction->GetDexPc());
+    instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, lsb);
+    DCHECK(!candidate->HasUses());
+    candidate->GetBlock()->RemoveInstruction(candidate);
+    return true;
+  }
+  return false;
+}
+
+bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other) {
+  if (to_test->IsAdd()) {
+    HAdd* add = to_test->AsAdd();
+    HConstant* cst = add->GetConstantRight();
+    return cst != nullptr && cst->IsMinusOne() && other == add->GetLeastConstantLeft();
+  }
+  if (to_test->IsSub()) {
+    HSub* sub = to_test->AsSub();
+    HConstant* cst = sub->GetConstantRight();
+    return cst != nullptr && cst->IsOne() && other == sub->GetLeastConstantLeft();
+  }
+  return false;
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.h b/compiler/optimizing/instruction_simplifier_x86_shared.h
new file mode 100644
index 0000000..7f94d7e
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.h
@@ -0,0 +1,29 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
+
+#include "nodes.h"
+
+namespace art {
+bool TryCombineAndNot(HAnd* instruction);
+bool TryGenerateResetLeastSetBit(HAnd* instruction);
+bool TryGenerateMaskUptoLeastSetBit(HXor* instruction);
+bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other);
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
+
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2124380..6108522 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1497,6 +1497,14 @@
   M(X86PackedSwitch, Instruction)
 #endif
 
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
+#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M)                     \
+  M(X86AndNot, Instruction)                                                \
+  M(X86MaskOrResetLeastSetBit, Instruction)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M)
+#endif
+
 #define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
 
 #define FOR_EACH_CONCRETE_INSTRUCTION(M)                                \
@@ -1507,7 +1515,8 @@
   FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                                 \
   FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M)                               \
   FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                                  \
-  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
+  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)                               \
+  FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M)
 
 #define FOR_EACH_ABSTRACT_INSTRUCTION(M)                                \
   M(Condition, BinaryOperation)                                         \
@@ -7766,7 +7775,7 @@
 #ifdef ART_ENABLE_CODEGEN_mips
 #include "nodes_mips.h"
 #endif
-#ifdef ART_ENABLE_CODEGEN_x86
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
 #include "nodes_x86.h"
 #endif
 
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index a551104..8e8fbc1 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -128,6 +128,92 @@
   const int32_t num_entries_;
 };
 
+class HX86AndNot final : public HBinaryOperation {
+ public:
+  HX86AndNot(DataType::Type result_type,
+       HInstruction* left,
+       HInstruction* right,
+       uint32_t dex_pc = kNoDexPc)
+      : HBinaryOperation(kX86AndNot, result_type, left, right, SideEffects::None(), dex_pc) {
+  }
+
+  bool IsCommutative() const override { return false; }
+
+  template <typename T> static T Compute(T x, T y) { return ~x & y; }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
+
+  DECLARE_INSTRUCTION(X86AndNot);
+
+ protected:
+  DEFAULT_COPY_CONSTRUCTOR(X86AndNot);
+};
+
+class HX86MaskOrResetLeastSetBit final : public HUnaryOperation {
+ public:
+  HX86MaskOrResetLeastSetBit(DataType::Type result_type, InstructionKind op,
+                             HInstruction* input, uint32_t dex_pc = kNoDexPc)
+      : HUnaryOperation(kX86MaskOrResetLeastSetBit, result_type, input, dex_pc),
+        op_kind_(op) {
+    DCHECK_EQ(result_type, DataType::Kind(input->GetType()));
+    DCHECK(op == HInstruction::kAnd || op == HInstruction::kXor) << op;
+  }
+  template <typename T>
+  auto Compute(T x) const -> decltype(x & (x-1)) {
+    static_assert(std::is_same<decltype(x & (x-1)), decltype(x ^ (x-1))>::value,
+                  "Inconsistent bitwise types");
+    switch (op_kind_) {
+      case HInstruction::kAnd:
+        return x & (x-1);
+      case HInstruction::kXor:
+        return x ^ (x-1);
+      default:
+        LOG(FATAL) << "Unreachable";
+        UNREACHABLE();
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* x) const override {
+    return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x) const override {
+    return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(X86MaskOrResetLeastSetBit);
+
+ protected:
+  const InstructionKind op_kind_;
+
+  DEFAULT_COPY_CONSTRUCTOR(X86MaskOrResetLeastSetBit);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_X86_H_
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 4b0941b..0f971e1 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -28,10 +28,14 @@
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "pc_relative_fixups_x86.h"
+#include "instruction_simplifier_x86.h"
 #endif
 #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
 #include "x86_memory_gen.h"
 #endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "instruction_simplifier_x86_64.h"
+#endif
 
 #include "bounds_check_elimination.h"
 #include "cha_guard_optimization.h"
@@ -113,6 +117,12 @@
 #ifdef ART_ENABLE_CODEGEN_x86
     case OptimizationPass::kPcRelativeFixupsX86:
       return x86::PcRelativeFixups::kPcRelativeFixupsX86PassName;
+    case OptimizationPass::kInstructionSimplifierX86:
+      return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName;
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case OptimizationPass::kInstructionSimplifierX86_64:
+      return x86_64::InstructionSimplifierX86_64::kInstructionSimplifierX86_64PassName;
 #endif
 #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
     case OptimizationPass::kX86MemoryOperandGeneration:
@@ -311,6 +321,14 @@
         DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
         opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats);
         break;
+      case OptimizationPass::kInstructionSimplifierX86:
+       opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats);
+       break;
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+      case OptimizationPass::kInstructionSimplifierX86_64:
+        opt = new (allocator) x86_64::InstructionSimplifierX86_64(graph, codegen, stats);
+        break;
 #endif
       case OptimizationPass::kNone:
         LOG(FATAL) << "kNone does not represent an actual pass";
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index ced383f..490007d 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -96,6 +96,10 @@
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
   kPcRelativeFixupsX86,
+  kInstructionSimplifierX86,
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+  kInstructionSimplifierX86_64,
 #endif
 #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
   kX86MemoryOperandGeneration,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 4f495b6..c9c1194 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -579,6 +579,7 @@
 #ifdef ART_ENABLE_CODEGEN_x86
     case InstructionSet::kX86: {
       OptimizationDef x86_optimizations[] = {
+        OptDef(OptimizationPass::kInstructionSimplifierX86),
         OptDef(OptimizationPass::kSideEffectsAnalysis),
         OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
         OptDef(OptimizationPass::kPcRelativeFixupsX86),
@@ -595,6 +596,7 @@
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case InstructionSet::kX86_64: {
       OptimizationDef x86_64_optimizations[] = {
+        OptDef(OptimizationPass::kInstructionSimplifierX86_64),
         OptDef(OptimizationPass::kSideEffectsAnalysis),
         OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
         OptDef(OptimizationPass::kX86MemoryOperandGeneration)
diff --git a/test/552-checker-x86-avx2-bit-manipulation/expected.txt b/test/552-checker-x86-avx2-bit-manipulation/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-checker-x86-avx2-bit-manipulation/expected.txt
diff --git a/test/552-checker-x86-avx2-bit-manipulation/info.txt b/test/552-checker-x86-avx2-bit-manipulation/info.txt
new file mode 100644
index 0000000..37bc6dd
--- /dev/null
+++ b/test/552-checker-x86-avx2-bit-manipulation/info.txt
@@ -0,0 +1 @@
+Tests for generating bit manipulation instructions on x86 and x86_64
diff --git a/test/552-checker-x86-avx2-bit-manipulation/src/Main.java b/test/552-checker-x86-avx2-bit-manipulation/src/Main.java
new file mode 100644
index 0000000..b8138dd
--- /dev/null
+++ b/test/552-checker-x86-avx2-bit-manipulation/src/Main.java
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /// CHECK-START-X86_64: long Main.and_not_64(long, long) instruction_simplifier_x86_64 (before)
+  /// CHECK-DAG:    Phi     loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:    Not     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Not     loop:none
+  /// CHECK-DAG:    And     loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: long Main.and_not_64(long, long) instruction_simplifier_x86_64 (after)
+  // CHECK-DAG:      X86AndNot loop:<<Loop:B\d+>> outer_loop:none
+  // CHECK-DAG:      X86AndNot loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: long Main.and_not_64(long, long) instruction_simplifier_x86_64 (after)
+  // CHECK-NOT:      Not       loop:<<Loop>>      outer_loop:none
+  // CHECK-NOT:      And       loop:<<Loop>>      outer_loop:none
+  // CHECK-NOT:      Not       loop:none
+  // CHECK-NOT:      And       loop:none
+  public static long and_not_64(long x, long y) {
+    long j = 1;
+    long k = 2;
+    for (long i = -64; i < 64; i++) {
+      x = x & ~i;
+      y = y | i;
+    }
+    return x & ~y;
+  }
+
+  /// CHECK-START-X86_64: int Main.and_not_32(int, int) instruction_simplifier_x86_64 (before)
+  /// CHECK-DAG:    Phi     loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:    Not     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Not     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Not     loop:none
+  /// CHECK-DAG:    And     loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: int Main.and_not_32(int, int) instruction_simplifier_x86_64 (after)
+  // CHECK-DAG:      X86AndNot loop:<<Loop:B\d+>> outer_loop:none
+  // CHECK-DAG:      X86AndNot loop:<<Loop>>      outer_loop:none
+  // CHECK-DAG:      X86AndNot loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: int Main.and_not_32(int, int) instruction_simplifier_x86_64 (after)
+  // CHECK-NOT:      Not       loop:<<Loop>>      outer_loop:none
+  // CHECK-NOT:      And       loop:<<Loop>>      outer_loop:none
+  // CHECK-NOT:      Not       loop:none
+  // CHECK-NOT:      And       loop:none
+  public static int and_not_32(int x, int y) {
+    int j = 1;
+    int k = 2;
+    for (int i = -64; i < 64; i++) {
+      x = x & ~i;
+      y = y | i;
+    }
+    return x & ~y;
+  }
+
+  /// CHECK-START-X86_64: int Main.reset_lowest_set_bit_32(int) instruction_simplifier_x86_64 (before)
+  /// CHECK-DAG:    Phi     loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:    Add     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Add     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Add     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Add     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Add     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Add     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Add     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Add     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: int Main.reset_lowest_set_bit_32(int) instruction_simplifier_x86_64 (after)
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop:B\d+>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: int Main.reset_lowest_set_bit_32(int) instruction_simplifier_x86_64 (after)
+  // CHECK-NOT:      And                        loop:<<Loop>> outer_loop:none
+  public static int reset_lowest_set_bit_32(int x) {
+    int y = x;
+    int j = 5;
+    int k = 10;
+    int l = 20;
+    for (int i = -64; i < 64; i++) {
+      y = i & (i - 1);
+      j += y;
+      j = j & (j - 1);
+      k += j;
+      k = k & (k - 1);
+      l += k;
+      l = l & (l - 1);
+    }
+    return y + j + k + l;
+  }
+
+  /// CHECK-START-X86_64: long Main.reset_lowest_set_bit_64(long) instruction_simplifier_x86_64 (before)
+  /// CHECK-DAG:    Phi     loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:    Sub     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Sub     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Sub     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    Sub     loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:    And     loop:<<Loop>>      outer_loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: long Main.reset_lowest_set_bit_64(long) instruction_simplifier_x86_64 (after)
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop:B\d+>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:<<Loop>> outer_loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: long Main.reset_lowest_set_bit_64(long) instruction_simplifier_x86_64 (after)
+  // CHECK-NOT:      And                        loop:<<Loop>> outer_loop:none
+  // CHECK-NOT:      Sub                        loop:<<Loop>> outer_loop:none
+  public static long reset_lowest_set_bit_64(long x) {
+    long y = x;
+    long j = 5;
+    long k = 10;
+    long l = 20;
+    for (long i = -64; i < 64; i++) {
+      y = i & (i - 1);
+      j += y;
+      j = j & (j - 1);
+      k += j;
+      k = k & (k - 1);
+      l += k;
+      l = l & (l - 1);
+    }
+    return y + j + k + l;
+  }
+
+  /// CHECK-START-X86_64: int Main.get_mask_lowest_set_bit_32(int) instruction_simplifier_x86_64 (before)
+  /// CHECK-DAG:    Add     loop:none
+  /// CHECK-DAG:    Xor     loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: int Main.get_mask_lowest_set_bit_32(int) instruction_simplifier_x86_64 (after)
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: int Main.get_mask_lowest_set_bit_32(int) instruction_simplifier_x86_64 (after)
+  // CHECK-NOT:      Add    loop:none
+  // CHECK-NOT:      Xor    loop:none
+  public static int get_mask_lowest_set_bit_32(int x) {
+    return (x-1) ^ x;
+  }
+
+  /// CHECK-START-X86_64: long Main.get_mask_lowest_set_bit_64(long) instruction_simplifier_x86_64 (before)
+  /// CHECK-DAG:    Sub     loop:none
+  /// CHECK-DAG:    Xor     loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: long Main.get_mask_lowest_set_bit_64(long) instruction_simplifier_x86_64 (after)
+  // CHECK-DAG:      X86MaskOrResetLeastSetBit  loop:none
+
+  // TODO: re-enable when checker supports ISA features
+  // CHECK-START-X86_64: long Main.get_mask_lowest_set_bit_64(long) instruction_simplifier_x86_64 (after)
+  // CHECK-NOT:      Sub    loop:none
+  // CHECK-NOT:      Xor    loop:none
+  public static long get_mask_lowest_set_bit_64(long x) {
+    return (x-1) ^ x;
+  }
+
+  public static void main(String[] args) {
+    int x = 50;
+    int y = x/2;
+    long a = Long.MAX_VALUE;
+    long b = Long.MAX_VALUE/2;
+    assertIntEquals(0, and_not_32(x, y));
+    assertLongEquals(0L, and_not_64(a, b));
+    assertIntEquals(-20502606, reset_lowest_set_bit_32(x));
+    assertLongEquals(-20502606L, reset_lowest_set_bit_64(a));
+    assertLongEquals(-20502606L, reset_lowest_set_bit_64(b));
+    assertIntEquals(1, get_mask_lowest_set_bit_32(y));
+    assertLongEquals(1L, get_mask_lowest_set_bit_64(b));
+  }
+}