[backend] Add machine IR optimizations

Bug: 295191075
Test: mm and berberis_host_tests
Change-Id: I80a9184aedaa206bb5652cdadd17a2867a5acd25
Merged-In: I80a9184aedaa206bb5652cdadd17a2867a5acd25
diff --git a/backend/Android.bp b/backend/Android.bp
index 0fd1b08..788eede 100644
--- a/backend/Android.bp
+++ b/backend/Android.bp
@@ -183,6 +183,7 @@
         "x86_64/liveness_analyzer.cc",
         "x86_64/machine_ir_analysis.cc",
         "x86_64/machine_ir_check.cc",
+        "x86_64/machine_ir_opt.cc",
         "x86_64/rename_vregs_local.cc",
     ],
 }
@@ -230,6 +231,7 @@
         "x86_64/machine_ir_analysis_test.cc",
         "x86_64/machine_ir_check_test.cc",
         "x86_64/machine_ir_exec_test.cc",
+        "x86_64/machine_ir_opt_test.cc",
         "x86_64/machine_ir_test.cc",
         "x86_64/machine_ir_test_corpus.cc",
         "x86_64/rename_vregs_local_test.cc",
diff --git a/backend/include/berberis/backend/x86_64/machine_ir_opt.h b/backend/include/berberis/backend/x86_64/machine_ir_opt.h
new file mode 100644
index 0000000..412f2ba
--- /dev/null
+++ b/backend/include/berberis/backend/x86_64/machine_ir_opt.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_BACKEND_X86_64_MACHINE_IR_OPT_H_
+#define BERBERIS_BACKEND_X86_64_MACHINE_IR_OPT_H_
+
+#include "berberis/backend/x86_64/machine_ir.h"
+
+namespace berberis::x86_64 {
+
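+// Removes instructions whose results are unused, scanning each basic block
+// backward from its live-out set. Instructions with side effects are kept.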
+void RemoveDeadCode(MachineIR* machine_ir);
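+
+// Splits critical edges (edges from a block with multiple out-edges to a block
+// with multiple in-edges) by inserting forwarder blocks.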
+void RemoveCriticalEdges(MachineIR* machine_ir);
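+
+// Removes PUTs to the guest context whose values are overwritten on every
+// successor path before being read.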
+void RemoveRedundantPut(MachineIR* ir);
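+
+// Removes basic blocks that contain nothing but an unconditional branch,
+// redirecting their predecessors to the branch target.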
+void RemoveForwarderBlocks(MachineIR* machine_ir);
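+
+// Relinks the basic block list in reverse post-order.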
+void ReorderBasicBlocksInReversePostOrder(MachineIR* machine_ir);
+
+}  // namespace berberis::x86_64
+
+#endif  // BERBERIS_BACKEND_X86_64_MACHINE_IR_OPT_H_
diff --git a/backend/x86_64/machine_ir_opt.cc b/backend/x86_64/machine_ir_opt.cc
new file mode 100644
index 0000000..a4807c2
--- /dev/null
+++ b/backend/x86_64/machine_ir_opt.cc
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <bitset>
+#include <iterator>
+
+#include "berberis/backend/x86_64/context_liveness_analyzer.h"
+#include "berberis/backend/x86_64/machine_ir.h"
+#include "berberis/backend/x86_64/machine_ir_analysis.h"
+#include "berberis/backend/x86_64/machine_ir_opt.h"
+#include "berberis/backend/x86_64/vreg_bit_set.h"
+#include "berberis/base/arena_vector.h"
+
+namespace berberis::x86_64 {
+
+// TODO(b/232598137): Move more code in this file into the anonymous namespace.
+namespace {
+
+// Wrapper class around VRegBitSet that ensures safe handling of hardware registers - in
+// particular, all hardware registers are considered used (and thus never dead). Hardware
+// registers are excluded from the optimization for efficiency's sake.
+class RegUsageBitSet {
+ public:
+  explicit RegUsageBitSet(const MachineIR* machine_ir)
+      : reg_set_(machine_ir->NumVReg(), machine_ir->arena()) {}
+
+  void Set(MachineReg reg) {
+    if (reg.IsVReg()) {
+      reg_set_.Set(reg);
+    }
+  }
+
+  void Reset(MachineReg reg) {
+    if (reg.IsVReg()) {
+      reg_set_.Reset(reg);
+    }
+  }
+
+  bool operator[](MachineReg reg) const {
+    if (reg.IsVReg()) {
+      return reg_set_[reg];
+    } else {
+      return true;
+    }
+  }
+
+  void Clear() { reg_set_.Clear(); }
+
+ private:
+  VRegBitSet reg_set_;
+};
+
+bool AreResultsUsed(const MachineInsn* insn, const RegUsageBitSet& is_reg_used) {
+  for (int i = 0; i < insn->NumRegOperands(); ++i) {
+    if (insn->RegKindAt(i).IsDef() && is_reg_used[insn->RegAt(i)]) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void SetInsnResultsUnused(const MachineInsn* insn, RegUsageBitSet& is_reg_used) {
+  for (int i = 0; i < insn->NumRegOperands(); ++i) {
+    if (insn->RegKindAt(i).IsDef()) {
+      is_reg_used.Reset(insn->RegAt(i));
+    }
+  }
+}
+
+void SetInsnArgumentsUsed(const MachineInsn* insn, RegUsageBitSet& is_reg_used) {
+  for (int i = 0; i < insn->NumRegOperands(); ++i) {
+    if (insn->RegKindAt(i).IsUse()) {
+      is_reg_used.Set(insn->RegAt(i));
+    }
+  }
+}
+
+}  // namespace
+
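+// Removes dead instructions with a backward scan of each basic block: a result
+// register is live if it is in the block's live-out set or is used by an
+// instruction below. Instructions with side effects are always kept.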
+void RemoveDeadCode(MachineIR* machine_ir) {
+  RegUsageBitSet is_reg_used(machine_ir);
+
+  for (auto* bb : machine_ir->bb_list()) {
+    is_reg_used.Clear();
+
+    for (auto vreg : bb->live_out()) {
+      is_reg_used.Set(vreg);
+    }
+
+    // Go from end to begin removing all unused instructions.
+    for (auto insn_it = bb->insn_list().rbegin(); insn_it != bb->insn_list().rend();) {
+      MachineInsn* insn = *insn_it++;
+
+      if (!insn->has_side_effects() && !AreResultsUsed(insn, is_reg_used)) {
+        // Note the non-trivial way of erasing through a reverse_iterator:
+        // insn_it was already advanced, so insn_it.base() points exactly at
+        // the instruction being removed.
+        insn_it = MachineInsnList::reverse_iterator(bb->insn_list().erase(insn_it.base()));
+        SetInsnResultsUnused(insn, is_reg_used);
+        continue;
+      }
+
+      SetInsnResultsUnused(insn, is_reg_used);
+      SetInsnArgumentsUsed(insn, is_reg_used);
+    }  // For insn in bb
+  }    // For bb in IR
+}
+
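+// Redirects the branch at the end of bb from old_dst to new_dst.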
+void ChangeBranchTarget(MachineBasicBlock* bb,
+                        MachineBasicBlock* old_dst,
+                        MachineBasicBlock* new_dst) {
+  CHECK(!bb->insn_list().empty());
+  auto last_insn = bb->insn_list().back();
+
+  // The branch instruction can either be PseudoCondBranch or PseudoBranch.
+  // When removing critical edges, the branch instruction is PseudoBranch if
+  // and only if bb has an out-edge to a recovery block.
+  if (last_insn->opcode() == kMachineOpPseudoBranch) {
+    auto insn = static_cast<PseudoBranch*>(last_insn);
+    CHECK_EQ(insn->then_bb(), old_dst);
+    insn->set_then_bb(new_dst);
+    return;
+  }
+
+  CHECK(last_insn->opcode() == kMachineOpPseudoCondBranch);
+  auto insn = static_cast<PseudoCondBranch*>(last_insn);
+  if (insn->then_bb() == old_dst) {
+    insn->set_then_bb(new_dst);
+  } else if (insn->else_bb() == old_dst) {
+    insn->set_else_bb(new_dst);
+  }
+}
+
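+// Splits the given edge by inserting a new basic block that contains only an
+// unconditional branch to the original destination. in_edge_index is the
+// position of the edge in the destination's in-edge list, which is reused for
+// the new edge.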
+void InsertNodeOnEdge(MachineIR* ir, MachineEdge* edge, int in_edge_index) {
+  MachineBasicBlock* pred_bb = edge->src();
+  MachineBasicBlock* succ_bb = edge->dst();
+  MachineBasicBlock* new_bb = ir->NewBasicBlock();
+  ir->bb_list().push_back(new_bb);
+
+  // Create a new edge between new_bb and bb.
+  MachineEdge* new_edge = NewInArena<MachineEdge>(ir->arena(), ir->arena(), new_bb, succ_bb);
+  new_bb->out_edges().push_back(new_edge);
+  succ_bb->in_edges()[in_edge_index] = new_edge;
+
+  // Reuse edge but change dst to new_bb.
+  edge->set_dst(new_bb);
+  new_bb->in_edges().push_back(edge);
+
+  ChangeBranchTarget(pred_bb, succ_bb, new_bb);
+  new_bb->insn_list().push_back(ir->NewInsn<PseudoBranch>(succ_bb));
+}
+
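+// An edge is critical if its source has multiple out-edges and its destination
+// has multiple in-edges. Splitting such edges guarantees that code can be
+// inserted on any edge without affecting other control-flow paths.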
+void RemoveCriticalEdges(MachineIR* machine_ir) {
+  for (auto bb : machine_ir->bb_list()) {
+    if (bb->in_edges().size() < 2) {
+      continue;
+    }
+    for (size_t i = 0; i < bb->in_edges().size(); i++) {
+      MachineEdge* edge = bb->in_edges()[i];
+      MachineBasicBlock* pred_bb = edge->src();
+      if (pred_bb->out_edges().size() >= 2) {
+        // This is a critical edge!
+        InsertNodeOnEdge(machine_ir, edge, i);
+      }
+    }
+  }
+}
+
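+// Erases the PUT at insn_it if the written context location is provably dead:
+// no GET of it was seen later in this block (seen_get) and it is not live into
+// any successor. Returns the next reverse iterator to visit.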
+MachineInsnList::reverse_iterator RemovePutIfDead(const ContextLivenessAnalyzer* analyzer,
+                                                  MachineBasicBlock* bb,
+                                                  MachineInsnList::reverse_iterator insn_it,
+                                                  const std::bitset<sizeof(CPUState)>& seen_get) {
+  CHECK(!bb->out_edges().empty());
+  auto* insn = AsMachineInsnX86_64(*insn_it);
+  auto disp = insn->disp();
+
+  if (seen_get.test(disp)) {
+    return ++insn_it;
+  }
+
+  for (auto out_edge : bb->out_edges()) {
+    auto dst = out_edge->dst();
+    if (analyzer->IsLiveIn(dst, disp)) {
+      return ++insn_it;
+    }
+  }
+
+  // The instruction writes to dead context.
+  // insn_it.base() points one past the PUT in forward order, so decrementing it
+  // yields a forward iterator to the PUT itself.
+  auto forward_it = --(insn_it.base());
+  auto next_it = bb->insn_list().erase(forward_it);
+  // Resume the backward scan at the instruction preceding the erased PUT.
+  return MachineInsnList::reverse_iterator(next_it);
+}
+
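+// Removes PUTs that are redundant because every successor path overwrites the
+// same guest context location before reading it.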
+void RemoveRedundantPut(MachineIR* ir) {
+  ContextLivenessAnalyzer analyzer(ir);
+  analyzer.Init();
+  std::bitset<sizeof(CPUState)> seen_get;
+  for (auto* bb : ir->bb_list()) {
+    if (bb->out_edges().empty()) {
+      // We are only looking for PUTs that are dead due to other PUTs in
+      // successor basic blocks, because RemoveLocalGuestContextAccesses ensures
+      // that there is at most one PUT to each guest reg in a basic block.
+      continue;
+    }
+
+    seen_get.reset();
+    for (auto insn_it = bb->insn_list().rbegin(); insn_it != bb->insn_list().rend();) {
+      auto* insn = AsMachineInsnX86_64(*insn_it);
+      if (insn->IsCPUStatePut()) {
+        insn_it = RemovePutIfDead(&analyzer, bb, insn_it, seen_get);
+      } else {
+        if (insn->IsCPUStateGet()) {
+          seen_get.set(insn->disp(), true);
+        }
+        insn_it++;
+      }
+    }
+  }
+}
+
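+// A forwarder block contains a single unconditional branch and nothing else.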
+bool IsForwarderBlock(MachineBasicBlock* bb) {
+  if (bb->insn_list().size() != 1) {
+    return false;
+  }
+
+  // Don't remove the entry block.
+  if (bb->in_edges().empty()) {
+    return false;
+  }
+
+  const MachineInsn* last_insn = bb->insn_list().back();
+  return last_insn->opcode() == kMachineOpPseudoBranch;
+}
+
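+// Redirects all in-edges of forwarder block bb to its single successor, and
+// removes bb's own out-edge from the successor's in-edge list.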
+void UnlinkForwarderBlock(MachineBasicBlock* bb) {
+  CHECK_EQ(bb->out_edges().size(), 1);
+  auto dst = bb->out_edges()[0]->dst();
+  for (auto edge : bb->in_edges()) {
+    edge->set_dst(dst);
+    dst->in_edges().push_back(edge);
+    ChangeBranchTarget(edge->src(), bb, dst);
+  }
+
+  auto* edge = bb->out_edges()[0];
+  auto dst_in_edge_it = std::find(dst->in_edges().begin(), dst->in_edges().end(), edge);
+  CHECK(dst_in_edge_it != dst->in_edges().end());
+  dst->in_edges().erase(dst_in_edge_it);
+}
+
+void RemoveForwarderBlocks(MachineIR* ir) {
+  for (auto bb_it = ir->bb_list().begin(); bb_it != ir->bb_list().end();) {
+    auto* bb = *bb_it;
+    if (!IsForwarderBlock(bb)) {
+      bb_it++;
+      continue;
+    }
+
+    UnlinkForwarderBlock(bb);
+    bb_it = ir->bb_list().erase(bb_it);
+  }
+}
+
+void ReorderBasicBlocksInReversePostOrder(MachineIR* ir) {
+  ir->bb_list() = GetReversePostOrderBBList(ir);
+  ir->set_bb_order(MachineIR::BasicBlockOrder::kReversePostOrder);
+}
+
+}  // namespace berberis::x86_64
diff --git a/backend/x86_64/machine_ir_opt_test.cc b/backend/x86_64/machine_ir_opt_test.cc
new file mode 100644
index 0000000..c20d386
--- /dev/null
+++ b/backend/x86_64/machine_ir_opt_test.cc
@@ -0,0 +1,1000 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+#include "berberis/backend/x86_64/machine_ir_opt.h"
+
+#include "berberis/backend/code_emitter.h"
+#include "berberis/backend/common/machine_ir_opt.h"
+#include "berberis/backend/x86_64/machine_ir.h"
+#include "berberis/backend/x86_64/machine_ir_builder.h"
+#include "berberis/backend/x86_64/machine_ir_check.h"
+#include "berberis/backend/x86_64/machine_ir_test_corpus.h"
+#include "berberis/base/arena_alloc.h"
+#include "berberis/guest_state/guest_addr.h"
+
+namespace berberis {
+
+namespace {
+
+TEST(MachineIRRemoveDeadCodeTest, DefKilledByAnotherDef) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb = machine_ir.NewBasicBlock();
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  MachineReg vreg1 = machine_ir.AllocVReg();
+
+  builder.StartBasicBlock(bb);
+  builder.Gen<x86_64::MovqRegReg>(vreg1, vreg1);
+  builder.Gen<x86_64::MovqRegImm>(vreg1, 1);
+  builder.Gen<PseudoBranch>(bb);
+
+  bb->live_out().push_back(vreg1);
+
+  x86_64::RemoveDeadCode(&machine_ir);
+
+  EXPECT_EQ(bb->insn_list().size(), 2UL);
+
+  auto insn_it = bb->insn_list().begin();
+  MachineInsn* insn = *insn_it;
+  MachineReg reg_after = insn->RegAt(0);
+  MachineOpcode opcode_after = insn->opcode();
+  EXPECT_EQ(kMachineOpMovqRegImm, opcode_after);
+  EXPECT_EQ(vreg1, reg_after);
+}
+
+TEST(MachineIRRemoveDeadCodeTest, RegUsedInSameBasicBlockNotErased) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb = machine_ir.NewBasicBlock();
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  MachineReg vreg1 = machine_ir.AllocVReg();
+  MachineReg vreg2 = machine_ir.AllocVReg();
+
+  builder.StartBasicBlock(bb);
+  builder.Gen<x86_64::MovqRegImm>(vreg1, 4);
+  builder.Gen<x86_64::MovqMemBaseDispReg>(vreg2, 0, vreg1);
+  builder.Gen<PseudoBranch>(bb);
+
+  bb->live_out().push_back(vreg1);
+
+  x86_64::RemoveDeadCode(&machine_ir);
+
+  EXPECT_EQ(bb->insn_list().size(), 3UL);
+
+  auto insn_it = bb->insn_list().begin();
+  MachineInsn* insn = *insn_it;
+  MachineReg reg_after = insn->RegAt(0);
+  MachineOpcode opcode_after = insn->opcode();
+  EXPECT_EQ(kMachineOpMovqRegImm, opcode_after);
+  EXPECT_EQ(vreg1, reg_after);
+}
+
+TEST(MachineIRRemoveDeadCodeTest, LiveOutRegNotErased) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb = machine_ir.NewBasicBlock();
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  MachineReg vreg1 = machine_ir.AllocVReg();
+
+  builder.StartBasicBlock(bb);
+  builder.Gen<x86_64::MovqRegImm>(vreg1, 4);
+  builder.Gen<PseudoBranch>(bb);
+
+  bb->live_out().push_back(vreg1);
+
+  x86_64::RemoveDeadCode(&machine_ir);
+
+  EXPECT_EQ(bb->insn_list().size(), 2UL);
+
+  auto insn_it = bb->insn_list().begin();
+  MachineInsn* insn = *insn_it;
+  MachineReg reg_after = insn->RegAt(0);
+  MachineOpcode opcode_after = insn->opcode();
+  EXPECT_EQ(kMachineOpMovqRegImm, opcode_after);
+  EXPECT_EQ(vreg1, reg_after);
+}
+
+TEST(MachineIRRemoveDeadCodeTest, UseOfRegBeforeDoesNotMakeInsnLive) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb = machine_ir.NewBasicBlock();
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  MachineReg vreg1 = machine_ir.AllocVReg();
+  MachineReg vreg2 = machine_ir.AllocVReg();
+
+  builder.StartBasicBlock(bb);
+  builder.Gen<x86_64::MovqRegImm>(vreg1, 4);
+  builder.Gen<x86_64::MovqRegReg>(vreg2, vreg1);
+  builder.Gen<PseudoBranch>(bb);
+
+  bb->live_out().push_back(vreg1);
+
+  x86_64::RemoveDeadCode(&machine_ir);
+
+  EXPECT_EQ(bb->insn_list().size(), 2UL);
+
+  auto insn_it = bb->insn_list().rbegin();
+  insn_it++;
+  MachineInsn* insn = *insn_it++;
+  MachineReg reg_after = insn->RegAt(0);
+  MachineOpcode opcode_after = insn->opcode();
+  EXPECT_EQ(kMachineOpMovqRegImm, opcode_after);
+  EXPECT_EQ(vreg1, reg_after);
+}
+
+TEST(MachineIRRemoveDeadCodeTest, UnusedRegErased) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb = machine_ir.NewBasicBlock();
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  MachineReg vreg1 = machine_ir.AllocVReg();
+
+  builder.StartBasicBlock(bb);
+  builder.Gen<x86_64::MovqRegImm>(vreg1, 4);
+  builder.Gen<PseudoBranch>(bb);
+
+  x86_64::RemoveDeadCode(&machine_ir);
+
+  EXPECT_EQ(bb->insn_list().size(), 1UL);
+
+  auto insn_it = bb->insn_list().begin();
+  MachineInsn* insn = *insn_it++;
+  MachineOpcode opcode_after = insn->opcode();
+  EXPECT_EQ(kMachineOpPseudoBranch, opcode_after);
+}
+
+TEST(MachineIRRemoveDeadCodeTest, DefKilledBySecondResultOfAnotherDef) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb = machine_ir.NewBasicBlock();
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  MachineReg vreg1 = machine_ir.AllocVReg();
+  MachineReg vreg2 = machine_ir.AllocVReg();
+  MachineReg vreg3 = machine_ir.AllocVReg();
+
+  builder.StartBasicBlock(bb);
+  builder.Gen<x86_64::AddbRegImm>(vreg1, 1, vreg3);
+  builder.Gen<x86_64::AddbRegImm>(vreg2, 2, vreg3);
+  builder.Gen<PseudoBranch>(bb);
+
+  bb->live_out().push_back(vreg2);
+
+  x86_64::RemoveDeadCode(&machine_ir);
+
+  EXPECT_EQ(bb->insn_list().size(), 2UL);
+
+  auto insn_it = bb->insn_list().begin();
+  MachineInsn* insn = *insn_it++;
+  MachineReg reg_after = insn->RegAt(0);
+  MachineOpcode opcode_after = insn->opcode();
+  EXPECT_EQ(kMachineOpAddbRegImm, opcode_after);
+  EXPECT_EQ(vreg2, reg_after);
+}
+
+TEST(MachineIRRemoveDeadCodeTest, HardRegisterAccess) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb = machine_ir.NewBasicBlock();
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  builder.StartBasicBlock(bb);
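+  // RAX and FLAGS are hard registers, so the write is never considered dead.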
+  builder.Gen<x86_64::AddbRegImm>(x86_64::kMachineRegRAX, 3, x86_64::kMachineRegFLAGS);
+  builder.Gen<PseudoBranch>(bb);
+
+  x86_64::RemoveDeadCode(&machine_ir);
+
+  EXPECT_EQ(bb->insn_list().size(), 2UL);
+}
+
+TEST(MachineIRRemoveDeadCodeTest, CallImmArgIsLive) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+  auto* bb = machine_ir.NewBasicBlock();
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  builder.StartBasicBlock(bb);
+  builder.GenCallImm(0,
+                     machine_ir.AllocVReg(),
+                     std::array<x86_64::CallImm::Arg, 1>{
+                         {{machine_ir.AllocVReg(), x86_64::CallImm::kIntRegType}}});
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  x86_64::RemoveDeadCode(&machine_ir);
+
+  EXPECT_EQ(bb->insn_list().size(), 4UL);
+}
+
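+// Returns the index of dst_bb's in-edge coming from src_bb, or -1 if absent.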
+int GetInEdgeIndex(MachineBasicBlock* dst_bb, MachineBasicBlock* src_bb) {
+  for (size_t i = 0; i < dst_bb->in_edges().size(); i++) {
+    if (dst_bb->in_edges()[i]->src() == src_bb) {
+      return i;
+    }
+  }
+  return -1;
+}
+
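+// Returns the index of src_bb's out-edge leading to dst_bb, or -1 if absent.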
+int GetOutEdgeIndex(MachineBasicBlock* src_bb, MachineBasicBlock* dst_bb) {
+  for (size_t i = 0; i < src_bb->out_edges().size(); i++) {
+    if (src_bb->out_edges()[i]->dst() == dst_bb) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+TEST(MachineIR, RemoveCriticalEdge) {
+  Arena arena;
+  berberis::x86_64::MachineIR machine_ir(&arena);
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  // bb1   bb2
+  //   \  /  \
+  //   bb3   bb4
+  auto bb1 = machine_ir.NewBasicBlock();
+  auto bb2 = machine_ir.NewBasicBlock();
+  auto bb3 = machine_ir.NewBasicBlock();
+  auto bb4 = machine_ir.NewBasicBlock();
+  machine_ir.AddEdge(bb1, bb3);
+  machine_ir.AddEdge(bb2, bb3);
+  machine_ir.AddEdge(bb2, bb4);
+
+  builder.StartBasicBlock(bb1);
+  builder.Gen<PseudoBranch>(bb3);
+
+  builder.StartBasicBlock(bb2);
+  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb3, bb4, x86_64::kMachineRegFLAGS);
+
+  builder.StartBasicBlock(bb3);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  builder.StartBasicBlock(bb4);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  x86_64::RemoveCriticalEdges(&machine_ir);
+  ASSERT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  ASSERT_EQ(bb3->in_edges().size(), 2UL);
+  int bb1_index_in_bb3 = GetInEdgeIndex(bb3, bb1);
+  ASSERT_NE(bb1_index_in_bb3, -1);
+  auto new_bb = bb3->in_edges()[1 - bb1_index_in_bb3]->src();
+
+  ASSERT_EQ(bb2->out_edges().size(), 2UL);
+  int bb4_index_in_bb2 = GetOutEdgeIndex(bb2, bb4);
+  ASSERT_NE(bb4_index_in_bb2, -1);
+  EXPECT_EQ(new_bb, bb2->out_edges()[1 - bb4_index_in_bb2]->dst());
+}
+
+TEST(MachineIR, RemoveCriticalEdgeLoop) {
+  Arena arena;
+  berberis::x86_64::MachineIR machine_ir(&arena);
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  // bb1
+  //  |
+  // bb2 <---
+  //  |  \__/
+  // bb3
+  auto bb1 = machine_ir.NewBasicBlock();
+  auto bb2 = machine_ir.NewBasicBlock();
+  auto bb3 = machine_ir.NewBasicBlock();
+  machine_ir.AddEdge(bb1, bb2);
+  machine_ir.AddEdge(bb2, bb2);
+  machine_ir.AddEdge(bb2, bb3);
+
+  builder.StartBasicBlock(bb1);
+  builder.Gen<PseudoBranch>(bb2);
+
+  builder.StartBasicBlock(bb2);
+  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb2, bb3, x86_64::kMachineRegFLAGS);
+
+  builder.StartBasicBlock(bb3);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  x86_64::RemoveCriticalEdges(&machine_ir);
+  ASSERT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  ASSERT_EQ(bb2->in_edges().size(), 2UL);
+  int bb1_index_in_bb2 = GetInEdgeIndex(bb2, bb1);
+  ASSERT_NE(bb1_index_in_bb2, -1);
+  auto new_bb = bb2->in_edges()[1 - bb1_index_in_bb2]->src();
+
+  ASSERT_EQ(bb2->out_edges().size(), 2UL);
+  int bb3_index_in_bb2 = GetOutEdgeIndex(bb2, bb3);
+  ASSERT_NE(bb3_index_in_bb2, -1);
+  EXPECT_EQ(new_bb, bb2->out_edges()[1 - bb3_index_in_bb2]->dst());
+}
+
+TEST(MachineIR, RemoveCriticalEdgeRecovery) {
+  Arena arena;
+  berberis::x86_64::MachineIR machine_ir(&arena);
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  // bb1   bb2
+  //   \  /  \
+  //   bb3  bb4(recovery)
+  auto bb1 = machine_ir.NewBasicBlock();
+  auto bb2 = machine_ir.NewBasicBlock();
+  auto bb3 = machine_ir.NewBasicBlock();
+  auto bb4 = machine_ir.NewBasicBlock();
+  machine_ir.AddEdge(bb1, bb3);
+  machine_ir.AddEdge(bb2, bb3);
+  machine_ir.AddEdge(bb2, bb4);
+
+  builder.StartBasicBlock(bb1);
+  builder.Gen<PseudoBranch>(bb3);
+
+  builder.StartBasicBlock(bb2);
+  builder.Gen<PseudoBranch>(bb3);
+
+  builder.StartBasicBlock(bb3);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  builder.StartBasicBlock(bb4);
+  bb4->MarkAsRecovery();
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  x86_64::RemoveCriticalEdges(&machine_ir);
+  ASSERT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  ASSERT_EQ(bb3->in_edges().size(), 2UL);
+  int bb1_index_in_bb3 = GetInEdgeIndex(bb3, bb1);
+  ASSERT_NE(bb1_index_in_bb3, -1);
+  auto new_bb = bb3->in_edges()[1 - bb1_index_in_bb3]->src();
+
+  ASSERT_EQ(bb2->out_edges().size(), 2UL);
+  int bb4_index_in_bb2 = GetOutEdgeIndex(bb2, bb4);
+  ASSERT_NE(bb4_index_in_bb2, -1);
+  EXPECT_EQ(new_bb, bb2->out_edges()[1 - bb4_index_in_bb2]->dst());
+
+  ASSERT_EQ(bb2->insn_list().size(), 1UL);
+  ASSERT_EQ(bb2->insn_list().front()->opcode(), kMachineOpPseudoBranch);
+  ASSERT_EQ(static_cast<PseudoBranch*>(bb2->insn_list().front())->then_bb(), new_bb);
+}
+
+TEST(MachineIR, PutsInSuccessorsKillPut) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+  auto* bb3 = machine_ir.NewBasicBlock();
+  machine_ir.AddEdge(bb1, bb2);
+  machine_ir.AddEdge(bb1, bb3);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto vreg = machine_ir.AllocVReg();
+  builder.StartBasicBlock(bb1);
+  builder.GenPut(0, vreg);
+  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb2, bb3, x86_64::kMachineRegFLAGS);
+
+  builder.StartBasicBlock(bb2);
+  builder.GenPut(0, vreg);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  builder.StartBasicBlock(bb3);
+  builder.GenPut(0, vreg);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::RemoveRedundantPut(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  ASSERT_EQ(1u, bb1->insn_list().size());
+  ASSERT_EQ(2u, bb2->insn_list().size());
+  ASSERT_EQ(2u, bb3->insn_list().size());
+}
+
+TEST(MachineIR, PutInOneOfTwoSuccessorsDoesNotKillPut) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+  auto* bb3 = machine_ir.NewBasicBlock();
+  machine_ir.AddEdge(bb1, bb2);
+  machine_ir.AddEdge(bb1, bb3);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto vreg = machine_ir.AllocVReg();
+  builder.StartBasicBlock(bb1);
+  builder.GenPut(0, vreg);
+  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb2, bb3, x86_64::kMachineRegFLAGS);
+
+  builder.StartBasicBlock(bb2);
+  builder.GenPut(0, vreg);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  builder.StartBasicBlock(bb3);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::RemoveRedundantPut(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  ASSERT_EQ(2u, bb1->insn_list().size());
+  ASSERT_EQ(2u, bb2->insn_list().size());
+  ASSERT_EQ(1u, bb3->insn_list().size());
+}
+
+TEST(MachineIR, MultiplePutsCanBeKilled) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+  auto* bb3 = machine_ir.NewBasicBlock();
+  machine_ir.AddEdge(bb1, bb2);
+  machine_ir.AddEdge(bb1, bb3);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto vreg1 = machine_ir.AllocVReg();
+  auto vreg2 = machine_ir.AllocVReg();
+  builder.StartBasicBlock(bb1);
+  builder.GenPut(0, vreg1);
+  builder.GenPut(1, vreg2);
+  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb2, bb3, x86_64::kMachineRegFLAGS);
+
+  builder.StartBasicBlock(bb2);
+  builder.GenPut(0, vreg1);
+  builder.GenPut(1, vreg2);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  builder.StartBasicBlock(bb3);
+  builder.GenPut(0, vreg1);
+  builder.GenPut(1, vreg2);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::RemoveRedundantPut(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  ASSERT_EQ(1u, bb1->insn_list().size());
+  ASSERT_EQ(3u, bb2->insn_list().size());
+  ASSERT_EQ(3u, bb3->insn_list().size());
+}
+
+TEST(MachineIR, GetInOneOfTheSuccessorsMakesPutLive) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+  auto* bb3 = machine_ir.NewBasicBlock();
+  machine_ir.AddEdge(bb1, bb2);
+  machine_ir.AddEdge(bb1, bb3);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto vreg = machine_ir.AllocVReg();
+  builder.StartBasicBlock(bb1);
+  builder.GenPut(0, vreg);
+  builder.Gen<PseudoBranch>(bb2);
+
+  builder.StartBasicBlock(bb2);
+  builder.GenGet(vreg, 0);
+  builder.GenPut(0, vreg);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  builder.StartBasicBlock(bb3);
+  builder.GenPut(0, vreg);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::RemoveRedundantPut(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  ASSERT_EQ(2u, bb1->insn_list().size());
+  ASSERT_EQ(3u, bb2->insn_list().size());
+  ASSERT_EQ(2u, bb3->insn_list().size());
+}
+
+TEST(MachineIR, ForwardingPseudoBranch) {
+  // We create:
+  //
+  // BB0 -> BB1
+  // BB1 (forwarder)
+  // BB2
+  //
+  // We verify that the jump to BB1 is redirected to BB2.
+
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto* bb0 = machine_ir.NewBasicBlock();
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+  machine_ir.AddEdge(bb0, bb1);
+  machine_ir.AddEdge(bb1, bb2);
+
+  builder.StartBasicBlock(bb0);
+  builder.Gen<x86_64::MovlRegImm>(x86_64::kMachineRegRAX, 23);
+  builder.Gen<PseudoBranch>(bb1);
+
+  // Create a forwarder block
+  builder.StartBasicBlock(bb1);
+  builder.Gen<PseudoBranch>(bb2);
+
+  builder.StartBasicBlock(bb2);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::RemoveForwarderBlocks(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  // Verify that we have exactly two basic blocks.
+  EXPECT_EQ(2u, machine_ir.bb_list().size());
+
+  auto bb_it = machine_ir.bb_list().begin();
+
+  // Verify that BB0 contains exactly two instructions.
+  EXPECT_EQ(bb0, *bb_it);
+  EXPECT_EQ(2u, bb0->insn_list().size());
+
+  // Verify that the last instruction is PseudoBranch that jumps
+  // to BB2.
+  MachineInsn* bb0_insn = bb0->insn_list().back();
+  EXPECT_EQ(kMachineOpPseudoBranch, bb0_insn->opcode());
+  PseudoBranch* bb0_branch_insn = static_cast<PseudoBranch*>(bb0_insn);
+  EXPECT_EQ(bb2, bb0_branch_insn->then_bb());
+
+  // Check for BB2.  Note that RemoveForwarderBlocks deletes BB1.
+  EXPECT_EQ(bb2, *(++bb_it));
+}
+
+TEST(MachineIR, ForwardingPseudoCondBranchThen) {
+  // We create:
+  //
+  // BB0 (cond jump)-> BB1 (then_bb) and BB3 (else_bb)
+  // BB1 (forwarder)
+  // BB2
+  // BB3
+  //
+  // We verify that the jump to BB1 is redirected to BB2.
+
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto* bb0 = machine_ir.NewBasicBlock();
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+  auto* bb3 = machine_ir.NewBasicBlock();
+
+  machine_ir.AddEdge(bb0, bb1);
+  machine_ir.AddEdge(bb0, bb3);
+  machine_ir.AddEdge(bb1, bb2);
+  machine_ir.AddEdge(bb2, bb3);
+
+  x86_64::MachineIRBuilder builder(&machine_ir);
+  builder.StartBasicBlock(bb0);
+  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb1, bb3, x86_64::kMachineRegFLAGS);
+
+  // Create a forwarder block
+  builder.StartBasicBlock(bb1);
+  builder.Gen<PseudoBranch>(bb2);
+
+  builder.StartBasicBlock(bb2);
+  builder.Gen<x86_64::MovlRegImm>(x86_64::kMachineRegRAX, 23);
+  builder.Gen<PseudoBranch>(bb3);
+
+  builder.StartBasicBlock(bb3);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::RemoveForwarderBlocks(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  // Verify that we have exactly three basic blocks.
+  EXPECT_EQ(3u, machine_ir.bb_list().size());
+
+  auto bb_it = machine_ir.bb_list().begin();
+
+  // Verify that BB0 contains exactly one instruction.
+  EXPECT_EQ(bb0, *bb_it);
+  EXPECT_EQ(1u, bb0->insn_list().size());
+
+  // Verify that the sole instruction is PseudoCondBranch that jumps
+  // to BB2 (then_bb) and BB3 (else_bb).
+  MachineInsn* bb0_insn = bb0->insn_list().front();
+  EXPECT_EQ(kMachineOpPseudoCondBranch, bb0_insn->opcode());
+  PseudoCondBranch* bb0_branch_insn = static_cast<PseudoCondBranch*>(bb0_insn);
+  EXPECT_EQ(bb2, bb0_branch_insn->then_bb());
+  EXPECT_EQ(bb3, bb0_branch_insn->else_bb());
+
+  // Check for BB2.  Note that RemoveForwarderBlocks deletes BB1.
+  EXPECT_EQ(bb2, *(++bb_it));
+
+  // Check for BB3.
+  EXPECT_EQ(bb3, *(++bb_it));
+}
+
+TEST(MachineIR, ForwardingPseudoCondBranchElse) {
+  // We create:
+  //
+  // BB0 (cond jump)-> BB1 (then_bb) and BB2 (else_bb)
+  // BB1
+  // BB2 (forwarder)
+  // BB3
+  //
+  // We verify that the jump to BB2 is redirected to BB3.
+
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto* bb0 = machine_ir.NewBasicBlock();
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+  auto* bb3 = machine_ir.NewBasicBlock();
+
+  machine_ir.AddEdge(bb0, bb1);
+  machine_ir.AddEdge(bb0, bb2);
+  machine_ir.AddEdge(bb2, bb3);
+
+  builder.StartBasicBlock(bb0);
+  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb1, bb2, x86_64::kMachineRegFLAGS);
+
+  builder.StartBasicBlock(bb1);
+  builder.Gen<x86_64::MovlRegImm>(x86_64::kMachineRegRAX, 23);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  // Create a forwarder block
+  builder.StartBasicBlock(bb2);
+  builder.Gen<PseudoBranch>(bb3);
+
+  builder.StartBasicBlock(bb3);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::RemoveForwarderBlocks(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  // Verify that we have exactly three basic blocks.
+  EXPECT_EQ(3u, machine_ir.bb_list().size());
+
+  auto bb_it = machine_ir.bb_list().begin();
+
+  // Verify that BB0 contains exactly one instruction.
+  EXPECT_EQ(bb0, *bb_it);
+  EXPECT_EQ(1u, bb0->insn_list().size());
+
+  // Verify that the sole instruction is PseudoCondBranch that jumps
+  // to BB1 (then_bb) and BB3 (else_bb).
+  MachineInsn* bb0_insn = bb0->insn_list().front();
+  EXPECT_EQ(kMachineOpPseudoCondBranch, bb0_insn->opcode());
+  PseudoCondBranch* bb0_branch_insn = static_cast<PseudoCondBranch*>(bb0_insn);
+  EXPECT_EQ(bb1, bb0_branch_insn->then_bb());
+  EXPECT_EQ(bb3, bb0_branch_insn->else_bb());
+
+  // Check for BB1.
+  EXPECT_EQ(bb1, *(++bb_it));
+
+  // Check for BB3.  Note that RemoveForwarderBlocks deletes BB2.
+  EXPECT_EQ(bb3, *(++bb_it));
+}
+
+TEST(MachineIR, ForwarderBlockAtTheBeginning) {
+  // We create:
+  //
+  // BB0 (forwarder) -> BB2
+  // BB1
+  // BB2
+  //
+  // We verify that RemoveForwarderBlocks does not change or remove
+  // basic blocks.
+
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto* bb0 = machine_ir.NewBasicBlock();
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+
+  machine_ir.AddEdge(bb0, bb2);
+  machine_ir.AddEdge(bb1, bb2);
+
+  // bb0 is shaped like a forwarder block, but it is the entry block, so it
+  // must be kept.
+  builder.StartBasicBlock(bb0);
+  builder.Gen<PseudoBranch>(bb2);
+
+  builder.StartBasicBlock(bb1);
+  builder.Gen<x86_64::MovlRegImm>(x86_64::kMachineRegRAX, 29);
+  builder.Gen<PseudoBranch>(bb2);
+
+  builder.StartBasicBlock(bb2);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::RemoveForwarderBlocks(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  // Verify that we still have exactly three basic blocks.
+  EXPECT_EQ(3u, machine_ir.bb_list().size());
+
+  auto bb_it = machine_ir.bb_list().begin();
+
+  // Check for BB0.
+  EXPECT_EQ(bb0, *bb_it);
+
+  // Check for BB1.
+  EXPECT_EQ(bb1, *(++bb_it));
+
+  // Check for BB2.
+  EXPECT_EQ(bb2, *(++bb_it));
+}
+
+TEST(MachineIR, RemoveForwarderBlocks) {
+  // We create:
+  //
+  // BB0 (cond jump)->  BB3
+  // BB1
+  // BB2 (forwarder)
+  // BB3 (forwarder)
+  // BB4
+  // BB5
+  //
+  // Tested cases:
+  //   1) regular -> forwarder -> forwarder
+  //   2) cond else -> forwarder -> regular
+  //
+  // Not tested: cond then -> forwarder, loops, forwarder as the first bb in the list.
+  //
+  // Attention: forwarder loops are not allowed.
+
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto* bb0 = machine_ir.NewBasicBlock();
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+  auto* bb3 = machine_ir.NewBasicBlock();
+  auto* bb4 = machine_ir.NewBasicBlock();
+  auto* bb5 = machine_ir.NewBasicBlock();
+
+  machine_ir.AddEdge(bb0, bb1);
+  machine_ir.AddEdge(bb0, bb3);
+  machine_ir.AddEdge(bb1, bb2);
+  machine_ir.AddEdge(bb2, bb3);
+  machine_ir.AddEdge(bb3, bb4);
+  machine_ir.AddEdge(bb4, bb5);
+
+  builder.StartBasicBlock(bb0);
+  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, bb1, bb3, x86_64::kMachineRegFLAGS);
+
+  builder.StartBasicBlock(bb1);
+  builder.Gen<x86_64::MovlRegImm>(x86_64::kMachineRegRAX, 23);
+  builder.Gen<PseudoBranch>(bb2);
+
+  // Create a forwarder block.
+  builder.StartBasicBlock(bb2);
+  builder.Gen<PseudoCopy>(x86_64::kMachineRegRAX, x86_64::kMachineRegRAX, 4);
+  builder.Gen<PseudoCopy>(x86_64::kMachineRegRBX, x86_64::kMachineRegRBX, 4);
+  builder.Gen<PseudoBranch>(bb3);
+
+  // Create another forwarder block.
+  builder.StartBasicBlock(bb3);
+  builder.Gen<PseudoBranch>(bb4);
+
+  builder.StartBasicBlock(bb4);
+  builder.Gen<x86_64::MovlRegImm>(x86_64::kMachineRegRBX, 7);
+  builder.Gen<PseudoBranch>(bb5);
+
+  builder.StartBasicBlock(bb5);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  RemoveNopPseudoCopy(&machine_ir);
+  x86_64::RemoveForwarderBlocks(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  // Verify that we have exactly four basic blocks left after two
+  // forwarder blocks are removed.
+  //
+  // BB0 (cond jump)->  BB4 (target changed)
+  // BB1 (target changed)
+  // BB4
+  // BB5
+  EXPECT_EQ(4u, machine_ir.bb_list().size());
+
+  auto bb_it = machine_ir.bb_list().begin();
+
+  // Verify that BB0 jumps to BB1 (then_bb) and BB4 (else_bb).
+  EXPECT_EQ(bb0, *bb_it);
+  MachineInsn* bb0_last_insn = bb0->insn_list().back();
+  EXPECT_EQ(kMachineOpPseudoCondBranch, bb0_last_insn->opcode());
+  PseudoCondBranch* bb0_branch_insn = static_cast<PseudoCondBranch*>(bb0_last_insn);
+  EXPECT_EQ(bb1, bb0_branch_insn->then_bb());
+  EXPECT_EQ(bb4, bb0_branch_insn->else_bb());
+
+  // Verify that BB1 jumps to BB4.
+  EXPECT_EQ(bb1, *(++bb_it));
+  MachineInsn* bb1_last_insn = bb1->insn_list().back();
+  EXPECT_EQ(kMachineOpPseudoBranch, bb1_last_insn->opcode());
+  PseudoBranch* bb1_branch_insn = static_cast<PseudoBranch*>(bb1_last_insn);
+  EXPECT_EQ(bb4, bb1_branch_insn->then_bb());
+
+  // Check for BB4.  Note that RemoveForwarderBlocks deletes BB2 and
+  // BB3.
+  EXPECT_EQ(bb4, *(++bb_it));
+
+  // Check for BB5.
+  EXPECT_EQ(bb5, *(++bb_it));
+}
+
+TEST(MachineIR, RemoveNopPseudoCopy) {
+  // Verify that RemoveNopPseudoCopy removes PseudoCopy instructions
+  // with identical source and destination operands while retaining
+  // all other instructions.
+
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+  auto* bb0 = machine_ir.NewBasicBlock();
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  builder.StartBasicBlock(bb0);
+  builder.Gen<PseudoCopy>(x86_64::kMachineRegRAX, x86_64::kMachineRegRAX, 4);
+  builder.Gen<PseudoCopy>(x86_64::kMachineRegRBX, x86_64::kMachineRegRCX, 4);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  RemoveNopPseudoCopy(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  // Verify that we have exactly one basic block.
+  EXPECT_EQ(1u, machine_ir.bb_list().size());
+
+  // Verify that bb0 contains exactly two instructions.
+  EXPECT_EQ(bb0, machine_ir.bb_list().front());
+  EXPECT_EQ(2u, bb0->insn_list().size());
+
+  auto insn_it = bb0->insn_list().begin();
+
+  // Verify that the first instruction is PseudoCopy that copies ECX
+  // to EBX.
+  MachineInsn* insn0 = *insn_it;
+  EXPECT_EQ(kMachineOpPseudoCopy, insn0->opcode());
+  EXPECT_EQ(x86_64::kMachineRegRBX, insn0->RegAt(0));
+  EXPECT_EQ(x86_64::kMachineRegRCX, insn0->RegAt(1));
+
+  // Verify that the next instruction is PseudoJump.
+  MachineInsn* insn1 = *(++insn_it);
+  EXPECT_EQ(kMachineOpPseudoJump, insn1->opcode());
+}
+
+TEST(MachineIR, ReorderBasicBlocksInReversePostOrder) {
+  //       |----|
+  //       v    |
+  // BB0  BB1  BB2
+  //  |         ^
+  //  |---------|
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+  x86_64::MachineIRBuilder builder(&machine_ir);
+
+  auto* bb0 = machine_ir.NewBasicBlock();
+  auto* bb1 = machine_ir.NewBasicBlock();
+  auto* bb2 = machine_ir.NewBasicBlock();
+
+  machine_ir.AddEdge(bb0, bb2);
+  machine_ir.AddEdge(bb2, bb1);
+
+  builder.StartBasicBlock(bb0);
+  builder.Gen<PseudoBranch>(bb2);
+
+  builder.StartBasicBlock(bb1);
+  builder.Gen<PseudoJump>(kNullGuestAddr);
+
+  builder.StartBasicBlock(bb2);
+  builder.Gen<PseudoBranch>(bb1);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::ReorderBasicBlocksInReversePostOrder(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  EXPECT_EQ(3u, machine_ir.bb_list().size());
+
+  auto bb_it = machine_ir.bb_list().begin();
+  EXPECT_EQ(bb0, *bb_it);
+  EXPECT_EQ(bb2, *(++bb_it));
+  EXPECT_EQ(bb1, *(++bb_it));
+}
+
+TEST(MachineIR, ReorderDiamondControlFlowInReversePostOrder) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto [bb1, bb2, bb3, bb4] = BuildDiamondControlFlow(&machine_ir);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::ReorderBasicBlocksInReversePostOrder(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  EXPECT_EQ(4u, machine_ir.bb_list().size());
+
+  auto bb_it = machine_ir.bb_list().begin();
+  auto* enter_bb = *bb_it++;
+  auto* then_bb = *bb_it++;
+  auto* else_bb = *bb_it++;
+  auto* merge_bb = *bb_it++;
+  EXPECT_EQ(enter_bb, bb1);
+  // `Then` and `else` are not strictly ordered by RPO.
+  if (then_bb == bb2) {
+    EXPECT_EQ(else_bb, bb3);
+  } else {
+    EXPECT_EQ(then_bb, bb3);
+    EXPECT_EQ(else_bb, bb2);
+  }
+  EXPECT_EQ(merge_bb, bb4);
+}
+
+TEST(MachineIR, ReorderControlFlowWithLoopInReversePostOrder) {
+  Arena arena;
+  x86_64::MachineIR machine_ir(&arena);
+
+  auto [bb1, bb2, bb3, bb4, unused_vreg] = BuildDataFlowAcrossEmptyLoop(&machine_ir);
+
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+  x86_64::ReorderBasicBlocksInReversePostOrder(&machine_ir);
+  EXPECT_EQ(x86_64::CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
+
+  EXPECT_EQ(4u, machine_ir.bb_list().size());
+
+  auto bb_it = machine_ir.bb_list().begin();
+  auto* enter_bb = *bb_it++;
+  auto* loop_head_bb = *bb_it++;
+  auto* then_bb = *bb_it++;
+  auto* else_bb = *bb_it++;
+  EXPECT_EQ(enter_bb, bb1);
+  EXPECT_EQ(loop_head_bb, bb2);
+  // `Then` and `else` are not strictly ordered by RPO.
+  // Note that the loop may be separated by the post-loop code.
+  if (then_bb == bb3) {
+    EXPECT_EQ(else_bb, bb4);
+  } else {
+    EXPECT_EQ(then_bb, bb4);
+    EXPECT_EQ(else_bb, bb3);
+  }
+}
+
+}  // namespace
+
+}  // namespace berberis