src/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp - toolchain/rustc - Git at Google

 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups  -------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This pass performs peephole optimizations to cleanup ugly code sequences at
 // MachineInstruction layer.
 //
 // Currently, there are two optimizations implemented:
 //  - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
 //    zero extend 32-bit subregisters to 64-bit registers, if the compiler
 //    could prove the subregisters is defined by 32-bit operations in which
 //    case the upper half of the underlying 64-bit registers were zeroed
 //    implicitly.
 //
 //  - One post-RA PreEmit pass to do final cleanup on some redundant
 //    instructions generated due to bad RA on subregister.
 //===----------------------------------------------------------------------===//

 #include "BPF.h"
 #include "BPFInstrInfo.h"
 #include "BPFTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include <set>

 using namespace llvm;

 #define DEBUG_TYPE "bpf-mi-zext-elim"

 static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
   cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));

 STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");

 namespace {

 struct BPFMIPeephole : public MachineFunctionPass {

   static char ID;
   const BPFInstrInfo *TII;
   MachineFunction *MF;
   MachineRegisterInfo *MRI;

   BPFMIPeephole() : MachineFunctionPass(ID) {
     initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
   }

 private:
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);

   bool isCopyFrom32Def(MachineInstr *CopyMI);
   bool isInsnFrom32Def(MachineInstr *DefInsn);
   bool isPhiFrom32Def(MachineInstr *MovMI);
   bool isMovFrom32Def(MachineInstr *MovMI);
   bool eliminateZExtSeq();
   bool eliminateZExt();

   std::set<MachineInstr *> PhiInsns;

 public:

   // Main entry point for this pass.
   bool runOnMachineFunction(MachineFunction &MF) override {
     if (skipFunction(MF.getFunction()))
       return false;

     initialize(MF);

     // First try to eliminate (zext, lshift, rshift) and then
     // try to eliminate zext.
     bool ZExtSeqExist, ZExtExist;
     ZExtSeqExist = eliminateZExtSeq();
     ZExtExist = eliminateZExt();
     return ZExtSeqExist || ZExtExist;
   }
 };

 // Initialize class variables.
 void BPFMIPeephole::initialize(MachineFunction &MFParm) {
   MF = &MFParm;
   MRI = &MF->getRegInfo();
   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
 }

 bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
 {
   MachineOperand &opnd = CopyMI->getOperand(1);

   if (!opnd.isReg())
     return false;

   // Return false if getting value from a 32bit physical register.
   // Most likely, this physical register is aliased to
   // function call return value or current function parameters.
   Register Reg = opnd.getReg();
   if (!Reg.isVirtual())
     return false;

   if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
     return false;

   MachineInstr *DefInsn = MRI->getVRegDef(Reg);
   if (!isInsnFrom32Def(DefInsn))
     return false;

   return true;
 }

 bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
 {
   for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
     MachineOperand &opnd = PhiMI->getOperand(i);

     if (!opnd.isReg())
       return false;

     MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
     if (!PhiDef)
       return false;
     if (PhiDef->isPHI()) {
       if (!PhiInsns.insert(PhiDef).second)
         return false;
       if (!isPhiFrom32Def(PhiDef))
         return false;
     }
     if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
       return false;
   }

   return true;
 }

 // The \p DefInsn instruction defines a virtual register.
 bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
 {
   if (!DefInsn)
     return false;

   if (DefInsn->isPHI()) {
     if (!PhiInsns.insert(DefInsn).second)
       return false;
     if (!isPhiFrom32Def(DefInsn))
       return false;
   } else if (DefInsn->getOpcode() == BPF::COPY) {
     if (!isCopyFrom32Def(DefInsn))
       return false;
   }

   return true;
 }

 bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
 {
   MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());

   LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
   LLVM_DEBUG(DefInsn->dump());

   PhiInsns.clear();
   if (!isInsnFrom32Def(DefInsn))
     return false;

   LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");

   return true;
 }

 bool BPFMIPeephole::eliminateZExtSeq() {
   MachineInstr* ToErase = nullptr;
   bool Eliminated = false;

   for (MachineBasicBlock &MBB : *MF) {
     for (MachineInstr &MI : MBB) {
       // If the previous instruction was marked for elimination, remove it now.
       if (ToErase) {
         ToErase->eraseFromParent();
         ToErase = nullptr;
       }

       // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
       //
       //   MOV_32_64 rB, wA
       //   SLL_ri    rB, rB, 32
       //   SRL_ri    rB, rB, 32
       if (MI.getOpcode() == BPF::SRL_ri &&
           MI.getOperand(2).getImm() == 32) {
         Register DstReg = MI.getOperand(0).getReg();
         Register ShfReg = MI.getOperand(1).getReg();
         MachineInstr *SllMI = MRI->getVRegDef(ShfReg);

         LLVM_DEBUG(dbgs() << "Starting SRL found:");
         LLVM_DEBUG(MI.dump());

         if (!SllMI ||
             SllMI->isPHI() ||
             SllMI->getOpcode() != BPF::SLL_ri ||
             SllMI->getOperand(2).getImm() != 32)
           continue;

         LLVM_DEBUG(dbgs() << "  SLL found:");
         LLVM_DEBUG(SllMI->dump());

         MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
         if (!MovMI ||
             MovMI->isPHI() ||
             MovMI->getOpcode() != BPF::MOV_32_64)
           continue;

         LLVM_DEBUG(dbgs() << "  Type cast Mov found:");
         LLVM_DEBUG(MovMI->dump());

         Register SubReg = MovMI->getOperand(1).getReg();
         if (!isMovFrom32Def(MovMI)) {
           LLVM_DEBUG(dbgs()
                      << "  One ZExt elim sequence failed qualifying elim.\n");
           continue;
         }

         BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
           .addImm(0).addReg(SubReg).addImm(BPF::sub_32);

         SllMI->eraseFromParent();
         MovMI->eraseFromParent();
         // MI is the right shift, we can't erase it in it's own iteration.
         // Mark it to ToErase, and erase in the next iteration.
         ToErase = &MI;
         ZExtElemNum++;
         Eliminated = true;
       }
     }
   }

   return Eliminated;
 }

 bool BPFMIPeephole::eliminateZExt() {
   MachineInstr* ToErase = nullptr;
   bool Eliminated = false;

   for (MachineBasicBlock &MBB : *MF) {
     for (MachineInstr &MI : MBB) {
       // If the previous instruction was marked for elimination, remove it now.
       if (ToErase) {
         ToErase->eraseFromParent();
         ToErase = nullptr;
       }

       if (MI.getOpcode() != BPF::MOV_32_64)
         continue;

       // Eliminate MOV_32_64 if possible.
       //   MOV_32_64 rA, wB
       //
       // If wB has been zero extended, replace it with a SUBREG_TO_REG.
       // This is to workaround BPF programs where pkt->{data, data_end}
       // is encoded as u32, but actually the verifier populates them
       // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
       LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
       LLVM_DEBUG(MI.dump());

       if (!isMovFrom32Def(&MI))
         continue;

       LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");

       Register dst = MI.getOperand(0).getReg();
       Register src = MI.getOperand(1).getReg();

       // Build a SUBREG_TO_REG instruction.
       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
         .addImm(0).addReg(src).addImm(BPF::sub_32);

       ToErase = &MI;
       Eliminated = true;
     }
   }

   return Eliminated;
 }

 } // end default namespace

 INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
                 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
                 false, false)

 char BPFMIPeephole::ID = 0;
 FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }

 STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");

 namespace {

 struct BPFMIPreEmitPeephole : public MachineFunctionPass {

   static char ID;
   MachineFunction *MF;
   const TargetRegisterInfo *TRI;
   const BPFInstrInfo *TII;
   bool SupportGotol;

   BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
     initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
   }

 private:
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);

   bool in16BitRange(int Num);
   bool eliminateRedundantMov();
   bool adjustBranch();

 public:

   // Main entry point for this pass.
   bool runOnMachineFunction(MachineFunction &MF) override {
     if (skipFunction(MF.getFunction()))
       return false;

     initialize(MF);

     bool Changed;
     Changed = eliminateRedundantMov();
     if (SupportGotol)
       Changed = adjustBranch() || Changed;
     return Changed;
   }
 };

 // Initialize class variables.
 void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
   MF = &MFParm;
   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
   TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
   SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
   LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
 }

 bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
   MachineInstr* ToErase = nullptr;
   bool Eliminated = false;

   for (MachineBasicBlock &MBB : *MF) {
     for (MachineInstr &MI : MBB) {
       // If the previous instruction was marked for elimination, remove it now.
       if (ToErase) {
         LLVM_DEBUG(dbgs() << "  Redundant Mov Eliminated:");
         LLVM_DEBUG(ToErase->dump());
         ToErase->eraseFromParent();
         ToErase = nullptr;
       }

       // Eliminate identical move:
       //
       //   MOV rA, rA
       //
       // Note that we cannot remove
       //   MOV_32_64  rA, wA
       //   MOV_rr_32  wA, wA
       // as these two instructions having side effects, zeroing out
       // top 32 bits of rA.
       unsigned Opcode = MI.getOpcode();
       if (Opcode == BPF::MOV_rr) {
         Register dst = MI.getOperand(0).getReg();
         Register src = MI.getOperand(1).getReg();

         if (dst != src)
           continue;

         ToErase = &MI;
         RedundantMovElemNum++;
         Eliminated = true;
       }
     }
   }

   return Eliminated;
 }

 bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
   // Well, the cut-off is not precisely at 16bit range since
   // new codes are added during the transformation. So let us
   // a little bit conservative.
   return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
 }

 // Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)
 // is supported for both unconditional (JMP) and condition (JEQ, JSGT,
 // etc.) branches. In certain cases, e.g., full unrolling, the branch
 // target offset might exceed 16bit range. If this happens, the llvm
 // will generate incorrect code as the offset is truncated to 16bit.
 //
 // To fix this rare case, a new insn JMPL is introduced. This new
 // insn supports supports 32bit branch target offset. The compiler
 // does not use this insn during insn selection. Rather, BPF backend
 // will estimate the branch target offset and do JMP -> JMPL and
 // JEQ -> JEQ + JMPL conversion if the estimated branch target offset
 // is beyond 16bit.
 bool BPFMIPreEmitPeephole::adjustBranch() {
   bool Changed = false;
   int CurrNumInsns = 0;
   DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
   DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
   std::vector<MachineBasicBlock *> MBBs;

   MachineBasicBlock *PrevBB = nullptr;
   for (MachineBasicBlock &MBB : *MF) {
     // MBB.size() is the number of insns in this basic block, including some
     // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
     // Typically we have way more normal insns than DEBUG_VALUE insns.
     // Also, if we indeed need to convert conditional branch like JEQ to
     // JEQ + JMPL, we actually introduced some new insns like below.
     CurrNumInsns += (int)MBB.size();
     SoFarNumInsns[&MBB] = CurrNumInsns;
     if (PrevBB != nullptr)
       FollowThroughBB[PrevBB] = &MBB;
     PrevBB = &MBB;
     // A list of original BBs to make later traveral easier.
     MBBs.push_back(&MBB);
   }
   FollowThroughBB[PrevBB] = nullptr;

   for (unsigned i = 0; i < MBBs.size(); i++) {
     // We have four cases here:
     //  (1). no terminator, simple follow through.
     //  (2). jmp to another bb.
     //  (3). conditional jmp to another bb or follow through.
     //  (4). conditional jmp followed by an unconditional jmp.
     MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;

     MachineBasicBlock *MBB = MBBs[i];
     for (MachineInstr &Term : MBB->terminators()) {
       if (Term.isConditionalBranch()) {
         assert(CondJmp == nullptr);
         CondJmp = &Term;
       } else if (Term.isUnconditionalBranch()) {
         assert(UncondJmp == nullptr);
         UncondJmp = &Term;
       }
     }

     // (1). no terminator, simple follow through.
     if (!CondJmp && !UncondJmp)
       continue;

     MachineBasicBlock *CondTargetBB, *JmpBB;
     CurrNumInsns = SoFarNumInsns[MBB];

     // (2). jmp to another bb.
     if (!CondJmp && UncondJmp) {
       JmpBB = UncondJmp->getOperand(0).getMBB();
       if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
         continue;

       // replace this insn as a JMPL.
       BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
       UncondJmp->eraseFromParent();
       Changed = true;
       continue;
     }

     const BasicBlock *TermBB = MBB->getBasicBlock();
     int Dist;

     // (3). conditional jmp to another bb or follow through.
     if (!UncondJmp) {
       CondTargetBB = CondJmp->getOperand(2).getMBB();
       MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
       Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
       if (in16BitRange(Dist))
         continue;

       // We have
       //   B2: ...
       //       if (cond) goto B5
       //   B3: ...
       // where B2 -> B5 is beyond 16bit range.
       //
       // We do not have 32bit cond jmp insn. So we try to do
       // the following.
       //   B2:     ...
       //           if (cond) goto New_B1
       //   New_B0  goto B3
       //   New_B1: gotol B5
       //   B3: ...
       // Basically two new basic blocks are created.
       MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
       MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);

       // Insert New_B0 and New_B1 into function block list.
       MachineFunction::iterator MBB_I  = ++MBB->getIterator();
       MF->insert(MBB_I, New_B0);
       MF->insert(MBB_I, New_B1);

       // replace B2 cond jump
       if (CondJmp->getOperand(1).isReg())
         BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
             .addReg(CondJmp->getOperand(0).getReg())
             .addReg(CondJmp->getOperand(1).getReg())
             .addMBB(New_B1);
       else
         BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
             .addReg(CondJmp->getOperand(0).getReg())
             .addImm(CondJmp->getOperand(1).getImm())
             .addMBB(New_B1);

       // it is possible that CondTargetBB and FollowBB are the same. But the
       // above Dist checking should already filtered this case.
       MBB->removeSuccessor(CondTargetBB);
       MBB->removeSuccessor(FollowBB);
       MBB->addSuccessor(New_B0);
       MBB->addSuccessor(New_B1);

       // Populate insns in New_B0 and New_B1.
       BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);
       BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
           .addMBB(CondTargetBB);

       New_B0->addSuccessor(FollowBB);
       New_B1->addSuccessor(CondTargetBB);
       CondJmp->eraseFromParent();
       Changed = true;
       continue;
     }

     //  (4). conditional jmp followed by an unconditional jmp.
     CondTargetBB = CondJmp->getOperand(2).getMBB();
     JmpBB = UncondJmp->getOperand(0).getMBB();

     // We have
     //   B2: ...
     //       if (cond) goto B5
     //       JMP B7
     //   B3: ...
     //
     // If only B2->B5 is out of 16bit range, we can do
     //   B2: ...
     //       if (cond) goto new_B
     //       JMP B7
     //   New_B: gotol B5
     //   B3: ...
     //
     // If only 'JMP B7' is out of 16bit range, we can replace
     // 'JMP B7' with 'JMPL B7'.
     //
     // If both B2->B5 and 'JMP B7' is out of range, just do
     // both the above transformations.
     Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
     if (!in16BitRange(Dist)) {
       MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);

       // Insert New_B0 into function block list.
       MF->insert(++MBB->getIterator(), New_B);

       // replace B2 cond jump
       if (CondJmp->getOperand(1).isReg())
         BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
             .addReg(CondJmp->getOperand(0).getReg())
             .addReg(CondJmp->getOperand(1).getReg())
             .addMBB(New_B);
       else
         BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
             .addReg(CondJmp->getOperand(0).getReg())
             .addImm(CondJmp->getOperand(1).getImm())
             .addMBB(New_B);

       if (CondTargetBB != JmpBB)
         MBB->removeSuccessor(CondTargetBB);
       MBB->addSuccessor(New_B);

       // Populate insn in New_B.
       BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);

       New_B->addSuccessor(CondTargetBB);
       CondJmp->eraseFromParent();
       Changed = true;
     }

     if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
       BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
       UncondJmp->eraseFromParent();
       Changed = true;
     }
   }

   return Changed;
 }

 } // end default namespace

 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
                 "BPF PreEmit Peephole Optimization", false, false)

 char BPFMIPreEmitPeephole::ID = 0;
 FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
 {
   return new BPFMIPreEmitPeephole();
 }
	//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This pass performs peephole optimizations to cleanup ugly code sequences at
	// MachineInstruction layer.
	//
	// Currently, there are two optimizations implemented:
	// - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
	// zero extend 32-bit subregisters to 64-bit registers, if the compiler
	// could prove the subregisters is defined by 32-bit operations in which
	// case the upper half of the underlying 64-bit registers were zeroed
	// implicitly.
	//
	// - One post-RA PreEmit pass to do final cleanup on some redundant
	// instructions generated due to bad RA on subregister.
	//===----------------------------------------------------------------------===//

	#include "BPF.h"
	#include "BPFInstrInfo.h"
	#include "BPFTargetMachine.h"
	#include "llvm/ADT/Statistic.h"
	#include "llvm/CodeGen/MachineFunctionPass.h"
	#include "llvm/CodeGen/MachineInstrBuilder.h"
	#include "llvm/CodeGen/MachineRegisterInfo.h"
	#include "llvm/Support/Debug.h"
	#include <set>

	using namespace llvm;

	#define DEBUG_TYPE "bpf-mi-zext-elim"

	static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
	cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));

	STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");

	namespace {

	struct BPFMIPeephole : public MachineFunctionPass {

	static char ID;
	const BPFInstrInfo *TII;
	MachineFunction *MF;
	MachineRegisterInfo *MRI;

	BPFMIPeephole() : MachineFunctionPass(ID) {
	initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
	}

	private:
	// Initialize class variables.
	void initialize(MachineFunction &MFParm);

	bool isCopyFrom32Def(MachineInstr *CopyMI);
	bool isInsnFrom32Def(MachineInstr *DefInsn);
	bool isPhiFrom32Def(MachineInstr *MovMI);
	bool isMovFrom32Def(MachineInstr *MovMI);
	bool eliminateZExtSeq();
	bool eliminateZExt();

	std::set<MachineInstr *> PhiInsns;

	public:

	// Main entry point for this pass.
	bool runOnMachineFunction(MachineFunction &MF) override {
	if (skipFunction(MF.getFunction()))
	return false;

	initialize(MF);

	// First try to eliminate (zext, lshift, rshift) and then
	// try to eliminate zext.
	bool ZExtSeqExist, ZExtExist;
	ZExtSeqExist = eliminateZExtSeq();
	ZExtExist = eliminateZExt();
	return ZExtSeqExist \|\| ZExtExist;
	}
	};

	// Initialize class variables.
	void BPFMIPeephole::initialize(MachineFunction &MFParm) {
	MF = &MFParm;
	MRI = &MF->getRegInfo();
	TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
	LLVM_DEBUG(dbgs() << "* BPF MachineSSA ZEXT Elim peephole pass *\n\n");
	}

	bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
	{
	MachineOperand &opnd = CopyMI->getOperand(1);

	if (!opnd.isReg())
	return false;

	// Return false if getting value from a 32bit physical register.
	// Most likely, this physical register is aliased to
	// function call return value or current function parameters.
	Register Reg = opnd.getReg();
	if (!Reg.isVirtual())
	return false;

	if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
	return false;

	MachineInstr *DefInsn = MRI->getVRegDef(Reg);
	if (!isInsnFrom32Def(DefInsn))
	return false;

	return true;
	}

	bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
	{
	for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
	MachineOperand &opnd = PhiMI->getOperand(i);

	if (!opnd.isReg())
	return false;

	MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
	if (!PhiDef)
	return false;
	if (PhiDef->isPHI()) {
	if (!PhiInsns.insert(PhiDef).second)
	return false;
	if (!isPhiFrom32Def(PhiDef))
	return false;
	}
	if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
	return false;
	}

	return true;
	}

	// The \p DefInsn instruction defines a virtual register.
	bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
	{
	if (!DefInsn)
	return false;

	if (DefInsn->isPHI()) {
	if (!PhiInsns.insert(DefInsn).second)
	return false;
	if (!isPhiFrom32Def(DefInsn))
	return false;
	} else if (DefInsn->getOpcode() == BPF::COPY) {
	if (!isCopyFrom32Def(DefInsn))
	return false;
	}

	return true;
	}

	bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
	{
	MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());

	LLVM_DEBUG(dbgs() << " Def of Mov Src:");
	LLVM_DEBUG(DefInsn->dump());

	PhiInsns.clear();
	if (!isInsnFrom32Def(DefInsn))
	return false;

	LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");

	return true;
	}

	bool BPFMIPeephole::eliminateZExtSeq() {
	MachineInstr* ToErase = nullptr;
	bool Eliminated = false;

	for (MachineBasicBlock &MBB : *MF) {
	for (MachineInstr &MI : MBB) {
	// If the previous instruction was marked for elimination, remove it now.
	if (ToErase) {
	ToErase->eraseFromParent();
	ToErase = nullptr;
	}

	// Eliminate the 32-bit to 64-bit zero extension sequence when possible.
	//
	// MOV_32_64 rB, wA
	// SLL_ri rB, rB, 32
	// SRL_ri rB, rB, 32
	if (MI.getOpcode() == BPF::SRL_ri &&
	MI.getOperand(2).getImm() == 32) {
	Register DstReg = MI.getOperand(0).getReg();
	Register ShfReg = MI.getOperand(1).getReg();
	MachineInstr *SllMI = MRI->getVRegDef(ShfReg);

	LLVM_DEBUG(dbgs() << "Starting SRL found:");
	LLVM_DEBUG(MI.dump());

	if (!SllMI \|\|
	SllMI->isPHI() \|\|
	SllMI->getOpcode() != BPF::SLL_ri \|\|
	SllMI->getOperand(2).getImm() != 32)
	continue;

	LLVM_DEBUG(dbgs() << " SLL found:");
	LLVM_DEBUG(SllMI->dump());

	MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
	if (!MovMI \|\|
	MovMI->isPHI() \|\|
	MovMI->getOpcode() != BPF::MOV_32_64)
	continue;

	LLVM_DEBUG(dbgs() << " Type cast Mov found:");
	LLVM_DEBUG(MovMI->dump());

	Register SubReg = MovMI->getOperand(1).getReg();
	if (!isMovFrom32Def(MovMI)) {
	LLVM_DEBUG(dbgs()
	<< " One ZExt elim sequence failed qualifying elim.\n");
	continue;
	}

	BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
	.addImm(0).addReg(SubReg).addImm(BPF::sub_32);

	SllMI->eraseFromParent();
	MovMI->eraseFromParent();
	// MI is the right shift, we can't erase it in it's own iteration.
	// Mark it to ToErase, and erase in the next iteration.
	ToErase = &MI;
	ZExtElemNum++;
	Eliminated = true;
	}
	}
	}

	return Eliminated;
	}

	bool BPFMIPeephole::eliminateZExt() {
	MachineInstr* ToErase = nullptr;
	bool Eliminated = false;

	for (MachineBasicBlock &MBB : *MF) {
	for (MachineInstr &MI : MBB) {
	// If the previous instruction was marked for elimination, remove it now.
	if (ToErase) {
	ToErase->eraseFromParent();
	ToErase = nullptr;
	}

	if (MI.getOpcode() != BPF::MOV_32_64)
	continue;

	// Eliminate MOV_32_64 if possible.
	// MOV_32_64 rA, wB
	//
	// If wB has been zero extended, replace it with a SUBREG_TO_REG.
	// This is to workaround BPF programs where pkt->{data, data_end}
	// is encoded as u32, but actually the verifier populates them
	// as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
	LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
	LLVM_DEBUG(MI.dump());

	if (!isMovFrom32Def(&MI))
	continue;

	LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");

	Register dst = MI.getOperand(0).getReg();
	Register src = MI.getOperand(1).getReg();

	// Build a SUBREG_TO_REG instruction.
	BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
	.addImm(0).addReg(src).addImm(BPF::sub_32);

	ToErase = &MI;
	Eliminated = true;
	}
	}

	return Eliminated;
	}

	} // end default namespace

	INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
	"BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
	false, false)

	char BPFMIPeephole::ID = 0;
	FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }

	STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");

	namespace {

	struct BPFMIPreEmitPeephole : public MachineFunctionPass {

	static char ID;
	MachineFunction *MF;
	const TargetRegisterInfo *TRI;
	const BPFInstrInfo *TII;
	bool SupportGotol;

	BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
	initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
	}

	private:
	// Initialize class variables.
	void initialize(MachineFunction &MFParm);

	bool in16BitRange(int Num);
	bool eliminateRedundantMov();
	bool adjustBranch();

	public:

	// Main entry point for this pass.
	bool runOnMachineFunction(MachineFunction &MF) override {
	if (skipFunction(MF.getFunction()))
	return false;

	initialize(MF);

	bool Changed;
	Changed = eliminateRedundantMov();
	if (SupportGotol)
	Changed = adjustBranch() \|\| Changed;
	return Changed;
	}
	};

	// Initialize class variables.
	void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
	MF = &MFParm;
	TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
	TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
	SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
	LLVM_DEBUG(dbgs() << "* BPF PreEmit peephole pass *\n\n");
	}

	bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
	MachineInstr* ToErase = nullptr;
	bool Eliminated = false;

	for (MachineBasicBlock &MBB : *MF) {
	for (MachineInstr &MI : MBB) {
	// If the previous instruction was marked for elimination, remove it now.
	if (ToErase) {
	LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");
	LLVM_DEBUG(ToErase->dump());
	ToErase->eraseFromParent();
	ToErase = nullptr;
	}

	// Eliminate identical move:
	//
	// MOV rA, rA
	//
	// Note that we cannot remove
	// MOV_32_64 rA, wA
	// MOV_rr_32 wA, wA
	// as these two instructions having side effects, zeroing out
	// top 32 bits of rA.
	unsigned Opcode = MI.getOpcode();
	if (Opcode == BPF::MOV_rr) {
	Register dst = MI.getOperand(0).getReg();
	Register src = MI.getOperand(1).getReg();

	if (dst != src)
	continue;

	ToErase = &MI;
	RedundantMovElemNum++;
	Eliminated = true;
	}
	}
	}

	return Eliminated;
	}

	bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
	// Well, the cut-off is not precisely at 16bit range since
	// new codes are added during the transformation. So let us
	// a little bit conservative.
	return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
	}

	// Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)
	// is supported for both unconditional (JMP) and condition (JEQ, JSGT,
	// etc.) branches. In certain cases, e.g., full unrolling, the branch
	// target offset might exceed 16bit range. If this happens, the llvm
	// will generate incorrect code as the offset is truncated to 16bit.
	//
	// To fix this rare case, a new insn JMPL is introduced. This new
	// insn supports supports 32bit branch target offset. The compiler
	// does not use this insn during insn selection. Rather, BPF backend
	// will estimate the branch target offset and do JMP -> JMPL and
	// JEQ -> JEQ + JMPL conversion if the estimated branch target offset
	// is beyond 16bit.
	bool BPFMIPreEmitPeephole::adjustBranch() {
	bool Changed = false;
	int CurrNumInsns = 0;
	DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
	DenseMap<MachineBasicBlock , MachineBasicBlock > FollowThroughBB;
	std::vector<MachineBasicBlock *> MBBs;

	MachineBasicBlock *PrevBB = nullptr;
	for (MachineBasicBlock &MBB : *MF) {
	// MBB.size() is the number of insns in this basic block, including some
	// debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
	// Typically we have way more normal insns than DEBUG_VALUE insns.
	// Also, if we indeed need to convert conditional branch like JEQ to
	// JEQ + JMPL, we actually introduced some new insns like below.
	CurrNumInsns += (int)MBB.size();
	SoFarNumInsns[&MBB] = CurrNumInsns;
	if (PrevBB != nullptr)
	FollowThroughBB[PrevBB] = &MBB;
	PrevBB = &MBB;
	// A list of original BBs to make later traveral easier.
	MBBs.push_back(&MBB);
	}
	FollowThroughBB[PrevBB] = nullptr;

	for (unsigned i = 0; i < MBBs.size(); i++) {
	// We have four cases here:
	// (1). no terminator, simple follow through.
	// (2). jmp to another bb.
	// (3). conditional jmp to another bb or follow through.
	// (4). conditional jmp followed by an unconditional jmp.
	MachineInstr CondJmp = nullptr, UncondJmp = nullptr;

	MachineBasicBlock *MBB = MBBs[i];
	for (MachineInstr &Term : MBB->terminators()) {
	if (Term.isConditionalBranch()) {
	assert(CondJmp == nullptr);
	CondJmp = &Term;
	} else if (Term.isUnconditionalBranch()) {
	assert(UncondJmp == nullptr);
	UncondJmp = &Term;
	}
	}

	// (1). no terminator, simple follow through.
	if (!CondJmp && !UncondJmp)
	continue;

	MachineBasicBlock CondTargetBB, JmpBB;
	CurrNumInsns = SoFarNumInsns[MBB];

	// (2). jmp to another bb.
	if (!CondJmp && UncondJmp) {
	JmpBB = UncondJmp->getOperand(0).getMBB();
	if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
	continue;

	// replace this insn as a JMPL.
	BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
	UncondJmp->eraseFromParent();
	Changed = true;
	continue;
	}

	const BasicBlock *TermBB = MBB->getBasicBlock();
	int Dist;

	// (3). conditional jmp to another bb or follow through.
	if (!UncondJmp) {
	CondTargetBB = CondJmp->getOperand(2).getMBB();
	MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
	Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
	if (in16BitRange(Dist))
	continue;

	// We have
	// B2: ...
	// if (cond) goto B5
	// B3: ...
	// where B2 -> B5 is beyond 16bit range.
	//
	// We do not have 32bit cond jmp insn. So we try to do
	// the following.
	// B2: ...
	// if (cond) goto New_B1
	// New_B0 goto B3
	// New_B1: gotol B5
	// B3: ...
	// Basically two new basic blocks are created.
	MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
	MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);

	// Insert New_B0 and New_B1 into function block list.
	MachineFunction::iterator MBB_I = ++MBB->getIterator();
	MF->insert(MBB_I, New_B0);
	MF->insert(MBB_I, New_B1);

	// replace B2 cond jump
	if (CondJmp->getOperand(1).isReg())
	BuildMI(MBB, MachineBasicBlock::iterator(CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
	.addReg(CondJmp->getOperand(0).getReg())
	.addReg(CondJmp->getOperand(1).getReg())
	.addMBB(New_B1);
	else
	BuildMI(MBB, MachineBasicBlock::iterator(CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
	.addReg(CondJmp->getOperand(0).getReg())
	.addImm(CondJmp->getOperand(1).getImm())
	.addMBB(New_B1);

	// it is possible that CondTargetBB and FollowBB are the same. But the
	// above Dist checking should already filtered this case.
	MBB->removeSuccessor(CondTargetBB);
	MBB->removeSuccessor(FollowBB);
	MBB->addSuccessor(New_B0);
	MBB->addSuccessor(New_B1);

	// Populate insns in New_B0 and New_B1.
	BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);
	BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
	.addMBB(CondTargetBB);

	New_B0->addSuccessor(FollowBB);
	New_B1->addSuccessor(CondTargetBB);
	CondJmp->eraseFromParent();
	Changed = true;
	continue;
	}

	// (4). conditional jmp followed by an unconditional jmp.
	CondTargetBB = CondJmp->getOperand(2).getMBB();
	JmpBB = UncondJmp->getOperand(0).getMBB();

	// We have
	// B2: ...
	// if (cond) goto B5
	// JMP B7
	// B3: ...
	//
	// If only B2->B5 is out of 16bit range, we can do
	// B2: ...
	// if (cond) goto new_B
	// JMP B7
	// New_B: gotol B5
	// B3: ...
	//
	// If only 'JMP B7' is out of 16bit range, we can replace
	// 'JMP B7' with 'JMPL B7'.
	//
	// If both B2->B5 and 'JMP B7' is out of range, just do
	// both the above transformations.
	Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
	if (!in16BitRange(Dist)) {
	MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);

	// Insert New_B0 into function block list.
	MF->insert(++MBB->getIterator(), New_B);

	// replace B2 cond jump
	if (CondJmp->getOperand(1).isReg())
	BuildMI(MBB, MachineBasicBlock::iterator(CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
	.addReg(CondJmp->getOperand(0).getReg())
	.addReg(CondJmp->getOperand(1).getReg())
	.addMBB(New_B);
	else
	BuildMI(MBB, MachineBasicBlock::iterator(CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
	.addReg(CondJmp->getOperand(0).getReg())
	.addImm(CondJmp->getOperand(1).getImm())
	.addMBB(New_B);

	if (CondTargetBB != JmpBB)
	MBB->removeSuccessor(CondTargetBB);
	MBB->addSuccessor(New_B);

	// Populate insn in New_B.
	BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);

	New_B->addSuccessor(CondTargetBB);
	CondJmp->eraseFromParent();
	Changed = true;
	}

	if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
	BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
	UncondJmp->eraseFromParent();
	Changed = true;
	}
	}

	return Changed;
	}

	} // end default namespace

	INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
	"BPF PreEmit Peephole Optimization", false, false)

	char BPFMIPreEmitPeephole::ID = 0;
	FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
	{
	return new BPFMIPreEmitPeephole();
	}