//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "dagcombine"
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
namespace {
static cl::opt<bool>
CombinerAA("combiner-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner alias-analysis heuristics"));
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner's use of IR alias analysis"));
static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
cl::desc("Enable DAG combiner's use of TBAA"));
#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
cl::desc("Only use DAG-combiner alias analysis in this"
" function"));
#endif
/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
cl::desc("Bypass the profitability model of load "
"slicing"),
cl::init(false));
static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
//------------------------------ DAGCombiner ---------------------------------//
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
bool LegalOperations;
bool LegalTypes;
bool ForCodeSize;
/// \brief Worklist of all of the nodes that need to be simplified.
///
/// This must behave as a stack -- new nodes to process are pushed onto the
/// back and when processing we pop off of the back.
///
/// The worklist will not contain duplicates but may contain null entries
/// due to nodes being deleted from the underlying DAG.
SmallVector<SDNode *, 64> Worklist;
/// \brief Mapping from an SDNode to its position on the worklist.
///
/// This is used to find and remove nodes from the worklist (by nulling
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
/// \brief Set of nodes which have been combined (at least once).
///
/// This is used to allow us to reliably add any operands of a DAG node
/// which have not yet been combined to the worklist.
SmallPtrSet<SDNode *, 64> CombinedNodes;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
/// When an instruction is simplified, add all users of the instruction to
/// the work lists because they might get more simplified now.
void AddUsersToWorklist(SDNode *N) {
for (SDNode *Node : N->uses())
AddToWorklist(Node);
}
/// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);
public:
/// Add to the worklist, making sure its instance is at the back (next to be
/// processed).
void AddToWorklist(SDNode *N) {
// Skip handle nodes as they can't usefully be combined and confuse the
// zero-use deletion strategy.
if (N->getOpcode() == ISD::HANDLENODE)
return;
if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
Worklist.push_back(N);
}
/// Remove all instances of N from the worklist.
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
return; // Not in the worklist.
// Null out the entry rather than erasing it to avoid a linear operation.
Worklist[It->second] = nullptr;
WorklistMap.erase(It);
}
void deleteAndRecombine(SDNode *N);
bool recursivelyDeleteUnusedNodes(SDNode *N);
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo = true);
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
return CombineTo(N, &Res, 1, AddTo);
}
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
bool AddTo = true) {
SDValue To[] = { Res0, Res1 };
return CombineTo(N, To, 2, AddTo);
}
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
/// Check the specified integer node value to see if it can be simplified or
/// if things it uses can be simplified by bit propagation.
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
/// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
/// load.
///
/// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
/// \param InVecVT type of the input vector to EVE with bitcasts resolved.
/// \param EltNo index of the vector element to load.
/// \param OriginalLoad load that EVE came from to be replaced.
/// \returns EVE on success, SDValue() on failure.
SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
SDValue PromoteIntBinOp(SDValue Op);
SDValue PromoteIntShiftOp(SDValue Op);
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
SDValue Trunc, SDValue ExtLoad, SDLoc DL,
ISD::NodeType ExtType);
/// Call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
SDValue combine(SDNode *N);
// Visitation implementation - Implement dag node combining for different
// node types. The semantics are as follows:
// Return Value:
// SDValue.getNode() == 0 - No change was made
// SDValue.getNode() == N - N was replaced, is dead and has been handled.
// otherwise - N should be replaced by the returned Operand.
//
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
SDValue visitSUBC(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
SDValue visitSREM(SDNode *N);
SDValue visitUREM(SDNode *N);
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitSMULO(SDNode *N);
SDValue visitUMULO(SDNode *N);
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
SDValue visitFCOPYSIGN(SDNode *N);
SDValue visitSINT_TO_FP(SDNode *N);
SDValue visitUINT_TO_FP(SDNode *N);
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
SDValue visitFP_ROUND_INREG(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
SDValue visitFMINNUM(SDNode *N);
SDValue visitFMAXNUM(SDNode *N);
SDValue visitBRCOND(SDNode *N);
SDValue visitBR_CC(SDNode *N);
SDValue visitLOAD(SDNode *N);
SDValue visitSTORE(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
SDLoc DL, bool foldBooleans = true);
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
bool isOneUseSetCC(SDValue N) const;
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildReciprocalEstimate(SDValue Op);
SDValue BuildRsqrtEstimate(SDValue Op);
SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
SDLoc DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
SDValue FindBetterChain(SDNode *N, SDValue Chain);
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
// Ptr to the mem node.
LSBaseSDNode *MemNode;
// Offset from the base ptr.
int64_t OffsetFromBase;
// The sequence number of this mem node; the lowest mem operand in the DAG
// starts at zero.
unsigned SequenceNum;
};
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
/// \return True if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumElem,
bool IsConstantSrc, bool UseVector);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
bool MergeConsecutiveStores(StoreSDNode *N);
/// \brief Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
///
/// \p N needs to be a truncation and its first operand an AND. Other
/// requirements are checked by the function (e.g. that trunc is
/// single-use); if they are not met, an empty SDValue is returned.
SDValue distributeTruncateThroughAnd(SDNode *N);
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
auto *F = DAG.getMachineFunction().getFunction();
ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
F->hasFnAttribute(Attribute::MinSize);
}
/// Runs the dag combiner on all nodes in the worklist.
void Run(CombineLevel AtLevel);
SelectionDAG &getDAG() const { return DAG; }
/// Returns a type large enough to hold any valid shift amount - before type
/// legalization these can be huge.
EVT getShiftAmountTy(EVT LHSTy) {
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
: TLI.getPointerTy();
}
/// This method returns true if we are running before type legalization or
/// if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
if (!LegalTypes) return true;
return TLI.isTypeLegal(VT);
}
/// Convenience wrapper around TargetLowering::getSetCCResultType
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(*DAG.getContext(), VT);
}
};
}
namespace {
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
public:
explicit WorklistRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
void NodeDeleted(SDNode *N, SDNode *E) override {
DC.removeFromWorklist(N);
}
};
}
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorklist(N);
}
void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
((DAGCombiner*)DC)->removeFromWorklist(N);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
void DAGCombiner::deleteAndRecombine(SDNode *N) {
removeFromWorklist(N);
// If the operands of this node are only used by the node, they will now be
// dead. Make sure to re-visit them and recursively delete dead nodes.
for (const SDValue &Op : N->ops())
// For an operand generating multiple values, one of the values may
// become dead allowing further simplification (e.g. split index
// arithmetic from an indexed load).
if (Op->hasOneUse() || Op->getNumValues() > 1)
AddToWorklist(Op.getNode());
DAG.DeleteNode(N);
}
/// Return 1 if we can compute the negated form of the specified expression
/// for the same cost as the expression itself, 2 if we can compute it more
/// cheaply than the expression itself, and 0 if negation is not free.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return 2;
// Don't allow anything with multiple uses.
if (!Op.hasOneUse()) return 0;
// Don't recurse exponentially.
if (Depth > 6) return 0;
switch (Op.getOpcode()) {
default: return 0;
case ISD::ConstantFP:
// Don't invert constant FP values after legalize. The negated constant
// isn't necessarily legal.
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;
// After operation legalization, it might not be legal to create new FSUBs.
if (LegalOperations &&
!TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
return 0;
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
Options, Depth + 1))
return V;
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
Depth + 1);
}
}
/// If isNegatibleForFree returned a non-zero value, return the newly negated
/// expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations, unsigned Depth = 0) {
const TargetOptions &Options = DAG.getTarget().Options;
// fneg is removable even if it has multiple uses.
if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
// Don't allow anything with multiple uses.
assert(Op.hasOneUse() && "Unknown reuse!");
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
return DAG.getConstantFP(V, Op.getValueType());
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
assert(Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1),
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
assert(Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
if (N0CFP->getValueAPF().isZero())
return Op.getOperand(1);
// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(0));
case ISD::FMUL:
case ISD::FDIV:
assert(!Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, Depth+1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, Depth+1));
case ISD::FP_EXTEND:
case ISD::FSIN:
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1));
case ISD::FP_ROUND:
return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
Op.getOperand(1));
}
}
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(2);
return true;
}
if (N.getOpcode() != ISD::SELECT_CC ||
!TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
return false;
if (TLI.getBooleanContents(N.getValueType()) ==
TargetLowering::UndefinedBooleanContent)
return false;
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(4);
return true;
}
/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
return true;
return false;
}
/// Returns true if N is a BUILD_VECTOR node whose
/// elements are all the same constant or undefined.
static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
if (!C)
return false;
APInt SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
EVT EltVT = N->getValueType(0).getVectorElementType();
return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs) &&
EltVT.getSizeInBits() >= SplatBitSize);
}
// \brief Returns the SDNode if it is a constant integer BuildVector
// or constant integer.
static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
return N.getNode();
return nullptr;
}
// \brief Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
if (isa<ConstantFPSDNode>(N))
return N.getNode();
if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
return N.getNode();
return nullptr;
}
// \brief Returns the SDNode if it is a constant splat BuildVector or constant
// int.
static ConstantSDNode *isConstOrConstSplat(SDValue N) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
// FIXME: We blindly ignore splats which include undef which is overly
// pessimistic.
if (CN && UndefElements.none() &&
CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
return nullptr;
}
// \brief Returns the SDNode if it is a constant splat BuildVector or constant
// float.
static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
if (CN && UndefElements.none())
return CN;
}
return nullptr;
}
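/// Reassociate a commutative binary operation: fold (op (op x, c1), c2) into
/// (op x, (op c1, c2)) when both constants fold, or pull the constant to the
/// outer level as (op (op x, y), c1) when the inner node has one use. Returns
/// a null SDValue when no reassociation applies.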
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
if (N0.getOpcode() == Opc) {
if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
if (N0.hasOneUse()) {
// reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
// use
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
if (!OpNode.getNode())
return SDValue();
AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
}
if (N1.getOpcode() == Opc) {
if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
return SDValue();
}
if (N1.hasOneUse()) {
// reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
// use
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
if (!OpNode.getNode())
return SDValue();
AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
}
}
}
return SDValue();
}
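/// Replace all NumTo values produced by N with the corresponding values in To,
/// optionally pushing the replacements and their users onto the worklist, and
/// delete N if it ends up with no remaining uses.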
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
++NodesCombined;
DEBUG(dbgs() << "\nReplacing.1 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
To[0].getNode()->dump(&DAG);
dbgs() << " and " << NumTo-1 << " other values\n");
for (unsigned i = 0, e = NumTo; i != e; ++i)
assert((!To[i].getNode() ||
N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To);
if (AddTo) {
// Push the new nodes and any users onto the worklist
for (unsigned i = 0, e = NumTo; i != e; ++i) {
if (To[i].getNode()) {
AddToWorklist(To[i].getNode());
AddUsersToWorklist(To[i].getNode());
}
}
}
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (N->use_empty())
deleteAndRecombine(N);
return SDValue(N, 0);
}
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
AddToWorklist(TLO.New.getNode());
AddUsersToWorklist(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node.
if (TLO.Old.getNode()->use_empty())
deleteAndRecombine(TLO.Old.getNode());
}
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownZero, KnownOne;
if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
return false;
// Revisit the node.
AddToWorklist(Op.getNode());
// Replace the old value with the new one.
++NodesCombined;
DEBUG(dbgs() << "\nReplacing.2 ";
TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: ";
TLO.New.getNode()->dump(&DAG);
dbgs() << '\n');
CommitTargetLoweringOpt(TLO);
return true;
}
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDLoc dl(Load);
EVT VT = Load->getValueType(0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
DEBUG(dbgs() << "\nReplacing.9 ";
Load->dump(&DAG);
dbgs() << "\nWith: ";
Trunc.getNode()->dump(&DAG);
dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
deleteAndRecombine(Load);
AddToWorklist(Trunc.getNode());
}
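/// Promote Op to the wider type PVT. Loads become extending loads (and
/// \p Replace is set so the caller can replace the original load),
/// AssertSext/AssertZext and Constant nodes are re-extended explicitly, and
/// any other node is wrapped in an ANY_EXTEND if that is legal.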
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
SDLoc dl(Op);
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
}
unsigned Opc = Op.getOpcode();
switch (Opc) {
default: break;
case ISD::AssertSext:
return DAG.getNode(ISD::AssertSext, dl, PVT,
SExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::AssertZext:
return DAG.getNode(ISD::AssertZext, dl, PVT,
ZExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, PVT, Op);
}
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
return SDValue();
return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}
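/// Promote Op to PVT, then sign-extend the result in-register from its
/// original width so the value keeps its signed interpretation.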
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
return SDValue();
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
DAG.getValueType(OldVT));
}
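/// Promote Op to PVT, then zero-extend the result in-register from its
/// original width.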
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
EVT OldVT = Op.getValueType();
SDLoc dl(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
return SDValue();
AddToWorklist(NewOp.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
if (!NN0.getNode())
return SDValue();
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
SDValue NN1;
if (N0 == N1)
NN1 = NN0;
else {
NN1 = PromoteOperand(N1, PVT, Replace1);
if (!NN1.getNode())
return SDValue();
}
AddToWorklist(NN0.getNode());
if (NN1.getNode())
AddToWorklist(NN1.getNode());
if (Replace0)
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
if (Replace1)
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
SDLoc dl(Op);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Opc, dl, PVT, NN0, NN1));
}
return SDValue();
}
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
bool Replace = false;
SDValue N0 = Op.getOperand(0);
if (Opc == ISD::SRA)
N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
else if (Opc == ISD::SRL)
N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
if (!N0.getNode())
return SDValue();
AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
SDLoc dl(Op);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
}
return SDValue();
}
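/// Promote the specified integer extend operation if the target indicates it
/// is beneficial, mirroring PromoteIntBinOp and PromoteIntShiftOp above.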
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
if (!LegalOperations)
return SDValue();
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
}
return SDValue();
}
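/// Promote the specified integer load if the target indicates it is
/// beneficial: the load is replaced by an extending load of the promoted type
/// followed by a truncate back to the original type.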
bool DAGCombiner::PromoteLoad(SDValue Op) {
if (!LegalOperations)
return false;
EVT VT = Op.getValueType();
if (VT.isVector() || !VT.isInteger())
return false;
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return false;
EVT PVT = VT;
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
SDLoc dl(Op);
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
DEBUG(dbgs() << "\nPromoting ";
N->dump(&DAG);
dbgs() << "\nTo: ";
Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
deleteAndRecombine(N);
AddToWorklist(Result.getNode());
return true;
}
return false;
}
/// \brief Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
if (!N->use_empty())
return false;
SmallSetVector<SDNode *, 16> Nodes;
Nodes.insert(N);
do {
N = Nodes.pop_back_val();
if (!N)
continue;
if (N->use_empty()) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
Nodes.insert(N->getOperand(i).getNode());
removeFromWorklist(N);
DAG.DeleteNode(N);
} else {
AddToWorklist(N);
}
} while (!Nodes.empty());
return true;
}
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
void DAGCombiner::Run(CombineLevel AtLevel) {
// Set the instance variables so that the various visit routines can use them.
Level = AtLevel;
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
AddToWorklist(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
// While the worklist isn't empty, find a node and try to combine it.
while (!WorklistMap.empty()) {
SDNode *N;
// The Worklist holds the SDNodes in order, but it may contain null entries.
do {
N = Worklist.pop_back_val();
} while (!N);
bool GoodWorklistEntry = WorklistMap.erase(N);
(void)GoodWorklistEntry;
assert(GoodWorklistEntry &&
"Found a worklist entry without a corresponding map entry!");
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
if (recursivelyDeleteUnusedNodes(N))
continue;
WorklistRemover DeadNodes(*this);
// If this combine is running after legalizing the DAG, re-legalize any
// nodes pulled off the worklist.
if (Level == AfterLegalizeDAG) {
SmallSetVector<SDNode *, 16> UpdatedNodes;
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
for (SDNode *LN : UpdatedNodes) {
AddToWorklist(LN);
AddUsersToWorklist(LN);
}
if (!NIsValid)
continue;
}
DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
// Add any operands of the new node which have not yet been combined to the
// worklist as well. Because the worklist uniques things already, this
// won't repeatedly process the same operand.
CombinedNodes.insert(N);
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
if (!CombinedNodes.count(N->getOperand(i).getNode()))
AddToWorklist(N->getOperand(i).getNode());
SDValue RV = combine(N);
if (!RV.getNode())
continue;
++NodesCombined;
// If we get back the same node we passed in, rather than a new node or
// zero, we know that the node must have defined multiple values and
// CombineTo was used. Since CombineTo takes care of the worklist
// mechanics for us, we have no work to do in this case.
if (RV.getNode() == N)
continue;
assert(N->getOpcode() != ISD::DELETED_NODE &&
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
DEBUG(dbgs() << " ... into: ";
RV.getNode()->dump(&DAG));
// Transfer debug value.
DAG.TransferDbgValues(SDValue(N, 0), RV);
if (N->getNumValues() == RV.getNode()->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
SDValue OpV = RV;
DAG.ReplaceAllUsesWith(N, &OpV);
}
// Push the new node and any users onto the worklist
AddToWorklist(RV.getNode());
AddUsersToWorklist(RV.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
// something else needing this node. This will also take care of adding any
// operands which have lost a user to the worklist.
recursivelyDeleteUnusedNodes(N);
}
// If the root changed (e.g. it was a dead load), update the root.
DAG.setRoot(Dummy.getValue());
DAG.RemoveDeadNodes();
}
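/// Dispatch to the opcode-specific visit routine for this node, or return a
/// null SDValue if there is none.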
SDValue DAGCombiner::visit(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::TokenFactor: return visitTokenFactor(N);
case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
case ISD::SUBC: return visitSUBC(N);
case ISD::ADDE: return visitADDE(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
case ISD::SREM: return visitSREM(N);
case ISD::UREM: return visitUREM(N);
case ISD::MULHU: return visitMULHU(N);
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO: return visitSMULO(N);
case ISD::UMULO: return visitUMULO(N);
case ISD::SDIVREM: return visitSDIVREM(N);
case ISD::UDIVREM: return visitUDIVREM(N);
case ISD::AND: return visitAND(N);
case ISD::OR: return visitOR(N);
case ISD::XOR: return visitXOR(N);
case ISD::SHL: return visitSHL(N);
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
case ISD::FMA: return visitFMA(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
case ISD::FFLOOR: return visitFFLOOR(N);
case ISD::FMINNUM: return visitFMINNUM(N);
case ISD::FMAXNUM: return visitFMAXNUM(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
case ISD::STORE: return visitSTORE(N);
case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSTORE: return visitMSTORE(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
}
return SDValue();
}
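/// Run the opcode-specific combine for N; if that makes no change, try the
/// target's DAG combines, then integer promotion, and finally commuting the
/// operands of a commutative node to expose an existing CSE'd node.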
SDValue DAGCombiner::combine(SDNode *N) {
SDValue RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
if (!RV.getNode()) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned NULL!");
if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
// Expose the DAG combiner to the target combiner impls.
TargetLowering::DAGCombinerInfo
DagCombineInfo(DAG, Level, false, this);
RV = TLI.PerformDAGCombine(N, DagCombineInfo);
}
}
// If nothing happened still, try promoting the operation.
if (!RV.getNode()) {
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
RV = PromoteIntBinOp(SDValue(N, 0));
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
RV = PromoteIntShiftOp(SDValue(N, 0));
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
RV = PromoteExtend(SDValue(N, 0));
break;
case ISD::LOAD:
if (PromoteLoad(SDValue(N, 0)))
RV = SDValue(N, 0);
break;
}
}
// If N is a commutative binary node, try commuting it to enable more
// sdisel CSE.
if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = {N1, N0};
SDNode *CSENode;
if (const BinaryWithFlagsSDNode *BinNode =
dyn_cast<BinaryWithFlagsSDNode>(N)) {
CSENode = DAG.getNodeIfExists(
N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
BinNode->hasNoSignedWrap(), BinNode->isExact());
} else {
CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
}
if (CSENode)
return SDValue(CSENode, 0);
}
}
return RV;
}
/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
if (unsigned NumOps = N->getNumOperands()) {
if (N->getOperand(0).getValueType() == MVT::Other)
return N->getOperand(0);
if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
return N->getOperand(NumOps-1);
for (unsigned i = 1; i < NumOps-1; ++i)
if (N->getOperand(i).getValueType() == MVT::Other)
return N->getOperand(i);
}
return SDValue();
}
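/// Simplify a TokenFactor: drop redundant chains, merge in single-use
/// TokenFactor operands, and remove duplicate and entry-token operands.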
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// If N has two operands, where one has an input chain equal to the other,
// the 'other' chain is redundant.
if (N->getNumOperands() == 2) {
if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
return N->getOperand(0);
if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
return N->getOperand(1);
}
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
// Start out with this token factor.
TFs.push_back(N);
// Iterate through token factors. The TFs list grows when new token factors
// are encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
SDNode *TF = TFs[i];
// Check each of the operands.
for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
SDValue Op = TF->getOperand(i);
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
// redundant.
Changed = true;
break;
case ISD::TokenFactor:
if (Op.hasOneUse() &&
std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
// Queue up for processing.
TFs.push_back(Op.getNode());
// Clean up in case the token factor is removed.
AddToWorklist(Op.getNode());
Changed = true;
break;
}
// Fall thru
default:
// Only add if it isn't already in the list.
if (SeenOps.insert(Op.getNode()).second)
Ops.push_back(Op);
else
Changed = true;
break;
}
}
}
SDValue Result;
// If we've changed things around then replace token factor.
if (Changed) {
if (Ops.empty()) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
// New and improved token factor.
Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
}
// Add users to worklist if AA is enabled, since it may introduce
// a lot of new chained token factors while removing memory deps.
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
: DAG.getSubtarget().useAA();
return CombineTo(N, Result, UseAA /*add to worklist*/);
}
return Result;
}
/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
WorklistRemover DeadNodes(*this);
// Replacing results may cause a different MERGE_VALUES to suddenly
// be CSE'd with N, and carry its uses with it. Iterate until no
// uses remain, to ensure that the node can be safely deleted.
// First add the users of this node to the work list so that they
// can be tried again once they have new operands.
AddUsersToWorklist(N);
do {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
} while (!N->use_empty());
deleteAndRecombine(N);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
return N0;
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N1;
}
// fold (add x, undef) -> undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
// canonicalize constant to RHS
if (isConstantIntBuildVectorOrConstantInt(N0) &&
!isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
// fold (add x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() +
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N1C && N0.getOpcode() == ISD::SUB)
if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(N1C->getAPIntValue()+
N0C->getAPIntValue(), VT),
N0.getOperand(1));
// reassociate add
if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
return N1.getOperand(0);
// fold ((B-A)+A) -> B
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
N1.getOperand(1).getOperand(1));
// fold (A+(B-(C+A))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(1))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
N1.getOperand(1).getOperand(0));
// fold (A+((B-A)+or-C)) to (B+or-C)
if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
N1.getOperand(0).getOpcode() == ISD::SUB &&
N0 == N1.getOperand(0).getOperand(1))
return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
N1.getOperand(0).getOperand(0), N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
DAG.computeKnownBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
DAG.computeKnownBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
}
}
}
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL &&
N1.getOperand(0).getOpcode() == ISD::SUB)
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
if (C->getAPIntValue() == 0)
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
DAG.getNode(ISD::SHL, SDLoc(N), VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (N0.getOpcode() == ISD::SHL &&
N0.getOperand(0).getOpcode() == ISD::SUB)
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
if (C->getAPIntValue() == 0)
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
DAG.getNode(ISD::SHL, SDLoc(N), VT,
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
unsigned DestBits = VT.getScalarType().getSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
}
}
// add (sext i1), X -> sub X, (zext i1)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getValueType() == MVT::i1 &&
!TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
// add X, (sextinreg Y i1) -> sub X, (and Y 1)
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
}
}
return SDValue();
}
SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
// canonicalize constant to RHS.
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
DAG.computeKnownBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
DAG.computeKnownBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
}
return SDValue();
}
SDValue DAGCombiner::visitADDE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// canonicalize constant to RHS
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
N1, N0, CarryIn);
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
return SDValue();
}
// Since it may not be valid to emit a fold to zero for vector initializers,
// check whether we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
SelectionDAG &DAG,
bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
return DAG.getConstant(0, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
return DAG.getConstant(0, VT);
return SDValue();
}
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
if (ISD::isBuildVectorAllZeros(N1.getNode()))
return N0;
}
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
if (N1C)
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), VT));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (N0C && N0C->isAllOnesValue())
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
// fold C2-(A+C1) -> (C2-C1)-A
ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
VT);
return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
N0.getOperand(1).getOpcode() == ISD::ADD) &&
N0.getOperand(1).getOperand(0) == N1)
return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(1));
// fold ((A+(C+B))-B) -> A+C
if (N0.getOpcode() == ISD::ADD &&
N0.getOperand(1).getOpcode() == ISD::ADD &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::ADD, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(0));
// fold ((A-(B-C))-C) -> A-B
if (N0.getOpcode() == ISD::SUB &&
N0.getOperand(1).getOpcode() == ISD::SUB &&
N0.getOperand(1).getOperand(1) == N1)
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1).getOperand(0));
// If either operand of a sub is undef, the result is undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
// fold (sub Sym, c) -> Sym-c
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() -
(uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
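// As in visitADD: (sext_inreg Y, i1) is 0 or -1 and (and Y, 1) is 0 or 1,
// so subtracting the former equals adding the latter.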
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
}
}
return SDValue();
}
SDValue DAGCombiner::visitSUBC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into a SUB.
if (!N->hasAnyUseOfValue(1))
return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, VT),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// fold (subc x, 0) -> x + no borrow
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (N0C && N0C->isAllOnesValue())
return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
return SDValue();
}
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
// fold (sube x, y, false) -> (subc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
return SDValue();
}
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
// fold (mul x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
bool N0IsConst = false;
bool N1IsConst = false;
APInt ConstValue0, ConstValue1;
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
N0IsConst = isa<ConstantSDNode>(N0);
if (N0IsConst)
ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
N1IsConst = isa<ConstantSDNode>(N1);
if (N1IsConst)
ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
}
// fold (mul c1, c2) -> c1*c2
if (N0IsConst && N1IsConst)
return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
// canonicalize constant to RHS (vector doesn't have to splat)
if (isConstantIntBuildVectorOrConstantInt(N0) &&
!isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1 == 0)
return N1;
// We require a splat of the entire scalar bit width for non-contiguous
// bit patterns.
bool IsFullSplat =
ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT), N0);
// fold (mul x, (1 << c)) -> x << c
if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(ConstValue1.logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
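// e.g. (mul x, -8) -> (sub 0, (shl x, 3))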
if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT),
DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
DAG.getConstant(Log2Val,
getShiftAmountTy(N0.getValueType()))));
}
APInt Val;
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
if (N1IsConst && N0.getOpcode() == ISD::SHL &&
(isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
isa<ConstantSDNode>(N0.getOperand(1)))) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
N1, N0.getOperand(1));
AddToWorklist(C3.getNode());
return DAG.getNode(ISD::MUL, SDLoc(N), VT,
N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
SDValue Sh(nullptr,0), Y(nullptr,0);
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
(isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
isa<ConstantSDNode>(N0.getOperand(1))) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
isa<ConstantSDNode>(N1.getOperand(1)) &&
N1.getNode()->hasOneUse()) {
Sh = N1; Y = N0;
}
if (Sh.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
Sh.getOperand(0), Y);
return DAG.getNode(ISD::SHL, SDLoc(N), VT,
Mul, Sh.getOperand(1));
}
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
(isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
isa<ConstantSDNode>(N0.getOperand(1))))
return DAG.getNode(ISD::ADD, SDLoc(N), VT,
DAG.getNode(ISD::MUL, SDLoc(N0), VT,
N0.getOperand(0), N1),
DAG.getNode(ISD::MUL, SDLoc(N1), VT,
N0.getOperand(1), N1));
// reassociate mul
if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
return RMUL;
return SDValue();
}
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
if (N1C && N1C->getAPIntValue() == 1LL)
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
return DAG.getNode(ISD::SUB, SDLoc(N), VT,
DAG.getConstant(0, VT), N0);
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
(-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2SDivCheap())
return SDValue();
// Target-specific implementation of sdiv x, pow2.
SDValue Res = BuildSDIVPow2(N);
if (Res.getNode())
return Res;
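// Otherwise expand into a shift-based sequence. For example, a 32-bit
// (sdiv x, 4) with lg2 == 2 emits:
//   sgn = sra x, 31     ; 0 or -1
//   adj = srl sgn, 30   ; 0 or 3
//   add = add x, adj    ; adjust negative x so the shift rounds toward zero
//   res = sra add, 2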
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
SDValue SGN =
DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
DAG.getConstant(VT.getScalarSizeInBits() - 1,
getShiftAmountTy(N0.getValueType())));
AddToWorklist(SGN.getNode());
// Add (N0 < 0) ? 2^lg2 - 1 : 0, so the following SRA rounds toward zero.
SDValue SRL =
DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
DAG.getConstant(VT.getScalarSizeInBits() - lg2,
getShiftAmountTy(SGN.getValueType())));
SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
AddToWorklist(SRL.getNode());
AddToWorklist(ADD.getNode()); // Divide by pow2
SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (N1C->getAPIntValue().isNonNegative())
return SRA;
AddToWorklist(SRA.getNode());
return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
}
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
if (N1C && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
if (Op.getNode()) return Op;
}
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
// fold (udiv x, (1 << c)) -> x >>u c
if (N1C && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
DAG.getConstant(N1C->getAPIntValue().logBase2(),
getShiftAmountTy(N0.getValueType())));
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
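// e.g. (udiv x, (shl 8, y)) -> (srl x, (add y, 3))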
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
ADDVT));
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
}
}
}
// fold (udiv x, c) -> an alternate sequence when integer divide is expensive
if (N1C && !TLI.isIntDivCheap()) {
SDValue Op = BuildUDIV(N);
if (Op.getNode()) return Op;
}
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (!VT.isVector()) {
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
}
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
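// This only pays off when combine() manages to simplify the division;
// otherwise the original remainder node is kept.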
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
AddToWorklist(Mul.getNode());
return Sub;
}
}
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
// fold (urem x, pow2) -> (and x, pow2-1)
if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
DAG.getConstant(N1C->getAPIntValue()-1,VT));
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
SDValue Add =
DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
VT));
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
}
}
}
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
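// As with SREM, this only pays off when combine() simplifies the division;
// otherwise the original remainder node is kept.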
if (N1C && !N1C->isNullValue()) {
SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
AddToWorklist(Div.getNode());
SDValue OptimizedDiv = combine(Div.getNode());
if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
OptimizedDiv, N1);
SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
AddToWorklist(Mul.getNode());
return Sub;
}
}
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
return SDValue();
}