blob: fec9d4e257c256b0eb552ee00982d7688bcf967f [file] [log] [blame]
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
// is mostly EmitInstrWithCustomInserter().
//
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM),
TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
setOperationAction(ISD::MUL, MVT::i64, Expand);
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
computeRegisterProperties();
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::FSUB, MVT::f32, Expand);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setSchedulingPreference(Sched::VLIW);
}
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const
{
MachineFunction * MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = *MI;
switch (MI->getOpcode()) {
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::CLAMP_R600:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
break;
}
case AMDGPU::FABS_R600:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->addFlag(NewMI, 1, MO_FLAG_ABS);
break;
}
case AMDGPU::FNEG_R600:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addImm(0) // Flags
.addReg(AMDGPU::PRED_SEL_OFF);
TII->addFlag(NewMI, 1, MO_FLAG_NEG);
break;
}
case AMDGPU::R600_LOAD_CONST:
{
int64_t RegIndex = MI->getOperand(1).getImm();
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
.addOperand(MI->getOperand(0))
.addReg(ConstantReg);
break;
}
case AMDGPU::MASK_WRITE:
{
unsigned maskedRegister = MI->getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
TII->addFlag(defInstr, 0, MO_FLAG_MASK);
// Return early so the instruction is not erased
return BB;
}
case AMDGPU::RAT_WRITE_CACHELESS_eg:
{
// Convert to DWORD address
unsigned NewAddr = MRI.createVirtualRegister(
&AMDGPU::R600_TReg32_XRegClass);
unsigned ShiftValue = MRI.createVirtualRegister(
&AMDGPU::R600_TReg32RegClass);
unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
// XXX In theory, we should be able to pass ShiftValue directly to
// the LSHR_eg instruction as an inline literal, but I tried doing it
// this way and it didn't produce the correct results.
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
ShiftValue)
.addReg(AMDGPU::ALU_LITERAL_X)
.addReg(AMDGPU::PRED_SEL_OFF)
.addImm(2);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
.addOperand(MI->getOperand(1))
.addReg(ShiftValue)
.addReg(AMDGPU::PRED_SEL_OFF);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
.addReg(NewAddr)
.addImm(EOP); // Set End of program bit
break;
}
case AMDGPU::RESERVE_REG:
{
R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
int64_t ReservedIndex = MI->getOperand(0).getImm();
unsigned ReservedReg =
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
MFI->ReservedRegs.push_back(ReservedReg);
break;
}
case AMDGPU::TXD:
{
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addReg(t0, RegState::Implicit)
.addReg(t1, RegState::Implicit);
break;
}
case AMDGPU::TXD_SHADOW:
{
unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
.addOperand(MI->getOperand(3))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
.addOperand(MI->getOperand(2))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5));
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(MI->getOperand(4))
.addOperand(MI->getOperand(5))
.addReg(t0, RegState::Implicit)
.addReg(t1, RegState::Implicit);
break;
}
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(0);
break;
case AMDGPU::BRANCH_COND_f32:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
.addReg(AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_ZERO)
.addImm(0); // Flags
TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
case AMDGPU::BRANCH_COND_i32:
{
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
.addReg(AMDGPU::PREDICATE_BIT)
.addOperand(MI->getOperand(1))
.addImm(OPCODE_IS_ZERO_INT)
.addImm(0); // Flags
TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
}
MI->eraseFromParent();
return BB;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
using namespace llvm::Intrinsic;
using namespace llvm::AMDGPUIntrinsic;
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
case AMDGPUIntrinsic::AMDGPU_store_output: {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
if (!MRI.isLiveOut(Reg)) {
MRI.addLiveOut(Reg);
}
return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
}
// default for switch(IntrinsicID)
default: break;
}
// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
DebugLoc DL = Op.getDebugLoc();
switch(IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case AMDGPUIntrinsic::R600_load_input: {
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
}
case r600_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
case r600_read_ngroups_y:
return LowerImplicitParameter(DAG, VT, DL, 1);
case r600_read_ngroups_z:
return LowerImplicitParameter(DAG, VT, DL, 2);
case r600_read_global_size_x:
return LowerImplicitParameter(DAG, VT, DL, 3);
case r600_read_global_size_y:
return LowerImplicitParameter(DAG, VT, DL, 4);
case r600_read_global_size_z:
return LowerImplicitParameter(DAG, VT, DL, 5);
case r600_read_local_size_x:
return LowerImplicitParameter(DAG, VT, DL, 6);
case r600_read_local_size_y:
return LowerImplicitParameter(DAG, VT, DL, 7);
case r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
case r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
case r600_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Y, VT);
case r600_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Z, VT);
case r600_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_X, VT);
case r600_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Y, VT);
case r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
}
} // end switch(Op.getOpcode())
return SDValue();
}
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
SDValue Chain = Op.getOperand(0);
SDValue CC = Op.getOperand(1);
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue JumpT = Op.getOperand(4);
SDValue CmpValue;
SDValue Result;
CmpValue = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::i32,
LHS, RHS,
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
CC);
Result = DAG.getNode(
AMDGPUISD::BRANCH_COND,
CmpValue.getDebugLoc(),
MVT::Other, Chain,
JumpT, CmpValue);
return Result;
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
DebugLoc DL,
unsigned DwordOffset) const
{
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getConstant(ByteOffset, MVT::i32), // PTR
MachinePointerInfo(ConstantPointerNull::get(PtrType)),
false, false, false, 0);
}
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
Op.getOperand(0),
Op.getOperand(0),
DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(32, MVT::i32),
Op.getOperand(1)));
}
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
DebugLoc DL = Op.getDebugLoc();
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue True = Op.getOperand(2);
SDValue False = Op.getOperand(3);
SDValue CC = Op.getOperand(4);
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
SDValue Temp;
// LHS and RHS are guaranteed to be the same value type
EVT CompareVT = LHS.getValueType();
// We need all the operands of SELECT_CC to have the same value type, so if
// necessary we need to convert LHS and RHS to be the same type True and
// False. True and False are guaranteed to have the same type as this
// SELECT_CC node.
if (CompareVT != VT) {
ISD::NodeType ConversionOp = ISD::DELETED_NODE;
if (VT == MVT::f32 && CompareVT == MVT::i32) {
if (isUnsignedIntSetCC(CCOpcode)) {
ConversionOp = ISD::UINT_TO_FP;
} else {
ConversionOp = ISD::SINT_TO_FP;
}
} else if (VT == MVT::i32 && CompareVT == MVT::f32) {
ConversionOp = ISD::FP_TO_SINT;
} else {
// I don't think there will be any other type pairings.
assert(!"Unhandled operand type parings in SELECT_CC");
}
// XXX Check the value of LHS and RHS and avoid creating sequences like
// (FTOI (ITOF))
LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
}
// If True is a hardware TRUE value and False is a hardware FALSE value or
// vice-versa we can handle this with a native instruction (SET* instructions).
if ((isHWTrueValue(True) && isHWFalseValue(False))) {
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
}
// XXX If True is a hardware TRUE value and False is a hardware FALSE value,
// we can handle this with a native instruction, but we need to swap true
// and false and change the conditional.
if (isHWTrueValue(False) && isHWFalseValue(True)) {
}
// XXX Check if we can lower this to a SELECT or if it is supported by a native
// operation. (The code below does this but we don't have the Instruction
// selection patterns to do this yet.
#if 0
if (isZero(LHS) || isZero(RHS)) {
SDValue Cond = (isZero(LHS) ? RHS : LHS);
bool SwapTF = false;
switch (CCOpcode) {
case ISD::SETOEQ:
case ISD::SETUEQ:
case ISD::SETEQ:
SwapTF = true;
// Fall through
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
// We can lower to select
if (SwapTF) {
Temp = True;
True = False;
False = Temp;
}
// CNDE
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
default:
// Supported by a native operation (CNDGE, CNDGT)
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
}
}
#endif
// If we make it this for it means we have no native instructions to handle
// this SELECT_CC, so we must lower it.
SDValue HWTrue, HWFalse;
if (VT == MVT::f32) {
HWTrue = DAG.getConstantFP(1.0f, VT);
HWFalse = DAG.getConstantFP(0.0f, VT);
} else if (VT == MVT::i32) {
HWTrue = DAG.getConstant(-1, VT);
HWFalse = DAG.getConstant(0, VT);
}
else {
assert(!"Unhandled value type in LowerSELECT_CC");
}
// Lower this unsupported SELECT_CC into a combination of two supported
// SELECT_CC operations.
SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
// Convert floating point condition to i1
if (VT == MVT::f32) {
Cond = DAG.getNode(ISD::FP_TO_SINT, DL, MVT::i32,
DAG.getNode(ISD::FNEG, DL, VT, Cond));
}
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
SDValue Cond;
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
DebugLoc DL = Op.getDebugLoc();
assert(Op.getValueType() == MVT::i32);
Cond = DAG.getNode(
ISD::SELECT_CC,
Op.getDebugLoc(),
MVT::i32,
LHS, RHS,
DAG.getConstant(-1, MVT::i32),
DAG.getConstant(0, MVT::i32),
CC);
Cond = DAG.getNode(
ISD::AND,
DL,
MVT::i32,
DAG.getConstant(1, MVT::i32),
Cond);
return Cond;
}