Changeset View
Changeset View
Standalone View
Standalone View
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// | //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// | ||||
// | // | ||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
// See https://llvm.org/LICENSE.txt for license information. | // See https://llvm.org/LICENSE.txt for license information. | ||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
// | // | ||||
//==-----------------------------------------------------------------------===// | //==-----------------------------------------------------------------------===// | ||||
// | // | ||||
/// \file | /// \file | ||||
/// Defines an instruction selector for the AMDGPU target. | /// Defines an instruction selector for the AMDGPU target. | ||||
// | // | ||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
#include "AMDGPUISelDAGToDAG.h" | |||||
#include "AMDGPU.h" | #include "AMDGPU.h" | ||||
#include "AMDGPUTargetMachine.h" | #include "AMDGPUTargetMachine.h" | ||||
#include "MCTargetDesc/R600MCTargetDesc.h" | #include "MCTargetDesc/R600MCTargetDesc.h" | ||||
#include "R600.h" | #include "R600RegisterInfo.h" | ||||
#include "R600Subtarget.h" | |||||
#include "SIMachineFunctionInfo.h" | #include "SIMachineFunctionInfo.h" | ||||
#include "llvm/Analysis/LegacyDivergenceAnalysis.h" | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" | ||||
#include "llvm/Analysis/ValueTracking.h" | #include "llvm/Analysis/ValueTracking.h" | ||||
#include "llvm/CodeGen/FunctionLoweringInfo.h" | #include "llvm/CodeGen/FunctionLoweringInfo.h" | ||||
#include "llvm/CodeGen/SelectionDAG.h" | #include "llvm/CodeGen/SelectionDAG.h" | ||||
#include "llvm/CodeGen/SelectionDAGISel.h" | #include "llvm/CodeGen/SelectionDAGISel.h" | ||||
#include "llvm/CodeGen/SelectionDAGNodes.h" | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||
#include "llvm/IR/IntrinsicsAMDGPU.h" | #include "llvm/IR/IntrinsicsAMDGPU.h" | ||||
#include "llvm/InitializePasses.h" | #include "llvm/InitializePasses.h" | ||||
#ifdef EXPENSIVE_CHECKS | #ifdef EXPENSIVE_CHECKS | ||||
#include "llvm/Analysis/LoopInfo.h" | #include "llvm/Analysis/LoopInfo.h" | ||||
#include "llvm/IR/Dominators.h" | #include "llvm/IR/Dominators.h" | ||||
#endif | #endif | ||||
#define DEBUG_TYPE "isel" | #define DEBUG_TYPE "isel" | ||||
using namespace llvm; | using namespace llvm; | ||||
namespace llvm { | |||||
class R600InstrInfo; | |||||
} // end namespace llvm | |||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
// Instruction Selector Implementation | // Instruction Selector Implementation | ||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||
namespace { | namespace { | ||||
static bool isNullConstantOrUndef(SDValue V) { | |||||
if (V.isUndef()) | |||||
return true; | |||||
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); | |||||
return Const != nullptr && Const->isNullValue(); | |||||
} | |||||
static bool getConstantValue(SDValue N, uint32_t &Out) { | |||||
// This is only used for packed vectors, where ussing 0 for undef should | |||||
// always be good. | |||||
if (N.isUndef()) { | |||||
Out = 0; | |||||
return true; | |||||
} | |||||
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { | |||||
Out = C->getAPIntValue().getSExtValue(); | |||||
return true; | |||||
} | |||||
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { | |||||
Out = C->getValueAPF().bitcastToAPInt().getSExtValue(); | |||||
return true; | |||||
} | |||||
return false; | |||||
} | |||||
// TODO: Handle undef as zero | |||||
static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, | |||||
bool Negate = false) { | |||||
assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2); | |||||
uint32_t LHSVal, RHSVal; | |||||
if (getConstantValue(N->getOperand(0), LHSVal) && | |||||
getConstantValue(N->getOperand(1), RHSVal)) { | |||||
SDLoc SL(N); | |||||
uint32_t K = Negate ? | |||||
(-LHSVal & 0xffff) | (-RHSVal << 16) : | |||||
(LHSVal & 0xffff) | (RHSVal << 16); | |||||
return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), | |||||
DAG.getTargetConstant(K, SL, MVT::i32)); | |||||
} | |||||
return nullptr; | |||||
} | |||||
static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) { | |||||
return packConstantV2I16(N, DAG, true); | |||||
} | |||||
/// AMDGPU specific code to select AMDGPU machine instructions for | |||||
/// SelectionDAG operations. | |||||
class AMDGPUDAGToDAGISel : public SelectionDAGISel { | |||||
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can | |||||
// make the right decision when generating code for different targets. | |||||
const GCNSubtarget *Subtarget; | |||||
// Default FP mode for the current function. | |||||
AMDGPU::SIModeRegisterDefaults Mode; | |||||
bool EnableLateStructurizeCFG; | |||||
// Instructions that will be lowered with a final instruction that zeros the | |||||
// high result bits. | |||||
bool fp16SrcZerosHighBits(unsigned Opc) const; | |||||
public: | |||||
explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr, | |||||
CodeGenOpt::Level OptLevel = CodeGenOpt::Default) | |||||
: SelectionDAGISel(*TM, OptLevel) { | |||||
EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; | |||||
} | |||||
~AMDGPUDAGToDAGISel() override = default; | |||||
void getAnalysisUsage(AnalysisUsage &AU) const override { | |||||
AU.addRequired<AMDGPUArgumentUsageInfo>(); | |||||
AU.addRequired<LegacyDivergenceAnalysis>(); | |||||
#ifdef EXPENSIVE_CHECKS | |||||
AU.addRequired<DominatorTreeWrapperPass>(); | |||||
AU.addRequired<LoopInfoWrapperPass>(); | |||||
#endif | |||||
SelectionDAGISel::getAnalysisUsage(AU); | |||||
} | |||||
bool matchLoadD16FromBuildVector(SDNode *N) const; | |||||
bool runOnMachineFunction(MachineFunction &MF) override; | |||||
void PreprocessISelDAG() override; | |||||
void Select(SDNode *N) override; | |||||
StringRef getPassName() const override; | |||||
void PostprocessISelDAG() override; | |||||
protected: | |||||
void SelectBuildVector(SDNode *N, unsigned RegClassID); | |||||
private: | |||||
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; | |||||
bool isNoNanSrc(SDValue N) const; | |||||
bool isInlineImmediate(const SDNode *N, bool Negated = false) const; | |||||
bool isNegInlineImmediate(const SDNode *N) const { | |||||
return isInlineImmediate(N, true); | |||||
} | |||||
bool isInlineImmediate16(int64_t Imm) const { | |||||
return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm()); | |||||
} | |||||
bool isInlineImmediate32(int64_t Imm) const { | |||||
return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm()); | |||||
} | |||||
bool isInlineImmediate64(int64_t Imm) const { | |||||
return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm()); | |||||
} | |||||
bool isInlineImmediate(const APFloat &Imm) const { | |||||
return Subtarget->getInstrInfo()->isInlineConstant(Imm); | |||||
} | |||||
bool isVGPRImm(const SDNode *N) const; | |||||
bool isUniformLoad(const SDNode *N) const; | |||||
bool isUniformBr(const SDNode *N) const; | |||||
bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, | |||||
SDValue &RHS) const; | |||||
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; | |||||
SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const; | |||||
SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; | |||||
SDNode *glueCopyToM0LDSInit(SDNode *N) const; | |||||
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; | |||||
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); | |||||
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); | |||||
bool isDSOffsetLegal(SDValue Base, unsigned Offset) const; | |||||
bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1, | |||||
unsigned Size) const; | |||||
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; | |||||
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, | |||||
SDValue &Offset1) const; | |||||
bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, | |||||
SDValue &Offset1) const; | |||||
bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0, | |||||
SDValue &Offset1, unsigned Size) const; | |||||
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, | |||||
SDValue &SOffset, SDValue &Offset, SDValue &Offen, | |||||
SDValue &Idxen, SDValue &Addr64) const; | |||||
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, | |||||
SDValue &SOffset, SDValue &Offset) const; | |||||
bool SelectMUBUFScratchOffen(SDNode *Parent, | |||||
SDValue Addr, SDValue &RSrc, SDValue &VAddr, | |||||
SDValue &SOffset, SDValue &ImmOffset) const; | |||||
bool SelectMUBUFScratchOffset(SDNode *Parent, | |||||
SDValue Addr, SDValue &SRsrc, SDValue &Soffset, | |||||
SDValue &Offset) const; | |||||
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, | |||||
SDValue &Offset) const; | |||||
bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr, | |||||
SDValue &Offset, uint64_t FlatVariant) const; | |||||
bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, | |||||
SDValue &Offset) const; | |||||
bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr, | |||||
SDValue &Offset) const; | |||||
bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr, | |||||
SDValue &Offset) const; | |||||
bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, | |||||
SDValue &VOffset, SDValue &Offset) const; | |||||
bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, | |||||
SDValue &Offset) const; | |||||
bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, | |||||
bool &Imm) const; | |||||
SDValue Expand32BitAddress(SDValue Addr) const; | |||||
bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, | |||||
bool &Imm) const; | |||||
bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; | |||||
bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; | |||||
bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; | |||||
bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; | |||||
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; | |||||
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; | |||||
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; | |||||
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods, | |||||
bool AllowAbs = true) const; | |||||
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | |||||
bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | |||||
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; | |||||
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, | |||||
SDValue &Clamp, SDValue &Omod) const; | |||||
bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods, | |||||
SDValue &Clamp, SDValue &Omod) const; | |||||
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, | |||||
SDValue &Clamp, SDValue &Omod) const; | |||||
bool SelectVOP3OMods(SDValue In, SDValue &Src, | |||||
SDValue &Clamp, SDValue &Omod) const; | |||||
bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | |||||
bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; | |||||
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | |||||
bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const; | |||||
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | |||||
SDValue getHi16Elt(SDValue In) const; | |||||
SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; | |||||
void SelectADD_SUB_I64(SDNode *N); | |||||
void SelectAddcSubb(SDNode *N); | |||||
void SelectUADDO_USUBO(SDNode *N); | |||||
void SelectDIV_SCALE(SDNode *N); | |||||
void SelectMAD_64_32(SDNode *N); | |||||
void SelectFMA_W_CHAIN(SDNode *N); | |||||
void SelectFMUL_W_CHAIN(SDNode *N); | |||||
SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, | |||||
uint32_t Offset, uint32_t Width); | |||||
void SelectS_BFEFromShifts(SDNode *N); | |||||
void SelectS_BFE(SDNode *N); | |||||
bool isCBranchSCC(const SDNode *N) const; | |||||
void SelectBRCOND(SDNode *N); | |||||
void SelectFMAD_FMA(SDNode *N); | |||||
void SelectATOMIC_CMP_SWAP(SDNode *N); | |||||
void SelectDSAppendConsume(SDNode *N, unsigned IntrID); | |||||
void SelectDS_GWS(SDNode *N, unsigned IntrID); | |||||
void SelectInterpP1F16(SDNode *N); | |||||
void SelectINTRINSIC_W_CHAIN(SDNode *N); | |||||
void SelectINTRINSIC_WO_CHAIN(SDNode *N); | |||||
void SelectINTRINSIC_VOID(SDNode *N); | |||||
protected: | |||||
// Include the pieces autogenerated from the target description. | |||||
#include "AMDGPUGenDAGISel.inc" | |||||
}; | |||||
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { | |||||
const R600Subtarget *Subtarget; | |||||
bool isConstantLoad(const MemSDNode *N, int cbID) const; | |||||
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); | |||||
bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, | |||||
SDValue& Offset); | |||||
public: | |||||
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : | |||||
AMDGPUDAGToDAGISel(TM, OptLevel) {} | |||||
void Select(SDNode *N) override; | |||||
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, | |||||
SDValue &Offset) override; | |||||
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, | |||||
SDValue &Offset) override; | |||||
bool runOnMachineFunction(MachineFunction &MF) override; | |||||
void PreprocessISelDAG() override {} | |||||
protected: | |||||
// Include the pieces autogenerated from the target description. | |||||
#include "R600GenDAGISel.inc" | |||||
}; | |||||
static SDValue stripBitcast(SDValue Val) { | static SDValue stripBitcast(SDValue Val) { | ||||
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; | return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; | ||||
} | } | ||||
// Figure out if this is really an extract of the high 16-bits of a dword. | // Figure out if this is really an extract of the high 16-bits of a dword. | ||||
static bool isExtractHiElt(SDValue In, SDValue &Out) { | static bool isExtractHiElt(SDValue In, SDValue &Out) { | ||||
In = stripBitcast(In); | In = stripBitcast(In); | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | |||||
/// This pass converts a legalized DAG into a AMDGPU-specific | /// This pass converts a legalized DAG into a AMDGPU-specific | ||||
// DAG, ready for instruction scheduling. | // DAG, ready for instruction scheduling. | ||||
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, | FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, | ||||
CodeGenOpt::Level OptLevel) { | CodeGenOpt::Level OptLevel) { | ||||
return new AMDGPUDAGToDAGISel(TM, OptLevel); | return new AMDGPUDAGToDAGISel(TM, OptLevel); | ||||
} | } | ||||
/// This pass converts a legalized DAG into a R600-specific | AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel( | ||||
// DAG, ready for instruction scheduling. | TargetMachine *TM /*= nullptr*/, | ||||
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, | CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/) | ||||
CodeGenOpt::Level OptLevel) { | : SelectionDAGISel(*TM, OptLevel) { | ||||
return new R600DAGToDAGISel(TM, OptLevel); | EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; | ||||
} | } | ||||
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { | bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { | ||||
#ifdef EXPENSIVE_CHECKS | #ifdef EXPENSIVE_CHECKS | ||||
DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | ||||
LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | ||||
for (auto &L : LI->getLoopsInPreorder()) { | for (auto &L : LI->getLoopsInPreorder()) { | ||||
assert(L->isLCSSAForm(DT)); | assert(L->isLCSSAForm(DT)); | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | case AMDGPUISD::DIV_FIXUP: | ||||
return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS; | return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS; | ||||
default: | default: | ||||
// fcopysign, select and others may be lowered to 32-bit bit operations | // fcopysign, select and others may be lowered to 32-bit bit operations | ||||
// which don't zero the high bits. | // which don't zero the high bits. | ||||
return false; | return false; | ||||
} | } | ||||
} | } | ||||
void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { | |||||
AU.addRequired<AMDGPUArgumentUsageInfo>(); | |||||
AU.addRequired<LegacyDivergenceAnalysis>(); | |||||
#ifdef EXPENSIVE_CHECKS | |||||
AU.addRequired<DominatorTreeWrapperPass>(); | |||||
AU.addRequired<LoopInfoWrapperPass>(); | |||||
#endif | |||||
SelectionDAGISel::getAnalysisUsage(AU); | |||||
} | |||||
bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const { | bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const { | ||||
assert(Subtarget->d16PreservesUnusedBits()); | assert(Subtarget->d16PreservesUnusedBits()); | ||||
MVT VT = N->getValueType(0).getSimpleVT(); | MVT VT = N->getValueType(0).getSimpleVT(); | ||||
if (VT != MVT::v2i16 && VT != MVT::v2f16) | if (VT != MVT::v2i16 && VT != MVT::v2f16) | ||||
return false; | return false; | ||||
SDValue Lo = N->getOperand(0); | SDValue Lo = N->getOperand(0); | ||||
SDValue Hi = N->getOperand(1); | SDValue Hi = N->getOperand(1); | ||||
▲ Show 20 Lines • Show All 2,627 Lines • ▼ Show 20 Lines | while (Position != CurDAG->allnodes_end()) { | ||||
if (ResNode) | if (ResNode) | ||||
ReplaceUses(Node, ResNode); | ReplaceUses(Node, ResNode); | ||||
IsModified = true; | IsModified = true; | ||||
} | } | ||||
} | } | ||||
CurDAG->RemoveDeadNodes(); | CurDAG->RemoveDeadNodes(); | ||||
} while (IsModified); | } while (IsModified); | ||||
} | } | ||||
bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { | |||||
Subtarget = &MF.getSubtarget<R600Subtarget>(); | |||||
return SelectionDAGISel::runOnMachineFunction(MF); | |||||
} | |||||
bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { | |||||
if (!N->readMem()) | |||||
return false; | |||||
if (CbId == -1) | |||||
return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || | |||||
N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT; | |||||
return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; | |||||
} | |||||
bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, | |||||
SDValue& IntPtr) { | |||||
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { | |||||
IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), | |||||
true); | |||||
return true; | |||||
} | |||||
return false; | |||||
} | |||||
bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, | |||||
SDValue& BaseReg, SDValue &Offset) { | |||||
if (!isa<ConstantSDNode>(Addr)) { | |||||
BaseReg = Addr; | |||||
Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); | |||||
return true; | |||||
} | |||||
return false; | |||||
} | |||||
void R600DAGToDAGISel::Select(SDNode *N) { | |||||
unsigned int Opc = N->getOpcode(); | |||||
if (N->isMachineOpcode()) { | |||||
N->setNodeId(-1); | |||||
return; // Already selected. | |||||
} | |||||
switch (Opc) { | |||||
default: break; | |||||
case AMDGPUISD::BUILD_VERTICAL_VECTOR: | |||||
case ISD::SCALAR_TO_VECTOR: | |||||
case ISD::BUILD_VECTOR: { | |||||
EVT VT = N->getValueType(0); | |||||
unsigned NumVectorElts = VT.getVectorNumElements(); | |||||
unsigned RegClassID; | |||||
// BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG | |||||
// that adds a 128 bits reg copy when going through TwoAddressInstructions | |||||
// pass. We want to avoid 128 bits copies as much as possible because they | |||||
// can't be bundled by our scheduler. | |||||
switch(NumVectorElts) { | |||||
case 2: RegClassID = R600::R600_Reg64RegClassID; break; | |||||
case 4: | |||||
if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) | |||||
RegClassID = R600::R600_Reg128VerticalRegClassID; | |||||
else | |||||
RegClassID = R600::R600_Reg128RegClassID; | |||||
break; | |||||
default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); | |||||
} | |||||
SelectBuildVector(N, RegClassID); | |||||
return; | |||||
} | |||||
} | |||||
SelectCode(N); | |||||
} | |||||
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, | |||||
SDValue &Offset) { | |||||
ConstantSDNode *C; | |||||
SDLoc DL(Addr); | |||||
if ((C = dyn_cast<ConstantSDNode>(Addr))) { | |||||
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); | |||||
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | |||||
} else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && | |||||
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { | |||||
Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); | |||||
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | |||||
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && | |||||
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { | |||||
Base = Addr.getOperand(0); | |||||
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | |||||
} else { | |||||
Base = Addr; | |||||
Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); | |||||
} | |||||
return true; | |||||
} | |||||
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, | |||||
SDValue &Offset) { | |||||
ConstantSDNode *IMMOffset; | |||||
if (Addr.getOpcode() == ISD::ADD | |||||
&& (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) | |||||
&& isInt<16>(IMMOffset->getZExtValue())) { | |||||
Base = Addr.getOperand(0); | |||||
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), | |||||
MVT::i32); | |||||
return true; | |||||
// If the pointer address is constant, we can move it to the offset field. | |||||
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) | |||||
&& isInt<16>(IMMOffset->getZExtValue())) { | |||||
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), | |||||
SDLoc(CurDAG->getEntryNode()), | |||||
R600::ZERO, MVT::i32); | |||||
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), | |||||
MVT::i32); | |||||
return true; | |||||
} | |||||
// Default case, no offset | |||||
Base = Addr; | |||||
Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); | |||||
return true; | |||||
} |