diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -12,7 +12,6 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H

-#include "AMDGPU.h"
 #include "llvm/Analysis/AliasAnalysis.h"

 namespace llvm {
@@ -66,9 +65,7 @@
 public:
   static char ID;

-  AMDGPUAAWrapperPass() : ImmutablePass(ID) {
-    initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
-  }
+  AMDGPUAAWrapperPass();

   AMDGPUAAResult &getResult() { return *Result; }
   const AMDGPUAAResult &getResult() const { return *Result; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,6 +10,7 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Instructions.h"

@@ -37,6 +38,10 @@
   return new AMDGPUExternalAAWrapper();
 }

+AMDGPUAAWrapperPass::AMDGPUAAWrapperPass() : ImmutablePass(ID) {
+  initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
 void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -0,0 +1,254 @@
+//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//

+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+inline bool isNullConstantOrUndef(SDValue V) {
+  if (V.isUndef())
+    return true;
+
+  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+  return Const != nullptr && Const->isNullValue();
+}
+
+inline bool getConstantValue(SDValue N, uint32_t &Out) {
+  // This is only used for packed vectors, where using 0 for undef should
+  // always be good.
+  if (N.isUndef()) {
+    Out = 0;
+    return true;
+  }
+
+  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+    Out = C->getAPIntValue().getSExtValue();
+    return true;
+  }
+
+  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+    return true;
+  }
+
+  return false;
+}
+
+// TODO: Handle undef as zero
+inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+                                 bool Negate = false) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+  uint32_t LHSVal, RHSVal;
+  if (getConstantValue(N->getOperand(0), LHSVal) &&
+      getConstantValue(N->getOperand(1), RHSVal)) {
+    SDLoc SL(N);
+    uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
+                        : (LHSVal & 0xffff) | (RHSVal << 16);
+    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+                              DAG.getTargetConstant(K, SL, MVT::i32));
+  }
+
+  return nullptr;
+}
+
+inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+  return packConstantV2I16(N, DAG, true);
+}
+
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+  // make the right decision when generating code for different targets.
+  const GCNSubtarget *Subtarget;
+
+  // Default FP mode for the current function.
+  AMDGPU::SIModeRegisterDefaults Mode;
+
+  bool EnableLateStructurizeCFG;
+
+  // Instructions that will be lowered with a final instruction that zeros the
+  // high result bits.
+  bool fp16SrcZerosHighBits(unsigned Opc) const;
+
+public:
+  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
+                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
+  ~AMDGPUDAGToDAGISel() override = default;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool matchLoadD16FromBuildVector(SDNode *N) const;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  void PreprocessISelDAG() override;
+  void Select(SDNode *N) override;
+  StringRef getPassName() const override;
+  void PostprocessISelDAG() override;
+
+protected:
+  void SelectBuildVector(SDNode *N, unsigned RegClassID);
+
+private:
+  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
+  bool isNoNanSrc(SDValue N) const;
+  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+  bool isNegInlineImmediate(const SDNode *N) const {
+    return isInlineImmediate(N, true);
+  }
+
+  bool isInlineImmediate16(int64_t Imm) const {
+    return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
+  }
+
+  bool isInlineImmediate32(int64_t Imm) const {
+    return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
+  }
+
+  bool isInlineImmediate64(int64_t Imm) const {
+    return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
+  }
+
+  bool isInlineImmediate(const APFloat &Imm) const {
+    return Subtarget->getInstrInfo()->isInlineConstant(Imm);
+  }
+
+  bool isVGPRImm(const SDNode *N) const;
+  bool isUniformLoad(const SDNode *N) const;
+  bool isUniformBr(const SDNode *N) const;
+
+  bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+                                  SDValue &RHS) const;
+
+  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+
+  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
+  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
+  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+
+  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
+  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+  bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
+  bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
+                        unsigned Size) const;
+  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+                                 SDValue &Offset1) const;
+  bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+                                  SDValue &Offset1) const;
+  bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+                          SDValue &Offset1, unsigned Size) const;
+  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+                   SDValue &Idxen, SDValue &Addr64) const;
+  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                         SDValue &SOffset, SDValue &Offset) const;
+  bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
+                               SDValue &VAddr, SDValue &SOffset,
+                               SDValue &ImmOffset) const;
+  bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
+                                SDValue &Soffset, SDValue &Offset) const;
+
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+                         SDValue &Offset) const;
+
+  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
+                            SDValue &Offset, uint64_t FlatVariant) const;
+  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                        SDValue &Offset) const;
+  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                          SDValue &Offset) const;
+  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                           SDValue &Offset) const;
+  bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+                         SDValue &VOffset, SDValue &Offset) const;
+  bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+                          SDValue &Offset) const;
+
+  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
+                        bool &Imm) const;
+  SDValue Expand32BitAddress(SDValue Addr) const;
+  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
+                  bool &Imm) const;
+  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
+  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
+
+  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+                          bool AllowAbs = true) const;
+  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
+  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                       SDValue &Clamp, SDValue &Omod) const;
+  bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                        SDValue &Clamp, SDValue &Omod) const;
+  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+                         SDValue &Clamp, SDValue &Omod) const;
+
+  bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
+                       SDValue &Omod) const;
+
+  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
+                                 unsigned &Mods) const;
+  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+  SDValue getHi16Elt(SDValue In) const;
+
+  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
+
+  void SelectADD_SUB_I64(SDNode *N);
+  void SelectAddcSubb(SDNode *N);
+  void SelectUADDO_USUBO(SDNode *N);
+  void SelectDIV_SCALE(SDNode *N);
+  void SelectMAD_64_32(SDNode *N);
+  void SelectFMA_W_CHAIN(SDNode *N);
+  void SelectFMUL_W_CHAIN(SDNode *N);
+
+  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
+                   uint32_t Offset, uint32_t Width);
+  void SelectS_BFEFromShifts(SDNode *N);
+  void SelectS_BFE(SDNode *N);
+  bool isCBranchSCC(const SDNode *N) const;
+  void SelectBRCOND(SDNode *N);
+  void SelectFMAD_FMA(SDNode *N);
+  void SelectATOMIC_CMP_SWAP(SDNode *N);
+  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+  void SelectDS_GWS(SDNode *N, unsigned IntrID);
+  void SelectInterpP1F16(SDNode *N);
+  void SelectINTRINSIC_W_CHAIN(SDNode *N);
+  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
+  void SelectINTRINSIC_VOID(SDNode *N);
+
+protected:
+  // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//

+#include "AMDGPUISelDAGToDAG.h"
 #include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/R600MCTargetDesc.h"
-#include "R600.h"
-#include "R600Subtarget.h"
+#include "R600RegisterInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -35,287 +35,12 @@

 using namespace llvm;

-namespace llvm {
-
-class R600InstrInfo;
-
-} // end namespace llvm
-
 //===----------------------------------------------------------------------===//
 // Instruction Selector Implementation
 //===----------------------------------------------------------------------===//

 namespace {

-static bool isNullConstantOrUndef(SDValue V) {
-  if (V.isUndef())
-    return true;
-
-  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
-  return Const != nullptr && Const->isNullValue();
-}
-
-static bool getConstantValue(SDValue N, uint32_t &Out) {
-  // This is only used for packed vectors, where ussing 0 for undef should
-  // always be good.
-  if (N.isUndef()) {
-    Out = 0;
-    return true;
-  }
-
-  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
-    Out = C->getAPIntValue().getSExtValue();
-    return true;
-  }
-
-  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
-    Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
-    return true;
-  }
-
-  return false;
-}
-
-// TODO: Handle undef as zero
-static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
-                                 bool Negate = false) {
-  assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
-  uint32_t LHSVal, RHSVal;
-  if (getConstantValue(N->getOperand(0), LHSVal) &&
-      getConstantValue(N->getOperand(1), RHSVal)) {
-    SDLoc SL(N);
-    uint32_t K = Negate ?
-      (-LHSVal & 0xffff) | (-RHSVal << 16) :
-      (LHSVal & 0xffff) | (RHSVal << 16);
-    return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
-                              DAG.getTargetConstant(K, SL, MVT::i32));
-  }
-
-  return nullptr;
-}
-
-static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
-  return packConstantV2I16(N, DAG, true);
-}
-
-/// AMDGPU specific code to select AMDGPU machine instructions for
-/// SelectionDAG operations.
-class AMDGPUDAGToDAGISel : public SelectionDAGISel {
-  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
-  // make the right decision when generating code for different targets.
-  const GCNSubtarget *Subtarget;
-
-  // Default FP mode for the current function.
-  AMDGPU::SIModeRegisterDefaults Mode;
-
-  bool EnableLateStructurizeCFG;
-
-  // Instructions that will be lowered with a final instruction that zeros the
-  // high result bits.
-  bool fp16SrcZerosHighBits(unsigned Opc) const;
-
-public:
-  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
-                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
-      : SelectionDAGISel(*TM, OptLevel) {
-    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
-  }
-  ~AMDGPUDAGToDAGISel() override = default;
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<AMDGPUArgumentUsageInfo>();
-    AU.addRequired<LegacyDivergenceAnalysis>();
-#ifdef EXPENSIVE_CHECKS
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<LoopInfoWrapperPass>();
-#endif
-    SelectionDAGISel::getAnalysisUsage(AU);
-  }
-
-  bool matchLoadD16FromBuildVector(SDNode *N) const;
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-  void PreprocessISelDAG() override;
-  void Select(SDNode *N) override;
-  StringRef getPassName() const override;
-  void PostprocessISelDAG() override;
-
-protected:
-  void SelectBuildVector(SDNode *N, unsigned RegClassID);
-
-private:
-  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
-  bool isNoNanSrc(SDValue N) const;
-  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
-  bool isNegInlineImmediate(const SDNode *N) const {
-    return isInlineImmediate(N, true);
-  }
-
-  bool isInlineImmediate16(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
-  }
-
-  bool isInlineImmediate32(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
-  }
-
-  bool isInlineImmediate64(int64_t Imm) const {
-    return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
-  }
-
-  bool isInlineImmediate(const APFloat &Imm) const {
-    return Subtarget->getInstrInfo()->isInlineConstant(Imm);
-  }
-
-  bool isVGPRImm(const SDNode *N) const;
-  bool isUniformLoad(const SDNode *N) const;
-  bool isUniformBr(const SDNode *N) const;
-
-  bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
-                                  SDValue &RHS) const;
-
-  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
-
-  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
-  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
-  SDNode *glueCopyToM0LDSInit(SDNode *N) const;
-
-  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
-  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
-  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
-  bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
-  bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
-                        unsigned Size) const;
-  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
-  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
-                                 SDValue &Offset1) const;
-  bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
-                                  SDValue &Offset1) const;
-  bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
-                          SDValue &Offset1, unsigned Size) const;
-  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
-                   SDValue &Idxen, SDValue &Addr64) const;
-  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                         SDValue &SOffset, SDValue &Offset) const;
-  bool SelectMUBUFScratchOffen(SDNode *Parent,
-                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
-                               SDValue &SOffset, SDValue &ImmOffset) const;
-  bool SelectMUBUFScratchOffset(SDNode *Parent,
-                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
-                                SDValue &Offset) const;
-
-  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
-                         SDValue &Offset) const;
-
-  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
-                            SDValue &Offset, uint64_t FlatVariant) const;
-  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
-                        SDValue &Offset) const;
-  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
-                          SDValue &Offset) const;
-  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
-                           SDValue &Offset) const;
-  bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
-                         SDValue &VOffset, SDValue &Offset) const;
-  bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
-                          SDValue &Offset) const;
-
-  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
-                        bool &Imm) const;
-  SDValue Expand32BitAddress(SDValue Addr) const;
-  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
-                  bool &Imm) const;
-  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
-  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
-  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
-  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
-  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
-  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
-
-  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
-                          bool AllowAbs = true) const;
-  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
-  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
-                       SDValue &Clamp, SDValue &Omod) const;
-  bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
-                        SDValue &Clamp, SDValue &Omod) const;
-  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
-                         SDValue &Clamp, SDValue &Omod) const;
-
-  bool SelectVOP3OMods(SDValue In, SDValue &Src,
-                       SDValue &Clamp, SDValue &Omod) const;
-
-  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
-  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
-  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
-  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
-  SDValue getHi16Elt(SDValue In) const;
-
-  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
-
-  void SelectADD_SUB_I64(SDNode *N);
-  void SelectAddcSubb(SDNode *N);
-  void SelectUADDO_USUBO(SDNode *N);
-  void SelectDIV_SCALE(SDNode *N);
-  void SelectMAD_64_32(SDNode *N);
-  void SelectFMA_W_CHAIN(SDNode *N);
-  void SelectFMUL_W_CHAIN(SDNode *N);
-
-  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
-                   uint32_t Offset, uint32_t Width);
-  void SelectS_BFEFromShifts(SDNode *N);
-  void SelectS_BFE(SDNode *N);
-  bool isCBranchSCC(const SDNode *N) const;
-  void SelectBRCOND(SDNode *N);
-  void SelectFMAD_FMA(SDNode *N);
-  void SelectATOMIC_CMP_SWAP(SDNode *N);
-  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
-  void SelectDS_GWS(SDNode *N, unsigned IntrID);
-  void SelectInterpP1F16(SDNode *N);
-  void SelectINTRINSIC_W_CHAIN(SDNode *N);
-  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
-  void SelectINTRINSIC_VOID(SDNode *N);
-
-protected:
-  // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-
-class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
-  const R600Subtarget *Subtarget;
-
-  bool isConstantLoad(const MemSDNode *N, int cbID) const;
-  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
-  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
-                                       SDValue& Offset);
-public:
-  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
-      AMDGPUDAGToDAGISel(TM, OptLevel) {}
-
-  void Select(SDNode *N) override;
-
-  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
-                          SDValue &Offset) override;
-  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-                          SDValue &Offset) override;
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-  void PreprocessISelDAG() override {}
-
-protected:
-  // Include the pieces autogenerated from the target description.
-#include "R600GenDAGISel.inc"
-};
-
 static SDValue stripBitcast(SDValue Val) {
   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
 }
@@ -389,11 +114,11 @@
   return new AMDGPUDAGToDAGISel(TM, OptLevel);
 }

-/// This pass converts a legalized DAG into a R600-specific
-// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
-                                      CodeGenOpt::Level OptLevel) {
-  return new R600DAGToDAGISel(TM, OptLevel);
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
+    TargetMachine *TM /*= nullptr*/,
+    CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
+    : SelectionDAGISel(*TM, OptLevel) {
+  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
 }

 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
@@ -471,6 +196,16 @@
   }
 }

+void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AMDGPUArgumentUsageInfo>();
+  AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addRequired<LoopInfoWrapperPass>();
+#endif
+  SelectionDAGISel::getAnalysisUsage(AU);
+}
+
 bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
   assert(Subtarget->d16PreservesUnusedBits());
   MVT VT = N->getValueType(0).getSimpleVT();
@@ -3114,128 +2849,3 @@
     CurDAG->RemoveDeadNodes();
   } while (IsModified);
 }
-
-bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
-  Subtarget = &MF.getSubtarget<R600Subtarget>();
-  return SelectionDAGISel::runOnMachineFunction(MF);
-}
-
-bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
-  if (!N->readMem())
-    return false;
-  if (CbId == -1)
-    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
-           N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
-
-  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
-                                                       SDValue& IntPtr) {
-  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
-    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
-                                       true);
-    return true;
-  }
-  return false;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
-                                                       SDValue& BaseReg, SDValue &Offset) {
-  if (!isa<ConstantSDNode>(Addr)) {
-    BaseReg = Addr;
-    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
-    return true;
-  }
-  return false;
-}
-
-void R600DAGToDAGISel::Select(SDNode *N) {
-  unsigned int Opc = N->getOpcode();
-  if (N->isMachineOpcode()) {
-    N->setNodeId(-1);
-    return; // Already selected.
-  }
-
-  switch (Opc) {
-  default: break;
-  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
-  case ISD::SCALAR_TO_VECTOR:
-  case ISD::BUILD_VECTOR: {
-    EVT VT = N->getValueType(0);
-    unsigned NumVectorElts = VT.getVectorNumElements();
-    unsigned RegClassID;
-    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
-    // that adds a 128 bits reg copy when going through TwoAddressInstructions
-    // pass. We want to avoid 128 bits copies as much as possible because they
-    // can't be bundled by our scheduler.
-    switch(NumVectorElts) {
-    case 2: RegClassID = R600::R600_Reg64RegClassID; break;
-    case 4:
-      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
-        RegClassID = R600::R600_Reg128VerticalRegClassID;
-      else
-        RegClassID = R600::R600_Reg128RegClassID;
-      break;
-    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
-    }
-    SelectBuildVector(N, RegClassID);
-    return;
-  }
-  }
-
-  SelectCode(N);
-}
-
-bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
-                                          SDValue &Offset) {
-  ConstantSDNode *C;
-  SDLoc DL(Addr);
-
-  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
-    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
-    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
-  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
-             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
-    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
-    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
-  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
-             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
-    Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
-  } else {
-    Base = Addr;
-    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
-  }
-
-  return true;
-}
-
-bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-                                          SDValue &Offset) {
-  ConstantSDNode *IMMOffset;
-
-  if (Addr.getOpcode() == ISD::ADD
-      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
-      && isInt<16>(IMMOffset->getZExtValue())) {
-
-    Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
-                                       MVT::i32);
-    return true;
-  // If the pointer address is constant, we can move it to the offset field.
-  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
-             && isInt<16>(IMMOffset->getZExtValue())) {
-    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-                                  SDLoc(CurDAG->getEntryNode()),
-                                  R600::ZERO, MVT::i32);
-    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
-                                       MVT::i32);
-    return true;
-  }
-
-  // Default case, no offset
-  Base = Addr;
-  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
-  return true;
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -0,0 +1,68 @@
+//===- AMDGPUMCInstLower.h - Lower AMDGPU MachineInstr to an MCInst -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Header for lowering AMDGPU MachineInstrs to their corresponding MCInsts.
+//
+//===----------------------------------------------------------------------===//
+//

+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+} // namespace llvm
+
+using namespace llvm;
+
+class AMDGPUMCInstLower {
+  MCContext &Ctx;
+  const TargetSubtargetInfo &ST;
+  const AsmPrinter &AP;
+
+public:
+  AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
+                    const AsmPrinter &AP);
+
+  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+  /// Lower a MachineInstr to an MCInst
+  void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+inline const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
+                                        const Constant *CV,
+                                        MCContext &OutContext) {
+  // TargetMachine does not support llvm-style cast. Use C++-style cast.
+  // This is safe since TM is always of type AMDGPUTargetMachine or its
+  // derived class.
+  auto &AT = static_cast<const AMDGPUTargetMachine &>(TM);
+  auto *CE = dyn_cast<ConstantExpr>(CV);
+
+  // Lower null pointers in private and local address space.
+  // Clang generates addrspacecast for null pointers in private and local
+  // address space, which needs to be lowered.
+  if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
+    auto Op = CE->getOperand(0);
+    auto SrcAddr = Op->getType()->getPointerAddressSpace();
+    if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
+      auto DstAddr = CE->getType()->getPointerAddressSpace();
+      return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
+                                    OutContext);
+    }
+  }
+  return nullptr;
+}
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -12,12 +12,11 @@
 //===----------------------------------------------------------------------===//
 //

+#include "AMDGPUMCInstLower.h"
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "R600AsmPrinter.h"
-#include "R600Subtarget.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/IR/Constants.h"
@@ -35,36 +34,6 @@

 using namespace llvm;

-namespace {
-
-class AMDGPUMCInstLower {
-  MCContext &Ctx;
-  const TargetSubtargetInfo &ST;
-  const AsmPrinter &AP;
-
-public:
-  AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
-                    const AsmPrinter &AP);
-
-  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
-
-  /// Lower a MachineInstr to an MCInst
-  void lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-};
-
-class R600MCInstLower : public AMDGPUMCInstLower {
-public:
-  R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
-                  const AsmPrinter &AP);
-
-  /// Lower a MachineInstr to an MCInst
-  void lower(const MachineInstr *MI, MCInst &OutMI) const;
-};
-
-
-} // End anonymous namespace
-
 #include "AMDGPUGenMCPseudoLowering.inc"

 AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
@@ -195,30 +164,6 @@
   return MCInstLowering.lowerOperand(MO, MCOp);
 }

-static const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
-                                        const Constant *CV,
-                                        MCContext &OutContext) {
-  // TargetMachine does not support llvm-style cast. Use C++-style cast.
-  // This is safe since TM is always of type AMDGPUTargetMachine or its
-  // derived class.
-  auto &AT = static_cast<const AMDGPUTargetMachine &>(TM);
-  auto *CE = dyn_cast<ConstantExpr>(CV);
-
-  // Lower null pointers in private and local address space.
-  // Clang generates addrspacecast for null pointers in private and local
-  // address space, which needs to be lowered.
-  if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
-    auto Op = CE->getOperand(0);
-    auto SrcAddr = Op->getType()->getPointerAddressSpace();
-    if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
-      auto DstAddr = CE->getType()->getPointerAddressSpace();
-      return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
-                                    OutContext);
-    }
-  }
-  return nullptr;
-}
-
 const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
   if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
     return E;
@@ -326,47 +271,3 @@
     }
   }
 }
-
-R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
-                                 const AsmPrinter &AP) :
-  AMDGPUMCInstLower(Ctx, ST, AP) { }
-
-void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
-  OutMI.setOpcode(MI->getOpcode());
-  for (const MachineOperand &MO : MI->explicit_operands()) {
-    MCOperand MCOp;
-    lowerOperand(MO, MCOp);
-    OutMI.addOperand(MCOp);
-  }
-}
-
-void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
-  const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
-  R600MCInstLower MCInstLowering(OutContext, STI, *this);
-
-  StringRef Err;
-  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
-    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
-    C.emitError("Illegal instruction detected: " + Err);
-    MI->print(errs());
-  }
-
-  if (MI->isBundle()) {
-    const MachineBasicBlock *MBB = MI->getParent();
-    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
-    while (I != MBB->instr_end() && I->isInsideBundle()) {
-      emitInstruction(&*I);
-      ++I;
-    }
-  } else {
-    MCInst TmpInst;
-    MCInstLowering.lower(MI, TmpInst);
-    EmitToStreamer(*OutStreamer, TmpInst);
-  }
-}
-
-const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
-  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
-    return E;
-  return AsmPrinter::lowerConstant(CV);
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -17,7 +17,6 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

-#include "AMDGPU.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"

 namespace llvm {
@@ -172,13 +171,7 @@
   bool isSourceOfDivergence(const Value *V) const;
   bool isAlwaysUniform(const Value *V) const;

-  unsigned getFlatAddressSpace() const {
-    // Don't bother running InferAddressSpaces pass on graphics shaders which
-    // don't use flat addressing.
-    if (IsGraphics)
-      return -1;
-    return AMDGPUAS::FLAT_ADDRESS;
-  }
+  unsigned getFlatAddressSpace() const;

   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                   Intrinsic::ID IID) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPUTargetTransformInfo.h"
+#include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -1242,3 +1243,11 @@
              : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
                                       : getQuarterRateInstrCost(CostKind);
 }
+
+unsigned GCNTTIImpl::getFlatAddressSpace() const {
+  // Don't bother running InferAddressSpaces pass on graphics shaders which
+  // don't use flat addressing.
+  if (IsGraphics)
+    return -1;
+  return AMDGPUAS::FLAT_ADDRESS;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -6,7 +6,6 @@
 //
 //==-----------------------------------------------------------------------===//

-#include "AMDGPU.h"
 #include "MCTargetDesc/R600MCTargetDesc.h"
 #include "R600.h"
 #include "R600RegisterInfo.h"
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -44,24 +44,24 @@
   AMDGPUAliasAnalysis.cpp
   AMDGPUAlwaysInlinePass.cpp
   AMDGPUAnnotateKernelFeatures.cpp
-  AMDGPUAttributor.cpp
   AMDGPUAnnotateUniformValues.cpp
   AMDGPUArgumentUsageInfo.cpp
   AMDGPUAsmPrinter.cpp
   AMDGPUAtomicOptimizer.cpp
+  AMDGPUAttributor.cpp
   AMDGPUCallLowering.cpp
   AMDGPUCodeGenPrepare.cpp
+  AMDGPUCtorDtorLowering.cpp
   AMDGPUExportClustering.cpp
   AMDGPUFixFunctionBitcasts.cpp
-  AMDGPUCtorDtorLowering.cpp
   AMDGPUFrameLowering.cpp
+  AMDGPUGlobalISelUtils.cpp
   AMDGPUHSAMetadataStreamer.cpp
   AMDGPUInstCombineIntrinsic.cpp
   AMDGPUInstrInfo.cpp
   AMDGPUInstructionSelector.cpp
   AMDGPUISelDAGToDAG.cpp
   AMDGPUISelLowering.cpp
-  AMDGPUGlobalISelUtils.cpp
   AMDGPULateCodeGenPrepare.cpp
   AMDGPULegalizerInfo.cpp
   AMDGPULibCalls.cpp
@@ -77,13 +77,16 @@
   AMDGPUMCInstLower.cpp
   AMDGPUMIRFormatter.cpp
   AMDGPUOpenCLEnqueuedBlockLowering.cpp
+  AMDGPUPerfHintAnalysis.cpp
   AMDGPUPostLegalizerCombiner.cpp
   AMDGPUPreLegalizerCombiner.cpp
+  AMDGPUPrintfRuntimeBinding.cpp
   AMDGPUPromoteAlloca.cpp
   AMDGPUPropagateAttributes.cpp
   AMDGPURegBankCombiner.cpp
   AMDGPURegisterBankInfo.cpp
   AMDGPUReplaceLDSUseWithPointer.cpp
+  AMDGPUResourceUsageAnalysis.cpp
   AMDGPURewriteOutArguments.cpp
   AMDGPUSubtarget.cpp
   AMDGPUTargetMachine.cpp
@@ -91,13 +94,14 @@
   AMDGPUTargetTransformInfo.cpp
   AMDGPUUnifyDivergentExitNodes.cpp
   AMDGPUUnifyMetadata.cpp
-  AMDGPUPerfHintAnalysis.cpp
   AMDILCFGStructurizer.cpp
-  AMDGPUPrintfRuntimeBinding.cpp
-  AMDGPUResourceUsageAnalysis.cpp
+  GCNDPPCombine.cpp
   GCNHazardRecognizer.cpp
+  GCNILPSched.cpp
   GCNIterativeScheduler.cpp
   GCNMinRegStrategy.cpp
+  GCNNSAReassign.cpp
+  GCNPreRAOptimizations.cpp
   GCNRegPressure.cpp
   GCNSchedStrategy.cpp
   R600AsmPrinter.cpp
@@ -107,9 +111,11 @@
   R600ExpandSpecialInstrs.cpp
   R600FrameLowering.cpp
   R600InstrInfo.cpp
+  R600ISelDAGToDAG.cpp
   R600ISelLowering.cpp
   R600MachineFunctionInfo.cpp
   R600MachineScheduler.cpp
+  R600MCInstLower.cpp
   R600OpenCLImageTypeLoweringPass.cpp
   R600OptimizeVectorRegisters.cpp
   R600Packetizer.cpp
@@ -120,15 +126,14 @@
   SIAnnotateControlFlow.cpp
   SIFixSGPRCopies.cpp
   SIFixVGPRCopies.cpp
-  SIPreAllocateWWMRegs.cpp
   SIFoldOperands.cpp
   SIFormMemoryClauses.cpp
   SIFrameLowering.cpp
   SIInsertHardClauses.cpp
-  SILateBranchLowering.cpp
   SIInsertWaitcnts.cpp
   SIInstrInfo.cpp
   SIISelLowering.cpp
+  SILateBranchLowering.cpp
   SILoadStoreOptimizer.cpp
   SILowerControlFlow.cpp
   SILowerI1Copies.cpp
@@ -136,21 +141,18 @@
   SIMachineFunctionInfo.cpp
   SIMachineScheduler.cpp
   SIMemoryLegalizer.cpp
+  SIModeRegister.cpp
   SIOptimizeExecMasking.cpp
   SIOptimizeExecMaskingPreRA.cpp
   SIOptimizeVGPRLiveRange.cpp
   SIPeepholeSDWA.cpp
   SIPostRABundler.cpp
+  SIPreAllocateWWMRegs.cpp
   SIPreEmitPeephole.cpp
   SIProgramInfo.cpp
   SIRegisterInfo.cpp
   SIShrinkInstructions.cpp
   SIWholeQuadMode.cpp
-  GCNILPSched.cpp
-  GCNNSAReassign.cpp
-  GCNDPPCombine.cpp
-  GCNPreRAOptimizations.cpp
-  SIModeRegister.cpp

   LINK_COMPONENTS
   Analysis
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -97,7 +97,6 @@
   bool FP64;
   bool FMA;
   bool MIMG_R128;
-  bool IsGCN;
   bool CIInsts;
   bool GFX8Insts;
   bool GFX9Insts;
@@ -165,13 +164,8 @@
   bool HasArchitectedFlatScratch;
   bool AddNoCarryInsts;
   bool HasUnpackedD16VMem;
-  bool R600ALUInst;
-  bool CaymanISA;
-  bool CFALUBug;
   bool LDSMisalignedBug;
   bool HasMFMAInlineLiteralBug;
-  bool HasVertexCache;
-  short TexVTXClauseSize;
   bool UnalignedBufferAccess;
   bool UnalignedDSAccess;
   bool HasPackedTID;
diff --git a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
--- a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//

 #include "R600AsmPrinter.h"
+#include "AMDGPUMCInstLower.h"
 #include "MCTargetDesc/R600MCTargetDesc.h"
 #include "R600Defines.h"
 #include "R600MachineFunctionInfo.h"
@@ -129,3 +130,9 @@

   return false;
 }
+
+const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
+  if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
+    return E;
+  return AsmPrinter::lowerConstant(CV);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
@@ -0,0 +1,184 @@
+//===-- R600ISelDAGToDAG.cpp - A dag to dag inst selector for R600 --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the R600 subtarget.
+//
+//===----------------------------------------------------------------------===//

+#include "AMDGPU.h"
+#include "AMDGPUISelDAGToDAG.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
+#include "R600Subtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+  const R600Subtarget *Subtarget;
+
+  bool isConstantLoad(const MemSDNode *N, int cbID) const;
+  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
+  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+                                       SDValue &Offset);
+
+public:
+  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel)
+      : AMDGPUDAGToDAGISel(TM, OptLevel) {}
+
+  void Select(SDNode *N) override;
+
+  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                          SDValue &Offset) override;
+  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                          SDValue &Offset) override;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void PreprocessISelDAG() override {}
+
+protected:
+  // Include the pieces autogenerated from the target description.
+#include "R600GenDAGISel.inc"
+};
+
+bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &MF.getSubtarget<R600Subtarget>();
+  return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+  if (!N->readMem())
+    return false;
+  if (CbId == -1)
+    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+           N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
+
+  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+                                                       SDValue &IntPtr) {
+  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+    IntPtr =
+        CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), true);
+    return true;
+  }
+  return false;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+                                                       SDValue &BaseReg,
+                                                       SDValue &Offset) {
+  if (!isa<ConstantSDNode>(Addr)) {
+    BaseReg = Addr;
+    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+    return true;
+  }
+  return false;
+}
+
+void R600DAGToDAGISel::Select(SDNode *N) {
+  unsigned int Opc = N->getOpcode();
+  if (N->isMachineOpcode()) {
+    N->setNodeId(-1);
+    return; // Already selected.
+  }
+
+  switch (Opc) {
+  default:
+    break;
+  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+  case ISD::SCALAR_TO_VECTOR:
+  case ISD::BUILD_VECTOR: {
+    EVT VT = N->getValueType(0);
+    unsigned NumVectorElts = VT.getVectorNumElements();
+    unsigned RegClassID;
+    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+    // that adds a 128-bit reg copy when going through TwoAddressInstructions
+    // pass. We want to avoid 128-bit copies as much as possible because they
+    // can't be bundled by our scheduler.
+    switch (NumVectorElts) {
+    case 2:
+      RegClassID = R600::R600_Reg64RegClassID;
+      break;
+    case 4:
+      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+        RegClassID = R600::R600_Reg128VerticalRegClassID;
+      else
+        RegClassID = R600::R600_Reg128RegClassID;
+      break;
+    default:
+      llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+    }
+    SelectBuildVector(N, RegClassID);
+    return;
+  }
+  }
+
+  SelectCode(N);
+}
+
+bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+                                          SDValue &Offset) {
+  ConstantSDNode *C;
+  SDLoc DL(Addr);
+
+  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+  } else {
+    Base = Addr;
+    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  }
+
+  return true;
+}
+
+bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+                                          SDValue &Offset) {
+  ConstantSDNode *IMMOffset;
+
+  if (Addr.getOpcode() == ISD::ADD &&
+      (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+      isInt<16>(IMMOffset->getZExtValue())) {
+
+    Base = Addr.getOperand(0);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+    // If the pointer address is constant, we can move it to the offset field.
+  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) &&
+             isInt<16>(IMMOffset->getZExtValue())) {
+    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+                                  SDLoc(CurDAG->getEntryNode()), R600::ZERO,
+                                  MVT::i32);
+    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+                                       MVT::i32);
+    return true;
+  }
+
+  // Default case, no offset
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+  return true;
+}
+
+/// This pass converts a legalized DAG into an R600-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
+                                      CodeGenOpt::Level OptLevel) {
+  return new R600DAGToDAGISel(TM, OptLevel);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
@@ -0,0 +1,67 @@
+//===- R600MCInstLower.cpp - Lower R600 MachineInstr to an MCInst ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Code to lower R600 MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//

+#include "AMDGPUMCInstLower.h"
+#include "R600AsmPrinter.h"
+#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+
+class R600MCInstLower : public AMDGPUMCInstLower {
+public:
+  R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
+                  const AsmPrinter &AP);
+
+  /// Lower a MachineInstr to an MCInst
+  void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
+                                 const AsmPrinter &AP)
+    : AMDGPUMCInstLower(Ctx, ST, AP) {}
+
+void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+  OutMI.setOpcode(MI->getOpcode());
+  for (const MachineOperand &MO : MI->explicit_operands()) {
+    MCOperand MCOp;
+    lowerOperand(MO, MCOp);
+    OutMI.addOperand(MCOp);
+  }
+}
+
+void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
+  const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
+  R600MCInstLower MCInstLowering(OutContext, STI, *this);
+
+  StringRef Err;
+  if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
+    LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
+    C.emitError("Illegal instruction detected: " + Err);
+    MI->print(errs());
+  }
+
+  if (MI->isBundle()) {
+    const MachineBasicBlock *MBB = MI->getParent();
+    MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+    while (I != MBB->instr_end() && I->isInsideBundle()) {
+      emitInstruction(&*I);
+      ++I;
+    }
+  } else {
+    MCInst TmpInst;
+    MCInstLowering.lower(MI, TmpInst);
+    EmitToStreamer(*OutStreamer, TmpInst);
+  }
+}
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//

 #include "R600TargetTransformInfo.h"
+#include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
 #include "R600Subtarget.h"
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H

-#include "AMDGPU.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/Constants.h"
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPULDSUtils.h"
+#include "AMDGPU.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetVector.h"
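Note (editorial sketch, not part of the patch): `packConstantV2I16`, which this change moves inline into the new `AMDGPUISelDAGToDAG.h`, folds a two-operand `BUILD_VECTOR` of constants into a single 32-bit `S_MOV_B32` immediate, optionally negating both 16-bit lanes first. The standalone C++ program below mirrors just the bit math under hypothetical sample values (the `packV2I16` name is invented for illustration), so the lane packing can be checked outside SelectionDAG:

// Illustrative mirror of the packConstantV2I16 bit math; not LLVM code.
#include <cassert>
#include <cstdint>

// Pack two 16-bit lanes into one 32-bit immediate: low lane in bits [15:0],
// high lane in bits [31:16]. With Negate set, each lane is negated first.
static uint32_t packV2I16(uint32_t LHSVal, uint32_t RHSVal, bool Negate) {
  return Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
                : (LHSVal & 0xffff) | (RHSVal << 16);
}

int main() {
  assert(packV2I16(1, 2, false) == 0x00020001u); // <1, 2>
  assert(packV2I16(1, 2, true) == 0xfffeffffu);  // <-1, -2> as i16 lanes
  return 0;
}

Unary minus on the zero-extended `uint32_t` lane is two's-complement negation modulo 2^32, and the `& 0xffff` mask (or the left shift, for the high lane) truncates the result to 16 bits, so each lane negates independently; that is why `packNegConstantV2I16` can simply call the same helper with `Negate = true`.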