Index: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
@@ -411,23 +411,23 @@
   "GCN or newer GPU"
 >;
 
-class AMDGPUSubtargetFeatureGeneration <string Value,
-                                        list<SubtargetFeature> Implies> :
-  SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>;
+class GCNSubtargetFeatureGeneration <string Value,
+                                     list<SubtargetFeature> Implies> :
+  SubtargetFeatureGeneration <Value, "GCNSubtarget", Implies>;
 
-def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
+def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
   [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureGCN,
   FeatureLDSBankCount32, FeatureMovrel]
 >;
 
-def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS",
+def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
   FeatureCIInsts, FeatureMovrel]
 >;
 
-def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
   [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
@@ -438,7 +438,7 @@
  ]
 >;
 
-def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9",
+def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
   [FeatureFP64, FeatureLocalMemorySize65536,
   FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
   FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -219,7 +219,7 @@
 }
 
 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
   bool HasFlat = ST.hasFlatAddressSpace();
   bool HasApertureRegs = ST.hasApertureRegs();
   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -18,7 +18,7 @@
 class Function;
 class raw_ostream;
-class SISubtarget;
+class GCNSubtarget;
 class TargetMachine;
 class TargetRegisterClass;
 class TargetRegisterInfo;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -34,7 +34,7 @@
 class AMDGPUMachineFunction;
 class AMDGPUTargetStreamer;
 class MCOperand;
-class SISubtarget;
+class GCNSubtarget;
 
 class AMDGPUAsmPrinter final : public AsmPrinter {
 private:
@@ -50,7 +50,7 @@
     bool HasDynamicallySizedStack = false;
     bool HasRecursion = false;
 
-    int32_t getTotalNumSGPRs(const SISubtarget &ST) const;
+    int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
   };
 
   SIProgramInfo CurrentProgramInfo;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -67,7 +67,7 @@
 // instructions to run at the double precision rate for the device so it's
 // probably best to just report no single precision denormals.
 static uint32_t getFPMode(const MachineFunction &F) {
-  const SISubtarget& ST = F.getSubtarget<SISubtarget>();
+  const GCNSubtarget& ST = F.getSubtarget<GCNSubtarget>();
   // TODO: Is there any real use for the flush in only / flush out only modes?
 
   uint32_t FP32Denormals =
@@ -197,7 +197,7 @@
       TM.getTargetTriple().getOS() == Triple::AMDHSA)
     return;
 
-  const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
   amd_kernel_code_t KernelCode;
   if (STM.isAmdCodeObjectV2(MF->getFunction())) {
     getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
@@ -255,14 +255,14 @@
   }
 
   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
   if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(MF->getFunction())) {
     SmallString<128> SymbolName;
     getNameWithPrefix(SymbolName, &MF->getFunction()),
     getTargetStreamer()->EmitAMDGPUSymbolType(
         SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
   }
-  const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
   if (STI.dumpCode()) {
     // Disassemble function name label to text.
     DisasmLines.push_back(MF->getName().str() + ":");
@@ -274,7 +274,7 @@
 }
 
 void AMDGPUAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
-  const AMDGPUSubtarget &STI = MBB.getParent()->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STI = MBB.getParent()->getSubtarget<GCNSubtarget>();
   if (STI.dumpCode() && !isBlockOnlyReachableByFallthrough(&MBB)) {
     // Write a line for the basic block label if it is not only fallthrough.
     DisasmLines.push_back(
@@ -399,7 +399,7 @@
 
   SetupMachineFunction(MF);
 
-  const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   MCContext &Context = getObjFileLowering().getContext();
 
   // FIXME: This should be an explicit check for Mesa.
   if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
@@ -440,7 +440,7 @@
     SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()];
     emitCommonFunctionComments(
       Info.NumVGPR,
-      Info.getTotalNumSGPRs(MF.getSubtarget<SISubtarget>()),
+      Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()),
       Info.PrivateSegmentSize,
       getFunctionCodeSize(MF), MFI);
     return false;
@@ -475,7 +475,7 @@
     OutStreamer->emitRawComment(
       " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
 
-    if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
+    if (MF.getSubtarget<GCNSubtarget>().debuggerEmitPrologue()) {
       OutStreamer->emitRawComment(
         " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
         Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
@@ -526,7 +526,7 @@
 }
 
 uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = STM.getInstrInfo();
 
   uint64_t CodeSize = 0;
@@ -558,7 +558,7 @@
 }
 
 int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
-  const SISubtarget &ST) const {
+  const GCNSubtarget &ST) const {
   return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
                                                      UsesVCC, UsesFlatScratch);
 }
@@ -568,7 +568,7 @@
   SIFunctionResourceInfo Info;
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -812,7 +812,7 @@
     MF.getFunction().getContext().diagnose(DiagStackSize);
   }
 
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const SIInstrInfo *TII = STM.getInstrInfo();
   const SIRegisterInfo *RI = &TII->getRegisterInfo();
@@ -927,7 +927,7 @@
   ProgInfo.DX10Clamp = STM.enableDX10Clamp();
 
   unsigned LDSAlignShift;
-  if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) {
+  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
     // LDS is allocated in 64 dword blocks.
     LDSAlignShift = 8;
   } else {
@@ -1000,7 +1000,7 @@
 
 void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
                                          const SIProgramInfo &CurrentProgramInfo) {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
 
@@ -1129,7 +1129,7 @@
     const SIProgramInfo &CurrentProgramInfo,
     const MachineFunction &MF) const {
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
 
   AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -95,10 +95,10 @@
     return false;
 
   MachineFunction &MF = MIRBuilder.getMF();
-  const SISubtarget *Subtarget = &MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
   const DataLayout &DL = F.getParent()->getDataLayout();
 
   SmallVector<CCValAssign, 16> ArgLocs;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -132,11 +132,11 @@
 ]>;
 
 def CC_AMDGPU : CallingConv<[
-  CCIf<"static_cast<const AMDGPUSubtarget&>"
+  CCIf<"static_cast<const GCNSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
        "AMDGPUSubtarget::SOUTHERN_ISLANDS",
        CCDelegateTo<CC_SI>>,
-  CCIf<"static_cast<const AMDGPUSubtarget&>"
+  CCIf<"static_cast<const GCNSubtarget&>"
        "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
       "AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
        CCDelegateTo<CC_AMDGPU_Func>>
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -57,7 +57,7 @@
 
 class AMDGPUCodeGenPrepare : public FunctionPass,
                              public InstVisitor<AMDGPUCodeGenPrepare, bool> {
-  const SISubtarget *ST = nullptr;
+  const GCNSubtarget *ST = nullptr;
   DivergenceAnalysis *DA = nullptr;
   Module *Mod = nullptr;
   bool HasUnsafeFPMath = false;
@@ -890,7 +890,7 @@
     return false;
 
   const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
-  ST = &TM.getSubtarget<SISubtarget>(F);
+  ST = &TM.getSubtarget<GCNSubtarget>(F);
   DA = &getAnalysis<DivergenceAnalysis>();
   HasUnsafeFPMath = hasUnsafeFPMath(F);
   AMDGPUASI = TM.getAMDGPUAS();
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -203,7 +203,7 @@
 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
     const MachineFunction &MF,
     const SIProgramInfo &ProgramInfo) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
   const Function &F = MF.getFunction();
@@ -233,7 +233,7 @@
 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
     const MachineFunction &MF,
     const SIProgramInfo &ProgramInfo) const {
-  const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
 
   if (!STM.debuggerSupported())
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -71,7 +71,7 @@
 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
   // make the right decision when generating code for different targets.
-  const AMDGPUSubtarget *Subtarget;
+  const GCNSubtarget *Subtarget;
   AMDGPUAS AMDGPUASI;
   bool EnableLateStructurizeCFG;
 
@@ -274,7 +274,7 @@
 }
 
 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
-  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
+  Subtarget = &MF.getSubtarget<GCNSubtarget>();
   return SelectionDAGISel::runOnMachineFunction(MF);
 }
 
@@ -316,7 +316,7 @@
   }
 
   const SIRegisterInfo *TRI
-      = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
+      = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
   return TRI->getPhysRegClass(Reg);
 }
 
@@ -1397,7 +1397,7 @@
     return false;
 
   SDLoc SL(ByteOffsetNode);
-  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
+  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
   int64_t ByteOffset = C->getSExtValue();
   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
@@ -1664,7 +1664,7 @@
     return true;
 
   if (VT == MVT::i64) {
-    auto ST = static_cast<const SISubtarget *>(Subtarget);
+    auto ST = static_cast<const GCNSubtarget *>(Subtarget);
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -23,12 +23,12 @@
 namespace llvm {
 
 class AMDGPUMachineFunction;
-class AMDGPUCommonSubtarget;
+class AMDGPUSubtarget;
 struct ArgDescriptor;
 
 class AMDGPUTargetLowering : public TargetLowering {
 private:
-  const AMDGPUCommonSubtarget *Subtarget;
+  const AMDGPUSubtarget *Subtarget;
 
   /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
   /// legalized from a smaller type VT. Need to match pre-legalized type because
@@ -125,7 +125,7 @@
   void analyzeFormalArgumentsCompute(CCState &State,
                                      const SmallVectorImpl<ISD::InputArg> &Ins) const;
 public:
-  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI);
+  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
 
   bool mayIgnoreSignedZero(SDValue Op) const {
     if (getTargetMachine().Options.NoSignedZerosFPMath)
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -155,7 +155,7 @@
 }
 
 AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
-                                           const AMDGPUCommonSubtarget &STI)
+                                           const AMDGPUSubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
   AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
   // Lower floating point store/load to integer store/load to reduce the number
@@ -3939,8 +3939,8 @@
 uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
     const MachineFunction &MF, const ImplicitParameter Param) const {
   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
-  const AMDGPUCommonSubtarget &ST =
-      AMDGPUCommonSubtarget::get(getTargetMachine(), MF.getFunction());
+  const AMDGPUSubtarget &ST =
+      AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction());
   unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
   unsigned Alignment = ST.getAlignmentForImplicitArgPtr();
   uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
@@ -4242,8 +4242,8 @@
   switch (IID) {
   case Intrinsic::amdgcn_mbcnt_lo:
   case Intrinsic::amdgcn_mbcnt_hi: {
-    const SISubtarget &ST =
-        DAG.getMachineFunction().getSubtarget<SISubtarget>();
+    const GCNSubtarget &ST =
+        DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
     // These return at most the wavefront size - 1.
     unsigned Size = Op.getValueType().getSizeInBits();
     Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -22,14 +22,14 @@
 
 namespace llvm {
 
-class AMDGPUSubtarget;
+class GCNSubtarget;
 class MachineFunction;
 class MachineInstr;
 class MachineInstrBuilder;
 
 class AMDGPUInstrInfo {
 public:
-  explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
+  explicit AMDGPUInstrInfo(const GCNSubtarget &st);
 
   static bool isUniformMMO(const MachineMemOperand *MMO);
 };
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -26,7 +26,7 @@
 // Pin the vtable to this file.
 //void AMDGPUInstrInfo::anchor() {}
 
-AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { }
+AMDGPUInstrInfo::AMDGPUInstrInfo(const GCNSubtarget &ST) { }
 
 // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -22,26 +22,27 @@
 namespace {
 #define GET_GLOBALISEL_PREDICATE_BITSET
+#define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATE_BITSET
+#undef AMDGPUSubtarget
 }
 
 namespace llvm {
 
 class AMDGPUInstrInfo;
 class AMDGPURegisterBankInfo;
-class AMDGPUSubtarget;
+class GCNSubtarget;
 class MachineInstr;
 class MachineOperand;
 class MachineRegisterInfo;
 class SIInstrInfo;
 class SIMachineFunctionInfo;
 class SIRegisterInfo;
-class SISubtarget;
 
 class AMDGPUInstructionSelector : public InstructionSelector {
 public:
-  AMDGPUInstructionSelector(const SISubtarget &STI,
+  AMDGPUInstructionSelector(const GCNSubtarget &STI,
                             const AMDGPURegisterBankInfo &RBI,
                             const AMDGPUTargetMachine &TM);
 
@@ -91,11 +92,13 @@
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
   const AMDGPUTargetMachine &TM;
-  const SISubtarget &STI;
+  const GCNSubtarget &STI;
   bool EnableLateStructurizeCFG;
 #define GET_GLOBALISEL_PREDICATES_DECL
+#define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATES_DECL
+#undef AMDGPUSubtarget
 
 #define GET_GLOBALISEL_TEMPORARIES_DECL
 #include "AMDGPUGenGlobalISel.inc"
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -37,11 +37,13 @@
 using namespace llvm;
 
 #define GET_GLOBALISEL_IMPL
+#define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenGlobalISel.inc"
 #undef GET_GLOBALISEL_IMPL
+#undef AMDGPUSubtarget
 
 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
-    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
+    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
     const AMDGPUTargetMachine &TM)
     : InstructionSelector(), TII(*STI.getInstrInfo()),
       TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
@@ -447,7 +449,7 @@
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
-  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   unsigned DstReg = I.getOperand(0).getReg();
   const DebugLoc &DL = I.getDebugLoc();
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -21,12 +21,12 @@
 
 class GCNTargetMachine;
 class LLVMContext;
-class SISubtarget;
+class GCNSubtarget;
 
 /// This class provides the information for the target register banks.
 class AMDGPULegalizerInfo : public LegalizerInfo {
 public:
-  AMDGPULegalizerInfo(const SISubtarget &ST,
+  AMDGPULegalizerInfo(const GCNSubtarget &ST,
                       const GCNTargetMachine &TM);
 };
 } // End llvm namespace.
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -24,7 +24,7 @@
 using namespace llvm;
 using namespace LegalizeActions;
 
-AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST,
+AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
                                          const GCNTargetMachine &TM) {
   using namespace TargetOpcode;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -124,7 +124,7 @@
     if (!CI)
       continue;
 
-    Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
+    Changed |= AMDGPUSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
   }
   return Changed;
 }
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -68,7 +68,7 @@
   auto &TPC = getAnalysis<TargetPassConfig>();
 
   const TargetMachine &TM = TPC.getTM<TargetMachine>();
-  const SISubtarget &ST = TM.getSubtarget<SISubtarget>(F);
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
   LLVMContext &Ctx = F.getParent()->getContext();
   const DataLayout &DL = F.getParent()->getDataLayout();
   BasicBlock &EntryBlock = *F.begin();
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -204,7 +204,7 @@
 
 bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
                                     MCOperand &MCOp) const {
-  const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
   AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
   return MCInstLowering.lowerOperand(MO, MCOp);
 }
@@ -243,7 +243,7 @@
   if (emitPseudoExpansionLowering(*OutStreamer, MI))
     return;
 
-  const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
   AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
 
   StringRef Err;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -2871,7 +2871,7 @@
 }
 
 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   TRI = ST.getRegisterInfo();
   MRI = &(MF.getRegInfo());
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -15,7 +15,7 @@
 
 namespace llvm {
 
-class AMDGPUSubtarget;
+class GCNSubtarget;
 
 class AMDGPUMachineFunction : public MachineFunctionInfo {
   /// A map to keep track of local memory objects and their offsets within the
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -152,7 +152,7 @@
   IsAMDGCN = TT.getArch() == Triple::amdgcn;
   IsAMDHSA = TT.getOS() == Triple::AMDHSA;
 
-  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
+  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
   if (!ST.isPromoteAllocaEnabled())
     return false;
 
@@ -175,7 +175,7 @@
 std::pair<Value *, Value *>
 AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
   const Function &F = *Builder.GetInsertBlock()->getParent();
-  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
+  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
 
   if (!IsAMDHSA) {
     Function *LocalSizeYFn
@@ -261,8 +261,8 @@
 }
 
 Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
-  const AMDGPUCommonSubtarget &ST =
-      AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
+  const AMDGPUSubtarget &ST =
+      AMDGPUSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
   Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
 
   switch (N) {
@@ -603,7 +603,7 @@
 bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
   FunctionType *FTy = F.getFunctionType();
-  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
+  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
 
   // If the function has any arguments in the local address space, then it's
   // possible these arguments require the entire local memory space, so
@@ -730,7 +730,7 @@
   if (!SufficientLDS)
     return false;
 
-  const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction);
+  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
   unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
 
   const DataLayout &DL = Mod->getDataLayout();
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -21,7 +21,7 @@
 
 namespace llvm {
 
-class AMDGPUSubtarget;
+class GCNSubtarget;
 class TargetInstrInfo;
 
 struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -46,7 +46,19 @@
 
 class StringRef;
 
-class AMDGPUCommonSubtarget {
+class AMDGPUSubtarget {
+public:
+  enum Generation {
+    R600 = 0,
+    R700 = 1,
+    EVERGREEN = 2,
+    NORTHERN_ISLANDS = 3,
+    SOUTHERN_ISLANDS = 4,
+    SEA_ISLANDS = 5,
+    VOLCANIC_ISLANDS = 6,
+    GFX9 = 7
+  };
+
 private:
   Triple TargetTriple;
 
@@ -66,10 +78,10 @@
   unsigned WavefrontSize;
 
 public:
-  AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+  AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
 
-  static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
-  static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
+  static const AMDGPUSubtarget &get(const MachineFunction &MF);
+  static const AMDGPUSubtarget &get(const TargetMachine &TM,
                                     const Function &F);
 
   /// \returns Default range flat work group size for a calling convention.
@@ -219,21 +231,12 @@
   /// Creates value range metadata on an workitemid.* inrinsic call or load.
   bool makeLIDRangeMetadata(Instruction *I) const;
 
-  virtual ~AMDGPUCommonSubtarget() {}
+  virtual ~AMDGPUSubtarget() {}
 };
 
-class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
-                        public AMDGPUCommonSubtarget {
+class GCNSubtarget : public AMDGPUGenSubtargetInfo,
+                     public AMDGPUSubtarget {
 public:
-  enum Generation {
-    // Gap for R600 generations, so we can do comparisons between
-    // AMDGPUSubtarget and r600Subtarget.
-    SOUTHERN_ISLANDS = 4,
-    SEA_ISLANDS = 5,
-    VOLCANIC_ISLANDS = 6,
-    GFX9 = 7,
-  };
-
   enum {
     ISAVersion0_0_0,
     ISAVersion6_0_0,
@@ -274,8 +277,6 @@
   };
 
 private:
-  SIFrameLowering FrameLowering;
-
   /// GlobalISel related APIs.
   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
@@ -360,24 +361,34 @@
   SelectionDAGTargetInfo TSInfo;
   AMDGPUAS AS;
+private:
+  SITargetLowering TLInfo;
+  SIInstrInfo InstrInfo;
+  SIFrameLowering FrameLowering;
 public:
-  AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
-                  const TargetMachine &TM);
-  ~AMDGPUSubtarget() override;
+  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+               const GCNTargetMachine &TM);
+  ~GCNSubtarget() override;
 
-  AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
+  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
                                                    StringRef GPU, StringRef FS);
 
-  virtual const SIInstrInfo *getInstrInfo() const override = 0;
+  const SIInstrInfo *getInstrInfo() const override {
+    return &InstrInfo;
+  }
 
   const SIFrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
 
-  virtual const SITargetLowering *getTargetLowering() const override = 0;
+  const SITargetLowering *getTargetLowering() const override {
+    return &TLInfo;
+  }
 
-  virtual const SIRegisterInfo *getRegisterInfo() const override = 0;
+  const SIRegisterInfo *getRegisterInfo() const override {
+    return &InstrInfo.getRegisterInfo();
+  }
 
   const CallLowering *getCallLowering() const override {
     return CallLoweringInfo.get();
@@ -720,55 +731,7 @@
     return AMDGPU::IsaInfo::getWavesPerWorkGroup(
         MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
   }
-};
-
-class SISubtarget final : public AMDGPUSubtarget {
-private:
-  SIInstrInfo InstrInfo;
-  SIFrameLowering FrameLowering;
-  SITargetLowering TLInfo;
-
-  /// GlobalISel related APIs.
-  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
-  std::unique_ptr<InstructionSelector> InstSelector;
-  std::unique_ptr<LegalizerInfo> Legalizer;
-  std::unique_ptr<RegisterBankInfo> RegBankInfo;
-
-public:
-  SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
-              const GCNTargetMachine &TM);
-
-  const SIInstrInfo *getInstrInfo() const override {
-    return &InstrInfo;
-  }
-
-  const SIFrameLowering *getFrameLowering() const override {
-    return &FrameLowering;
-  }
-
-  const SITargetLowering *getTargetLowering() const override {
-    return &TLInfo;
-  }
-
-  const CallLowering *getCallLowering() const override {
-    return CallLoweringInfo.get();
-  }
-
-  const InstructionSelector *getInstructionSelector() const override {
-    return InstSelector.get();
-  }
-
-  const LegalizerInfo *getLegalizerInfo() const override {
-    return Legalizer.get();
-  }
-
-  const RegisterBankInfo *getRegBankInfo() const override {
-    return RegBankInfo.get();
-  }
-
-  const SIRegisterInfo *getRegisterInfo() const override {
-    return &InstrInfo.getRegisterInfo();
-  }
 
   // static wrappers
   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
@@ -988,12 +951,8 @@
       const override;
 };
 
-
 class R600Subtarget final : public R600GenSubtargetInfo,
-                            public AMDGPUCommonSubtarget {
-public:
-  enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
-
+                            public AMDGPUSubtarget {
 private:
   R600InstrInfo InstrInfo;
   R600FrameLowering FrameLowering;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -34,12 +34,14 @@
 
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
+#define AMDGPUSubtarget GCNSubtarget
 #include "AMDGPUGenSubtargetInfo.inc"
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
+#undef AMDGPUSubtarget
 #include "R600GenSubtargetInfo.inc"
 
-AMDGPUSubtarget::~AMDGPUSubtarget() = default;
+GCNSubtarget::~GCNSubtarget() = default;
 
 R600Subtarget &
 R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
@@ -51,7 +53,7 @@
   // FIXME: I don't think think Evergreen has any useful support for
   // denormals, but should be checked. Should we issue a warning somewhere
   // if someone tries to enable these?
-  if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) {
+  if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
     FP32Denormals = false;
   }
 
@@ -61,8 +63,8 @@
   return *this;
 }
 
-AMDGPUSubtarget &
-AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
+GCNSubtarget &
+GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                                  StringRef GPU, StringRef FS) {
   // Determine default and user-specified characteristics
   // On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
@@ -122,7 +124,7 @@
   return *this;
 }
 
-AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
                                  const FeatureBitset &FeatureBits) :
   TargetTriple(TT),
   SubtargetFeatureBits(FeatureBits),
@@ -140,11 +142,10 @@
   WavefrontSize(0)
   { }
 
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
-                                 const TargetMachine &TM) :
+GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
+                           const GCNTargetMachine &TM) :
     AMDGPUGenSubtargetInfo(TT, GPU, FS),
-    AMDGPUCommonSubtarget(TT, getFeatureBits()),
-    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+    AMDGPUSubtarget(TT, getFeatureBits()),
     TargetTriple(TT),
     Gen(SOUTHERN_ISLANDS),
    IsaVersion(ISAVersion0_0_0),
@@ -206,12 +207,20 @@
 
     ScalarizeGlobal(false),
 
-    FeatureDisable(false) {
+    FeatureDisable(false),
+    TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
+    InstrInfo(*this),
+    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
   AS = AMDGPU::getAMDGPUAS(TT);
+  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
+  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
+  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+  InstSelector.reset(new AMDGPUInstructionSelector(
+      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
   initializeSubtargetDependencies(TT, GPU, FS);
 }
 
-unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
+unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
   const Function &F) const {
   if (NWaves == 1)
     return getLocalMemorySize();
@@ -221,7 +230,7 @@
   return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
 }
 
-unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
+unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
   const Function &F) const {
   unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
   unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
@@ -234,13 +243,13 @@
 }
 
 unsigned
-AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
+AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
   const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
   return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
 }
 
 std::pair<unsigned, unsigned>
-AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
+AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
   switch (CC) {
   case CallingConv::AMDGPU_CS:
   case CallingConv::AMDGPU_KERNEL:
@@ -258,7 +267,7 @@
   }
 }
 
-std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
   const Function &F) const {
   // FIXME: 1024 if function.
   // Default minimum/maximum flat work group sizes.
@@ -288,7 +297,7 @@
   return Requested;
 }
 
-std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
   const Function &F) const {
   // Default minimum/maximum number of waves per execution unit.
   std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
@@ -336,7 +345,7 @@
   return Requested;
 }
 
-bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
+bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
   Function *Kernel = I->getParent()->getParent();
   unsigned MinSize = 0;
   unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
@@ -401,7 +410,7 @@
 R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
                              const TargetMachine &TM) :
   R600GenSubtargetInfo(TT, GPU, FS),
-  AMDGPUCommonSubtarget(TT, getFeatureBits()),
+  AMDGPUSubtarget(TT, getFeatureBits()),
   InstrInfo(*this),
   FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
   FMA(false),
@@ -417,20 +426,7 @@
   InstrItins(getInstrItineraryForCPU(GPU)),
   AS (AMDGPU::getAMDGPUAS(TT)) { }
 
-SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
-                         const GCNTargetMachine &TM)
-    : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this),
-      FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
-      TLInfo(TM, *this) {
-  CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
-  Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
-
-  RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
-  InstSelector.reset(new AMDGPUInstructionSelector(
-      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
-}
-
-void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                       unsigned NumRegionInstrs) const {
   // Track register pressure so the scheduler can try to decrease
   // pressure once register usage is above the threshold defined by
@@ -447,11 +443,11 @@
   Policy.ShouldTrackLaneMasks = true;
 }
 
-bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
+bool GCNSubtarget::isVGPRSpillingEnabled(const Function& F) const {
   return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
 }
 
-uint64_t SISubtarget::getExplicitKernArgSize(const Function &F) const {
+uint64_t GCNSubtarget::getExplicitKernArgSize(const Function &F) const {
   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL);
 
   const DataLayout &DL = F.getParent()->getDataLayout();
@@ -467,7 +463,7 @@
   return ExplicitArgBytes;
 }
 
-unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
+unsigned GCNSubtarget::getKernArgSegmentSize(const Function &F,
                                             int64_t ExplicitArgBytes) const {
   if (ExplicitArgBytes == -1)
     ExplicitArgBytes = getExplicitKernArgSize(F);
@@ -485,8 +481,8 @@
   return alignTo(TotalSize, 4);
 }
 
-unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
-  if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
+  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
     if (SGPRs <= 80)
       return 10;
     if (SGPRs <= 88)
@@ -508,7 +504,7 @@
   return 5;
 }
 
-unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
+unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
   if (VGPRs <= 24)
     return 10;
   if (VGPRs <= 28)
@@ -530,7 +526,7 @@
   return 1;
 }
 
-unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   if (MFI.hasFlatScratchInit()) {
     if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
@@ -544,7 +540,7 @@
   return 2; // VCC.
 }
 
-unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
 
@@ -594,7 +590,7 @@
                   MaxAddressableNumSGPRs);
 }
 
-unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
   const Function &F = MF.getFunction();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
 
@@ -675,21 +671,21 @@
   };
 } // namespace
 
-void SISubtarget::getPostRAMutations(
+void GCNSubtarget::getPostRAMutations(
     std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
   Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
 }
 
-const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) {
+const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
   if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
-    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<AMDGPUSubtarget>());
+    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
   else
-    return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<R600Subtarget>());
+    return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<R600Subtarget>());
 }
 
-const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) {
+const AMDGPUSubtarget &AMDGPUSubtarget::get(const TargetMachine &TM, const Function &F) {
   if (TM.getTargetTriple().getArch() == Triple::amdgcn)
-    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<AMDGPUSubtarget>(F));
+    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<GCNSubtarget>(F));
   else
-    return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
+    return static_cast<const AMDGPUSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
 }
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -100,7 +100,7 @@
 class GCNTargetMachine final : public AMDGPUTargetMachine {
 private:
   AMDGPUIntrinsicInfo IntrinsicInfo;
-  mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
+  mutable StringMap<std::unique_ptr<GCNSubtarget>> SubtargetMap;
 
 public:
   GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -110,7 +110,7 @@
 
   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
 
-  const SISubtarget *getSubtargetImpl(const Function &) const override;
+  const GCNSubtarget *getSubtargetImpl(const Function &) const override;
 
   TargetTransformInfo getTargetTransformInfo(const Function &F) override;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -466,7 +466,7 @@
                                    CodeGenOpt::Level OL, bool JIT)
     : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
 
-const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
+const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
   StringRef GPU = getGPUName(F);
   StringRef FS = getFeatureString(F);
 
@@ -479,7 +479,7 @@
     // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
     resetTargetOptions(F);
-    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);
+    I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
   }
 
   I->setScalarizeGlobalBehavior(ScalarizeGlobal);
@@ -750,7 +750,7 @@
 
 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
   MachineSchedContext *C) const {
-  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
   if (ST.enableSIScheduler())
     return createSIMachineScheduler(C);
   return createGCNMaxOccupancyMachineScheduler(C);
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -62,7 +62,7 @@
 
   friend BaseT;
 
-  const AMDGPUSubtarget *ST;
+  const GCNSubtarget *ST;
   const AMDGPUTargetLowering *TLI;
   AMDGPUTTIImpl CommonTTI;
   bool IsGraphicsShader;
@@ -91,7 +91,7 @@
     AMDGPU::HalfRate64Ops
   };
 
-  const AMDGPUSubtarget *getST() const { return ST; }
+  const GCNSubtarget *getST() const { return ST; }
   const AMDGPUTargetLowering *getTLI() const { return TLI; }
 
   static inline int getFullRateInstrCost() {
@@ -118,7 +118,7 @@
 public:
   explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
     : BaseT(TM, F.getParent()->getDataLayout()),
-      ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),
+      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
       TLI(ST->getTargetLowering()),
       CommonTTI(TM, F),
       IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
Index: llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -14,13 +14,13 @@
 //===----------------------------------------------------------------------===//
 
 def isEG : Predicate<
-  "Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && "
+  "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
   "!Subtarget->hasCaymanISA()"
 >;
 
 def isEGorCayman : Predicate<
-  "Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||"
-  "Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS"
+  "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
+  "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS"
 >;
 
 class EGPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -28,7 +28,7 @@
 class ScheduleDAG;
 class SIInstrInfo;
 class SIRegisterInfo;
-class SISubtarget;
+class GCNSubtarget;
 
 class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   // This variable stores the instruction that has been emitted this cycle. It
@@ -37,7 +37,7 @@
   MachineInstr *CurrCycleInstr;
   std::list<MachineInstr*> EmittedInstrs;
   const MachineFunction &MF;
-  const SISubtarget &ST;
+  const GCNSubtarget &ST;
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
Index: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -40,7 +40,7 @@
 GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
   CurrCycleInstr(nullptr),
   MF(MF),
-  ST(MF.getSubtarget<SISubtarget>()),
+  ST(MF.getSubtarget<GCNSubtarget>()),
   TII(*ST.getInstrInfo()),
   TRI(TII.getRegisterInfo()),
   ClauseUses(TRI.getNumRegUnits()),
@@ -356,13 +356,13 @@
 }
 
 int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   int WaitStatesNeeded = 0;
 
   WaitStatesNeeded = checkSoftClauseHazards(SMRD);
 
   // This SMRD hazard only affects SI.
-  if (ST.getGeneration() != SISubtarget::SOUTHERN_ISLANDS)
+  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
     return WaitStatesNeeded;
 
   // A read of an SGPR by SMRD instruction requires 4 wait states when the
@@ -399,7 +399,7 @@
 }
 
 int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
-  if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
+  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
     return 0;
 
   int WaitStatesNeeded = checkSoftClauseHazards(VMEM);
Index: llvm/trunk/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -108,7 +108,7 @@
 LLVM_DUMP_METHOD
 void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   for (const auto R : Regions) {
     OS << "Region to schedule ";
     printRegion(OS, R->Begin, R->End, LIS, 1);
@@ -132,7 +132,7 @@
 void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
                                          const GCNRegPressure &Before,
                                          const GCNRegPressure &After) const {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   OS << "RP before: ";
   Before.print(OS, &ST);
   OS << "RP after: ";
@@ -316,7 +316,7 @@
     if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
       dbgs() << "Max RP: ";
       Regions.back()->MaxPressure.print(
-          dbgs(), &MF.getSubtarget<SISubtarget>());
+          dbgs(), &MF.getSubtarget<GCNSubtarget>());
     }
     dbgs() << '\n';);
 }
@@ -418,7 +418,7 @@
 
 #ifndef NDEBUG
   const auto RegionMaxRP = getRegionPressure(R);
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
 #endif
   assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
     || (dbgs() << "Max RP mismatch!!!\n"
@@ -433,7 +433,7 @@
 
 // Sort recorded regions by pressure - highest at the front
 void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   llvm::sort(Regions.begin(), Regions.end(),
     [&ST, TargetOcc](const Region *R1, const Region *R2) {
       return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
@@ -451,7 +451,7 @@
 // BestSchedules aren't deleted on fail.
 unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
   // TODO: assert Regions are sorted descending by pressure
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
   LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
                     << ", current = " << Occ << '\n');
@@ -488,7 +488,7 @@
 void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   bool TryMaximizeOccupancy) {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto TgtOcc = MFI->getMinAllowedOccupancy();
@@ -542,7 +542,7 @@
 
 // Minimal Register Strategy
 void GCNIterativeScheduler::scheduleMinReg(bool force) {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   const auto TgtOcc = MFI->getOccupancy();
   sortRegionsByPressure(TgtOcc);
@@ -576,7 +576,7 @@
 
 void GCNIterativeScheduler::scheduleILP(
   bool TryMaximizeOccupancy) {
-  const auto &ST = MF.getSubtarget<SISubtarget>();
+  const auto &ST = MF.getSubtarget<GCNSubtarget>();
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto TgtOcc = MFI->getMinAllowedOccupancy();
Index: llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.h
@@ -49,7 +49,7 @@
   unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; }
   unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
 
-  unsigned getOccupancy(const SISubtarget &ST) const {
+  unsigned getOccupancy(const GCNSubtarget &ST) const {
     return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
                     ST.getOccupancyWithNumVGPRs(getVGPRNum()));
   }
@@ -59,11 +59,11 @@
                   LaneBitmask NewMask,
                   const MachineRegisterInfo &MRI);
 
-  bool higherOccupancy(const SISubtarget &ST, const GCNRegPressure& O) const {
+  bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure& O) const {
     return getOccupancy(ST) > O.getOccupancy(ST);
   }
 
-  bool less(const SISubtarget &ST, const GCNRegPressure& O,
+  bool less(const GCNSubtarget &ST, const GCNRegPressure& O,
             unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
 
   bool operator==(const GCNRegPressure &O) const {
@@ -74,7 +74,7 @@
     return !(*this == O);
   }
 
-  void print(raw_ostream &OS, const SISubtarget *ST = nullptr) const;
+  void print(raw_ostream &OS, const GCNSubtarget *ST = nullptr) const;
   void dump() const { print(dbgs()); }
 
 private:
Index: llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -132,7 +132,7 @@
   }
 }
 
-bool GCNRegPressure::less(const SISubtarget &ST,
+bool GCNRegPressure::less(const GCNSubtarget &ST,
                           const GCNRegPressure& O,
                           unsigned MaxOccupancy) const {
   const auto SGPROcc = std::min(MaxOccupancy,
@@ -178,7 +178,7 @@
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD
-void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const {
+void GCNRegPressure::print(raw_ostream &OS, const GCNSubtarget *ST) const {
   OS << "VGPRs: " << getVGPRNum();
   if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGPRNum()) << ')';
   OS << ", SGPRs: " << getSGPRNum();
Index: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -21,7 +21,7 @@
 
 class SIMachineFunctionInfo;
 class SIRegisterInfo;
-class SISubtarget;
+class GCNSubtarget;
 
 /// This is a minimal scheduler strategy.  The main difference between this
 /// and the GenericScheduler is that GCNSchedStrategy uses different
@@ -62,7 +62,7 @@
 
 class GCNScheduleDAGMILive : public ScheduleDAGMILive {
 
-  const SISubtarget &ST;
+  const GCNSubtarget &ST;
 
   SIMachineFunctionInfo &MFI;
Index: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -35,7 +35,7 @@
 
   MF = &DAG->MF;
 
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
 
   // FIXME: This is also necessary, because some passes that run after
   // scheduling and before regalloc increase register pressure.
@@ -294,7 +294,7 @@
 GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
                         std::unique_ptr<MachineSchedStrategy> S) :
   ScheduleDAGMILive(C, std::move(S)),
-  ST(MF.getSubtarget<SISubtarget>()),
+  ST(MF.getSubtarget<GCNSubtarget>()),
   MFI(*MF.getInfo<SIMachineFunctionInfo>()),
   StartingOccupancy(MFI.getOccupancy()),
   MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
Index: llvm/trunk/lib/Target/AMDGPU/R600AsmPrinter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -69,7 +69,7 @@
   }
 
   unsigned RsrcReg;
-  if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
+  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
     // Evergreen / Northern Islands
     switch (MF.getFunction().getCallingConv()) {
     default: LLVM_FALLTHROUGH;
Index: llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -137,7 +137,7 @@
     return 0;
   case CFStack::FIRST_NON_WQM_PUSH:
     assert(!ST->hasCaymanISA());
-    if (ST->getGeneration() <= R600Subtarget::R700) {
+    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
       // +1 For the push operation.
       // +2 Extra space required.
       return 3;
@@ -150,7 +150,7 @@
       return 2;
     }
   case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
-    assert(ST->getGeneration() >= R600Subtarget::EVERGREEN);
+    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    // +1 For the push operation.
     // +1 Extra space required.
     return 2;
@@ -177,7 +177,7 @@
           // See comment in
           // CFStack::getSubEntrySize()
   else if (CurrentEntries > 0 &&
-           ST->getGeneration() > R600Subtarget::EVERGREEN &&
+           ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
           !ST->hasCaymanISA() &&
           !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
     Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
@@ -250,7 +250,7 @@
 
   const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
     unsigned Opcode = 0;
-    bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
+    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
     switch (CFI) {
     case CF_TC:
       Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
Index: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -791,7 +791,7 @@
   SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
       DAG.getNode(ISD::FADD, DL, VT, FractPart,
         DAG.getConstantFP(-0.5, DL, MVT::f32)));
-  if (Gen >= R600Subtarget::R700)
+  if (Gen >= AMDGPUSubtarget::R700)
     return TrigVal;
   // On R600 hw, COS/SIN input must be between -Pi and Pi.
   return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
Index: llvm/trunk/lib/Target/AMDGPU/R600InstrFormats.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600InstrFormats.td
+++ llvm/trunk/lib/Target/AMDGPU/R600InstrFormats.td
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">;
+def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">;
 
 def isR600toCayman : Predicate<
-  "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">;
+  "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">;
 
 class R600Pat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
   let SubtargetPredicate = isR600toCayman;
 }
Index: llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -1320,7 +1320,7 @@
                                              const {
   assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented");
   unsigned Opcode;
-  if (ST.getGeneration() <= R600Subtarget::R700)
+  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
     Opcode = R600::DOT4_r600;
   else
     Opcode = R600::DOT4_eg;
Index: llvm/trunk/lib/Target/AMDGPU/R600MachineScheduler.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ llvm/trunk/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -346,7 +346,7 @@
       LLVM_DEBUG(dbgs() << "New Slot\n");
       assert (OccupedSlotsMask && "Slot wasn't filled");
       OccupedSlotsMask = 0;
-//      if (HwGen == R600Subtarget::NORTHERN_ISLANDS)
+//      if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
 //        OccupedSlotsMask |= 16;
       InstructionsGroupCandidate.clear();
       LoadAlu();
Index: llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -20,8 +20,6 @@
 
 namespace llvm {
 
-class AMDGPUSubtarget;
-
 struct R600RegisterInfo final : public R600GenRegisterInfo {
   RegClassWeight RCW;
Index: llvm/trunk/lib/Target/AMDGPU/R700Instructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/R700Instructions.td
+++ llvm/trunk/lib/Target/AMDGPU/R700Instructions.td
@@ -13,7 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">;
+def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
 
 let Predicates = [isR700] in {
 def SIN_r700 : SIN_Common<0x6E>;
Index: llvm/trunk/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
@@ -63,7 +63,7 @@
 bool SIDebuggerInsertNops::runOnMachineFunction(MachineFunction &MF) {
   // Skip this pass if "amdgpu-debugger-insert-nops" attribute was not
   // specified.
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   if (!ST.debuggerInsertNops())
     return false;
Index: llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -568,7 +568,7 @@
 }
 
 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
Index: llvm/trunk/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -47,7 +47,7 @@
 char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID;
 
 bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
   bool Changed = false;
Index: llvm/trunk/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFixWWMLiveness.cpp
@@ -185,7 +185,7 @@
   // This doesn't actually need LiveIntervals, but we can preserve them.
   LIS = getAnalysisIfAvailable<LiveIntervals>();
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
 
   TRI = &TII->getRegisterInfo();
Index: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -76,7 +76,7 @@
   MachineRegisterInfo *MRI;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
-  const AMDGPUSubtarget *ST;
+  const GCNSubtarget *ST;
 
   void foldOperand(MachineOperand &OpToFold,
                    MachineInstr *UseMI,
@@ -972,7 +972,7 @@
     return false;
 
   MRI = &MF.getRegInfo();
-  ST = &MF.getSubtarget<AMDGPUSubtarget>();
+  ST = &MF.getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();
Index: llvm/trunk/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -70,7 +70,7 @@
   bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
                       GCNDownwardRPTracker &RPT);
 
-  const SISubtarget *ST;
+  const GCNSubtarget *ST;
   const SIRegisterInfo *TRI;
   const MachineRegisterInfo *MRI;
   SIMachineFunctionInfo *MFI;
@@ -296,7 +296,7 @@
   if (skipFunction(MF.getFunction()))
     return false;
 
-  ST = &MF.getSubtarget<SISubtarget>();
+  ST = &MF.getSubtarget<GCNSubtarget>();
   if (!ST->isXNACKEnabled())
     return false;
Index: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
@@ -17,7 +17,7 @@
 class SIInstrInfo;
 class SIMachineFunctionInfo;
 class SIRegisterInfo;
-class SISubtarget;
+class GCNSubtarget;
 
 class SIFrameLowering final : public AMDGPUFrameLowering {
 public:
@@ -48,19 +48,19 @@
                                 MachineBasicBlock::iterator MI) const override;
 
 private:
-  void emitFlatScratchInit(const SISubtarget &ST,
+  void emitFlatScratchInit(const GCNSubtarget &ST,
                            MachineFunction &MF,
                            MachineBasicBlock &MBB) const;
 
   unsigned getReservedPrivateSegmentBufferReg(
-    const SISubtarget &ST,
+    const GCNSubtarget &ST,
     const SIInstrInfo *TII,
     const SIRegisterInfo *TRI,
     SIMachineFunctionInfo *MFI,
     MachineFunction &MF) const;
 
   std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg(
-    const SISubtarget &ST,
+    const GCNSubtarget &ST,
     const SIInstrInfo *TII,
     const SIRegisterInfo *TRI,
     SIMachineFunctionInfo *MFI,
@@ -70,7 +70,7 @@
   void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 
   // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
-  void emitEntryFunctionScratchSetup(const SISubtarget &ST, MachineFunction &MF,
+  void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF,
                                      MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
                                      MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
                                      unsigned ScratchRsrcReg) const;
Index: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -23,19 +23,19 @@
 
 using namespace llvm;
 
-static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
+static ArrayRef<MCPhysReg> getAllSGPR128(const GCNSubtarget &ST,
                                          const MachineFunction &MF) {
   return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
                       ST.getMaxNumSGPRs(MF) / 4);
 }
 
-static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
+static ArrayRef<MCPhysReg> getAllSGPRs(const GCNSubtarget &ST,
                                        const MachineFunction &MF) {
   return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
                       ST.getMaxNumSGPRs(MF));
 }
 
-void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
+void SIFrameLowering::emitFlatScratchInit(const GCNSubtarget &ST,
                                           MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -98,7 +98,7 @@
 }
 
 unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
-  const SISubtarget &ST,
+  const GCNSubtarget &ST,
   const SIInstrInfo *TII,
   const SIRegisterInfo *TRI,
   SIMachineFunctionInfo *MFI,
@@ -149,7 +149,7 @@
 // SGPRs.
 std::pair<unsigned, unsigned>
 SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
-  const SISubtarget &ST,
+  const GCNSubtarget &ST,
   const SIInstrInfo *TII,
   const SIRegisterInfo *TRI,
   SIMachineFunctionInfo *MFI,
@@ -220,7 +220,7 @@
                                                 MachineBasicBlock &MBB) const {
   // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
   // specified.
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   if (ST.debuggerEmitPrologue())
     emitDebuggerPrologue(MF, MBB);
 
@@ -364,7 +364,7 @@
 }
 
 // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
-void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST,
+void SIFrameLowering::emitEntryFunctionScratchSetup(const GCNSubtarget &ST,
       MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
       MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
       unsigned ScratchRsrcReg) const {
@@ -508,7 +508,7 @@
 
 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock &MBB) {
   MachineFunction *MF = MBB.getParent();
-  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
   const SIRegisterInfo &TRI = *Subtarget.getRegisterInfo();
   LivePhysRegs LiveRegs(TRI);
   LiveRegs.addLiveIns(MBB);
@@ -537,7 +537,7 @@
   }
 
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
 
@@ -607,7 +607,7 @@
   if (FuncInfo->isEntryFunction())
     return;
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
 
@@ -654,7 +654,7 @@
 
 int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
-  const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
 
   FrameReg = RI->getFrameRegister(MF);
   return MF.getFrameInfo().getObjectOffset(FI);
@@ -668,7 +668,7 @@
   if (!MFI.hasStackObjects())
     return;
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
@@ -750,7 +750,7 @@
   if (Amount == 0)
     return MBB.erase(I);
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const DebugLoc &DL = I->getDebugLoc();
   unsigned Opc = I->getOpcode();
@@ -779,7 +779,7 @@
 
 void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
                                            MachineBasicBlock &MBB) const {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -829,7 +829,7 @@
 }
 
 bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
-  const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
   // All stack operations are relative to the frame offset SGPR.
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF);
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
@@ -23,7 +23,7 @@
 
 class SITargetLowering final : public AMDGPUTargetLowering {
 private:
-  const SISubtarget *Subtarget;
+  const GCNSubtarget *Subtarget;
 
   SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
                                    SDValue Chain, uint64_t Offset) const;
@@ -162,9 +162,9 @@
   bool shouldEmitPCReloc(const GlobalValue *GV) const;
 
 public:
-  SITargetLowering(const TargetMachine &tm, const SISubtarget &STI);
+  SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
 
-  const SISubtarget *getSubtarget() const;
+  const GCNSubtarget *getSubtarget() const;
 
   bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const override;
 
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -112,7 +112,7 @@
 }
 
 SITargetLowering::SITargetLowering(const TargetMachine &TM,
-                                   const SISubtarget &STI)
+                                   const GCNSubtarget &STI)
     : AMDGPUTargetLowering(TM, STI),
       Subtarget(&STI) {
   addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
@@ -378,7 +378,7 @@
   setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
   setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
 
-  if (Subtarget->getGeneration() >= SISubtarget::SEA_ISLANDS) {
+  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
     setOperationAction(ISD::FCEIL, MVT::f64, Legal);
     setOperationAction(ISD::FRINT, MVT::f64, Legal);
@@ -667,7 +667,7 @@
   setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
 }
 
-const SISubtarget *SITargetLowering::getSubtarget() const {
+const GCNSubtarget *SITargetLowering::getSubtarget() const {
   return Subtarget;
 }
 
@@ -708,12 +708,12 @@
 
     if (RsrcIntr->IsImage) {
       Info.ptrVal = MFI->getImagePSV(
-        *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+        *MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
        CI.getArgOperand(RsrcIntr->RsrcArg));
       Info.align = 0;
     } else {
       Info.ptrVal = MFI->getBufferPSV(
-        *MF.getSubtarget<SISubtarget>().getInstrInfo(),
+        *MF.getSubtarget<GCNSubtarget>().getInstrInfo(),
        CI.getArgOperand(RsrcIntr->RsrcArg));
     }
 
@@ -877,16 +877,16 @@
   if (Ty->isSized() && DL.getTypeStoreSize(Ty) < 4)
     return isLegalGlobalAddressingMode(AM);
 
-  if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
+  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
     // SMRD instructions have an 8-bit, dword offset on SI.
     if (!isUInt<8>(AM.BaseOffs / 4))
       return false;
-  } else if (Subtarget->getGeneration() == SISubtarget::SEA_ISLANDS) {
+  } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
     // On CI+, this can also be a 32-bit literal constant offset. If it fits
     // in 8-bits, it can use a smaller encoding.
     if (!isUInt<32>(AM.BaseOffs / 4))
       return false;
-  } else if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+  } else if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
     // On VI, these use the SMEM format and the offset is 20-bit in bytes.
     if (!isUInt<20>(AM.BaseOffs))
       return false;
@@ -1560,7 +1560,7 @@
   // the scratch registers to pass in.
   bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   if (ST.isAmdCodeObjectV2(MF.getFunction())) {
     if (RequiresStackAccess) {
       // If we have stack objects, we unquestionably need the private buffer
@@ -1676,7 +1676,7 @@
   const Function &Fn = MF.getFunction();
   FunctionType *FType = MF.getFunction().getFunctionType();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 
   if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
     DiagnosticInfoUnsupported NoGraphicsHSA(
@@ -1808,7 +1808,7 @@
 
       auto *ParamTy =
         dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
-      if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
+      if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
           ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
         // On SI local pointers are just offsets into LDS, so they are always
        // less than 16-bits.  On CI and newer they could potentially be
@@ -2668,7 +2668,7 @@
 
   }
 
-  if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
+  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
       Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
     report_fatal_error(Twine("invalid register \""
                              + StringRef(RegName) + "\" for subtarget."));
@@ -2959,7 +2959,7 @@
 // Control flow needs to be inserted if indexing with a VGPR.
 static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
                                           MachineBasicBlock &MBB,
-                                          const SISubtarget &ST) {
+                                          const GCNSubtarget &ST) {
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   MachineFunction *MF = MBB.getParent();
@@ -3050,7 +3050,7 @@
 
 static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
                                           MachineBasicBlock &MBB,
-                                          const SISubtarget &ST) {
+                                          const GCNSubtarget &ST) {
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
   MachineFunction *MF = MBB.getParent();
@@ -3964,7 +3964,7 @@
   SDLoc SL(Op);
   SDValue Chain = Op.getOperand(0);
 
-  if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa ||
+  if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
       !Subtarget->isTrapHandlerEnabled())
     return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
 
@@ -3979,7 +3979,7 @@
                                    QueuePtr, SDValue());
   SDValue Ops[] = {
     ToReg,
-    DAG.getTargetConstant(SISubtarget::TrapIDLLVMTrap, SL, MVT::i16),
+    DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMTrap, SL, MVT::i16),
     SGPR01,
     ToReg.getValue(1)
   };
@@ -3991,7 +3991,7 @@
   SDValue Chain = Op.getOperand(0);
   MachineFunction &MF = DAG.getMachineFunction();
 
-  if (Subtarget->getTrapHandlerAbi() != SISubtarget::TrapHandlerAbiHsa ||
+  if (Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa ||
       !Subtarget->isTrapHandlerEnabled()) {
     DiagnosticInfoUnsupported NoTrap(MF.getFunction(),
                                      "debugtrap handler not supported",
@@ -4004,7 +4004,7 @@
 
   SDValue Ops[] = {
     Chain,
-    DAG.getTargetConstant(SISubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16)
+    DAG.getTargetConstant(GCNSubtarget::TrapIDLLVMDebugTrap, SL, MVT::i16)
   };
   return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
 }
@@ -4513,7 +4513,7 @@
 
     MVT StoreVT = VData.getSimpleValueType();
     if (StoreVT.getScalarType() == MVT::f16) {
-      if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS ||
+      if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
          !BaseOpcode->HasD16)
        return Op; // D16 is unsupported for this instruction
 
@@ -4526,7 +4526,7 @@
   } else {
     MVT LoadVT = Op.getSimpleValueType();
     if (LoadVT.getScalarType() == MVT::f16) {
-      if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS ||
+      if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ||
          !BaseOpcode->HasD16)
        return Op; // D16 is unsupported for this instruction
 
@@ -4620,7 +4620,7 @@
   int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32;
   int Opcode = -1;
 
-  if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
+  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
     Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
                                    NumVDataDwords, NumVAddrDwords);
   if (Opcode == -1)
@@ -4699,16 +4699,16 @@
   case Intrinsic::amdgcn_rsq:
     return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
   case Intrinsic::amdgcn_rsq_legacy:
-    if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return emitRemovedIntrinsicError(DAG, DL, VT);
 
     return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
   case Intrinsic::amdgcn_rcp_legacy:
-    if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
+    if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return emitRemovedIntrinsicError(DAG, DL, VT);
     return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
   case Intrinsic::amdgcn_rsq_clamp: {
-    if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
 
     Type *Type = VT.getTypeForEVT(*DAG.getContext());
@@ -4845,7 +4845,7 @@
     return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
   case Intrinsic::amdgcn_log_clamp: {
-    if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
       return SDValue();
 
     DiagnosticInfoUnsupported BadIntrin(
@@ -5278,7 +5278,7 @@
   }
   case Intrinsic::amdgcn_s_barrier: {
     if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
-      const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+      const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
       unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second;
       if (WGSize <= ST.getWavefrontSize())
         return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
@@ -5889,7 +5889,7 @@
 
   SDValue Scale;
 
-  if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS) {
+  if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
     // Workaround a hardware bug on SI where the condition output from div_scale
     // is not usable.
 
@@ -6709,7 +6709,7 @@
 }
 
 static bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
-                            const SISubtarget *ST, unsigned MaxDepth=5) {
+                            const GCNSubtarget *ST, unsigned MaxDepth=5) {
   // If source is a result of another standard FP operation it is already in
   // canonical form.
@@ -8296,7 +8296,7 @@
   if (R)
   {
     const MachineFunction * MF = FLI->MF;
-    const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+    const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
     const MachineRegisterInfo &MRI = MF->getRegInfo();
     const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
     unsigned Reg = R->getReg();
Index: llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -339,7 +339,7 @@
 }
 
 bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
   TRI = &TII->getRegisterInfo();
   SkipThreshold = SkipThresholdFlag;
Index: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -136,7 +136,7 @@
 // "s_waitcnt 0" before use.
 class BlockWaitcntBrackets {
 public:
-  BlockWaitcntBrackets(const SISubtarget *SubTarget) : ST(SubTarget) {
+  BlockWaitcntBrackets(const GCNSubtarget *SubTarget) : ST(SubTarget) {
     for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
          T = (enum InstCounterType)(T + 1)) {
       memset(VgprScores[T], 0, sizeof(VgprScores[T]));
@@ -314,7 +314,7 @@
   void dump() { print(dbgs()); }
 
 private:
-  const SISubtarget *ST = nullptr;
+  const GCNSubtarget *ST = nullptr;
   bool WaitAtBeginning = false;
   bool RevisitLoop = false;
   bool MixedExpTypes = false;
@@ -364,7 +364,7 @@
 
 class SIInsertWaitcnts : public MachineFunctionPass {
 private:
-  const SISubtarget *ST = nullptr;
+  const GCNSubtarget *ST = nullptr;
   const SIInstrInfo *TII = nullptr;
   const SIRegisterInfo *TRI = nullptr;
   const MachineRegisterInfo *MRI = nullptr;
@@ -1837,7 +1837,7 @@
 }
 
 bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
-  ST = &MF.getSubtarget<SISubtarget>();
+  ST = &MF.getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 def isGCN : Predicate<"Subtarget->getGeneration() "
-                      ">= SISubtarget::SOUTHERN_ISLANDS">,
+                      ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
             AssemblerPredicate<"FeatureGCN">;
 def isSI : Predicate<"Subtarget->getGeneration() "
-                     "== SISubtarget::SOUTHERN_ISLANDS">,
+                     "== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
            AssemblerPredicate<"FeatureSouthernIslands">;
 
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
@@ -39,13 +39,13 @@
 class APInt;
 class MachineRegisterInfo;
 class RegScavenger;
-class SISubtarget;
+class GCNSubtarget;
 class TargetRegisterClass;
 
 class SIInstrInfo final : public AMDGPUGenInstrInfo {
 private:
   const SIRegisterInfo RI;
-  const SISubtarget &ST;
+  const GCNSubtarget &ST;
 
   // The inverse predicate should have the negative value.
   enum BranchPredicate {
@@ -147,7 +147,7 @@
     MO_REL32_HI = 5
   };
 
-  explicit SIInstrInfo(const SISubtarget &ST);
+  explicit SIInstrInfo(const GCNSubtarget &ST);
 
   const SIRegisterInfo &getRegisterInfo() const {
     return RI;
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -84,7 +84,7 @@
 BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                  cl::desc("Restrict range of branch instructions (DEBUG)"));
 
-SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
+SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
     RI(ST), ST(ST) {}
 
@@ -1035,7 +1035,7 @@
                                unsigned FrameOffset, unsigned Size) const {
   MachineFunction *MF = MBB.getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
   DebugLoc DL = MBB.findDebugLoc(MI);
   unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
   unsigned WavefrontSize = ST.getWavefrontSize();
@@ -2915,7 +2915,7 @@
     }
   }
 
-  if (isFLAT(MI) && !MF->getSubtarget<SISubtarget>().hasFlatInstOffsets()) {
+  if (isFLAT(MI) && !MF->getSubtarget<GCNSubtarget>().hasFlatInstOffsets()) {
     const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset);
     if (Offset->getImm() != 0) {
       ErrInfo = "subtarget does not support offsets in flat instructions";
@@ -3666,8 +3666,8 @@
   } else {
     // This instructions is the _OFFSET variant, so we need to convert it to
     // ADDR64.
-    assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration()
-           < SISubtarget::VOLCANIC_ISLANDS &&
+    assert(MBB.getParent()->getSubtarget<GCNSubtarget>().getGeneration()
+           < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
            "FIXME: Need to emit flat atomics here");
 
     MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata);
@@ -3803,37 +3803,37 @@
       continue;
 
     case AMDGPU::S_LSHL_B32:
-      if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
         NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_ASHR_I32:
-      if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
         NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_LSHR_B32:
-      if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
         NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_LSHL_B64:
-      if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
         NewOpcode = AMDGPU::V_LSHLREV_B64;
         swapOperands(Inst);
       }
       break;
     case AMDGPU::S_ASHR_I64:
-      if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_ASHRREV_I64;
        swapOperands(Inst);
      }
      break;
    case AMDGPU::S_LSHR_B64:
-      if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+      if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
        NewOpcode = AMDGPU::V_LSHRREV_B64;
        swapOperands(Inst);
      }
@@ -4633,12 +4633,12 @@
   uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
   if (ST.isAmdHsaOS()) {
     // Set ATC = 1. GFX9 doesn't have this bit.
-    if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
+    if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS)
       RsrcDataFormat |= (1ULL << 56);
 
     // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
     // BTW, it disables TC L2 and therefore decreases performance.
-    if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
+    if (ST.getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS)
       RsrcDataFormat |= (2ULL << 59);
   }
 
@@ -4651,7 +4651,7 @@
                   0xffffffff; // Size;
 
   // GFX9 doesn't have ELEMENT_SIZE.
-  if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
+  if (ST.getGeneration() <= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
     uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
     Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
   }
@@ -4661,7 +4661,7 @@
 
   // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
   // Clear them unless we want a huge stride.
-  if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
+  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
     Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
 
   return Rsrc23;
@@ -4996,13 +4996,15 @@
   GFX9 = 5
 };
 
-static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) {
+static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
   switch (ST.getGeneration()) {
-  case SISubtarget::SOUTHERN_ISLANDS:
-  case SISubtarget::SEA_ISLANDS:
+  default:
+    break;
+  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+  case AMDGPUSubtarget::SEA_ISLANDS:
     return SIEncodingFamily::SI;
-  case SISubtarget::VOLCANIC_ISLANDS:
-  case SISubtarget::GFX9:
+  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+  case AMDGPUSubtarget::GFX9:
     return SIEncodingFamily::VI;
   }
 
   llvm_unreachable("Unknown subtarget generation!");
@@ -5012,11 +5014,11 @@
   SIEncodingFamily Gen = subtargetEncodingFamily(ST);
 
   if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
-      ST.getGeneration() >= SISubtarget::GFX9)
+      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
     Gen = SIEncodingFamily::GFX9;
 
   if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
-    Gen = ST.getGeneration() == SISubtarget::GFX9 ? SIEncodingFamily::SDWA9
+    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                   : SIEncodingFamily::SDWA;
 
   // Adjust the encoding family to GFX80 for D16 buffer instructions when the
   // subtarget has UnpackedD16VMem feature.
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 def isCI : Predicate<"Subtarget->getGeneration() "
-                     ">= SISubtarget::SEA_ISLANDS">;
+                     ">= AMDGPUSubtarget::SEA_ISLANDS">;
 def isCIOnly : Predicate<"Subtarget->getGeneration() =="
-                         "SISubtarget::SEA_ISLANDS">,
+                         "AMDGPUSubtarget::SEA_ISLANDS">,
   AssemblerPredicate <"FeatureSeaIslands">;
 def isVIOnly : Predicate<"Subtarget->getGeneration() =="
-                         "SISubtarget::VOLCANIC_ISLANDS">,
+                         "AMDGPUSubtarget::VOLCANIC_ISLANDS">,
   AssemblerPredicate <"FeatureVolcanicIslands">;
 
 def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
@@ -487,7 +487,7 @@
 }]>;
 
 class VGPRImm <dag frag> : PatLeaf<frag, [{
-  if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) {
+  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
     return false;
   }
   const SIRegisterInfo *SIRI =
Index: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -103,7 +103,7 @@
   };
 
 private:
-  const SISubtarget *STM = nullptr;
+  const GCNSubtarget *STM = nullptr;
   const SIInstrInfo *TII = nullptr;
   const SIRegisterInfo *TRI = nullptr;
   MachineRegisterInfo *MRI = nullptr;
@@ -939,7 +939,7 @@
   if (skipFunction(MF.getFunction()))
     return false;
 
-  STM = &MF.getSubtarget<SISubtarget>();
+  STM = &MF.getSubtarget<GCNSubtarget>();
   if (!STM->loadStoreOptEnabled())
     return false;
 
Index: llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -486,7 +486,7 @@
 }
 
 bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
   TRI = &TII->getRegisterInfo();
 
Index: llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -66,7 +66,7 @@
 bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
 
Index: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -47,7 +47,7 @@
     ImplicitArgPtr(false),
     GITPtrHigh(0xffffffff),
     HighBitsOf32BitAddress(0) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const Function &F = MF.getFunction();
   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
   WavesPerEU = ST.getWavesPerEU(F);
@@ -178,7 +178,7 @@
 
 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
   limitOccupancy(getMaxWavesPerEU());
-  const SISubtarget& ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                  MF.getFunction()));
 }
@@ -253,7 +253,7 @@
   if (!SpillLanes.empty())
     return true;
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
Index: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -257,12 +257,12 @@
 
   IsaInfo::IsaVersion IV;
 
-  SICacheControl(const SISubtarget &ST);
+  SICacheControl(const GCNSubtarget &ST);
 
 public:
 
   /// Create a cache control for the subtarget \p ST.
-  static std::unique_ptr<SICacheControl> create(const SISubtarget &ST);
+  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
 
   /// Update \p MI memory load instruction to bypass any caches up to
   /// the \p Scope memory scope for address spaces \p
@@ -322,7 +322,7 @@
 
 public:
 
-  SIGfx6CacheControl(const SISubtarget &ST) : SICacheControl(ST) {};
+  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};
 
   bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
@@ -346,7 +346,7 @@
 class SIGfx7CacheControl : public SIGfx6CacheControl {
 public:
 
-  SIGfx7CacheControl(const SISubtarget &ST) : SIGfx6CacheControl(ST) {};
+  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
 
   bool insertCacheInvalidate(MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
@@ -606,14 +606,14 @@
   return constructFromMIWithMMO(MI);
 }
 
-SICacheControl::SICacheControl(const SISubtarget &ST) {
+SICacheControl::SICacheControl(const GCNSubtarget &ST) {
   TII = ST.getInstrInfo();
   IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
 }
 
 /* static */
-std::unique_ptr<SICacheControl> SICacheControl::create(const SISubtarget &ST) {
-  AMDGPUSubtarget::Generation Generation = ST.getGeneration();
+std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
+  GCNSubtarget::Generation Generation = ST.getGeneration();
   if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
     return make_unique<SIGfx6CacheControl>(ST);
   return make_unique<SIGfx7CacheControl>(ST);
@@ -1012,7 +1012,7 @@
   bool Changed = false;
 
   SIMemOpAccess MOA(MF);
-  CC = SICacheControl::create(MF.getSubtarget<SISubtarget>());
+  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
 
   for (auto &MBB : MF) {
     for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
Index: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -209,7 +209,7 @@
   if (skipFunction(MF.getFunction()))
     return false;
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
 
Index: llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -107,7 +107,7 @@
   if (skipFunction(MF.getFunction()))
     return false;
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   const SIInstrInfo *TII = ST.getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
Index: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -90,9 +90,9 @@
   bool runOnMachineFunction(MachineFunction &MF) override;
   void matchSDWAOperands(MachineBasicBlock &MBB);
   std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
-  bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const;
+  bool isConvertibleToSDWA(const MachineInstr &MI, const GCNSubtarget &ST) const;
   bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
-  void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const;
+  void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;
 
   StringRef getPassName() const override { return "SI Peephole SDWA"; }
 
@@ -855,7 +855,7 @@
 }
 
 bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
-                                         const SISubtarget &ST) const {
+                                         const GCNSubtarget &ST) const {
   // Check if this is already an SDWA instruction
   unsigned Opc = MI.getOpcode();
   if (TII->isSDWA(Opc))
@@ -1082,7 +1082,7 @@
 // If an instruction was converted to SDWA it should not have immediates or SGPR
 // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs.
 void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
-                                            const SISubtarget &ST) const {
+                                            const GCNSubtarget &ST) const {
   const MCInstrDesc &Desc = TII->get(MI.getOpcode());
   unsigned ConstantBusCount = 0;
   for (MachineOperand &Op : MI.explicit_uses()) {
@@ -1113,7 +1113,7 @@
 }
 
 bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 
   if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
     return false;
 
Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -21,10 +21,9 @@
 
 namespace llvm {
 
-class AMDGPUSubtarget;
+class GCNSubtarget;
 class LiveIntervals;
 class MachineRegisterInfo;
-class SISubtarget;
 class SIMachineFunctionInfo;
 
 class SIRegisterInfo final : public AMDGPURegisterInfo {
@@ -39,7 +39,7 @@
   void classifyPressureSet(unsigned PSetID, unsigned Reg,
                            BitVector &PressureSets) const;
 public:
-  SIRegisterInfo(const SISubtarget &ST);
+  SIRegisterInfo(const GCNSubtarget &ST);
 
   bool spillSGPRToVGPR() const {
     return SpillSGPRToVGPR;
Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -56,7 +56,7 @@
   cl::ReallyHidden,
   cl::init(true));
 
-SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
+SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
   AMDGPURegisterInfo(),
   SGPRPressureSets(getNumRegPressureSets()),
   VGPRPressureSets(getNumRegPressureSets()),
@@ -106,7 +106,7 @@
 
 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
   const MachineFunction &MF) const {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
@@ -131,7 +131,7 @@
 
 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
   const MachineFunction &MF) const {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
 }
@@ -173,7 +173,7 @@
   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
 
@@ -253,7 +253,7 @@
   // create a virtual register for it during frame index elimination, so the
   // scavenger is directly needed.
   return MF.getFrameInfo().hasStackObjects() &&
-         MF.getSubtarget<SISubtarget>().hasScalarStores() &&
+         MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
         MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
 }
 
@@ -308,7 +308,7 @@
     DL = Ins->getDebugLoc();
 
   MachineFunction *MF = MBB->getParent();
-  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = Subtarget.getInstrInfo();
 
   if (Offset == 0) {
@@ -337,7 +337,7 @@
 
   MachineBasicBlock *MBB = MI.getParent();
   MachineFunction *MF = MBB->getParent();
-  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = Subtarget.getInstrInfo();
 
 #ifndef NDEBUG
@@ -524,7 +524,7 @@
                                            RegScavenger *RS) const {
   MachineBasicBlock *MBB = MI->getParent();
   MachineFunction *MF = MI->getParent()->getParent();
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const MachineFrameInfo &MFI = MF->getFrameInfo();
 
@@ -647,7 +647,7 @@
     return false;
 
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
 
   unsigned SuperReg = MI->getOperand(0).getReg();
@@ -825,7 +825,7 @@
     return false;
 
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
 
   const DebugLoc &DL = MI->getDebugLoc();
@@ -985,7 +985,7 @@
   MachineBasicBlock *MBB = MI->getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
 
@@ -1527,7 +1527,7 @@
 
 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
                                              MachineFunction &MF) const {
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
Index: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -292,7 +292,7 @@
     return false;
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
 
Index: llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -849,7 +849,7 @@
   LowerToCopyInstrs.clear();
   CallingConv = MF.getFunction().getCallingConv();
 
-  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
   TRI = &TII->getRegisterInfo();