diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/CodeGen/TargetPassConfig.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -18,8 +18,8 @@ #include "AMDGPUAsmPrinter.h" #include "AMDGPU.h" #include "AMDGPUHSAMetadataStreamer.h" -#include "AMDGPUSubtarget.h" #include "AMDKernelCodeT.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600AsmPrinter.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRBuilder.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -15,7 +15,6 @@ #include "AMDGPUCallLowering.h" #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -14,7 +14,7 @@ #include "AMDGPUHSAMetadataStreamer.h" #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIMachineFunctionInfo.h" #include "SIProgramInfo.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -16,7 +16,7 @@ #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUMachineFunction.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/IR/DiagnosticInfo.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -16,6 +16,8 @@ #include "AMDGPUInstrInfo.h" #include "AMDGPUTargetTransformInfo.h" +#include "GCNSubtarget.h" +#include "R600Subtarget.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -16,7 +16,6 @@ #include "AMDGPUGlobalISelUtils.h" #include "AMDGPUInstrInfo.h" #include "AMDGPURegisterBankInfo.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -13,7 +13,7 @@ #include "AMDGPU.h" #include "AMDGPULibFunc.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Loads.h" #include "llvm/IR/IntrinsicsAMDGPU.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/MDBuilder.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUTargetMachine.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -13,9 +13,9 @@ // #include "AMDGPUAsmPrinter.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMacroFusion.h" -#include "AMDGPUSubtarget.h" - +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" #include "llvm/CodeGen/MacroFusion.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -13,7 +13,8 @@ #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetPassConfig.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -27,11 +27,15 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Cloning.h" + #define DEBUG_TYPE "amdgpu-propagate-attributes" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -13,8 +13,7 @@ #include "AMDGPU.h" #include "AMDGPULegalizerInfo.h" -#include "AMDGPUSubtarget.h" -#include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -73,7 +73,7 @@ #include "AMDGPU.h" #include "AMDGPUGlobalISelUtils.h" #include "AMDGPUInstrInfo.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1,4 +1,4 @@ -//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====// +//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,46 +7,24 @@ //==-----------------------------------------------------------------------===// // /// \file -/// AMDGPU specific subclass of TargetSubtarget. +/// Base class for AMDGPU specific classes of TargetSubtarget. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H -#include "AMDGPUCallLowering.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "R600FrameLowering.h" -#include "R600ISelLowering.h" -#include "R600InstrInfo.h" -#include "SIFrameLowering.h" -#include "SIISelLowering.h" -#include "SIInstrInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/Support/Alignment.h" namespace llvm { -class MCInst; -class MCInstrInfo; - -} // namespace llvm - -#define GET_SUBTARGETINFO_HEADER -#include "AMDGPUGenSubtargetInfo.inc" -#define GET_SUBTARGETINFO_HEADER -#include "R600GenSubtargetInfo.inc" - -namespace llvm { - +enum AMDGPUDwarfFlavour : unsigned; class Function; class Instruction; class MachineFunction; -class StringRef; class TargetMachine; -class GCNTargetMachine; class AMDGPUSubtarget { public: @@ -254,1175 +232,11 @@ /// \returns Corresponsing DWARF register number mapping flavour for the /// \p WavefrontSize. - AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const { - return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32 - : AMDGPUDwarfFlavour::Wave64; - } + AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; virtual ~AMDGPUSubtarget() {} }; -class GCNSubtarget : public AMDGPUGenSubtargetInfo, - public AMDGPUSubtarget { - - using AMDGPUSubtarget::getMaxWavesPerEU; - -public: - enum TrapHandlerAbi { - TrapHandlerAbiNone = 0, - TrapHandlerAbiHsa = 1 - }; - - enum TrapID { - TrapIDHardwareReserved = 0, - TrapIDHSADebugTrap = 1, - TrapIDLLVMTrap = 2, - TrapIDLLVMDebugTrap = 3, - TrapIDDebugBreakpoint = 7, - TrapIDDebugReserved8 = 8, - TrapIDDebugReservedFE = 0xfe, - TrapIDDebugReservedFF = 0xff - }; - - enum TrapRegValues { - LLVMTrapHandlerRegValue = 1 - }; - -private: - /// GlobalISel related APIs. - std::unique_ptr CallLoweringInfo; - std::unique_ptr InlineAsmLoweringInfo; - std::unique_ptr InstSelector; - std::unique_ptr Legalizer; - std::unique_ptr RegBankInfo; - -protected: - // Basic subtarget description. - Triple TargetTriple; - unsigned Gen; - InstrItineraryData InstrItins; - int LDSBankCount; - unsigned MaxPrivateElementSize; - - // Possibly statically set by tablegen, but may want to be overridden. - bool FastFMAF32; - bool FastDenormalF32; - bool HalfRate64Ops; - - // Dynamically set bits that enable features. - bool FlatForGlobal; - bool AutoWaitcntBeforeBarrier; - bool UnalignedScratchAccess; - bool UnalignedAccessMode; - bool HasApertureRegs; - bool EnableXNACK; - bool DoesNotSupportXNACK; - bool EnableCuMode; - bool TrapHandler; - - // Used as options. - bool EnableLoadStoreOpt; - bool EnableUnsafeDSOffsetFolding; - bool EnableSIScheduler; - bool EnableDS128; - bool EnablePRTStrictNull; - bool DumpCode; - - // Subtarget statically properties set by tablegen - bool FP64; - bool FMA; - bool MIMG_R128; - bool IsGCN; - bool GCN3Encoding; - bool CIInsts; - bool GFX8Insts; - bool GFX9Insts; - bool GFX10Insts; - bool GFX10_3Insts; - bool GFX7GFX8GFX9Insts; - bool SGPRInitBug; - bool HasSMemRealTime; - bool HasIntClamp; - bool HasFmaMixInsts; - bool HasMovrel; - bool HasVGPRIndexMode; - bool HasScalarStores; - bool HasScalarAtomics; - bool HasSDWAOmod; - bool HasSDWAScalar; - bool HasSDWASdst; - bool HasSDWAMac; - bool HasSDWAOutModsVOPC; - bool HasDPP; - bool HasDPP8; - bool HasR128A16; - bool HasGFX10A16; - bool HasG16; - bool HasNSAEncoding; - bool GFX10_BEncoding; - bool HasDLInsts; - bool HasDot1Insts; - bool HasDot2Insts; - bool HasDot3Insts; - bool HasDot4Insts; - bool HasDot5Insts; - bool HasDot6Insts; - bool HasMAIInsts; - bool HasPkFmacF16Inst; - bool HasAtomicFaddInsts; - bool EnableSRAMECC; - bool DoesNotSupportSRAMECC; - bool HasNoSdstCMPX; - bool HasVscnt; - bool HasGetWaveIdInst; - bool HasSMemTimeInst; - bool HasRegisterBanking; - bool HasVOP3Literal; - bool HasNoDataDepHazard; - bool FlatAddressSpace; - bool FlatInstOffsets; - bool FlatGlobalInsts; - bool FlatScratchInsts; - bool ScalarFlatScratchInsts; - bool AddNoCarryInsts; - bool HasUnpackedD16VMem; - bool R600ALUInst; - bool CaymanISA; - bool CFALUBug; - bool LDSMisalignedBug; - bool HasMFMAInlineLiteralBug; - bool HasVertexCache; - short TexVTXClauseSize; - bool UnalignedBufferAccess; - bool UnalignedDSAccess; - bool ScalarizeGlobal; - - bool HasVcmpxPermlaneHazard; - bool HasVMEMtoScalarWriteHazard; - bool HasSMEMtoVectorWriteHazard; - bool HasInstFwdPrefetchBug; - bool HasVcmpxExecWARHazard; - bool HasLdsBranchVmemWARHazard; - bool HasNSAtoVMEMBug; - bool HasOffset3fBug; - bool HasFlatSegmentOffsetBug; - bool HasImageStoreD16Bug; - bool HasImageGather4D16Bug; - - // Dummy feature to use for assembler in tablegen. - bool FeatureDisable; - - SelectionDAGTargetInfo TSInfo; -private: - SIInstrInfo InstrInfo; - SITargetLowering TLInfo; - SIFrameLowering FrameLowering; - -public: - // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. - static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); - - GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const GCNTargetMachine &TM); - ~GCNSubtarget() override; - - GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, - StringRef GPU, StringRef FS); - - const SIInstrInfo *getInstrInfo() const override { - return &InstrInfo; - } - - const SIFrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const SITargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const SIRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InlineAsmLowering *getInlineAsmLowering() const override { - return InlineAsmLoweringInfo.get(); - } - - InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } - - // Nothing implemented, just prevent crashes on use. - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; - } - - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); - - Generation getGeneration() const { - return (Generation)Gen; - } - - /// Return the number of high bits known to be zero fror a frame index. - unsigned getKnownHighZeroBitsForFrameIndex() const { - return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); - } - - int getLDSBankCount() const { - return LDSBankCount; - } - - unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { - return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; - } - - unsigned getConstantBusLimit(unsigned Opcode) const; - - bool hasIntClamp() const { - return HasIntClamp; - } - - bool hasFP64() const { - return FP64; - } - - bool hasMIMG_R128() const { - return MIMG_R128; - } - - bool hasHWFP64() const { - return FP64; - } - - bool hasFastFMAF32() const { - return FastFMAF32; - } - - bool hasHalfRate64Ops() const { - return HalfRate64Ops; - } - - bool hasAddr64() const { - return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); - } - - bool hasFlat() const { - return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); - } - - // Return true if the target only has the reverse operand versions of VALU - // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). - bool hasOnlyRevVALUShifts() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - bool hasFractBug() const { - return getGeneration() == SOUTHERN_ISLANDS; - } - - bool hasBFE() const { - return true; - } - - bool hasBFI() const { - return true; - } - - bool hasBFM() const { - return hasBFE(); - } - - bool hasBCNT(unsigned Size) const { - return true; - } - - bool hasFFBL() const { - return true; - } - - bool hasFFBH() const { - return true; - } - - bool hasMed3_16() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } - - bool hasMin3Max3_16() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } - - bool hasFmaMixInsts() const { - return HasFmaMixInsts; - } - - bool hasCARRY() const { - return true; - } - - bool hasFMA() const { - return FMA; - } - - bool hasSwap() const { - return GFX9Insts; - } - - bool hasScalarPackInsts() const { - return GFX9Insts; - } - - bool hasScalarMulHiInsts() const { - return GFX9Insts; - } - - TrapHandlerAbi getTrapHandlerAbi() const { - return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; - } - - /// True if the offset field of DS instructions works as expected. On SI, the - /// offset uses a 16-bit adder and does not always wrap properly. - bool hasUsableDSOffset() const { - return getGeneration() >= SEA_ISLANDS; - } - - bool unsafeDSOffsetFoldingEnabled() const { - return EnableUnsafeDSOffsetFolding; - } - - /// Condition output from div_scale is usable. - bool hasUsableDivScaleConditionOutput() const { - return getGeneration() != SOUTHERN_ISLANDS; - } - - /// Extra wait hazard is needed in some cases before - /// s_cbranch_vccnz/s_cbranch_vccz. - bool hasReadVCCZBug() const { - return getGeneration() <= SEA_ISLANDS; - } - - /// Writes to VCC_LO/VCC_HI update the VCCZ flag. - bool partialVCCWritesUpdateVCCZ() const { - return getGeneration() >= GFX10; - } - - /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR - /// was written by a VALU instruction. - bool hasSMRDReadVALUDefHazard() const { - return getGeneration() == SOUTHERN_ISLANDS; - } - - /// A read of an SGPR by a VMEM instruction requires 5 wait states when the - /// SGPR was written by a VALU Instruction. - bool hasVMEMReadSGPRVALUDefHazard() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - bool hasRFEHazards() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. - unsigned getSetRegWaitStates() const { - return getGeneration() <= SEA_ISLANDS ? 1 : 2; - } - - bool dumpCode() const { - return DumpCode; - } - - /// Return the amount of LDS that can be used that will not restrict the - /// occupancy lower than WaveCount. - unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, - const Function &) const; - - bool supportsMinMaxDenormModes() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } - - /// \returns If target supports S_DENORM_MODE. - bool hasDenormModeInst() const { - return getGeneration() >= AMDGPUSubtarget::GFX10; - } - - bool useFlatForGlobal() const { - return FlatForGlobal; - } - - /// \returns If target supports ds_read/write_b128 and user enables generation - /// of ds_read/write_b128. - bool useDS128() const { - return CIInsts && EnableDS128; - } - - /// \return If target supports ds_read/write_b96/128. - bool hasDS96AndDS128() const { - return CIInsts; - } - - /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 - bool haveRoundOpsF64() const { - return CIInsts; - } - - /// \returns If MUBUF instructions always perform range checking, even for - /// buffer resources used for private memory access. - bool privateMemoryResourceIsRangeChecked() const { - return getGeneration() < AMDGPUSubtarget::GFX9; - } - - /// \returns If target requires PRT Struct NULL support (zero result registers - /// for sparse texture support). - bool usePRTStrictNull() const { - return EnablePRTStrictNull; - } - - bool hasAutoWaitcntBeforeBarrier() const { - return AutoWaitcntBeforeBarrier; - } - - bool hasUnalignedBufferAccess() const { - return UnalignedBufferAccess; - } - - bool hasUnalignedBufferAccessEnabled() const { - return UnalignedBufferAccess && UnalignedAccessMode; - } - - bool hasUnalignedDSAccess() const { - return UnalignedDSAccess; - } - - bool hasUnalignedDSAccessEnabled() const { - return UnalignedDSAccess && UnalignedAccessMode; - } - - bool hasUnalignedScratchAccess() const { - return UnalignedScratchAccess; - } - - bool hasUnalignedAccessMode() const { - return UnalignedAccessMode; - } - - bool hasApertureRegs() const { - return HasApertureRegs; - } - - bool isTrapHandlerEnabled() const { - return TrapHandler; - } - - bool isXNACKEnabled() const { - return EnableXNACK; - } - - bool isCuModeEnabled() const { - return EnableCuMode; - } - - bool hasFlatAddressSpace() const { - return FlatAddressSpace; - } - - bool hasFlatScrRegister() const { - return hasFlatAddressSpace(); - } - - bool hasFlatInstOffsets() const { - return FlatInstOffsets; - } - - bool hasFlatGlobalInsts() const { - return FlatGlobalInsts; - } - - bool hasFlatScratchInsts() const { - return FlatScratchInsts; - } - - // Check if target supports ST addressing mode with FLAT scratch instructions. - // The ST addressing mode means no registers are used, either VGPR or SGPR, - // but only immediate offset is swizzled and added to the FLAT scratch base. - bool hasFlatScratchSTMode() const { - return hasFlatScratchInsts() && hasGFX10_3Insts(); - } - - bool hasScalarFlatScratchInsts() const { - return ScalarFlatScratchInsts; - } - - bool hasGlobalAddTidInsts() const { - return GFX10_BEncoding; - } - - bool hasAtomicCSub() const { - return GFX10_BEncoding; - } - - bool hasMultiDwordFlatScratchAddressing() const { - return getGeneration() >= GFX9; - } - - bool hasFlatSegmentOffsetBug() const { - return HasFlatSegmentOffsetBug; - } - - bool hasFlatLgkmVMemCountInOrder() const { - return getGeneration() > GFX9; - } - - bool hasD16LoadStore() const { - return getGeneration() >= GFX9; - } - - bool d16PreservesUnusedBits() const { - return hasD16LoadStore() && !isSRAMECCEnabled(); - } - - bool hasD16Images() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - /// Return if most LDS instructions have an m0 use that require m0 to be - /// iniitalized. - bool ldsRequiresM0Init() const { - return getGeneration() < GFX9; - } - - // True if the hardware rewinds and replays GWS operations if a wave is - // preempted. - // - // If this is false, a GWS operation requires testing if a nack set the - // MEM_VIOL bit, and repeating if so. - bool hasGWSAutoReplay() const { - return getGeneration() >= GFX9; - } - - /// \returns if target has ds_gws_sema_release_all instruction. - bool hasGWSSemaReleaseAll() const { - return CIInsts; - } - - /// \returns true if the target has integer add/sub instructions that do not - /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, - /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier - /// for saturation. - bool hasAddNoCarry() const { - return AddNoCarryInsts; - } - - bool hasUnpackedD16VMem() const { - return HasUnpackedD16VMem; - } - - // Covers VS/PS/CS graphics shaders - bool isMesaGfxShader(const Function &F) const { - return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); - } - - bool hasMad64_32() const { - return getGeneration() >= SEA_ISLANDS; - } - - bool hasSDWAOmod() const { - return HasSDWAOmod; - } - - bool hasSDWAScalar() const { - return HasSDWAScalar; - } - - bool hasSDWASdst() const { - return HasSDWASdst; - } - - bool hasSDWAMac() const { - return HasSDWAMac; - } - - bool hasSDWAOutModsVOPC() const { - return HasSDWAOutModsVOPC; - } - - bool hasDLInsts() const { - return HasDLInsts; - } - - bool hasDot1Insts() const { - return HasDot1Insts; - } - - bool hasDot2Insts() const { - return HasDot2Insts; - } - - bool hasDot3Insts() const { - return HasDot3Insts; - } - - bool hasDot4Insts() const { - return HasDot4Insts; - } - - bool hasDot5Insts() const { - return HasDot5Insts; - } - - bool hasDot6Insts() const { - return HasDot6Insts; - } - - bool hasMAIInsts() const { - return HasMAIInsts; - } - - bool hasPkFmacF16Inst() const { - return HasPkFmacF16Inst; - } - - bool hasAtomicFaddInsts() const { - return HasAtomicFaddInsts; - } - - bool isSRAMECCEnabled() const { - return EnableSRAMECC; - } - - bool hasNoSdstCMPX() const { - return HasNoSdstCMPX; - } - - bool hasVscnt() const { - return HasVscnt; - } - - bool hasGetWaveIdInst() const { - return HasGetWaveIdInst; - } - - bool hasSMemTimeInst() const { - return HasSMemTimeInst; - } - - bool hasRegisterBanking() const { - return HasRegisterBanking; - } - - bool hasVOP3Literal() const { - return HasVOP3Literal; - } - - bool hasNoDataDepHazard() const { - return HasNoDataDepHazard; - } - - bool vmemWriteNeedsExpWaitcnt() const { - return getGeneration() < SEA_ISLANDS; - } - - // Scratch is allocated in 256 dword per wave blocks for the entire - // wavefront. When viewed from the perspecive of an arbitrary workitem, this - // is 4-byte aligned. - // - // Only 4-byte alignment is really needed to access anything. Transformations - // on the pointer value itself may rely on the alignment / known low bits of - // the pointer. Set this to something above the minimum to avoid needing - // dynamic realignment in common cases. - Align getStackAlignment() const { return Align(16); } - - bool enableMachineScheduler() const override { - return true; - } - - bool useAA() const override; - - bool enableSubRegLiveness() const override { - return true; - } - - void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } - bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } - - // static wrappers - static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); - - // XXX - Why is this here if it isn't in the default pass set? - bool enableEarlyIfConversion() const override { - return true; - } - - bool enableFlatScratch() const; - - void overrideSchedPolicy(MachineSchedPolicy &Policy, - unsigned NumRegionInstrs) const override; - - unsigned getMaxNumUserSGPRs() const { - return 16; - } - - bool hasSMemRealTime() const { - return HasSMemRealTime; - } - - bool hasMovrel() const { - return HasMovrel; - } - - bool hasVGPRIndexMode() const { - return HasVGPRIndexMode; - } - - bool useVGPRIndexMode() const; - - bool hasScalarCompareEq64() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } - - bool hasScalarStores() const { - return HasScalarStores; - } - - bool hasScalarAtomics() const { - return HasScalarAtomics; - } - - bool hasLDSFPAtomics() const { - return GFX8Insts; - } - - bool hasDPP() const { - return HasDPP; - } - - bool hasDPPBroadcasts() const { - return HasDPP && getGeneration() < GFX10; - } - - bool hasDPPWavefrontShifts() const { - return HasDPP && getGeneration() < GFX10; - } - - bool hasDPP8() const { - return HasDPP8; - } - - bool hasR128A16() const { - return HasR128A16; - } - - bool hasGFX10A16() const { - return HasGFX10A16; - } - - bool hasA16() const { return hasR128A16() || hasGFX10A16(); } - - bool hasG16() const { return HasG16; } - - bool hasOffset3fBug() const { - return HasOffset3fBug; - } - - bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } - - bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } - - bool hasNSAEncoding() const { return HasNSAEncoding; } - - bool hasGFX10_BEncoding() const { - return GFX10_BEncoding; - } - - bool hasGFX10_3Insts() const { - return GFX10_3Insts; - } - - bool hasMadF16() const; - - bool enableSIScheduler() const { - return EnableSIScheduler; - } - - bool loadStoreOptEnabled() const { - return EnableLoadStoreOpt; - } - - bool hasSGPRInitBug() const { - return SGPRInitBug; - } - - bool hasMFMAInlineLiteralBug() const { - return HasMFMAInlineLiteralBug; - } - - bool has12DWordStoreHazard() const { - return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; - } - - // \returns true if the subtarget supports DWORDX3 load/store instructions. - bool hasDwordx3LoadStores() const { - return CIInsts; - } - - bool hasReadM0MovRelInterpHazard() const { - return getGeneration() == AMDGPUSubtarget::GFX9; - } - - bool hasReadM0SendMsgHazard() const { - return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && - getGeneration() <= AMDGPUSubtarget::GFX9; - } - - bool hasVcmpxPermlaneHazard() const { - return HasVcmpxPermlaneHazard; - } - - bool hasVMEMtoScalarWriteHazard() const { - return HasVMEMtoScalarWriteHazard; - } - - bool hasSMEMtoVectorWriteHazard() const { - return HasSMEMtoVectorWriteHazard; - } - - bool hasLDSMisalignedBug() const { - return LDSMisalignedBug && !EnableCuMode; - } - - bool hasInstFwdPrefetchBug() const { - return HasInstFwdPrefetchBug; - } - - bool hasVcmpxExecWARHazard() const { - return HasVcmpxExecWARHazard; - } - - bool hasLdsBranchVmemWARHazard() const { - return HasLdsBranchVmemWARHazard; - } - - bool hasNSAtoVMEMBug() const { - return HasNSAtoVMEMBug; - } - - bool hasHardClauses() const { return getGeneration() >= GFX10; } - - /// Return the maximum number of waves per SIMD for kernels using \p SGPRs - /// SGPRs - unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; - - /// Return the maximum number of waves per SIMD for kernels using \p VGPRs - /// VGPRs - unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; - - /// Return occupancy for the given function. Used LDS and a number of - /// registers if provided. - /// Note, occupancy can be affected by the scratch allocation as well, but - /// we do not have enough information to compute it. - unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, - unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; - - /// \returns true if the flat_scratch register should be initialized with the - /// pointer to the wave's scratch memory rather than a size and offset. - bool flatScratchIsPointer() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } - - /// \returns true if the machine has merged shaders in which s0-s7 are - /// reserved by the hardware and user SGPRs start at s8 - bool hasMergedShaders() const { - return getGeneration() >= GFX9; - } - - /// \returns SGPR allocation granularity supported by the subtarget. - unsigned getSGPRAllocGranule() const { - return AMDGPU::IsaInfo::getSGPRAllocGranule(this); - } - - /// \returns SGPR encoding granularity supported by the subtarget. - unsigned getSGPREncodingGranule() const { - return AMDGPU::IsaInfo::getSGPREncodingGranule(this); - } - - /// \returns Total number of SGPRs supported by the subtarget. - unsigned getTotalNumSGPRs() const { - return AMDGPU::IsaInfo::getTotalNumSGPRs(this); - } - - /// \returns Addressable number of SGPRs supported by the subtarget. - unsigned getAddressableNumSGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); - } - - /// \returns Minimum number of SGPRs that meets the given number of waves per - /// execution unit requirement supported by the subtarget. - unsigned getMinNumSGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); - } - - /// \returns Maximum number of SGPRs that meets the given number of waves per - /// execution unit requirement supported by the subtarget. - unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { - return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); - } - - /// \returns Reserved number of SGPRs for given function \p MF. - unsigned getReservedNumSGPRs(const MachineFunction &MF) const; - - /// \returns Maximum number of SGPRs that meets number of waves per execution - /// unit requirement for function \p MF, or number of SGPRs explicitly - /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. - /// - /// \returns Value that meets number of waves per execution unit requirement - /// if explicitly requested value cannot be converted to integer, violates - /// subtarget's specifications, or does not meet number of waves per execution - /// unit requirement. - unsigned getMaxNumSGPRs(const MachineFunction &MF) const; - - /// \returns VGPR allocation granularity supported by the subtarget. - unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(this); - } - - /// \returns VGPR encoding granularity supported by the subtarget. - unsigned getVGPREncodingGranule() const { - return AMDGPU::IsaInfo::getVGPREncodingGranule(this); - } - - /// \returns Total number of VGPRs supported by the subtarget. - unsigned getTotalNumVGPRs() const { - return AMDGPU::IsaInfo::getTotalNumVGPRs(this); - } - - /// \returns Addressable number of VGPRs supported by the subtarget. - unsigned getAddressableNumVGPRs() const { - return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); - } - - /// \returns Minimum number of VGPRs that meets given number of waves per - /// execution unit requirement supported by the subtarget. - unsigned getMinNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); - } - - /// \returns Maximum number of VGPRs that meets given number of waves per - /// execution unit requirement supported by the subtarget. - unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { - return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); - } - - /// \returns Maximum number of VGPRs that meets number of waves per execution - /// unit requirement for function \p MF, or number of VGPRs explicitly - /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. - /// - /// \returns Value that meets number of waves per execution unit requirement - /// if explicitly requested value cannot be converted to integer, violates - /// subtarget's specifications, or does not meet number of waves per execution - /// unit requirement. - unsigned getMaxNumVGPRs(const MachineFunction &MF) const; - - void getPostRAMutations( - std::vector> &Mutations) - const override; - - bool isWave32() const { - return getWavefrontSize() == 32; - } - - bool isWave64() const { - return getWavefrontSize() == 64; - } - - const TargetRegisterClass *getBoolRC() const { - return getRegisterInfo()->getBoolRC(); - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); - } - - /// \returns Number of waves per execution unit required to support the given - /// \p FlatWorkGroupSize. - unsigned - getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); - } - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const override { - return AMDGPU::IsaInfo::getMinWavesPerEU(this); - } - - void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, - SDep &Dep) const override; -}; - -class R600Subtarget final : public R600GenSubtargetInfo, - public AMDGPUSubtarget { -private: - R600InstrInfo InstrInfo; - R600FrameLowering FrameLowering; - bool FMA; - bool CaymanISA; - bool CFALUBug; - bool HasVertexCache; - bool R600ALUInst; - bool FP64; - short TexVTXClauseSize; - Generation Gen; - R600TargetLowering TLInfo; - InstrItineraryData InstrItins; - SelectionDAGTargetInfo TSInfo; - -public: - R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const TargetMachine &TM); - - const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } - - const R600FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const R600TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const R600RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - - const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; - } - - // Nothing implemented, just prevent crashes on use. - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); - - Generation getGeneration() const { - return Gen; - } - - Align getStackAlignment() const { return Align(4); } - - R600Subtarget &initializeSubtargetDependencies(const Triple &TT, - StringRef GPU, StringRef FS); - - bool hasBFE() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasBFI() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasBCNT(unsigned Size) const { - if (Size == 32) - return (getGeneration() >= EVERGREEN); - - return false; - } - - bool hasBORROW() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasCARRY() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasCaymanISA() const { - return CaymanISA; - } - - bool hasFFBL() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasFFBH() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasFMA() const { return FMA; } - - bool hasCFAluBug() const { return CFALUBug; } - - bool hasVertexCache() const { return HasVertexCache; } - - short getTexVTXClauseSize() const { return TexVTXClauseSize; } - - bool enableMachineScheduler() const override { - return true; - } - - bool enableSubRegLiveness() const override { - return true; - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); - } - - /// \returns Number of waves per execution unit required to support the given - /// \p FlatWorkGroupSize. - unsigned - getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); - } - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const override { - return AMDGPU::IsaInfo::getMinWavesPerEU(this); - } -}; - } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -604,6 +604,11 @@ return alignTo(TotalSize, 4); } +AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour() const { + return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32 + : AMDGPUDwarfFlavour::Wave64; +} + R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/GPU, FS), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -14,7 +14,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "R600Subtarget.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -19,14 +19,18 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/BasicTTIImpl.h" namespace llvm { class AMDGPUTargetLowering; +class GCNSubtarget; class InstCombiner; class Loop; +class R600Subtarget; class ScalarEvolution; +class SITargetLowering; class Type; class Value; @@ -38,7 +42,7 @@ Triple TargetTriple; - const GCNSubtarget *ST; + const TargetSubtargetInfo *ST; const TargetLoweringBase *TLI; const TargetSubtargetInfo *getST() const { return ST; } @@ -68,34 +72,10 @@ bool HasFP64FP16Denormals; unsigned MaxVGPRs; - const FeatureBitset InlineFeatureIgnoreList = { - // Codegen control options which don't matter. - AMDGPU::FeatureEnableLoadStoreOpt, - AMDGPU::FeatureEnableSIScheduler, - AMDGPU::FeatureEnableUnsafeDSOffsetFolding, - AMDGPU::FeatureFlatForGlobal, - AMDGPU::FeaturePromoteAlloca, - AMDGPU::FeatureUnalignedScratchAccess, - AMDGPU::FeatureUnalignedAccessMode, - - AMDGPU::FeatureAutoWaitcntBeforeBarrier, - - // Property of the kernel/environment which can't actually differ. - AMDGPU::FeatureSGPRInitBug, - AMDGPU::FeatureXNACK, - AMDGPU::FeatureTrapHandler, - - // The default assumption needs to be ecc is enabled, but no directly - // exposed operations depend on it, so it can be safely inlined. - AMDGPU::FeatureSRAMECC, - - // Perf-tuning features - AMDGPU::FeatureFastFMAF32, - AMDGPU::HalfRate64Ops - }; + static const FeatureBitset InlineFeatureIgnoreList; const GCNSubtarget *getST() const { return ST; } - const AMDGPUTargetLowering *getTLI() const { return TLI; } + const SITargetLowering *getTLI() const { return TLI; } static inline int getFullRateInstrCost() { return TargetTransformInfo::TCC_Basic; @@ -117,11 +97,8 @@ // On some parts, normal fp64 operations are half rate, and others // quarter. This also applies to some integer operations. - inline int get64BitInstrCost( - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const { - return ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind) - : getQuarterRateInstrCost(CostKind); - } + int get64BitInstrCost( + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; public: explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -239,6 +239,26 @@ BaseT::getPeelingPreferences(L, SE, PP); } +const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = { + // Codegen control options which don't matter. + AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler, + AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureFlatForGlobal, + AMDGPU::FeaturePromoteAlloca, AMDGPU::FeatureUnalignedScratchAccess, + AMDGPU::FeatureUnalignedAccessMode, + + AMDGPU::FeatureAutoWaitcntBeforeBarrier, + + // Property of the kernel/environment which can't actually differ. + AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK, + AMDGPU::FeatureTrapHandler, + + // The default assumption needs to be ecc is enabled, but no directly + // exposed operations depend on it, so it can be safely inlined. + AMDGPU::FeatureSRAMECC, + + // Perf-tuning features + AMDGPU::FeatureFastFMAF32, AMDGPU::HalfRate64Ops}; + GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(static_cast(TM->getSubtargetImpl(F))), @@ -1113,6 +1133,11 @@ CommonTTI.getPeelingPreferences(L, SE, PP); } +int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const { + return ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind) + : getQuarterRateInstrCost(CostKind); +} + R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(static_cast(TM->getSubtargetImpl(F))), diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -7,8 +7,9 @@ //==-----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600RegisterInfo.h" +#include "R600Subtarget.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -38,7 +38,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -11,7 +11,8 @@ //===----------------------------------------------------------------------===// #include "GCNHazardRecognizer.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/TargetParser.h" diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp --- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "GCNIterativeScheduler.h" -#include "AMDGPUSubtarget.h" #include "GCNSchedStrategy.h" #include "SIMachineFunctionInfo.h" diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervals.h" diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp --- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -31,7 +31,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -17,7 +17,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H #define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/CodeGen/LiveIntervals.h" #include diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "GCNRegPressure.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "GCNSchedStrategy.h" -#include "AMDGPUSubtarget.h" #include "SIMachineFunctionInfo.h" #define DEBUG_TYPE "machine-scheduler" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h copy from llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h copy to llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1,4 +1,4 @@ -//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====// +//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,25 +7,19 @@ //==-----------------------------------------------------------------------===// // /// \file -/// AMDGPU specific subclass of TargetSubtarget. +/// AMD GCN specific subclass of TargetSubtarget. // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H -#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H +#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H +#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H #include "AMDGPUCallLowering.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "R600FrameLowering.h" -#include "R600ISelLowering.h" -#include "R600InstrInfo.h" +#include "AMDGPUSubtarget.h" #include "SIFrameLowering.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" -#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/Support/Alignment.h" namespace llvm { @@ -36,234 +30,13 @@ #define GET_SUBTARGETINFO_HEADER #include "AMDGPUGenSubtargetInfo.inc" -#define GET_SUBTARGETINFO_HEADER -#include "R600GenSubtargetInfo.inc" namespace llvm { -class Function; -class Instruction; -class MachineFunction; -class StringRef; -class TargetMachine; class GCNTargetMachine; -class AMDGPUSubtarget { -public: - enum Generation { - INVALID = 0, - R600 = 1, - R700 = 2, - EVERGREEN = 3, - NORTHERN_ISLANDS = 4, - SOUTHERN_ISLANDS = 5, - SEA_ISLANDS = 6, - VOLCANIC_ISLANDS = 7, - GFX9 = 8, - GFX10 = 9 - }; - -private: - Triple TargetTriple; - -protected: - bool Has16BitInsts; - bool HasMadMixInsts; - bool HasMadMacF32Insts; - bool HasDsSrc2Insts; - bool HasSDWA; - bool HasVOP3PInsts; - bool HasMulI24; - bool HasMulU24; - bool HasInv2PiInlineImm; - bool HasFminFmaxLegacy; - bool EnablePromoteAlloca; - bool HasTrigReducedRange; - unsigned MaxWavesPerEU; - unsigned LocalMemorySize; - char WavefrontSizeLog2; - -public: - AMDGPUSubtarget(const Triple &TT); - - static const AMDGPUSubtarget &get(const MachineFunction &MF); - static const AMDGPUSubtarget &get(const TargetMachine &TM, - const Function &F); - - /// \returns Default range flat work group size for a calling convention. - std::pair getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; - - /// \returns Subtarget's default pair of minimum/maximum flat work group sizes - /// for function \p F, or minimum/maximum flat work group sizes explicitly - /// requested using "amdgpu-flat-work-group-size" attribute attached to - /// function \p F. - /// - /// \returns Subtarget's default values if explicitly requested values cannot - /// be converted to integer, or violate subtarget's specifications. - std::pair getFlatWorkGroupSizes(const Function &F) const; - - /// \returns Subtarget's default pair of minimum/maximum number of waves per - /// execution unit for function \p F, or minimum/maximum number of waves per - /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute - /// attached to function \p F. - /// - /// \returns Subtarget's default values if explicitly requested values cannot - /// be converted to integer, violate subtarget's specifications, or are not - /// compatible with minimum/maximum number of waves limited by flat work group - /// size, register usage, and/or lds usage. - std::pair getWavesPerEU(const Function &F) const; - - /// Return the amount of LDS that can be used that will not restrict the - /// occupancy lower than WaveCount. - unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, - const Function &) const; - - /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if - /// the given LDS memory size is the only constraint. - unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; - - unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; - - bool isAmdHsaOS() const { - return TargetTriple.getOS() == Triple::AMDHSA; - } - - bool isAmdPalOS() const { - return TargetTriple.getOS() == Triple::AMDPAL; - } - - bool isMesa3DOS() const { - return TargetTriple.getOS() == Triple::Mesa3D; - } - - bool isMesaKernel(const Function &F) const; - - bool isAmdHsaOrMesa(const Function &F) const { - return isAmdHsaOS() || isMesaKernel(F); - } - - bool isGCN() const { - return TargetTriple.getArch() == Triple::amdgcn; - } - - bool has16BitInsts() const { - return Has16BitInsts; - } - - bool hasMadMixInsts() const { - return HasMadMixInsts; - } - - bool hasMadMacF32Insts() const { - return HasMadMacF32Insts || !isGCN(); - } - - bool hasDsSrc2Insts() const { - return HasDsSrc2Insts; - } - - bool hasSDWA() const { - return HasSDWA; - } - - bool hasVOP3PInsts() const { - return HasVOP3PInsts; - } - - bool hasMulI24() const { - return HasMulI24; - } - - bool hasMulU24() const { - return HasMulU24; - } - - bool hasInv2PiInlineImm() const { - return HasInv2PiInlineImm; - } - - bool hasFminFmaxLegacy() const { - return HasFminFmaxLegacy; - } - - bool hasTrigReducedRange() const { - return HasTrigReducedRange; - } - - bool isPromoteAllocaEnabled() const { - return EnablePromoteAlloca; - } - - unsigned getWavefrontSize() const { - return 1 << WavefrontSizeLog2; - } - - unsigned getWavefrontSizeLog2() const { - return WavefrontSizeLog2; - } - - unsigned getLocalMemorySize() const { - return LocalMemorySize; - } - - Align getAlignmentForImplicitArgPtr() const { - return isAmdHsaOS() ? Align(8) : Align(4); - } - - /// Returns the offset in bytes from the start of the input buffer - /// of the first explicit kernel argument. - unsigned getExplicitKernelArgOffset(const Function &F) const { - return isAmdHsaOrMesa(F) ? 0 : 36; - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; - - /// \returns Minimum flat work group size supported by the subtarget. - virtual unsigned getMinFlatWorkGroupSize() const = 0; - - /// \returns Maximum flat work group size supported by the subtarget. - virtual unsigned getMaxFlatWorkGroupSize() const = 0; - - /// \returns Number of waves per execution unit required to support the given - /// \p FlatWorkGroupSize. - virtual unsigned - getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - virtual unsigned getMinWavesPerEU() const = 0; - - /// \returns Maximum number of waves per execution unit supported by the - /// subtarget without any kind of limitation. - unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } - - /// Return the maximum workitem ID value in the function, for the given (0, 1, - /// 2) dimension. - unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; - - /// Creates value range metadata on an workitemid.* intrinsic call or load. - bool makeLIDRangeMetadata(Instruction *I) const; - - /// \returns Number of bytes of arguments that are passed to a shader or - /// kernel in addition to the explicit ones declared for the function. - unsigned getImplicitArgNumBytes(const Function &F) const; - uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; - unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; - - /// \returns Corresponsing DWARF register number mapping flavour for the - /// \p WavefrontSize. - AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const { - return getWavefrontSize() == 32 ? AMDGPUDwarfFlavour::Wave32 - : AMDGPUDwarfFlavour::Wave64; - } - - virtual ~AMDGPUSubtarget() {} -}; - -class GCNSubtarget : public AMDGPUGenSubtargetInfo, - public AMDGPUSubtarget { +class GCNSubtarget final : public AMDGPUGenSubtargetInfo, + public AMDGPUSubtarget { using AMDGPUSubtarget::getMaxWavesPerEU; @@ -332,7 +105,6 @@ bool FP64; bool FMA; bool MIMG_R128; - bool IsGCN; bool GCN3Encoding; bool CIInsts; bool GFX8Insts; @@ -386,13 +158,8 @@ bool ScalarFlatScratchInsts; bool AddNoCarryInsts; bool HasUnpackedD16VMem; - bool R600ALUInst; - bool CaymanISA; - bool CFALUBug; bool LDSMisalignedBug; bool HasMFMAInlineLiteralBug; - bool HasVertexCache; - short TexVTXClauseSize; bool UnalignedBufferAccess; bool UnalignedDSAccess; bool ScalarizeGlobal; @@ -1287,142 +1054,6 @@ SDep &Dep) const override; }; -class R600Subtarget final : public R600GenSubtargetInfo, - public AMDGPUSubtarget { -private: - R600InstrInfo InstrInfo; - R600FrameLowering FrameLowering; - bool FMA; - bool CaymanISA; - bool CFALUBug; - bool HasVertexCache; - bool R600ALUInst; - bool FP64; - short TexVTXClauseSize; - Generation Gen; - R600TargetLowering TLInfo; - InstrItineraryData InstrItins; - SelectionDAGTargetInfo TSInfo; - -public: - R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, - const TargetMachine &TM); - - const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } - - const R600FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - - const R600TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const R600RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - - const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; - } - - // Nothing implemented, just prevent crashes on use. - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); - - Generation getGeneration() const { - return Gen; - } - - Align getStackAlignment() const { return Align(4); } - - R600Subtarget &initializeSubtargetDependencies(const Triple &TT, - StringRef GPU, StringRef FS); - - bool hasBFE() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasBFI() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasBCNT(unsigned Size) const { - if (Size == 32) - return (getGeneration() >= EVERGREEN); - - return false; - } - - bool hasBORROW() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasCARRY() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasCaymanISA() const { - return CaymanISA; - } - - bool hasFFBL() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasFFBH() const { - return (getGeneration() >= EVERGREEN); - } - - bool hasFMA() const { return FMA; } - - bool hasCFAluBug() const { return CFALUBug; } - - bool hasVertexCache() const { return HasVertexCache; } - - short getTexVTXClauseSize() const { return TexVTXClauseSize; } - - bool enableMachineScheduler() const override { - return true; - } - - bool enableSubRegLiveness() const override { - return true; - } - - /// \returns Maximum number of work groups per compute unit supported by the - /// subtarget and limited by given \p FlatWorkGroupSize. - unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); - } - - /// \returns Minimum flat work group size supported by the subtarget. - unsigned getMinFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); - } - - /// \returns Maximum flat work group size supported by the subtarget. - unsigned getMaxFlatWorkGroupSize() const override { - return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); - } - - /// \returns Number of waves per execution unit required to support the given - /// \p FlatWorkGroupSize. - unsigned - getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { - return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); - } - - /// \returns Minimum number of waves per execution unit supported by the - /// subtarget. - unsigned getMinWavesPerEU() const override { - return AMDGPU::IsaInfo::getMinWavesPerEU(this); - } -}; - } // end namespace llvm -#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H +#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -31,7 +31,7 @@ class Triple; class raw_pwrite_stream; -enum AMDGPUDwarfFlavour { Wave64 = 0, Wave32 = 1 }; +enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 }; MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour); diff --git a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -15,9 +15,10 @@ //===----------------------------------------------------------------------===// #include "R600AsmPrinter.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" +#include "R600Subtarget.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp --- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -13,7 +13,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp --- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -13,8 +13,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600MachineFunctionInfo.h" +#include "R600Subtarget.h" #include using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp --- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -14,8 +14,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp --- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -14,8 +14,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -7,7 +7,7 @@ //==-----------------------------------------------------------------------===// #include "R600FrameLowering.h" -#include "AMDGPUSubtarget.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -13,11 +13,11 @@ #include "R600ISelLowering.h" #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" +#include "R600Subtarget.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -13,8 +13,9 @@ #include "R600InstrInfo.h" #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" #include "llvm/ADT/SmallSet.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp --- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -27,8 +27,9 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp --- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Subtarget.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -12,8 +12,9 @@ //===----------------------------------------------------------------------===// #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600Defines.h" +#include "R600Subtarget.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -0,0 +1,174 @@ +//=====-- R600Subtarget.h - Define Subtarget for AMDGPU R600 ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// AMDGPU R600 specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H +#define LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H + +#include "AMDGPUSubtarget.h" +#include "R600FrameLowering.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +class MCInst; +class MCInstrInfo; + +} // namespace llvm + +#define GET_SUBTARGETINFO_HEADER +#include "R600GenSubtargetInfo.inc" + +namespace llvm { + +class R600Subtarget final : public R600GenSubtargetInfo, + public AMDGPUSubtarget { +private: + R600InstrInfo InstrInfo; + R600FrameLowering FrameLowering; + bool FMA; + bool CaymanISA; + bool CFALUBug; + bool HasVertexCache; + bool R600ALUInst; + bool FP64; + short TexVTXClauseSize; + Generation Gen; + R600TargetLowering TLInfo; + InstrItineraryData InstrItins; + SelectionDAGTargetInfo TSInfo; + +public: + R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS, + const TargetMachine &TM); + + const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; } + + const R600FrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + const R600TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + + const R600RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } + + // Nothing implemented, just prevent crashes on use. + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); + + Generation getGeneration() const { + return Gen; + } + + Align getStackAlignment() const { return Align(4); } + + R600Subtarget &initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, StringRef FS); + + bool hasBFE() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBFI() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBCNT(unsigned Size) const { + if (Size == 32) + return (getGeneration() >= EVERGREEN); + + return false; + } + + bool hasBORROW() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasCARRY() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasCaymanISA() const { + return CaymanISA; + } + + bool hasFFBL() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasFFBH() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasFMA() const { return FMA; } + + bool hasCFAluBug() const { return CFALUBug; } + + bool hasVertexCache() const { return HasVertexCache; } + + short getTexVTXClauseSize() const { return TexVTXClauseSize; } + + bool enableMachineScheduler() const override { + return true; + } + + bool enableSubRegLiveness() const override { + return true; + } + + /// \returns Maximum number of work groups per compute unit supported by the + /// subtarget and limited by given \p FlatWorkGroupSize. + unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); + } + + /// \returns Minimum flat work group size supported by the subtarget. + unsigned getMinFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); + } + + /// \returns Maximum flat work group size supported by the subtarget. + unsigned getMaxFlatWorkGroupSize() const override { + return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); + } + + /// \returns Number of waves per execution unit required to support the given + /// \p FlatWorkGroupSize. + unsigned + getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { + return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); + } + + /// \returns Minimum number of waves per execution unit supported by the + /// subtarget. + unsigned getMinWavesPerEU() const override { + return AMDGPU::IsaInfo::getMinWavesPerEU(this); + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_R600SUBTARGET_H diff --git a/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp --- a/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp +++ b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp @@ -16,7 +16,8 @@ // #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #define DEBUG_TYPE "si-img-init" diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp --- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -65,7 +65,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp --- a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -9,7 +9,8 @@ // #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp --- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" #include "GCNRegPressure.h" #include "SIMachineFunctionInfo.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -8,7 +8,8 @@ #include "SIFrameLowering.h" #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -14,7 +14,6 @@ #include "SIISelLowering.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp @@ -32,7 +32,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -24,7 +24,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -14,8 +14,9 @@ #include "SIInstrInfo.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" -#include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveVariables.h" diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -58,7 +58,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -48,7 +48,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -22,7 +22,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePostDominators.h" diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -16,7 +16,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/RegisterScavenging.h" diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -8,7 +8,6 @@ #include "SIMachineFunctionInfo.h" #include "AMDGPUTargetMachine.h" -#include "AMDGPUSubtarget.h" #define MAX_LANES 64 diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -15,7 +15,8 @@ #include "AMDGPU.h" #include "AMDGPUMachineModuleInfo.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/DiagnosticInfo.h" diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// // #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -7,7 +7,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -13,7 +13,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -20,7 +20,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp --- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp +++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/LiveIntervals.h" diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -14,12 +14,14 @@ #include "SIRegisterInfo.h" #include "AMDGPU.h" #include "AMDGPURegisterBankInfo.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/RegisterScavenging.h" + using namespace llvm; #define GET_REGINFO_TARGET_DESC diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp --- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp +++ b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -9,7 +9,8 @@ // #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -56,7 +56,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/LiveIntervals.h" diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -9,8 +9,9 @@ #include "AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDGPUAsmUtils.h" -#include "AMDGPUSubtarget.h" #include "AMDKernelCodeT.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" diff --git a/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp b/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp --- a/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp +++ b/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCTargetOptions.h"