diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -15,17 +15,7 @@ namespace llvm { -class FunctionPass; -class GCNTargetMachine; -class ImmutablePass; -class MachineFunctionPass; -class ModulePass; -class Pass; -class Target; class TargetMachine; -class TargetOptions; -class PassRegistry; -class Module; // GlobalISel passes void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); @@ -35,16 +25,6 @@ FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); void initializeAMDGPURegBankCombinerPass(PassRegistry &); -// R600 Passes -FunctionPass *createR600VectorRegMerger(); -FunctionPass *createR600ExpandSpecialInstrsPass(); -FunctionPass *createR600EmitClauseMarkers(); -FunctionPass *createR600ClauseMergePass(); -FunctionPass *createR600Packetizer(); -FunctionPass *createR600ControlFlowFinalizer(); -FunctionPass *createAMDGPUCFGStructurizerPass(); -FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel); - // SI Passes FunctionPass *createGCNDPPCombinePass(); FunctionPass *createSIAnnotateControlFlowPass(); @@ -176,21 +156,6 @@ void initializeGCNDPPCombinePass(PassRegistry &); extern char &GCNDPPCombineID; -void initializeR600ClauseMergePassPass(PassRegistry &); -extern char &R600ClauseMergePassID; - -void initializeR600ControlFlowFinalizerPass(PassRegistry &); -extern char &R600ControlFlowFinalizerID; - -void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &); -extern char &R600ExpandSpecialInstrsPassID; - -void initializeR600VectorRegMergerPass(PassRegistry &); -extern char &R600VectorRegMergerID; - -void initializeR600PacketizerPass(PassRegistry &); -extern char &R600PacketizerID; - void initializeSIFoldOperandsPass(PassRegistry &); extern char &SIFoldOperandsID; @@ -282,7 +247,6 @@ bool GlobalOpt; }; -ModulePass *createR600OpenCLImageTypeLoweringPass(); FunctionPass *createAMDGPUAnnotateUniformValues(); ModulePass 
*createAMDGPUPrintfRuntimeBinding(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h @@ -18,7 +18,6 @@ namespace llvm { class DataLayout; -class MDNode; class MemoryLocation; /// A simple AA result that uses TBAA metadata to answer queries. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -26,7 +26,6 @@ class AMDGPUTargetStreamer; class MCCodeEmitter; class MCOperand; -class GCNSubtarget; namespace AMDGPU { namespace HSAMD { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -15,7 +15,6 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H -#include "Utils/AMDGPUBaseInfo.h" #include "llvm/BinaryFormat/MsgPackDocument.h" #include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/Alignment.h" @@ -33,6 +32,11 @@ class Type; namespace AMDGPU { + +namespace IsaInfo { +class AMDGPUTargetID; +} + namespace HSAMD { class MetadataStreamer { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -13,6 +13,9 @@ #include "AMDGPU.h" #include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" +#include "R600Subtarget.h" #include "SIMachineFunctionInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -17,7 +17,6 @@ #include "AMDGPUInstrInfo.h" #include "AMDGPUTargetTransformInfo.h" #include "GCNSubtarget.h" -#include "R600Subtarget.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -103,9 +103,6 @@ // This argument to this node is a dword address. def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; -// Force dependencies for vector trunc stores -def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>; - def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>; def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>; // out = a - floor(a) @@ -329,11 +326,6 @@ ]>; -def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; - -def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp, - [SDNPHasChain, SDNPSideEffect]>; - //===----------------------------------------------------------------------===// // Flow Control Profile Types //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -14,10 +14,7 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" -#include "llvm/CodeGen/Register.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" namespace { #define 
GET_GLOBALISEL_PREDICATE_BITSET diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #define DEBUG_TYPE "amdgpu-isel" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600AsmPrinter.h" +#include "R600Subtarget.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Constants.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h @@ -16,18 +16,12 @@ #ifndef LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H #define LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H -#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MIRFormatter.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Support/raw_ostream.h" -#include namespace llvm { class MachineFunction; -class MachineInstr; struct PerFunctionMIParsingState; -struct SlotMapping; class AMDGPUMIRFormatter final : public MIRFormatter { public: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h @@ -17,7 +17,6 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include 
"llvm/IR/ValueMap.h" namespace llvm { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "AMDGPUSubtarget.h" -#include "AMDGPU.h" #include "AMDGPUCallLowering.h" #include "AMDGPUInstructionSelector.h" #include "AMDGPULegalizerInfo.h" #include "AMDGPURegisterBankInfo.h" #include "AMDGPUTargetMachine.h" +#include "R600Subtarget.h" #include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallString.h" @@ -38,10 +38,7 @@ #define GET_SUBTARGETINFO_CTOR #define AMDGPUSubtarget GCNSubtarget #include "AMDGPUGenSubtargetInfo.inc" -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR #undef AMDGPUSubtarget -#include "R600GenSubtargetInfo.inc" static cl::opt DisablePowerSched( "amdgpu-disable-power-sched", @@ -64,19 +61,6 @@ GCNSubtarget::~GCNSubtarget() = default; -R600Subtarget & -R600Subtarget::initializeSubtargetDependencies(const Triple &TT, - StringRef GPU, StringRef FS) { - SmallString<256> FullFS("+promote-alloca,"); - FullFS += FS; - ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS); - - HasMulU24 = getGeneration() >= EVERGREEN; - HasMulI24 = hasCaymanISA(); - - return *this; -} - GCNSubtarget & GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS) { @@ -721,23 +705,6 @@ : AMDGPUDwarfFlavour::Wave64; } -R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, - const TargetMachine &TM) : - R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/GPU, FS), - AMDGPUSubtarget(TT), - InstrInfo(*this), - FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), - FMA(false), - CaymanISA(false), - CFALUBug(false), - HasVertexCache(false), - R600ALUInst(false), - FP64(false), - TexVTXClauseSize(0), - Gen(R600), - 
TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), - InstrItins(getInstrItineraryForCPU(GPU)) { } - void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { // Track register pressure so the scheduler can try to decrease diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -15,11 +15,13 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H #include "GCNSubtarget.h" -#include "R600Subtarget.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Target/TargetMachine.h" namespace llvm { +class ScheduleDAGMILive; + //===----------------------------------------------------------------------===// // AMDGPU Target Machine (R600+) //===----------------------------------------------------------------------===// @@ -63,31 +65,6 @@ unsigned getAssumedAddrSpace(const Value *V) const override; }; -//===----------------------------------------------------------------------===// -// R600 Target Machine (R600 -> Cayman) -//===----------------------------------------------------------------------===// - -class R600TargetMachine final : public AMDGPUTargetMachine { -private: - mutable StringMap> SubtargetMap; - -public: - R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, TargetOptions Options, - Optional RM, Optional CM, - CodeGenOpt::Level OL, bool JIT); - - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - - const R600Subtarget *getSubtargetImpl(const Function &) const override; - - TargetTransformInfo getTargetTransformInfo(const Function &F) override; - - bool isMachineVerifierClean() const override { - return false; - } -}; - //===----------------------------------------------------------------------===// // GCN Target Machine (SI+) //===----------------------------------------------------------------------===// 
@@ -104,7 +81,7 @@ TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - const GCNSubtarget *getSubtargetImpl(const Function &) const override; + const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override; TargetTransformInfo getTargetTransformInfo(const Function &F) override; @@ -121,6 +98,45 @@ SMRange &SourceRange) const override; }; +//===----------------------------------------------------------------------===// +// AMDGPU Pass Setup +//===----------------------------------------------------------------------===// + +class AMDGPUPassConfig : public TargetPassConfig { +public: + AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM); + + AMDGPUTargetMachine &getAMDGPUTargetMachine() const { + return getTM(); + } + + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override; + + void addEarlyCSEOrGVNPass(); + void addStraightLineScalarOptimizationPasses(); + void addIRPasses() override; + void addCodeGenPrepare() override; + bool addPreISel() override; + bool addInstSelector() override; + bool addGCPasses() override; + + std::unique_ptr getCSEConfig() const override; + + /// Check if a pass is enabled given \p Opt option. The option always + /// overrides defaults if explicitly used. Otherwise its default will + /// be used given that a pass shall work at an optimization \p Level + /// minimum.
+ bool isPassEnabled(const cl::opt &Opt, + CodeGenOpt::Level Level = CodeGenOpt::Default) const { + if (Opt.getNumOccurrences()) + return Opt; + if (TM->getOptLevel() < Level) + return false; + return Opt; + } +}; + } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -8,7 +8,7 @@ // /// \file /// The AMDGPU target machine contains all of the hardware specific -/// information needed to emit code for R600 and SI GPUs. +/// information needed to emit code for SI+ GPUs. // //===----------------------------------------------------------------------===// @@ -21,7 +21,8 @@ #include "AMDGPUTargetTransformInfo.h" #include "GCNIterativeScheduler.h" #include "GCNSchedStrategy.h" -#include "R600MachineScheduler.h" +#include "R600.h" +#include "R600TargetMachine.h" #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" #include "TargetInfo/AMDGPUTargetInfo.h" @@ -162,12 +163,6 @@ "fast", "fast register allocator", createFastVGPRRegisterAllocator); } - -static cl::opt EnableR600StructurizeCFG( - "r600-ir-structurize", - cl::desc("Use StructurizeCFG IR pass"), - cl::init(true)); - static cl::opt EnableSROA( "amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), @@ -184,12 +179,6 @@ cl::desc("Run pre-RA exec mask optimizations"), cl::init(true)); -static cl::opt EnableR600IfConvert( - "r600-if-convert", - cl::desc("Use if conversion pass"), - cl::ReallyHidden, - cl::init(true)); - // Option to disable vectorizer for tests. 
static cl::opt EnableLoadStoreVectorizer( "amdgpu-load-store-vectorizer", @@ -240,13 +229,6 @@ cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden); -static cl::opt EnableAMDGPUFunctionCallsOpt( - "amdgpu-function-calls", - cl::desc("Enable AMDGPU function call support"), - cl::location(AMDGPUTargetMachine::EnableFunctionCalls), - cl::init(true), - cl::Hidden); - static cl::opt EnableAMDGPUFixedFunctionABIOpt( "amdgpu-fixed-function-abi", cl::desc("Enable all implicit function arguments"), @@ -401,10 +383,6 @@ return std::make_unique(); } -static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, std::make_unique()); -} - static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { return new SIScheduleDAGMI(C); } @@ -441,10 +419,6 @@ return DAG; } -static MachineSchedRegistry -R600SchedRegistry("r600", "Run R600's custom scheduler", - createR600MachineScheduler); - static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler); @@ -745,45 +719,6 @@ }); } -//===----------------------------------------------------------------------===// -// R600 Target Machine (R600 -> Cayman) -//===----------------------------------------------------------------------===// - -R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - TargetOptions Options, - Optional RM, - Optional CM, - CodeGenOpt::Level OL, bool JIT) - : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { - setRequiresStructuredCFG(true); - - // Override the default since calls aren't supported for r600. 
- if (EnableFunctionCalls && - EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0) - EnableFunctionCalls = false; -} - -const R600Subtarget *R600TargetMachine::getSubtargetImpl( - const Function &F) const { - StringRef GPU = getGPUName(F); - StringRef FS = getFeatureString(F); - - SmallString<128> SubtargetKey(GPU); - SubtargetKey.append(FS); - - auto &I = SubtargetMap[SubtargetKey]; - if (!I) { - // This needs to be done before we create a new subtarget since any - // creation will depend on the TM and the code generation flags on the - // function that reside in TargetOptions. - resetTargetOptions(F); - I = std::make_unique(TargetTriple, GPU, FS, *this); - } - - return I.get(); -} - int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) { return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || @@ -817,11 +752,6 @@ return AMDGPUAS::GLOBAL_ADDRESS; } -TargetTransformInfo -R600TargetMachine::getTargetTransformInfo(const Function &F) { - return TargetTransformInfo(R600TTIImpl(this, F)); -} - //===----------------------------------------------------------------------===// // GCN Target Machine (SI+) //===----------------------------------------------------------------------===// @@ -834,7 +764,8 @@ CodeGenOpt::Level OL, bool JIT) : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} -const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const { +const TargetSubtargetInfo * +GCNTargetMachine::getSubtargetImpl(const Function &F) const { StringRef GPU = getGPUName(F); StringRef FS = getFeatureString(F); @@ -864,76 +795,11 @@ // AMDGPU Pass Setup //===----------------------------------------------------------------------===// -namespace { - -class AMDGPUPassConfig : public TargetPassConfig { -public: - AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) { - // Exceptions and StackMaps are not supported, so these passes will never do - // anything. 
- disablePass(&StackMapLivenessID); - disablePass(&FuncletLayoutID); - // Garbage collection is not supported. - disablePass(&GCLoweringID); - disablePass(&ShadowStackGCLoweringID); - } - - AMDGPUTargetMachine &getAMDGPUTargetMachine() const { - return getTM(); - } - - ScheduleDAGInstrs * - createMachineScheduler(MachineSchedContext *C) const override { - ScheduleDAGMILive *DAG = createGenericSchedLive(C); - DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); - return DAG; - } - - void addEarlyCSEOrGVNPass(); - void addStraightLineScalarOptimizationPasses(); - void addIRPasses() override; - void addCodeGenPrepare() override; - bool addPreISel() override; - bool addInstSelector() override; - bool addGCPasses() override; - - std::unique_ptr getCSEConfig() const override; - - /// Check if a pass is enabled given \p Opt option. The option always - /// overrides defaults if explicitely used. Otherwise its default will - /// be used given that a pass shall work at an optimization \p Level - /// minimum. 
- bool isPassEnabled(const cl::opt &Opt, - CodeGenOpt::Level Level = CodeGenOpt::Default) const { - if (Opt.getNumOccurrences()) - return Opt; - if (TM->getOptLevel() < Level) - return false; - return Opt; - } -}; - -std::unique_ptr AMDGPUPassConfig::getCSEConfig() const { +std::unique_ptr llvm::AMDGPUPassConfig::getCSEConfig() const { return getStandardCSEConfigForOpt(TM->getOptLevel()); } -class R600PassConfig final : public AMDGPUPassConfig { -public: - R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM) - : AMDGPUPassConfig(TM, PM) {} - - ScheduleDAGInstrs *createMachineScheduler( - MachineSchedContext *C) const override { - return createR600MachineScheduler(C); - } - - bool addPreISel() override; - bool addInstSelector() override; - void addPreRegAlloc() override; - void addPreSched2() override; - void addPreEmitPass() override; -}; +namespace { class GCNPassConfig final : public AMDGPUPassConfig { public: @@ -982,6 +848,17 @@ } // end anonymous namespace +AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) { + // Exceptions and StackMaps are not supported, so these passes will never do + // anything. + disablePass(&StackMapLivenessID); + disablePass(&FuncletLayoutID); + // Garbage collection is not supported. 
+ disablePass(&GCLoweringID); + disablePass(&ShadowStackGCLoweringID); +} + void AMDGPUPassConfig::addEarlyCSEOrGVNPass() { if (getOptLevel() == CodeGenOpt::Aggressive) addPass(createGVNPass()); @@ -1137,44 +1014,11 @@ return false; } -//===----------------------------------------------------------------------===// -// R600 Pass Setup -//===----------------------------------------------------------------------===// - -bool R600PassConfig::addPreISel() { - AMDGPUPassConfig::addPreISel(); - - if (EnableR600StructurizeCFG) - addPass(createStructurizeCFGPass()); - return false; -} - -bool R600PassConfig::addInstSelector() { - addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel())); - return false; -} - -void R600PassConfig::addPreRegAlloc() { - addPass(createR600VectorRegMerger()); -} - -void R600PassConfig::addPreSched2() { - addPass(createR600EmitClauseMarkers(), false); - if (EnableR600IfConvert) - addPass(&IfConverterID, false); - addPass(createR600ClauseMergePass(), false); -} - -void R600PassConfig::addPreEmitPass() { - addPass(createAMDGPUCFGStructurizerPass(), false); - addPass(createR600ExpandSpecialInstrsPass(), false); - addPass(&FinalizeMachineBundlesID, false); - addPass(createR600Packetizer(), false); - addPass(createR600ControlFlowFinalizer(), false); -} - -TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { - return new R600PassConfig(*this, PM); +llvm::ScheduleDAGInstrs * +AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + return DAG; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -18,18 +18,14 @@ #define 
LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H #include "AMDGPU.h" -#include "AMDGPUSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/BasicTTIImpl.h" namespace llvm { -class AMDGPUTargetLowering; class AMDGPUTargetMachine; class GCNSubtarget; class InstCombiner; class Loop; -class R600Subtarget; class ScalarEvolution; class SITargetLowering; class Type; @@ -224,45 +220,6 @@ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); }; -class R600TTIImpl final : public BasicTTIImplBase { - using BaseT = BasicTTIImplBase; - using TTI = TargetTransformInfo; - - friend BaseT; - - const R600Subtarget *ST; - const AMDGPUTargetLowering *TLI; - AMDGPUTTIImpl CommonTTI; - -public: - explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F); - - const R600Subtarget *getST() const { return ST; } - const AMDGPUTargetLowering *getTLI() const { return TLI; } - - void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP, - OptimizationRemarkEmitter *ORE); - void getPeelingPreferences(Loop *L, ScalarEvolution &SE, - TTI::PeelingPreferences &PP); - unsigned getHardwareNumberOfRegisters(bool Vec) const; - unsigned getNumberOfRegisters(bool Vec) const; - TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const; - unsigned getMinVectorRegisterBitWidth() const; - unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; - bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, - unsigned AddrSpace) const; - bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, - unsigned AddrSpace) const; - bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, - unsigned AddrSpace) const; - unsigned getMaxInterleaveFactor(unsigned VF); - InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, - const Instruction *I = nullptr); - InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index); 
-}; - } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -16,10 +16,11 @@ #include "AMDGPUTargetTransformInfo.h" #include "AMDGPUTargetMachine.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" @@ -1241,123 +1242,3 @@ : ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind) : getQuarterRateInstrCost(CostKind); } - -R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F) - : BaseT(TM, F.getParent()->getDataLayout()), - ST(static_cast(TM->getSubtargetImpl(F))), - TLI(ST->getTargetLowering()), CommonTTI(TM, F) {} - -unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { - return 4 * 128; // XXX - 4 channels. Should these count as vector instead? 
-} - -unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const { - return getHardwareNumberOfRegisters(Vec); -} - -TypeSize -R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { - return TypeSize::getFixed(32); -} - -unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { - return 32; -} - -unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { - if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS || - AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) - return 128; - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || - AddrSpace == AMDGPUAS::REGION_ADDRESS) - return 64; - if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) - return 32; - - if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || - AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || - (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && - AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) - return 128; - llvm_unreachable("unhandled address space"); -} - -bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, - Align Alignment, - unsigned AddrSpace) const { - // We allow vectorization of flat stores, even though we may need to decompose - // them later if they may access private memory. We don't have enough context - // here, and legalization can handle it. - return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS); -} - -bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, - Align Alignment, - unsigned AddrSpace) const { - return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); -} - -bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, - Align Alignment, - unsigned AddrSpace) const { - return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace); -} - -unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) { - // Disable unrolling if the loop is not vectorized. - // TODO: Enable this again. 
- if (VF == 1) - return 1; - - return 8; -} - -InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode, - TTI::TargetCostKind CostKind, - const Instruction *I) { - if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) - return Opcode == Instruction::PHI ? 0 : 1; - - // XXX - For some reason this isn't called for switch. - switch (Opcode) { - case Instruction::Br: - case Instruction::Ret: - return 10; - default: - return BaseT::getCFInstrCost(Opcode, CostKind, I); - } -} - -InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index) { - switch (Opcode) { - case Instruction::ExtractElement: - case Instruction::InsertElement: { - unsigned EltSize - = DL.getTypeSizeInBits(cast(ValTy)->getElementType()); - if (EltSize < 32) { - return BaseT::getVectorInstrCost(Opcode, ValTy, Index); - } - - // Extracts are just reads of a subregister, so are free. Inserts are - // considered free because we don't want to have any cost for scalarizing - // operations, and we don't have to copy into a different register class. - - // Dynamic indexing isn't free and is best avoided. - return Index == ~0u ? 
2 : 0; - } - default: - return BaseT::getVectorInstrCost(Opcode, ValTy, Index); - } -} - -void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP, - OptimizationRemarkEmitter *ORE) { - CommonTTI.getUnrollingPreferences(L, SE, UP, ORE); -} - -void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, - TTI::PeelingPreferences &PP) { - CommonTTI.getPeelingPreferences(L, SE, PP); -} diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -7,7 +7,8 @@ //==-----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" #include "R600RegisterInfo.h" #include "R600Subtarget.h" #include "llvm/ADT/SCCIterator.h" diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -114,6 +114,9 @@ R600OptimizeVectorRegisters.cpp R600Packetizer.cpp R600RegisterInfo.cpp + R600Subtarget.cpp + R600TargetMachine.cpp + R600TargetTransformInfo.cpp SIAnnotateControlFlow.cpp SIFixSGPRCopies.cpp SIFixVGPRCopies.cpp diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -21,13 +21,6 @@ #include "SIInstrInfo.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" -namespace llvm { - -class MCInst; -class MCInstrInfo; - -} // namespace llvm - #define GET_SUBTARGETINFO_HEADER #include "AMDGPUGenSubtargetInfo.inc" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h --- 
a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h @@ -19,10 +19,9 @@ class MCAsmBackend; class MCCodeEmitter; class MCContext; -class MCSubtargetInfo; class MCELFStreamer; -class Triple; class MCObjectWriter; +class Triple; MCELFStreamer *createAMDGPUELFStreamer(const Triple &T, MCContext &Context, std::unique_ptr MAB, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -240,36 +240,6 @@ raw_ostream &O); }; -class R600InstPrinter : public MCInstPrinter { -public: - R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MII, MRI) {} - - void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, - const MCSubtargetInfo &STI, raw_ostream &O) override; - std::pair getMnemonic(const MCInst *MI) override; - void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - - void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printRel(const MCInst *MI, unsigned OpNo, raw_ostream 
&O); - void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); -}; - } // End namespace llvm #endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -1451,208 +1451,3 @@ } #include "AMDGPUGenAsmWriter.inc" - -void R600InstPrinter::printInst(const MCInst *MI, uint64_t Address, - StringRef Annot, const MCSubtargetInfo &STI, - raw_ostream &O) { - O.flush(); - printInstruction(MI, Address, O); - printAnnotation(O, Annot); -} - -void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|'); -} - -void R600InstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - int BankSwizzle = MI->getOperand(OpNo).getImm(); - switch (BankSwizzle) { - case 1: - O << "BS:VEC_021/SCL_122"; - break; - case 2: - O << "BS:VEC_120/SCL_212"; - break; - case 3: - O << "BS:VEC_102/SCL_221"; - break; - case 4: - O << "BS:VEC_201"; - break; - case 5: - O << "BS:VEC_210"; - break; - default: - break; - } -} - -void R600InstPrinter::printClamp(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "_SAT"); -} - -void R600InstPrinter::printCT(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned CT = MI->getOperand(OpNo).getImm(); - switch (CT) { - case 0: - O << 'U'; - break; - case 1: - O << 'N'; - break; - default: - break; - } -} - -void R600InstPrinter::printKCache(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - int KCacheMode = MI->getOperand(OpNo).getImm(); - if (KCacheMode > 0) { - int KCacheBank = 
MI->getOperand(OpNo - 2).getImm(); - O << "CB" << KCacheBank << ':'; - int KCacheAddr = MI->getOperand(OpNo + 2).getImm(); - int LineSize = (KCacheMode == 1) ? 16 : 32; - O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize; - } -} - -void R600InstPrinter::printLast(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "*", " "); -} - -void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isImm() || Op.isExpr()); - if (Op.isImm()) { - int64_t Imm = Op.getImm(); - O << Imm << '(' << BitsToFloat(Imm) << ')'; - } - if (Op.isExpr()) { - Op.getExpr()->print(O << '@', &MAI); - } -} - -void R600InstPrinter::printNeg(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '-'); -} - -void R600InstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - switch (MI->getOperand(OpNo).getImm()) { - default: break; - case 1: - O << " * 2.0"; - break; - case 2: - O << " * 4.0"; - break; - case 3: - O << " / 2.0"; - break; - } -} - -void R600InstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printOperand(MI, OpNo, O); - O << ", "; - printOperand(MI, OpNo + 1, O); -} - -void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - if (OpNo >= MI->getNumOperands()) { - O << "/*Missing OP" << OpNo << "*/"; - return; - } - - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - switch (Op.getReg()) { - // This is the default predicate state, so we don't need to print it. - case R600::PRED_SEL_OFF: - break; - - default: - O << getRegisterName(Op.getReg()); - break; - } - } else if (Op.isImm()) { - O << Op.getImm(); - } else if (Op.isDFPImm()) { - // We special case 0.0 because otherwise it will be printed as an integer. 
- if (Op.getDFPImm() == 0.0) - O << "0.0"; - else { - O << bit_cast(Op.getDFPImm()); - } - } else if (Op.isExpr()) { - const MCExpr *Exp = Op.getExpr(); - Exp->print(O, &MAI); - } else { - O << "/*INV_OP*/"; - } -} - -void R600InstPrinter::printRel(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '+'); -} - -void R600InstPrinter::printRSel(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Sel = MI->getOperand(OpNo).getImm(); - switch (Sel) { - case 0: - O << 'X'; - break; - case 1: - O << 'Y'; - break; - case 2: - O << 'Z'; - break; - case 3: - O << 'W'; - break; - case 4: - O << '0'; - break; - case 5: - O << '1'; - break; - case 7: - O << '_'; - break; - default: - break; - } -} - -void R600InstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "ExecMask,"); -} - -void R600InstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "Pred,"); -} - -void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.getImm() == 0) { - O << " (MASKED)"; - } -} - -#include "R600GenAsmWriter.inc" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// CodeEmitter interface for R600 and SI codegen. +/// CodeEmitter interface for SI codegen. 
// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // /// \file -/// CodeEmitter interface for R600 and SI codegen. +/// CodeEmitter interface for SI codegen. // //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -18,6 +18,7 @@ #include namespace llvm { +class Target; class MCAsmBackend; class MCCodeEmitter; class MCContext; @@ -26,20 +27,11 @@ class MCRegisterInfo; class MCSubtargetInfo; class MCTargetOptions; -class StringRef; -class Target; -class Triple; -class raw_pwrite_stream; enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 }; MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour); -MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - MCContext &Ctx); -MCInstrInfo *createR600MCInstrInfo(); - MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx); @@ -57,23 +49,12 @@ #define GET_REGINFO_ENUM #include "AMDGPUGenRegisterInfo.inc" -#define GET_REGINFO_ENUM -#include "R600GenRegisterInfo.inc" - #define GET_INSTRINFO_ENUM #define GET_INSTRINFO_OPERAND_ENUM #define GET_INSTRINFO_SCHED_ENUM #include "AMDGPUGenInstrInfo.inc" -#define GET_INSTRINFO_ENUM -#define GET_INSTRINFO_OPERAND_ENUM -#define GET_INSTRINFO_SCHED_ENUM -#include "R600GenInstrInfo.inc" - #define 
GET_SUBTARGETINFO_ENUM #include "AMDGPUGenSubtargetInfo.inc" -#define GET_SUBTARGETINFO_ENUM -#include "R600GenSubtargetInfo.inc" - #endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -16,7 +16,8 @@ #include "AMDGPUInstPrinter.h" #include "AMDGPUMCAsmInfo.h" #include "AMDGPUTargetStreamer.h" -#include "SIDefines.h" +#include "R600InstPrinter.h" +#include "R600MCTargetDesc.h" #include "TargetInfo/AMDGPUTargetInfo.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/TargetRegistry.h" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -17,13 +17,8 @@ namespace llvm { -class DataLayout; -class Function; class MCELFStreamer; class MCSymbol; -class MDNode; -class Module; -class Type; class formatted_raw_ostream; namespace AMDGPU { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -7,6 +7,7 @@ AMDGPUMCCodeEmitter.cpp AMDGPUMCTargetDesc.cpp AMDGPUTargetStreamer.cpp + R600InstPrinter.cpp R600MCCodeEmitter.cpp R600MCTargetDesc.cpp SIMCCodeEmitter.cpp diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h new file mode 100644 --- /dev/null 
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h @@ -0,0 +1,48 @@ +//===-- R600InstPrinter.h - AMDGPU MC Inst -> ASM interface -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600INSTPRINTER_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600INSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class R600InstPrinter : public MCInstPrinter { +public: + R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &O) override; + std::pair getMnemonic(const MCInst *MI) override; + void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRSel(const MCInst *MI, 
unsigned OpNo, raw_ostream &O); + void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; + +} // End namespace llvm + +#endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp @@ -0,0 +1,224 @@ +//===-- R600InstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// \file +//===----------------------------------------------------------------------===// + +#include "R600InstPrinter.h" +#include "AMDGPUInstPrinter.h" +#include "R600MCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void R600InstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &O) { + O.flush(); + printInstruction(MI, Address, O); + printAnnotation(O, Annot); +} + +void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|'); +} + +void R600InstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int BankSwizzle = MI->getOperand(OpNo).getImm(); + switch (BankSwizzle) { + case 1: + O << "BS:VEC_021/SCL_122"; + break; + case 2: + O << "BS:VEC_120/SCL_212"; + break; + case 3: + O << "BS:VEC_102/SCL_221"; + break; + case 4: + O << "BS:VEC_201"; + break; + case 5: + O << "BS:VEC_210"; + break; + default: + break; + } +} + +void 
R600InstPrinter::printClamp(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "_SAT"); +} + +void R600InstPrinter::printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + unsigned CT = MI->getOperand(OpNo).getImm(); + switch (CT) { + case 0: + O << 'U'; + break; + case 1: + O << 'N'; + break; + default: + break; + } +} + +void R600InstPrinter::printKCache(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int KCacheMode = MI->getOperand(OpNo).getImm(); + if (KCacheMode > 0) { + int KCacheBank = MI->getOperand(OpNo - 2).getImm(); + O << "CB" << KCacheBank << ':'; + int KCacheAddr = MI->getOperand(OpNo + 2).getImm(); + int LineSize = (KCacheMode == 1) ? 16 : 32; + O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize; + } +} + +void R600InstPrinter::printLast(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "*", " "); +} + +void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert(Op.isImm() || Op.isExpr()); + if (Op.isImm()) { + int64_t Imm = Op.getImm(); + O << Imm << '(' << BitsToFloat(Imm) << ')'; + } + if (Op.isExpr()) { + Op.getExpr()->print(O << '@', &MAI); + } +} + +void R600InstPrinter::printNeg(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '-'); +} + +void R600InstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + default: + break; + case 1: + O << " * 2.0"; + break; + case 2: + O << " * 4.0"; + break; + case 3: + O << " / 2.0"; + break; + } +} + +void R600InstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printOperand(MI, OpNo, O); + O << ", "; + printOperand(MI, OpNo + 1, O); +} + +void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (OpNo >= MI->getNumOperands()) { + O << 
"/*Missing OP" << OpNo << "*/"; + return; + } + + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + switch (Op.getReg()) { + // This is the default predicate state, so we don't need to print it. + case R600::PRED_SEL_OFF: + break; + + default: + O << getRegisterName(Op.getReg()); + break; + } + } else if (Op.isImm()) { + O << Op.getImm(); + } else if (Op.isDFPImm()) { + // We special case 0.0 because otherwise it will be printed as an integer. + if (Op.getDFPImm() == 0.0) + O << "0.0"; + else { + O << bit_cast(Op.getDFPImm()); + } + } else if (Op.isExpr()) { + const MCExpr *Exp = Op.getExpr(); + Exp->print(O, &MAI); + } else { + O << "/*INV_OP*/"; + } +} + +void R600InstPrinter::printRel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '+'); +} + +void R600InstPrinter::printRSel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Sel = MI->getOperand(OpNo).getImm(); + switch (Sel) { + case 0: + O << 'X'; + break; + case 1: + O << 'Y'; + break; + case 2: + O << 'Z'; + break; + case 3: + O << 'W'; + break; + case 4: + O << '0'; + break; + case 5: + O << '1'; + break; + case 7: + O << '_'; + break; + default: + break; + } +} + +void R600InstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "ExecMask,"); +} + +void R600InstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "Pred,"); +} + +void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.getImm() == 0) { + O << " (MASKED)"; + } +} + +#include "R600GenAsmWriter.inc" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" #include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h @@ -0,0 +1,44 @@ +//===-- R600MCTargetDesc.h - R600 Target Descriptions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Provides R600 specific target descriptions. 
+// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600MCTARGETDESC_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600MCTARGETDESC_H + +#include + +namespace llvm { +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCRegisterInfo; + +MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); +MCInstrInfo *createR600MCInstrInfo(); + +} // namespace llvm + +#define GET_REGINFO_ENUM +#include "R600GenRegisterInfo.inc" + +#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_OPERAND_ENUM +#define GET_INSTRINFO_SCHED_ENUM +#include "R600GenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "R600GenSubtargetInfo.inc" + +#endif diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUMCTargetDesc.h" +#include "R600MCTargetDesc.h" #include "llvm/MC/MCInstrInfo.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600.h b/llvm/lib/Target/AMDGPU/R600.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600.h @@ -0,0 +1,50 @@ +//===-- R600.h - MachineFunction passes hw codegen ----------------*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_R600_H +#define LLVM_LIB_TARGET_AMDGPU_R600_H + +#include "llvm/Support/CodeGen.h" + +namespace llvm { + +class FunctionPass; +class TargetMachine; +class ModulePass; +class PassRegistry; + +// R600 Passes +FunctionPass *createR600VectorRegMerger(); +FunctionPass *createR600ExpandSpecialInstrsPass(); +FunctionPass *createR600EmitClauseMarkers(); +FunctionPass *createR600ClauseMergePass(); +FunctionPass *createR600Packetizer(); +FunctionPass *createR600ControlFlowFinalizer(); +FunctionPass *createAMDGPUCFGStructurizerPass(); +FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel); +ModulePass *createR600OpenCLImageTypeLoweringPass(); + +void initializeR600ClauseMergePassPass(PassRegistry &); +extern char &R600ClauseMergePassID; + +void initializeR600ControlFlowFinalizerPass(PassRegistry &); +extern char &R600ControlFlowFinalizerID; + +void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &); +extern char &R600ExpandSpecialInstrsPassID; + +void initializeR600VectorRegMergerPass(PassRegistry &); +extern char &R600VectorRegMergerID; + +void initializeR600PacketizerPass(PassRegistry &); +extern char &R600PacketizerID; + +} // End namespace llvm + +#endif diff --git a/llvm/lib/Target/AMDGPU/R600.td b/llvm/lib/Target/AMDGPU/R600.td --- a/llvm/lib/Target/AMDGPU/R600.td +++ b/llvm/lib/Target/AMDGPU/R600.td @@ -34,6 +34,7 @@ include "AMDGPUFeatures.td" include "R600Schedule.td" include "R600Processors.td" +include "R600InstrInfo.td" include "AMDGPUInstrInfo.td" include "AMDGPUInstructions.td" include "R600Instructions.td" diff --git a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp --- a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp @@ -15,7 +15,7 @@ 
//===----------------------------------------------------------------------===// #include "R600AsmPrinter.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" #include "R600Subtarget.h" @@ -129,4 +129,3 @@ return false; } - diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp --- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -12,8 +12,8 @@ /// It needs to be called after IfCvt for best results. //===----------------------------------------------------------------------===// -#include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" #include "R600Subtarget.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp --- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" #include "R600MachineFunctionInfo.h" #include "R600Subtarget.h" #include diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp --- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -13,8 +13,8 @@ /// initiated by CF_ALU instructions. 
//===----------------------------------------------------------------------===// -#include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" #include "R600Defines.h" #include "R600Subtarget.h" diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp --- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" #include "R600Defines.h" #include "R600Subtarget.h" diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -13,7 +13,7 @@ #include "R600ISelLowering.h" #include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -13,7 +13,8 @@ #include "R600InstrInfo.h" #include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" #include "R600Defines.h" #include "R600Subtarget.h" #include "llvm/ADT/SmallSet.h" diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.td b/llvm/lib/Target/AMDGPU/R600InstrInfo.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.td @@ -0,0 +1,23 @@ +//===-- R600InstrInfo.td - R600 DAG nodes ------------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains DAG node definitions for the R600 target. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// R600 DAG Nodes +// + +// Force dependencies for vector trunc stores +def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>; + +def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; + +def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp, + [SDNPHasChain, SDNPSideEffect]>; diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp --- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" #include "R600Subtarget.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp --- a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp +++ b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp @@ -24,7 +24,7 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" +#include "R600.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp --- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ 
b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -26,8 +26,8 @@ /// to reduce MOV count. //===----------------------------------------------------------------------===// -#include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" #include "R600Defines.h" #include "R600Subtarget.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp --- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" +#include "R600.h" #include "R600Subtarget.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "R600RegisterInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/R600MCTargetDesc.h" #include "R600Defines.h" #include "R600Subtarget.h" diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h --- a/llvm/lib/Target/AMDGPU/R600Subtarget.h +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h @@ -23,7 +23,6 @@ namespace llvm { -class MCInst; class MCInstrInfo; } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp @@ -0,0 +1,46 @@ +//===-- R600Subtarget.cpp - R600 Subtarget Information --------------------===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Implements the R600 specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "R600Subtarget.h" +#include "MCTargetDesc/R600MCTargetDesc.h" + +using namespace llvm; + +#define DEBUG_TYPE "r600-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "R600GenSubtargetInfo.inc" + +R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, + const TargetMachine &TM) + : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT), + InstrInfo(*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), + FMA(false), CaymanISA(false), CFALUBug(false), HasVertexCache(false), + R600ALUInst(false), FP64(false), TexVTXClauseSize(0), Gen(R600), + TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)), + InstrItins(getInstrItineraryForCPU(GPU)) {} + +R600Subtarget &R600Subtarget::initializeSubtargetDependencies(const Triple &TT, + StringRef GPU, + StringRef FS) { + SmallString<256> FullFS("+promote-alloca,"); + FullFS += FS; + ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS); + + HasMulU24 = getGeneration() >= EVERGREEN; + HasMulI24 = hasCaymanISA(); + + return *this; +} diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h @@ -0,0 +1,48 @@ +//===-- R600TargetMachine.h - AMDGPU TargetMachine Interface ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// The AMDGPU TargetMachine interface definition for hw codegen targets. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H +#define LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H + +#include "AMDGPUTargetMachine.h" +#include "R600Subtarget.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +// R600 Target Machine (R600 -> Cayman) +//===----------------------------------------------------------------------===// + +class R600TargetMachine final : public AMDGPUTargetMachine { +private: + mutable StringMap> SubtargetMap; + +public: + R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, TargetOptions Options, + Optional RM, Optional CM, + CodeGenOpt::Level OL, bool JIT); + + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override; + + TargetTransformInfo getTargetTransformInfo(const Function &F) override; + + bool isMachineVerifierClean() const override { return false; } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp @@ -0,0 +1,143 @@ +//===-- R600TargetMachine.cpp - TargetMachine for hw codegen targets-------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The AMDGPU-R600 target machine contains all of the hardware specific
+/// information needed to emit code for R600 GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetMachine.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600.h"
+#include "R600MachineScheduler.h"
+#include "R600TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+    EnableR600StructurizeCFG("r600-ir-structurize",
+                             cl::desc("Use StructurizeCFG IR pass"),
+                             cl::init(true));
+
+static cl::opt<bool> EnableR600IfConvert("r600-if-convert",
+                                         cl::desc("Use if conversion pass"),
+                                         cl::ReallyHidden, cl::init(true));
+
+static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
+    "amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"),
+    cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true),
+    cl::Hidden);
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
+}
+
+static MachineSchedRegistry R600SchedRegistry("r600",
+                                              "Run R600's custom scheduler",
+                                              createR600MachineScheduler);
+
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
+                                     StringRef CPU, StringRef FS,
+                                     TargetOptions Options,
+                                     Optional<Reloc::Model> RM,
+                                     Optional<CodeModel::Model> CM,
+                                     CodeGenOpt::Level OL, bool JIT)
+    : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+  setRequiresStructuredCFG(true);
+
+  // Override the default since calls aren't supported for r600.
+  if (EnableFunctionCalls &&
+      EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
+    EnableFunctionCalls = false;
+}
+
+const TargetSubtargetInfo *
+R600TargetMachine::getSubtargetImpl(const Function &F) const {
+  StringRef GPU = getGPUName(F);
+  StringRef FS = getFeatureString(F);
+
+  SmallString<128> SubtargetKey(GPU);
+  SubtargetKey.append(FS);
+
+  auto &I = SubtargetMap[SubtargetKey];
+  if (!I) {
+    // This needs to be done before we create a new subtarget since any
+    // creation will depend on the TM and the code generation flags on the
+    // function that reside in TargetOptions.
+    resetTargetOptions(F);
+    I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
+  }
+
+  return I.get();
+}
+
+TargetTransformInfo
+R600TargetMachine::getTargetTransformInfo(const Function &F) {
+  return TargetTransformInfo(R600TTIImpl(this, F));
+}
+
+class R600PassConfig final : public AMDGPUPassConfig {
+public:
+  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
+      : AMDGPUPassConfig(TM, PM) {}
+
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    return createR600MachineScheduler(C);
+  }
+
+  bool addPreISel() override;
+  bool addInstSelector() override;
+  void addPreRegAlloc() override;
+  void addPreSched2() override;
+  void addPreEmitPass() override;
+};
+
+//===----------------------------------------------------------------------===//
+// R600 Pass Setup
+//===----------------------------------------------------------------------===//
+
+bool R600PassConfig::addPreISel() {
+  AMDGPUPassConfig::addPreISel();
+
+  if (EnableR600StructurizeCFG)
+    addPass(createStructurizeCFGPass());
+  return false;
+}
+
+bool R600PassConfig::addInstSelector() {
+  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
+  return false;
+}
+
+void R600PassConfig::addPreRegAlloc() { addPass(createR600VectorRegMerger()); }
+
+void R600PassConfig::addPreSched2() {
+  addPass(createR600EmitClauseMarkers(), false);
+  if (EnableR600IfConvert)
+    addPass(&IfConverterID, false);
+  addPass(createR600ClauseMergePass(), false);
+}
+
+void R600PassConfig::addPreEmitPass() {
+  addPass(createAMDGPUCFGStructurizerPass(), false);
+  addPass(createR600ExpandSpecialInstrsPass(), false);
+  addPass(&FinalizeMachineBundlesID, false);
+  addPass(createR600Packetizer(), false);
+  addPass(createR600ControlFlowFinalizer(), false);
+}
+
+TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new R600PassConfig(*this, PM);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
@@ -0,0 +1,69 @@
+//===- R600TargetTransformInfo.h - R600 specific TTI --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares a TargetTransformInfo::Concept conforming object specific
+/// to the R600 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
+
+#include "AMDGPUTargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class R600Subtarget;
+class AMDGPUTargetLowering;
+
+class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
+  using BaseT = BasicTTIImplBase<R600TTIImpl>;
+  using TTI = TargetTransformInfo;
+
+  friend BaseT;
+
+  const R600Subtarget *ST;
+  const AMDGPUTargetLowering *TLI;
+  AMDGPUTTIImpl CommonTTI;
+
+public:
+  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
+
+  const R600Subtarget *getST() const { return ST; }
+  const AMDGPUTargetLowering *getTLI() const { return TLI; }
+
+  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                               TTI::UnrollingPreferences &UP,
+                               OptimizationRemarkEmitter *ORE);
+  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                             TTI::PeelingPreferences &PP);
+  unsigned getHardwareNumberOfRegisters(bool Vec) const;
+  unsigned getNumberOfRegisters(bool Vec) const;
+  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
+  unsigned getMinVectorRegisterBitWidth() const;
+  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
+                                  unsigned AddrSpace) const;
+  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
+                                   unsigned AddrSpace) const;
+  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
+                                    unsigned AddrSpace) const;
+  unsigned getMaxInterleaveFactor(unsigned VF);
+  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+                                 const Instruction *I = nullptr);
+  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
+                                     unsigned Index);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
diff --git
a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -0,0 +1,146 @@
+//===- R600TargetTransformInfo.cpp - R600 specific TTI pass -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// R600 target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetTransformInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "R600tti"
+
+R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+    : BaseT(TM, F.getParent()->getDataLayout()),
+      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
+      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
+
+unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+  return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
+  return getHardwareNumberOfRegisters(Vec);
+}
+
+TypeSize
+R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+  return TypeSize::getFixed(32);
+}
+
+unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
+
+/// Returns the load/store vector register bit width used for \p AddrSpace.
+unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
+  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
+    return 128;
+  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::REGION_ADDRESS)
+    return 64;
+  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
+    return 32;
+
+  if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+       AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+       (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+        AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
+    return 128;
+  llvm_unreachable("unhandled address space");
+}
+
+bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+                                             Align Alignment,
+                                             unsigned AddrSpace) const {
+  // We allow vectorization of flat stores, even though we may need to decompose
+  // them later if they may access private memory. We don't have enough context
+  // here, and legalization can handle it.
+  return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
+}
+
+bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+                                              Align Alignment,
+                                              unsigned AddrSpace) const {
+  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+                                               Align Alignment,
+                                               unsigned AddrSpace) const {
+  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+  // Disable unrolling if the loop is not vectorized.
+  // TODO: Enable this again.
+  if (VF == 1)
+    return 1;
+
+  return 8;
+}
+
+InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
+                                            TTI::TargetCostKind CostKind,
+                                            const Instruction *I) {
+  // For size-oriented cost kinds, PHIs are free and anything else costs 1.
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+    return Opcode == Instruction::PHI ? 0 : 1;
+
+  // XXX - For some reason this isn't called for switch.
+  switch (Opcode) {
+  case Instruction::Br:
+  case Instruction::Ret:
+    return 10;
+  default:
+    return BaseT::getCFInstrCost(Opcode, CostKind, I);
+  }
+}
+
+InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+                                                unsigned Index) {
+  switch (Opcode) {
+  case Instruction::ExtractElement:
+  case Instruction::InsertElement: {
+    unsigned EltSize =
+        DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
+    // Elements narrower than 32 bits use the generic cost model.
+    if (EltSize < 32) {
+      return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+    }
+
+    // Extracts are just reads of a subregister, so are free. Inserts are
+    // considered free because we don't want to have any cost for scalarizing
+    // operations, and we don't have to copy into a different register class.
+
+    // Dynamic indexing isn't free and is best avoided.
+    return Index == ~0u ? 2 : 0;
+  }
+  default:
+    return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+  }
+}
+
+/// Unrolling preferences are delegated to the common AMDGPU TTI.
+void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                                          TTI::UnrollingPreferences &UP,
+                                          OptimizationRemarkEmitter *ORE) {
+  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
+}
+
+/// Peeling preferences are delegated to the common AMDGPU TTI.
+void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                                        TTI::PeelingPreferences &PP) {
+  CommonTTI.getPeelingPreferences(L, SE, PP);
+}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -7,11 +7,11 @@
 /// \file
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCInstrDesc.h"
-
 #ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
 #define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
 
+#include "llvm/MC/MCInstrDesc.h"
+
 namespace llvm {
 
 namespace SIInstrFlags {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -16,7 +16,6 @@
 #include "AMDGPUArgumentUsageInfo.h"
 #include "AMDGPUMachineFunction.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
@@ -26,9 +25,11 @@
 
 class MachineFrameInfo;
 class MachineFunction;
-class TargetRegisterClass;
+class SIInstrInfo;
 class SIMachineFunctionInfo;
 class SIRegisterInfo;
+class TargetRegisterClass;
+struct PerFunctionMIParsingState;
 
 class AMDGPUPseudoSourceValue : public PseudoSourceValue {
 public:
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -25,6 +25,8 @@
 
 class SIInstrInfo;
 class SIRegisterInfo;
+class SIScheduleDAGMI;
+class SIScheduleBlockCreator;
 
 enum SIScheduleCandReason {
   NoCand,
@@ -48,9 +50,6 @@
   void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
 };
 
-class SIScheduleDAGMI;
-class SIScheduleBlockCreator;
-
 enum SIScheduleBlockLinkKind {
   NoData,
   Data
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -24,7 +24,6 @@
 class LivePhysRegs;
 class RegisterBank;
 struct SGPRSpillBuilder;
-class SIMachineFunctionInfo;
 
 class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
 private: