Index: llvm/include/llvm/CodeGen/MachineFunction.h =================================================================== --- llvm/include/llvm/CodeGen/MachineFunction.h +++ llvm/include/llvm/CodeGen/MachineFunction.h @@ -99,9 +99,10 @@ /// supplied allocator. /// /// This function can be overridden in a derive class. - template - static Ty *create(BumpPtrAllocator &Allocator, MachineFunction &MF) { - return new (Allocator.Allocate()) Ty(MF); + template + static Ty *create(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) { + return new (Allocator.Allocate()) Ty(F, STI); } }; @@ -612,14 +613,12 @@ /// template Ty *getInfo() { - if (!MFInfo) - MFInfo = Ty::template create(Allocator, *this); return static_cast(MFInfo); } template const Ty *getInfo() const { - return const_cast(this)->getInfo(); + return static_cast(MFInfo); } /// Returns the denormal handling type for the default rounding mode of the Index: llvm/include/llvm/Target/TargetMachine.h =================================================================== --- llvm/include/llvm/Target/TargetMachine.h +++ llvm/include/llvm/Target/TargetMachine.h @@ -17,6 +17,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" #include @@ -52,6 +53,7 @@ } using legacy::PassManagerBase; +struct MachineFunctionInfo; namespace yaml { struct MachineFunctionInfo; } @@ -121,6 +123,13 @@ return nullptr; } + /// Create the target's instance of MachineFunctionInfo + virtual MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return nullptr; + } + /// Allocate and return a default initialized instance of the YAML /// representation for the MachineFunctionInfo. virtual yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const { Index: llvm/lib/CodeGen/MachineFunction.cpp =================================================================== --- llvm/lib/CodeGen/MachineFunction.cpp +++ llvm/lib/CodeGen/MachineFunction.cpp @@ -160,7 +160,8 @@ else RegInfo = nullptr; - MFInfo = nullptr; + MFInfo = Target.createMachineFunctionInfo(Allocator, F, STI); + // We can realign the stack if the target supports it and the user hasn't // explicitly asked us not to. bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1141,7 +1141,7 @@ S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) | S_00B84C_EXCP_EN(0); - ProgInfo.Occupancy = STM.computeOccupancy(MF, ProgInfo.LDSSize, + ProgInfo.Occupancy = STM.computeOccupancy(MF.getFunction(), ProgInfo.LDSSize, ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU); } Index: llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -15,6 +15,7 @@ namespace llvm { +class AMDGPUSubtarget; class GCNSubtarget; class AMDGPUMachineFunction : public MachineFunctionInfo { @@ -45,7 +46,7 @@ bool WaveLimiter = false; public: - AMDGPUMachineFunction(const MachineFunction &MF); + AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST); uint64_t getExplicitKernArgSize() const { return ExplicitKernArgSize; Index: llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -13,16 +13,15 @@ using namespace llvm; -AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : +AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, + const AMDGPUSubtarget &ST) : MachineFunctionInfo(), - Mode(MF.getFunction()), - IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())), - NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) { - const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); + Mode(F), + IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), + NoSignedZerosFPMath(false) { // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, // except reserved size is not correctly aligned. - const Function &F = MF.getFunction(); Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); MemoryBound = MemBoundAttr.isStringAttribute() && @@ -35,6 +34,11 @@ CallingConv::ID CC = F.getCallingConv(); if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); + + // FIXME: Shouldn't be target specific + Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); + NoSignedZerosFPMath = NSZAttr.isStringAttribute() && + NSZAttr.getValueAsString() == "true"; } unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1052,7 +1052,7 @@ /// registers if provided. /// Note, occupancy can be affected by the scratch allocation as well, but /// we do not have enough information to compute it. - unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0, + unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; /// \returns true if the flat_scratch register should be initialized with the Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -628,13 +628,12 @@ return 2; // VCC. } -unsigned GCNSubtarget::computeOccupancy(const MachineFunction &MF, - unsigned LDSSize, +unsigned GCNSubtarget::computeOccupancy(const Function &F, unsigned LDSSize, unsigned NumSGPRs, unsigned NumVGPRs) const { unsigned Occupancy = std::min(getMaxWavesPerEU(), - getOccupancyWithLocalMemSize(LDSSize, MF.getFunction())); + getOccupancyWithLocalMemSize(LDSSize, F)); if (NumSGPRs) Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs)); if (NumVGPRs) Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -86,6 +86,10 @@ bool isMachineVerifierClean() const override { return false; } + + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; }; //===----------------------------------------------------------------------===// @@ -112,6 +116,10 @@ return true; } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override; Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -25,6 +25,7 @@ #include "GCNIterativeScheduler.h" #include "GCNSchedStrategy.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600MachineFunctionInfo.h" #include "R600MachineScheduler.h" #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" @@ -840,6 +841,13 @@ return new R600PassConfig(*this, PM); } +MachineFunctionInfo *R600TargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return R600MachineFunctionInfo::create(Allocator, F, + STI); +} + //===----------------------------------------------------------------------===// // GCN Pass Setup //===----------------------------------------------------------------------===// @@ -1058,6 +1066,13 @@ return new GCNPassConfig(*this, PM); } +MachineFunctionInfo *GCNTargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return SIMachineFunctionInfo::create(Allocator, F, + STI); +} + yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const { return new yaml::SIMachineFunctionInfo(); } Index: llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h +++ llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h @@ -18,7 +18,7 @@ class R600MachineFunctionInfo final : public AMDGPUMachineFunction { public: - R600MachineFunctionInfo(const MachineFunction &MF); + R600MachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI); unsigned CFStackSize; }; Index: llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp @@ -8,8 +8,11 @@ //===----------------------------------------------------------------------===// #include "R600MachineFunctionInfo.h" +#include "AMDGPUSubtarget.h" using namespace llvm; -R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) - : AMDGPUMachineFunction(MF) { } +R600MachineFunctionInfo::R600MachineFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) + : AMDGPUMachineFunction(F, static_cast(*STI)) { +} Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -494,7 +494,7 @@ bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg); public: - SIMachineFunctionInfo(const MachineFunction &MF); + SIMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI); bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI); Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -27,8 +27,9 @@ using namespace llvm; -SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) - : AMDGPUMachineFunction(MF), +SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) + : AMDGPUMachineFunction(F, static_cast(*STI)), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), @@ -48,12 +49,11 @@ GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0), GDSSize(0) { - const GCNSubtarget &ST = MF.getSubtarget(); - const Function &F = MF.getFunction(); + const GCNSubtarget &ST = *static_cast(STI); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); WavesPerEU = ST.getWavesPerEU(F); - Occupancy = ST.computeOccupancy(MF, getLDSSize()); + Occupancy = ST.computeOccupancy(F, getLDSSize()); CallingConv::ID CC = F.getCallingConv(); // FIXME: Should have analysis or something rather than attribute to detect Index: llvm/lib/Target/X86/X86MachineFunctionInfo.h =================================================================== --- llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -111,7 +111,8 @@ public: X86MachineFunctionInfo() = default; - explicit X86MachineFunctionInfo(MachineFunction &MF) {} + explicit X86MachineFunctionInfo(const Function &F, + const TargetSubtargetInfo *STI) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } Index: llvm/lib/Target/X86/X86TargetMachine.h =================================================================== --- llvm/lib/Target/X86/X86TargetMachine.h +++ llvm/lib/Target/X86/X86TargetMachine.h @@ -54,6 +54,10 @@ return TLOF.get(); } + MachineFunctionInfo * + createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const override; + bool isJIT() const { return IsJIT; } }; Index: llvm/lib/Target/X86/X86TargetMachine.cpp =================================================================== --- llvm/lib/Target/X86/X86TargetMachine.cpp +++ llvm/lib/Target/X86/X86TargetMachine.cpp @@ -16,6 +16,7 @@ #include "X86.h" #include "X86CallLowering.h" #include "X86LegalizerInfo.h" +#include "X86MachineFunctionInfo.h" #include "X86MacroFusion.h" #include "X86Subtarget.h" #include "X86TargetObjectFile.h" @@ -400,6 +401,13 @@ return new X86PassConfig(*this, PM); } +MachineFunctionInfo *X86TargetMachine::createMachineFunctionInfo( + BumpPtrAllocator &Allocator, const Function &F, + const TargetSubtargetInfo *STI) const { + return X86MachineFunctionInfo::create(Allocator, F, + STI); +} + void X86PassConfig::addIRPasses() { addPass(createAtomicExpandPass());