Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -651,6 +651,13 @@ /// set to false, it returns the number of scalar registers. unsigned getNumberOfRegisters(bool Vector) const; + static const unsigned RegisterRichThreshold = 8192; + + /// \brief Return true if the target architecture is register-rich + bool isRegisterRich() const { + return getNumberOfRegisters(false) > RegisterRichThreshold; + } + /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -178,6 +178,8 @@ } } + bool isRegisterRich() { return false; } + bool hasBranchDivergence() { return false; } bool isSourceOfDivergence(const Value *V) { return false; } Index: lib/Target/FPGA/FPGATargetTransformInfo.h =================================================================== --- lib/Target/FPGA/FPGATargetTransformInfo.h +++ lib/Target/FPGA/FPGATargetTransformInfo.h @@ -62,6 +62,7 @@ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments); int getUserCost(const User *U, ArrayRef Operands); + bool isRegisterRich(); bool hasBranchDivergence(); bool isSourceOfDivergence(const Value *V); // bool isLoweredToCall(const Function *F); Index: lib/Target/FPGA/FPGATargetTransformInfo.cpp =================================================================== --- lib/Target/FPGA/FPGATargetTransformInfo.cpp +++ lib/Target/FPGA/FPGATargetTransformInfo.cpp @@ -35,6 +35,8 @@ bool FPGATTIImpl::hasBranchDivergence() { return true; } +bool FPGATTIImpl::isRegisterRich() { return true; } + static bool IsKernelFunction(const Function &F) { return 
F.getCallingConv() == CallingConv::SPIR_KERNEL; } Index: lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- lib/Transforms/Scalar/GVNHoist.cpp +++ lib/Transforms/Scalar/GVNHoist.cpp @@ -48,6 +48,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" @@ -256,8 +257,9 @@ class GVNHoist { public: GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA, - MemoryDependenceResults *MD, MemorySSA *MSSA) - : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), + MemoryDependenceResults *MD, MemorySSA *MSSA, + TargetTransformInfo &TTI) + : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), TTI(TTI), MSSAUpdater(llvm::make_unique(MSSA)) {} bool run(Function &F) { @@ -333,13 +335,13 @@ AliasAnalysis *AA; MemoryDependenceResults *MD; MemorySSA *MSSA; + TargetTransformInfo &TTI; std::unique_ptr MSSAUpdater; DenseMap DFSNumber; BBSideEffectsSet BBSideEffects; DenseSet HoistBarrier; SmallVector IDFBlocks; unsigned NumFuncArgs; - const bool HoistingGeps = false; enum InsKind { Unknown, Scalar, Load, Store }; @@ -1040,9 +1042,10 @@ // The order in which hoistings are done may influence the availability // of operands. if (!allOperandsAvailable(Repl, DestBB)) { - // When HoistingGeps there is nothing more we can do to make the - // operands available: just continue. - if (HoistingGeps) + // The target is register-rich, so the GEPs should already have been + // hoisted where possible; there is no need to further call + // makeGepOperandsAvailable to copy the GEPs. + if (TTI.isRegisterRich()) continue; // When not HoistingGeps we need to copy the GEPs. @@ -1094,9 +1097,11 @@ HoistBarrier.insert(BB); break; } - // Only hoist the first instructions in BB up to MaxDepthInBB. 
Hoisting - // deeper may increase the register pressure and compilation time. - if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) + // If register pressure is a concern, only hoist the first + // instructions in BB up to MaxDepthInBB. Hoisting deeper may increase + // the register pressure and compilation time. + if (!TTI.isRegisterRich() && + MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) break; // Do not value number terminator instructions. @@ -1121,11 +1126,9 @@ break; CI.insert(Call, VN); - } else if (HoistingGeps || !isa(&I1)) - // Do not hoist scalars past calls that may write to memory because - // that could result in spills later. geps are handled separately. - // TODO: We can relax this for targets like AArch64 as they have more - // registers than X86. + } else if (TTI.isRegisterRich() || !isa(&I1)) + // Do not hoist GEPs if register pressure is a concern. + // In that case, we only hoist a GEP if we can hoist the load/store. II.insert(&I1, VN); } } @@ -1157,12 +1160,14 @@ auto &AA = getAnalysis().getAAResults(); auto &MD = getAnalysis().getMemDep(); auto &MSSA = getAnalysis().getMSSA(); + auto &TTI = getAnalysis().getTTI(F); - GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); + GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA, TTI); return G.run(F); } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -1182,7 +1187,8 @@ AliasAnalysis &AA = AM.getResult(F); MemoryDependenceResults &MD = AM.getResult(F); MemorySSA &MSSA = AM.getResult(F).getMSSA(); - GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); + TargetTransformInfo &TTI = AM.getResult(F); + GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA, TTI); if (!G.run(F)) return PreservedAnalyses::all();