// Review commentary for the two header diffs below (TargetTransformInfo.h and
// TargetTransformInfoImpl.h). This text precedes the `diff --git` header and is
// not part of any hunk.
//
// What the hunks add:
//  - TargetTransformInfo.h, first hunk: a documented public declaration
//    `bool enableMemCpyOpt() const;` next to getVPLegalizationStrategy().
//  - TargetTransformInfo.h, second hunk: the matching pure-virtual
//    `virtual bool enableMemCpyOpt() const = 0;` in the class whose other
//    members are declared `= 0`.
//  - TargetTransformInfo.h, third hunk: an `override` that simply forwards to
//    `Impl.enableMemCpyOpt()`.
//  - TargetTransformInfoImpl.h: the default implementation, which returns
//    false.
//
// NOTE(review): this patch text looks flattened, and template argument lists
// appear to have been dropped (e.g. the bare `template` before two of the hunk
// boundaries) -- confirm against the original patch before applying.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1412,6 +1412,10 @@ /// \returns How the target needs this vector-predicated operation to be /// transformed. VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; + + /// \returns True, if the target wants to enable MemCpy optimization pass + /// despite not providing library functions for memset/memcpy. + bool enableMemCpyOpt() const; /// @} /// @} @@ -1724,6 +1728,7 @@ virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; + virtual bool enableMemCpyOpt() const = 0; }; template @@ -2300,6 +2305,8 @@ getVPLegalizationStrategy(const VPIntrinsic &PI) const override { return Impl.getVPLegalizationStrategy(PI); } + + bool enableMemCpyOpt() const override { return Impl.enableMemCpyOpt(); } }; template diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -757,6 +757,8 @@ /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert); } + bool enableMemCpyOpt() const { return false; } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. 
// Review commentary for the four file diffs below. This text precedes the
// `diff --git` header and is not part of any hunk.
//
// What the hunks change:
//  - MemCpyOptimizer.h: forward-declares TargetTransformInfo, adds a
//    `TargetTransformInfo *TTI = nullptr;` member to MemCpyOptPass, and inserts
//    a `TargetTransformInfo *TTI` parameter into runImpl directly after TLI.
//  - TargetTransformInfo.cpp: defines TargetTransformInfo::enableMemCpyOpt() as
//    a forward to TTIImpl->enableMemCpyOpt().
//  - NVPTXTargetTransformInfo.h: adds enableMemCpyOpt() returning true.
//  - MemCpyOptimizer.cpp: includes TargetTransformInfo.h; adds
//    INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass); obtains TTI in
//    both callers of runImpl (the one using AM.getResult(...) and the one using
//    getAnalysis(...)) and passes it through; stores it in the TTI member; and
//    relaxes the early exit so runImpl returns false only when TLI lacks memset
//    or memcpy AND TTI->enableMemCpyOpt() is false. The last hunk also
//    normalizes `& MDWP->getMemDep()` to `&MDWP->getMemDep()`.
//
// NOTE(review): the last hunk calls getAnalysis() to fetch TTI, but no hunk
// visible here touches getAnalysisUsage. Presumably a matching addRequired for
// TargetTransformInfoWrapperPass is needed -- confirm it already exists or is
// added elsewhere in the patch.
// NOTE(review): template argument lists appear to have been stripped from this
// text (e.g. `PassInfoMixin`, `AM.getResult(F)`, `getAnalysis()`) -- verify
// against the original patch before applying.
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -37,11 +37,13 @@ class MemSetInst; class StoreInst; class TargetLibraryInfo; +class TargetTransformInfo; class Value; class MemCpyOptPass : public PassInfoMixin { MemoryDependenceResults *MD = nullptr; TargetLibraryInfo *TLI = nullptr; + TargetTransformInfo *TTI = nullptr; AAResults *AA = nullptr; AssumptionCache *AC = nullptr; DominatorTree *DT = nullptr; @@ -55,8 +57,8 @@ // Glue for the old PM. bool runImpl(Function &F, MemoryDependenceResults *MD, TargetLibraryInfo *TLI, - AAResults *AA, AssumptionCache *AC, DominatorTree *DT, - MemorySSA *MSSA); + TargetTransformInfo *TTI, AAResults *AA, AssumptionCache *AC, + DominatorTree *DT, MemorySSA *MSSA); private: // Helper functions diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1416,6 +1416,10 @@ return Result(F.getParent()->getDataLayout()); } +bool TargetTransformInfo::enableMemCpyOpt() const { + return TTIImpl->enableMemCpyOpt(); +} + // Register the basic pass. 
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti", "Target Transform Information", false, true) diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -120,6 +120,7 @@ return true; } } + bool enableMemCpyOpt() const { return true; } }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" @@ -306,6 +307,7 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) @@ -1716,9 +1718,9 @@ auto *DT = &AM.getResult(F); auto *MSSA = EnableMemorySSA ? &AM.getResult(F) : AM.getCachedResult(F); - + auto &TTI = AM.getResult(F); bool MadeChange = - runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); + runImpl(F, MD, &TLI, &TTI, AA, AC, DT, MSSA ? 
&MSSA->getMSSA() : nullptr); if (!MadeChange) return PreservedAnalyses::all(); @@ -1732,12 +1734,13 @@ } bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, - TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_, - MemorySSA *MSSA_) { + TargetLibraryInfo *TLI_, TargetTransformInfo *TTI_, + AliasAnalysis *AA_, AssumptionCache *AC_, + DominatorTree *DT_, MemorySSA *MSSA_) { bool MadeChange = false; MD = MD_; TLI = TLI_; + TTI = TTI_; AA = AA_; AC = AC_; DT = DT_; @@ -1747,7 +1750,10 @@ // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. - if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy)) + // Some targets (NVPTX) don't have memcpy/memset library functions, but still + // want to enable this pass, so keep going if TTI info says so. + if (!(TLI->has(LibFunc_memset) && TLI->has(LibFunc_memcpy)) && + !TTI->enableMemCpyOpt()) return false; while (true) { @@ -1778,7 +1784,8 @@ auto *MSSAWP = EnableMemorySSA ? &getAnalysis() : getAnalysisIfAvailable(); + auto *TTI = &getAnalysis().getTTI(F); - return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT, - MSSAWP ? &MSSAWP->getMSSA() : nullptr); + return Impl.runImpl(F, MDWP ? &MDWP->getMemDep() : nullptr, TLI, TTI, AA, AC, + DT, MSSAWP ? &MSSAWP->getMSSA() : nullptr); }