diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -341,12 +341,16 @@ FunctionAnalysisManager::Invalidator &) { return false; } - /// Returns the largest vectorization factor used in the list of /// vector functions. unsigned getWidestVF(StringRef ScalarF) const { return Impl->getWidestVF(ScalarF); } + + /// Check if the function "F" is listed in a library known to LLVM. + bool isKnownVectorFunctionInLibrary(StringRef F) const { + return this->isFunctionVectorizable(F); + } }; /// Analysis pass providing the \c TargetLibraryInfo. diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -16,6 +16,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/CheckedArithmetic.h" @@ -94,8 +95,8 @@ /// Holds the VFShape for a specific scalar to vector function mapping. struct VFInfo { VFShape Shape; // Classification of the vector function. - StringRef ScalarName; // Scalar Function Name. - StringRef VectorName; // Vector Function Name associated to this VFInfo. + std::string ScalarName; // Scalar Function Name. + std::string VectorName; // Vector Function Name associated to this VFInfo. // Comparison operator. bool operator==(const VFInfo &Other) const { @@ -143,6 +144,87 @@ SmallVectorImpl &VariantMappings); } // end namespace VFABI +/// \brief The Search Vector Functions System +/// +/// Helper class used to find the vector functions associated to a +/// scalar CallInst. +class SearchVFSystem { +private: + const CallInst &CI; ///< The CallInst for which we are looking for vector + ///< functions. 
+ const Module *M; ///< The Module of the CallInst CI. + const SmallVector ScalarToVectorMappings; + + /// Retrieve all the VFInfo instances associated to the CallInst CI. + static SmallVector getVFMappings(const CallInst &CI) { + const std::string ScalarName = CI.getCalledFunction()->getName(); + SmallVector Ret; + + // Get mappings from the Vector Function ABI variants. + SmallVector ListOfStrings; + VFABI::getVectorVariantNames(CI, ListOfStrings); + for (const auto &MangledName : ListOfStrings) { + auto Shape = VFABI::tryDemangleForVFABI(MangledName); + // A match is found via scalar and vector names, and also by + // ensuring that the variant described in the attribute has a + // corresponding definition or declaration of the vector + // function in the Module M. + if (Shape.hasValue()) + if ((Shape.getValue().ScalarName == ScalarName) && + CI.getModule()->getFunction(Shape.getValue().VectorName)) + Ret.push_back(Shape.getValue()); + } + + // Other non-VFABI mappings should be retrieved here. + + return Ret; + } + +public: + /// Constructor, requires a CallInst instance. + SearchVFSystem(CallInst &CI) + : CI(CI), M(CI.getModule()), + ScalarToVectorMappings(SearchVFSystem::getVFMappings(CI)) {} + /// \defgroup TLI legacy interface + /// + /// These functions are here for compatibility with the equivalent + /// methods provided by the TLI (the TLI ones have been made + /// private). + /// + /// @{ + bool isFunctionVectorizable() const { + return !ScalarToVectorMappings.empty(); + } + std::string getVectorizedFunction(unsigned VF) const { + SmallVector Parameters; + for (unsigned I = 0; I < CI.arg_size(); ++I) { + Parameters.push_back(VFParameter({I, VFParamKind::Vector})); + } + // TODO: at the moment the shape is constructed by forcing the ISA + // to be only the one handled via the TLI. 
What we really should + // do is to move the VFISAKind field of VFShape into VFInfo, and + // accept any of the available ISAs that present the shape expected + // by the query from the vectorizer. For example, AVX and AVX2 + // both have 256-bit registers. The IR signature generated for + // those two ISAs will be the same. The `getVectorizedFunction` + // method should just return the first available match. We will + // then need to add more heuristics to the search system to decide + // which of the available versions (TLI, AVX, AVX2) the system + // should choose, when some with the same VFShape are present. + const VFShape TLIShape = {VF, false /*isScalable*/, VFISAKind::LLVM, + Parameters}; + for (const auto &Info : ScalarToVectorMappings) + if (Info.Shape == TLIShape) + return Info.VectorName; + + return ""; + } + bool isFunctionVectorizable(unsigned VF) const { + return !getVectorizedFunction(VF).empty(); + } + /// @} +}; + template class ArrayRef; class DemandedBits; class GetElementPtrInst; diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp --- a/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/llvm/lib/Analysis/LazyCallGraph.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" @@ -146,8 +147,11 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { LibFunc LF; - // Either this is a normal library function or a "vectorizable" function. - return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); + // Either this is a normal library function or a "vectorizable" + // function. Not using the SearchVFSystem here because this query + // is related only to libraries handled via the TLI. 
+ return TLI.getLibFunc(F, LF) || + TLI.isKnownVectorFunctionInLibrary(F.getName()); } LazyCallGraph::LazyCallGraph( diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1845,7 +1845,7 @@ // If the function has an explicit vectorized counterpart, we can safely // assume that it can be vectorized. if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && - TLI->isFunctionVectorizable(Call->getCalledFunction()->getName())) + SearchVFSystem(*Call).isFunctionVectorizable()) continue; auto *Ld = dyn_cast(&I); diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1174,6 +1174,7 @@ for (auto &S : SetVector(ListAttr.begin(), ListAttr.end())) { #ifndef NDEBUG + LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n"); Optional Info = VFABI::tryDemangleForVFABI(S); assert(Info.hasValue() && "Invalid name for a VFABI variant."); assert(CI.getModule()->getFunction(Info.getValue().VectorName) && diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -896,6 +896,7 @@ MPM.addPass(RequireAnalysisPass()); FunctionPassManager OptimizePM(DebugLogging); + OptimizePM.addPass(InjectTLIMappings()); OptimizePM.addPass(Float2IntPass()); OptimizePM.addPass(LowerConstantIntrinsicsPass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -252,9 +252,11 @@ FPM.add(createEntryExitInstrumenterPass()); // Add LibraryInfo if we have some. 
- if (LibraryInfo) + if (LibraryInfo) { FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - + // Inject VFABI calls from the TargetLibraryInfo. + FPM.add(createInjectTLIMappingsLegacyPass()); + } if (OptLevel == 0) return; addInitialAliasAnalysisPasses(FPM); @@ -482,9 +484,10 @@ } // Add LibraryInfo if we have some. - if (LibraryInfo) + if (LibraryInfo) { MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - + MPM.add(createInjectTLIMappingsLegacyPass()); + } addInitialAliasAnalysisPasses(MPM); // For ThinLTO there are two passes of indirect call promotion. The @@ -980,9 +983,10 @@ void PassManagerBuilder::populateThinLTOPassManager( legacy::PassManagerBase &PM) { PerformThinLTO = true; - if (LibraryInfo) + if (LibraryInfo) { PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - + PM.add(createInjectTLIMappingsLegacyPass()); + } if (VerifyInput) PM.add(createVerifierPass()); @@ -1011,9 +1015,10 @@ } void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { - if (LibraryInfo) + if (LibraryInfo) { PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - + PM.add(createInjectTLIMappingsLegacyPass()); + } if (VerifyInput) PM.add(createVerifierPass()); diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -11,15 +11,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; +#define DEBUG_TYPE "moduleutils" + static void appendToGlobalArray(const char *Array, Module &M, Function *F, int Priority, Constant *Data) { IRBuilder<> IRB(M.getContext()); @@ 
-298,8 +300,9 @@ Module *M = CI->getModule(); #ifndef NDEBUG for (const std::string &VariantMapping : VariantMappings) { + LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); Optional VI = VFABI::tryDemangleForVFABI(VariantMapping); - assert(VI.hasValue() && "Canno add an invalid VFABI name."); + assert(VI.hasValue() && "Cannot add an invalid VFABI name."); assert(M->getNamedValue(VI.getValue().VectorName) && "Cannot add variant to attribute: " "vector function declaration is missing."); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -670,7 +670,7 @@ if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && !isa(CI) && !(CI->getCalledFunction() && TLI && - TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { + SearchVFSystem(*CI).isFunctionVectorizable())) { // If the call is a recognized math libary call, it is likely that // we can vectorize it given loosened floating-point constraints. LibFunc Func; @@ -685,7 +685,8 @@ // but it's hard to provide meaningful yet generic advice. // Also, should this be guarded by allowExtraAnalysis() and/or be part // of the returned info from isFunctionVectorizable()? - reportVectorizationFailure("Found a non-intrinsic callsite", + reportVectorizationFailure( + "Found a non-intrinsic callsite", "library call cannot be vectorized. 
" "Try compiling with -fno-math-errno, -ffast-math, " "or similar flags", diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -134,6 +134,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" @@ -1629,6 +1630,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); // We currently do not preserve loopinfo/dominator analyses with outer loop // vectorization. Until this is addressed, mark these analyses as preserved @@ -3221,7 +3223,6 @@ unsigned VF, bool &NeedToScalarize) { Function *F = CI->getCalledFunction(); - StringRef FnName = CI->getCalledFunction()->getName(); Type *ScalarRetTy = CI->getType(); SmallVector Tys, ScalarTys; for (auto &ArgOp : CI->arg_operands()) @@ -3249,7 +3250,8 @@ // If we can't emit a vector call for this function, then the currently found // cost is the cost we need to return. NeedToScalarize = true; - if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin()) + if (!TLI || !SearchVFSystem(*CI).isFunctionVectorizable(VF) || + CI->isNoBuiltin()) return Cost; // If the corresponding vector cost is cheaper, return its cost. @@ -4263,7 +4265,6 @@ Module *M = I.getParent()->getParent()->getParent(); auto *CI = cast(&I); - StringRef FnName = CI->getCalledFunction()->getName(); Function *F = CI->getCalledFunction(); Type *RetTy = ToVectorTy(CI->getType(), VF); SmallVector Tys; @@ -4302,7 +4303,7 @@ VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); } else { // Use vector version of the library call. 
- StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); + std::string VFnName = SearchVFSystem(*CI).getVectorizedFunction(VF); assert(!VFnName.empty() && "Vector function name is empty."); VectorF = M->getFunction(VFnName); if (!VectorF) { @@ -6335,6 +6336,7 @@ INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(InjectTLIMappingsLegacy) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { diff --git a/llvm/test/Transforms/LoopVectorize/X86/TLI-to-vfabi-attribute.ll b/llvm/test/Transforms/LoopVectorize/X86/TLI-to-vfabi-attribute.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/TLI-to-vfabi-attribute.ll @@ -0,0 +1,35 @@ +; RUN: opt -vector-library=SVML -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @sin_f64(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) +; CHECK: call double @sin(double %{{.*}}) #[[N:[0-9]+]] +; CHECK: ret void +; CHECK: attributes #[[N]] = { "vector-function-abi-variant"= +; CHECK-SAME: "_ZGV_LLVM_TLI_N2v_sin(__svml_sin2), +; CHECK-SAME: _ZGV_LLVM_TLI_N4v_sin(__svml_sin4), +; CHECK-SAME: _ZGV_LLVM_TLI_N8v_sin(__svml_sin8)" } +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + 
+for.end: + ret void +} + +declare double @sin(double) #0 + +attributes #0 = { nounwind readnone } diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -746,6 +746,7 @@ } Passes.add(new TargetLibraryInfoWrapperPass(TLII)); + Passes.add(createInjectTLIMappingsLegacyPass()); // Add internal analysis passes from the target machine. Passes.add(createTargetTransformInfoWrapperPass(TM ? TM->getTargetIRAnalysis() diff --git a/llvm/unittests/Analysis/VectorFunctionABITest.cpp b/llvm/unittests/Analysis/VectorFunctionABITest.cpp --- a/llvm/unittests/Analysis/VectorFunctionABITest.cpp +++ b/llvm/unittests/Analysis/VectorFunctionABITest.cpp @@ -13,7 +13,7 @@ using namespace llvm; -// This test makes sure that the getFromVFABI method succeeds only on +// This test makes sure that the demangling method succeeds only on // valid values of the string. TEST(VectorFunctionABITests, OnlyValidNames) { // Incomplete string. @@ -91,8 +91,8 @@ unsigned &VF = Info.Shape.VF; VFISAKind &ISA = Info.Shape.ISA; SmallVector &Parameters = Info.Shape.Parameters; - StringRef &ScalarName = Info.ScalarName; - StringRef &VectorName = Info.VectorName; + std::string &ScalarName = Info.ScalarName; + std::string &VectorName = Info.VectorName; bool &IsScalable = Info.Shape.IsScalable; // Invoke the parser. 
bool invokeParser(const StringRef MangledName) { @@ -243,6 +243,12 @@ EXPECT_EQ(ISA, VFISAKind::AVX512); } +TEST_F(VFABIParserTest, LLVM_ISA) { + EXPECT_FALSE(invokeParser("_ZGV_LLVM_N2v_sin")); + EXPECT_TRUE(invokeParser("_ZGV_LLVM_N2v_sin_(vector_name)")); + EXPECT_EQ(ISA, VFISAKind::LLVM); +} + TEST_F(VFABIParserTest, InvalidMask) { EXPECT_FALSE(invokeParser("_ZGVsK2v_sin")); } @@ -486,3 +492,15 @@ EXPECT_TRUE(invokeParser("_ZGV_LLVM_N2v_sin_(vector_name)")); EXPECT_EQ(ISA, VFISAKind::LLVM); } + +TEST_F(VFABIParserTest, IntrinsicsInLLVMIsa) { + EXPECT_TRUE(invokeParser("_ZGV_LLVM_N4vv_llvm.pow.f32(__svml_powf4)")); + EXPECT_EQ(VF, (unsigned)4); + EXPECT_FALSE(IsMasked()); + EXPECT_FALSE(IsScalable); + EXPECT_EQ(ISA, VFISAKind::LLVM); + EXPECT_EQ(Parameters.size(), (unsigned)2); + EXPECT_EQ(Parameters[0], VFParameter({0, VFParamKind::Vector})); + EXPECT_EQ(Parameters[1], VFParameter({1, VFParamKind::Vector})); + EXPECT_EQ(ScalarName, "llvm.pow.f32"); +}