Index: llvm/include/llvm/Analysis/TargetLibraryInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -199,6 +199,13 @@ unsigned getWCharSize(const Module &M) const; }; +// Forward declarations needed for the `friend` declaration of +// `VFABI::addMappingsFromTLI` in `TargetLibraryInfo`. +class TargetLibraryInfo; +namespace VFABI { +void addMappingsFromTLI(const TargetLibraryInfo *TLI, CallInst *CI); +} // End VFABI namespace + /// Provides information about what library functions are available for /// the current target. /// @@ -207,6 +214,10 @@ class TargetLibraryInfo { friend class TargetLibraryAnalysis; friend class TargetLibraryInfoWrapperPass; + // `friend` is needed for the method to be able to call the methods + // `isFunctionVectorizable` and `getVectorizedFunction`. + friend void VFABI::addMappingsFromTLI(const TargetLibraryInfo *TLI, + CallInst *CI); const TargetLibraryInfoImpl *Impl; @@ -248,6 +259,8 @@ bool has(LibFunc F) const { return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable; } + +private: bool isFunctionVectorizable(StringRef F, unsigned VF) const { return Impl->isFunctionVectorizable(F, VF); } @@ -258,6 +271,7 @@ return Impl->getVectorizedFunction(F, VF); } +public: /// Tests if the function is both available and a candidate for optimized code /// generation. bool hasOptimizedCodeGen(LibFunc F) const { @@ -337,6 +351,10 @@ FunctionAnalysisManager::Invalidator &) { return false; } + /// Check if the function "F" is listed in a library known to LLVM. + bool isKnownVectorFunctionInLibrary(StringRef F) const { + return this->isFunctionVectorizable(F); + } }; /// Analysis pass providing the \c TargetLibraryInfo. 
Index: llvm/include/llvm/Analysis/VectorUtils.h =================================================================== --- llvm/include/llvm/Analysis/VectorUtils.h +++ llvm/include/llvm/Analysis/VectorUtils.h @@ -16,6 +16,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/CheckedArithmetic.h" @@ -48,9 +49,14 @@ AVX, // x86 AVX AVX2, // x86 AVX2 AVX512, // x86 AVX512 + LLVM_TLI, // Internal ABI for the function handled via + // TargetLibraryInfo Unknown // Unknown ISA }; +/// LLVM Internal ISA token. +static constexpr char const *_LLVM_TLI_ = "_LLVM_TLI_"; + /// Encapsulates information needed to describe a parameter. /// /// The description of the parameter is not linked directly to @@ -92,8 +98,8 @@ /// Holds the VFShape for a specific scalar to vector function mapping. struct VFInfo { VFShape Shape; // Classification of the vector function. - StringRef ScalarName; // Scalar Function Name. - StringRef VectorName; // Vector Function Name associated to this VFInfo. + std::string ScalarName; // Scalar Function Name. + std::string VectorName; // Vector Function Name associated to this VFInfo. // Comparison operator. bool operator==(const VFInfo &Other) const { @@ -138,6 +144,87 @@ SmallVector &VariantMappings); } // end namespace VFABI +/// \brief The Search Vector Functions System +/// +/// Helper class used to find the vector functions associated to a +/// scalar CallInst. +class SearchVFSystem { +private: + const CallInst &CI; /// The CallInst for which we are looking for vector + /// functions. + const Module *M; /// The Module of the CallInst CI. + const SmallVector ScalarToVectorMappings; + + /// Retrieve all the VFInfo instances associated to the CallInst CI. 
+ static SmallVector getVFMappings(const CallInst &CI) { + const std::string ScalarName = CI.getCalledFunction()->getName(); + SmallVector Ret; + + // Get mappings from the Vector Function ABI variants. + SmallVector ListOfStrings; + VFABI::getVectorVariantNames(CI, ListOfStrings); + for (auto MangledName : ListOfStrings) { + auto Shape = VFABI::tryDemangleForVFABI(MangledName); + // A match is found via scalar and vector names, and also by + // ensuring that the variant described in the attribute has a + // corresponding definition or declaration of the vector + // function in the Module M. + if (Shape.hasValue()) + if ((Shape.getValue().ScalarName == ScalarName) && + CI.getModule()->getFunction(Shape.getValue().VectorName)) + Ret.push_back(Shape.getValue()); + } + + // Other non-VFABI mappings should be retrieved here. + + return Ret; + } + +public: + /// Constructor, requires a CallInst instance. + SearchVFSystem(CallInst &CI) + : CI(CI), M(CI.getModule()), + ScalarToVectorMappings(SearchVFSystem::getVFMappings(CI)) {} + /// \defgroup TLI legacy interface + /// + /// These functions are here for compatibility with the equivalent + /// methods provided by the TLI (the TLI ones have been made + /// private). + /// + /// @{ + bool isFunctionVectorizable() const { + return !ScalarToVectorMappings.empty(); + } + std::string getVectorizedFunction(unsigned VF) const { + SmallVector Parameters; + for (unsigned I = 0; I < CI.arg_size(); ++I) { + Parameters.push_back(VFParameter({I, VFParamKind::Vector})); + } + // TODO: at the moment the shape is constructed by forcing the ISA + // to be only the one handled via the TLI. What we really should + // do is to move the VFISAKind field of VFShape into VFInfo, and + // accept any of the available ISA that present the shape expected + // by the query from the vectorizer. For example, AVX and AVX2 + // both have 256-bit registers. The IR signature generated for + // those two ISA will be the same. 
The `getVectorizedFunction` + // method should just return the first available match. We will + // then need to add more heuristics to the search system to decide + // which of the available versions (TLI, AVX, AVX2) the system + // should choose, when some with the same VFShape are present. + const VFShape TLIShape = {VF, false /*isScalable*/, VFISAKind::LLVM_TLI, + Parameters}; + for (const auto &Info : ScalarToVectorMappings) + if (Info.Shape == TLIShape) + return Info.VectorName; + + return ""; + } + bool isFunctionVectorizable(unsigned VF) const { + return !getVectorizedFunction(VF).empty(); + } + /// @} +}; + template class ArrayRef; class DemandedBits; class GetElementPtrInst; Index: llvm/include/llvm/Transforms/Utils/ModuleUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/ModuleUtils.h +++ llvm/include/llvm/Transforms/Utils/ModuleUtils.h @@ -118,6 +118,11 @@ /// Utility functions for VFABI data that can modify the module. /// /// @{ +/// Add to the Vector Function ABI mapping attribute of CI all the +/// functions that are known to \p TLI as being some vectorized +/// version of \p CI. +void addMappingsFromTLI(const TargetLibraryInfo *TLI, CallInst *CI); + /// Overwrite the Vector Function ABI variants attribute with the names provide /// in \p VariantMappings. 
void setVectorVariantNames(CallInst *CI, Index: llvm/lib/Analysis/LazyCallGraph.cpp =================================================================== --- llvm/lib/Analysis/LazyCallGraph.cpp +++ llvm/lib/Analysis/LazyCallGraph.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" @@ -146,8 +147,11 @@ static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) { LibFunc LF; - // Either this is a normal library function or a "vectorizable" function. - return TLI.getLibFunc(F, LF) || TLI.isFunctionVectorizable(F.getName()); + // Either this is a normal library function or a "vectorizable" + // function. Not using the SearchVFSystem here because this query + // is related only to libraries handled via the TLI. + return TLI.getLibFunc(F, LF) || + TLI.isKnownVectorFunctionInLibrary(F.getName()); } LazyCallGraph::LazyCallGraph( Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1844,7 +1844,7 @@ // If the function has an explicit vectorized counterpart, we can safely // assume that it can be vectorized. 
if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && - TLI->isFunctionVectorizable(Call->getCalledFunction()->getName())) + SearchVFSystem(*Call).isFunctionVectorizable()) continue; auto *Ld = dyn_cast(&I); Index: llvm/lib/Analysis/VFABIDemangling.cpp =================================================================== --- llvm/lib/Analysis/VFABIDemangling.cpp +++ llvm/lib/Analysis/VFABIDemangling.cpp @@ -28,15 +28,20 @@ if (MangledName.empty()) return ParseRet::Error; - ISA = StringSwitch(MangledName.take_front(1)) - .Case("n", VFISAKind::AdvancedSIMD) - .Case("s", VFISAKind::SVE) - .Case("b", VFISAKind::SSE) - .Case("c", VFISAKind::AVX) - .Case("d", VFISAKind::AVX2) - .Case("e", VFISAKind::AVX512) - .Default(VFISAKind::Unknown); - MangledName = MangledName.drop_front(1); + if (MangledName.startswith(_LLVM_TLI_)) { + MangledName = MangledName.drop_front(strlen(_LLVM_TLI_)); + ISA = VFISAKind::LLVM_TLI; + } else { + ISA = StringSwitch(MangledName.take_front(1)) + .Case("n", VFISAKind::AdvancedSIMD) + .Case("s", VFISAKind::SVE) + .Case("b", VFISAKind::SSE) + .Case("c", VFISAKind::AVX) + .Case("d", VFISAKind::AVX2) + .Case("e", VFISAKind::AVX512) + .Default(VFISAKind::Unknown); + MangledName = MangledName.drop_front(1); + } return ParseRet::OK; } @@ -287,6 +292,7 @@ // Format of the ABI name: // _ZGV_[()] Optional VFABI::tryDemangleForVFABI(StringRef MangledName) { + const StringRef OriginalName = MangledName; // Assume there is no custom name , and therefore the // vector name consists of // _ZGV_. @@ -370,6 +376,11 @@ return None; } + // LLVM internal mapping via the TargetLibraryInfo (TLI) must be + // redirected to an existing name. + if (ISA == VFISAKind::LLVM_TLI && VectorName == OriginalName) + return None; + // When is "M", we need to add a parameter that is used as // global predicate for the function. 
if (IsMasked) { Index: llvm/lib/Analysis/VectorUtils.cpp =================================================================== --- llvm/lib/Analysis/VectorUtils.cpp +++ llvm/lib/Analysis/VectorUtils.cpp @@ -1165,6 +1165,10 @@ const StringRef S = CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName) .getValueAsString(); + // Exit early if the attribute is empty. + if (S.empty()) + return; + SmallVector ListAttr; S.split(ListAttr, ","); @@ -1173,6 +1177,7 @@ #endif for (auto &S : SetVector(ListAttr.begin(), ListAttr.end())) { #ifndef NDEBUG + LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n"); Optional Info = VFABI::tryDemangleForVFABI(S); assert(Info.hasValue() && "Invalid name for a VFABI variant."); assert(M->getFunction(Info.getValue().VectorName) && Index: llvm/lib/Transforms/Utils/ModuleUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -11,15 +11,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; +#define DEBUG_TYPE "moduleutils" + static void appendToGlobalArray(const char *Array, Module &M, Function *F, int Priority, Constant *Data) { IRBuilder<> IRB(M.getContext()); @@ -366,17 +368,15 @@ // vectorizable. 
if (!TLI->isFunctionVectorizable(ScalarName)) return; - - SmallSet SetOfMangledNames; - VFABI::getVectorVariantNames(CI, SetOfMangledNames); + SmallVector SetOfMangledNames; + VFABI::getVectorVariantNames(*CI, SetOfMangledNames); Module *M = CI->getParent()->getParent()->getParent(); for (unsigned VF = 2; VF <= 16; VF *= 2) { const std::string TLIName = TLI->getVectorizedFunction(ScalarName, VF); if (TLIName != "") { std::string MangledName = mangleTLIName(TLIName, CI, VF); - // List.push_back(MangledName); - SetOfMangledNames.insert(MangledName); + SetOfMangledNames.push_back(MangledName); Function *VariantF = M->getFunction(TLIName); if (!VariantF) addVariantDeclaration(CI, VF, TLIName); @@ -404,8 +404,9 @@ Module *M = CI->getModule(); #ifndef NDEBUG for (const std::string &VariantMapping : VariantMappings) { + LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n"); Optional VI = VFABI::tryDemangleForVFABI(VariantMapping); - assert(VI.hasValue() && "Canno add an invalid VFABI name."); + assert(VI.hasValue() && "Cannot add an invalid VFABI name."); assert(M->getNamedValue(VI.getValue().VectorName) && "Cannot add variant to attribute: " "vector function declaration is missing."); Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -670,7 +670,7 @@ if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && !isa(CI) && !(CI->getCalledFunction() && TLI && - TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { + SearchVFSystem(*CI).isFunctionVectorizable())) { // If the call is a recognized math libary call, it is likely that // we can vectorize it given loosened floating-point constraints. LibFunc Func; @@ -685,7 +685,8 @@ // but it's hard to provide meaningful yet generic advice. 
// Also, should this be guarded by allowExtraAnalysis() and/or be part // of the returned info from isFunctionVectorizable()? - reportVectorizationFailure("Found a non-intrinsic callsite", + reportVectorizationFailure( + "Found a non-intrinsic callsite", "library call cannot be vectorized. " "Try compiling with -fno-math-errno, -ffast-math, " "or similar flags", Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1611,6 +1611,15 @@ auto *ORE = &getAnalysis().getORE(); auto *PSI = &getAnalysis().getPSI(); + // Update IR attribute on Vector Function variants if the TLI is + // present. This should probably be invoked as an early stage + // pass. + if (TLI) + for (auto &B : F) + for (auto &I : B) + if (auto *CI = dyn_cast(&I)) + VFABI::addMappingsFromTLI(TLI, CI); + std::function GetLAA = [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; @@ -3221,7 +3230,6 @@ unsigned VF, bool &NeedToScalarize) { Function *F = CI->getCalledFunction(); - StringRef FnName = CI->getCalledFunction()->getName(); Type *ScalarRetTy = CI->getType(); SmallVector Tys, ScalarTys; for (auto &ArgOp : CI->arg_operands()) @@ -3249,7 +3257,8 @@ // If we can't emit a vector call for this function, then the currently found // cost is the cost we need to return. NeedToScalarize = true; - if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin()) + if (!TLI || !SearchVFSystem(*CI).isFunctionVectorizable(VF) || + CI->isNoBuiltin()) return Cost; // If the corresponding vector cost is cheaper, return its cost. 
@@ -4263,7 +4272,6 @@ Module *M = I.getParent()->getParent()->getParent(); auto *CI = cast(&I); - StringRef FnName = CI->getCalledFunction()->getName(); Function *F = CI->getCalledFunction(); Type *RetTy = ToVectorTy(CI->getType(), VF); SmallVector Tys; @@ -4302,7 +4310,7 @@ VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); } else { // Use vector version of the library call. - StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); + StringRef VFnName = SearchVFSystem(*CI).getVectorizedFunction(VF); assert(!VFnName.empty() && "Vector function name is empty."); VectorF = M->getFunction(VFnName); if (!VectorF) { Index: llvm/test/Transforms/LoopVectorize/X86/TLI-to-vfabi-attribute.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/TLI-to-vfabi-attribute.ll @@ -0,0 +1,35 @@ +; RUN: opt -vector-library=SVML -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @sin_f64(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sin4(<4 x double> [[TMP4:%.*]]) +; CHECK: call double @sin(double %{{.*}}) #[[N:[0-9]+]] +; CHECK: ret void +; CHECK: attributes #[[N]] = { "vector-function-abi-variant"= +; CHECK-SAME: "_ZGV_LLVM_TLI_N2v_sin(__svml_sin2), +; CHECK-SAME: _ZGV_LLVM_TLI_N4v_sin(__svml_sin4), +; CHECK-SAME: _ZGV_LLVM_TLI_N8v_sin(__svml_sin8)" } +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label 
%for.end, label %for.body + +for.end: + ret void +} + +declare double @sin(double) #0 + +attributes #0 = { nounwind readnone } Index: llvm/unittests/Analysis/VectorFunctionABITest.cpp =================================================================== --- llvm/unittests/Analysis/VectorFunctionABITest.cpp +++ llvm/unittests/Analysis/VectorFunctionABITest.cpp @@ -13,7 +13,7 @@ using namespace llvm; -// This test makes sure that the getFromVFABI method succeeds only on +// This test makes sure that the demangling method succeeds only on // valid values of the string. TEST(VectorFunctionABITests, OnlyValidNames) { // Incomplete string. @@ -91,8 +91,8 @@ unsigned &VF = Info.Shape.VF; VFISAKind &ISA = Info.Shape.ISA; SmallVector &Parameters = Info.Shape.Parameters; - StringRef &ScalarName = Info.ScalarName; - StringRef &VectorName = Info.VectorName; + std::string &ScalarName = Info.ScalarName; + std::string &VectorName = Info.VectorName; bool &IsScalable = Info.Shape.IsScalable; // Invoke the parser. bool invokeParser(const StringRef MangledName) { @@ -243,6 +243,12 @@ EXPECT_EQ(ISA, VFISAKind::AVX512); } +TEST_F(VFABIParserTest, LLVM_TLI) { + EXPECT_FALSE(invokeParser("_ZGV_LLVM_TLI_N2v_sin")); + EXPECT_TRUE(invokeParser("_ZGV_LLVM_TLI_N2v_sin_(vector_name)")); + EXPECT_EQ(ISA, VFISAKind::LLVM_TLI); +} + TEST_F(VFABIParserTest, InvalidMask) { EXPECT_FALSE(invokeParser("_ZGVsK2v_sin")); } @@ -346,6 +352,13 @@ __COMMON_CHECKS; EXPECT_EQ(VectorName, "_ZGVeN2vls2Ls27Us4Rs5l1L10U100R1000u2_sin"); + // LLVM_TLI: = "_LLVM_TLI_" + EXPECT_TRUE(invokeParser( + "_ZGV_LLVM_TLI_N2vls2Ls27Us4Rs5l1L10U100R1000u2_sin(vectorf)")); + EXPECT_EQ(ISA, VFISAKind::LLVM_TLI); + __COMMON_CHECKS; + EXPECT_EQ(VectorName, "vectorf"); + // Unknown ISA (randomly using "q"). This test will need update if // some targets decide to use "q" as their ISA token. 
EXPECT_TRUE(invokeParser("_ZGVqN2vls2Ls27Us4Rs5l1L10U100R1000u2_sin")); @@ -473,3 +486,15 @@ Exp.push_back("_ZGVnN4v_g"); EXPECT_EQ(Mappings, Exp); } + +TEST_F(VFABIParserTest, Intrinsics) { + EXPECT_TRUE(invokeParser("_ZGV_LLVM_TLI_N4vv_llvm.pow.f32(__svml_powf4)")); + EXPECT_EQ(VF, (unsigned)4); + EXPECT_FALSE(IsMasked()); + EXPECT_FALSE(IsScalable); + EXPECT_EQ(ISA, VFISAKind::LLVM_TLI); + EXPECT_EQ(Parameters.size(), (unsigned)2); + EXPECT_EQ(Parameters[0], VFParameter({0, VFParamKind::Vector})); + EXPECT_EQ(Parameters[1], VFParameter({1, VFParamKind::Vector})); + EXPECT_EQ(ScalarName, "llvm.pow.f32"); +}