diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4153,65 +4153,6 @@ return Cost->useOrderedReductions(RdxDesc); } -void InnerLoopVectorizer::widenCallInstruction( - CallInst &CI, VPValue *Def, VPUser &ArgOperands, VPTransformState &State, - Intrinsic::ID VectorIntrinsicID) { - assert(!isa(CI) && - "DbgInfoIntrinsic should have been dropped during VPlan construction"); - State.setDebugLocFromInst(&CI); - - SmallVector Tys; - for (Value *ArgOperand : CI.args()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue())); - - Intrinsic::ID ID = getVectorIntrinsicIDForCall(&CI, TLI); - - for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector TysForDecl = {CI.getType()}; - SmallVector Args; - for (const auto &I : enumerate(ArgOperands.operands())) { - // Some intrinsics have a scalar argument - don't replace it with a - // vector. - Value *Arg; - if (VectorIntrinsicID == Intrinsic::not_intrinsic || - !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) - Arg = State.get(I.value(), Part); - else - Arg = State.get(I.value(), VPIteration(0, 0)); - if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) - TysForDecl.push_back(Arg->getType()); - Args.push_back(Arg); - } - - Function *VectorF; - if (VectorIntrinsicID != Intrinsic::not_intrinsic) { - // Use vector version of the intrinsic. - if (VF.isVector()) - TysForDecl[0] = VectorType::get(CI.getType()->getScalarType(), VF); - Module *M = State.Builder.GetInsertBlock()->getModule(); - VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); - assert(VectorF && "Can't retrieve vector intrinsic."); - } else { - // Use vector version of the function call. - const VFShape Shape = VFShape::get(CI, VF, false /*HasGlobalPred*/); -#ifndef NDEBUG - assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr && - "Can't create vector function."); -#endif - VectorF = VFDatabase(CI).getVectorizedFunction(Shape); - } - SmallVector OpBundles; - CI.getOperandBundlesAsDefs(OpBundles); - CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); - - if (isa(V)) - V->copyFastMathFlags(&CI); - - State.set(Def, V, Part); - State.addMetadata(V, &CI); - } -} - void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does @@ -9160,7 +9101,7 @@ VPlanTransforms::VPInstructionsToVPRecipes( OrigLoop, Plan, [this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); }, - DeadInstructions, *PSE.getSE()); + DeadInstructions, *PSE.getSE(), *TLI); // Remove the existing terminator of the exiting block of the top-most region. // A BranchOnCount will be added instead when adding the canonical IV recipes. @@ -9311,11 +9252,6 @@ } #endif -void VPWidenCallRecipe::execute(VPTransformState &State) { - State.ILV->widenCallInstruction(*cast(getUnderlyingInstr()), this, - *this, State, VectorIntrinsicID); -} - void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -434,6 +434,64 @@ FMF = FMFNew; } +void VPWidenCallRecipe::execute(VPTransformState &State) { + auto &CI = *cast(getUnderlyingInstr()); + assert(!isa(CI) && + "DbgInfoIntrinsic should have been dropped during VPlan construction"); + State.setDebugLocFromInst(&CI); + + SmallVector Tys; + for (Value *ArgOperand : CI.args()) + Tys.push_back( + ToVectorTy(ArgOperand->getType(), State.VF.getKnownMinValue())); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + SmallVector TysForDecl = {CI.getType()}; + SmallVector Args; + for (const auto &I : enumerate(operands())) { + // Some intrinsics have a scalar argument - don't replace it with a + // vector. + Value *Arg; + if (VectorIntrinsicID == Intrinsic::not_intrinsic || + !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) + Arg = State.get(I.value(), Part); + else + Arg = State.get(I.value(), VPIteration(0, 0)); + if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) + TysForDecl.push_back(Arg->getType()); + Args.push_back(Arg); + } + + Function *VectorF; + if (VectorIntrinsicID != Intrinsic::not_intrinsic) { + // Use vector version of the intrinsic. + if (State.VF.isVector()) + TysForDecl[0] = + VectorType::get(CI.getType()->getScalarType(), State.VF); + Module *M = State.Builder.GetInsertBlock()->getModule(); + VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); + assert(VectorF && "Can't retrieve vector intrinsic."); + } else { + // Use vector version of the function call. + const VFShape Shape = VFShape::get(CI, State.VF, false /*HasGlobalPred*/); +#ifndef NDEBUG + assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr && + "Can't create vector function."); +#endif + VectorF = VFDatabase(CI).getVectorizedFunction(Shape); + } + SmallVector OpBundles; + CI.getOperandBundlesAsDefs(OpBundles); + CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); + + if (isa(V)) + V->copyFastMathFlags(&CI); + + State.set(this, V, Part); + State.addMetadata(V, &CI); + } +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -23,6 +23,7 @@ class PHINode; class ScalarEvolution; class Loop; +class TargetLibraryInfo; struct VPlanTransforms { /// Replaces the VPInstructions in \p Plan with corresponding @@ -32,7 +33,7 @@ function_ref GetIntOrFpInductionDescriptor, SmallPtrSetImpl &DeadInstructions, - ScalarEvolution &SE); + ScalarEvolution &SE, const TargetLibraryInfo &TLI); static bool sinkScalarOperands(VPlan &Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -15,6 +15,8 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/IVDescriptors.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Intrinsics.h" using namespace llvm; @@ -22,7 +24,8 @@ Loop *OrigLoop, VPlanPtr &Plan, function_ref GetIntOrFpInductionDescriptor, - SmallPtrSetImpl &DeadInstructions, ScalarEvolution &SE) { + SmallPtrSetImpl &DeadInstructions, ScalarEvolution &SE, + const TargetLibraryInfo &TLI) { ReversePostOrderTraversal> RPOT(Plan->getEntry()); @@ -74,7 +77,8 @@ GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); } else if (CallInst *CI = dyn_cast(Inst)) { NewRecipe = - new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()), true); + new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()), + getVectorIntrinsicIDForCall(CI, &TLI)); } else if (SelectInst *SI = dyn_cast(Inst)) { bool InvariantCond = SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -9,6 +9,8 @@ #include "../lib/Transforms/Vectorize/VPlan.h" #include "../lib/Transforms/Vectorize/VPlanTransforms.h" #include "VPlanTestBase.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "gtest/gtest.h" #include @@ -133,11 +135,12 @@ )"; EXPECT_EQ(ExpectedStr, FullDump); #endif - + TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple())); + TargetLibraryInfo TLI(TLII); SmallPtrSet DeadInstructions; VPlanTransforms::VPInstructionsToVPRecipes( LI->getLoopFor(LoopHeader), Plan, [](PHINode *P) { return nullptr; }, - DeadInstructions, *SE); + DeadInstructions, *SE, TLI); } TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { @@ -165,9 +168,11 @@ auto Plan = buildHCFG(LoopHeader); SmallPtrSet DeadInstructions; + TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple())); + TargetLibraryInfo TLI(TLII); VPlanTransforms::VPInstructionsToVPRecipes( LI->getLoopFor(LoopHeader), Plan, [](PHINode *P) { return nullptr; }, - DeadInstructions, *SE); + DeadInstructions, *SE, TLI); VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock(); EXPECT_NE(nullptr, Entry->getSingleSuccessor());