Move the body of VPWidenCallRecipe::execute into VPlanRecipes.cpp.

The call-widening logic used to live in
InnerLoopVectorizer::widenCallInstruction (LoopVectorize.cpp) and was reached
from the recipe through State.ILV. This patch places the same logic directly
in VPWidenCallRecipe::execute, reading VF, UF and Builder from
VPTransformState rather than from the InnerLoopVectorizer, and using the
recipe itself (operands(), this) in place of the ArgOperands and Def
parameters.

To support that, VPTransformState gains a `const TargetLibraryInfo &TLI`
member, passed as a new constructor argument right after LI; execute hands
its address to getVectorIntrinsicIDForCall.

NOTE(review): InnerLoopVectorizer::widenCallInstruction is left behind with an
empty body by this patch; presumably its declaration and definition are
removed in a follow-up - confirm.
NOTE(review): this copy of the patch was rebuilt from a whitespace-collapsed
extract. Indentation and the template argument lists (isa<>, cast<>,
SmallVector<>) are reconstructed - verify them against the tree before
applying, or apply with whitespace checks relaxed.

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4155,62 +4155,7 @@
 void InnerLoopVectorizer::widenCallInstruction(CallInst &CI, VPValue *Def,
                                                VPUser &ArgOperands,
                                                VPTransformState &State,
-                                               bool UseVectorIntrinsic) {
-  assert(!isa<DbgInfoIntrinsic>(CI) &&
-         "DbgInfoIntrinsic should have been dropped during VPlan construction");
-  State.setDebugLocFromInst(&CI);
-
-  SmallVector<Type *, 4> Tys;
-  for (Value *ArgOperand : CI.args())
-    Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue()));
-
-  Intrinsic::ID ID = getVectorIntrinsicIDForCall(&CI, TLI);
-
-  for (unsigned Part = 0; Part < UF; ++Part) {
-    SmallVector<Type *, 2> TysForDecl = {CI.getType()};
-    SmallVector<Value *, 4> Args;
-    for (const auto &I : enumerate(ArgOperands.operands())) {
-      // Some intrinsics have a scalar argument - don't replace it with a
-      // vector.
-      Value *Arg;
-      if (!UseVectorIntrinsic ||
-          !isVectorIntrinsicWithScalarOpAtArg(ID, I.index()))
-        Arg = State.get(I.value(), Part);
-      else
-        Arg = State.get(I.value(), VPIteration(0, 0));
-      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I.index()))
-        TysForDecl.push_back(Arg->getType());
-      Args.push_back(Arg);
-    }
-
-    Function *VectorF;
-    if (UseVectorIntrinsic) {
-      // Use vector version of the intrinsic.
-      if (VF.isVector())
-        TysForDecl[0] = VectorType::get(CI.getType()->getScalarType(), VF);
-      Module *M = State.Builder.GetInsertBlock()->getModule();
-      VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
-      assert(VectorF && "Can't retrieve vector intrinsic.");
-    } else {
-      // Use vector version of the function call.
-      const VFShape Shape = VFShape::get(CI, VF, false /*HasGlobalPred*/);
-#ifndef NDEBUG
-      assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr &&
-             "Can't create vector function.");
-#endif
-      VectorF = VFDatabase(CI).getVectorizedFunction(Shape);
-    }
-    SmallVector<OperandBundleDef, 1> OpBundles;
-    CI.getOperandBundlesAsDefs(OpBundles);
-    CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles);
-
-    if (isa<FPMathOperator>(V))
-      V->copyFastMathFlags(&CI);
-
-    State.set(Def, V, Part);
-    State.addMetadata(V, &CI);
-  }
-}
+                                               bool UseVectorIntrinsic) {}
 
 void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
   // We should not collect Scalars more than once per VF. Right now, this
@@ -7615,7 +7560,8 @@
 
   // 1. Set up the skeleton for vectorization, including vector pre-header and
   // middle block. The vector loop is created during VPlan execution.
-  VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
+  VPTransformState State{BestVF, BestUF, LI, *TLI,
+                         DT, ILV.Builder, &ILV, &BestVPlan};
   Value *CanonicalIVStartValue;
   std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
       ILV.createVectorizedLoopSkeleton();
@@ -9314,11 +9260,6 @@
 }
 #endif
 
-void VPWidenCallRecipe::execute(VPTransformState &State) {
-  State.ILV->widenCallInstruction(*cast<CallInst>(getUnderlyingInstr()), this,
-                                  *this, State, UseVectorIntrinsic);
-}
-
 void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "Int or FP induction being replicated.");
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -200,10 +200,11 @@
 /// needed for generating the output IR.
 struct VPTransformState {
   VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
-                   DominatorTree *DT, IRBuilderBase &Builder,
-                   InnerLoopVectorizer *ILV, VPlan *Plan)
-      : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan),
-        LVer(nullptr) {}
+                   const TargetLibraryInfo &TLI, DominatorTree *DT,
+                   IRBuilderBase &Builder, InnerLoopVectorizer *ILV,
+                   VPlan *Plan)
+      : VF(VF), UF(UF), LI(LI), TLI(TLI), DT(DT), Builder(Builder), ILV(ILV),
+        Plan(Plan), LVer(nullptr) {}
 
   /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
   ElementCount VF;
@@ -348,6 +349,9 @@
   /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
   LoopInfo *LI;
 
+  /// Hold a reference to TargetLibraryInfo.
+  const TargetLibraryInfo &TLI;
+
   /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
   DominatorTree *DT;
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -434,6 +434,66 @@
   FMF = FMFNew;
 }
 
+void VPWidenCallRecipe::execute(VPTransformState &State) {
+  auto &CI = *cast<CallInst>(getUnderlyingInstr());
+  assert(!isa<DbgInfoIntrinsic>(CI) &&
+         "DbgInfoIntrinsic should have been dropped during VPlan construction");
+  State.setDebugLocFromInst(&CI);
+
+  SmallVector<Type *, 4> Tys;
+  for (Value *ArgOperand : CI.args())
+    Tys.push_back(
+        ToVectorTy(ArgOperand->getType(), State.VF.getKnownMinValue()));
+
+  Intrinsic::ID ID = getVectorIntrinsicIDForCall(&CI, &State.TLI);
+
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    SmallVector<Type *, 2> TysForDecl = {CI.getType()};
+    SmallVector<Value *, 4> Args;
+    for (const auto &I : enumerate(operands())) {
+      // Some intrinsics have a scalar argument - don't replace it with a
+      // vector.
+      Value *Arg;
+      if (!UseVectorIntrinsic ||
+          !isVectorIntrinsicWithScalarOpAtArg(ID, I.index()))
+        Arg = State.get(I.value(), Part);
+      else
+        Arg = State.get(I.value(), VPIteration(0, 0));
+      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I.index()))
+        TysForDecl.push_back(Arg->getType());
+      Args.push_back(Arg);
+    }
+
+    Function *VectorF;
+    if (UseVectorIntrinsic) {
+      // Use vector version of the intrinsic.
+      if (State.VF.isVector())
+        TysForDecl[0] =
+            VectorType::get(CI.getType()->getScalarType(), State.VF);
+      Module *M = State.Builder.GetInsertBlock()->getModule();
+      VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
+      assert(VectorF && "Can't retrieve vector intrinsic.");
+    } else {
+      // Use vector version of the function call.
+      const VFShape Shape = VFShape::get(CI, State.VF, false /*HasGlobalPred*/);
+#ifndef NDEBUG
+      assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr &&
+             "Can't create vector function.");
+#endif
+      VectorF = VFDatabase(CI).getVectorizedFunction(Shape);
+    }
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    CI.getOperandBundlesAsDefs(OpBundles);
+    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
+
+    if (isa<FPMathOperator>(V))
+      V->copyFastMathFlags(&CI);
+
+    State.set(this, V, Part);
+    State.addMetadata(V, &CI);
+  }
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {