diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -475,7 +475,7 @@ /// Widen a single call instruction within the innermost loop. void widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, - VPTransformState &State); + VPTransformState &State, bool UseVectorIntrinsic); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. void fixVectorizedLoop(VPTransformState &State, VPlan &Plan); @@ -4154,7 +4154,8 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &CI, VPValue *Def, VPUser &ArgOperands, - VPTransformState &State) { + VPTransformState &State, + bool UseVectorIntrinsic) { assert(!isa(CI) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); State.setDebugLocFromInst(&CI); @@ -4165,19 +4166,6 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(&CI, TLI); - // The flag shows whether we use Intrinsic or a usual Call for vectorized - // version of the instruction. - // Is it beneficial to perform intrinsic call compared to lib call? - bool NeedToScalarize = false; - InstructionCost CallCost = Cost->getVectorCallCost(&CI, VF, NeedToScalarize); - InstructionCost IntrinsicCost = - ID ? 
Cost->getVectorIntrinsicCost(&CI, VF) : 0; - bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost; - assert((UseVectorIntrinsic || !NeedToScalarize) && - "Instruction should be scalarized elsewhere."); - assert((IntrinsicCost.isValid() || CallCost.isValid()) && - "Either the intrinsic cost or vector call cost must be valid"); - for (unsigned Part = 0; Part < UF; ++Part) { SmallVector TysForDecl = {CI.getType()}; SmallVector Args; @@ -8322,24 +8310,37 @@ ID == Intrinsic::experimental_noalias_scope_decl)) return nullptr; - auto willWiden = [&](ElementCount VF) -> bool { - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); + auto WillWiden = [&](ElementCount VF) -> bool { // The following case may be scalarized depending on the VF. // The flag shows whether we use Intrinsic or a usual Call for vectorized // version of the instruction. // Is it beneficial to perform intrinsic call compared to lib call? bool NeedToScalarize = false; - InstructionCost CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize); - InstructionCost IntrinsicCost = ID ? CM.getVectorIntrinsicCost(CI, VF) : 0; - bool UseVectorIntrinsic = ID && IntrinsicCost <= CallCost; - return UseVectorIntrinsic || !NeedToScalarize; + CM.getVectorCallCost(CI, VF, NeedToScalarize); + return !NeedToScalarize; }; - if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) + bool UseVectorIntrinsic = LoopVectorizationPlanner::getDecisionAndClampRange( + [&](ElementCount VF) -> bool { + Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); + bool NeedToScalarize = false; + // Use the intrinsic, if any, when it costs no more than the call. + InstructionCost CallCost = + CM.getVectorCallCost(CI, VF, NeedToScalarize); + InstructionCost IntrinsicCost = + ID ? 
CM.getVectorIntrinsicCost(CI, VF) : 0; + return ID && IntrinsicCost <= CallCost; + }, + Range); + + // Clamp Range on WillWiden only if no vector intrinsic is used. + if (!UseVectorIntrinsic && + !LoopVectorizationPlanner::getDecisionAndClampRange(WillWiden, Range)) return nullptr; ArrayRef Ops = Operands.take_front(CI->arg_size()); - return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end())); + return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), + UseVectorIntrinsic); } bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const { @@ -9316,7 +9317,7 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { State.ILV->widenCallInstruction(*cast(getUnderlyingInstr()), this, - *this, State); + *this, State, UseVectorIntrinsic); } void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -946,12 +946,17 @@ /// A recipe for widening Call instructions. class VPWidenCallRecipe : public VPRecipeBase, public VPValue { + /// True if a vector intrinsic should be used to widen the call. If false, a + /// library call is used. 
+ bool UseVectorIntrinsic = false; public: template - VPWidenCallRecipe(CallInst &I, iterator_range CallArguments) + VPWidenCallRecipe(CallInst &I, iterator_range CallArguments, + bool UseVectorIntrinsic) : VPRecipeBase(VPRecipeBase::VPWidenCallSC, CallArguments), - VPValue(VPValue::VPVWidenCallSC, &I, this) {} + VPValue(VPValue::VPVWidenCallSC, &I, this), + UseVectorIntrinsic(UseVectorIntrinsic) {} ~VPWidenCallRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -74,7 +74,7 @@ GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop); } else if (CallInst *CI = dyn_cast(Inst)) { NewRecipe = - new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args())); + new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()), true); } else if (SelectInst *SI = dyn_cast(Inst)) { bool InvariantCond = SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -806,7 +806,7 @@ SmallVector Args; Args.push_back(&Op1); Args.push_back(&Op2); - VPWidenCallRecipe Recipe(*Call, make_range(Args.begin(), Args.end())); + VPWidenCallRecipe Recipe(*Call, make_range(Args.begin(), Args.end()), false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1065,7 +1065,8 @@ SmallVector Args; Args.push_back(&Op1); Args.push_back(&Op2); - VPWidenCallRecipe Recipe(*Call, make_range(Args.begin(), Args.end())); + VPWidenCallRecipe Recipe(*Call, make_range(Args.begin(), Args.end()), + false); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); EXPECT_TRUE(Recipe.mayWriteToMemory());