diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -272,8 +272,8 @@ /// \p Predicate on Range.Start, possibly decreasing Range.End such that the /// returned value holds for the entire \p Range. static bool - getDecisionAndClampRange(const std::function &Predicate, - VFRange &Range); + checkDecisionAcrossRange(const std::function &Predicate, + const VFRange &Range); protected: /// Collect the instructions from the original loop that would be trivially diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8225,19 +8225,17 @@ }); } -bool LoopVectorizationPlanner::getDecisionAndClampRange( - const std::function &Predicate, VFRange &Range) { +bool LoopVectorizationPlanner::checkDecisionAcrossRange( + const std::function &Predicate, const VFRange &Range) { assert(!Range.isEmpty() && "Trying to test an empty VF range."); bool PredicateAtRangeStart = Predicate(Range.Start); for (ElementCount TmpVF = Range.Start * 2; ElementCount::isKnownLT(TmpVF, Range.End); TmpVF *= 2) - if (Predicate(TmpVF) != PredicateAtRangeStart) { - Range.End = TmpVF; - break; - } + if (Predicate(TmpVF) != PredicateAtRangeStart) + return false; - return PredicateAtRangeStart; + return true; } /// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF, @@ -8363,7 +8361,8 @@ return BlockMaskCache[BB] = BlockMask; } -VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, +VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, + const VFRange &Range, VPlanPtr &Plan) { assert((isa(I) || isa(I)) && "Must be called with either a load or store"); @@ -8372,7 +8371,8 @@ return CM.shouldWidenMemory(I, VF); }; - if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) + assert(LoopVectorizationPlanner::checkDecisionAcrossRange(willWiden, Range)); + if (!willWiden(Range.Start)) return nullptr; VPValue *Mask = nullptr; @@ -8404,9 +8404,8 @@ return nullptr; } -VPWidenIntOrFpInductionRecipe * -VPRecipeBuilder::tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range, - VPlan &Plan) const { +VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( + TruncInst *I, const VFRange &Range, VPlan &Plan) const { // Optimize the special case where the source is a constant integer // induction variable. Notice that we can only optimize the 'trunc' case // because (a) FP conversions lose precision, (b) sext/zext may wrap, and @@ -8414,23 +8413,20 @@ // Determine whether \p K is a truncation based on an induction variable that // can be optimized. - auto isOptimizableIVTruncate = - [&](Instruction *K) -> std::function { - return [=](ElementCount VF) -> bool { - return CM.isOptimizableIVTruncate(K, VF); - }; + auto isOptimizableIVTruncate = [this, I](ElementCount VF) { + return CM.isOptimizableIVTruncate(I, VF); }; - if (LoopVectorizationPlanner::getDecisionAndClampRange( - isOptimizableIVTruncate(I), Range)) { + assert(LoopVectorizationPlanner::checkDecisionAcrossRange( + isOptimizableIVTruncate, Range)); + if (!isOptimizableIVTruncate(Range.Start)) + return nullptr; - InductionDescriptor II = - Legal->getInductionVars().lookup(cast(I->getOperand(0))); - VPValue *Start = Plan.getOrAddVPValue(II.getStartValue()); - return new VPWidenIntOrFpInductionRecipe(cast(I->getOperand(0)), - Start, nullptr, I); - } - return nullptr; + InductionDescriptor II = + Legal->getInductionVars().lookup(cast(I->getOperand(0))); + VPValue *Start = Plan.getOrAddVPValue(II.getStartValue()); + return new VPWidenIntOrFpInductionRecipe(cast(I->getOperand(0)), + Start, nullptr, I); } VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, VPlanPtr &Plan) { @@ -8463,29 +8459,31 @@ return toVPRecipeResult(new VPBlendRecipe(Phi, Operands)); } -VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, VFRange &Range, +VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, + const VFRange &Range, VPlan &Plan) const { - bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( - [this, CI](ElementCount VF) { - return CM.isScalarWithPredication(CI, VF); - }, - Range); + auto willPredicate = [this, CI](ElementCount VF) { + return CM.isScalarWithPredication(CI, VF); + }; - if (IsPredicated) + assert( + LoopVectorizationPlanner::checkDecisionAcrossRange(willPredicate, Range)); + if (willPredicate(Range.Start)) return nullptr; auto willWiden = [&](ElementCount VF) -> bool { return CM.shouldWidenCall(CI, VF); }; - if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) + assert(LoopVectorizationPlanner::checkDecisionAcrossRange(willWiden, Range)); + if (!willWiden(Range.Start)) return nullptr; return new VPWidenCallRecipe(*CI, Plan.mapToVPValues(CI->arg_operands())); } -bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const { +bool VPRecipeBuilder::shouldWiden(Instruction *I, const VFRange &Range) const { assert(!isa(I) && !isa(I) && !isa(I) && !isa(I) && "Instruction should have been handled earlier"); // Instruction should be widened, unless it is scalar after vectorization, @@ -8493,8 +8491,9 @@ auto ShouldScalarize = [this, I](ElementCount VF) -> bool { return CM.shouldScalarize(I, VF); }; - return !LoopVectorizationPlanner::getDecisionAndClampRange(ShouldScalarize, - Range); + assert(LoopVectorizationPlanner::checkDecisionAcrossRange(ShouldScalarize, + Range)); + return !ShouldScalarize(Range.Start); } VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VPlan &Plan) const { @@ -8546,16 +8545,23 @@ return new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands())); } -VPBasicBlock *VPRecipeBuilder::handleReplication( - Instruction *I, VFRange &Range, VPBasicBlock *VPBB, - VPlanPtr &Plan) { - bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( - [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); }, - Range); +VPBasicBlock *VPRecipeBuilder::handleReplication(Instruction *I, + const VFRange &Range, + VPBasicBlock *VPBB, + VPlanPtr &Plan) { + auto IsUniformCheck = [&](ElementCount VF) { + return CM.isUniformAfterVectorization(I, VF); + }; + assert(LoopVectorizationPlanner::checkDecisionAcrossRange(IsUniformCheck, + Range)); + bool IsUniform = IsUniformCheck(Range.Start); - bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( - [&](ElementCount VF) { return CM.isScalarWithPredication(I, VF); }, - Range); + auto IsPredicatedFn = [&](ElementCount VF) { + return CM.isScalarWithPredication(I, VF); + }; + assert(LoopVectorizationPlanner::checkDecisionAcrossRange(IsPredicatedFn, + Range)); + bool IsPredicated = IsPredicatedFn(Range.Start); auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); @@ -8626,9 +8632,9 @@ return Region; } -VPRecipeOrVPValueTy VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, - VFRange &Range, - VPlanPtr &Plan) { +VPRecipeOrVPValueTy +VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, + const VFRange &Range, VPlanPtr &Plan) { // First, check for specific widening recipes that deal with calls, memory // operations, inductions and Phi nodes. if (auto *CI = dyn_cast(Instr)) @@ -8798,20 +8804,9 @@ SinkAfter.erase(I); auto Ranges = computeRanges(MinVF, MaxVF, DeadInstructions); - auto MaxVFPlusOne = MaxVF.getWithIncrement(1); - unsigned I = 0; - for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { - VFRange SubRange = {VF, MaxVFPlusOne}; + for (auto &SubRange : Ranges) VPlans.push_back( buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); - VF = SubRange.End; - assert(Ranges[I].Start == SubRange.Start && Ranges[I].End == SubRange.End && - "clamped range should match computed range"); - I++; - } - - assert(Ranges.size() == VPlans.size() && - "should have the same number of clamped and computed ranges"); } VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( @@ -8852,19 +8847,22 @@ // Range, add it to the set of groups to be later applied to the VPlan and add // placeholders for its members' Recipes which we'll be replacing with a // single VPInterleaveRecipe. - for (InterleaveGroup *IG : IAI.getInterleaveGroups()) { - auto applyIG = [IG, this](ElementCount VF) -> bool { - return (VF.isVector() && // Query is illegal for VF == 1 - CM.getWideningDecision(IG->getInsertPos(), VF) == - LoopVectorizationCostModel::CM_Interleave); - }; - if (!getDecisionAndClampRange(applyIG, Range)) - continue; - InterleaveGroups.insert(IG); - for (unsigned i = 0; i < IG->getFactor(); i++) - if (Instruction *Member = IG->getMember(i)) - RecipeBuilder.recordRecipeOf(Member); - }; + if (Range.Start.isVector()) { + for (InterleaveGroup *IG : IAI.getInterleaveGroups()) { + auto applyIG = [IG, this](ElementCount VF) -> bool { + return (VF.isVector() && // Query is illegal for VF == 1 + CM.getWideningDecision(IG->getInsertPos(), VF) == + LoopVectorizationCostModel::CM_Interleave); + }; + assert(checkDecisionAcrossRange(applyIG, Range)); + if (!applyIG(Range.Start)) + continue; + InterleaveGroups.insert(IG); + for (unsigned i = 0; i < IG->getFactor(); i++) + if (Instruction *Member = IG->getMember(i)) + RecipeBuilder.recordRecipeOf(Member); + } + } // --------------------------------------------------------------------------- // Build initial VPlan: Scan the body of the loop in a topological order to diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -56,12 +56,12 @@ /// Check if \p I can be widened at the start of \p Range and possibly /// decrease the range such that the returned value holds for the entire \p /// Range. The function should not be called for memory instructions or calls. - bool shouldWiden(Instruction *I, VFRange &Range) const; + bool shouldWiden(Instruction *I, const VFRange &Range) const; /// Check if the load or store instruction \p I should widened for \p /// Range.Start and potentially masked. Such instructions are handled by a /// recipe that takes an additional VPInstruction for the mask. - VPRecipeBase *tryToWidenMemory(Instruction *I, VFRange &Range, + VPRecipeBase *tryToWidenMemory(Instruction *I, const VFRange &Range, VPlanPtr &Plan); /// Check if an induction recipe should be constructed for \I. If so build and @@ -72,7 +72,7 @@ /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. VPWidenIntOrFpInductionRecipe * - tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range, + tryToOptimizeInductionTruncate(TruncInst *I, const VFRange &Range, VPlan &Plan) const; /// Handle non-loop phi nodes. Return a VPValue, if all incoming values match @@ -84,7 +84,7 @@ /// Handle call instructions. If \p CI can be widened for \p Range.Start, /// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same /// decision from \p Range.Start to \p Range.End. - VPWidenCallRecipe *tryToWidenCall(CallInst *CI, VFRange &Range, + VPWidenCallRecipe *tryToWidenCall(CallInst *CI, const VFRange &Range, VPlan &Plan) const; /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe @@ -105,7 +105,8 @@ /// create for \p I withing the given VF \p Range. If an existing VPValue can /// be used or if a recipe can be created, return it. Otherwise return a /// VPRecipeOrVPValueTy with nullptr. - VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr, VFRange &Range, + VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr, + const VFRange &Range, VPlanPtr &Plan); /// Set the recipe created for given ingredient. This operation is a no-op for @@ -155,9 +156,8 @@ /// Region. Update the packing decision of predicated instructions if they /// feed \p I. Range.End may be decreased to ensure same recipe behavior from /// \p Range.Start to \p Range.End. - VPBasicBlock *handleReplication( - Instruction *I, VFRange &Range, VPBasicBlock *VPBB, - VPlanPtr &Plan); + VPBasicBlock *handleReplication(Instruction *I, const VFRange &Range, + VPBasicBlock *VPBB, VPlanPtr &Plan); }; } // end namespace llvm