Patch summary: in LoopVectorizationCostModel::selectVectorizationFactor, stop
tracking the best candidate as a float per-lane cost plus a separate Width.
Track it as a (total cost, width) pair instead, compare candidates through the
isLowerVectorCost lambda, and return the selected pair's cost directly.
NOTE(review): template argument lists and indentation in this patch were
reconstructed from a flattened copy -- confirm against the upstream revision
before applying.

Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5408,54 +5408,56 @@
 LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) {
   assert(!MaxVF.isScalable() && "scalable vectors not yet supported");
 
-  float Cost = expectedCost(ElementCount::getFixed(1)).first;
-  const float ScalarCost = Cost;
-  unsigned Width = 1;
-  LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
+  unsigned ExpectedCost = expectedCost(ElementCount::getFixed(1)).first;
+  LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
+
+  std::pair<unsigned, unsigned> MinCost = {ExpectedCost, 1};
+  const std::pair<unsigned, unsigned> ScalarCost = MinCost;
 
   bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
   if (ForceVectorization && MaxVF.isVector()) {
     // Ignore scalar width, because the user explicitly wants vectorization.
     // Initialize cost to max so that VF = 2 is, at least, chosen during cost
     // evaluation.
-    Cost = std::numeric_limits<float>::max();
+    MinCost.first = std::numeric_limits<unsigned>::max();
   }
 
+  auto isLowerVectorCost = [](const std::pair<unsigned, unsigned> LHS,
+                              const std::pair<unsigned, unsigned> RHS) {
+    return (float(LHS.first) / LHS.second) < (float(RHS.first) / RHS.second);
+  };
+
   for (unsigned i = 2; i <= MaxVF.getFixedValue(); i *= 2) {
     // Notice that the vector loop needs to be executed less times, so
     // we need to divide the cost of the vector loops by the width of
     // the vector elements.
     VectorizationCostTy C = expectedCost(ElementCount::getFixed(i));
-    float VectorCost = C.first / (float)i;
+    std::pair<unsigned, unsigned> VectorCost = {C.first, i};
     LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i
-                      << " costs: " << (int)VectorCost << ".\n");
+                      << " costs: " << (C.first / i) << ".\n");
     if (!C.second && !ForceVectorization) {
       LLVM_DEBUG(
           dbgs() << "LV: Not considering vector loop of width " << i
                  << " because it will not generate any vector instructions.\n");
       continue;
     }
-    if (VectorCost < Cost) {
-      Cost = VectorCost;
-      Width = i;
-    }
+    if (isLowerVectorCost(VectorCost, MinCost))
+      MinCost = VectorCost;
   }
 
   if (!EnableCondStoresVectorization && NumPredStores) {
     reportVectorizationFailure("There are conditional stores.",
         "store that is conditionally executed prevents vectorization",
         "ConditionalStore", ORE, TheLoop);
-    Width = 1;
-    Cost = ScalarCost;
+    MinCost = ScalarCost;
   }
 
-  LLVM_DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs()
+  LLVM_DEBUG(if (ForceVectorization && MinCost.second > 1 &&
+                 isLowerVectorCost(ScalarCost, MinCost)) dbgs()
              << "LV: Vectorization seems to be not beneficial, "
              << "but was forced by a user.\n");
-  LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n");
-  VectorizationFactor Factor = {ElementCount::getFixed(Width),
-                                (unsigned)(Width * Cost)};
-  return Factor;
+  LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << MinCost.second << ".\n");
+  return {ElementCount::getFixed(MinCost.second), MinCost.first};
 }
 
 std::pair<unsigned, unsigned>