diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5664,14 +5664,6 @@ auto BestKnownTC = getSmallBestKnownTC(*PSE.getSE(), TheLoop); const bool HasReductions = !Legal->getReductionVars().empty(); - // Do not interleave loops with a relatively small known or estimated trip - // count. But we will interleave when InterleaveSmallLoopScalarReduction is - // enabled, and the code has scalar reductions(HasReductions && VF = 1), - // because with the above conditions interleaving can expose ILP and break - // cross iteration dependences for reductions. - if (BestKnownTC && (*BestKnownTC < TinyTripCountInterleaveThreshold) && - !(InterleaveSmallLoopScalarReduction && HasReductions && VF.isScalar())) - return 1; // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. @@ -5745,8 +5737,11 @@ } // If trip count is known or estimated compile time constant, limit the - // interleave count to be less than the trip count divided by VF, provided it - // is at least 1. + // interleave count to be less than the trip count divided by VF * 2, + // provided VF is at least 1, such that the vector loop runs at least twice + // to make interleaving seem profitable. When + // InterleaveSmallLoopScalarReduction is true, we allow interleaving even when + // the vector loop runs once. // // For scalable vectors we can't know if interleaving is beneficial. It may // not be beneficial for small loops if none of the lanes in the second vector @@ -5755,8 +5750,12 @@ // the InterleaveCount as if vscale is '1', although if some information about // the vector is known (e.g. min vector size), we can make a better decision. if (BestKnownTC) { - MaxInterleaveCount = - std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount); + if (InterleaveSmallLoopScalarReduction) + MaxInterleaveCount = + std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount); + else + MaxInterleaveCount = std::min(*BestKnownTC / (VF.getKnownMinValue() * 2), + MaxInterleaveCount); // Make sure MaxInterleaveCount is greater than 0. MaxInterleaveCount = std::max(1u, MaxInterleaveCount); }