diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2822,7 +2822,8 @@ return; assert(!(SCEVCheckBlock->getParent()->hasOptSize() || - OptForSizeBasedOnProfile) && + (OptForSizeBasedOnProfile && + Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) && "Cannot SCEV check stride or overflow when optimizing for size"); SCEVCheckBlock->setName("vector.scevcheck"); @@ -7913,12 +7914,17 @@ BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, LoopVectorizationLegality &LVL) { - bool OptSize = - F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass); // 1) OptSize takes precedence over all other options, i.e. if this is set, // don't look at hints or options, and don't request a scalar epilogue. - if (OptSize) + // (For PGSO, as shouldOptimizeForSize isn't currently accessible from + // LoopAccessInfo (due to code dependency and not being able to reliably get + // PSI/BFI from a loop analysis under NPM), we cannot suppress the collection + // of strides in LoopAccessInfo::analyzeLoop() and vectorize without + // versioning when the vectorization is forced, unlike hasOptSize. So revert + // back to the old way and vectorize with versioning when forced. See D81345.) + if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, + PGSOQueryType::IRPass) && + Hints.getForce() != LoopVectorizeHints::FK_Enabled)) return CM_ScalarEpilogueNotAllowedOptSize; bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() && diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -284,6 +284,34 @@ ret void } +; Vectorize with versioning for unit stride for PGSO and enabled vectorization. +; +define void @stride1_pgso(i16* noalias %B, i32 %BStride) !prof !14 { +; CHECK-LABEL: @stride1_pgso( +; CHECK: vector.body +; +; PGSO-LABEL: @stride1_pgso( +; PGSO: vector.body +; +; NPGSO-LABEL: @stride1_pgso( +; NPGSO: vector.body + +entry: + br label %for.body + +for.body: + %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ] + %mulB = mul nsw i32 %iv, %BStride + %gepOfB = getelementptr inbounds i16, i16* %B, i32 %mulB + store i16 42, i16* %gepOfB, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, 1025 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 + +for.end: + ret void +} + ; PR46652: Check that the need for stride==1 check prevents vectorizing a loop ; having tiny trip count, when compiling w/o -Os/-Oz. ; CHECK-LABEL: @pr46652