diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7429,6 +7429,9 @@ ScalarEvolution *SE, DominatorTree *DT, const LoopAccessInfo *LAI) { ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed; + bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() && + !PreferPredicateOverEpilog; + if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && (F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI))) @@ -7436,7 +7439,8 @@ else if (PreferPredicateOverEpilog || Hints.getPredicate() == LoopVectorizeHints::FK_Enabled || (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, LAI) && - Hints.getPredicate() != LoopVectorizeHints::FK_Disabled)) + Hints.getPredicate() != LoopVectorizeHints::FK_Disabled && + !PredicateOptDisabled)) SEL = CM_ScalarEpilogueNotNeededUsePredicate; return SEL; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll @@ -30,6 +30,18 @@ ; RUN: -enable-arm-maskedldst=true -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING +; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \ +; RUN: -prefer-predicate-over-epilog=false \ +; RUN: -disable-mve-tail-predication=false -loop-vectorize \ +; RUN: -enable-arm-maskedldst=true -S < %s | \ +; RUN: FileCheck %s -check-prefixes=CHECK,NO-FOLDING + +; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \ +; RUN: -prefer-predicate-over-epilog=true \ +; RUN: -disable-mve-tail-predication=false -loop-vectorize \ +; RUN: -enable-arm-maskedldst=true -S < %s | \ +; RUN: FileCheck %s -check-prefixes=CHECK,FOLDING-OPT + define void 
@prefer_folding(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C) #0 { ; CHECK-LABEL: prefer_folding( ; PREFER-FOLDING: vector.body: @@ -186,6 +198,12 @@ ; PREFER-FOLDING-NOT: llvm.masked.load ; PREFER-FOLDING-NOT: llvm.masked.store ; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body + +; FOLDING-OPT: vector.body: +; FOLDING-OPT: call <8 x i16> @llvm.masked.load.v8i16.p0v8i16 +; FOLDING-OPT: call <8 x i8> @llvm.masked.load.v8i8.p0v8i8 +; FOLDING-OPT: call void @llvm.masked.store.v8i8.p0v8i8 +; FOLDING-OPT: br i1 %{{.*}}, label %{{.*}}, label %vector.body entry: br label %for.body