Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -340,6 +340,13 @@
     return PredicationStyle::None;
   }
 
+  /// Returns true when the subtarget has SVE and -sve-tail-predication=enabled
+  /// was given; the loop and analysis arguments are not consulted.
+  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
+                                   DominatorTree *DT,
+                                   const LoopAccessInfo *LAI);
+
   bool supportsScalableVectors() const { return ST->hasSVE(); }
 
   bool enableScalableVectorization() const { return ST->hasSVE(); }
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -37,6 +37,20 @@
 static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead",
                                             cl::init(10), cl::Hidden);
 
+// Values accepted by -sve-tail-predication.
+enum class TailPredication { Disabled, Enabled };
+
+// File-local option (static, like SVEScatterOverhead above) consulted by
+// preferPredicateOverEpilogue(); tail-predication is disabled by default.
+static cl::opt<TailPredication> SVETailPredication(
+    "sve-tail-predication", cl::desc("SVE tail-predication options"),
+    cl::init(TailPredication::Disabled),
+    cl::values(clEnumValN(TailPredication::Disabled, "disabled",
+                          "Don't tail-predicate loops using SVE"),
+               clEnumValN(TailPredication::Enabled, "enabled",
+                          "Enable tail-predication with SVE, including loops "
+                          "containing reductions & first-order recurrences")));
+
 bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                          const Function *Callee) const {
   const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -2908,3 +2922,11 @@
 
   return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
 }
+
+// Prefer a predicated vector body over a scalar epilogue only when SVE is
+// available and the user opted in with -sve-tail-predication=enabled.
+bool AArch64TTIImpl::preferPredicateOverEpilogue(
+    Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
+    TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) {
+  return ST->hasSVE() && SVETailPredication == TailPredication::Enabled;
+}
Index: llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s 2>&1 | FileCheck %s
+; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -sve-tail-predication=enabled -S < %s 2>&1 | FileCheck %s
 
 ; This test currently fails when the LV calculates a maximums safe
 ; distance for scalable vectors, because the code to eliminate the tail is
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-predicate-instruction.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -loop-vectorize -S | FileCheck %s
-; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -sve-tail-predication=enabled -S | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions-tf.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: opt < %s -loop-vectorize -sve-tail-predication=enabled \
 ; RUN: -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S | FileCheck %s
 
 define void @invariant_store_red_exit_is_phi(i32* %dst, i32* readonly %src, i64 %n) {
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalar_interleave.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-vectorize -S -o - < %s | FileCheck %s
-; RUN: opt -loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S -o - < %s | FileCheck %s
+; RUN: opt -loop-vectorize -sve-tail-predication=enabled -S -o - < %s | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-arm-none-eabi"
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll
@@ -1,5 +1,4 @@
-; RUN: opt -S -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-interleave=4 -force-vector-width=4 < %s | FileCheck %s
-; RUN: opt -S -loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-interleave=4 -force-vector-width=4 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -sve-tail-predication=enabled -force-vector-interleave=4 -force-vector-width=4 < %s | FileCheck %s
 
 target triple = "aarch64-unknown-linux-gnu"
 
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
@@ -1,5 +1,4 @@
-; RUN: opt -S -hints-allow-reordering=false -loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -prefer-inloop-reductions < %s | FileCheck %s
-; RUN: opt -S -hints-allow-reordering=false -loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -prefer-inloop-reductions < %s | FileCheck %s
+; RUN: opt -S -hints-allow-reordering=false -loop-vectorize -sve-tail-predication=enabled -prefer-inloop-reductions < %s | FileCheck %s
 
 target triple = "aarch64-unknown-linux-gnu"
 
Index: llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -scalable-vectorization=off -force-vector-width=4 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -scalable-vectorization=off -force-vector-width=4 -sve-tail-predication=enabled -S < %s | FileCheck %s
 
 ; NOTE: These tests aren't really target-specific, but it's convenient to target AArch64
 ; so that TTI.isLegalMaskedLoad can return true.