diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -38,6 +38,16 @@ class Type; class Value; +namespace TailPredication { + enum Mode { + Disabled = 0, + EnabledNoReductions, + Enabled, + ForceEnabledNoReductions, + ForceEnabled + }; +} + class ARMTTIImpl : public BasicTTIImplBase { using BaseT = BasicTTIImplBase; using TTI = TargetTransformInfo; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -45,7 +45,7 @@ "disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops")); -extern cl::opt DisableTailPredication; +extern cl::opt EnableTailPredication; extern cl::opt EnableMaskedGatherScatters; @@ -1458,7 +1458,7 @@ TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) { - if (DisableTailPredication) + if (!EnableTailPredication) return false; // Creating a predicated vector loop is the first step for generating a @@ -1501,7 +1501,7 @@ } bool ARMTTIImpl::emitGetActiveLaneMask() const { - if (!ST->hasMVEIntegerOps() || DisableTailPredication) + if (!ST->hasMVEIntegerOps() || !EnableTailPredication) return false; // Intrinsic @llvm.get.active.lane.mask is supported. diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp --- a/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -42,6 +42,7 @@ #include "ARM.h" #include "ARMSubtarget.h" +#include "ARMTargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -64,16 +65,27 @@ #define DEBUG_TYPE "mve-tail-predication" #define DESC "Transform predicated vector loops to use MVE tail predication" -static cl::opt -ForceTailPredication("force-mve-tail-predication", cl::Hidden, cl::init(false), - cl::desc("Force MVE tail-predication even if it might be " - "unsafe (e.g. possible overflow in loop " - "counters)")); +cl::opt EnableTailPredication( + "tail-predication", cl::desc("MVE tail-predication options"), + cl::init(TailPredication::Disabled), + cl::values(clEnumValN(TailPredication::Disabled, "disabled", + "Don't tail-predicate loops"), + clEnumValN(TailPredication::EnabledNoReductions, + "enabled-no-reductions", + "Enable tail-predication, but not for reduction loops"), + clEnumValN(TailPredication::Enabled, + "enabled", + "Enable tail-predication, including reduction loops"), + clEnumValN(TailPredication::ForceEnabledNoReductions, + "force-enabled-no-reductions", + "Enable tail-predication, but not for reduction loops, " + "and force this which might be unsafe"), + clEnumValN(TailPredication::ForceEnabled, + "force-enabled", + "Enable tail-predication, including reduction loops, " + "and force this which might be unsafe"))); + -cl::opt -DisableTailPredication("disable-mve-tail-predication", cl::Hidden, - cl::init(true), - cl::desc("Disable MVE Tail Predication")); namespace { class MVETailPredication : public LoopPass { @@ -146,7 +158,7 @@ } bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { - if (skipLoop(L) || DisableTailPredication) + if (skipLoop(L) || !EnableTailPredication) return false; MaskedInsts.clear(); @@ -346,6 +358,9 @@ // vector width. bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount, FixedVectorType *VecTy) { + bool ForceTailPredication = + EnableTailPredication == TailPredication::ForceEnabledNoReductions || + EnableTailPredication == TailPredication::ForceEnabled; // 1) Test whether entry to the loop is protected by a conditional // BTC + 1 < 0. In other words, if the scalar trip count overflows, // becomes negative, we shouldn't enter the loop and creating diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; CHECK-LABEL: mul_v16i8 ; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -disable-mve-tail-predication=false %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -tail-predication=enabled %s -S -o - | FileCheck %s define hidden i32 @_Z4loopPiPjiS0_i(i32* noalias nocapture readonly %s1, i32* noalias nocapture readonly %s2, i32 %x, i32* noalias nocapture %d, i32 %n) { ; CHECK-LABEL: @_Z4loopPiPjiS0_i( diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c, i32 %N) { ; CHECK-LABEL: vpsel_mul_reduce_add: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local arm_aapcs_vfpcc void @sext_i8(i16* noalias nocapture %a, i8* nocapture readonly %b, i32 %N) { ; CHECK-LABEL: sext_i8: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) { ; CHECK-LABEL: fast_float_mul: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 -disable-mve-tail-predication=false -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s +; RUN: llc -O3 -tail-predication=enabled -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m-arm-none-eabi" diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) { ; CHECK-LABEL: test_acc_scalar_char: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -tail-predication=enabled %s -o - | FileCheck %s define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { ; CHECK-LABEL: @mat_vec_sext_i16( diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_add_add_v16i8: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @foo( diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @uadd_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: uadd_sat: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @fabs(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: fabs: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @round(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %n) #0 { ; CHECK-LABEL: round: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @usub_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: usub_sat: ; CHECK: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; TODO: We should be able to generate a vctp for the loads. ; CHECK-LABEL: trunc_v4i32_v4i16 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; The following functions should all fail to become tail-predicated. ; CHECK-NOT: call i32 @llvm.arm.vctp diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; CHECK-LABEL: expand_v8i16_v8i32 ; CHECK-NOT: call i32 @llvm.arm.mve.vctp diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll @@ -1,6 +1,6 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false \ -; RUN: -force-mve-tail-predication -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=force-enabled \ +; RUN: -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE ; CHECK-LABEL: reduction_i32 ; CHECK: phi i32 [ 0, %vector.ph ] diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr { ; CHECK-LABEL: varying_outer_2d_reduction: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { ; CHECK-LABEL: mul_reduce_add: diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s ; CHECK-LABEL: vec_mul_reduce_add diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; TODO: The unrolled pattern is preventing the transform ; CHECK-LABEL: mul_v16i8_unroll diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll --- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc void @fmas1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) { ; CHECK-LABEL: fmas1: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll @@ -1,19 +1,19 @@ ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf \ -; RUN: -disable-mve-tail-predication=false -loop-vectorize -S < %s | \ +; RUN: -tail-predication=enabled -loop-vectorize -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=-mve \ -; RUN: -disable-mve-tail-predication=false -loop-vectorize \ +; RUN: -tail-predication=enabled -loop-vectorize \ ; RUN: -enable-arm-maskedldst=true -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,NO-FOLDING ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve \ -; RUN: -disable-mve-tail-predication=false -loop-vectorize \ +; RUN: -tail-predication=enabled -loop-vectorize \ ; RUN: -enable-arm-maskedldst=false -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,NO-FOLDING ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve \ -; RUN: -disable-mve-tail-predication=true -loop-vectorize \ +; RUN: -tail-predication=disabled -loop-vectorize \ ; RUN: -enable-arm-maskedldst=true -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,NO-FOLDING @@ -21,24 +21,24 @@ ; 'isHardwareLoopProfitable' return false, so that we test avoiding folding for ; these cases. ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve,-lob \ -; RUN: -disable-mve-tail-predication=false -loop-vectorize \ +; RUN: -tail-predication=enabled -loop-vectorize \ ; RUN: -enable-arm-maskedldst=true -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,NO-FOLDING ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \ -; RUN: -disable-mve-tail-predication=false -loop-vectorize \ +; RUN: -tail-predication=enabled -loop-vectorize \ ; RUN: -enable-arm-maskedldst=true -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \ ; RUN: -prefer-predicate-over-epilog=false \ -; RUN: -disable-mve-tail-predication=false -loop-vectorize \ +; RUN: -tail-predication=enabled -loop-vectorize \ ; RUN: -enable-arm-maskedldst=true -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,NO-FOLDING ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \ ; RUN: -prefer-predicate-over-epilog=true \ -; RUN: -disable-mve-tail-predication=false -loop-vectorize \ +; RUN: -tail-predication=enabled -loop-vectorize \ ; RUN: -enable-arm-maskedldst=true -S < %s | \ ; RUN: FileCheck %s -check-prefixes=CHECK,FOLDING-OPT diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -loop-vectorize -S | FileCheck %s --check-prefixes=COMMON,DEFAULT -; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -prefer-predicate-over-epilog -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER -; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-ENABLE-TP +; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilog -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER +; RUN: opt < %s -loop-vectorize -tail-predication=enabled -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-ENABLE-TP target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-unknown-eabihf" diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll @@ -1,7 +1,7 @@ -; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -S | \ +; RUN: opt < %s -loop-vectorize -tail-predication=enabled -S | \ ; RUN: FileCheck %s -check-prefixes=COMMON,CHECK -; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -prefer-predicate-over-epilog -S | \ +; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilog -S | \ ; RUN: FileCheck -check-prefixes=COMMON,PREDFLAG %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"