Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -38,6 +38,16 @@ class Type; class Value; +namespace TailPredication { + enum Mode { + Disabled = 0, + EnabledNoReductions, + Enabled, + ForceEnabledNoReductions, + ForceEnabled + }; +} + class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { using BaseT = BasicTTIImplBase<ARMTTIImpl>; using TTI = TargetTransformInfo; Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -45,7 +45,7 @@ "disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops")); -extern cl::opt<bool> DisableTailPredication; +extern cl::opt<TailPredication::Mode> EnableTailPredication; extern cl::opt<bool> EnableMaskedGatherScatters; @@ -1368,7 +1368,7 @@ TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) { - if (DisableTailPredication) + if (!EnableTailPredication) return false; // Creating a predicated vector loop is the first step for generating a @@ -1411,7 +1411,7 @@ } bool ARMTTIImpl::emitGetActiveLaneMask() const { - if (!ST->hasMVEIntegerOps() || DisableTailPredication) + if (!ST->hasMVEIntegerOps() || !EnableTailPredication) return false; // Intrinsic @llvm.get.active.lane.mask is supported. 
Index: llvm/lib/Target/ARM/MVETailPredication.cpp =================================================================== --- llvm/lib/Target/ARM/MVETailPredication.cpp +++ llvm/lib/Target/ARM/MVETailPredication.cpp @@ -42,6 +42,7 @@ #include "ARM.h" #include "ARMSubtarget.h" +#include "ARMTargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -64,16 +65,27 @@ #define DEBUG_TYPE "mve-tail-predication" #define DESC "Transform predicated vector loops to use MVE tail predication" -static cl::opt<bool> -ForceTailPredication("force-mve-tail-predication", cl::Hidden, cl::init(false), - cl::desc("Force MVE tail-predication even if it might be " - "unsafe (e.g. possible overflow in loop " - "counters)")); +cl::opt<TailPredication::Mode> EnableTailPredication( + "tail-predication", cl::desc("MVE tail-predication options"), + cl::init(TailPredication::Disabled), + cl::values(clEnumValN(TailPredication::Disabled, "disabled", + "Don't tail-predicate loops"), + clEnumValN(TailPredication::EnabledNoReductions, + "enabled-no-reductions", + "Enable tail-predication, but not for reduction loops"), + clEnumValN(TailPredication::Enabled, + "enabled", + "Enable tail-predication, including reduction loops"), + clEnumValN(TailPredication::ForceEnabledNoReductions, + "force-enabled-no-reductions", + "Enable tail-predication, but not for reduction loops, " + "and force this which might be unsafe"), + clEnumValN(TailPredication::ForceEnabled, + "force-enabled", + "Enable tail-predication, including reduction loops, " + "and force this which might be unsafe"))); + -cl::opt<bool> -DisableTailPredication("disable-mve-tail-predication", cl::Hidden, - cl::init(true), - cl::desc("Disable MVE Tail Predication")); namespace { class MVETailPredication : public LoopPass { @@ -146,7 +158,7 @@ } bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { - if (skipLoop(L) || DisableTailPredication) + if (skipLoop(L) || !EnableTailPredication) return 
false; MaskedInsts.clear(); @@ -346,6 +358,9 @@ // vector width. bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount, FixedVectorType *VecTy) { + bool ForceTailPredication = + EnableTailPredication == TailPredication::ForceEnabledNoReductions || + EnableTailPredication == TailPredication::ForceEnabled; // 1) Test whether entry to the loop is protected by a conditional // BTC + 1 < 0. In other words, if the scalar trip count overflows, // becomes negative, we shouldn't enter the loop and creating Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; CHECK-LABEL: mul_v16i8 ; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -disable-mve-tail-predication=false %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -tail-predication=enabled %s -S -o - | FileCheck %s define hidden i32 @_Z4loopPiPjiS0_i(i32* noalias nocapture readonly %s1, i32* noalias nocapture readonly %s2, i32 %x, i32* noalias nocapture %d, i32 %n) { ; CHECK-LABEL: @_Z4loopPiPjiS0_i( Index: 
llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c, i32 %N) { ; CHECK-LABEL: vpsel_mul_reduce_add: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local arm_aapcs_vfpcc void @sext_i8(i16* noalias nocapture %a, i8* nocapture readonly %b, i32 %N) { ; CHECK-LABEL: sext_i8: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 
-disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) { ; CHECK-LABEL: fast_float_mul: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 -disable-mve-tail-predication=false -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s +; RUN: llc -O3 -tail-predication=enabled -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m-arm-none-eabi" Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) { ; CHECK-LABEL: test_acc_scalar_char: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll +++ 
llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -tail-predication=enabled %s -o - | FileCheck %s define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { ; CHECK-LABEL: @mat_vec_sext_i16( Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: one_loop_add_add_v16i8: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* 
noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 { ; CHECK-LABEL: @foo( Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @uadd_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: uadd_sat: ; CHECK: @ %bb.0: @ %entry Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @fabs(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: fabs: ; CHECK: @ %bb.0: @ %entry Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll 
=================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @round(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %n) #0 { ; CHECK-LABEL: round: ; CHECK: @ %bb.0: @ %entry Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s + define arm_aapcs_vfpcc void @usub_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: usub_sat: ; CHECK: @ %bb.0: @ %entry Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main 
-mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; TODO: We should be able to generate a vctp for the loads. ; CHECK-LABEL: trunc_v4i32_v4i16 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; The following functions should all fail to become tail-predicated. ; CHECK-NOT: call i32 @llvm.arm.vctp Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; CHECK-LABEL: expand_v8i16_v8i32 ; CHECK-NOT: call i32 @llvm.arm.mve.vctp Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll @@ -1,6 +1,6 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s 
-S -o - | FileCheck %s -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false \ -; RUN: -force-mve-tail-predication -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=force-enabled \ +; RUN: -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE ; CHECK-LABEL: reduction_i32 ; CHECK: phi i32 [ 0, %vector.ph ] Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr { ; CHECK-LABEL: varying_outer_2d_reduction: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | 
FileCheck %s define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { ; CHECK-LABEL: mul_reduce_add: Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s ; CHECK-LABEL: vec_mul_reduce_add Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s ; TODO: The unrolled pattern is preventing the transform ; CHECK-LABEL: mul_v16i8_unroll Index: llvm/test/CodeGen/Thumb2/mve-fma-loops.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fma-loops.ll +++ llvm/test/CodeGen/Thumb2/mve-fma-loops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled %s -o - | FileCheck %s define arm_aapcs_vfpcc 
void @fmas1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) { ; CHECK-LABEL: fmas1: