diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -463,6 +463,14 @@ const DominatorTree *DT = nullptr, const TargetLibraryInfo *TLI = nullptr); + /// Return the same result as isSafeToSpeculativelyExecute under the + /// assumption that the opcode of the instruction is Opcode. + bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, + const Operator *Inst, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + const TargetLibraryInfo *TLI = nullptr); + /// Returns true if the result or effects of the given instructions \p I /// depend values not reachable through the def use graph. /// * Memory dependence arises for example if the instruction reads from diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1438,7 +1438,7 @@ [ NoCapture>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>; // Speculatable Binary operators -let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { +let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1512,7 +1512,7 @@ // Floating-point arithmetic. let IntrProperties = - [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + [IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_fadd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, @@ -1629,7 +1629,7 @@ } // Reductions -let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { +let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [LLVMVectorElementType<0>, llvm_anyvector_ty, diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4580,13 +4580,20 @@ const Operator *Inst = dyn_cast(V); if (!Inst) return false; + return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI, DT, TLI); +} +bool llvm::isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, + const Operator *Inst, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) if (Constant *C = dyn_cast(Inst->getOperand(i))) if (C->canTrap()) return false; - switch (Inst->getOpcode()) { + switch (Opcode) { default: return true; case Instruction::UDiv: @@ -4617,7 +4624,9 @@ return false; } case Instruction::Load: { - const LoadInst *LI = cast(Inst); + const LoadInst *LI = dyn_cast(Inst); + if (!LI) + return false; if (mustSuppressSpeculation(*LI)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); @@ -4626,7 +4635,9 @@ TLI); } case Instruction::Call: { - auto *CI = cast(Inst); + auto *CI = dyn_cast(Inst); + if (!CI) + return false; const Function *Callee = CI->getCalledFunction(); // The called function could have undefined behavior or side-effects, even diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ExpandVectorPredication.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/ExpandVectorPredication.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -113,6 +113,19 @@ OldOp.eraseFromParent(); } +static bool maySpeculateLanes(VPIntrinsic &VPI) { + // Eventhough non-VP reduction intrinsics are speculatable, their mask bits + // are not. + if (isa(VPI)) + return false; + // Fallback to whether the function operation is safe to speculatively + // execute. + Optional OpcOpt = VPI.getFunctionalOpcode(); + unsigned FunctionalOpc = OpcOpt.getValueOr((unsigned)Instruction::Call); + return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, + cast(&VPI)); +} + //// } Helpers namespace { @@ -216,8 +229,7 @@ Value * CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, VPIntrinsic &VPI) { - assert((isSafeToSpeculativelyExecute(&VPI) || - VPI.canIgnoreVectorLengthParam()) && + assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && "Implicitly dropping %evl in non-speculatable operator!"); auto OC = static_cast(*VPI.getFunctionalOpcode()); @@ -296,8 +308,7 @@ Value * CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, VPReductionIntrinsic &VPI) { - assert((isSafeToSpeculativelyExecute(&VPI) || - VPI.canIgnoreVectorLengthParam()) && + assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && "Implicitly dropping %evl in non-speculatable operator!"); Value *Mask = VPI.getMaskParam(); @@ -471,9 +482,9 @@ bool isDone() const { return Strategy.shouldDoNothing(); } }; -void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) { - // Speculatable instructions do not strictly need predication. - if (isSafeToSpeculativelyExecute(&I)) { +void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { + // Operations with speculatable lanes do not strictly need predication. + if (maySpeculateLanes(VPI)) { // Converting a speculatable VP intrinsic means dropping %mask and %evl. // No need to expand %evl into the %mask only to ignore that code. if (LegalizeStrat.OpStrategy == VPLegalization::Convert) @@ -518,7 +529,7 @@ if (!VPI) continue; auto VPStrat = getVPLegalizationStrategy(*VPI); - sanitizeStrategy(I, VPStrat); + sanitizeStrategy(*VPI, VPStrat); if (!VPStrat.shouldDoNothing()) Worklist.emplace_back(VPI, VPStrat); } diff --git a/llvm/test/CodeGen/Generic/expand-vp.ll b/llvm/test/CodeGen/Generic/expand-vp.ll --- a/llvm/test/CodeGen/Generic/expand-vp.ll +++ b/llvm/test/CodeGen/Generic/expand-vp.ll @@ -2,7 +2,7 @@ ; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=LEGAL_LEGAL ; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=DISCARD_LEGAL ; RUN: opt --expandvp --expandvp-override-evl-transform=Convert --expandvp-override-mask-transform=Legal -S < %s | FileCheck %s --check-prefix=CONVERT_LEGAL -; Full expansion cases (all expanded to non-VP). +; Call expansion cases (all expanded to non-VP). ; RUN: opt --expandvp --expandvp-override-evl-transform=Discard --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT ; RUN: opt --expandvp -S < %s | FileCheck %s --check-prefix=ALL-CONVERT ; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s --check-prefix=ALL-CONVERT @@ -166,62 +166,70 @@ ; Check that reductions use the correct neutral element for masked-off elements ; ALL-CONVERT: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) { -; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer +; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer +; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] +; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m +; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> [[NEWM]], <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[ADD]]) ; ALL-CONVERT-NEXT: %{{.+}} = add i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[MUL:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[MUL:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[MUL]]) ; ALL-CONVERT-NEXT: %{{.+}} = mul i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[AND:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[AND:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[AND]]) ; ALL-CONVERT-NEXT: %{{.+}} = and i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[OR:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer +; ALL-CONVERT: [[OR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[OR]]) ; ALL-CONVERT-NEXT: %{{.+}} = or i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[XOR:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer +; ALL-CONVERT: [[XOR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[XOR]]) ; ALL-CONVERT-NEXT: %{{.+}} = xor i32 [[RED]], %start -; ALL-CONVERT-NEXT: [[SMIN:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[SMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smin.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT-NEXT: [[SMAX:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[SMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smax.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT-NEXT: [[UMIN:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> +; ALL-CONVERT: [[UMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[UMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umin.i32(i32 [[RED]], i32 %start) -; ALL-CONVERT-NEXT: [[UMAX:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer +; ALL-CONVERT: [[UMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer ; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[UMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umax.i32(i32 [[RED]], i32 %start) ; ALL-CONVERT-NEXT: ret void ; Check that reductions use the correct neutral element for masked-off elements ; ALL-CONVERT: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) { -; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer +; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] +; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m +; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> [[NEWM]], <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMIN_NNAN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMIN_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMAX:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX]]) ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMAX_NNAN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN_NINF]]) ; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maxnum.f32(float [[RED]], float %f) -; ALL-CONVERT-NEXT: [[FADD:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]]) -; ALL-CONVERT-NEXT: [[FADD:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]]) -; ALL-CONVERT-NEXT: [[FMUL:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]]) -; ALL-CONVERT-NEXT: [[FMUL:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> +; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> ; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]]) ; ALL-CONVERT-NEXT: ret void @@ -332,29 +340,37 @@ ; DISCARD_LEGAL: ret void ; DISCARD_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) { -; DISCARD_LEGAL-NEXT: %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: ret void +; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer +; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] +; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m +; DISCARD_LEGAL-NEXT: %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> [[NEWMASK]], i32 4) +; DISCARD_LEGAL-NOT: %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4) +; DISCARD_LEGAL: ret void ; DISCARD_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) { -; DISCARD_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) -; DISCARD_LEGAL-NEXT: ret void +; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0 +; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer +; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <4 x i32> , [[NSPLAT]] +; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m +; DISCARD_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> [[NEWMASK]], i32 4) +; DISCARD_LEGAL-NOT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL-NOT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4) +; DISCARD_LEGAL: ret void ; Convert %evl into %mask everywhere (%evl Convert, %mask Legal) ;