Index: llvm/lib/Analysis/InstructionSimplify.cpp
===================================================================
--- llvm/lib/Analysis/InstructionSimplify.cpp
+++ llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5249,32 +5249,67 @@
   return nullptr;
 }
 
-static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
-                                      const SimplifyQuery &Q) {
-  Intrinsic::ID IID = F->getIntrinsicID();
-  Type *ReturnType = F->getReturnType();
+/// Simplify integer min/max intrinsics. The caller is expected to swap the
+/// input values to handle commutation.
+static Value *simplifyMinMax(Intrinsic::ID IID, Value *Op0, Value *Op1,
+                             const SimplifyQuery &Q, unsigned MaxRecurse) {
+  // Map intrinsic to comparison predicate.
+  assert(Op0->getType() == Op1->getType() && "Unexpected types for min/max");
+  Type *Ty = Op0->getType();
+  ICmpInst::Predicate Pred;
+  APInt LimitC;
   switch (IID) {
   case Intrinsic::smax:
+    Pred = ICmpInst::ICMP_SGE;
+    LimitC = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());
+    break;
   case Intrinsic::smin:
+    Pred = ICmpInst::ICMP_SLE;
+    LimitC = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
+    break;
   case Intrinsic::umax:
-  case Intrinsic::umin: {
-    // Canonicalize constant operand as Op1.
-    if (isa<Constant>(Op0))
-      std::swap(Op0, Op1);
+    Pred = ICmpInst::ICMP_UGE;
+    LimitC = APInt::getMaxValue(Ty->getScalarSizeInBits());
+    break;
+  case Intrinsic::umin:
+    Pred = ICmpInst::ICMP_ULE;
+    LimitC = APInt::getMinValue(Ty->getScalarSizeInBits());
+    break;
+  default:
+    llvm_unreachable("Unexpected intrinsic");
+  }
 
-    // TODO: Allow partial undef vector constants.
-    const APInt *C;
-    if (!match(Op1, m_APInt(C)))
-      break;
+  // Assume that undef equals the constant limit value for the intrinsic.
+  if (isa<UndefValue>(Op0) || isa<UndefValue>(Op1))
+    return ConstantInt::get(Ty, LimitC);
 
-    if ((IID == Intrinsic::smax && C->isMaxSignedValue()) ||
-        (IID == Intrinsic::smin && C->isMinSignedValue()) ||
-        (IID == Intrinsic::umax && C->isMaxValue()) ||
-        (IID == Intrinsic::umin && C->isMinValue()))
-      return Op1;
+  if (!isICmpTrue(Pred, Op0, Op1, Q, MaxRecurse))
+    return nullptr;
+
+  // Don't try folding if the result would contain undef because the icmp
+  // simplification may not hold for this op. {max/min}(x, undef) != undef.
+  Constant *C;
+  if (match(Op0, m_Constant(C)) && !isGuaranteedNotToBeUndefOrPoison(C))
+    return nullptr;
+  return Op0;
+}
 
+static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
+                                      const SimplifyQuery &Q,
+                                      unsigned MaxRecurse) {
+  Intrinsic::ID IID = F->getIntrinsicID();
+  Type *ReturnType = F->getReturnType();
+  switch (IID) {
+  case Intrinsic::smax:
+  case Intrinsic::smin:
+  case Intrinsic::umax:
+  case Intrinsic::umin:
+    if (Value *V = simplifyMinMax(IID, Op0, Op1, Q, MaxRecurse))
+      return V;
+    if (Value *V = simplifyMinMax(IID, Op1, Op0, Q, MaxRecurse))
+      return V;
     break;
-  }
+
   case Intrinsic::usub_with_overflow:
   case Intrinsic::ssub_with_overflow:
     // X - X -> { 0, false }
@@ -5418,7 +5453,8 @@
   return nullptr;
 }
 
-static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
+static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q,
+                                unsigned MaxRecurse) {
   // Intrinsics with no operands have some kind of side effect. Don't simplify.
   unsigned NumOperands = Call->getNumArgOperands();
 
@@ -5432,7 +5468,7 @@
 
   if (NumOperands == 2)
     return simplifyBinaryIntrinsic(F, Call->getArgOperand(0),
-                                   Call->getArgOperand(1), Q);
+                                   Call->getArgOperand(1), Q, MaxRecurse);
 
   // Handle intrinsics with 3 or more arguments.
   switch (IID) {
@@ -5481,7 +5517,8 @@
   }
 }
 
-Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+static Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q,
+                           unsigned MaxRecurse) {
   Value *Callee = Call->getCalledOperand();
 
   // musttail calls can only be simplified if they are also DCEd.
@@ -5499,7 +5536,7 @@
     return nullptr;
 
   if (F->isIntrinsic())
-    if (Value *Ret = simplifyIntrinsic(Call, Q))
+    if (Value *Ret = simplifyIntrinsic(Call, Q, MaxRecurse))
      return Ret;
 
  if (!canConstantFoldCallTo(Call, F))
@@ -5521,6 +5558,10 @@
   return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
 }
 
+Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+  return ::SimplifyCall(Call, Q, RecursionLimit);
+}
+
 /// Given operands for a Freeze, see if we can fold the result.
 static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
   // Use a utility function defined in ValueTracking.
Index: llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
===================================================================
--- llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
+++ llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
@@ -14,8 +14,7 @@
 
 define i81 @smax_sameval(i81 %x) {
 ; CHECK-LABEL: @smax_sameval(
-; CHECK-NEXT:    [[R:%.*]] = call i81 @llvm.smax.i81(i81 [[X:%.*]], i81 [[X]])
-; CHECK-NEXT:    ret i81 [[R]]
+; CHECK-NEXT:    ret i81 [[X:%.*]]
 ;
   %r = call i81 @llvm.smax.i81(i81 %x, i81 %x)
   ret i81 %r
@@ -23,8 +22,7 @@
 
 define i3 @smin_sameval(i3 %x) {
 ; CHECK-LABEL: @smin_sameval(
-; CHECK-NEXT:    [[R:%.*]] = call i3 @llvm.smin.i3(i3 [[X:%.*]], i3 [[X]])
-; CHECK-NEXT:    ret i3 [[R]]
+; CHECK-NEXT:    ret i3 [[X:%.*]]
 ;
   %r = call i3 @llvm.smin.i3(i3 %x, i3 %x)
   ret i3 %r
@@ -32,8 +30,7 @@
 
 define <2 x i8> @umax_sameval(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_sameval(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[X]])
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
 ;
   %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %x, <2 x i8> %x)
   ret <2 x i8> %r
@@ -41,8 +38,7 @@
 
 define <2 x i8> @umin_sameval(<2 x i8> %x) {
 ; CHECK-LABEL: @umin_sameval(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[X]])
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
 ;
   %r = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> %x)
   ret <2 x i8> %r
@@ -50,8 +46,7 @@
 
 define i81 @smax_undef(i81 %x) {
 ; CHECK-LABEL: @smax_undef(
-; CHECK-NEXT:    [[R:%.*]] = call i81 @llvm.smax.i81(i81 undef, i81 [[X:%.*]])
-; CHECK-NEXT:    ret i81 [[R]]
+; CHECK-NEXT:    ret i81 1208925819614629174706175
 ;
   %r = call i81 @llvm.smax.i81(i81 undef, i81 %x)
   ret i81 %r
@@ -59,8 +54,7 @@
 
 define i3 @smin_undef(i3 %x) {
 ; CHECK-LABEL: @smin_undef(
-; CHECK-NEXT:    [[R:%.*]] = call i3 @llvm.smin.i3(i3 [[X:%.*]], i3 undef)
-; CHECK-NEXT:    ret i3 [[R]]
+; CHECK-NEXT:    ret i3 -4
 ;
   %r = call i3 @llvm.smin.i3(i3 %x, i3 undef)
   ret i3 %r
@@ -68,8 +62,7 @@
 
 define <2 x i8> @umax_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_undef(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> [[X:%.*]])
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
 ;
   %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> %x)
   ret <2 x i8> %r
@@ -77,8 +70,7 @@
 
 define <2 x i8> @umin_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @umin_undef(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> undef)
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> zeroinitializer
 ;
   %r = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> undef)
   ret <2 x i8> %r
@@ -150,8 +142,7 @@
 
 define i8 @smax_minval(i8 %x) {
 ; CHECK-LABEL: @smax_minval(
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 -128)
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %r = call i8 @llvm.smax.i8(i8 %x, i8 -128)
   ret i8 %r
@@ -159,8 +150,7 @@
 
 define <2 x i8> @smax_minval_commute(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_minval_commute(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 -128, i8 -128>, <2 x i8> [[X:%.*]])
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
 ;
   %r = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 -128, i8 -128>, <2 x i8> %x)
   ret <2 x i8> %r
@@ -168,8 +158,7 @@
 
 define i8 @smin_maxval(i8 %x) {
 ; CHECK-LABEL: @smin_maxval(
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.smin.i8(i8 127, i8 [[X:%.*]])
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %r = call i8 @llvm.smin.i8(i8 127, i8 %x)
   ret i8 %r
@@ -177,8 +166,7 @@
 
 define <2 x i8> @smin_maxval_commute(<2 x i8> %x) {
 ; CHECK-LABEL: @smin_maxval_commute(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 127, i8 127>)
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
 ;
   %r = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> <i8 127, i8 127>)
   ret <2 x i8> %r
@@ -186,8 +174,7 @@
 
 define i8 @umax_minval(i8 %x) {
 ; CHECK-LABEL: @umax_minval(
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 0)
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %r = call i8 @llvm.umax.i8(i8 %x, i8 0)
   ret i8 %r
@@ -195,8 +182,7 @@
 
 define <2 x i8> @umax_minval_commute(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_minval_commute(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> zeroinitializer, <2 x i8> [[X:%.*]])
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
 ;
   %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> zeroinitializer, <2 x i8> %x)
   ret <2 x i8> %r
@@ -204,8 +190,7 @@
 
 define i8 @umin_maxval(i8 %x) {
 ; CHECK-LABEL: @umin_maxval(
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.umin.i8(i8 -1, i8 [[X:%.*]])
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %r = call i8 @llvm.umin.i8(i8 255, i8 %x)
   ret i8 %r
@@ -213,13 +198,14 @@
 
 define <2 x i8> @umin_maxval_commute(<2 x i8> %x) {
 ; CHECK-LABEL: @umin_maxval_commute(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 -1, i8 -1>)
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
 ;
   %r = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> <i8 255, i8 255>)
   ret <2 x i8> %r
 }
 
+; TODO: Could handle partial undef.
+
 define <2 x i8> @smax_maxval_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_maxval_partial_undef(
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 undef, i8 127>, <2 x i8> [[X:%.*]])
@@ -229,6 +215,8 @@
   ret <2 x i8> %r
 }
 
+; TODO: Could handle partial undef.
+
 define <2 x i8> @smin_minval_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @smin_minval_partial_undef(
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 -128, i8 undef>)
@@ -238,6 +226,8 @@
   ret <2 x i8> %r
 }
 
+; TODO: Could handle partial undef.
+
 define <2 x i8> @umax_maxval_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_maxval_partial_undef(
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> <i8 undef, i8 -1>, <2 x i8> [[X:%.*]])
@@ -247,6 +237,8 @@
   ret <2 x i8> %r
 }
 
+; TODO: Could handle partial undef.
+
 define <2 x i8> @umin_minval_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @umin_minval_partial_undef(
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 0, i8 undef>)
@@ -256,6 +248,8 @@
   ret <2 x i8> %r
 }
 
+; TODO: Could handle partial undef.
+
 define <2 x i8> @smax_minval_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_minval_partial_undef(
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 undef, i8 -128>, <2 x i8> [[X:%.*]])
@@ -265,6 +259,8 @@
   ret <2 x i8> %r
 }
 
+; TODO: Could handle partial undef.
+
 define <2 x i8> @smin_maxval_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @smin_maxval_partial_undef(
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 127, i8 undef>)
@@ -276,13 +272,14 @@
 
 define <2 x i8> @umax_minval_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_minval_partial_undef(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> <i8 undef, i8 0>, <2 x i8> [[X:%.*]])
-; CHECK-NEXT:    ret <2 x i8> [[R]]
+; CHECK-NEXT:    ret <2 x i8> [[X:%.*]]
 ;
   %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> <i8 undef, i8 0>, <2 x i8> %x)
   ret <2 x i8> %r
 }
 
+; TODO: Could handle partial undef.
+
 define <2 x i8> @umin_maxval_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @umin_maxval_partial_undef(
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 -1, i8 undef>)
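
A quick illustration of the new behavior, not part of the patch (the function names below are made up for this note). Feeding IR like this through the InstSimplify pass, as the tests above do, should now fold both calls: umin with the unsigned-maximum constant can never change the other operand, and an undef operand is assumed to equal the limit value, so smax with undef becomes the signed-maximum constant (127 for i8).

; Illustrative sketch only; expected results are based on the folds added in
; simplifyMinMax above.
declare i8 @llvm.umin.i8(i8, i8)
declare i8 @llvm.smax.i8(i8, i8)

define i8 @umin_with_limit(i8 %x) {
  ; expected to simplify to: ret i8 %x
  %r = call i8 @llvm.umin.i8(i8 %x, i8 255)
  ret i8 %r
}

define i8 @smax_with_undef(i8 %x) {
  ; expected to simplify to: ret i8 127
  %r = call i8 @llvm.smax.i8(i8 %x, i8 undef)
  ret i8 %r
}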