Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -622,11 +622,7 @@ return Builder.CreateOr(V, Y); } -/// Transform patterns such as: (a > b) ? a - b : 0 -/// into: ((a > b) ? a : b) - b) -/// This produces a canonical max pattern that is more easily recognized by the -/// backend and converted into saturated subtraction instructions if those -/// exist. +/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b). /// There are 8 commuted/swapped variants of this pattern. /// TODO: Also support a - UMIN(a,b) patterns. static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, @@ -668,11 +664,12 @@ if (!TrueVal->hasOneUse()) return nullptr; - // All checks passed, convert to canonical unsigned saturated subtraction - // form: sub(max()). - // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b) - Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); - return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B); + // (a > b) ? a - b : 0 -> usub.sat(a, b) + // (a > b) ? b - a : 0 -> -usub.sat(a, b) + Value *Result = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B); + if (IsNegative) + Result = Builder.CreateNeg(Result); + return Result; } static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal, @@ -689,15 +686,16 @@ if (Pred == ICmpInst::ICMP_ULT && match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 && match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) { - // Commute compare predicate and select operands: - // (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C) - Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1); - return Builder.CreateSelect(NewCmp, FVal, TVal); + // (X u< ~C) ? 
(X + C) : -1 --> uadd.sat(X, C) + return Builder.CreateBinaryIntrinsic( + Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C)); } // Match unsigned saturated add of 2 variables with an unnecessary 'not'. // There are 8 commuted variants. - // Canonicalize -1 (saturated result) to true value of the select. + // Canonicalize -1 (saturated result) to true value of the select. Just + // swapping the compare operands is legal, because the selected value is the + // same in case of equality, so we can interchange u< and u<=. if (match(FVal, m_AllOnes())) { std::swap(TVal, FVal); std::swap(Cmp0, Cmp1); @@ -717,24 +715,19 @@ Value *Y; if (match(Cmp0, m_Not(m_Value(X))) && match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) { - // Change the comparison to use the sum (false value of the select). That is - // a canonical pattern match form for uadd.with.overflow and eliminates a - // use of the 'not' op: - // (~X u< Y) ? -1 : (X + Y) --> ((X + Y) u< Y) ? -1 : (X + Y) - // (~X u< Y) ? -1 : (Y + X) --> ((Y + X) u< Y) ? -1 : (Y + X) - Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y); - return Builder.CreateSelect(NewCmp, TVal, FVal); + // (~X u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y) + // (~X u< Y) ? -1 : (Y + X) --> uadd.sat(X, Y) + return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y); } // The 'not' op may be included in the sum but not the compare. X = Cmp0; Y = Cmp1; if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) { - // Change the comparison to use the sum (false value of the select). That is - // a canonical pattern match form for uadd.with.overflow: - // (X u< Y) ? -1 : (~X + Y) --> ((~X + Y) u< Y) ? -1 : (~X + Y) - // (X u< Y) ? -1 : (Y + ~X) --> ((Y + ~X) u< Y) ? -1 : (Y + ~X) - Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y); - return Builder.CreateSelect(NewCmp, TVal, FVal); + // (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y) + // (X u< Y) ? 
-1 : (Y + ~X) --> uadd.sat(Y, ~X) + BinaryOperator *BO = cast<BinaryOperator>(FVal); + return Builder.CreateBinaryIntrinsic( + Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1)); } return nullptr; Index: test/Transforms/InstCombine/saturating-add-sub.ll =================================================================== --- test/Transforms/InstCombine/saturating-add-sub.ll +++ test/Transforms/InstCombine/saturating-add-sub.ll @@ -721,10 +721,8 @@ define i32 @uadd_sat(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat( -; CHECK-NEXT: [[A:%.*]] = add i32 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %notx = xor i32 %x, -1 %a = add i32 %y, %x @@ -736,10 +734,8 @@ define i32 @uadd_sat_commute_add(i32 %xp, i32 %y) { ; CHECK-LABEL: @uadd_sat_commute_add( ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[XP:%.*]] -; CHECK-NEXT: [[A:%.*]] = add i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %x = urem i32 42, %xp ; thwart complexity-based-canonicalization %notx = xor i32 %x, -1 @@ -752,10 +748,8 @@ define i32 @uadd_sat_ugt(i32 %x, i32 %yp) { ; CHECK-LABEL: @uadd_sat_ugt( ; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442 -; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization 
%notx = xor i32 %x, -1 @@ -769,10 +763,8 @@ ; CHECK-LABEL: @uadd_sat_ugt_commute_add( ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], ; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> , [[XP:%.*]] -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[X]], [[Y]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> [[A]] -; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %y = sdiv <2 x i32> %yp, ; thwart complexity-based-canonicalization %x = srem <2 x i32> , %xp ; thwart complexity-based-canonicalization @@ -786,10 +778,8 @@ define i32 @uadd_sat_commute_select(i32 %x, i32 %yp) { ; CHECK-LABEL: @uadd_sat_commute_select( ; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442 -; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization %notx = xor i32 %x, -1 @@ -803,10 +793,8 @@ ; CHECK-LABEL: @uadd_sat_commute_select_commute_add( ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[XP:%.*]] ; CHECK-NEXT: [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442 -; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X]], [[Y]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %x = urem i32 42, %xp ; thwart complexity-based-canonicalization %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization @@ -819,10 +807,8 @@ define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> 
%x, <2 x i32> %y) { ; CHECK-LABEL: @uadd_sat_commute_select_ugt( -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> [[A]] -; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %notx = xor <2 x i32> %x, %a = add <2 x i32> %y, %x @@ -834,10 +820,8 @@ define i32 @uadd_sat_commute_select_ugt_commute_add(i32 %xp, i32 %y) { ; CHECK-LABEL: @uadd_sat_commute_select_ugt_commute_add( ; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[XP:%.*]] -; CHECK-NEXT: [[A:%.*]] = add i32 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %x = srem i32 42, %xp ; thwart complexity-based-canonicalization %notx = xor i32 %x, -1 @@ -882,10 +866,8 @@ define i32 @uadd_sat_not(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_not( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %notx = xor i32 %x, -1 %a = add i32 %notx, %y @@ -899,10 +881,8 @@ ; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[XP:%.*]] ; CHECK-NEXT: [[Y:%.*]] = urem i32 42, [[YP:%.*]] ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X]], -1 -; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[Y]], [[NOTX]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; 
CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %x = srem i32 42, %xp ; thwart complexity-based-canonicalization %y = urem i32 42, %yp ; thwart complexity-based-canonicalization @@ -916,10 +896,8 @@ define i32 @uadd_sat_not_ugt(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_not_ugt( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %notx = xor i32 %x, -1 %a = add i32 %notx, %y @@ -932,10 +910,8 @@ ; CHECK-LABEL: @uadd_sat_not_ugt_commute_add( ; CHECK-NEXT: [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], ; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> [[A]] -; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %y = sdiv <2 x i32> %yp, ; thwart complexity-based-canonicalization %notx = xor <2 x i32> %x, @@ -948,10 +924,8 @@ define i32 @uadd_sat_not_commute_select(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_not_commute_select( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %notx = xor i32 %x, -1 %a = add 
i32 %notx, %y @@ -964,10 +938,8 @@ ; CHECK-LABEL: @uadd_sat_not_commute_select_commute_add( ; CHECK-NEXT: [[Y:%.*]] = sdiv i32 42, [[YP:%.*]] ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[A:%.*]] = add i32 [[Y]], [[NOTX]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %y = sdiv i32 42, %yp ; thwart complexity-based-canonicalization %notx = xor i32 %x, -1 @@ -982,10 +954,8 @@ ; CHECK-NEXT: [[X:%.*]] = urem <2 x i32> , [[XP:%.*]] ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i32> , [[YP:%.*]] ; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[X]], -; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> [[A]] -; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]]) +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; %x = urem <2 x i32> , %xp ; thwart complexity-based-canonicalization %y = srem <2 x i32> , %yp ; thwart complexity-based-canonicalization @@ -999,10 +969,8 @@ define i32 @uadd_sat_not_commute_select_ugt_commute_add(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_not_commute_select_ugt_commute_add( ; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1 -; CHECK-NEXT: [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %notx = xor i32 %x, -1 %a = add i32 %notx, %y @@ -1026,10 +994,8 @@ define i32 @uadd_sat_constant_commute(i32 %x) { ; CHECK-LABEL: 
@uadd_sat_constant_commute( -; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X]], -43 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]] -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42) +; CHECK-NEXT: ret i32 [[TMP1]] ; %a = add i32 %x, 42 %c = icmp ult i32 %x, -43 @@ -1052,10 +1018,8 @@ define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec_commute( -; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[X]], -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> , <4 x i32> [[A]] -; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> ) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %a = add <4 x i32> %x, %c = icmp ult <4 x i32> %x, Index: test/Transforms/InstCombine/unsigned_saturated_sub.ll =================================================================== --- test/Transforms/InstCombine/unsigned_saturated_sub.ll +++ test/Transforms/InstCombine/unsigned_saturated_sub.ll @@ -12,10 +12,8 @@ define i64 @max_sub_ugt(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_ugt( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: ret i64 [[TMP1]] ; %cmp = icmp ugt i64 %a, %b %sub = sub i64 %a, %b @@ -27,10 +25,8 @@ define i64 @max_sub_uge(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_uge( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: 
[[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: ret i64 [[TMP1]] ; %cmp = icmp uge i64 %a, %b %sub = sub i64 %a, %b @@ -43,10 +39,8 @@ define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @max_sub_ugt_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[B]] -; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; %cmp = icmp ugt <4 x i32> %a, %b %sub = sub <4 x i32> %a, %b @@ -59,12 +53,10 @@ define i64 @max_sub_ult(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_ult( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) ; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]] ; CHECK-NEXT: call void @use(i64 [[EXTRASUB]]) -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: ret i64 [[TMP1]] ; %cmp = icmp ult i64 %b, %a %sub = sub i64 %a, %b @@ -78,12 +70,10 @@ define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_ugt_sel_swapped( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) ; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]] ; CHECK-NEXT: call void @use(i64 [[EXTRASUB]]) -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: ret i64 [[TMP1]] ; %cmp = icmp ugt i64 %b, %a %sub = sub i64 %a, %b @@ -97,10 +87,8 @@ define i64 @max_sub_ult_sel_swapped(i64 %a, i64 
%b) { ; CHECK-LABEL: @max_sub_ult_sel_swapped( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: ret i64 [[TMP1]] ; %cmp = icmp ult i64 %a, %b %sub = sub i64 %a, %b @@ -112,12 +100,11 @@ define i64 @neg_max_sub_ugt(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ugt( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]] ; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]] ; CHECK-NEXT: call void @use(i64 [[EXTRASUB]]) -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: ret i64 [[TMP2]] ; %cmp = icmp ugt i64 %a, %b %sub = sub i64 %b, %a @@ -131,10 +118,9 @@ define i64 @neg_max_sub_ult(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ult( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP2]] ; %cmp = icmp ult i64 %b, %a %sub = sub i64 %b, %a @@ -146,10 +132,9 @@ define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]] -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = 
call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP2]] ; %cmp = icmp ugt i64 %b, %a %sub = sub i64 %b, %a @@ -161,12 +146,11 @@ define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ult_sel_swapped( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]] ; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]] ; CHECK-NEXT: call void @use(i64 [[EXTRASUB]]) -; CHECK-NEXT: ret i64 [[TMP3]] +; CHECK-NEXT: ret i64 [[TMP2]] ; %cmp = icmp ult i64 %a, %b %sub = sub i64 %b, %a