Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -622,11 +622,7 @@
   return Builder.CreateOr(V, Y);
 }
 
-/// Transform patterns such as: (a > b) ? a - b : 0
-/// into: ((a > b) ? a : b) - b)
-/// This produces a canonical max pattern that is more easily recognized by the
-/// backend and converted into saturated subtraction instructions if those
-/// exist.
+/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
 /// There are 8 commuted/swapped variants of this pattern.
 /// TODO: Also support a - UMIN(a,b) patterns.
 static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
@@ -668,11 +664,12 @@
   if (!TrueVal->hasOneUse())
     return nullptr;
 
-  // All checks passed, convert to canonical unsigned saturated subtraction
-  // form: sub(max()).
-  // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
-  Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
-  return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B);
+  // (a > b) ? a - b : 0 -> usub.sat(a, b)
+  // (a > b) ? b - a : 0 -> -usub.sat(a, b)
+  Value *Result = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B);
+  if (IsNegative)
+    Result = Builder.CreateNeg(Result);
+  return Result;
 }
 
 static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
@@ -689,15 +686,16 @@
   if (Pred == ICmpInst::ICMP_ULT &&
       match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
       match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
-    // Commute compare predicate and select operands:
-    // (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1);
-    return Builder.CreateSelect(NewCmp, FVal, TVal);
+    // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
+    return Builder.CreateBinaryIntrinsic(
+        Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
   }
 
   // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
   // There are 8 commuted variants.
-  // Canonicalize -1 (saturated result) to true value of the select.
+  // Canonicalize -1 (saturated result) to true value of the select. Just
+  // swapping the compare operands is legal, because the selected value is the
+  // same in case of equality, so we can interchange u< and u<=.
   if (match(FVal, m_AllOnes())) {
     std::swap(TVal, FVal);
     std::swap(Cmp0, Cmp1);
@@ -717,24 +715,19 @@
   Value *Y;
   if (match(Cmp0, m_Not(m_Value(X))) &&
       match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
-    // Change the comparison to use the sum (false value of the select). That is
-    // a canonical pattern match form for uadd.with.overflow and eliminates a
-    // use of the 'not' op:
-    // (~X u< Y) ? -1 : (X + Y) --> ((X + Y) u< Y) ? -1 : (X + Y)
-    // (~X u< Y) ? -1 : (Y + X) --> ((Y + X) u< Y) ? -1 : (Y + X)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
-    return Builder.CreateSelect(NewCmp, TVal, FVal);
+    // (~X u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
+    // (~X u< Y) ? -1 : (Y + X) --> uadd.sat(X, Y)
+    return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
  }
 
   // The 'not' op may be included in the sum but not the compare.
   X = Cmp0;
   Y = Cmp1;
   if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
-    // Change the comparison to use the sum (false value of the select). That is
-    // a canonical pattern match form for uadd.with.overflow:
-    // (X u< Y) ? -1 : (~X + Y) --> ((~X + Y) u< Y) ? -1 : (~X + Y)
-    // (X u< Y) ? -1 : (Y + ~X) --> ((Y + ~X) u< Y) ? -1 : (Y + ~X)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
-    return Builder.CreateSelect(NewCmp, TVal, FVal);
+    // (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)
+    // (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)
+    BinaryOperator *BO = cast<BinaryOperator>(FVal);
+    return Builder.CreateBinaryIntrinsic(
+        Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
   }
 
   return nullptr;
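For illustration only (an informal sketch, not part of the patch): the net effect of the two rewritten transforms on IR, as exercised by the tests below. Instead of building a compare/select max pattern, InstCombine now emits the saturating intrinsics directly:

; canonicalizeSaturatedSubtract: (a > b) ? a - b : 0
;   %cmp = icmp ugt i32 %a, %b
;   %sub = sub i32 %a, %b
;   %sel = select i1 %cmp, i32 %sub, i32 0
; becomes:
;   %sel = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
;
; canonicalizeSaturatedAdd: (~x u< y) ? -1 : (x + y)
;   %notx = xor i32 %x, -1
;   %add  = add i32 %x, %y
;   %cmp  = icmp ult i32 %notx, %y
;   %sel  = select i1 %cmp, i32 -1, i32 %add
; becomes:
;   %sel = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)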
Index: llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll
+++ llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -893,10 +893,8 @@
 
 define i32 @uadd_sat(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat(
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %y, %x
@@ -908,10 +906,8 @@
 define i32 @uadd_sat_commute_add(i32 %xp, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_commute_add(
 ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -924,10 +920,8 @@
 define i32 @uadd_sat_ugt(i32 %x, i32 %yp) {
 ; CHECK-LABEL: @uadd_sat_ugt(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -941,10 +935,8 @@
 ; CHECK-LABEL: @uadd_sat_ugt_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]],
 ; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> , [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[X]], [[Y]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %y = sdiv <2 x i32> %yp, ; thwart complexity-based-canonicalization
   %x = srem <2 x i32> , %xp ; thwart complexity-based-canonicalization
@@ -958,10 +950,8 @@
 define i32 @uadd_sat_commute_select(i32 %x, i32 %yp) {
 ; CHECK-LABEL: @uadd_sat_commute_select(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -975,10 +965,8 @@
 ; CHECK-LABEL: @uadd_sat_commute_select_commute_add(
 ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[XP:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
   %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
@@ -991,10 +979,8 @@
 
 define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @uadd_sat_commute_select_ugt(
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
   %a = add <2 x i32> %y, %x
@@ -1006,10 +992,8 @@
 define i32 @uadd_sat_commute_select_ugt_commute_add(i32 %xp, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_commute_select_ugt_commute_add(
 ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %x = srem i32 42, %xp ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -1054,10 +1038,8 @@
 define i32 @uadd_sat_not(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_not(
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %notx, %y
@@ -1071,10 +1053,8 @@
 ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[XP:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = urem i32 42, [[YP:%.*]]
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X]], -1
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %x = srem i32 42, %xp ; thwart complexity-based-canonicalization
   %y = urem i32 42, %yp ; thwart complexity-based-canonicalization
@@ -1088,10 +1068,8 @@
 define i32 @uadd_sat_not_ugt(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_not_ugt(
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %notx, %y
@@ -1104,10 +1082,8 @@
 ; CHECK-LABEL: @uadd_sat_not_ugt_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]],
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %y = sdiv <2 x i32> %yp, ; thwart complexity-based-canonicalization
   %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
@@ -1120,10 +1096,8 @@
 define i32 @uadd_sat_not_commute_select(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_not_commute_select(
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %notx, %y
@@ -1136,10 +1110,8 @@
 ; CHECK-LABEL: @uadd_sat_not_commute_select_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 42, [[YP:%.*]]
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %y = sdiv i32 42, %yp ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -1154,10 +1126,8 @@
 ; CHECK-NEXT:    [[X:%.*]] = urem <2 x i32> , [[XP:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i32> , [[YP:%.*]]
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %x = urem <2 x i32> , %xp ; thwart complexity-based-canonicalization
   %y = srem <2 x i32> , %yp ; thwart complexity-based-canonicalization
@@ -1171,10 +1141,8 @@
 define i32 @uadd_sat_not_commute_select_ugt_commute_add(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_not_commute_select_ugt_commute_add(
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %notx, %y
@@ -1198,10 +1166,8 @@
 
 define i32 @uadd_sat_constant_commute(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_constant_commute(
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], 42
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[X]], -43
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %a = add i32 %x, 42
   %c = icmp ult i32 %x, -43
@@ -1224,10 +1190,8 @@
 
 define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) {
 ; CHECK-LABEL: @uadd_sat_constant_vec_commute(
-; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[X]], <i32 -43, i32 -43, i32 -43, i32 -43>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[A]]
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
   %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
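A note on the intrinsic semantics the checks above rely on (illustrative values, not from the patch): @llvm.uadd.sat returns the sum, clamped to the all-ones value on unsigned overflow.

; e.g. at i8 width:
;   call i8 @llvm.uadd.sat.i8(i8 100, i8 100) --> 200
;   call i8 @llvm.uadd.sat.i8(i8 200, i8 100) --> 255 (a plain add would wrap to 44)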
Index: llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll
+++ llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll
@@ -1,21 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
-; Transforms for unsigned saturated subtraction idioms are tested here.
-; In all cases, we want to form a canonical min/max op (the compare and
-; select operands are the same), so that is recognized by the backend.
-; The backend recognition is tested in test/CodeGen/X86/psubus.ll.
+; Canonicalization of unsigned saturated subtraction idioms to
+; usub.sat() intrinsics is tested here.
 
 declare void @use(i64)
 
-; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (a > b) ? a - b : 0 -> usub.sat(a, b)
 
 define i64 @max_sub_ugt(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ugt(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %sub = sub i64 %a, %b
@@ -23,14 +19,12 @@
   ret i64 %sel
 }
 
-; (a >= b) ? a - b : 0 -> ((a >= b) ? a : b) - b)
+; (a >= b) ? a - b : 0 -> usub.sat(a, b)
 
 define i64 @max_sub_uge(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_uge(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp uge i64 %a, %b
   %sub = sub i64 %a, %b
@@ -39,14 +33,12 @@
 }
 
 ; Again, with vectors:
-; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (a > b) ? a - b : 0 -> usub.sat(a, b)
 
 define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @max_sub_ugt_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[B]]
-; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %cmp = icmp ugt <4 x i32> %a, %b
   %sub = sub <4 x i32> %a, %b
@@ -55,16 +47,14 @@
 }
 
 ; Use extra ops to thwart icmp swapping canonicalization.
-; (b < a) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (b < a) ? a - b : 0 -> usub.sat(a, b)
 
 define i64 @max_sub_ult(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ult(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp ult i64 %b, %a
   %sub = sub i64 %a, %b
@@ -74,16 +64,14 @@
   ret i64 %sel
 }
 
-; (b > a) ? 0 : a - b -> ((a > b) ? a : b) - b)
+; (b > a) ? 0 : a - b -> usub.sat(a, b)
 
 define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ugt_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp ugt i64 %b, %a
   %sub = sub i64 %a, %b
@@ -93,14 +81,12 @@
   ret i64 %sel
 }
 
-; (a < b) ? 0 : a - b -> ((a > b) ? a : b) - b)
+; (a < b) ? 0 : a - b -> usub.sat(a, b)
 
 define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ult_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp ult i64 %a, %b
   %sub = sub i64 %a, %b
@@ -108,16 +94,15 @@
   ret i64 %sel
 }
 
-; ((a > b) ? b - a : 0) -> (b - ((a > b) ? a : b))
+; ((a > b) ? b - a : 0) -> -usub.sat(a, b)
 
 define i64 @neg_max_sub_ugt(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ugt(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %sub = sub i64 %b, %a
@@ -127,14 +112,13 @@
   ret i64 %sel
 }
 
-; ((b < a) ? b - a : 0) -> - ((a > b) ? a : b) - b)
+; ((b < a) ? b - a : 0) -> -usub.sat(a, b)
 
 define i64 @neg_max_sub_ult(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ult(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %cmp = icmp ult i64 %b, %a
   %sub = sub i64 %b, %a
@@ -142,14 +126,13 @@
   ret i64 %sel
 }
 
-; ((b > a) ? 0 : b - a) -> - ((a > b) ? a : b) - b)
+; ((b > a) ? 0 : b - a) -> -usub.sat(a, b)
 
 define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %cmp = icmp ugt i64 %b, %a
   %sub = sub i64 %b, %a
@@ -157,16 +140,15 @@
   ret i64 %sel
 }
 
-; ((a < b) ? 0 : b - a) -> - ((a > b) ? a : b) - b)
+; ((a < b) ? 0 : b - a) -> -usub.sat(a, b)
 
 define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ult_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %cmp = icmp ult i64 %a, %b
   %sub = sub i64 %b, %a
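Worked example of the negated form handled by the IsNegative path (illustrative, mirroring @neg_max_sub_ugt above): for a = 7, b = 3, usub.sat(7, 3) = 4, and the select (a > b) ? b - a : 0 yields -4, which is exactly sub i64 0, 4; for a <= b, usub.sat returns 0 and its negation is still 0, matching the select's false arm.

; (a > b) ? b - a : 0 --> -usub.sat(a, b)
;   %sat = call i64 @llvm.usub.sat.i64(i64 %a, i64 %b)
;   %neg = sub i64 0, %sat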