diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -765,25 +765,24 @@ // Match unsigned saturated add of 2 variables with an unnecessary 'not'. // There are 8 commuted variants. - // Canonicalize -1 (saturated result) to true value of the select. Just - // swapping the compare operands is legal, because the selected value is the - // same in case of equality, so we can interchange u< and u<=. + // Canonicalize -1 (saturated result) to true value of the select. if (match(FVal, m_AllOnes())) { std::swap(TVal, FVal); - std::swap(Cmp0, Cmp1); + Pred = CmpInst::getInversePredicate(Pred); } if (!match(TVal, m_AllOnes())) return nullptr; - // Canonicalize predicate to 'ULT'. - if (Pred == ICmpInst::ICMP_UGT) { - Pred = ICmpInst::ICMP_ULT; + // Canonicalize predicate to less-than or less-or-equal-than. + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { std::swap(Cmp0, Cmp1); + Pred = CmpInst::getSwappedPredicate(Pred); } - if (Pred != ICmpInst::ICMP_ULT) + if (Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_ULE) return nullptr; // Match unsigned saturated add of 2 variables with an unnecessary 'not'. + // Strictness of the comparison is irrelevant. Value *Y; if (match(Cmp0, m_Not(m_Value(X))) && match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) { @@ -792,6 +791,7 @@ return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y); } // The 'not' op may be included in the sum but not the compare. + // Strictness of the comparison is irrelevant. X = Cmp0; Y = Cmp1; if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) { @@ -802,7 +802,9 @@ Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1)); } // The overflow may be detected via the add wrapping round. - if (match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) && + // This is only valid for strict comparison! + if (Pred == ICmpInst::ICMP_ULT && + match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) && match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) { // ((X + Y) u< X) ? -1 : (X + Y) --> uadd.sat(X, Y) // ((X + Y) u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y) diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1801,8 +1801,10 @@ define i32 @uadd_sat_via_add_nonstrict(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_via_add_nonstrict( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C_NOT:%.*]] = icmp ugt i32 [[A]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[C_NOT]], i32 [[A]], i32 -1 +; CHECK-NEXT: ret i32 [[R]] ; %a = add i32 %x, %y %c = icmp ule i32 %a, %y @@ -1823,8 +1825,10 @@ define i32 @uadd_sat_via_add_swapped_select_strict(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_via_add_swapped_select_strict( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1 +; CHECK-NEXT: ret i32 [[R]] ; %a = add i32 %x, %y %c = icmp ugt i32 %a, %y @@ -1845,8 +1849,10 @@ define i32 @uadd_sat_via_add_swapped_cmp_nonstrict(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_via_add_swapped_cmp_nonstrict( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C_NOT:%.*]] = icmp ugt i32 [[A]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[C_NOT]], i32 [[A]], i32 -1 +; CHECK-NEXT: ret i32 [[R]] ; %a = add i32 %x, %y %c = icmp uge i32 %y, %a @@ -1867,8 +1873,10 @@ define i32 @uadd_sat_via_add_swapped_cmp_select_nonstrict(i32 %x, i32 %y) { ; CHECK-LABEL: @uadd_sat_via_add_swapped_cmp_select_nonstrict( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y:%.*]], i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 [[A]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[C]], i32 [[A]], i32 -1 +; CHECK-NEXT: ret i32 [[R]] ; %a = add i32 %x, %y %c = icmp ult i32 %y, %a