diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1734,6 +1734,8 @@ /// Turn X + Y overflows ? -1 : X + Y -> uadd_sat X, Y /// And X - Y overflows ? 0 : X - Y -> usub_sat X, Y +/// Along with a number of patterns similar to: +/// X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y static Instruction * foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) { Value *CondVal = SI.getCondition(); @@ -1745,6 +1747,50 @@ !match(FalseVal, m_ExtractValue<0>(m_Specific(II)))) return nullptr; + Value *X = II->getLHS(); + Value *Y = II->getRHS(); + + auto IsSignedSaturateLimit = [&](Value *Limit) { + Type *Ty = Limit->getType(); + + ICmpInst::Predicate Pred; + Value *TrueVal, *FalseVal, *Op; + const APInt *C; + if (!match(Limit, m_Select(m_ICmp(Pred, m_Value(Op), m_APInt(C)), + m_Value(TrueVal), m_Value(FalseVal)))) + return false; + + auto IsZeroOrOne = [](const APInt &C) { + return C.isNullValue() || C.isOneValue(); + }; + auto IsMinMax = [&](Value *Min, Value *Max) { + APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits()); + APInt MaxVal = APInt::getSignedMaxValue(Ty->getScalarSizeInBits()); + return match(Min, m_SpecificInt(MinVal)) && + match(Max, m_SpecificInt(MaxVal)); + }; + + if (Op != X && Op != Y) + return false; + + // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + if (Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) && + IsMinMax(TrueVal, FalseVal)) + return true; + // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? 
(Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) && + IsMinMax(FalseVal, TrueVal)) + return true; + + return false; + }; + Intrinsic::ID NewIntrinsicID; if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow && match(TrueVal, m_AllOnes())) @@ -1754,12 +1800,23 @@ match(TrueVal, m_Zero())) // X - Y overflows ? 0 : X - Y -> usub_sat X, Y NewIntrinsicID = Intrinsic::usub_sat; + else if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow && + IsSignedSaturateLimit(TrueVal)) + // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + NewIntrinsicID = Intrinsic::sadd_sat; else return nullptr; Function *F = Intrinsic::getDeclaration(SI.getModule(), NewIntrinsicID, SI.getType()); - return CallInst::Create(F, {II->getArgOperand(0), II->getArgOperand(1)}); + return CallInst::Create(F, {X, Y}); } Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { diff --git a/llvm/test/Transforms/InstCombine/overflow_to_sat.ll b/llvm/test/Transforms/InstCombine/overflow_to_sat.ll --- a/llvm/test/Transforms/InstCombine/overflow_to_sat.ll +++ b/llvm/test/Transforms/InstCombine/overflow_to_sat.ll @@ -47,12 +47,7 @@ define i8 @sadd_x_lt_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sadd_x_lt_max( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127 
-; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) @@ -85,12 +80,7 @@ define i8 @sadd_x_le_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sadd_x_le_max( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) @@ -104,12 +94,7 @@ define i8 @sadd_x_gt_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sadd_x_gt_min( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) @@ -142,12 +127,7 @@ define i8 @sadd_x_ge_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sadd_x_ge_min( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X]], -1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, 
i8 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) @@ -200,12 +180,7 @@ define i8 @sadd_y_lt_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sadd_y_lt_max( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[Y]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) @@ -238,12 +213,7 @@ define i8 @sadd_y_le_max(i8 %x, i8 %y) { ; CHECK-LABEL: @sadd_y_le_max( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[Y]], 1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) @@ -257,12 +227,7 @@ define i8 @sadd_y_gt_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sadd_y_gt_min( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[Y]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 
[[C]], i8 127, i8 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) @@ -295,12 +260,7 @@ define i8 @sadd_y_ge_min(i8 %x, i8 %y) { ; CHECK-LABEL: @sadd_y_ge_min( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[Y]], -1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %x, i8 %y) @@ -718,12 +678,7 @@ define i32 @sadd_i32(i32 %x, i32 %y) { ; CHECK-LABEL: @sadd_i32( -; CHECK-NEXT: [[AO:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i32, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i32, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 -2147483648, i32 2147483647 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i32 [[S]], i32 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) ; CHECK-NEXT: ret i32 [[R]] ; %ao = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)