Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1736,6 +1736,7 @@ /// And X - Y overflows ? 0 : X - Y -> usub_sat X, Y /// Along with a number of patterns similar to: /// X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y +/// X - Y overflows ? (X > 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y static Instruction * foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) { Value *CondVal = SI.getCondition(); @@ -1750,7 +1751,7 @@ Value *X = II->getLHS(); Value *Y = II->getRHS(); - auto IsSignedSaturateLimit = [&](Value *Limit) { + auto IsSignedSaturateLimit = [&](Value *Limit, bool IsAdd) { Type *Ty = Limit->getType(); ICmpInst::Predicate Pred; @@ -1773,20 +1774,43 @@ if (Op != X && Op != Y) return false; - // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y - // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y - // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y - // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y - if (Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) && - IsMinMax(TrueVal, FalseVal)) - return true; - // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y - // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y - // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y - // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y - if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) && - IsMinMax(FalseVal, TrueVal)) - return true; + if (IsAdd) { + // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y + if (Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) && + IsMinMax(TrueVal, FalseVal)) + return true; + // X + Y overflows ? (X >s 0 ? 
INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + // X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y + if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) && + IsMinMax(FalseVal, TrueVal)) + return true; + } else { + // X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y + if (Op == X && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C + 1) && + IsMinMax(TrueVal, FalseVal)) + return true; + // X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y + if (Op == X && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 2) && + IsMinMax(FalseVal, TrueVal)) + return true; + // X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y + if (Op == Y && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) && + IsMinMax(FalseVal, TrueVal)) + return true; + // X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y + if (Op == Y && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) && + IsMinMax(TrueVal, FalseVal)) + return true; + } return false; }; @@ -1801,7 +1825,7 @@ // X - Y overflows ? 0 : X - Y -> usub_sat X, Y NewIntrinsicID = Intrinsic::usub_sat; else if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow && - IsSignedSaturateLimit(TrueVal)) + IsSignedSaturateLimit(TrueVal, /*IsAdd=*/true)) // X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y // X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y // X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y @@ -1811,6 +1835,17 @@ // X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y // X + Y overflows ? (Y >s -1 ? 
INTMAX : INTMIN) : X + Y --> sadd_sat X, Y NewIntrinsicID = Intrinsic::sadd_sat; + else if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow && + IsSignedSaturateLimit(TrueVal, /*IsAdd=*/false)) + // X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y + // X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y + NewIntrinsicID = Intrinsic::ssub_sat; else return nullptr; Index: llvm/test/Transforms/InstCombine/overflow_to_sat.ll =================================================================== --- llvm/test/Transforms/InstCombine/overflow_to_sat.ll +++ llvm/test/Transforms/InstCombine/overflow_to_sat.ll @@ -315,12 +315,7 @@ define i8 @ssub_x_lt_max(i8 %x, i8 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_x_lt_max( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) @@ -391,12 +386,7 @@ define i8 @ssub_x_lt2_max(i8 %x, i8 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_x_lt2_max( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; 
CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[X]], -1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) @@ -448,12 +438,7 @@ define i8 @ssub_x_ge_min(i8 %x, i8 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_x_ge_min( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X]], -1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) @@ -486,12 +471,7 @@ define i8 @ssub_x_gt2_min(i8 %x, i8 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_x_gt2_min( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X]], -2 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) @@ -525,12 +505,7 @@ define i8 @ssub_y_lt_min(i8 %x, i8 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_y_lt_min( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 
-; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[Y]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) @@ -563,12 +538,7 @@ define i8 @ssub_y_le_min(i8 %x, i8 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_y_le_min( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i8 [[Y]], 1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 127, i8 -128 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) @@ -620,12 +590,7 @@ define i8 @ssub_y_gt_max(i8 %x, i8 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_y_gt_max( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[Y]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) @@ -658,12 +623,7 @@ define i8 @ssub_y_ge_max(i8 %x, i8 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_y_ge_max( -; CHECK-NEXT: [[AO:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) -; 
CHECK-NEXT: [[O:%.*]] = extractvalue { i8, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i8, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[Y]], -1 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i8 -128, i8 127 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i8 [[S]], i8 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) ; CHECK-NEXT: ret i8 [[R]] ; %ao = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %x, i8 %y) @@ -692,12 +652,7 @@ define i32 @ssub_i32(i32 %x, i32 %y) { ; CHECK-LABEL: define {{[^@]+}}@ssub_i32( -; CHECK-NEXT: [[AO:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) -; CHECK-NEXT: [[O:%.*]] = extractvalue { i32, i1 } [[AO]], 1 -; CHECK-NEXT: [[A:%.*]] = extractvalue { i32, i1 } [[AO]], 0 -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[X]], 0 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C]], i32 -2147483648, i32 2147483647 -; CHECK-NEXT: [[R:%.*]] = select i1 [[O]], i32 [[S]], i32 [[A]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) ; CHECK-NEXT: ret i32 [[R]] ; %ao = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)