Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2035,8 +2035,19 @@
     if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
       return I;
 
-    // Given 2 constant operands whose sum does not overflow:
-    // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
+    // Given 2 constant operands whose sum does not overflow, the intrinsic
+    // may be folded.
+    //
+    // Cases for `saddo (X +nsw C0), C1 -> saddo X, C0 + C1`:
+    //
+    // - Same sign can be folded to a single `saddo` call.
+    // - Opposite signs with `|C1| > |C0|` can be folded to a single `saddo`
+    //   call.
+    //
+    // Cases for `saddo (X +nsw C0), C1 -> X +nsw (C0 + C1), false`:
+    //
+    // - Opposite signs with `|C1| <= |C0|` can be folded to a single
+    //   `add nsw` instruction with the overflow result known to be false.
     Value *X;
     const APInt *C0, *C1;
     Value *Arg0 = II->getArgOperand(0);
@@ -2045,11 +2056,19 @@
         match(Arg1, m_APInt(C1))) {
       bool Overflow;
       APInt NewC = C1->sadd_ov(*C0, Overflow);
-      if (!Overflow)
-        return replaceInstUsesWith(
-            *II, Builder.CreateBinaryIntrinsic(
-                     Intrinsic::sadd_with_overflow, X,
-                     ConstantInt::get(Arg1->getType(), NewC)));
+      if (!Overflow) {
+        if (C0->isNegative() == C1->isNegative() || C0->abs().slt(C1->abs())) {
+          return replaceInstUsesWith(
+              *II, Builder.CreateBinaryIntrinsic(
+                       Intrinsic::sadd_with_overflow, X,
+                       ConstantInt::get(Arg1->getType(), NewC)));
+        } else {
+          return CreateOverflowTuple(
+              II,
+              Builder.CreateNSWAdd(X, ConstantInt::get(Arg1->getType(), NewC)),
+              Builder.getFalse());
+        }
+      }
     }
 
     break;
Index: llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
+++ llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
@@ -17,16 +17,27 @@
   ret { i32, i1 } %3
 }
 
-define { i32, i1 } @fold_mixed_signs(i32) {
-; CHECK-LABEL: @fold_mixed_signs(
-; CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP0:%.*]], i32 6)
-; CHECK-NEXT:    ret { i32, i1 } [[TMP2]]
+define { i32, i1 } @fold_mixed_signs_first_high(i32) {
+; CHECK-LABEL: @fold_mixed_signs_first_high(
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[TMP0:%.*]], 6
+; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 [[TMP2]], 0
+; CHECK-NEXT:    ret { i32, i1 } [[TMP3]]
 ;
   %2 = add nsw i32 %0, 13
   %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 -7)
   ret { i32, i1 } %3
 }
 
+define { i32, i1 } @fold_mixed_signs_second_high(i32) {
+; CHECK-LABEL: @fold_mixed_signs_second_high(
+; CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP0:%.*]], i32 -6)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP2]]
+;
+  %2 = add nsw i32 %0, 7
+  %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 -13)
+  ret { i32, i1 } %3
+}
+
 define { i8, i1 } @fold_on_constant_add_no_overflow(i8) {
 ; CHECK-LABEL: @fold_on_constant_add_no_overflow(
 ; CHECK-NEXT:    [[TMP2:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[TMP0:%.*]], i8 127)