Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2135,14 +2135,38 @@
     }
     break;
   }
 
+  case Intrinsic::umul_with_overflow:
   case Intrinsic::smul_with_overflow:
     if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
       return I;
     LLVM_FALLTHROUGH;
 
-  case Intrinsic::usub_with_overflow:
+  case Intrinsic::usub_with_overflow: {
+    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+      return I;
+
+    break;
+  }
+  case Intrinsic::ssub_with_overflow: {
+    assert(CI.getNumArgOperands() > 1 && "Need at least 2 args for ssubo");
+    Constant *C;
+    Value *Arg0 = II->getArgOperand(0);
+    Value *Arg1 = II->getArgOperand(1);
+    // Given a constant C that is not the minimum or maximum signed value
+    // for an integer of the given bit width:
+    //
+    // ssubo X, C -> saddo X, -C
+    if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
+      // Create a copy of the original constant and negate it.
+      Value *NegVal = ConstantExpr::getNeg(C);
+      // Build a saddo call that is equivalent to the discovered
+      // ssubo call.
+      return replaceInstUsesWith(
+          *II, Builder.CreateBinaryIntrinsic(
+                   Intrinsic::sadd_with_overflow, Arg0, NegVal));
+    }
 
     if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
       return I;
Index: llvm/test/Transforms/InstCombine/ssub-with-overflow.ll
===================================================================
--- llvm/test/Transforms/InstCombine/ssub-with-overflow.ll
+++ llvm/test/Transforms/InstCombine/ssub-with-overflow.ll
@@ -11,9 +11,8 @@
 
 define { i32, i1 } @simple_fold(i32 %x) {
 ; CHECK-LABEL: @simple_fold(
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X:%.*]], -7
-; CHECK-NEXT:    [[B:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[A]], i32 13)
-; CHECK-NEXT:    ret { i32, i1 } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 -20)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %a = sub nsw i32 %x, 7
   %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 13)
@@ -22,9 +21,8 @@
 
 define { i32, i1 } @fold_mixed_signs(i32 %x) {
 ; CHECK-LABEL: @fold_mixed_signs(
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X:%.*]], -13
-; CHECK-NEXT:    [[B:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[A]], i32 -7)
-; CHECK-NEXT:    ret { i32, i1 } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 -6)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %a = sub nsw i32 %x, 13
   %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 -7)
@@ -33,9 +31,8 @@
 
 define { i8, i1 } @fold_on_constant_sub_no_overflow(i8 %x) {
 ; CHECK-LABEL: @fold_on_constant_sub_no_overflow(
-; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X:%.*]], -100
-; CHECK-NEXT:    [[B:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[A]], i8 28)
-; CHECK-NEXT:    ret { i8, i1 } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[X:%.*]], i8 -128)
+; CHECK-NEXT:    ret { i8, i1 } [[TMP1]]
 ;
   %a = sub nsw i8 %x, 100
   %b = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 28)
@@ -45,8 +42,8 @@
 define { i8, i1 } @no_fold_on_constant_sub_overflow(i8 %x) {
 ; CHECK-LABEL: @no_fold_on_constant_sub_overflow(
 ; CHECK-NEXT:    [[A:%.*]] = add nsw i8 [[X:%.*]], -100
-; CHECK-NEXT:    [[B:%.*]] = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 [[A]], i8 29)
-; CHECK-NEXT:    ret { i8, i1 } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[A]], i8 -29)
+; CHECK-NEXT:    ret { i8, i1 } [[TMP1]]
 ;
   %a = sub nsw i8 %x, 100
   %b = tail call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 %a, i8 29)
@@ -55,9 +52,8 @@
 
 define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @fold_simple_splat_constant(
-; CHECK-NEXT:    [[A:%.*]] = add nsw <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> )
-; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> )
+; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[TMP1]]
 ;
   %a = sub nsw <2 x i32> %x,
   %b = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> )
@@ -67,8 +63,8 @@
 define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @no_fold_splat_undef_constant(
 ; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> )
-; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> )
+; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[TMP1]]
 ;
   %a = sub nsw <2 x i32> %x,
   %b = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> )
@@ -78,8 +74,8 @@
 define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @no_fold_splat_not_constant(
 ; CHECK-NEXT:    [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> )
-; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> )
+; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[TMP1]]
 ;
   %a = sub nsw <2 x i32> %x, %y
   %b = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> )
@@ -88,9 +84,8 @@
 
 define { i32, i1 } @fold_nuwnsw(i32 %x) {
 ; CHECK-LABEL: @fold_nuwnsw(
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X:%.*]], -12
-; CHECK-NEXT:    [[B:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[A]], i32 30)
-; CHECK-NEXT:    ret { i32, i1 } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 -42)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %a = sub nuw nsw i32 %x, 12
   %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 30)
@@ -100,8 +95,8 @@
 define { i32, i1 } @no_fold_nuw(i32 %x) {
 ; CHECK-LABEL: @no_fold_nuw(
 ; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], -12
-; CHECK-NEXT:    [[B:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[A]], i32 30)
-; CHECK-NEXT:    ret { i32, i1 } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[A]], i32 -30)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %a = sub nuw i32 %x, 12
   %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 30)
@@ -121,9 +116,8 @@
 
 define { i32, i1 } @fold_add_simple(i32 %x) {
 ; CHECK-LABEL: @fold_add_simple(
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X:%.*]], -12
-; CHECK-NEXT:    [[B:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[A]], i32 30)
-; CHECK-NEXT:    ret { i32, i1 } [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X:%.*]], i32 -42)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %a = add nsw i32 %x, -12
   %b = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 30)
@@ -141,8 +135,8 @@
 
 define { <2 x i32>, <2 x i1> } @keep_ssubo_non_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @keep_ssubo_non_splat(
-; CHECK-NEXT:    [[A:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> )
-; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[A]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> )
+; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[TMP1]]
 ;
   %a = tail call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %x, <2 x i32> )
   ret { <2 x i32>, <2 x i1> } %a
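
For reference, a minimal before/after sketch of the fold this patch adds, written in the same style as the tests above. The function name @ssubo_const_sketch and the constant 7 are illustrative and not taken from the patch; the expected output is inferred from the CHECK lines in ssub-with-overflow.ll.

declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)

; Input: ssubo of a constant that is not the minimum signed value.
define { i32, i1 } @ssubo_const_sketch(i32 %x) {
  %r = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 7)
  ret { i32, i1 } %r
}

; Expected after `opt -instcombine`: the ssubo becomes saddo of the
; negated constant (ssubo X, C -> saddo X, -C), roughly:
;   %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 -7)
;   ret { i32, i1 } %r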