Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1814,6 +1814,19 @@
   return nullptr;
 }
 
+Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
+  OverflowCheckFlavor OCF =
+      IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+  assert(OCF != OCF_INVALID && "unexpected!");
+
+  Value *OperationResult = nullptr;
+  Constant *OverflowResult = nullptr;
+  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+                            *II, OperationResult, OverflowResult))
+    return CreateOverflowTuple(II, OperationResult, OverflowResult);
+  return nullptr;
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -2016,8 +2029,31 @@
       return &CI;
     break;
   }
+  case Intrinsic::sadd_with_overflow: {
+    if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+      return I;
+    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+      return I;
+
+    // `add nsw` + `sadd.with.overflow` with constants should fold.
+    Value *X;
+    const APInt *RHS, *LHS;
+    Value *Arg0 = II->getArgOperand(0);
+    Value *Arg1 = II->getArgOperand(1);
+    if (match(Arg0, m_NSWAdd(m_Value(X), m_APInt(LHS))) &&
+        match(Arg1, m_APInt(RHS))) {
+      bool Overflow;
+      APInt NewC = RHS->sadd_ov(*LHS, Overflow);
+      if (!Overflow)
+        return replaceInstUsesWith(
+            *II, Builder.CreateBinaryIntrinsic(
+                     Intrinsic::sadd_with_overflow, X,
+                     ConstantInt::get(Arg1->getType(), NewC)));
+    }
+
+    break;
+  }
   case Intrinsic::uadd_with_overflow:
-  case Intrinsic::sadd_with_overflow:
   case Intrinsic::umul_with_overflow:
   case Intrinsic::smul_with_overflow:
     if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
@@ -2026,15 +2062,8 @@
 
   case Intrinsic::usub_with_overflow:
   case Intrinsic::ssub_with_overflow: {
-    OverflowCheckFlavor OCF =
-        IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
-    assert(OCF != OCF_INVALID && "unexpected!");
-
-    Value *OperationResult = nullptr;
-    Constant *OverflowResult = nullptr;
-    if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
-                              *II, OperationResult, OverflowResult))
-      return CreateOverflowTuple(II, OperationResult, OverflowResult);
+    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+      return I;
 
     break;
   }
Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -592,6 +592,8 @@
   Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D);
   Value *getSelectCondition(Value *A, Value *B);
 
+  Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+
 public:
   /// Inserts an instruction \p New before instruction \p Old
   ///
Index: llvm/test/Transforms/InstCombine/call-sadd-with-overflow.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/call-sadd-with-overflow.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32>, <2 x i32>)
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
+
+declare { i8, i1 } @llvm.sadd.with.overflow.i8(i8, i8)
+
+define { i32, i1 } @simple_fold(i32) {
+; CHECK-LABEL: @simple_fold(
+; CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP0:%.*]], i32 20)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP2]]
+;
+  %2 = add nsw i32 %0, 7
+  %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 13)
+  ret { i32, i1 } %3
+}
+
+define { i32, i1 } @fold_mixed_signs(i32) {
+; CHECK-LABEL: @fold_mixed_signs(
+; CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP0:%.*]], i32 6)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP2]]
+;
+  %2 = add nsw i32 %0, 13
+  %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 -7)
+  ret { i32, i1 } %3
+}
+
+define { i8, i1 } @no_fold_on_constant_add_overflow(i8) {
+; CHECK-LABEL: @no_fold_on_constant_add_overflow(
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i8 [[TMP0:%.*]], 127
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 [[TMP2]], i8 127)
+; CHECK-NEXT:    ret { i8, i1 } [[TMP3]]
+;
+  %2 = add nsw i8 %0, 127
+  %3 = tail call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %2, i8 127)
+  ret { i8, i1 } %3
+}
+
+define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32>) {
+; CHECK-LABEL: @fold_simple_splat_constant(
+; CHECK-NEXT:    [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[TMP0:%.*]], <2 x i32> <i32 42, i32 42>)
+; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[TMP2]]
+;
+  %2 = add nsw <2 x i32> %0, <i32 12, i32 12>
+  %3 = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> %2, <2 x i32> <i32 30, i32 30>)
+  ret { <2 x i32>, <2 x i1> } %3
+}
+
+define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32>) {
+; CHECK-LABEL: @no_fold_splat_undef_constant(
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i32> [[TMP0:%.*]], <i32 12, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[TMP2]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[TMP3]]
+;
+  %2 = add nsw <2 x i32> %0, <i32 12, i32 undef>
+  %3 = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> %2, <2 x i32> <i32 30, i32 30>)
+  ret { <2 x i32>, <2 x i1> } %3
+}
+
+define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32>, <2 x i32>) {
+; CHECK-LABEL: @no_fold_splat_not_constant(
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[TMP4]]
+;
+  %3 = add nsw <2 x i32> %0, %1
+  %4 = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> %3, <2 x i32> <i32 30, i32 30>)
+  ret { <2 x i32>, <2 x i1> } %4
+}
+
+define { i32, i1 } @fold_nuwnsw(i32) {
+; CHECK-LABEL: @fold_nuwnsw(
+; CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP0:%.*]], i32 42)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP2]]
+;
+  %2 = add nuw nsw i32 %0, 12
+  %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 30)
+  ret { i32, i1 } %3
+}
+
+define { i32, i1 } @no_fold_nuw(i32) {
+; CHECK-LABEL: @no_fold_nuw(
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i32 [[TMP0:%.*]], 12
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP2]], i32 30)
+; CHECK-NEXT:    ret { i32, i1 } [[TMP3]]
+;
+  %2 = add nuw i32 %0, 12
+  %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 30)
+  ret { i32, i1 } %3
+}
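
For reference, a minimal standalone sketch (not part of the patch) of the APInt::sadd_ov check that the new sadd_with_overflow case relies on to decide whether the two constants may be merged. It assumes only llvm/ADT/APInt.h and a throwaway main(), and reuses the same constants as the tests above.

// Sketch: APInt::sadd_ov returns the (wrapped) sum and sets Overflow when the
// signed addition does not fit in the bit width. The fold above only merges
// the constants when Overflow comes back false.
#include "llvm/ADT/APInt.h"
#include <cstdio>

int main() {
  bool Overflow;

  // 13 + (-7) in i32: no overflow, so the fold would emit a constant of 6
  // (mirrors fold_mixed_signs).
  llvm::APInt A(32, 13), B(32, -7, /*isSigned=*/true);
  llvm::APInt Sum = A.sadd_ov(B, Overflow);
  std::printf("i32: sum=%lld overflow=%d\n",
              (long long)Sum.getSExtValue(), Overflow);

  // 127 + 127 in i8: signed overflow, so the fold is skipped
  // (mirrors no_fold_on_constant_add_overflow).
  llvm::APInt C(8, 127), D(8, 127);
  llvm::APInt Wrapped = C.sadd_ov(D, Overflow);
  std::printf("i8: sum=%lld overflow=%d\n",
              (long long)Wrapped.getSExtValue(), Overflow);
  return 0;
}

The fold is only valid when the merged constant itself does not wrap; when sadd_ov reports overflow, the patch leaves both additions in place, which is what no_fold_on_constant_add_overflow exercises.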