Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1814,6 +1814,24 @@
   return nullptr;
 }
 
+/// Shared folding for the *.with.overflow intrinsics: hands both call operands
+/// to OptimizeOverflowCheck and, when it succeeds, returns the replacement
+/// built by CreateOverflowTuple from the computed result/overflow pair.
+/// Returns nullptr when nothing was folded so the caller can try other folds.
+Instruction *InstCombiner::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
+  OverflowCheckFlavor OCF =
+      IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+  assert(OCF != OCF_INVALID && "unexpected!");
+
+  Value *OperationResult = nullptr;
+  Constant *OverflowResult = nullptr;
+  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+                            *II, OperationResult, OverflowResult)) {
+    return CreateOverflowTuple(II, OperationResult, OverflowResult);
+  }
+  return nullptr;
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -2016,8 +2034,51 @@
       return &CI;
     break;
   }
+  case Intrinsic::sadd_with_overflow: {
+    if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
+      return I;
+    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+      return I;
+
+    // `add nsw` + `sadd.with.overflow` with constants should fold. The cases
+    // for `.add_nsw(C1).sadd_overflow(C2)`:
+    //
+    // - Same sign can be folded to a single overflow call, as long as
+    //   C1 + C2 does not itself overflow
+    // - Opposite signs with |C2|>|C1| can be folded to a single overflow call
+    // - Opposite signs with |C2|<=|C1| can be folded to a single nsw call
+    //
+    // FIXME: Add the last case.
+    Value *X = nullptr;
+    ConstantInt *RHS = nullptr;
+    ConstantInt *LHS = nullptr;
+    if (match(II->getArgOperand(0), m_NSWAdd(m_Value(X), m_ConstantInt(LHS))) &&
+        match(II->getArgOperand(1), m_ConstantInt(RHS))) {
+      const APInt &LHSValue = LHS->getValue();
+      const APInt &RHSValue = RHS->getValue();
+
+      // The merged constant must be representable. If C1 + C2 wraps (only
+      // possible when both have the same sign), `sadd.with.overflow(X, C1+C2)`
+      // reports overflow for different X than the original pair does, e.g.
+      // C1 = INT_MAX, C2 = 1 would become an add of INT_MIN.
+      bool SumOverflows = false;
+      const APInt Sum = LHSValue.sadd_ov(RHSValue, SumOverflows);
+
+      // Compare magnitudes as unsigned: abs() of the minimum signed value is
+      // that same bit pattern, which a signed compare would read as negative.
+      const bool SameSign = LHSValue.isNegative() == RHSValue.isNegative();
+      const bool RHSDominates = RHSValue.abs().ugt(LHSValue.abs());
+      if (!SumOverflows && (SameSign || RHSDominates)) {
+        return replaceInstUsesWith(
+            *II, Builder.CreateBinaryIntrinsic(
+                     Intrinsic::sadd_with_overflow, X,
+                     ConstantInt::get(RHS->getType(), Sum)));
+      }
+    }
+
+    break;
+  }
   case Intrinsic::uadd_with_overflow:
-  case Intrinsic::sadd_with_overflow:
   case Intrinsic::umul_with_overflow:
   case Intrinsic::smul_with_overflow:
     if (Instruction *I = canonicalizeConstantArg0ToArg1(CI))
@@ -2026,15 +2087,8 @@
 
   case Intrinsic::usub_with_overflow:
   case Intrinsic::ssub_with_overflow: {
-    OverflowCheckFlavor OCF =
-        IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
-    assert(OCF != OCF_INVALID && "unexpected!");
-
-    Value *OperationResult = nullptr;
-    Constant *OverflowResult = nullptr;
-    if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
-                              *II, OperationResult, OverflowResult))
-      return CreateOverflowTuple(II, OperationResult, OverflowResult);
+    if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
+      return I;
 
     break;
   }
Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -592,6 +592,8 @@
   Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D);
   Value *getSelectCondition(Value *A, Value *B);
 
+  Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+
 public:
   /// Inserts an instruction \p New before instruction \p Old
   ///
Index: llvm/test/Transforms/InstCombine/call-add-with-overflow.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/call-add-with-overflow.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
+
+; Same-sign constants (7 and 13): expected to merge into one call adding 20.
+define { i32, i1 } @fold_sadd_with_overflow(i32) {
+  ; CHECK-LABEL: @fold_sadd_with_overflow(
+  ; CHECK: %2 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %0, i32 20)
+  ; CHECK-NEXT: ret { i32, i1 } %2
+  %2 = add nsw i32 %0, 7
+  %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 13)
+  ret { i32, i1 } %3
+}
+
+; Opposite signs with |C2| <= |C1| (13 and -7): expected to stay unchanged.
+define { i32, i1 } @fold_sadd_with_overflow_complex(i32) {
+  ; CHECK-LABEL: @fold_sadd_with_overflow_complex(
+  ; CHECK: %2 = add nsw i32 %0, 13
+  ; CHECK: %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 -7)
+  ; CHECK-NEXT: ret { i32, i1 } %3
+  %2 = add nsw i32 %0, 13
+  %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %2, i32 -7)
+  ret { i32, i1 } %3
+}