Index: lib/Transforms/InstCombine/InstCombine.h
===================================================================
--- lib/Transforms/InstCombine/InstCombine.h
+++ lib/Transforms/InstCombine/InstCombine.h
@@ -285,6 +285,7 @@
   bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
   bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
+  bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
   Value *EmitGEPOffset(User *GEP);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1008,6 +1008,51 @@
   return false;
 }
 
+/// \brief Return true if we can prove that:
+///     (mul LHS, RHS) === (mul nsw LHS, RHS)
+bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
+                                            Instruction *CxtI) {
+  if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {
+
+    // Multiplying n * m significant bits yields a result of n + m significant
+    // bits. If the total number of significant bits does not exceed the
+    // result bit width (minus 1), there is no overflow.
+    // This means if we have enough leading sign bits in the operands
+    // we can guarantee that the result does not overflow.
+    // Ref: "Hacker's Delight" by Henry Warren
+    unsigned BitWidth = IT->getBitWidth();
+
+    // Note that underestimating the number of sign bits gives a more
+    // conservative answer.
+    unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
+                        ComputeNumSignBits(RHS, 0, CxtI);
+
+    // First handle the easy case: if we have enough sign bits there's
+    // definitely no overflow.
+    if (SignBits > BitWidth + 1)
+      return true;
+
+    // There are two ambiguous cases where there can be no overflow:
+    //   SignBits == BitWidth + 1  and
+    //   SignBits == BitWidth
+    // The second case is difficult to check, therefore we only handle the
+    // first case.
+    if (SignBits == BitWidth + 1) {
+      // It overflows only when both arguments are negative and the true
+      // product is exactly the minimum negative number.
+      // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
+      // For simplicity we just check if at least one side is not negative.
+      bool LHSNonNegative, LHSNegative;
+      bool RHSNonNegative, RHSNegative;
+      ComputeSignBit(LHS, LHSNonNegative, LHSNegative, DL, 0, AT, CxtI, DT);
+      ComputeSignBit(RHS, RHSNonNegative, RHSNegative, DL, 0, AT, CxtI, DT);
+      if (LHSNonNegative || RHSNonNegative)
+        return true;
+    }
+  }
+  return false;
+}
+
 // Checks if any operand is negative and we can convert add to sub.
 // This function checks for following negative patterns
 //   ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
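
Note (commentary, not part of the patch): the sign-bit rule above can be checked exhaustively at a small bit width. The standalone C++ sketch below brute-forces every i8 pair and asserts both claims: SignBits > BitWidth + 1 never overflows, and SignBits == BitWidth + 1 is safe whenever at least one operand is non-negative. The helper numSignBits is a hypothetical stand-in for the best answer ComputeNumSignBits could give for a constant.

// Exhaustive sanity check of the WillNotOverflowSignedMul rule at i8.
#include <cassert>
#include <cstdint>

// Leading sign bits of an 8-bit value: the sign bit itself plus every
// consecutive lower bit that copies it.
static unsigned numSignBits(uint8_t U) {
  unsigned N = 1;
  for (int Bit = 6; Bit >= 0; --Bit) {
    if (((U >> Bit) & 1) != ((U >> 7) & 1))
      break;
    ++N;
  }
  return N;
}

int main() {
  const unsigned BitWidth = 8;
  for (int A = -128; A <= 127; ++A) {
    for (int B = -128; B <= 127; ++B) {
      int Product = A * B; // exact, since int is wider than i8
      bool Overflows = Product < -128 || Product > 127;
      unsigned SignBits = numSignBits((uint8_t)A) + numSignBits((uint8_t)B);
      if (SignBits > BitWidth + 1)
        assert(!Overflows && "easy case must never overflow");
      if (SignBits == BitWidth + 1 && (A >= 0 || B >= 0))
        assert(!Overflows && "ambiguous case is safe if one side >= 0");
    }
  }
  return 0;
}

The i8 analogue of the comment's i16 corner case is 0xf0 * 0xf8 = (-16) * (-8) = 128: the operands have 4 + 5 = BitWidth + 1 sign bits, both are negative, and the product overflows, so the sign check in the SignBits == BitWidth + 1 case really is needed.
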
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -427,6 +427,15 @@
         return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
       }
     }
+    if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
+      if (WillNotOverflowSignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
+      }
+    } else {
+      if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
+      }
+    }
     break;
   }
   case Intrinsic::umul_with_overflow: {
@@ -477,6 +486,12 @@
                                    /*ReUseName*/false);
       }
     }
+    if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
+      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+      if (WillNotOverflowSignedMul(LHS, RHS, II)) {
+        return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
+      }
+    }
     break;
   case Intrinsic::minnum:
   case Intrinsic::maxnum: {
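
Note (commentary, not part of the patch): each of these folds rewrites the intrinsic's {value, overflow} pair into a flag-carrying arithmetic op plus a constant-false overflow bit once overflow has been ruled out. A minimal C++ model of the smul case, assuming a compiler that provides __builtin_smul_overflow (GCC and Clang do):

// Model of @llvm.smul.with.overflow.i32 and of its folded form.
#include <cassert>

struct OverflowResult {
  int Value;
  bool Overflow;
};

// What the intrinsic computes.
static OverflowResult smulWithOverflow(int A, int B) {
  OverflowResult R;
  R.Overflow = __builtin_smul_overflow(A, B, &R.Value);
  return R;
}

// What InstCombine emits once WillNotOverflowSignedMul proves the
// multiply safe: a plain (nsw) multiply and a constant false flag.
static OverflowResult smulFolded(int A, int B) {
  return OverflowResult{A * B, false};
}

int main() {
  // Mirrors @smultest1_nsw below: operands masked to 12 and 19
  // significant bits, so the product always fits in a signed 32-bit int.
  for (int A = 0; A <= 0xfff; A += 0x111) {
    for (int B = 0; B <= 0x7ffff; B += 0x7777) {
      OverflowResult Slow = smulWithOverflow(A, B);
      OverflowResult Fast = smulFolded(A, B);
      assert(Slow.Value == Fast.Value && Slow.Overflow == Fast.Overflow);
    }
  }
  return 0;
}

Under that no-overflow proof the two forms are observationally identical, which is exactly why the fold is sound.
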
Index: test/Transforms/InstCombine/intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/intrinsics.ll
+++ test/Transforms/InstCombine/intrinsics.ll
@@ -1,10 +1,17 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 %overflow.result = type {i8, i1}
+%ov.result.32 = type { i32, i1 }
 
-declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
-declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
+
+declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
+declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+declare %ov.result.32 @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
 declare double @llvm.powi.f64(double, i32) nounwind readonly
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
@@ -91,18 +98,97 @@
 }
 
 ; PR20194
-define { i32, i1 } @saddtest1(i8 %a, i8 %b) {
+define %ov.result.32 @saddtest_nsw(i8 %a, i8 %b) {
   %A = sext i8 %a to i32
   %B = sext i8 %b to i32
-  %x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
-  ret { i32, i1 } %x
-; CHECK-LABEL: @saddtest1
+  %x = call %ov.result.32 @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @saddtest_nsw
 ; CHECK: %x = add nsw i32 %A, %B
-; CHECK-NEXT: %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
-; CHECK-NEXT: ret { i32, i1 } %1
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
 }
 
+define %ov.result.32 @uaddtest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 2147483647
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.uadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @uaddtest_nuw
+; CHECK: %x = add nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @ssubtest_nsw(i8 %a, i8 %b) {
+  %A = sext i8 %a to i32
+  %B = sext i8 %b to i32
+  %x = call %ov.result.32 @llvm.ssub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @ssubtest_nsw
+; CHECK: %x = sub nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @usubtest_nuw(i32 %a, i32 %b) {
+  %A = or i32 %a, 2147483648
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.usub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @usubtest_nuw
+; CHECK: %x = sub nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest1_nsw(i32 %a, i32 %b) {
+  %A = and i32 %a, 4095 ; 0xfff
+  %B = and i32 %b, 524287 ; 0x7ffff
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest1_nsw
+; CHECK: %x = mul nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest2_nsw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 16
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest2_nsw
+; CHECK: %x = mul nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest3_sw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 15
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest3_sw
+; CHECK: %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+; CHECK-NEXT: ret %ov.result.32 %x
+}
+
+define %ov.result.32 @umultest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 65535 ; 0xffff
+  %B = and i32 %b, 65535 ; 0xffff
+  %x = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @umultest_nuw
+; CHECK: %x = mul nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT: ret %ov.result.32 %1
+}
+
 define i8 @umultest1(i8 %A, i1* %overflowPtr) {
   %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
   %y = extractvalue %overflow.result %x, 0
@@ -125,9 +211,6 @@
 ; CHECK-NEXT: ret i8 %A
 }
 
-%ov.result.32 = type { i32, i1 }
-declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
-
 define i32 @umultest3(i32 %n) nounwind {
   %shr = lshr i32 %n, 2
   %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 3)
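
Note (commentary, not part of the patch): for the record, the i16 corner case cited in the new WillNotOverflowSignedMul comment checks out; a tiny C++ verification:

// Confirms that 0xff00 * 0xff80 = 0x8000 overflows i16 even though the
// operands have BitWidth + 1 = 17 leading sign bits in total.
#include <cstdint>
#include <cstdio>

int main() {
  int16_t A = (int16_t)0xff00;            // -256, 8 sign bits
  int16_t B = (int16_t)0xff80;            // -128, 9 sign bits
  int32_t Wide = (int32_t)A * (int32_t)B; // exact product: 32768
  bool Overflows = Wide < INT16_MIN || Wide > INT16_MAX;
  std::printf("%d * %d = %d, overflows i16: %s\n", (int)A, (int)B, (int)Wide,
              Overflows ? "yes" : "no");  // -256 * -128 = 32768: yes
  return 0;
}

This is exactly the case @smultest3_sw pins down: with only BitWidth + 1 sign bits and both operands possibly negative, the fold must not fire, so the test expects the intrinsic call to survive.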