Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -847,29 +847,49 @@ return createFMul(OpndVal, Coeff.getValue(Instr->getType())); } -// If one of the operands only has one non-zero bit, and if the other -// operand has a known-zero bit in a more significant place than it (not -// including the sign bit) the ripple may go up to and fill the zero, but -// won't change the sign. For example, (X & ~4) + 1. -static bool checkRippleForAdd(const APInt &Op0KnownZero, - const APInt &Op1KnownZero) { - APInt Op1MaybeOne = ~Op1KnownZero; - // Make sure that one of the operand has at most one bit set to 1. - if (Op1MaybeOne.countPopulation() != 1) - return false; - - // Find the most significant known 0 other than the sign bit. - int BitWidth = Op0KnownZero.getBitWidth(); - APInt Op0KnownZeroTemp(Op0KnownZero); - Op0KnownZeroTemp.clearSignBit(); - int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1; - - int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1; - assert(Op1OnePosition >= 0); - - // This also covers the case of no known zero, since in that case - // Op0ZeroPosition is -1. - return Op0ZeroPosition >= Op1OnePosition; +/// \brief Return true if we can prove that adding the two values of the +/// knownbits will not overflow. +/// Otherwise return false. +static bool checkRippleForAdd(const KnownBits &LHSKnown, + const KnownBits &RHSKnown) { + // Addition of two 2's complement numbers having opposite signs will never + // overflow. + if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) || + (LHSKnown.isNonNegative() && RHSKnown.isNegative())) + return true; + + // If either of the values is known to be non-negative, adding them can only + // overflow if the second is also non-negative, so we can assume that. + // Two non-negative numbers will only overflow if there is a carry to the + // sign bit, so we can check if even when the values are as big as possible + // there is no overflow to the sign bit. + if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { + APInt MaxLHS = ~LHSKnown.Zero; + MaxLHS.clearSignBit(); + APInt MaxRHS = ~RHSKnown.Zero; + MaxRHS.clearSignBit(); + APInt Result = std::move(MaxLHS) + std::move(MaxRHS); + return Result.isSignBitClear(); + } + + // If either of the values is known to be negative, adding them can only + // overflow if the second is also negative, so we can assume that. + // Two negative number will only overflow if there is no carry to the sign + // bit, so we can check if even when the values are as small as possible + // there is overflow to the sign bit. + if (LHSKnown.isNegative() || RHSKnown.isNegative()) { + APInt MinLHS = LHSKnown.One; + MinLHS.clearSignBit(); + APInt MinRHS = RHSKnown.One; + MinRHS.clearSignBit(); + APInt Result = std::move(MinLHS) + std::move(MinRHS); + return Result.isSignBitSet(); + } + + // If we reached here it means that we know nothing about the sign bits. + // In this case we can't know if there will be an overflow, since by + // changing the sign bits any two values can be made to overflow. + return false; } /// Return true if we can prove that: @@ -906,16 +926,8 @@ KnownBits RHSKnown(BitWidth); computeKnownBits(RHS, RHSKnown, 0, &CxtI); - // Addition of two 2's complement numbers having opposite signs will never - // overflow. - if ((LHSKnown.One[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]) || - (LHSKnown.Zero[BitWidth - 1] && RHSKnown.One[BitWidth - 1])) - return true; - // Check if carry bit of addition will not cause overflow. - if (checkRippleForAdd(LHSKnown.Zero, RHSKnown.Zero)) - return true; - if (checkRippleForAdd(RHSKnown.Zero, LHSKnown.Zero)) + if (checkRippleForAdd(LHSKnown, RHSKnown)) return true; return false; Index: test/Transforms/InstCombine/AddOverFlow.ll =================================================================== --- test/Transforms/InstCombine/AddOverFlow.ll +++ test/Transforms/InstCombine/AddOverFlow.ll @@ -95,6 +95,44 @@ ret i16 %c } +; CHECK-LABEL: @ripple_nsw3 +; CHECK: add nsw i16 %a, %b +define i16 @ripple_nsw3(i16 %x, i16 %y) { + %a = and i16 %y, 43691 + %b = and i16 %x, 21843 + %c = add i16 %a, %b + ret i16 %c +} + +; Like the previous test, but flip %a and %b +; CHECK-LABEL: @ripple_nsw4 +; CHECK: add nsw i16 %b, %a +define i16 @ripple_nsw4(i16 %x, i16 %y) { + %a = and i16 %y, 43691 + %b = and i16 %x, 21843 + %c = add i16 %b, %a + ret i16 %c +} + +; CHECK-LABEL: @ripple_nsw5 +; CHECK: add nsw i16 %a, %b +define i16 @ripple_nsw5(i16 %x, i16 %y) { + %a = or i16 %y, 43691 + %b = or i16 %x, 54613 + %c = add i16 %a, %b + ret i16 %c +} + +; Like the previous test, but flip %a and %b +; CHECK-LABEL: @ripple_nsw6 +; CHECK: add nsw i16 %b, %a +define i16 @ripple_nsw6(i16 %x, i16 %y) { + %a = or i16 %y, 43691 + %b = or i16 %x, 54613 + %c = add i16 %b, %a + ret i16 %c +} + ; CHECK-LABEL: @ripple_no_nsw1 ; CHECK: add i32 %a, %x define i32 @ripple_no_nsw1(i32 %x, i32 %y) { @@ -116,3 +154,41 @@ %c = add i16 %a, %b ret i16 %c } + +; CHECK-LABEL: @ripple_no_nsw3 +; CHECK: add i16 %a, %b +define i16 @ripple_no_nsw3(i16 %x, i16 %y) { + %a = and i16 %y, 43691 + %b = and i16 %x, 21845 + %c = add i16 %a, %b + ret i16 %c +} + +; Like the previous test, but flip %a and %b +; CHECK-LABEL: @ripple_no_nsw4 +; CHECK: add i16 %b, %a +define i16 @ripple_no_nsw4(i16 %x, i16 %y) { + %a = and i16 %y, 43691 + %b = and i16 %x, 21845 + %c = add i16 %b, %a + ret i16 %c +} + +; CHECK-LABEL: @ripple_no_nsw5 +; CHECK: add i16 %a, %b +define i16 @ripple_no_nsw5(i16 %x, i16 %y) { + %a = or i16 %y, 43689 + %b = or i16 %x, 54613 + %c = add i16 %a, %b + ret i16 %c +} + +; Like the previous test, but flip %a and %b +; CHECK-LABEL: @ripple_no_nsw6 +; CHECK: add i16 %b, %a +define i16 @ripple_no_nsw6(i16 %x, i16 %y) { + %a = or i16 %y, 43689 + %b = or i16 %x, 54613 + %c = add i16 %b, %a + ret i16 %c +} Index: test/Transforms/InstCombine/demand_shrink_nsw.ll =================================================================== --- test/Transforms/InstCombine/demand_shrink_nsw.ll +++ test/Transforms/InstCombine/demand_shrink_nsw.ll @@ -3,7 +3,7 @@ ; The constant at %v35 should be shrunk, but this must lead to the nsw flag of ; %v43 getting removed so that %v44 is not illegally optimized away. ; CHECK-LABEL: @foo -; CHECK: %v35 = add nuw i32 %v34, 1362915575 +; CHECK: %v35 = add nuw nsw i32 %v34, 1362915575 ; ... ; CHECK: add nuw i32 %v42, 1533579450 ; CHECK-NEXT: %v44 = or i32 %v43, -2147483648