Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -223,6 +223,11 @@ return !Overflow; } +static bool MaintainNoUnsignedWrap(BinaryOperator &I) { + OverflowingBinaryOperator *OBO = dyn_cast(&I); + return OBO && OBO->hasNoUnsignedWrap(); +} + /// Conservatively clears subclassOptionalData after a reassociation or /// commutation. We preserve fast-math flags when applicable as they can be /// preserved. @@ -329,14 +334,20 @@ I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - if (MaintainNoSignedWrap(I, B, C) && + + bool IsNUW = MaintainNoUnsignedWrap(I); + bool IsNSW = MaintainNoSignedWrap(I, B, C); + + ClearSubclassDataAfterReassociation(I); + + if (IsNUW && MaintainNoUnsignedWrap(*Op0)) + I.setHasNoUnsignedWrap(true); + + if (IsNSW && (!Op0 || (isa(Op0) && Op0->hasNoSignedWrap()))) { // Note: this is only valid because SimplifyBinOp doesn't look at // the operands to Op0. - I.clearSubclassOptionalData(); I.setHasNoSignedWrap(true); - } else { - ClearSubclassDataAfterReassociation(I); } Changed = true; @@ -421,8 +432,14 @@ Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) && match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) { - BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B); - if (isa(NewBO)) { + bool IsNUW = MaintainNoUnsignedWrap(I) && + MaintainNoUnsignedWrap(*Op0) && + MaintainNoUnsignedWrap(*Op1); + BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ? + BinaryOperator::CreateNUW(Opcode, A, B) : + BinaryOperator::Create(Opcode, A, B); + + if (isa(NewBO)) { FastMathFlags Flags = I.getFastMathFlags(); Flags &= Op0->getFastMathFlags(); Flags &= Op1->getFastMathFlags(); @@ -435,6 +452,8 @@ // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. ClearSubclassDataAfterReassociation(I); + if (IsNUW) + I.setHasNoUnsignedWrap(true); Changed = true; continue; @@ -577,14 +596,21 @@ if (BinaryOperator *BO = dyn_cast(SimplifiedInst)) { if (isa(SimplifiedInst)) { bool HasNSW = false; - if (isa(&I)) + bool HasNUW = false; + if (isa(&I)) { HasNSW = I.hasNoSignedWrap(); + HasNUW = I.hasNoUnsignedWrap(); + } - if (auto *LOBO = dyn_cast(LHS)) + if (auto *LOBO = dyn_cast(LHS)) { HasNSW &= LOBO->hasNoSignedWrap(); + HasNUW &= LOBO->hasNoUnsignedWrap(); + } - if (auto *ROBO = dyn_cast(RHS)) + if (auto *ROBO = dyn_cast(RHS)) { HasNSW &= ROBO->hasNoSignedWrap(); + HasNUW &= ROBO->hasNoUnsignedWrap(); + } // We can propagate 'nsw' if we know that // %Y = mul nsw i16 %X, C @@ -596,8 +622,11 @@ const APInt *CInt; if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) - if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue()) - BO->setHasNoSignedWrap(HasNSW); + if (match(V, m_APInt(CInt))) { + if (!CInt->isMinSignedValue()) + BO->setHasNoSignedWrap(HasNSW); + BO->setHasNoUnsignedWrap(HasNUW); + } } } } Index: test/Transforms/InstCombine/reassociate-nuw.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/reassociate-nuw.ll @@ -0,0 +1,111 @@ +; RUN: opt -instcombine -S %s | FileCheck %s + +; CHECK-LABEL: @reassoc_add_nuw( +; CHECK-NEXT: add nuw i32 %x, 68 +define i32 @reassoc_add_nuw(i32 %x) { + %add0 = add nuw i32 %x, 4 + %add1 = add nuw i32 %add0, 64 + ret i32 %add1 +} + +; This does the wrong thing because the sub is turned into an add of a +; negative constant first which drops the nuw. + +; CHECK-LABEL: @reassoc_sub_nuw( +; CHECK-NEXT: add i32 %x, -68 +define i32 @reassoc_sub_nuw(i32 %x) { + %sub0 = sub nuw i32 %x, 4 + %sub1 = sub nuw i32 %sub0, 64 + ret i32 %sub1 +} + +; CHECK-LABEL: @reassoc_mul_nuw( +; CHECK-NEXT: mul nuw i32 %x, 260 +define i32 @reassoc_mul_nuw(i32 %x) { + %mul0 = mul nuw i32 %x, 4 + %mul1 = mul nuw i32 %mul0, 65 + ret i32 %mul1 +} + +; CHECK-LABEL: @no_reassoc_add_nuw_none( +; CHECK-NEXT: add i32 %x, 68 +define i32 @no_reassoc_add_nuw_none(i32 %x) { + %add0 = add i32 %x, 4 + %add1 = add nuw i32 %add0, 64 + ret i32 %add1 +} + +; CHECK-LABEL: @no_reassoc_add_none_nuw( +; CHECK-NEXT: add i32 %x, 68 +define i32 @no_reassoc_add_none_nuw(i32 %x) { + %add0 = add nuw i32 %x, 4 + %add1 = add i32 %add0, 64 + ret i32 %add1 +} + +; CHECK-LABEL: @reassoc_x2_add_nuw( +; CHECK-NEXT: add nuw i32 %x, %y +; CHECK-NEXT: add nuw i32 %add1, 12 +define i32 @reassoc_x2_add_nuw(i32 %x, i32 %y) { + %add0 = add nuw i32 %x, 4 + %add1 = add nuw i32 %y, 8 + %add2 = add nuw i32 %add0, %add1 + ret i32 %add2 +} + +; CHECK-LABEL: @reassoc_x2_mul_nuw( +; CHECK-NEXT: %mul1 = mul i32 %x, %y +; CHECK-NEXT: %mul2 = mul nuw i32 %mul1, 45 +define i32 @reassoc_x2_mul_nuw(i32 %x, i32 %y) { + %mul0 = mul nuw i32 %x, 5 + %mul1 = mul nuw i32 %y, 9 + %mul2 = mul nuw i32 %mul0, %mul1 + ret i32 %mul2 +} + +; CHECK-LABEL: @reassoc_x2_sub_nuw( +; CHECK-NEXT: %sub0 = add i32 %x, -4 +; CHECK-NEXT: %sub1 = add i32 %y, -8 +; CHECK-NEXT: %sub2 = sub nuw i32 %sub0, %sub1 +define i32 @reassoc_x2_sub_nuw(i32 %x, i32 %y) { + %sub0 = sub nuw i32 %x, 4 + %sub1 = sub nuw i32 %y, 8 + %sub2 = sub nuw i32 %sub0, %sub1 + ret i32 %sub2 +} + +; CHECK-LABEL: @tryFactorization_add_nuw_mul_nuw( +; CHECK-NEXT: %add2 = shl nuw i32 %x, 2 +; CHECK-NEXT: ret i32 %add2 +define i32 @tryFactorization_add_nuw_mul_nuw(i32 %x) { + %mul1 = mul nuw i32 %x, 3 + %add2 = add nuw i32 %mul1, %x + ret i32 %add2 +} + +; CHECK-LABEL: @tryFactorization_add_nuw_mul_nuw_int_max( +; CHECK-NEXT: %add2 = shl nuw i32 %x, 31 +; CHECK-NEXT:ret i32 %add2 +define i32 @tryFactorization_add_nuw_mul_nuw_int_max(i32 %x) { + %mul1 = mul nuw i32 %x, 2147483647 + %add2 = add nuw i32 %mul1, %x + ret i32 %add2 +} + +; CHECK-LABEL: @tryFactorization_add_mul_nuw( +; CHECK-NEXT: %add2 = shl i32 %x, 2 +; CHECK-NEXT:ret i32 %add2 +define i32 @tryFactorization_add_mul_nuw(i32 %x) { + %mul1 = mul i32 %x, 3 + %add2 = add nuw i32 %mul1, %x + ret i32 %add2 +} + +; CHECK-LABEL: @tryFactorization_add_nuw_mul( +; CHECK-NEXT: %add2 = shl i32 %x, 2 +; CHECK-NEXT: ret i32 %add2 +define i32 @tryFactorization_add_nuw_mul(i32 %x) { + %mul1 = mul nuw i32 %x, 3 + %add2 = add i32 %mul1, %x + ret i32 %add2 +} Index: test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -172,7 +172,7 @@ ; PROLOG: loop_exiting_bb1.7: ; PROLOG-NEXT: switch i64 %sum.next.6, label %loop_latch.7 ; PROLOG: loop_latch.7: -; PROLOG-NEXT: %iv_next.7 = add nsw i64 %iv, 8 +; PROLOG-NEXT: %iv_next.7 = add nuw nsw i64 %iv, 8 ; PROLOG-NEXT: %sum.next.7 = add i64 %sum.next.6, %add ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %iv_next.7, %trip ; PROLOG-NEXT: br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header @@ -426,7 +426,7 @@ ; PROLOG-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.prol, %exit1.loopexit1 ] ; PROLOG-NEXT: ret i64 %result ; PROLOG: loop_latch.7: -; PROLOG: %iv_next.7 = add nsw i64 %iv, 8 +; PROLOG: %iv_next.7 = add nuw nsw i64 %iv, 8 entry: br label %loop_header @@ -560,7 +560,7 @@ } ; Nested loop and inner loop is unrolled -; FIXME: we cannot unroll with epilog remainder currently, because +; FIXME: we cannot unroll with epilog remainder currently, because ; the outer loop does not contain the epilog preheader and epilog exit (while ; infact it should). This causes us to choke up on LCSSA form being incorrect in ; outer loop. However, the exit block where LCSSA fails, is infact still within @@ -578,7 +578,7 @@ ; PROLOG: %lcmp.mod = icmp eq i64 ; PROLOG-NEXT: br i1 %lcmp.mod, label %innerH.prol.loopexit, label %innerH.prol.preheader ; PROLOG: latch.6: -; PROLOG-NEXT: %tmp4.7 = add nsw i64 %tmp3, 8 +; PROLOG-NEXT: %tmp4.7 = add nuw nsw i64 %tmp3, 8 ; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 ; PROLOG: latch.7 ; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp4.7, 100