Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -204,6 +204,14 @@ return !Overflow; } +static bool MaintainNoUnsignedWrap(BinaryOperator &I) { + OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I); + if (!OBO || !OBO->hasNoUnsignedWrap()) + return false; + + return true; +} + /// Conservatively clears subclassOptionalData after a reassociation or /// commutation. We preserve fast-math flags when applicable as they can be /// preserved. @@ -310,14 +318,20 @@ I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - if (MaintainNoSignedWrap(I, B, C) && + + bool IsNUW = MaintainNoUnsignedWrap(I); + bool IsNSW = MaintainNoSignedWrap(I, B, C); + + ClearSubclassDataAfterReassociation(I); + + if (IsNUW && MaintainNoUnsignedWrap(*Op0)) + I.setHasNoUnsignedWrap(true); + + if (IsNSW && (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) { // Note: this is only valid because SimplifyBinOp doesn't look at // the operands to Op0. - I.clearSubclassOptionalData(); I.setHasNoSignedWrap(true); - } else { - ClearSubclassDataAfterReassociation(I); } Changed = true; @@ -406,8 +420,15 @@ Value *B = Op1->getOperand(0); Constant *C2 = cast<Constant>(Op1->getOperand(1)); + bool IsNUW = MaintainNoUnsignedWrap(I) && + MaintainNoUnsignedWrap(*Op0) && + MaintainNoUnsignedWrap(*Op1); + + BinaryOperator *New = (IsNUW && Opcode == Instruction::Add) ? + BinaryOperator::CreateNUW(Opcode, A, B) : + BinaryOperator::Create(Opcode, A, B); + Constant *Folded = ConstantExpr::get(Opcode, C1, C2); - BinaryOperator *New = BinaryOperator::Create(Opcode, A, B); if (isa<FPMathOperator>(New)) { FastMathFlags Flags = I.getFastMathFlags(); Flags &= Op0->getFastMathFlags(); @@ -421,6 +442,8 @@ // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. 
ClearSubclassDataAfterReassociation(I); + if (IsNUW) + I.setHasNoUnsignedWrap(true); Changed = true; continue; @@ -607,14 +630,21 @@ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) { if (isa<OverflowingBinaryOperator>(SimplifiedInst)) { bool HasNSW = false; - if (isa<OverflowingBinaryOperator>(&I)) + bool HasNUW = false; + if (isa<OverflowingBinaryOperator>(&I)) { HasNSW = I.hasNoSignedWrap(); + HasNUW = I.hasNoUnsignedWrap(); + } - if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) + if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) { HasNSW &= LOBO->hasNoSignedWrap(); + HasNUW &= LOBO->hasNoUnsignedWrap(); + } - if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) + if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) { HasNSW &= ROBO->hasNoSignedWrap(); + HasNUW &= ROBO->hasNoUnsignedWrap(); + } // We can propagate 'nsw' if we know that // %Y = mul nsw i16 %X, C Index: test/Transforms/InstCombine/reassociate-nuw.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/reassociate-nuw.ll @@ -0,0 +1,75 @@ +; RUN: opt -instcombine -S %s | FileCheck %s + +; CHECK-LABEL: @reassoc_add_nuw( +; CHECK-NEXT: add nuw i32 %x, 68 +define i32 @reassoc_add_nuw(i32 %x) { + %add0 = add nuw i32 %x, 4 + %add1 = add nuw i32 %add0, 64 + ret i32 %add1 +} + +; This does the wrong thing because the sub is turned into an add of a +; negative constant first, which drops the nuw. 
+ +; CHECK-LABEL: @reassoc_sub_nuw( +; CHECK-NEXT: add i32 %x, -68 +define i32 @reassoc_sub_nuw(i32 %x) { + %sub0 = sub nuw i32 %x, 4 + %sub1 = sub nuw i32 %sub0, 64 + ret i32 %sub1 +} + +; CHECK-LABEL: @reassoc_mul_nuw( +; CHECK-NEXT: mul nuw i32 %x, 260 +define i32 @reassoc_mul_nuw(i32 %x) { + %mul0 = mul nuw i32 %x, 4 + %mul1 = mul nuw i32 %mul0, 65 + ret i32 %mul1 +} + +; CHECK-LABEL: @no_reassoc_add_nuw_none( +; CHECK-NEXT: add i32 %x, 68 +define i32 @no_reassoc_add_nuw_none(i32 %x) { + %add0 = add i32 %x, 4 + %add1 = add nuw i32 %add0, 64 + ret i32 %add1 +} + +; CHECK-LABEL: @no_reassoc_add_none_nuw( +; CHECK-NEXT: add i32 %x, 68 +define i32 @no_reassoc_add_none_nuw(i32 %x) { + %add0 = add nuw i32 %x, 4 + %add1 = add i32 %add0, 64 + ret i32 %add1 +} + +; CHECK-LABEL: @reassoc_x2_add_nuw( +; CHECK-NEXT: add nuw i32 %x, %y +; CHECK-NEXT: add nuw i32 %add1, 12 +define i32 @reassoc_x2_add_nuw(i32 %x, i32 %y) { + %add0 = add nuw i32 %x, 4 + %add1 = add nuw i32 %y, 8 + %add2 = add nuw i32 %add0, %add1 + ret i32 %add2 +} + +; CHECK-LABEL: @reassoc_x2_mul_nuw( +; CHECK-NEXT: %mul1 = mul i32 %x, %y +; CHECK-NEXT: %mul2 = mul nuw i32 %mul1, 45 +define i32 @reassoc_x2_mul_nuw(i32 %x, i32 %y) { + %mul0 = mul nuw i32 %x, 5 + %mul1 = mul nuw i32 %y, 9 + %mul2 = mul nuw i32 %mul0, %mul1 + ret i32 %mul2 +} + +; CHECK-LABEL: @reassoc_x2_sub_nuw( +; CHECK-NEXT: %sub0 = add i32 %x, -4 +; CHECK-NEXT: %sub1 = add i32 %y, -8 +; CHECK-NEXT: %sub2 = sub nuw i32 %sub0, %sub1 +define i32 @reassoc_x2_sub_nuw(i32 %x, i32 %y) { + %sub0 = sub nuw i32 %x, 4 + %sub1 = sub nuw i32 %y, 8 + %sub2 = sub nuw i32 %sub0, %sub1 + ret i32 %sub2 +} Index: test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -172,7 +172,7 @@ ; PROLOG: loop_exiting_bb1.7: ; PROLOG-NEXT: switch i64 %sum.next.6, label 
%loop_latch.7 ; PROLOG: loop_latch.7: -; PROLOG-NEXT: %iv_next.7 = add nsw i64 %iv, 8 +; PROLOG-NEXT: %iv_next.7 = add nuw nsw i64 %iv, 8 ; PROLOG-NEXT: %sum.next.7 = add i64 %sum.next.6, %add ; PROLOG-NEXT: %cmp.7 = icmp eq i64 %iv_next.7, %trip ; PROLOG-NEXT: br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header @@ -304,7 +304,7 @@ ; PROLOG-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.prol, %exit1.loopexit1 ] ; PROLOG-NEXT: ret i64 %result ; PROLOG: loop_latch.7: -; PROLOG: %iv_next.7 = add nsw i64 %iv, 8 +; PROLOG: %iv_next.7 = add nuw nsw i64 %iv, 8 entry: br label %loop_header @@ -438,7 +438,7 @@ } ; Nested loop and inner loop is unrolled -; FIXME: we cannot unroll with epilog remainder currently, because +; FIXME: we cannot unroll with epilog remainder currently, because ; the outer loop does not contain the epilog preheader and epilog exit (while ; infact it should). This causes us to choke up on LCSSA form being incorrect in ; outer loop. However, the exit block where LCSSA fails, is infact still within @@ -456,7 +456,7 @@ ; PROLOG: %lcmp.mod = icmp eq i64 ; PROLOG-NEXT: br i1 %lcmp.mod, label %innerH.prol.loopexit, label %innerH.prol.preheader ; PROLOG: latch.6: -; PROLOG-NEXT: %tmp4.7 = add nsw i64 %tmp3, 8 +; PROLOG-NEXT: %tmp4.7 = add nuw nsw i64 %tmp3, 8 ; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 ; PROLOG: latch.7 ; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp4.7, 100