Index: lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- lib/Transforms/Scalar/Reassociate.cpp +++ lib/Transforms/Scalar/Reassociate.cpp @@ -63,8 +63,14 @@ /// static void PrintOps(Instruction *I, const SmallVectorImpl &Ops) { Module *M = I->getParent()->getParent()->getParent(); - dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " - << *Ops[0].Op->getType() << '\t'; + dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "; + if (OverflowingBinaryOperator *BO = dyn_cast(I)) { + if (BO->hasNoUnsignedWrap()) + dbgs() << "nuw "; + if (BO->hasNoSignedWrap()) + dbgs() << "nsw "; + } + dbgs() << *Ops[0].Op->getType() << '\t'; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { dbgs() << "[ "; Ops[i].Op->printAsOperand(dbgs(), false, M); @@ -177,7 +183,8 @@ void BuildRankMap(Function &F); unsigned getRank(Value *V); void ReassociateExpression(BinaryOperator *I); - void RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops); + void RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops, + unsigned NoWrapFlags); Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl &Ops); Value *OptimizeAdd(Instruction *I, SmallVectorImpl &Ops); @@ -545,7 +552,8 @@ /// type and thus make the expression bigger. static bool LinearizeExprTree(BinaryOperator *I, - SmallVectorImpl &Ops) { + SmallVectorImpl &Ops, + unsigned &NoWrapFlags) { DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits(); unsigned Opcode = I->getOpcode(); @@ -566,6 +574,11 @@ Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1))); bool MadeChange = false; + // Keep track of the NSW/NUW flags found in the expression. If any one of + // the operators doesn't have a flag, don't use it in the entire expression. + NoWrapFlags = (OverflowingBinaryOperator::NoUnsignedWrap | + OverflowingBinaryOperator::NoSignedWrap); + // Leaves of the expression are values that either aren't the right kind of // operation (eg: a constant, or a multiply in an add tree), or are, but have // some uses that are not inside the expression. For example, in I = X + X, @@ -592,6 +605,12 @@ std::pair P = Worklist.pop_back_val(); I = P.first; // We examine the operands of this binary operator. + // If operator I can't or doesn't have one of the NSW/NUW flags, remove it. + if (!isa(I) || !I->hasNoSignedWrap()) + NoWrapFlags &= ~OverflowingBinaryOperator::NoSignedWrap; + if (!isa(I) || !I->hasNoUnsignedWrap()) + NoWrapFlags &= ~OverflowingBinaryOperator::NoUnsignedWrap; + for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands. Value *Op = I->getOperand(OpIdx); APInt Weight = P.second; // Number of paths to this operand. @@ -721,7 +740,8 @@ // RewriteExprTree - Now that the operands for this expression tree are // linearized and optimized, emit them in-order. void Reassociate::RewriteExprTree(BinaryOperator *I, - SmallVectorImpl &Ops) { + SmallVectorImpl &Ops, + unsigned NoWrapFlags) { assert(Ops.size() > 1 && "Single values should be used directly!"); // Since our optimizations should never increase the number of operations, the @@ -874,8 +894,15 @@ FastMathFlags Flags = I->getFastMathFlags(); ExpressionChanged->clearSubclassOptionalData(); ExpressionChanged->setFastMathFlags(Flags); - } else + } else { ExpressionChanged->clearSubclassOptionalData(); + if (isa(ExpressionChanged)) { + ExpressionChanged->setHasNoUnsignedWrap( + NoWrapFlags & OverflowingBinaryOperator::NoUnsignedWrap); + ExpressionChanged->setHasNoSignedWrap( + NoWrapFlags & OverflowingBinaryOperator::NoSignedWrap); + } + } if (ExpressionChanged == I) break; @@ -1079,7 +1106,8 @@ return nullptr; SmallVector Tree; - MadeChange |= LinearizeExprTree(BO, Tree); + unsigned NoWrapFlags = 0; + MadeChange |= LinearizeExprTree(BO, Tree, NoWrapFlags); SmallVector Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -1121,7 +1149,7 @@ if (!FoundFactor) { // Make sure to restore the operands to the expression tree. - RewriteExprTree(BO, Factors); + RewriteExprTree(BO, Factors, NoWrapFlags); return nullptr; } @@ -1133,7 +1161,7 @@ RedoInsts.insert(BO); V = Factors[0].Op; } else { - RewriteExprTree(BO, Factors); + RewriteExprTree(BO, Factors, NoWrapFlags); V = BO; } @@ -2102,7 +2130,8 @@ // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector Tree; - MadeChange |= LinearizeExprTree(I, Tree); + unsigned NoWrapFlags = 0; + MadeChange |= LinearizeExprTree(I, Tree, NoWrapFlags); SmallVector Ops; Ops.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -2177,7 +2206,7 @@ // Now that we ordered and optimized the expressions, splat them back into // the expression tree, removing any unneeded nodes. - RewriteExprTree(I, Ops); + RewriteExprTree(I, Ops, NoWrapFlags); } bool Reassociate::runOnFunction(Function &F) { Index: test/Transforms/Reassociate/no-op.ll =================================================================== --- test/Transforms/Reassociate/no-op.ll +++ test/Transforms/Reassociate/no-op.ll @@ -24,14 +24,14 @@ } define void @test2(i32 %a, i32 %b, i32 %c, i32 %d) { -; The initial add doesn't change so should not lose the nsw flag. +; The initial add doesn't change. ; CHECK-LABEL: @test2( %a0 = add nsw i32 %b, %a ; CHECK-NEXT: %a0 = add nsw i32 %b, %a %a1 = add nsw i32 %a0, %d -; CHECK-NEXT: %a1 = add i32 %a0, %c +; CHECK-NEXT: %a1 = add nsw i32 %a0, %c %a2 = add nsw i32 %a1, %c -; CHECK-NEXT: %a2 = add i32 %a1, %d +; CHECK-NEXT: %a2 = add nsw i32 %a1, %d call void @use(i32 %a2) ; CHECK-NEXT: call void @use ret void Index: test/Transforms/Reassociate/nsw-nuw.ll =================================================================== --- /dev/null +++ test/Transforms/Reassociate/nsw-nuw.ll @@ -0,0 +1,99 @@ +; RUN: opt %s -reassociate -S | FileCheck %s + +;; Check that we keep overflow flags on binary operators whenever possible. + +define i32 @test_all_nsw(i32 %reg109, i32 %reg1111) { + %reg115 = add nsw i32 %reg109, -30 + %reg116 = add nsw i32 %reg115, %reg1111 + %reg117 = add nsw i32 %reg116, 30 + ret i32 %reg117 + +; CHECK-LABEL: @test_all_nsw +; CHECK-NEXT: %reg117 = add nsw i32 %reg1111, %reg109 +; CHECK-NEXT: ret i32 %reg117 +} + +define i32 @test_all_nuw(i32 %reg109, i32 %reg1111) { + %reg115 = add nuw i32 %reg109, -30 + %reg116 = add nuw i32 %reg115, %reg1111 + %reg117 = add nuw i32 %reg116, 30 + ret i32 %reg117 + +; CHECK-LABEL: @test_all_nuw +; CHECK-NEXT: %reg117 = add nuw i32 %reg1111, %reg109 +; CHECK-NEXT: ret i32 %reg117 +} + +define i32 @test_all_both(i32 %reg109, i32 %reg1111) { + %reg115 = add nuw nsw i32 %reg109, -30 + %reg116 = add nuw nsw i32 %reg115, %reg1111 + %reg117 = add nuw nsw i32 %reg116, 30 + ret i32 %reg117 + +; CHECK-LABEL: @test_all_both +; CHECK-NEXT: %reg117 = add nuw nsw i32 %reg1111, %reg109 +; CHECK-NEXT: ret i32 %reg117 +} + +;; The rest of the tests check that the flags are dropped. +;; We only keep flags when reassociating an expression consisting of a single +;; operation, into one consisting of the same operation. + +define i32 @test_allbutone_nsw(i32 %reg109, i32 %reg1111) { + %reg115 = add i32 %reg109, -30 + %reg116 = add nsw i32 %reg115, %reg1111 + %reg117 = add nsw i32 %reg116, 30 + ret i32 %reg117 + +; CHECK-LABEL: @test_allbutone_nsw +; CHECK-NEXT: %reg117 = add i32 %reg1111, %reg109 +; CHECK-NEXT: ret i32 %reg117 +} + +define i32 @test_allbutone_nsw_2(i32 %reg109, i32 %reg1111) { + %reg115 = add nsw i32 %reg109, -30 + %reg116 = add nsw i32 %reg115, %reg1111 + %reg117 = add i32 %reg116, 30 + ret i32 %reg117 + +; CHECK-LABEL: @test_allbutone_nsw +; CHECK-NEXT: %reg117 = add i32 %reg1111, %reg109 +; CHECK-NEXT: ret i32 %reg117 +} + +define i32 @test_morph_adds(i32 %X) { + %Y = add nuw i32 %X ,%X + %Z = add nuw i32 %Y, %X + ret i32 %Z +; CHECK-LABEL: @test_morph_adds +; CHECK-NEXT: mul i32 %X, 3 +; CHECK-NEXT: ret i32 +} + +define i32 @test_mixed_flags(i32 %reg109, i32 %reg1111) { + %reg115 = add nuw i32 %reg109, -30 + %reg116 = add nsw i32 %reg115, %reg1111 + %reg117 = add nuw i32 %reg116, 30 + ret i32 %reg117 + +; CHECK-LABEL: @test_mixed_flags +; CHECK-NEXT: %reg117 = add i32 %reg1111, %reg109 +; CHECK-NEXT: ret i32 %reg117 +} + +; This should be one add and two multiplies. +define i32 @test_mixed_ops(i32 %A, i32 %B, i32 %C) { + ; A*A*B + A*C*A + %aa = mul nsw i32 %A, %A + %aab = mul nsw i32 %aa, %B + %ac = mul nsw i32 %A, %C + %aac = mul nsw i32 %ac, %A + %r = add nsw i32 %aab, %aac + ret i32 %r + +; CHECK-LABEL: @test_mixed_ops +; CHECK-NEXT: add i32 %C, %B +; CHECK-NEXT: mul i32 +; CHECK-NEXT: mul i32 +; CHECK-NEXT: ret i32 +}