Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -144,6 +144,7 @@ bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); bool rewriteFirstIterationLoopExitValues(Loop *L); + bool rotateLoopExitTests(Loop *L); bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, @@ -701,6 +702,57 @@ return Changed; } +bool IndVarSimplify::rotateLoopExitTests(Loop *L) { + bool Changed = false; + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBB : ExitingBlocks) { + auto *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI || !BI->isConditional()) + continue; + + auto *ICmp = dyn_cast(BI->getCondition()); + if (!ICmp || !L->contains(ICmp->getParent()) || + !ICmp->hasOneUse()) + continue; + + // If not canonical form, then ignore + auto *LHS = dyn_cast(ICmp->getOperand(0)); + auto *RHS = ICmp->getOperand(1); + if (!LHS || !isa(LHS) || !LHS->hasOneUse() || + !L->isLoopInvariant(RHS)) + continue; + + // We're looking to remove the shl (multiply) on the LHS, by right shifting + // (dividing) each both sides such that the shl cancels. To do so, we need + // to a) ensure there's no overflow in either computation, and b) ensure we + // use a correctly signed shift (divide). We don't to worry about + // out-of-bounds or zero shift amounts. Zero shifts are noops (to both + // sides), and out-of-bounds shifts simply move poison from one cmp operand + // to the other. + + // TODO: generalize for other binary operators (e.g. add, sub, div) + Value *ShOp0; + Value *ShOp1; + using namespace PatternMatch; + if ((ICmp->isSigned() && + match(LHS, m_NSWShl(m_Value(ShOp0), m_Value(ShOp1)))) || + (!ICmp->isSigned() && + match(LHS, m_NUWShl(m_Value(ShOp0), m_Value(ShOp1))))) + if (L->isLoopInvariant(ShOp1)) { + auto NewOp = ICmp->isSigned() ? Instruction::AShr : Instruction::LShr; + auto *NewRHS = BinaryOperator::Create(NewOp, RHS, LHS->getOperand(1), + "", ICmp); + ICmp->setOperand(0, ShOp0); + ICmp->setOperand(1, NewRHS); + LHS->eraseFromParent(); + Changed = true; + } + } + + return Changed; +} + //===---------------------------------------------------------------------===// // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know // they will exit at the first iteration. @@ -2671,6 +2723,10 @@ // rewriteLoopExitValues. Changed |= rewriteFirstIterationLoopExitValues(L); + // Try to rotate loop tests so as to make more of the computation feeding the + // test loop invariant. + Changed |= rotateLoopExitTests(L); + // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader(), TLI); Index: test/Transforms/IndVarSimplify/rotate-exit-test.ll =================================================================== --- test/Transforms/IndVarSimplify/rotate-exit-test.ll +++ test/Transforms/IndVarSimplify/rotate-exit-test.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -indvars -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +@Sink = external global i32 + +;; TODO: Should be able to infer overflow fact here +define i32 @test_signed() { +; CHECK-LABEL: @test_signed( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: store volatile i32 [[IV]], i32* @Sink +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 751 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 750 +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %iv.next = add i32 %iv, 1 + store volatile i32 %iv, i32* @Sink + %shl = shl i32 %iv, 2 + %cmp1 = icmp slt i32 %shl, 2999 + br i1 %cmp1, label %loop, label %exit + +exit: + ret i32 %iv +} + +;; TODO: Should be able to infer overflow fact here +define i32 @test_unsigned() { +; CHECK-LABEL: @test_unsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: store volatile i32 [[IV]], i32* @Sink +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 751 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 750 +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %iv.next = add i32 %iv, 1 + store volatile i32 %iv, i32* @Sink + %shl = shl i32 %iv, 2 + %cmp1 = icmp ult i32 %shl, 2999 + br i1 %cmp1, label %loop, label %exit + +exit: + ret i32 %iv +} + +;; TODO: Should be able to infer overflow fact here +define i32 @test_ne() { +; CHECK-LABEL: @test_ne( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: store volatile i32 [[IV]], i32* @Sink +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 501 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 500 +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %iv.next = add i32 %iv, 1 + store volatile i32 %iv, i32* @Sink + %shl = shl i32 %iv, 1 + %cmp1 = icmp ne i32 %shl, 1000 + br i1 %cmp1, label %loop, label %exit + +exit: + ret i32 %iv +} + +; Negative test, unknown bounds +define i32 @neg_unknown_bounds(i32 %N, i32 %shiftamt) { +; CHECK-LABEL: @neg_unknown_bounds( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: store volatile i32 [[IV]], i32* @Sink +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[IV]], [[SHIFTAMT:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[SHL]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %iv.next = add i32 %iv, 1 + store volatile i32 %iv, i32* @Sink + %shl = shl i32 %iv, %shiftamt + %cmp1 = icmp slt i32 %shl, %N + br i1 %cmp1, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @test_nsw(i32 %N, i32 %shiftamt) { +; CHECK-LABEL: @test_nsw( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: store volatile i32 [[IV]], i32* @Sink +; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[N:%.*]], [[SHIFTAMT:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[IV]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %iv.next = add i32 %iv, 1 + store volatile i32 %iv, i32* @Sink + %shl = shl nsw i32 %iv, %shiftamt + %cmp1 = icmp slt i32 %shl, %N + br i1 %cmp1, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @test_nuw(i32 %N, i32 %shiftamt) { +; CHECK-LABEL: @test_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: store volatile i32 [[IV]], i32* @Sink +; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[N:%.*]], [[SHIFTAMT:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %iv.next = add i32 %iv, 1 + store volatile i32 %iv, i32* @Sink + %shl = shl nuw i32 %iv, %shiftamt + %cmp1 = icmp ult i32 %shl, %N + br i1 %cmp1, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @test_ne_nuw(i32 %N, i32 %shiftamt) { +; CHECK-LABEL: @test_ne_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: store volatile i32 [[IV]], i32* @Sink +; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[N:%.*]], [[SHIFTAMT:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[IV]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %iv.next = add i32 %iv, 1 + store volatile i32 %iv, i32* @Sink + %shl = shl nuw i32 %iv, %shiftamt + %cmp1 = icmp ne i32 %shl, %N + br i1 %cmp1, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @neg_ne_mayoverflow(i32 %N, i32 %shiftamt) { +; CHECK-LABEL: @neg_ne_mayoverflow( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: store volatile i32 [[IV]], i32* @Sink +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[IV]], [[SHIFTAMT:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[SHL]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %iv.next = add i32 %iv, 1 + store volatile i32 %iv, i32* @Sink + %shl = shl i32 %iv, %shiftamt + %cmp1 = icmp ne i32 %shl, %N + br i1 %cmp1, label %loop, label %exit + +exit: + ret i32 %iv +}