Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -8122,27 +8122,35 @@ // count. If so, we may be able to force computation of the exit // value. const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); - if (const SCEVConstant *BTCC = - dyn_cast(BackedgeTakenCount)) { - - // This trivial case can show up in some degenerate cases where - // the incoming IR has not yet been fully simplified. - if (BTCC->getValue()->isZero()) { - Value *InitValue = nullptr; - bool MultipleInitValues = false; - for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { - if (!LI->contains(PN->getIncomingBlock(i))) { - if (!InitValue) - InitValue = PN->getIncomingValue(i); - else if (InitValue != PN->getIncomingValue(i)) { - MultipleInitValues = true; - break; - } + // This trivial case can show up in some degenerate cases where + // the incoming IR has not yet been fully simplified. + if (BackedgeTakenCount->isZero()) { + Value *InitValue = nullptr; + bool MultipleInitValues = false; + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { + if (!LI->contains(PN->getIncomingBlock(i))) { + if (!InitValue) + InitValue = PN->getIncomingValue(i); + else if (InitValue != PN->getIncomingValue(i)) { + MultipleInitValues = true; + break; } } - if (!MultipleInitValues && InitValue) - return getSCEV(InitValue); } + if (!MultipleInitValues && InitValue) + return getSCEV(InitValue); + } + // Do we have a loop invariant value flowing around the backedge + // for a loop which must execute the backedge? + if (!isa(BackedgeTakenCount) && + isKnownPositive(BackedgeTakenCount) && + PN->getNumIncomingValues() == 2) { + unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1; + const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred)); + if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent())) + return OnBackedge; + } + if (auto *BTCC = dyn_cast(BackedgeTakenCount)) { // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. Index: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -635,9 +635,12 @@ // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized - // away. + // away. Avoid doing so unless we know we have a value which computes + // the ExitValue already. TODO: This should be merged into SCEV + // expander to leverage its knowledge of existing expressions. if (ReplaceExitValue != AlwaysRepl && - !isa(ExitValue) && hasHardUserWithinLoop(L, Inst)) + !isa(ExitValue) && !isa(ExitValue) && + hasHardUserWithinLoop(L, Inst)) continue; bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); Index: llvm/trunk/test/Transforms/IndVarSimplify/exit_value_tests.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/exit_value_tests.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/exit_value_tests.ll @@ -153,10 +153,34 @@ define i32 @unroll_phi_select_constant_nonzero(i32 %arg1, i32 %arg2) { ; CHECK-LABEL: @unroll_phi_select_constant_nonzero( ; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[ARG2:%.*]] +; +entry: + br label %loop + +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + %selector = phi i32 [%arg1, %entry], [%arg2, %loop] + %i.next = add nsw nuw i32 %i, 1 + %c = icmp ult i32 %i, 4 + br i1 %c, label %loop, label %loopexit + +loopexit: + ret i32 %selector +} + +declare i32 @f() + +; After LCSSA formation, there's no LCSSA phi for %f since it isn't directly +; used outside the loop, and thus we can't directly replace %selector w/ %f. +define i32 @neg_unroll_phi_select_constant_nonzero(i32 %arg) { +; CHECK-LABEL: @neg_unroll_phi_select_constant_nonzero( +; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG:%.*]], [[ENTRY]] ], [ [[F:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[F]] = call i32 @f() ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[I]], 4 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]] @@ -169,7 +193,8 @@ loop: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] - %selector = phi i32 [%arg1, %entry], [%arg2, %loop] + %selector = phi i32 [%arg, %entry], [%f, %loop] + %f = call i32 @f() %i.next = add nsw nuw i32 %i, 1 %c = icmp ult i32 %i, 4 br i1 %c, label %loop, label %loopexit @@ -178,6 +203,7 @@ ret i32 %selector } + define i32 @unroll_phi_select_constant_zero(i32 %arg1, i32 %arg2) { ; CHECK-LABEL: @unroll_phi_select_constant_zero( ; CHECK-NEXT: entry: @@ -201,16 +227,7 @@ ; CHECK-LABEL: @unroll_phi_select( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LENGTH:%.*]] = zext i16 [[LEN:%.*]] to i32 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SELECTOR:%.*]] = phi i32 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[LOOPEXIT:%.*]] -; CHECK: loopexit: -; CHECK-NEXT: [[SELECTOR_LCSSA:%.*]] = phi i32 [ [[SELECTOR]], [[LOOP]] ] -; CHECK-NEXT: ret i32 [[SELECTOR_LCSSA]] +; CHECK-NEXT: ret i32 [[ARG2:%.*]] ; entry: %length = zext i16 %len to i32 Index: llvm/trunk/test/Transforms/IndVarSimplify/pr39673.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/pr39673.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/pr39673.ll @@ -58,15 +58,13 @@ ; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop1: ; CHECK-NEXT: [[L1:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[L1_ADD:%.*]], [[LOOP1]] ] -; CHECK-NEXT: [[SELECTOR:%.*]] = phi i16 [ [[ARG1:%.*]], [[ENTRY]] ], [ [[ARG2:%.*]], [[LOOP1]] ] ; CHECK-NEXT: [[L1_ADD]] = add nuw nsw i16 [[L1]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i16 [[L1_ADD]], 2 ; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP1]], label [[LOOP2_PREHEADER:%.*]] ; CHECK: loop2.preheader: -; CHECK-NEXT: [[K1_ADD_LCSSA:%.*]] = phi i16 [ [[SELECTOR]], [[LOOP1]] ] ; CHECK-NEXT: br label [[LOOP2:%.*]] ; CHECK: loop2: -; CHECK-NEXT: [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[K1_ADD_LCSSA]], [[LOOP2_PREHEADER]] ] +; CHECK-NEXT: [[K2:%.*]] = phi i16 [ [[K2_ADD:%.*]], [[LOOP2]] ], [ [[ARG2:%.*]], [[LOOP2_PREHEADER]] ] ; CHECK-NEXT: [[L2:%.*]] = phi i16 [ [[L2_ADD:%.*]], [[LOOP2]] ], [ 0, [[LOOP2_PREHEADER]] ] ; CHECK-NEXT: [[L2_ADD]] = add nuw nsw i16 [[L2]], 1 ; CHECK-NEXT: tail call void @foo(i16 [[K2]])