Index: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -628,13 +628,29 @@ // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it - // contains when the loop exits, if possible. + // contains when the loop exits, if possible. We prefer to start with + // expressions which are true for all exits (so as to maximize + // expression reuse by the SCEVExpander), but resort to per-exit + // evaluation if that fails. const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (isa(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || - !isSafeToExpand(ExitValue, *SE)) - continue; - + !isSafeToExpand(ExitValue, *SE)) { + // TODO: This should probably be sunk into SCEV in some way; maybe a + // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for + // most SCEV expressions and other recurrence types (e.g. shift + // recurrences). Is there existing code we can reuse? + const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); + if (isa(ExitCount)) + continue; + if (auto *AddRec = dyn_cast(SE->getSCEV(Inst))) + ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); + if (isa(ExitValue) || + !SE->isLoopInvariant(ExitValue, L) || + !isSafeToExpand(ExitValue, *SE)) + continue; + } + // Computing the value outside of the loop brings no benefit if it is // definitely used inside the loop in a way which can not be optimized // away. Avoid doing so unless we know we have a value which computes Index: llvm/trunk/test/Transforms/IndVarSimplify/rlev-add-me.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/rlev-add-me.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/rlev-add-me.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -indvars < %s | FileCheck %s +target datalayout = "n8:16:32:64" +@G = external global i32 + +; Basic case where we know the value of an induction variable along one +; exit edge, but not another. +define i32 @test(i32 %n) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT1:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[HEADER]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[HEADER]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; CHECK: exit2: +; CHECK-NEXT: ret i32 [[N]] +; +entry: + br label %header +header: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %v = load volatile i32, i32* @G + %cmp1 = icmp eq i32 %v, 0 + br i1 %cmp1, label %latch, label %exit1 + +latch: + %iv.next = add i32 %iv, 1 + %cmp2 = icmp ult i32 %iv, %n + br i1 %cmp2, label %header, label %exit2 +exit1: + ret i32 %iv +exit2: + ret i32 %iv +} + +define i32 @test2(i32 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT1:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[HEADER]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[HEADER]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; CHECK: exit2: +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + br label %header +header: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %v = load volatile i32, i32* @G + %cmp1 = icmp eq i32 %v, 0 + br i1 %cmp1, label %latch, label %exit1 + +latch: + %iv.next = add i32 %iv, 1 + %cmp2 = icmp ult i32 %iv, %n + br i1 %cmp2, label %header, label %exit2 +exit1: + ret i32 %iv +exit2: + ret i32 %iv.next +} + +; TODO: Generalize the code to handle other SCEV expressions +define i32 @test3(i32 %n) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[EXPR:%.*]] = udiv i32 [[IV]], 5 +; CHECK-NEXT: [[V:%.*]] = load volatile i32, i32* @G +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT1:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[HEADER]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: [[EXPR_LCSSA:%.*]] = phi i32 [ [[EXPR]], [[HEADER]] ] +; CHECK-NEXT: ret i32 [[EXPR_LCSSA]] +; CHECK: exit2: +; CHECK-NEXT: [[EXPR_LCSSA1:%.*]] = phi i32 [ [[EXPR]], [[LATCH]] ] +; CHECK-NEXT: ret i32 [[EXPR_LCSSA1]] +; +entry: + br label %header +header: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %expr = udiv i32 %iv, 5 + %v = load volatile i32, i32* @G + %cmp1 = icmp eq i32 %v, 0 + br i1 %cmp1, label %latch, label %exit1 + +latch: + %iv.next = add i32 %iv, 1 + %cmp2 = icmp ult i32 %iv, %n + br i1 %cmp2, label %header, label %exit2 +exit1: + ret i32 %expr +exit2: + ret i32 %expr +} + + +; A slightly more real example where we're searching for either a) the first +; non-zero element, or b) the end of a memory region. +define i32 @bounded_find(i32 %n) { +; CHECK-LABEL: @bounded_find( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* @G, i32 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[ADDR]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT1:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[HEADER]], label [[EXIT2:%.*]] +; CHECK: exit1: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], [[HEADER]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; CHECK: exit2: +; CHECK-NEXT: ret i32 [[N]] +; +entry: + br label %header +header: + %iv = phi i32 [0, %entry], [%iv.next, %latch] + %addr = getelementptr i32, i32* @G, i32 %iv + %v = load i32, i32* %addr + %cmp1 = icmp eq i32 %v, 0 + br i1 %cmp1, label %latch, label %exit1 + +latch: + %iv.next = add i32 %iv, 1 + %cmp2 = icmp ult i32 %iv, %n + br i1 %cmp2, label %header, label %exit2 +exit1: + ret i32 %iv +exit2: + ret i32 %iv +} Index: llvm/trunk/test/Transforms/LoopUnroll/scevunroll.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/scevunroll.ll +++ llvm/trunk/test/Transforms/LoopUnroll/scevunroll.ll @@ -184,7 +184,7 @@ ; CHECK: for.body: ; CHECK: %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ] ; CHECK: return: -; CHECK: %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ 0, %for.cond ] +; CHECK: %b.03.lcssa = phi i32 [ 8, %for.body ], [ 0, %for.cond ] define void @nsw_latch(i32* %a) nounwind { entry: br label %for.body