Index: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h @@ -1099,6 +1099,9 @@ /// Mark predicate values currently being processed by isImpliedCond. SmallPtrSet PendingLoopPredicates; + /// Mark SCEVUnknown Phis currently being processed by getRangeRef. + SmallPtrSet PendingPhiRanges; + /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of /// conditions dominating the backedge of a loop. bool WalkingBEDominatingConds = false; Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -5582,6 +5582,25 @@ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1)); } + // A range of Phi is a subset of union of all ranges of its input. + if (const PHINode *Phi = dyn_cast(U->getValue())) { + // Make sure that we do not run over cycled Phis. + if (PendingPhiRanges.insert(Phi).second) { + ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); + for (auto &Op : Phi->operands()) { + auto OpRange = getRangeRef(getSCEV(Op), SignHint); + RangeFromOps = RangeFromOps.unionWith(OpRange); + // No point to continue if we already have a full set. + if (RangeFromOps.isFullSet()) + break; + } + ConservativeResult = ConservativeResult.intersectWith(RangeFromOps); + bool Erased = PendingPhiRanges.erase(Phi); + assert(Erased && "Failed to erase Phi properly?"); + (void) Erased; + } + } + return setRange(U, SignHint, std::move(ConservativeResult)); } @@ -10888,6 +10907,7 @@ LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)), ValueExprMap(std::move(Arg.ValueExprMap)), PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)), + PendingPhiRanges(std::move(Arg.PendingPhiRanges)), MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)), BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), PredicatedBackedgeTakenCounts( @@ -10931,6 +10951,7 @@ BTCI.second.clear(); assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + assert(PendingPhiRanges.empty() && "getRangeRef garbage"); assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!"); } Index: llvm/trunk/test/Analysis/ScalarEvolution/unknown_phis.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/unknown_phis.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/unknown_phis.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +define void @merge_values_with_ranges(i32 *%a_len_ptr, i32 *%b_len_ptr, i1 %unknown_cond) { + +; CHECK-LABEL: Classifying expressions for: @merge_values_with_ranges +; CHECK: %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ] +; CHECK-NEXT: --> %len U: [0,2147483647) S: [0,2147483647) + + entry: + br i1 %unknown_cond, label %if.true, label %if.false + +if.true: + %len_a = load i32, i32* %a_len_ptr, !range !0 + br label %merge + +if.false: + %len_b = load i32, i32* %b_len_ptr, !range !0 + br label %merge + +merge: + %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ] + ret void +} + +define void @merge_values_with_ranges_looped(i32 *%a_len_ptr, i32 *%b_len_ptr) { + +; TODO: We could be much smarter here. So far we just make sure that we do not +; go into infinite loop analyzing these Phis. + +; CHECK-LABEL: Classifying expressions for: @merge_values_with_ranges_looped +; CHECK: %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ] +; CHECK-NEXT: --> %p1 U: full-set S: full-set +; CHECK: %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ] +; CHECK-NEXT: --> %p2 U: full-set S: full-set + + entry: + %len_a = load i32, i32* %a_len_ptr, !range !0 + %len_b = load i32, i32* %b_len_ptr, !range !0 + br label %loop + +loop: + %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ] + %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ] + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i32 %iv, 1 + %loop.cond = icmp slt i32 %iv.next, 100 + br i1 %loop.cond, label %loop, label %exit + +exit: + ret void +} + + +!0 = !{i32 0, i32 2147483647} Index: llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll +++ llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll @@ -179,5 +179,71 @@ ; CHECK-NOT: br i1 false ; CHECK-NOT: preloop +define void @single_access_no_preloop_no_offset_phi_len(i32 *%arr, i32 *%a_len_ptr, i32 *%b_len_ptr, i32 %n, i1 %unknown_cond) { + entry: + br i1 %unknown_cond, label %if.true, label %if.false + +if.true: + %len_a = load i32, i32* %a_len_ptr, !range !0 + br label %merge + +if.false: + %len_b = load i32, i32* %b_len_ptr, !range !0 + br label %merge + +merge: + %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ] + %first.itr.check = icmp sgt i32 %n, 0 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ 0, %merge ] , [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + + in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, %n + br i1 %next, label %loop, label %exit + + out.of.bounds: + ret void + + exit: + ret void +} + +; CHECK-LABEL: @single_access_no_preloop_no_offset_phi_len( + +; CHECK: loop: +; CHECK: br i1 true, label %in.bounds, label %out.of.bounds + +; CHECK: main.exit.selector: +; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[continue:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, %n +; CHECK-NEXT: br i1 [[continue]], label %main.pseudo.exit, label %exit.loopexit + +; CHECK: main.pseudo.exit: +; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next.lcssa, %main.exit.selector ] +; CHECK-NEXT: %indvar.end = phi i32 [ 0, %loop.preheader ], [ %idx.next.lcssa, %main.exit.selector ] +; CHECK-NEXT: br label %postloop + +; CHECK: postloop: +; CHECK-NEXT: br label %loop.postloop + +; CHECK: loop.postloop: +; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.next.postloop, %in.bounds.postloop ], [ %idx.copy, %postloop ] +; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 1 +; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %len +; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds + +; CHECK: in.bounds.postloop: +; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop +; CHECK-NEXT: store i32 0, i32* %addr.postloop +; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, %n +; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit + !0 = !{i32 0, i32 2147483647} !1 = !{!"branch_weights", i32 64, i32 4} Index: llvm/trunk/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll +++ llvm/trunk/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll @@ -29,11 +29,13 @@ ; Verify that store is vectorized as stride-1 memory access. -; CHECK: vector.body: -; CHECK: store <4 x i32> +; CHECK-LABEL: @test_01( +; CHECK-NOT: vector.body: +; This test was originally vectorized, but now SCEV is smart enough to prove +; that its trip count is 1, so it gets ignored by vectorizer. ; Function Attrs: uwtable -define void @test() { +define void @test_01() { br label %.outer ;