Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -599,6 +599,11 @@ return false; } + if (!isSafeToExpand(BECountSC, *SE)) { + DEBUG(dbgs() << "BECountSC " << *BECountSC << " is unsafe to expand.\n"); + return false; + } + unsigned BEWidth = cast(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. @@ -609,6 +614,11 @@ return false; } + if (!isSafeToExpand(TripCountSC, *SE)) { + DEBUG(dbgs() << "TripCount " << *TripCountSC << " is unsafe to expand.\n"); + return false; + } + BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); Index: test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll =================================================================== --- test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll +++ test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll @@ -24,19 +24,85 @@ ret i32 0 } +;; Unrolling is prohibited because we may end up expanding an expression +;; that divides by %conv7 (/u %conv7), which is not known to be non-zero. +;; TODO: We might want to exploit the fact that we divide by %conv7 in +;; bb1, and if it is zero, the rest is undefined behavior. Currently, +;; SCEV analysis is unable to figure out this fact. 
+ +define i32 @test2(i64* %loc, i64 %conv7) { + +; CHECK-LABEL: @test2( +; CHECK: entry: +; CHECK-NEXT: %rem0 = load i64, i64* %loc, align 8 +; CHECK-NEXT: %ExpensiveComputation = udiv i64 %rem0, 42 +; CHECK-NEXT: br label %bb1 +; CHECK: bb1: +; CHECK-NEXT: %div11 = udiv i64 %ExpensiveComputation, %conv7 +; CHECK-NEXT: %cmp.i38 = icmp ugt i64 %div11, 1 +; CHECK-NEXT: %div12 = select i1 %cmp.i38, i64 %div11, i64 1 +; CHECK-NEXT: br label %for.body +; CHECK: for.body: +; CHECK-NEXT: %rem1 = phi i64 [ %rem0, %bb1 ], [ %rem2, %for.body ] +; CHECK-NEXT: %k1 = phi i64 [ %div12, %bb1 ], [ %dec, %for.body ] +; CHECK-NEXT: %mul1 = mul i64 %rem1, 48271 +; CHECK-NEXT: %rem2 = urem i64 %mul1, 2147483647 +; CHECK-NEXT: %dec = add i64 %k1, -1 +; CHECK-NEXT: %cmp = icmp eq i64 %dec, 0 +; CHECK-NEXT: br i1 %cmp, label %exit, label %for.body +; CHECK: exit: +; CHECK-NEXT: %rem3 = phi i64 [ %rem2, %for.body ] +; CHECK-NEXT: store i64 %rem3, i64* %loc, align 8 +; CHECK-NEXT: ret i32 0 + +entry: + %rem0 = load i64, i64* %loc, align 8 + %ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression + br label %bb1 +bb1: + %div11 = udiv i64 %ExpensiveComputation, %conv7 + %cmp.i38 = icmp ugt i64 %div11, 1 + %div12 = select i1 %cmp.i38, i64 %div11, i64 1 + br label %for.body +for.body: + %rem1 = phi i64 [ %rem0, %bb1 ], [ %rem2, %for.body ] + %k1 = phi i64 [ %div12, %bb1 ], [ %dec, %for.body ] + %mul1 = mul i64 %rem1, 48271 + %rem2 = urem i64 %mul1, 2147483647 + %dec = add i64 %k1, -1 + %cmp = icmp eq i64 %dec, 0 + br i1 %cmp, label %exit, label %for.body +exit: + %rem3 = phi i64 [ %rem2, %for.body ] + store i64 %rem3, i64* %loc, align 8 + ret i32 0 +} + +;; The same as test2, but here we know that %conv7 is non-zero. +;; Once we know that, we can unroll safely. 
;; Though SCEV for loop tripcount contains division, ;; it shouldn't be considered expensive, since the division already ;; exists in the code and we don't need to expand it once more. ;; Thus, it shouldn't prevent us from unrolling the loop. -define i32 @test2(i64* %loc, i64 %conv7) { -; CHECK-LABEL: @test2( -; CHECK: udiv -; CHECK: udiv -; CHECK-NOT: udiv -; CHECK-LABEL: for.body +define i32 @test3(i64* %loc) { + +; CHECK-LABEL: @test3( +; CHECK: udiv +; CHECK: udiv +; CHECK-NOT: udiv +; CHECK: for.body +; CHECK: rem2.1 +; CHECK: rem2.2 +; CHECK: rem2.3 +; CHECK: rem2.4 +; CHECK: rem2.5 +; CHECK: rem2.6 +; CHECK: rem2.7 + entry: %rem0 = load i64, i64* %loc, align 8 + %conv7 = load i64, i64* %loc, !range !0 %ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression br label %bb1 bb1: