Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -1533,6 +1533,12 @@ /// specified loop. bool isLoopInvariant(const SCEV *S, const Loop *L); + /// Determine if the SCEV can be evaluated at loop's entry. It is true if it + /// doesn't depend on a SCEVUnknown of an instruction which is dominated by + /// the header of loop L. + bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L, DominatorTree &DT, + LoopInfo &LI); + /// Return true if the given SCEV changes value in a known way in the /// specified loop. This property being true implies that the value is /// variant in the loop AND that we can emit an expression to compute the Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -2178,6 +2178,63 @@ return Flags; } +bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L, + DominatorTree &DT, LoopInfo &LI) { + if (!isLoopInvariant(S, L)) + return false; + // If a value depends on a SCEVUnknown which is defined after the loop, we + // conservatively assume that we cannot calculate it at the loop's entry. + struct FindDominatedSCEVUnknown { + bool Found = false; + const Loop *L; + DominatorTree &DT; + LoopInfo &LI; + + FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI) + : L(L), DT(DT), LI(LI) {} + + bool checkSCEVUnknown(const SCEVUnknown *SU) { + if (auto *I = dyn_cast(SU->getValue())) { + if (DT.dominates(L->getHeader(), I->getParent())) + Found = true; + else + assert(DT.dominates(I->getParent(), L->getHeader()) && + "No dominance relationship between SCEV and loop?"); + } + return false; + } + + bool follow(const SCEV *S) { + switch (static_cast(S->getSCEVType())) { + case scConstant: + return false; + case scAddRecExpr: + case scTruncate: + case scZeroExtend: + case scSignExtend: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUDivExpr: + return true; + case scUnknown: + return checkSCEVUnknown(cast(S)); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + return false; + } + + bool isDone() { return Found; } + }; + + FindDominatedSCEVUnknown FSU(L, DT, LI); + SCEVTraversal ST(FSU); + ST.visitAll(S); + return !FSU.Found; +} + /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, @@ -2459,7 +2516,7 @@ const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { + if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -2734,7 +2791,7 @@ const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { + if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; Index: test/Analysis/IVUsers/quadradic-exit-value.ll =================================================================== --- test/Analysis/IVUsers/quadradic-exit-value.ll +++ test/Analysis/IVUsers/quadradic-exit-value.ll @@ -30,13 +30,47 @@ ret i64 %r } +; PR15470: LSR miscompile. The test1 function should return '1'. +; It is valid to fold SCEVUnknown into the recurrence because it +; was defined before the loop. +; +; SCEV does not know how to denormalize chained recurrences, so make +; sure they aren't marked as post-inc users. +; +; CHECK-LABEL: IV Users for loop %test1.loop +; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test1.loop> (post-inc with loop %test1.loop) in %f = ashr i32 %sext.us, 24 +define i32 @test1(i1 %cond) { +entry: + %sub.us = select i1 %cond, i32 0, i32 0 + br label %test1.loop + +test1.loop: + %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test1.loop ] + %inc11.us = add nsw i32 %inc1115.us, 1 + %cmp.us = icmp slt i32 %inc11.us, 2 + br i1 %cmp.us, label %test1.loop, label %for.end + +for.end: + %tobool.us = icmp eq i32 %inc1115.us, 0 + %mul.us = shl i32 %inc1115.us, 24 + %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us + %sext.us = mul i32 %mul.us, %sub.cond.us + %f = ashr i32 %sext.us, 24 + br label %exit + +exit: + ret i32 %f +} + ; PR15470: LSR miscompile. The test2 function should return '1'. +; It is illegal to fold SCEVUnknown (sext.us) into the recurrence +; because it is defined after the loop where this recurrence belongs. ; ; SCEV does not know how to denormalize chained recurrences, so make ; sure they aren't marked as post-inc users. ; ; CHECK-LABEL: IV Users for loop %test2.loop -; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> (post-inc with loop %test2.loop) in %f = ashr i32 %sext.us, 24 +; CHECK-NO-LCSSA: %sub.cond.us = ((-1 * %sub.us) + {0,+,1}<%test2.loop>) (post-inc with loop %test2.loop) in %sext.us = mul i32 %mul.us, %sub.cond.us define i32 @test2() { entry: br label %test2.loop Index: test/Analysis/ScalarEvolution/different-loops-recs.ll =================================================================== --- test/Analysis/ScalarEvolution/different-loops-recs.ll +++ test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -220,7 +220,8 @@ ; Mix of previous use cases that demonstrates %s3 can be incorrectly treated as ; a recurrence of loop1 because of operands order if we pick recurrencies in an -; incorrect order. +; incorrect order. It also shows that we cannot safely fold v1 (SCEVUnknown) +; because we cannot prove for sure that it doesn't use Phis of loop 2. define void @test_03(i32 %a, i32 %b, i32 %c, i32* %p) { @@ -228,9 +229,9 @@ ; CHECK: %v1 = load i32, i32* %p ; CHECK-NEXT: --> %v1 ; CHECK: %s1 = add i32 %phi1, %v1 -; CHECK-NEXT: --> {(%a + %v1),+,1}<%loop1> +; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1) ; CHECK: %s2 = add i32 %s1, %b -; CHECK-NEXT: --> {(%a + %b + %v1),+,1}<%loop1> +; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1) ; CHECK: %s3 = add i32 %s2, %phi2 ; CHECK-NEXT: --> ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1) @@ -452,3 +453,60 @@ %s6 = add i32 %phi3, %phi2 ret void } + +; Make sure that a complicated Phi does not get folded with rec's start value +; of a loop which is above. +define void @test_08() { + +; CHECK-LABEL: Classifying expressions for: @test_08 +; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1 +; CHECK-NEXT: --> ({0,+,-1}<%loop_2> + %iv.2.1) +; CHECK: %tmp12 = trunc i64 %tmp11 to i32 +; CHECK-NEXT: --> (trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) +; CHECK: %tmp14 = mul i32 %tmp12, %tmp7 +; CHECK-NEXT: --> ((trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) * {-1,+,-1}<%loop_1>) +; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1 +; CHECK-NEXT: --> ({2,+,1}<%loop_1> * %iv.2.1) + +entry: + br label %loop_1 + +loop_1: + %iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ] + %iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ] + br label %loop_1_exit + +dead: + br label %loop_1_exit + +loop_1_exit: + %tmp5 = icmp sgt i64 %iv.1.1, 2 + br i1 %tmp5, label %loop_2_preheader, label %loop_1_back_branch + +loop_1_back_branch: + %iv.1.1.next = add nuw nsw i64 %iv.1.1, 1 + %iv.1.2.next = add nsw i32 %iv.1.2, 1 + br label %loop_1 + +loop_2_preheader: + %tmp6 = sub i64 1, %iv.1.1 + %tmp7 = trunc i64 %tmp6 to i32 + br label %loop_2 + +loop_2: + %iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ] + %iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ] + %iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, %loop_2 ] + %tmp11 = add i64 %iv.2.2, %iv.2.1 + %tmp12 = trunc i64 %tmp11 to i32 + %tmp14 = mul i32 %tmp12, %tmp7 + %tmp16 = mul i64 %iv.2.1, %iv.1.1 + %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1 + %iv.2.2.next = add nsw i64 %iv.2.2, -1 + %tmp17 = icmp slt i64 %iv.2.3.next, %iv.1.1 + br i1 %tmp17, label %loop_2, label %exit + +exit: + %tmp10 = add i32 %iv.1.2, 3 + ret void +} Index: test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll =================================================================== --- test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll +++ test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll @@ -25,7 +25,7 @@ if6: ; preds = %idxend.8 %r2 = add i64 %0, -1 %r3 = load i64, i64* %1, align 8 -; CHECK-NOT: %r2 +; CHECK: %r2 = add i64 %0, -1 ; CHECK: %r3 = load i64 br label %ib @@ -36,13 +36,11 @@ %r4 = mul i64 %r3, %r0 %r5 = add i64 %r2, %r4 %r6 = icmp ult i64 %r5, undef -; CHECK: [[MUL1:%[0-9]+]] = mul i64 %lsr.iv, %r3 -; CHECK: [[ADD1:%[0-9]+]] = add i64 [[MUL1]], -1 -; CHECK: add i64 %{{.}}, [[ADD1]] -; CHECK: %r6 +; CHECK: %r4 = mul i64 %r3, %lsr.iv +; CHECK: %r5 = add i64 %r2, %r4 +; CHECK: %r6 = icmp ult i64 %r5, undef +; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5 %r7 = getelementptr i64, i64* undef, i64 %r5 store i64 1, i64* %r7, align 8 -; CHECK: [[MUL2:%[0-9]+]] = mul i64 %lsr.iv, %r3 -; CHECK: [[ADD2:%[0-9]+]] = add i64 [[MUL2]], -1 br label %L } Index: test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll @@ -0,0 +1,58 @@ +; REQUIRES: x86 +; RUN: opt -loop-reduce -S < %s | FileCheck %s + +; Strength reduction analysis here relies on IV Users analysis, that +; only finds users among instructions with types that are treated as +; legal by the data layout. When running this test on pure non-x86 +; configs (for example, ARM 64), it gets confused with the target +; triple and uses a default data layout instead. This default layout +; does not have any legal types (even i32), so the transformation +; does not happen. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +; PR15470: LSR miscompile. The test2 function should return '1'. +; +; SCEV expander cannot expand quadratic recurrences outside of the +; loop. This recurrence depends on %sub.us, so can't be expanded. +; We cannot fold SCEVUnknown (sub.us) with recurrences since it is +; declared after the loop. +; +; CHECK-LABEL: @test2 +; CHECK-LABEL: test2.loop: +; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ] +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ] +; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1 +; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216 +; +; CHECK-LABEL: for.end: +; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0 +; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0 +; CHECK: %1 = sub i32 0, %sub.us +; CHECK: %2 = add i32 %1, %lsr.iv.next +; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2 +; CHECK: %f = ashr i32 %sext.us, 24 +; CHECK: ret i32 %f +define i32 @test2() { +entry: + br label %test2.loop + +test2.loop: + %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ] + %inc11.us = add nsw i32 %inc1115.us, 1 + %cmp.us = icmp slt i32 %inc11.us, 2 + br i1 %cmp.us, label %test2.loop, label %for.end + +for.end: + %tobool.us = icmp eq i32 %inc1115.us, 0 + %sub.us = select i1 %tobool.us, i32 0, i32 0 + %mul.us = shl i32 %inc1115.us, 24 + %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us + %sext.us = mul i32 %mul.us, %sub.cond.us + %f = ashr i32 %sext.us, 24 + br label %exit + +exit: + ret i32 %f +} Index: test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll =================================================================== --- test/Transforms/LoopStrengthReduce/lsr-expand-quadratic.ll +++ /dev/null @@ -1,42 +0,0 @@ -; RUN: opt -loop-reduce -S < %s | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx" - -; PR15470: LSR miscompile. The test2 function should return '1'. -; -; SCEV expander cannot expand quadratic recurrences outside of the -; loop. This recurrence depends on %sub.us, so can't be expanded. -; -; CHECK-LABEL: @test2 -; CHECK-LABEL: test2.loop: -; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ] -; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216 -; -; CHECK-LABEL: for.end: -; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us -; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us -; CHECK: %f = ashr i32 %sext.us, 24 -; CHECK: ret i32 %f -define i32 @test2() { -entry: - br label %test2.loop - -test2.loop: - %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test2.loop ] - %inc11.us = add nsw i32 %inc1115.us, 1 - %cmp.us = icmp slt i32 %inc11.us, 2 - br i1 %cmp.us, label %test2.loop, label %for.end - -for.end: - %tobool.us = icmp eq i32 %inc1115.us, 0 - %sub.us = select i1 %tobool.us, i32 0, i32 0 - %mul.us = shl i32 %inc1115.us, 24 - %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us - %sext.us = mul i32 %mul.us, %sub.cond.us - %f = ashr i32 %sext.us, 24 - br label %exit - -exit: - ret i32 %f -} Index: test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll =================================================================== --- test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll +++ test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll @@ -25,6 +25,8 @@ entry: %buffer = alloca [33 x i16], align 16 %add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33 + %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64 br label %do.body do.body: ; preds = %do.body, %entry @@ -46,8 +48,6 @@ do.end: ; preds = %do.body %xap.0 = inttoptr i64 %0 to i1* %cap.0 = ptrtoint i1* %xap.0 to i64 - %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64 - %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast %sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1 %conv11 = trunc i64 %sub.ptr.div39 to i32