Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -1538,6 +1538,10 @@ /// the header of loop L. bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L); + /// Determine if the SCEV depends on a SCEVUnknown which is not invariant + /// w.r.t. the loop. + bool dependsOnVariantSCEVUnknown(const SCEV *S, const Loop *L); + /// Return true if the given SCEV changes value in a known way in the /// specified loop. This property being true implies that the value is /// variant in the loop AND that we can emit an expression to compute the Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -2234,6 +2234,31 @@ return !FSU.Found; } +bool +ScalarEvolution::dependsOnVariantSCEVUnknown(const SCEV *S, const Loop *L) { + struct FindSCEVUnknown { + const Loop *L; + ScalarEvolution *SE; + bool Found = false; + + FindSCEVUnknown(const Loop *L, ScalarEvolution *SE) + : L(L), SE(SE) {} + + bool follow(const SCEV *S) { + if (isa(S) && !SE->isLoopInvariant(S, L)) + Found = true; + return true; + } + + bool isDone() { return Found; } + }; + + FindSCEVUnknown FSU = FindSCEVUnknown(L, this); + SCEVTraversal ST(FSU); + ST.visitAll(S); + return FSU.Found; +} + /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1001,14 +1001,16 @@ void RateRegister(const SCEV *Reg, SmallPtrSetImpl &Regs, const Loop *L, - ScalarEvolution &SE, DominatorTree &DT); + ScalarEvolution &SE, DominatorTree &DT, + const LSRUse &LU); void RatePrimaryRegister(const SCEV *Reg, SmallPtrSetImpl &Regs, const Loop *L, ScalarEvolution &SE, DominatorTree &DT, - SmallPtrSetImpl *LoserRegs); + SmallPtrSetImpl *LoserRegs, + const LSRUse &LU); }; - + /// An operand value in an instruction which is to be replaced with some /// equivalent, possibly strength-reduced, replacement. struct LSRFixup { @@ -1097,6 +1099,10 @@ /// the loop, in which case some special-case heuristics may be used. bool AllFixupsOutsideLoop; + /// This records whether all of the fixups using this LSRUse are inside + /// the loop, in which case some special-case heuristics may be used. + bool AllFixupsInsideLoop; + /// RigidFormula is set to true to guarantee that this use will be associated /// with a single formula--the one that initially matched. Some SCEV /// expressions cannot be expanded. This allows LSR to consider the registers @@ -1120,8 +1126,8 @@ LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true), RigidFormula(false), - WidestFixupType(nullptr) {} + AllFixupsOutsideLoop(true), AllFixupsInsideLoop(true), + RigidFormula(false), WidestFixupType(nullptr) {} LSRFixup &getNewFixup() { Fixups.push_back(LSRFixup()); @@ -1135,7 +1141,7 @@ if (f.Offset < MinOffset) MinOffset = f.Offset; } - + bool HasFormulaWithSameRegs(const Formula &F) const; float getNotSelectedProbability(const SCEV *Reg) const; bool InsertFormula(const Formula &F, const Loop &L); @@ -1152,7 +1158,18 @@ void Cost::RateRegister(const SCEV *Reg, SmallPtrSetImpl &Regs, const Loop *L, - ScalarEvolution &SE, DominatorTree &DT) { + ScalarEvolution &SE, DominatorTree &DT, + const LSRUse &LU) { + // If we have a use of loop-variant SCEVUnknown outside the loop, it is likely + // that the optimization won't give us any benefits here: we will have to + // reuse these values from within the loop and cannot adequately evaluate how + // many other instructions from the loop they use. So in fact it is possible + // that we will not be able to optimize anything from the loop, but will + // create LSR Phis and duplicate some instructions, producing a worse code. + if (!LU.AllFixupsInsideLoop && SE.dependsOnVariantSCEVUnknown(Reg, L)) { + Lose(); + return; + } if (const SCEVAddRecExpr *AR = dyn_cast(Reg)) { // If this is an addrec for another loop, it should be an invariant // with respect to L since L is the innermost loop (at least @@ -1179,7 +1196,7 @@ // TODO: The non-affine case isn't precisely modeled here. if (!AR->isAffine() || !isa(AR->getOperand(1))) { if (!Regs.count(AR->getOperand(1))) { - RateRegister(AR->getOperand(1), Regs, L, SE, DT); + RateRegister(AR->getOperand(1), Regs, L, SE, DT, LU); if (isLoser()) return; } @@ -1207,13 +1224,14 @@ SmallPtrSetImpl &Regs, const Loop *L, ScalarEvolution &SE, DominatorTree &DT, - SmallPtrSetImpl *LoserRegs) { + SmallPtrSetImpl *LoserRegs, + const LSRUse &LU) { if (LoserRegs && LoserRegs->count(Reg)) { Lose(); return; } if (Regs.insert(Reg).second) { - RateRegister(Reg, Regs, L, SE, DT); + RateRegister(Reg, Regs, L, SE, DT, LU); if (LoserRegs && isLoser()) LoserRegs->insert(Reg); } @@ -1237,7 +1255,7 @@ Lose(); return; } - RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs); + RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, LU); if (isLoser()) return; } @@ -1246,7 +1264,7 @@ Lose(); return; } - RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs); + RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, LU); if (isLoser()) return; } @@ -1512,6 +1530,9 @@ if (AllFixupsOutsideLoop) OS << ", all-fixups-outside-loop"; + if (AllFixupsInsideLoop) + OS << ", all-fixups-inside-loop"; + if (WidestFixupType) OS << ", widest fixup type: " << *WidestFixupType; } @@ -3132,7 +3153,7 @@ const SCEV *S = IU.getExpr(U); PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops(); - + // Equality (== and !=) ICmps are special. We can rewrite (i == N) as // (N - i == 0), and this allows (N - i) to be the expression that we work // with rather than just N or i, so we can consider the register @@ -3181,7 +3202,9 @@ LF.OperandValToReplace = U.getOperandValToReplace(); LF.PostIncLoops = TmpPostIncLoops; LF.Offset = Offset; - LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + bool UseOutside = LF.isUseFullyOutsideLoop(L); + LU.AllFixupsOutsideLoop &= UseOutside; + LU.AllFixupsInsideLoop &= !UseOutside; if (!LU.WidestFixupType || SE.getTypeSizeInBits(LU.WidestFixupType) < @@ -3330,7 +3353,9 @@ LF.UserInst = const_cast(UserInst); LF.OperandValToReplace = U; LF.Offset = Offset; - LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); + bool UseOutside = LF.isUseFullyOutsideLoop(L); + LU.AllFixupsOutsideLoop &= UseOutside; + LU.AllFixupsInsideLoop &= !UseOutside; if (!LU.WidestFixupType || SE.getTypeSizeInBits(LU.WidestFixupType) < SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) @@ -4249,6 +4274,7 @@ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + LUThatHas->AllFixupsInsideLoop &= LU.AllFixupsInsideLoop; // Transfer the fixups of LU to LUThatHas. for (LSRFixup &Fixup : LU.Fixups) { @@ -4256,7 +4282,7 @@ LUThatHas->pushFixup(Fixup); DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } - + // Delete formulae from the new use which are no longer legal. bool Any = false; for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { Index: test/CodeGen/ARM/arm-and-tst-peephole.ll =================================================================== --- test/CodeGen/ARM/arm-and-tst-peephole.ll +++ test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -28,8 +28,10 @@ ; ARM-NEXT: beq ; THUMB: movs r[[R0:[0-9]+]], #3 -; THUMB-NEXT: ands r[[R0]], r -; THUMB-NEXT: cmp r[[R0]], #0 +; THUMB-NEXT: mvns r[[R1:[0-9]+]], r[[R0]] +; THUMB-NEXT: ldr r[[R1]], +; THUMB-NEXT: ands r[[R1]], r[[R0]] +; THUMB-NEXT: cmp r[[R1]], #0 ; THUMB-NEXT: beq ; T2: ands {{r[0-9]+}}, {{r[0-9]+}}, #3 Index: test/Transforms/LoopStrengthReduce/X86/reject-scev-unknown.ll =================================================================== --- test/Transforms/LoopStrengthReduce/X86/reject-scev-unknown.ll +++ test/Transforms/LoopStrengthReduce/X86/reject-scev-unknown.ll @@ -0,0 +1,58 @@ +; RUN: opt -loop-reduce -S < %s | FileCheck %s + +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Make sure that LSR does not try to get something out of this loop, +; because the sinking formula depends on SCEVUnknown Phi node %tmp0. +define i32 @test_01() { +; CHECK-LABEL: @test_01 +; CHECK: exit: +; CHECK-NEXT: ret i32 %tmp7 +; CHECK-NOT: lsr-iv +entry: + br label %loop + +exit: ; preds = %loop + ret i32 %tmp7 + +loop: ; preds = %loop, %entry + %tmp0 = phi i32 [ 0, %entry ], [ %tmp7, %loop ] + %indvars.iv = phi i32 [ 4, %entry ], [ %indvars.iv.next, %loop ] + %tmp3 = add i32 %tmp0, -1 + %tmp4 = mul i32 %tmp3, %tmp3 + %tmp5 = sub i32 %tmp0, %indvars.iv + %tmp6 = mul i32 %tmp4, %indvars.iv + %tmp7 = add i32 %tmp5, %tmp6 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 80 + br i1 %exitcond, label %exit, label %loop +} + +; Make sure that LSR does not try to get something out of this loop, +; because the sinking formula depends on SCEVUnknown Load %lv. +define i32 @test_02(i32* %p) { +; CHECK-LABEL: @test_02 +; CHECK: exit: +; CHECK-NEXT: ret i32 %tmp7 +; CHECK-NOT: lsr-iv +entry: + br label %loop + +exit: ; preds = %loop + ret i32 %tmp7 + +loop: ; preds = %loop, %entry + %tmp0 = phi i32 [ 0, %entry ], [ %tmp7, %loop ] + %indvars.iv = phi i32 [ 4, %entry ], [ %indvars.iv.next, %loop ] + %lp = getelementptr inbounds i32, i32* %p, i32 %tmp0 + %lv = load i32, i32* %lp + %tmp3 = add i32 %lv, -1 + %tmp4 = mul i32 %tmp3, %tmp3 + %tmp5 = sub i32 %lv, %indvars.iv + %tmp6 = mul i32 %tmp4, %indvars.iv + %tmp7 = add i32 %tmp5, %tmp6 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 80 + br i1 %exitcond, label %exit, label %loop +}