diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1885,6 +1885,9 @@ /// Assign A and B to LHS and RHS, respectively. bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); + /// Try to apply information from loop guards for \p L to \p Expr. + const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in /// `UniqueSCEVs`. /// diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8749,7 +8749,7 @@ // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) { - APInt MaxBECount = getUnsignedRangeMax(Distance); + APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L)); // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, // we end up with a loop whose backedge-taken count is n - 1. Detect this @@ -12480,3 +12480,44 @@ MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); return false; } + +const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { + // Starting at the loop predecessor, climb up the predecessor chain, as long + // as there are predecessors that can be found that have unique successors + // leading to the original header. + // TODO: share this logic with isLoopEntryGuardedByCond. + for (std::pair Pair(L->getLoopPredecessor(), + L->getHeader()); + Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + + BranchInst *LoopEntryPredicate = + dyn_cast(Pair.first->getTerminator()); + if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) + continue; + + // TODO: use information from more complex conditions, e.g. AND expressions. + auto *Cmp = dyn_cast(LoopEntryPredicate->getCondition()); + if (!Cmp) + continue; + + // TODO: use information from more predicates. + switch (Cmp->getPredicate()) { + case CmpInst::ICMP_ULT: { + const SCEV *LHS = getSCEV(Cmp->getOperand(0)); + const SCEV *RHS = getSCEV(Cmp->getOperand(1)); + if (isa(LHS)) { + ValueToSCEVMapTy RewriteMap; + RewriteMap[Cmp->getOperand(0)] = + getUMinExpr(LHS, getMinusSCEV(RHS, getOne(RHS->getType()))); + Expr = SCEVParameterRewriter::rewrite(Expr, *this, RewriteMap); + } + + break; + } + default: + break; + } + } + + return Expr; +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll @@ -15,7 +15,6 @@ ; CHECK-LABEL: @_Z1dv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[CALL]], i64 4 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[F_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD5:%.*]], [[FOR_COND_CLEANUP:%.*]] ] @@ -25,93 +24,25 @@ ; CHECK-NEXT: br i1 [[CMP12]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP]] ; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[G_0]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 4, [[TMP0]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 3, [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[G_0]], [[CONV]] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP2]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP4]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP3]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 false, i1 [[TMP7]], i1 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP2]], 4294967295 -; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 false, [[TMP12]] -; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[G_0]], [[CONV]] -; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [6 x i8], [6 x i8]* @c, i64 0, i64 [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP15]], [[TMP0]] -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @c, i64 0, i64 4), i64 [[TMP16]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP]], [[SCEVGEP3]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP2]], [[SCEVGEP1]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true -; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = add i64 [[TMP0]], [[INDEX]] -; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[OFFSET_IDX4]] to i32 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[TMP18]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION7:%.*]] = add <4 x i32> [[BROADCAST_SPLAT6]], -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], 0 -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[CONV]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[TMP22]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to <4 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1, !alias.scope !0 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i32 0 -; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to <4 x i8>* -; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP27]], align 1, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_BODY_LR_PH]] ], [ [[TMP0]], [[VECTOR_SCEVCHECK]] ], [ [[TMP0]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[G_1_LCSSA]] = phi i32 [ [[G_0]], [[FOR_COND]] ], [ 4, [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[G_1_LCSSA]] = phi i32 [ [[G_0]], [[FOR_COND]] ], [ 4, [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] ; CHECK-NEXT: [[ADD5]] = add nuw nsw i32 [[CONV]], 4 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV]], [[TMP29]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV]], [[TMP1]] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[ADD]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP30:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 [[TMP30]], i8* [[ARRAYIDX3]], align 1 +; CHECK-NEXT: store i8 [[TMP2]], i8* [[ARRAYIDX3]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !7 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; entry: %call = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) #2 diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1826,4 +1826,36 @@ }); } +TEST_F(ScalarEvolutionsTest, SCEVgetExitLimitForGuardedLoop) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr M = parseAssemblyString( + "define void @foo(i32 %i) { " + "entry: " + " %cmp3 = icmp ult i32 %i, 16 " + " br i1 %cmp3, label %loop.body, label %exit " + "loop.body: " + " %iv = phi i32 [ %iv.next, %loop.body ], [ %i, %entry ] " + " %iv.next = add nsw i32 %iv, 1 " + " %cmp = icmp eq i32 %iv.next, 16 " + " br i1 %cmp, label %exit, label %loop.body " + "exit: " + " ret void " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *ScevIV = SE.getSCEV(getInstructionByName(F, "iv")); // {0,+,1} + const Loop *L = cast(ScevIV)->getLoop(); + + const SCEV *BTC = SE.getBackedgeTakenCount(L); + EXPECT_FALSE(isa(BTC)); + const SCEV *MaxBTC = SE.getConstantMaxBackedgeTakenCount(L); + EXPECT_EQ(cast(MaxBTC)->getAPInt(), 15); + }); +} + } // end namespace llvm