diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1900,6 +1900,9 @@ /// Assign A and B to LHS and RHS, respectively. bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); + /// Try to apply information from loop guards for \p L to \p Expr. + const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in /// `UniqueSCEVs`. /// diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8671,7 +8671,7 @@ // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) { - APInt MaxBECount = getUnsignedRangeMax(Distance); + APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L)); // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, // we end up with a loop whose backedge-taken count is n - 1. Detect this @@ -12533,3 +12533,52 @@ return getCouldNotCompute(); return getUMinFromMismatchedTypes(ExitCounts); } + +const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) { + // Starting at the loop predecessor, climb up the predecessor chain, as long + // as there are predecessors that can be found that have unique successors + // leading to the original header. + // TODO: share this logic with isLoopEntryGuardedByCond. 
+ for (std::pair<BasicBlock *, BasicBlock *> Pair( + L->getLoopPredecessor(), L->getHeader()); + Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + + const BranchInst *LoopEntryPredicate = + dyn_cast<BranchInst>(Pair.first->getTerminator()); + if (!LoopEntryPredicate || LoopEntryPredicate->isUnconditional()) + continue; + + // TODO: use information from more complex conditions, e.g. AND expressions. + auto *Cmp = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition()); + if (!Cmp) + continue; + + // The comparison is only known to hold inside the loop when every edge + // taken from this branch into the guarded block is its true successor. + if (LoopEntryPredicate->getSuccessor(0) != Pair.second || + LoopEntryPredicate->getSuccessor(1) == Pair.second) + continue; + + // TODO: use information from more predicates. + switch (Cmp->getPredicate()) { + case CmpInst::ICMP_ULT: { + const SCEV *LHS = getSCEV(Cmp->getOperand(0)); + const SCEV *RHS = getSCEV(Cmp->getOperand(1)); + // LHS <u RHS implies RHS != 0, so RHS - 1 cannot wrap and is an + // inclusive upper bound for LHS. + if (isa<SCEVUnknown>(LHS)) { + ValueToSCEVMapTy RewriteMap; + RewriteMap[Cmp->getOperand(0)] = + getUMinExpr(LHS, getMinusSCEV(RHS, getOne(RHS->getType()))); + Expr = SCEVParameterRewriter::rewrite(Expr, *this, RewriteMap); + } + + break; + } + default: + break; + } + } + + return Expr; +} diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -5,7 +5,7 @@ define void @test_guard_less_than_16(i32* nocapture %a, i64 %i) { ; CHECK-LABEL: Determining loop execution counts for: @test_guard_less_than_16 ; CHECK-NEXT: Loop %loop: backedge-taken count is (15 + (-1 * %i)) -; CHECK-NEXT: Loop %loop: max backedge-taken count is -1 +; CHECK-NEXT: Loop %loop: max backedge-taken count is 15 ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is (15 + (-1 * %i)) ; entry: diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll --- a/llvm/test/CodeGen/X86/optimize-max-0.ll +++ b/llvm/test/CodeGen/X86/optimize-max-0.ll @@ -450,179 +450,174 @@ ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: subl $28, 
%esp -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-NEXT: movl %ebp, %eax -; CHECK-NEXT: imull %ecx, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movl %ecx, %ebp +; CHECK-NEXT: imull %edi, %ebp ; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: je LBB1_19 ; CHECK-NEXT: ## %bb.1: ## %bb10.preheader +; CHECK-NEXT: movl %ebp, %eax ; CHECK-NEXT: shrl $2, %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: testl %ecx, %ecx ; CHECK-NEXT: je LBB1_12 ; CHECK-NEXT: ## %bb.2: ## %bb.nph9 -; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: je LBB1_12 ; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: incl %eax ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB1_6: ## %bb7.preheader -; CHECK-NEXT: ## =>This Loop Header: Depth=1 -; CHECK-NEXT: ## Child Loop BB1_4 Depth 2 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB1_4: ## %bb6 -; CHECK-NEXT: ## Parent Loop BB1_6 Depth=1 -; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx ; CHECK-NEXT: movb %bl, (%edx,%esi) ; CHECK-NEXT: incl %esi ; CHECK-NEXT: cmpl %edi, %esi ; CHECK-NEXT: jb LBB1_4 ; CHECK-NEXT: ## %bb.5: ## %bb9 -; CHECK-NEXT: ## in Loop: Header=BB1_6 Depth=1 +; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1 ; CHECK-NEXT: incl %ecx ; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl %edi, %edx -; CHECK-NEXT: cmpl %ebp, %ecx -; CHECK-NEXT: jne LBB1_6 +; 
CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: je LBB1_12 +; CHECK-NEXT: ## %bb.6: ## %bb7.preheader +; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: jmp LBB1_4 ; CHECK-NEXT: LBB1_12: ## %bb18.loopexit ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %ebp, %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: cmpl $1, %ebp +; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: jbe LBB1_13 ; CHECK-NEXT: ## %bb.7: ## %bb.nph5 -; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp) +; CHECK-NEXT: cmpl $2, %edi ; CHECK-NEXT: jb LBB1_13 ; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl %edi, %ebp ; CHECK-NEXT: shrl %ebp ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: shrl %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; CHECK-NEXT: addl %eax, %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: addl $2, %edx -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; CHECK-NEXT: addl %edx, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload +; CHECK-NEXT: leal (%edx,%eax), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl $2, %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: leal (%esi,%esi,2), %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: addl %esi, %edx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB1_9: ## %bb13 ; CHECK-NEXT: ## 
=>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB1_10 Depth 2 -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: andl $1, %ebx -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: addl %edx, %ebx -; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ebx -; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB1_10: ## %bb14 ; CHECK-NEXT: ## Parent Loop BB1_9 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movzbl -2(%ebx,%esi,4), %edx -; CHECK-NEXT: movb %dl, (%eax,%esi) -; CHECK-NEXT: movzbl (%ebx,%esi,4), %edx -; CHECK-NEXT: movb %dl, (%ecx,%esi) +; CHECK-NEXT: movzbl -2(%eax,%esi,4), %ebx +; CHECK-NEXT: movb %bl, (%edx,%esi) +; CHECK-NEXT: movzbl (%eax,%esi,4), %ebx +; CHECK-NEXT: movb %bl, (%ecx,%esi) ; CHECK-NEXT: incl %esi ; CHECK-NEXT: cmpl %ebp, %esi ; CHECK-NEXT: jb LBB1_10 ; CHECK-NEXT: ## %bb.11: ## %bb17 ; CHECK-NEXT: ## in Loop: Header=BB1_9 Depth=1 -; CHECK-NEXT: incl %edi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: incl %esi ; CHECK-NEXT: addl %ebp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; CHECK-NEXT: addl $2, %edx -; CHECK-NEXT: addl %ebp, %eax -; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Folded Reload +; CHECK-NEXT: addl %ebp, %edx +; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Folded Reload ; CHECK-NEXT: jb LBB1_9 ; CHECK-NEXT: LBB1_13: ## %bb20 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: cmpl $1, %edx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl $1, %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx ; CHECK-NEXT: 
je LBB1_19 ; CHECK-NEXT: ## %bb.14: ## %bb20 -; CHECK-NEXT: cmpl $3, %edx +; CHECK-NEXT: cmpl $3, %eax ; CHECK-NEXT: jne LBB1_24 ; CHECK-NEXT: ## %bb.15: ## %bb22 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; CHECK-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: testl %ecx, %ecx ; CHECK-NEXT: je LBB1_18 ; CHECK-NEXT: ## %bb.16: ## %bb.nph -; CHECK-NEXT: movl %ebp, %esi -; CHECK-NEXT: leal 15(%ebp), %eax +; CHECK-NEXT: leal 15(%ecx), %eax ; CHECK-NEXT: andl $-16, %eax ; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: leal 15(%ecx), %ebx -; CHECK-NEXT: andl $-16, %ebx -; CHECK-NEXT: addl %eax, %edi +; CHECK-NEXT: leal 15(%edi), %esi +; CHECK-NEXT: andl $-16, %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: addl %eax, %ebx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: leal (%edx,%eax), %ebp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: leal (%esi,%eax), %ebp ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB1_17: ## %bb23 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: subl $4, %esp -; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %ecx, %esi ; CHECK-NEXT: calll _memcpy -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: addl $16, %esp -; CHECK-NEXT: addl %ecx, %ebp -; CHECK-NEXT: addl %ebx, %edi -; CHECK-NEXT: decl %esi +; CHECK-NEXT: addl %edi, %ebp +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; CHECK-NEXT: decl %ecx ; CHECK-NEXT: jne LBB1_17 ; CHECK-NEXT: LBB1_18: ## %bb26 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload -; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %eax, %ecx ; CHECK-NEXT: movl 
{{[0-9]+}}(%esp), %edx -; CHECK-NEXT: addl %eax, %edx -; CHECK-NEXT: shrl %ecx +; CHECK-NEXT: addl %ecx, %edx +; CHECK-NEXT: shrl %eax ; CHECK-NEXT: subl $4, %esp -; CHECK-NEXT: pushl %ecx +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: pushl $128 ; CHECK-NEXT: pushl %edx ; CHECK-NEXT: jmp LBB1_23 ; CHECK-NEXT: LBB1_19: ## %bb29 -; CHECK-NEXT: testl %ebp, %ebp +; CHECK-NEXT: testl %ecx, %ecx ; CHECK-NEXT: je LBB1_22 ; CHECK-NEXT: ## %bb.20: ## %bb.nph11 -; CHECK-NEXT: movl %ebp, %esi -; CHECK-NEXT: leal 15(%ecx), %ebx -; CHECK-NEXT: andl $-16, %ebx +; CHECK-NEXT: leal 15(%edi), %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB1_21: ## %bb30 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: subl $4, %esp -; CHECK-NEXT: pushl %ecx ; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: movl %ecx, %esi ; CHECK-NEXT: calll _memcpy -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: addl $16, %esp -; CHECK-NEXT: addl %ecx, %ebp -; CHECK-NEXT: addl %ebx, %edi -; CHECK-NEXT: decl %esi +; CHECK-NEXT: addl %edi, %ebp +; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload +; CHECK-NEXT: decl %ecx ; CHECK-NEXT: jne LBB1_21 ; CHECK-NEXT: LBB1_22: ## %bb33 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll @@ -15,7 +15,6 @@ ; CHECK-LABEL: @_Z1dv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[CALL]], i64 4 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; 
CHECK-NEXT: [[F_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD5:%.*]], [[FOR_COND_CLEANUP:%.*]] ] @@ -25,87 +24,25 @@ ; CHECK-NEXT: br i1 [[CMP12]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP]] ; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[G_0]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 4, [[TMP0]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 3, [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[G_0]], [[CONV]] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP2]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP4]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP3]], [[MUL_RESULT]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 false, i1 [[TMP7]], i1 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP2]], 4294967295 -; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP13:%.*]] = or i1 false, [[TMP12]] -; CHECK-NEXT: br i1 [[TMP13]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[G_0]], [[CONV]] -; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [6 x i8], [6 x i8]* @c, i64 0, i64 [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP15]], [[TMP0]] -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, 
i8* getelementptr inbounds ([6 x i8], [6 x i8]* @c, i64 0, i64 4), i64 [[TMP16]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP]], [[SCEVGEP3]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP2]], [[SCEVGEP1]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true -; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX]] -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = add i64 [[TMP0]], [[INDEX]] -; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[OFFSET_IDX4]] to i32 -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], 0 -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[CONV]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[TMP22]], i32 0 -; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to <4 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1, !alias.scope !0 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, i8* [[TMP25]], i32 0 -; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to <4 x i8>* -; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP27]], align 1, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP28:%.*]] 
= icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_BODY_LR_PH]] ], [ [[TMP0]], [[VECTOR_SCEVCHECK]] ], [ [[TMP0]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[G_1_LCSSA]] = phi i32 [ [[G_0]], [[FOR_COND]] ], [ 4, [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[G_1_LCSSA]] = phi i32 [ [[G_0]], [[FOR_COND]] ], [ 4, [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ] ; CHECK-NEXT: [[ADD5]] = add nuw nsw i32 [[CONV]], 4 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV]], [[TMP29]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV]], [[TMP1]] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[ADD]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [6 x i8], [6 x i8]* @c, i64 0, i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP30:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 [[TMP30]], i8* [[ARRAYIDX3]], align 1 +; CHECK-NEXT: store i8 [[TMP2]], i8* [[ARRAYIDX3]], align 1 ; CHECK-NEXT: 
[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !7 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; entry: %call = tail call i8* @"_ZN3$_01aEv"(%struct.anon* nonnull @b) #2 diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1165,4 +1165,75 @@ cast<SCEVAddRecExpr>(ScevIV)->getStepRecurrence(SE)); }); } + +// Unsigned ranges must see through a umin inserted by SCEVParameterRewriter. +TEST_F(ScalarEvolutionsTest, SCEVgetRanges) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString( + "define void @foo(i32 %i) { " + "entry: " + " br label %loop.body " + "loop.body: " + " %iv = phi i32 [ %iv.next, %loop.body ], [ 0, %entry ] " + " %iv.next = add nsw i32 %iv, 1 " + " %cmp = icmp eq i32 %iv.next, 16 " + " br i1 %cmp, label %exit, label %loop.body " + "exit: " + " ret void " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *ScevIV = SE.getSCEV(getInstructionByName(F, "iv")); // {0,+,1} + auto *ScevI = SE.getSCEV(getArgByName(F, "i")); + EXPECT_EQ(SE.getUnsignedRange(ScevIV).getLower(), 0); + EXPECT_EQ(SE.getUnsignedRange(ScevIV).getUpper(), 16); + + auto *Add = SE.getAddExpr(ScevI, ScevIV); + ValueToSCEVMapTy RewriteMap; + RewriteMap[cast<SCEVUnknown>(ScevI)->getValue()] = + SE.getUMinExpr(ScevI, SE.getConstant(ScevI->getType(), 17)); + auto *AddWithUMin = SCEVParameterRewriter::rewrite(Add, SE, RewriteMap); + EXPECT_EQ(SE.getUnsignedRange(AddWithUMin).getLower(), 0); + EXPECT_EQ(SE.getUnsignedRange(AddWithUMin).getUpper(), 33); + }); +} + +// The `%i <u 16` entry guard must bound the constant max BTC by 15. +TEST_F(ScalarEvolutionsTest, 
SCEVgetExitLimitForGuardedLoop) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString( + "define void @foo(i32 %i) { " + "entry: " + " %cmp3 = icmp ult i32 %i, 16 " + " br i1 %cmp3, label %loop.body, label %exit " + "loop.body: " + " %iv = phi i32 [ %iv.next, %loop.body ], [ %i, %entry ] " + " %iv.next = add nsw i32 %iv, 1 " + " %cmp = icmp eq i32 %iv.next, 16 " + " br i1 %cmp, label %exit, label %loop.body " + "exit: " + " ret void " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *ScevIV = SE.getSCEV(getInstructionByName(F, "iv")); // {%i,+,1} + const Loop *L = cast<SCEVAddRecExpr>(ScevIV)->getLoop(); + + const SCEV *BTC = SE.getBackedgeTakenCount(L); + EXPECT_FALSE(isa<SCEVCouldNotCompute>(BTC)); + const SCEV *MaxBTC = SE.getConstantMaxBackedgeTakenCount(L); + EXPECT_EQ(cast<SCEVConstant>(MaxBTC)->getAPInt(), 15); + }); +} + + } // end namespace llvm