Index: llvm/lib/Transforms/Utils/SimplifyIndVar.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -246,7 +246,8 @@ ICmpInst *ICmp, const SCEV *S, const SCEV *X, const Loop *ICmpLoop, ScalarEvolution *SE) { - if (SE->isKnownPredicate(Pred, S, X)) + const BasicBlock *BB = ICmp->getParent(); + if (SE->isKnownPredicateAt(Pred, S, X, BB)) return true; auto *AR = dyn_cast(S); @@ -271,7 +272,7 @@ // First, check the predicate on the 1st iteration. const SCEV *Base = AR->getOperand(0); - if (!SE->isKnownPredicate(Pred, Base, X)) + if (!SE->isKnownPredicateAt(Pred, Base, X, BB)) return false; // TODO: Support steps other than +/- 1. @@ -300,7 +301,7 @@ // Value of IV on max possible last iteration. const SCEV *Last = SE->getAddExpr(Base, SE->getMulExpr(Step, MaxIter)); // Does it still meet the requirement? - if (!SE->isKnownPredicate(Pred, Last, X)) + if (!SE->isKnownPredicateAt(Pred, Last, X, BB)) return false; // Because step is +/- 1 and MaxIter has same type as Base (i.e. it does // not exceed max unsigned value of this type), this effectively proves @@ -314,7 +315,7 @@ else OverflowPred = CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; - if (!SE->isKnownPredicate(OverflowPred, Base, Last)) + if (!SE->isKnownPredicateAt(OverflowPred, Base, Last, BB)) return false; return true; } Index: llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll +++ llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll @@ -47,6 +47,84 @@ ret i32 %size.lcssa } +; Here check on cmp2 is trivially true because: +; On the 1st iteration, sub3 = arg -1, known positive from cmp1; +; Max possible value if i is (arg - 2), so sub3 is at least 1. +define void @expandOuterRecurrenceTrivialRC(i32 %arg) nounwind #0 { +; CHECK-V8M-LABEL: @expandOuterRecurrenceTrivialRC( +; CHECK-V8M-NEXT: entry: +; CHECK-V8M-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-V8M-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-V8M-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK-V8M: outer.preheader: +; CHECK-V8M-NEXT: br label [[OUTER:%.*]] +; CHECK-V8M: outer: +; CHECK-V8M-NEXT: br i1 true, label [[INNER_PH:%.*]], label [[OUTER_INC:%.*]] +; CHECK-V8M: inner.ph: +; CHECK-V8M-NEXT: br label [[INNER:%.*]] +; CHECK-V8M: inner: +; CHECK-V8M-NEXT: br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK-V8M: outer.inc.loopexit: +; CHECK-V8M-NEXT: br label [[OUTER_INC]] +; CHECK-V8M: outer.inc: +; CHECK-V8M-NEXT: br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-V8M: exit.loopexit: +; CHECK-V8M-NEXT: br label [[EXIT]] +; CHECK-V8M: exit: +; CHECK-V8M-NEXT: ret void +; +; CHECK-V8A-LABEL: @expandOuterRecurrenceTrivialRC( +; CHECK-V8A-NEXT: entry: +; CHECK-V8A-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-V8A-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-V8A-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK-V8A: outer.preheader: +; CHECK-V8A-NEXT: br label [[OUTER:%.*]] +; CHECK-V8A: outer: +; CHECK-V8A-NEXT: br i1 true, label [[INNER_PH:%.*]], label [[OUTER_INC:%.*]] +; CHECK-V8A: inner.ph: +; CHECK-V8A-NEXT: br label [[INNER:%.*]] +; CHECK-V8A: inner: +; CHECK-V8A-NEXT: br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK-V8A: outer.inc.loopexit: +; CHECK-V8A-NEXT: br label [[OUTER_INC]] +; CHECK-V8A: outer.inc: +; CHECK-V8A-NEXT: br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK-V8A: exit.loopexit: +; CHECK-V8A-NEXT: br label [[EXIT]] +; CHECK-V8A: exit: +; CHECK-V8A-NEXT: ret void +; +entry: + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %outer, label %exit + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp slt i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %sub1 + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + define void @expandOuterRecurrence(i32 %arg) nounwind #0 { ; CHECK-V8M-LABEL: @expandOuterRecurrence( ; CHECK-V8M-NEXT: entry: @@ -125,7 +203,7 @@ outer.inc: %i.inc = add nsw i32 %i, 1 - %cmp4 = icmp slt i32 %i.inc, %sub1 + %cmp4 = icmp slt i32 %i.inc, %arg br i1 %cmp4, label %outer, label %exit exit: Index: llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -27,7 +27,7 @@ ; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i8* [[BASE]] to i64 ; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] ; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i8 -; CHECK-NEXT: store i8 [[CONV]], i8* [[P_02]] +; CHECK-NEXT: store i8 [[CONV]], i8* [[P_02]], align 1 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[P_02]], i32 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] @@ -58,6 +58,71 @@ ret void } +; Here check on cmp2 is trivially true because: +; On the 1st iteration, sub3 = arg -1, known positive from cmp1; +; Max possible value if i is (arg - 2), so sub3 is at least 1. +define void @expandOuterRecurrenceTrivialRC(i32 %arg) nounwind { +; CHECK-LABEL: @expandOuterRecurrenceTrivialRC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] +; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: outer.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 +; CHECK-NEXT: br label [[OUTER:%.*]] +; CHECK: outer: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] +; CHECK-NEXT: br i1 true, label [[INNER_PH:%.*]], label [[OUTER_INC]] +; CHECK: inner.ph: +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] +; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] +; CHECK: outer.inc.loopexit: +; CHECK-NEXT: br label [[OUTER_INC]] +; CHECK: outer.inc: +; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %sub1 = sub nsw i32 %arg, 1 + %cmp1 = icmp slt i32 0, %sub1 + br i1 %cmp1, label %outer, label %exit + +outer: + %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] + %sub2 = sub nsw i32 %arg, %i + %sub3 = sub nsw i32 %sub2, 1 + %cmp2 = icmp slt i32 0, %sub3 + br i1 %cmp2, label %inner.ph, label %outer.inc + +inner.ph: + br label %inner + +inner: + %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] + %j.inc = add nsw i32 %j, 1 + %cmp3 = icmp slt i32 %j.inc, %sub3 + br i1 %cmp3, label %inner, label %outer.inc + +outer.inc: + %i.inc = add nsw i32 %i, 1 + %cmp4 = icmp slt i32 %i.inc, %sub1 + br i1 %cmp4, label %outer, label %exit + +exit: + ret void +} + ; This test checks that SCEVExpander can handle an outer loop that has been ; simplified, and as a result the inner loop's exit test will be rewritten. define void @expandOuterRecurrence(i32 %arg) nounwind { @@ -68,6 +133,8 @@ ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: outer.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ARG]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[ARG]], i32 1 ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] @@ -88,7 +155,7 @@ ; CHECK: outer.inc: ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SMAX]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] @@ -118,13 +185,14 @@ outer.inc: %i.inc = add nsw i32 %i, 1 - %cmp4 = icmp slt i32 %i.inc, %sub1 + %cmp4 = icmp slt i32 %i.inc, %arg br i1 %cmp4, label %outer, label %exit exit: ret void } + ; Force SCEVExpander to look for an existing well-formed phi. ; Perform LFTR without generating extra preheader code. define void @guardedloop([0 x double]* %matrix, [0 x double]* %vector, @@ -142,10 +210,10 @@ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], [[INDVARS_IV2]] ; CHECK-NEXT: [[MATRIXP:%.*]] = getelementptr inbounds [0 x double], [0 x double]* [[MATRIX:%.*]], i32 0, i64 [[TMP1]] -; CHECK-NEXT: [[V1:%.*]] = load double, double* [[MATRIXP]] +; CHECK-NEXT: [[V1:%.*]] = load double, double* [[MATRIXP]], align 8 ; CHECK-NEXT: call void @use(double [[V1]]) ; CHECK-NEXT: [[VECTORP:%.*]] = getelementptr inbounds [0 x double], [0 x double]* [[VECTOR:%.*]], i32 0, i64 [[INDVARS_IV2]] -; CHECK-NEXT: [[V2:%.*]] = load double, double* [[VECTORP]] +; CHECK-NEXT: [[V2:%.*]] = load double, double* [[VECTORP]], align 8 ; CHECK-NEXT: call void @use(double [[V2]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 @@ -244,7 +312,7 @@ ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[ADD_PTR10]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 -; CHECK-NEXT: store i8 3, i8* [[APTR]] +; CHECK-NEXT: store i8 3, i8* [[APTR]], align 1 ; CHECK-NEXT: [[INC]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[LIM]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] @@ -311,7 +379,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[APTR:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[IVSTART]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[APTR]], i32 1 -; CHECK-NEXT: store i8 3, i8* [[APTR]] +; CHECK-NEXT: store i8 3, i8* [[APTR]], align 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i8* [[INCDEC_PTR]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: Index: llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll +++ llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll @@ -178,11 +178,7 @@ ; CHECK: outer.preheader: ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] -; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] -; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] -; CHECK-NEXT: br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]] +; CHECK-NEXT: br i1 true, label [[INNER_PH:%.*]], label [[OUTER_INC:%.*]] ; CHECK: inner.ph: ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: @@ -190,7 +186,6 @@ ; CHECK: outer.inc.loopexit: ; CHECK-NEXT: br label [[OUTER_INC]] ; CHECK: outer.inc: -; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] Index: llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll +++ llvm/test/Transforms/IndVarSimplify/scev-expander-preserve-lcssa.ll @@ -113,13 +113,10 @@ ; CHECK: for.body84.preheader: ; CHECK-NEXT: br label [[FOR_BODY84:%.*]] ; CHECK: for.body84: -; CHECK-NEXT: [[I_144:%.*]] = phi i32 [ [[INC:%.*]], [[IF_END106:%.*]] ], [ 0, [[FOR_BODY84_PREHEADER]] ] ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() -; CHECK-NEXT: br i1 [[C_2]], label [[IF_END106]], label [[RETURN_LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 [[C_2]], label [[IF_END106:%.*]], label [[RETURN_LOOPEXIT:%.*]] ; CHECK: if.end106: -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_144]], 1 -; CHECK-NEXT: [[CMP82:%.*]] = icmp slt i32 [[INC]], [[I_0_LCSSA2]] -; CHECK-NEXT: br i1 [[CMP82]], label [[FOR_BODY84]], label [[RETURN_LOOPEXIT]] +; CHECK-NEXT: br i1 false, label [[FOR_BODY84]], label [[RETURN_LOOPEXIT]] ; CHECK: return.loopexit: ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return.loopexit1: