Index: llvm/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2393,6 +2393,17 @@ if (UsePostInc) IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); + // If the computed limit is equal to the old limit then do not use the SCEV expander + // because it can lose NUW/NSW flags and create extra instructions. + auto *BI = cast(ExitingBB->getTerminator()); + if (auto *Cmp = dyn_cast(BI->getOperand(0))) { + Value *Limit = Cmp->getOperand(0); + if (!L->isLoopInvariant(Limit)) + Limit = Cmp->getOperand(1); + if (SE->getSCEV(Limit) == IVLimit) + return Limit; + } + // Expand the code for the iteration count. assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); @@ -2401,7 +2412,6 @@ // SCEV expression (IVInit) for a pointer type IV value (IndVar). Type *LimitTy = ExitCount->getType()->isPointerTy() ? 
IndVar->getType() : ExitCount->getType(); - BranchInst *BI = cast(ExitingBB->getTerminator()); return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); } } Index: llvm/test/Transforms/IndVarSimplify/add_nsw.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IndVarSimplify/add_nsw.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -indvars -S %s | FileCheck %s + +target datalayout = "e-p:32:32-i64:64-n8:16:32" + +define void @foo(i32 %a1, i32 %a2) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MAXVAL:%.*]] = add nuw nsw i32 [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MAXVAL]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[J_02:%.*]] = phi i32 [ [[ADD31:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ADD31]] = add nuw nsw i32 [[J_02]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[ADD31]], [[MAXVAL]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %maxval = add nuw nsw i32 %a1, %a2 + %cmp = icmp slt i32 %maxval, 1 + br i1 %cmp, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %j.02 = phi i32 [ 0, %entry ], [ %add31, %for.body ] + %add31 = add nuw nsw i32 %j.02, 1 + %cmp22 = icmp slt i32 %add31, %maxval + br i1 %cmp22, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} Index: llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -67,11 +67,9 @@ ; CHECK-NEXT: 
[[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: outer.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ] ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] @@ -81,14 +79,13 @@ ; CHECK: inner: ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] ; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[SUB3]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] ; CHECK: outer.inc.loopexit: ; CHECK-NEXT: br label [[OUTER_INC]] ; CHECK: outer.inc: ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] Index: llvm/test/Transforms/IndVarSimplify/udiv.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/udiv.ll +++ llvm/test/Transforms/IndVarSimplify/udiv.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -indvars -S < %s | FileCheck %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" @@ -8,10 +9,94 @@ ; Indvars shouldn't emit a udiv here, because there's no udiv in the ; original code. This comes from SingleSource/Benchmarks/Shootout/sieve.c. -; CHECK-LABEL: @main( -; CHECK-NOT: div - define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARGC:%.*]], 2 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[WHILE_COND_PREHEADER:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[ARGV:%.*]], i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[ARRAYIDX]] +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @atoi(i8* [[TMP2]]) #1 +; CHECK-NEXT: br label [[WHILE_COND_PREHEADER]] +; CHECK: while.cond.preheader: +; CHECK-NEXT: [[NUM_0_PH:%.*]] = phi i32 [ [[CALL]], [[COND_TRUE]] ], [ 170000, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TOBOOL18:%.*]] = icmp eq i32 [[NUM_0_PH]], 0 +; CHECK-NEXT: br i1 [[TOBOOL18]], label [[WHILE_END:%.*]], label [[BB_NPH30:%.*]] +; CHECK: while.cond.loopexit: +; CHECK-NEXT: [[COUNT_2_LCSSA:%.*]] = phi i32 [ [[COUNT_1_LCSSA:%.*]], [[FOR_COND12_WHILE_COND_LOOPEXIT_CRIT_EDGE:%.*]] ], [ 0, [[FOR_COND12_LOOPEXIT:%.*]] ] +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[DEC19:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]], label [[FOR_COND_PREHEADER:%.*]] +; CHECK: while.cond.while.end_crit_edge: +; CHECK-NEXT: [[COUNT_2_LCSSA_LCSSA:%.*]] = phi i32 [ [[COUNT_2_LCSSA]], [[WHILE_COND]] ] +; CHECK-NEXT: br label [[WHILE_END]] +; CHECK: bb.nph30: +; CHECK-NEXT: br label [[FOR_COND_PREHEADER]] +; CHECK: for.cond.preheader: +; CHECK-NEXT: [[DEC19_IN:%.*]] = phi i32 [ [[NUM_0_PH]], [[BB_NPH30]] ], [ [[DEC19]], [[WHILE_COND]] ] +; CHECK-NEXT: [[DEC19]] = add i32 
[[DEC19_IN]], -1 +; CHECK-NEXT: br i1 true, label [[BB_NPH:%.*]], label [[FOR_COND12_LOOPEXIT]] +; CHECK: for.cond: +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INC:%.*]], 8193 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_FOR_COND12_LOOPEXIT_CRIT_EDGE:%.*]] +; CHECK: for.cond.for.cond12.loopexit_crit_edge: +; CHECK-NEXT: br label [[FOR_COND12_LOOPEXIT]] +; CHECK: bb.nph: +; CHECK-NEXT: br label [[FOR_BODY]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 2, [[BB_NPH]] ], [ [[INC]], [[FOR_COND:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [8193 x i8], [8193 x i8]* @main.flags, i64 0, i64 [[I_02]] +; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX10]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_02]], 1 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.cond12.loopexit: +; CHECK-NEXT: br i1 true, label [[BB_NPH16:%.*]], label [[WHILE_COND_LOOPEXIT:%.*]] +; CHECK: for.cond12: +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i64 [[INC37:%.*]], 8193 +; CHECK-NEXT: br i1 [[EXITCOND1]], label [[FOR_BODY15:%.*]], label [[FOR_COND12_WHILE_COND_LOOPEXIT_CRIT_EDGE]] +; CHECK: for.cond12.while.cond.loopexit_crit_edge: +; CHECK-NEXT: [[COUNT_1_LCSSA]] = phi i32 [ [[COUNT_1:%.*]], [[FOR_COND12:%.*]] ] +; CHECK-NEXT: br label [[WHILE_COND_LOOPEXIT]] +; CHECK: bb.nph16: +; CHECK-NEXT: br label [[FOR_BODY15]] +; CHECK: for.body15: +; CHECK-NEXT: [[COUNT_212:%.*]] = phi i32 [ 0, [[BB_NPH16]] ], [ [[COUNT_1]], [[FOR_COND12]] ] +; CHECK-NEXT: [[I_17:%.*]] = phi i64 [ 2, [[BB_NPH16]] ], [ [[INC37]], [[FOR_COND12]] ] +; CHECK-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [8193 x i8], [8193 x i8]* @main.flags, i64 0, i64 [[I_17]] +; CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[ARRAYIDX17]] +; CHECK-NEXT: [[TOBOOL19:%.*]] = icmp eq i8 [[TMP18]], 0 +; CHECK-NEXT: br i1 [[TOBOOL19]], label [[FOR_INC35:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[ADD:%.*]] = shl i64 [[I_17]], 1 +; CHECK-NEXT: [[CMP243:%.*]] = icmp ult 
i64 [[ADD]], 8193 +; CHECK-NEXT: br i1 [[CMP243]], label [[BB_NPH5:%.*]], label [[FOR_END32:%.*]] +; CHECK: for.cond22: +; CHECK-NEXT: [[CMP24:%.*]] = icmp ult i64 [[ADD31:%.*]], 8193 +; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY25:%.*]], label [[FOR_COND22_FOR_END32_CRIT_EDGE:%.*]] +; CHECK: for.cond22.for.end32_crit_edge: +; CHECK-NEXT: br label [[FOR_END32]] +; CHECK: bb.nph5: +; CHECK-NEXT: br label [[FOR_BODY25]] +; CHECK: for.body25: +; CHECK-NEXT: [[K_04:%.*]] = phi i64 [ [[ADD]], [[BB_NPH5]] ], [ [[ADD31]], [[FOR_COND22:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [8193 x i8], [8193 x i8]* @main.flags, i64 0, i64 [[K_04]] +; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX27]] +; CHECK-NEXT: [[ADD31]] = add nuw nsw i64 [[K_04]], [[I_17]] +; CHECK-NEXT: br label [[FOR_COND22]] +; CHECK: for.end32: +; CHECK-NEXT: [[INC34:%.*]] = add nsw i32 [[COUNT_212]], 1 +; CHECK-NEXT: br label [[FOR_INC35]] +; CHECK: for.inc35: +; CHECK-NEXT: [[COUNT_1]] = phi i32 [ [[INC34]], [[FOR_END32]] ], [ [[COUNT_212]], [[FOR_BODY15]] ] +; CHECK-NEXT: [[INC37]] = add nuw nsw i64 [[I_17]], 1 +; CHECK-NEXT: br label [[FOR_COND12]] +; CHECK: while.end: +; CHECK-NEXT: [[COUNT_0_LCSSA:%.*]] = phi i32 [ [[COUNT_2_LCSSA_LCSSA]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ 0, [[WHILE_COND_PREHEADER]] ] +; CHECK-NEXT: [[CALL40:%.*]] = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 [[COUNT_0_LCSSA]]) #0 +; CHECK-NEXT: ret i32 0 +; entry: %cmp = icmp eq i32 %argc, 2 ; [#uses=1] br i1 %cmp, label %cond.true, label %while.cond.preheader @@ -130,11 +215,29 @@ ; IndVars doesn't emit a udiv in for.body.preheader since SCEVExpander::expand will ; find out there's already a udiv in the original code. 
-; CHECK-LABEL: @foo( -; CHECK: for.body.preheader: -; CHECK-NOT: udiv - define void @foo(double* %p, i64 %n) nounwind { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIV0:%.*]] = udiv i64 [[N:%.*]], 7 +; CHECK-NEXT: [[DIV1:%.*]] = add i64 [[DIV0]], 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 0, [[DIV1]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_03:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[I_03]] +; CHECK-NEXT: store double 0.000000e+00, double* [[ARRAYIDX]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_03]], 1 +; CHECK-NEXT: [[DIVX:%.*]] = udiv i64 [[N]], 7 +; CHECK-NEXT: [[DIV:%.*]] = add i64 [[DIVX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INC]], [[DIV]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: %div0 = udiv i64 %n, 7 ; [#uses=1] %div1 = add i64 %div0, 1