Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -890,30 +890,26 @@ } } - auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) { - if (AR->getNoWrapFlags(SCEV::FlagNSW)) + auto HasNoWrap = [&](const SCEVAddRecExpr *AR) { + if (AR->hasNoSelfWrap()) return true; + // Try to find nsw. IntegerType *Ty = cast(AR->getType()); IntegerType *WideTy = IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2); const SCEVAddRecExpr *ExtendAfterOp = dyn_cast(SE.getSignExtendExpr(AR, WideTy)); - if (ExtendAfterOp) { - const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); - const SCEV *ExtendedStep = - SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); - - bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && - ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; + if (!ExtendAfterOp) + return false; - if (NoSignedWrap) - return true; - } + const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); + const SCEV *ExtendedStep = + SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); - // We may have proved this when computing the sign extension above. - return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap; + return ExtendAfterOp->getStart() == ExtendedStart && + ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; }; // `ICI` is interpreted as taking the backedge if the *next* value of the @@ -925,7 +921,7 @@ FailureReason = "LHS in icmp not induction variable"; return None; } - if (ICI->isEquality() && !HasNoSignedWrap(IndVarBase)) { + if (ICI->isEquality() && !HasNoWrap(IndVarBase)) { FailureReason = "LHS in icmp needs nsw for equality predicates"; return None; } Index: test/Transforms/IRCE/variable-loop-bounds.ll =================================================================== --- test/Transforms/IRCE/variable-loop-bounds.ll +++ test/Transforms/IRCE/variable-loop-bounds.ll @@ -9,6 +9,12 @@ ; CHECK: irce: in function signed_var_imm_dec_sge: constrained Loop at depth 1 containing: %for.body
,%if.else,%for.inc ; CHECK: irce: in function signed_var_imm_dec_ne: constrained Loop at depth 1 containing: %for.body
,%if.else,%for.inc ; CHECK-NOT: irce: in function signed_var_imm_dec_eq: constrained Loop at depth 1 containing: %for.body
,%if.else,%for.inc +; CHECK: irce: in function test_inc_eq_var_nuw: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +; CHECK-NOT: irce: in function test_inc_ne_var_nuw: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +; CHECK: irce: in function test_inc_ult_var_nuw: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +; CHECK: irce: in function test_dec_ult_ugt_var_imm_nuw: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +; CHECK-NOT: irce: in function inc_subrange_var_var_ult_nuw: constrained Loop +; CHECK-NOT: irce: in function inc_subrange_var_var_ult: constrained Loop ; CHECK-LABEL: test_inc_eq( ; CHECK: main.exit.selector: @@ -352,3 +358,251 @@ %cmp = icmp eq i32 %dec, %M br i1 %cmp, label %for.cond.cleanup, label %for.body } + +; CHECK-LABEL: test_inc_eq_var_nuw( +; CHECK: for.body: +; CHECK: br i1 true, label %if.then, label %if.else + +; CHECK: main.exit.selector: +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], %N +; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit + +; CHECK: for.body.postloop: +; CHECK: [[POST_IV:%[^ ]+]] = phi i32 [ %inc.postloop, %for.inc.postloop ] +; CHECK: [[POST_COND:%[^ ]+]] = icmp ult i32 [[POST_IV]], %M +; CHECK: br i1 [[POST_COND]], label %if.then.postloop, label %if.else.postloop +define void @test_inc_eq_var_nuw(i32* %a, i32* %b, i32* %c, i32 %N, i32 %M) { +entry: + %cmp16 = icmp ugt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, %M + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw i32 %i.017, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: test_inc_ne_var_nuw +; TODO Don't know why eq works but ne doesn't. +define void @test_inc_ne_var_nuw(i32* %a, i32* %b, i32* %c, i32 %N, i32 %M) { +entry: + %cmp16 = icmp ugt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, %M + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw i32 %i.017, 1 + %exitcond = icmp ne i32 %inc, %N + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: test_inc_ult_var_nuw +; CHECK: for.body: +; CHECK: br i1 true, label %if.then, label %if.else + +; CHECK: main.exit.selector: +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %inc, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], %N +; CHECK: br i1 [[COND]], label %main.pseudo.exit, label %for.cond.cleanup.loopexit + +; CHECK: for.body.postloop: +; CHECK: [[POST_IV:%[^ ]+]] = phi i32 [ %inc.postloop, %for.inc.postloop ] +; CHECK: [[POST_COND:%[^ ]+]] = icmp ult i32 [[POST_IV]], %M +; CHECK: br i1 [[POST_COND]], label %if.then.postloop, label %if.else.postloop +define void @test_inc_ult_var_nuw(i32* %a, i32* %b, i32* %c, i32 %N, i32 %M) { +entry: + %cmp16 = icmp ugt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, %M + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw i32 %i.017, 1 + %exitcond = icmp ult i32 %inc, %N + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: test_dec_ult_ugt_var_imm_nuw( +; CHECK: for.body: +; CHECK: [[IV:%[^ ]+]] = phi i32 [ %inc, %for.inc ], [ %i.017.preloop.copy, %mainloop ] +; CHECK: [[COND:%[^ ]+]] = icmp ult i32 [[IV]], %M +; CHECK: br i1 true, label %if.then, label %if.else + +; CHECK: for.body.preloop: +; CHECK: [[PRE_IV:%[^ ]+]] = phi i32 [ %inc.preloop, %for.inc.preloop ], [ 1024, %for.body.preloop.preheader ] +; CHECK: [[PRE_COND:%[^ ]+]] = icmp ult i32 [[PRE_IV]], %M +; CHECK: br i1 [[PRE_COND]], label %if.then.preloop, label %if.else.preloop +define void @test_dec_ult_ugt_var_imm_nuw(i32* %a, i32* %b, i32* %c, i32 %N, i32 %M) { +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 1024, %entry ] + %cmp1 = icmp ult i32 %i.017, %M + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw i32 %i.017, -1 + %exitcond = icmp ugt i32 %inc, 0 + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; TODO: IRCE should be able to understand that the (N-10):(N-1) is safe. +define void @inc_subrange_var_var_ult_nuw(i32* %a, i32* %b, i32 %N, i32 %M) { +entry: + %begin = add nuw i32 %N, -10 + %cmp0 = icmp ugt i32 %begin, 0 + br i1 %cmp0, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %iv = phi i32 [ %begin, %entry ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %iv, %M + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %arrayidxA = getelementptr inbounds i32, i32* %a, i32 %iv + store i32 %iv, i32* %arrayidxA, align 4 + br label %for.inc + +if.else: + %arrayidxB = getelementptr inbounds i32, i32* %b, i32 %iv + store i32 %iv, i32* %arrayidxB, align 4 + br label %for.inc + +for.inc: + %inc = add nuw i32 %iv, 1 + %cmp2 = icmp ult i32 %inc, %N + br i1 %cmp2, label %for.body, label %for.cond.cleanup +} + +; TODO: IRCE can determine that %iv is nuw, but it also needs to understand +; that the (N-10):(N-1) range is safe. +define void @inc_subrange_var_var_ult(i32* %a, i32* %b, i32 %N, i32 %M) { +entry: + %begin = add nuw i32 %N, -10 + %cmp0 = icmp ugt i32 %begin, 0 + br i1 %cmp0, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %iv = phi i32 [ %begin, %entry ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %iv, %M + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %arrayidxA = getelementptr inbounds i32, i32* %a, i32 %iv + store i32 %iv, i32* %arrayidxA, align 4 + br label %for.inc + +if.else: + %arrayidxB = getelementptr inbounds i32, i32* %b, i32 %iv + store i32 %iv, i32* %arrayidxB, align 4 + br label %for.inc + +for.inc: + %inc = add i32 %iv, 1 + %cmp2 = icmp ult i32 %inc, %N + br i1 %cmp2, label %for.body, label %for.cond.cleanup +}