diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -26,7 +26,8 @@ #include "llvm/Support/InstructionCost.h" namespace llvm { -extern cl::opt SCEVCheapExpansionBudget; +extern cl::opt SCEVCheapLoopInvariantExpansionBudget; +extern cl::opt SCEVCheapLoopExitValueExpansionBudget; /// struct for holding enough information to help calculate the cost of the /// given SCEV when expanded into IR. diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -49,7 +49,8 @@ TTI::TargetCostKind CostKind, const llvm::Instruction *I = nullptr) { if (Opcode == Instruction::Select) - return SCEVCheapExpansionBudget.getValue(); + return std::max(SCEVCheapLoopInvariantExpansionBudget.getValue(), + SCEVCheapLoopExitValueExpansionBudget.getValue()); return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); @@ -59,14 +60,16 @@ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, - ArrayRef Args = ArrayRef(), - const Instruction *CxtI = nullptr) { - int ISD = TLI->InstructionOpcodeToISD(Opcode); - if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput) - return SCEVCheapExpansionBudget.getValue() + 1; + ArrayRef Args = ArrayRef(), + const Instruction *CxtI = nullptr) { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput) + return std::max(SCEVCheapLoopInvariantExpansionBudget.getValue(), + SCEVCheapLoopExitValueExpansionBudget.getValue()) + + 1; - return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, 
Op1Info, - Op2Info); + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info); } TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2014,7 +2014,8 @@ // Avoid high cost expansions. Note: This heuristic is questionable in // that our definition of "high cost" is not exactly principled. - if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, + if (Rewriter.isHighCostExpansion(ExitCount, L, + SCEVCheapLoopInvariantExpansionBudget, TTI, PreHeader->getTerminator())) continue; diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -654,8 +654,9 @@ const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget, - TTI, PreHeaderBR)) { + Expander.isHighCostExpansion(TripCountSC, L, + SCEVCheapLoopInvariantExpansionBudget, TTI, + PreHeaderBR)) { LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1385,7 +1385,7 @@ // Check if expansions of this SCEV would count as being high cost. 
bool HighCost = Rewriter.isHighCostExpansion( - ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst); + ExitValue, L, SCEVCheapLoopExitValueExpansionBudget, TTI, Inst); // Note that we must not perform expansions until after // we query *all* the costs, because if we perform temporary expansion diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -36,9 +36,16 @@ using namespace llvm; -cl::opt llvm::SCEVCheapExpansionBudget( - "scev-cheap-expansion-budget", cl::Hidden, cl::init(4), - cl::desc("When performing SCEV expansion only if it is cheap to do, this " +cl::opt llvm::SCEVCheapLoopInvariantExpansionBudget( + "scev-cheap-loop-trip-count-expansion-budget", cl::Hidden, cl::init(8), + cl::desc("When performing SCEV expansion of loop invariants " + "(including trip/exit counts) only if it is cheap to do, this " + "controls the budget that is considered cheap (default = 8)")); + +cl::opt llvm::SCEVCheapLoopExitValueExpansionBudget( + "scev-cheap-loop-exit-value-expansion-budget", cl::Hidden, cl::init(4), + cl::desc("When performing SCEV expansion of loop exit values only if it is " + "cheap to do, this " "controls the budget that is considered cheap (default = 4)")); using namespace PatternMatch; diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -221,8 +221,9 @@ // Do not generate something ridiculous. 
auto *PHTerm = Preheader->getTerminator(); - if (Rewriter.isHighCostExpansion({ InvariantLHS, InvariantRHS }, L, - 2 * SCEVCheapExpansionBudget, TTI, PHTerm)) + if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L, + 2 * SCEVCheapLoopInvariantExpansionBudget, + TTI, PHTerm)) return false; auto *NewLHS = Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm); @@ -630,7 +631,8 @@ return false; // Do not generate something ridiculous even if S is loop invariant. - if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I)) + if (Rewriter.isHighCostExpansion(S, L, SCEVCheapLoopInvariantExpansionBudget, + TTI, I)) return false; auto *IP = GetLoopInvariantInsertPosition(L, I); diff --git a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll @@ -407,16 +407,19 @@ define void @test_08(i32 %n) { ; CHECK-LABEL: @test_08( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX]], -1 +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[UMIN]], 2 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] -; CHECK-NEXT: [[CMP:%.*]] = and i1 
[[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll @@ -537,12 +537,14 @@ define void @test3_neg(i64 %start) { ; CHECK-LABEL: @test3_neg( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 -1) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -562,16 +564,18 @@ define void @test4_neg(i64 %start) { ; CHECK-LABEL: @test4_neg( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = 
phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 25 ; CHECK-NEXT: br i1 [[CMP]], label [[BACKEDGE]], label [[FOR_END:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END]], label [[LOOP]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[LOOP]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=indvars -scev-cheap-expansion-budget=1024 %s | FileCheck %s +; RUN: opt -S -passes=indvars -scev-cheap-loop-trip-count-expansion-budget=1024 -scev-cheap-loop-exit-value-expansion-budget=1024 %s | FileCheck %s ; See https://bugs.llvm.org/show_bug.cgi?id=45360 ; This is reduced from that (runnable) test. 
diff --git a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll --- a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll +++ b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll @@ -115,7 +115,8 @@ ; CHECK-LABEL: @test_range_metadata( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -127,8 +128,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -222,9 +224,10 @@ ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[LIMIT]] to i64 ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START]], i32 64) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[UMAX]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 
[[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -242,8 +245,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND3:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND3]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -293,15 +297,17 @@ ; CHECK-LABEL: @test_guard_one_bb( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] ; CHECK-NEXT: [[WITHIN_LIMITS:%.*]] = icmp ult i64 [[INDVARS_IV]], 64 ; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -333,7 +339,8 @@ ; CHECK-LABEL: @test_guard_in_the_same_bb( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -342,8 +349,9 @@ ; CHECK: for.inc: ; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -378,7 +386,8 @@ ; CHECK-LABEL: @test_guard_in_idom( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -387,8 +396,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -423,7 +433,8 @@ ; CHECK-LABEL: @test_guard_merge_ranges( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 
[[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -432,8 +443,9 @@ ; CHECK-NEXT: [[WITHIN_LIMITS_2:%.*]] = icmp ult i64 [[INDVARS_IV]], 2147483647 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS_2]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/IndVarSimplify/pr45835.ll b/llvm/test/Transforms/IndVarSimplify/pr45835.ll --- a/llvm/test/Transforms/IndVarSimplify/pr45835.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr45835.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=indvars -replexitval=always -S | FileCheck %s --check-prefix=ALWAYS ; RUN: opt < %s -passes=indvars -replexitval=never -S | FileCheck %s --check-prefix=NEVER -; RUN: opt < %s -passes=indvars -replexitval=cheap -scev-cheap-expansion-budget=1 -S | FileCheck %s --check-prefix=CHEAP +; RUN: opt < %s -passes=indvars -replexitval=cheap -scev-cheap-loop-trip-count-expansion-budget=1 -scev-cheap-loop-exit-value-expansion-budget=1 -S | FileCheck %s --check-prefix=CHEAP ; rewriteLoopExitValues() must rewrite all or none of a PHI's values from a given block. 
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll --- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll +++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=indvars -S %s -o - | FileCheck %s +; RUN: opt -passes=indvars -scev-cheap-loop-trip-count-expansion-budget=4 -scev-cheap-loop-exit-value-expansion-budget=4 -S %s -o - | FileCheck %s ; When bailing out in rewriteLoopExitValues() you would be left with a PHI node ; that was not deleted, and the IndVar pass would return an incorrect modified @@ -26,7 +26,7 @@ ; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[INNER]] ], [ [[N]], [[INNER_PREHEADER]] ] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[J_NEXT]] = add nsw i64 [[J]], 1 -; CHECK-NEXT: store i64 undef, i64* @ptr, align 8 +; CHECK-NEXT: store i64 undef, ptr @ptr, align 8 ; CHECK-NEXT: [[COND1:%.*]] = icmp slt i64 [[J]], [[IDX]] ; CHECK-NEXT: br i1 [[COND1]], label [[INNER]], label [[INNER_EXIT:%.*]] ; CHECK: inner_exit: diff --git a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll --- a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll @@ -11,12 +11,46 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[I4]], i64 [[I2]]) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 
[[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], [[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] -; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = 
add i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add i64 [[I8_6]], 1 +; CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: @@ -51,12 +85,46 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[I4]], i64 [[I2]]) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], 
[[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] -; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add nuw nsw i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add nuw nsw i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add nuw nsw i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add nuw nsw i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add nuw nsw i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = add nuw nsw i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add nuw nsw i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add nuw nsw i64 [[I8_6]], 1 +; CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: