diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -26,7 +26,8 @@ #include "llvm/Support/InstructionCost.h" namespace llvm { -extern cl::opt<unsigned> SCEVCheapExpansionBudget; +extern cl::opt<unsigned> SCEVCheapLoopInvariantExpansionBudget; +extern cl::opt<unsigned> SCEVCheapLoopExitValueExpansionBudget; /// struct for holding enough information to help calculate the cost of the /// given SCEV when expanded into IR. diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -49,7 +49,8 @@ TTI::TargetCostKind CostKind, const llvm::Instruction *I = nullptr) { if (Opcode == Instruction::Select) - return SCEVCheapExpansionBudget.getValue(); + return std::max(SCEVCheapLoopInvariantExpansionBudget.getValue(), + SCEVCheapLoopExitValueExpansionBudget.getValue()); return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); @@ -59,14 +60,16 @@ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, - ArrayRef<const Value *> Args = ArrayRef<const Value *>(), - const Instruction *CxtI = nullptr) { - int ISD = TLI->InstructionOpcodeToISD(Opcode); - if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput) - return SCEVCheapExpansionBudget.getValue() + 1; + ArrayRef<const Value *> Args = ArrayRef<const Value *>(), + const Instruction *CxtI = nullptr) { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput) + return std::max(SCEVCheapLoopInvariantExpansionBudget.getValue(), + SCEVCheapLoopExitValueExpansionBudget.getValue()) + + 1; - return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, 
Op1Info, - Op2Info); + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info); } TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2014,7 +2014,8 @@ // Avoid high cost expansions. Note: This heuristic is questionable in // that our definition of "high cost" is not exactly principled. - if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, + if (Rewriter.isHighCostExpansion(ExitCount, L, + SCEVCheapLoopInvariantExpansionBudget, TTI, PreHeader->getTerminator())) continue; diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -654,8 +654,9 @@ const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget, - TTI, PreHeaderBR)) { + Expander.isHighCostExpansion(TripCountSC, L, + SCEVCheapLoopInvariantExpansionBudget, TTI, + PreHeaderBR)) { LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1385,7 +1385,7 @@ // Check if expansions of this SCEV would count as being high cost. 
bool HighCost = Rewriter.isHighCostExpansion( - ExitValue, L, SCEVCheapExpansionBudget, TTI, Inst); + ExitValue, L, SCEVCheapLoopExitValueExpansionBudget, TTI, Inst); // Note that we must not perform expansions until after // we query *all* the costs, because if we perform temporary expansion diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -36,9 +36,16 @@ using namespace llvm; -cl::opt<unsigned> llvm::SCEVCheapExpansionBudget( - "scev-cheap-expansion-budget", cl::Hidden, cl::init(4), - cl::desc("When performing SCEV expansion only if it is cheap to do, this " +cl::opt<unsigned> llvm::SCEVCheapLoopInvariantExpansionBudget( + "scev-cheap-loop-trip-count-expansion-budget", cl::Hidden, cl::init(8), + cl::desc("When performing SCEV expansion of loop invariants " + "(including trip/exit counts) only if it is cheap to do, this " + "controls the budget that is considered cheap (default = 8)")); + +cl::opt<unsigned> llvm::SCEVCheapLoopExitValueExpansionBudget( + "scev-cheap-loop-exit-value-expansion-budget", cl::Hidden, cl::init(4), + cl::desc("When performing SCEV expansion of loop exit values only if it is " + "cheap to do, this " "controls the budget that is considered cheap (default = 4)")); using namespace PatternMatch; diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -221,8 +221,9 @@ // Do not generate something ridiculous. 
auto *PHTerm = Preheader->getTerminator(); - if (Rewriter.isHighCostExpansion({ InvariantLHS, InvariantRHS }, L, - 2 * SCEVCheapExpansionBudget, TTI, PHTerm)) + if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L, + 2 * SCEVCheapLoopInvariantExpansionBudget, + TTI, PHTerm)) return false; auto *NewLHS = Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm); @@ -630,7 +631,8 @@ return false; // Do not generate something ridiculous even if S is loop invariant. - if (Rewriter.isHighCostExpansion(S, L, SCEVCheapExpansionBudget, TTI, I)) + if (Rewriter.isHighCostExpansion(S, L, SCEVCheapLoopInvariantExpansionBudget, + TTI, I)) return false; auto *IP = GetLoopInvariantInsertPosition(L, I); diff --git a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll @@ -7,7 +7,7 @@ ; General case: without extra knowledge, trunc cannot be eliminated. define void @test_00(i64 %start, i32 %n) { ; -; CHECK-LABEL: @test_00( +; CHECK-LABEL: define {{[^@]+}}@test_00( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -34,7 +34,7 @@ define void @test_01(i32 %n) { ; -; CHECK-LABEL: @test_01( +; CHECK-LABEL: define {{[^@]+}}@test_01( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 @@ -63,7 +63,7 @@ ; Max value at which we can eliminate trunc: SINT_MAX - 1. define void @test_02(i32 %n) { ; -; CHECK-LABEL: @test_02( +; CHECK-LABEL: define {{[^@]+}}@test_02( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 2147483646) ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 @@ -92,7 +92,7 @@ ; If we start from SINT_MAX then the predicate is always false. 
define void @test_03(i32 %n) { ; -; CHECK-LABEL: @test_03( +; CHECK-LABEL: define {{[^@]+}}@test_03( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -115,7 +115,7 @@ ; Minimum value at which we can apply the transform: SINT_MIN + 1. define void @test_04(i32 %n) { ; -; CHECK-LABEL: @test_04( +; CHECK-LABEL: define {{[^@]+}}@test_04( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 -2147483647) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SMAX]], 1 @@ -144,7 +144,7 @@ ; FIXME: Harmful LFTR should be thrown away. define void @test_05(i32 %n) { ; -; CHECK-LABEL: @test_05( +; CHECK-LABEL: define {{[^@]+}}@test_05( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -172,7 +172,7 @@ ; Trunc changes the actual value of the IV, so it is invalid to remove it: SINT_MIN - 1. define void @test_06(i32 %n) { ; -; CHECK-LABEL: @test_06( +; CHECK-LABEL: define {{[^@]+}}@test_06( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -194,7 +194,7 @@ ; General case: without extra knowledge, trunc cannot be eliminated. define void @test_00_unsigned(i64 %start, i32 %n) { -; CHECK-LABEL: @test_00_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_00_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -220,7 +220,7 @@ ; FIXME: Harmful LFTR should be thrown away. define void @test_01_unsigned(i32 %n) { -; CHECK-LABEL: @test_01_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_01_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -247,7 +247,7 @@ ; Max value at which we can eliminate trunc: UINT_MAX - 1. 
define void @test_02_unsigned(i32 %n) { -; CHECK-LABEL: @test_02_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_02_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 -2) ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[UMAX]], 1 @@ -275,7 +275,7 @@ ; If we start from UINT_MAX then the predicate is always false. define void @test_03_unsigned(i32 %n) { -; CHECK-LABEL: @test_03_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_03_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -297,7 +297,7 @@ ; Minimum value at which we can apply the transform: UINT_MIN. define void @test_04_unsigned(i32 %n) { -; CHECK-LABEL: @test_04_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_04_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -324,7 +324,7 @@ ; Start from 1. define void @test_05_unsigned(i32 %n) { -; CHECK-LABEL: @test_05_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_05_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], 1 @@ -352,7 +352,7 @@ ; Trunc changes the actual value of the IV, so it is invalid to remove it: UINT_MIN - 1. define void @test_06_unsigned(i32 %n) { -; CHECK-LABEL: @test_06_unsigned( +; CHECK-LABEL: define {{[^@]+}}@test_06_unsigned( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -374,7 +374,7 @@ ; Do not eliminate trunc if it is used by something different from icmp. define void @test_07(i32* %p, i32 %n) { -; CHECK-LABEL: @test_07( +; CHECK-LABEL: define {{[^@]+}}@test_07( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 @@ -405,18 +405,21 @@ ; Check that we can eliminate both signed and unsigned compare. 
define void @test_08(i32 %n) { -; CHECK-LABEL: @test_08( +; CHECK-LABEL: define {{[^@]+}}@test_08( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[SMAX]], -1 +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i32 [[UMIN]], 2 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[IV]], [[SEXT]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[IV]], [[ZEXT]] -; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -436,7 +439,7 @@ ; Widen NE as unsigned. define void @test_09(i32 %n) { -; CHECK-LABEL: @test_09( +; CHECK-LABEL: define {{[^@]+}}@test_09( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -462,7 +465,7 @@ ; Widen NE as signed. 
define void @test_10(i32 %n) { -; CHECK-LABEL: @test_10( +; CHECK-LABEL: define {{[^@]+}}@test_10( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 100 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 @@ -492,7 +495,7 @@ } define void @test_11() { -; CHECK-LABEL: @test_11( +; CHECK-LABEL: define {{[^@]+}}@test_11( ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: br i1 undef, label [[BB2:%.*]], label [[BB6:%.*]] @@ -541,7 +544,7 @@ ; Show that we can turn signed comparison to unsigned and use zext while ; comparing non-negative values. define void @test_12(i32* %p) { -; CHECK-LABEL: @test_12( +; CHECK-LABEL: define {{[^@]+}}@test_12( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 1) @@ -570,7 +573,7 @@ define void @test_13a(i32 %n) { ; -; CHECK-LABEL: @test_13a( +; CHECK-LABEL: define {{[^@]+}}@test_13a( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 1024 to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -596,7 +599,7 @@ define void @test_13b(i32 %n) { ; -; CHECK-LABEL: @test_13b( +; CHECK-LABEL: define {{[^@]+}}@test_13b( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 1024 to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -622,7 +625,7 @@ define void @test_13c(i32 %n) { ; -; CHECK-LABEL: @test_13c( +; CHECK-LABEL: define {{[^@]+}}@test_13c( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 1024 to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -648,7 +651,7 @@ define void @test_13d(i32 %n) { ; -; CHECK-LABEL: @test_13d( +; CHECK-LABEL: define {{[^@]+}}@test_13d( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SEXT:%.*]] = sext i32 1024 to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll --- 
a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll @@ -4,7 +4,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @test1(i64 %start) { -; CHECK-LABEL: @test1( +; CHECK-LABEL: define {{[^@]+}}@test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -27,7 +27,7 @@ } define void @test1.next(i64 %start) { -; CHECK-LABEL: @test1.next( +; CHECK-LABEL: define {{[^@]+}}@test1.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -51,7 +51,7 @@ } define void @test2(i64 %start) { -; CHECK-LABEL: @test2( +; CHECK-LABEL: define {{[^@]+}}@test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -74,7 +74,7 @@ } define void @test2.next(i64 %start) { -; CHECK-LABEL: @test2.next( +; CHECK-LABEL: define {{[^@]+}}@test2.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -99,7 +99,7 @@ ; As long as the test dominates the backedge, we're good define void @test3(i64 %start) { -; CHECK-LABEL: @test3( +; CHECK-LABEL: define {{[^@]+}}@test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -134,7 +134,7 @@ } define void @test3.next(i64 %start) { -; CHECK-LABEL: @test3.next( +; CHECK-LABEL: define {{[^@]+}}@test3.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -171,7 +171,7 @@ define void @test4(i64 %start) { -; CHECK-LABEL: @test4( +; CHECK-LABEL: define {{[^@]+}}@test4( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -206,7 +206,7 @@ } define void @test4.next(i64 %start) { -; CHECK-LABEL: @test4.next( +; CHECK-LABEL: define {{[^@]+}}@test4.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -242,7 
+242,7 @@ } define void @test5(i64 %start) { -; CHECK-LABEL: @test5( +; CHECK-LABEL: define {{[^@]+}}@test5( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -277,7 +277,7 @@ } define void @test5.next(i64 %start) { -; CHECK-LABEL: @test5.next( +; CHECK-LABEL: define {{[^@]+}}@test5.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -314,7 +314,7 @@ define void @test6(i64 %start) { -; CHECK-LABEL: @test6( +; CHECK-LABEL: define {{[^@]+}}@test6( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -349,7 +349,7 @@ } define void @test6.next(i64 %start) { -; CHECK-LABEL: @test6.next( +; CHECK-LABEL: define {{[^@]+}}@test6.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[START:%.*]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -385,7 +385,7 @@ } define void @test7(i64 %start, i64* %inc_ptr) { -; CHECK-LABEL: @test7( +; CHECK-LABEL: define {{[^@]+}}@test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 @@ -416,7 +416,7 @@ } define void @test7.next(i64 %start, i64* %inc_ptr) { -; CHECK-LABEL: @test7.next( +; CHECK-LABEL: define {{[^@]+}}@test7.next( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 @@ -450,7 +450,7 @@ ; Negative test - we can't show that the internal branch executes, so we can't ; fold the test to a loop invariant one. define void @test1_neg(i64 %start) { -; CHECK-LABEL: @test1_neg( +; CHECK-LABEL: define {{[^@]+}}@test1_neg( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -493,7 +493,7 @@ ; Slightly subtle version of @test4 where the icmp dominates the backedge, ; but the exit branch doesn't. 
define void @test2_neg(i64 %start) { -; CHECK-LABEL: @test2_neg( +; CHECK-LABEL: define {{[^@]+}}@test2_neg( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -535,14 +535,16 @@ ; The branch has to exit the loop if the condition is true define void @test3_neg(i64 %start) { -; CHECK-LABEL: @test3_neg( +; CHECK-LABEL: define {{[^@]+}}@test3_neg( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 -1) +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -560,18 +562,20 @@ } define void @test4_neg(i64 %start) { -; CHECK-LABEL: @test4_neg( +; CHECK-LABEL: define {{[^@]+}}@test4_neg( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 25 ; CHECK-NEXT: br i1 [[CMP]], 
label [[BACKEDGE]], label [[FOR_END:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END]], label [[LOOP]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[LOOP]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -598,7 +602,7 @@ } define void @test5_neg(i64 %start, i64 %inc) { -; CHECK-LABEL: @test5_neg( +; CHECK-LABEL: define {{[^@]+}}@test5_neg( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -623,7 +627,7 @@ } define void @test8(i64 %start, i64* %inc_ptr) { -; CHECK-LABEL: @test8( +; CHECK-LABEL: define {{[^@]+}}@test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 @@ -658,7 +662,7 @@ ; check to handle loops without preheaders, but invariant operands ; (we handle this today by inserting a preheader) define void @test9(i1 %cnd, i64 %start) { -; CHECK-LABEL: @test9( +; CHECK-LABEL: define {{[^@]+}}@test9( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[CND:%.*]], label [[ENTRY1:%.*]], label [[ENTRY2:%.*]] ; CHECK: entry1: @@ -696,7 +700,7 @@ ; we have a "loop" which is known to run exactly one iteration but ; haven't yet simplified the uses of the IV define void @test10() { -; CHECK-LABEL: @test10( +; CHECK-LABEL: define {{[^@]+}}@test10( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -740,7 +744,7 @@ ; check that we can figure out that iv.next > 1 from the facts that iv >= 0 and ; iv.start != 0. 
define void @test11(i64* %inc_ptr) { -; CHECK-LABEL: @test11( +; CHECK-LABEL: define {{[^@]+}}@test11( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0]] ; CHECK-NEXT: [[NE_COND:%.*]] = icmp ne i64 [[INC]], 0 @@ -791,7 +795,7 @@ ; check that we can prove that a recurrency is greater than another recurrency ; in the same loop, with the same step, and with smaller starting value. define void @test12(i64* %inc_ptr) { -; CHECK-LABEL: @test12( +; CHECK-LABEL: define {{[^@]+}}@test12( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0]] ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=indvars -scev-cheap-expansion-budget=1024 %s | FileCheck %s +; RUN: opt -S -passes=indvars -scev-cheap-loop-trip-count-expansion-budget=1024 -scev-cheap-loop-exit-value-expansion-budget=1024 %s | FileCheck %s ; See https://bugs.llvm.org/show_bug.cgi?id=45360 ; This is reduced from that (runnable) test. @@ -17,7 +17,7 @@ @e = dso_local global i32 0, align 4 define i32 @main() { -; CHECK-LABEL: @main( +; CHECK-LABEL: define {{[^@]+}}@main( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I6:%.*]] = load i32, i32* @a, align 4 ; CHECK-NEXT: [[I24:%.*]] = load i32, i32* @b, align 4 diff --git a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll --- a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll +++ b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll @@ -8,7 +8,7 @@ ; In order to do this indvars need to prove that the narrow IV def (%i.inc) ; is not-negative from the range check inside of the loop. 
define void @test(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test( +; CHECK-LABEL: define {{[^@]+}}@test( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START:%.*]], i32 64) ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[UMAX]], [[START]] @@ -60,7 +60,7 @@ } define void @test_false_edge(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_false_edge( +; CHECK-LABEL: define {{[^@]+}}@test_false_edge( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START:%.*]], i32 65) ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[UMAX]], [[START]] @@ -112,10 +112,11 @@ } define void @test_range_metadata(i32* %array_length_ptr, i32* %base, -; CHECK-LABEL: @test_range_metadata( +; CHECK-LABEL: define {{[^@]+}}@test_range_metadata( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -127,8 +128,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -166,7 +168,7 @@ ; Negative version of the test above, we don't know anything about ; array_length_ptr range. 
define void @test_neg(i32* %array_length_ptr, i32* %base, -; CHECK-LABEL: @test_neg( +; CHECK-LABEL: define {{[^@]+}}@test_neg( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -218,13 +220,14 @@ } define void @test_transitive_use(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_transitive_use( +; CHECK-LABEL: define {{[^@]+}}@test_transitive_use( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[LIMIT]] to i64 ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START]], i32 64) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[UMAX]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -242,8 +245,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND3:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND3]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -290,18 +294,20 @@ declare void @llvm.experimental.guard(i1, ...) 
define void @test_guard_one_bb(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_guard_one_bb( +; CHECK-LABEL: define {{[^@]+}}@test_guard_one_bb( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] ; CHECK-NEXT: [[WITHIN_LIMITS:%.*]] = icmp ult i64 [[INDVARS_IV]], 64 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -330,10 +336,11 @@ } define void @test_guard_in_the_same_bb(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_guard_in_the_same_bb( +; CHECK-LABEL: define {{[^@]+}}@test_guard_in_the_same_bb( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -342,8 +349,9 @@ ; 
CHECK: for.inc: ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -375,10 +383,11 @@ } define void @test_guard_in_idom(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_guard_in_idom( +; CHECK-LABEL: define {{[^@]+}}@test_guard_in_idom( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -387,8 +396,9 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -420,10 +430,11 @@ } define void @test_guard_merge_ranges(i32* %base, i32 %limit, i32 %start) { -; CHECK-LABEL: @test_guard_merge_ranges( +; 
CHECK-LABEL: define {{[^@]+}}@test_guard_merge_ranges( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[START]], 1 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ] @@ -432,8 +443,9 @@ ; CHECK-NEXT: [[WITHIN_LIMITS_2:%.*]] = icmp ult i64 [[INDVARS_IV]], 2147483647 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS_2]]) [ "deopt"() ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/IndVarSimplify/pr45835.ll b/llvm/test/Transforms/IndVarSimplify/pr45835.ll --- a/llvm/test/Transforms/IndVarSimplify/pr45835.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr45835.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=indvars -replexitval=always -S | FileCheck %s --check-prefix=ALWAYS ; RUN: opt < %s -passes=indvars -replexitval=never -S | FileCheck %s --check-prefix=NEVER -; RUN: opt < %s -passes=indvars -replexitval=cheap -scev-cheap-expansion-budget=1 -S | FileCheck %s --check-prefix=CHEAP +; RUN: opt < %s -passes=indvars -replexitval=cheap -scev-cheap-loop-trip-count-expansion-budget=1 -scev-cheap-loop-exit-value-expansion-budget=1 -S | FileCheck %s 
--check-prefix=CHEAP ; rewriteLoopExitValues() must rewrite all or none of a PHI's values from a given block. @@ -10,7 +10,7 @@ @a = common global i8 0, align 1 define internal fastcc void @d(i8* %c) unnamed_addr #0 { -; ALWAYS-LABEL: @d( +; ALWAYS-LABEL: define {{[^@]+}}@d( ; ALWAYS-NEXT: entry: ; ALWAYS-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[C:%.*]], i64 -65535 ; ALWAYS-NEXT: [[TMP0:%.*]] = icmp ugt i8* [[C]], @a @@ -32,7 +32,7 @@ ; ALWAYS: while.end: ; ALWAYS-NEXT: ret void ; -; NEVER-LABEL: @d( +; NEVER-LABEL: define {{[^@]+}}@d( ; NEVER-NEXT: entry: ; NEVER-NEXT: [[CMP:%.*]] = icmp ule i8* [[C:%.*]], @a ; NEVER-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[C]], i64 -65535 @@ -54,7 +54,7 @@ ; NEVER: while.end: ; NEVER-NEXT: ret void ; -; CHEAP-LABEL: @d( +; CHEAP-LABEL: define {{[^@]+}}@d( ; CHEAP-NEXT: entry: ; CHEAP-NEXT: [[CMP:%.*]] = icmp ule i8* [[C:%.*]], @a ; CHEAP-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[C]], i64 -65535 diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll --- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll +++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=indvars -S %s -o - | FileCheck %s +; RUN: opt -passes=indvars -scev-cheap-loop-trip-count-expansion-budget=4 -scev-cheap-loop-exit-value-expansion-budget=4 -S %s -o - | FileCheck %s ; When bailing out in rewriteLoopExitValues() you would be left with a PHI node ; that was not deleted, and the IndVar pass would return an incorrect modified diff --git a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll --- a/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/high-cost-expansion.ll @@ -11,12 +11,46 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], 
[[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[I4]], i64 [[I2]]) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], [[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] 
-; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = add i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add i64 [[I8_6]], 1 +; CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: @@ -51,12 +85,46 @@ ; CHECK-NEXT: [[I5:%.*]] = icmp sgt i64 [[I4]], [[I2]] ; CHECK-NEXT: br i1 [[I5]], label [[BB10:%.*]], label [[BB6_PREHEADER:%.*]] ; CHECK: bb6.preheader: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[I4]], i64 [[I2]]) +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[I3]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[I3]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7 +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[BB6_PROL_PREHEADER:%.*]], label [[BB6_PROL_LOOPEXIT:%.*]] +; CHECK: bb6.prol.preheader: +; CHECK-NEXT: br label [[BB6_PROL:%.*]] +; CHECK: bb6.prol: +; CHECK-NEXT: [[I7_PROL:%.*]] = phi i64 [ [[I8_PROL:%.*]], [[BB6_PROL]] ], [ [[I4]], [[BB6_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, [[BB6_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], [[BB6_PROL]] ] +; CHECK-NEXT: [[I8_PROL]] = add i64 [[I7_PROL]], 1 +; CHECK-NEXT: [[I9_PROL:%.*]] = icmp slt i64 [[I7_PROL]], [[I2]] +; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 +; CHECK-NEXT: 
[[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[BB6_PROL]], label [[BB6_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: bb6.prol.loopexit.unr-lcssa: +; CHECK-NEXT: [[I7_UNR_PH:%.*]] = phi i64 [ [[I8_PROL]], [[BB6_PROL]] ] +; CHECK-NEXT: br label [[BB6_PROL_LOOPEXIT]] +; CHECK: bb6.prol.loopexit: +; CHECK-NEXT: [[I7_UNR:%.*]] = phi i64 [ [[I4]], [[BB6_PREHEADER]] ], [ [[I7_UNR_PH]], [[BB6_PROL_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label [[BB10_LOOPEXIT:%.*]], label [[BB6_PREHEADER_NEW:%.*]] +; CHECK: bb6.preheader.new: ; CHECK-NEXT: br label [[BB6:%.*]] ; CHECK: bb6: -; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I8:%.*]], [[BB6]] ], [ [[I4]], [[BB6_PREHEADER]] ] -; CHECK-NEXT: [[I8]] = add i64 [[I7]], 1 -; CHECK-NEXT: [[I9:%.*]] = icmp slt i64 [[I7]], [[I2]] -; CHECK-NEXT: br i1 [[I9]], label [[BB6]], label [[BB10_LOOPEXIT:%.*]] +; CHECK-NEXT: [[I7:%.*]] = phi i64 [ [[I7_UNR]], [[BB6_PREHEADER_NEW]] ], [ [[I8_7:%.*]], [[BB6]] ] +; CHECK-NEXT: [[I8:%.*]] = add nuw nsw i64 [[I7]], 1 +; CHECK-NEXT: [[I8_1:%.*]] = add nuw nsw i64 [[I8]], 1 +; CHECK-NEXT: [[I8_2:%.*]] = add nuw nsw i64 [[I8_1]], 1 +; CHECK-NEXT: [[I8_3:%.*]] = add nuw nsw i64 [[I8_2]], 1 +; CHECK-NEXT: [[I8_4:%.*]] = add nuw nsw i64 [[I8_3]], 1 +; CHECK-NEXT: [[I8_5:%.*]] = add nuw nsw i64 [[I8_4]], 1 +; CHECK-NEXT: [[I8_6:%.*]] = add nuw nsw i64 [[I8_5]], 1 +; CHECK-NEXT: [[I8_7]] = add nuw nsw i64 [[I8_6]], 1 +; CHECK-NEXT: [[I9_7:%.*]] = icmp slt i64 [[I8_6]], [[I2]] +; CHECK-NEXT: br i1 [[I9_7]], label [[BB6]], label [[BB10_LOOPEXIT_UNR_LCSSA:%.*]] +; CHECK: bb10.loopexit.unr-lcssa: +; CHECK-NEXT: br label [[BB10_LOOPEXIT]] ; CHECK: bb10.loopexit: ; CHECK-NEXT: br label [[BB10]] ; CHECK: bb10: