diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1037,6 +1037,11 @@ bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); + /// Test whether the condition described by Pred, LHS, and RHS is true. + /// Use only simple non-recursive types of checks, such as range analysis etc. + bool isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + /// Check whether the condition described by Pred, LHS, and RHS is true or /// false. If we know it, return the evaluation of this condition. If neither /// is proved, return None. @@ -1843,11 +1848,6 @@ const SCEV *FoundLHS, const SCEV *FoundRHS, unsigned Depth = 0); - /// Test whether the condition described by Pred, LHS, and RHS is true. - /// Use only simple non-recursive types of checks, such as range analysis etc. - bool isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS); - /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2816,6 +2816,8 @@ // Count holds the overall loop count (N). TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(), InsertBlock->getTerminator()); + // Cache SCEV for TripCount for later lookups. + SE->getSCEV(TripCount); if (TripCount->getType()->isPointerTy()) TripCount = @@ -7665,7 +7667,7 @@ // 2. Copy and widen instructions from the old loop into the new loop. 
BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr), ILV.getOrCreateVectorTripCount(nullptr), - CanonicalIVStartValue, State, + CanonicalIVStartValue, State, *PSE.getSE(), IsEpilogueVectorization); BestVPlan.execute(&State); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2553,7 +2553,7 @@ /// Prepare the plan for execution, setting up the required live-in values. void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State, - bool IsEpilogueVectorization); + ScalarEvolution &SE, bool IsEpilogueVectorization); /// Generate the IR code for this VPlan. void execute(VPTransformState *State); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -588,7 +588,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, Value *CanonicalIVStartValue, - VPTransformState &State, + VPTransformState &State, ScalarEvolution &SE, bool IsEpilogueVectorization) { VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock(); @@ -598,13 +598,14 @@ // terminator is: // 1. BranchOnCount, or // 2. BranchOnCond where the input is Not(ActiveLaneMask). 
- if (!IsEpilogueVectorization && Term && isa<ConstantInt>(TripCountV) && + if (!IsEpilogueVectorization && Term && (Term->getOpcode() == VPInstruction::BranchOnCount || (Term->getOpcode() == VPInstruction::BranchOnCond && canSimplifyBranchOnCond(Term)))) { - ConstantInt *C = cast<ConstantInt>(TripCountV); - uint64_t TCVal = C->getZExtValue(); - if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) { + const SCEV *TCS = SE.getSCEV(TripCountV); + const SCEV *C = + SE.getConstant(TCS->getType(), State.VF.getKnownMinValue() * State.UF); + if (SE.isKnownViaNonRecursiveReasoning(CmpInst::ICMP_ULE, TCS, C)) { auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {getOrAddExternalDef(State.Builder.getTrue())}); diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -696,8 +696,7 @@ ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE6]] ; UNROLL-NOSIMPLIFY: pred.store.continue4: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; UNROLL-NOSIMPLIFY-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; UNROLL-NOSIMPLIFY: middle.block: ; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -3141,15 +3141,14 @@ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0]] = and
<2 x i32> [[VEC_PHI]], <i32 4, i32 4> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] @@ -3159,7 +3158,7 @@ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]], 0 ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[A_0_AND_LCSSA]] ; ; IND-LABEL: @max_i32_backedgetaken( @@ -3168,7 +3167,7 @@ ; IND: vector.ph: ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: -; IND-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; IND-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; IND: middle.block: ; IND-NEXT: br i1 poison, label [[EXIT:%.*]], label
[[SCALAR_PH]] ; IND: scalar.ph: @@ -3187,7 +3186,7 @@ ; UNROLL: vector.ph: ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: -; UNROLL-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; UNROLL-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: br i1 poison, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: @@ -3212,16 +3211,15 @@ ; UNROLL-NO-IC-NEXT: [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4> ; UNROLL-NO-IC-NEXT: [[TMP1]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 -; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP1]], [[TMP0]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] @@ -3231,7 +3229,7 @@ ; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i32 [[B_NEXT]],
0 ; UNROLL-NO-IC-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]] ; UNROLL-NO-IC: exit: -; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[A_0_AND_LCSSA]] ; ; INTERLEAVE-LABEL: @max_i32_backedgetaken( @@ -3240,7 +3238,7 @@ ; INTERLEAVE: vector.ph: ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: -; INTERLEAVE-NEXT: br i1 poison, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; INTERLEAVE-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; INTERLEAVE: middle.block: ; INTERLEAVE-NEXT: br i1 poison, label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -11,11 +11,9 @@ ; VF8UF1: [[CMP:%.+]] = icmp eq i64 %index.next, %n.vec ; VF8UF1-NEXT: br i1 [[CMP]], label %middle.block, label %vector.body ; -; VF8UF2: [[CMP:%.+]] = icmp eq i64 %index.next, %n.vec -; VF8UF2-NEXT: br i1 [[CMP]], label %middle.block, label %vector.body +; VF8UF2: br i1 true, label %middle.block, label %vector.body ; -; VF16UF1: [[CMP:%.+]] = icmp eq i64 %index.next, %n.vec -; VF16UF1-NEXT: br i1 [[CMP]], label %middle.block, label %vector.body +; VF16UF1: br i1 true, label %middle.block, label %vector.body ; entry: %and = and i64 %N, 15