diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9832,11 +9832,12 @@ // for minimum code-size, 2) predicate compiler options, 3) loop hints forcing -// predication, and 4) a TTI hook that analyses whether the loop is suitable -// for predication. +// predication, 4) a tiny trip count threshold, and 5) a TTI hook that analyses +// whether the loop is suitable for predication. -static ScalarEpilogueLowering getScalarEpilogueLowering( +static Optional<ScalarEpilogueLowering> getScalarEpilogueLowering( Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) { + LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI, + OptimizationRemarkEmitter *ORE) { // 1) OptSize takes precedence over all other options, i.e. if this is set, // don't look at hints or options, and don't request a scalar epilogue. // (For PGSO, as shouldOptimizeForSize isn't currently accessible from @@ -9870,7 +9871,31 @@ return CM_ScalarEpilogueAllowed; }; - // 4) if the TTI hook indicates this is profitable, request predication. + // 4) Check the loop for a trip count threshold. Vectorize loops with a tiny + // trip count by optimizing for size, to minimize overheads. + auto ExpectedTC = getSmallBestKnownTC(*SE, L); + if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { + LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. 
" + << "This loop is worth vectorizing only if no scalar " + << "iteration overheads are incurred."); + if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) + LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); + else { + if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) { + return CM_ScalarEpilogueNotAllowedLowTripLoop; + } else { + LLVM_DEBUG(dbgs() << " But the target considers the trip count too " + "small to consider vectorizing.\n"); + reportVectorizationFailure( + "The trip count is below the minimal threshold value.", + "loop trip count is too low, avoiding vectorization", + "LowTripCount", ORE, L); + return None; + } + } + } + + // 5) if the TTI hook indicates this is profitable, request predication. if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL, IAI)) return CM_ScalarEpilogueNotNeededUsePredicate; @@ -9964,10 +9989,16 @@ Function *F = L->getHeader()->getParent(); InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI()); - ScalarEpilogueLowering SEL = getScalarEpilogueLowering( - F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, *LVL, &IAI); + Optional<ScalarEpilogueLowering> SEL = + getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, + PSE.getSE(), DT, *LVL, &IAI, ORE); - LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, + if (!SEL) { + Hints.emitRemarkWithHints(); + return false; + } + + LoopVectorizationCostModel CM(*SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an @@ -10233,33 +10264,12 @@ // Check the function attributes and profiles to find out if this function // should be optimized for size. 
- ScalarEpilogueLowering SEL = getScalarEpilogueLowering( - F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, LVL, &IAI); + Optional<ScalarEpilogueLowering> SEL = getScalarEpilogueLowering( + F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, LVL, &IAI, ORE); - // Check the loop for a trip count threshold: vectorize loops with a tiny trip - // count by optimizing for size, to minimize overheads. - auto ExpectedTC = getSmallBestKnownTC(*SE, L); - if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) { - LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " - << "This loop is worth vectorizing only if no scalar " - << "iteration overheads are incurred."); - if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) - LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); - else { - if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) { - LLVM_DEBUG(dbgs() << "\n"); - SEL = CM_ScalarEpilogueNotAllowedLowTripLoop; - } else { - LLVM_DEBUG(dbgs() << " But the target considers the trip count too " - "small to consider vectorizing.\n"); - reportVectorizationFailure( - "The trip count is below the minial threshold value.", - "loop trip count is too low, avoiding vectorization", - "LowTripCount", ORE, L); - Hints.emitRemarkWithHints(); - return false; - } - } + if (!SEL) { + Hints.emitRemarkWithHints(); + return false; } // Check the function attributes to see if implicit floats or vectors are @@ -10309,7 +10319,7 @@ } // Use the cost model. 
- LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, + LoopVectorizationCostModel CM(*SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, &Hints, IAI); CM.collectValuesToIgnore(); CM.collectElementTypesForWidening(); diff --git a/llvm/test/Transforms/LoopVectorize/override-short-tc-heuristic.ll b/llvm/test/Transforms/LoopVectorize/override-short-tc-heuristic.ll --- a/llvm/test/Transforms/LoopVectorize/override-short-tc-heuristic.ll +++ b/llvm/test/Transforms/LoopVectorize/override-short-tc-heuristic.ll @@ -8,74 +8,389 @@ ;@dst = common global [32 x i8] zeroinitializer, align 1 ; Simple loop with small constant trip count. -; TODO: Check that -prefer-predicate-over-epilogue and "llvm.loop.vectorize.predicate.enable=true" +; Check that -prefer-predicate-over-epilogue and "llvm.loop.vectorize.predicate.enable=true" ; hint override "small trip count" heuristic. define i32 @const_low_trip_count_hint_pred(i8 *%dst, i8 *%src) { ; CHECK-LABEL: @const_low_trip_count_hint_pred( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64 +; CHECK-NEXT: [[DST1:%.*]] = ptrtoint i8* [[DST:%.*]] to i64 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 3 +; 
CHECK-NEXT: [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK: pred.load.if3: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP10]], i8 [[TMP13]], i32 1 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; CHECK: pred.load.continue4: +; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP10]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK: pred.load.if5: +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[TMP17]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP18]], i32 2 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; CHECK: pred.load.continue6: +; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF7:%.*]], label 
[[PRED_LOAD_CONTINUE8:%.*]] +; CHECK: pred.load.if7: +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP23]], i32 3 +; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; CHECK: pred.load.continue8: +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP24]], [[PRED_LOAD_IF7]] ] +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq <4 x i8> [[TMP25]], +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP26]], <4 x i8> , <4 x i8> +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i8> [[TMP27]], i32 0 +; CHECK-NEXT: store i8 [[TMP30]], i8* [[TMP29]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] +; CHECK: pred.store.if9: +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP2]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i8> [[TMP27]], i32 1 +; CHECK-NEXT: store i8 [[TMP33]], i8* [[TMP32]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] +; CHECK: pred.store.continue10: +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] +; CHECK: pred.store.if11: +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i8> [[TMP27]], i32 2 +; CHECK-NEXT: store i8 [[TMP36]], i8* [[TMP35]], align 1 +; CHECK-NEXT: br label 
[[PRED_STORE_CONTINUE12]] +; CHECK: pred.store.continue12: +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]] +; CHECK: pred.store.if13: +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i8> [[TMP27]], i32 3 +; CHECK-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] +; CHECK: pred.store.continue14: +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; CHECK-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 12, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] -; CHECK-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[I]] +; CHECK-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST]], i32 [[I]] ; CHECK-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 ; CHECK-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 ; CHECK-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label 
[[FOR_END:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; ; CHECK-SCALAR-LABEL: @const_low_trip_count_hint_pred( ; CHECK-SCALAR-NEXT: entry: +; CHECK-SCALAR-NEXT: [[SRC2:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64 +; CHECK-SCALAR-NEXT: [[DST1:%.*]] = ptrtoint i8* [[DST:%.*]] to i64 +; CHECK-SCALAR-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-SCALAR: vector.memcheck: +; CHECK-SCALAR-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] +; CHECK-SCALAR-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-SCALAR-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-SCALAR: vector.ph: +; CHECK-SCALAR-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-SCALAR: vector.body: +; CHECK-SCALAR-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-SCALAR-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; CHECK-SCALAR-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP1]] +; CHECK-SCALAR-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP1]] +; CHECK-SCALAR-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0 +; CHECK-SCALAR-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>* +; CHECK-SCALAR-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP5]], align 1 +; CHECK-SCALAR-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], +; CHECK-SCALAR-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i8> , <4 x i8> +; CHECK-SCALAR-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[TMP3]], i32 0 +; CHECK-SCALAR-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>* +; CHECK-SCALAR-NEXT: store <4 x i8> [[TMP7]], <4 x i8>* [[TMP9]], align 1 +; CHECK-SCALAR-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-SCALAR-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 +; CHECK-SCALAR-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], 
label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-SCALAR: middle.block: +; CHECK-SCALAR-NEXT: [[CMP_N:%.*]] = icmp eq i32 9, 8 +; CHECK-SCALAR-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-SCALAR: scalar.ph: +; CHECK-SCALAR-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-SCALAR-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-SCALAR: for.body: -; CHECK-SCALAR-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] -; CHECK-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-SCALAR-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[I]] +; CHECK-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST]], i32 [[I]] ; CHECK-SCALAR-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 ; CHECK-SCALAR-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 ; CHECK-SCALAR-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 ; CHECK-SCALAR-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 ; CHECK-SCALAR-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-SCALAR-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 -; CHECK-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-SCALAR: for.end: ; CHECK-SCALAR-NEXT: ret i32 0 ; ; CHECK-PRED-OR-SCALAR-LABEL: @const_low_trip_count_hint_pred( ; CHECK-PRED-OR-SCALAR-NEXT: entry: +; CHECK-PRED-OR-SCALAR-NEXT: [[SRC2:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64 +; CHECK-PRED-OR-SCALAR-NEXT: [[DST1:%.*]] = ptrtoint i8* [[DST:%.*]] to i64 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; 
CHECK-PRED-OR-SCALAR: vector.memcheck: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] +; CHECK-PRED-OR-SCALAR-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-PRED-OR-SCALAR: vector.ph: +; CHECK-PRED-OR-SCALAR-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-PRED-OR-SCALAR: vector.body: +; CHECK-PRED-OR-SCALAR-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 2 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 3 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND]], +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-PRED-OR-SCALAR: pred.load.if: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP1]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP9:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK-PRED-OR-SCALAR: pred.load.continue: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP10:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-PRED-OR-SCALAR: pred.load.if3: +; 
CHECK-PRED-OR-SCALAR-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP2]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP10]], i8 [[TMP13]], i32 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; CHECK-PRED-OR-SCALAR: pred.load.continue4: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP10]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-PRED-OR-SCALAR: pred.load.if5: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP3]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP18:%.*]] = load i8, i8* [[TMP17]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP18]], i32 2 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; CHECK-PRED-OR-SCALAR: pred.load.continue6: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP20:%.*]] = phi <4 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], [[PRED_LOAD_IF5]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] +; CHECK-PRED-OR-SCALAR: pred.load.if7: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP4]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP23]], i32 3 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; CHECK-PRED-OR-SCALAR: pred.load.continue8: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP25:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP24]], [[PRED_LOAD_IF7]] ] +; 
CHECK-PRED-OR-SCALAR-NEXT: [[TMP26:%.*]] = icmp eq <4 x i8> [[TMP25]], +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP26]], <4 x i8> , <4 x i8> +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-PRED-OR-SCALAR: pred.store.if: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP1]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP30:%.*]] = extractelement <4 x i8> [[TMP27]], i32 0 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[TMP30]], i8* [[TMP29]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK-PRED-OR-SCALAR: pred.store.continue: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] +; CHECK-PRED-OR-SCALAR: pred.store.if9: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP32:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP2]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP33:%.*]] = extractelement <4 x i8> [[TMP27]], i32 1 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[TMP33]], i8* [[TMP32]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_STORE_CONTINUE10]] +; CHECK-PRED-OR-SCALAR: pred.store.continue10: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] +; CHECK-PRED-OR-SCALAR: pred.store.if11: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP35:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP3]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP36:%.*]] = extractelement <4 x i8> [[TMP27]], i32 2 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[TMP36]], i8* [[TMP35]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_STORE_CONTINUE12]] +; CHECK-PRED-OR-SCALAR: pred.store.continue12: +; CHECK-PRED-OR-SCALAR-NEXT: 
[[TMP37:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]] +; CHECK-PRED-OR-SCALAR: pred.store.if13: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP38:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP4]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP39:%.*]] = extractelement <4 x i8> [[TMP27]], i32 3 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_STORE_CONTINUE14]] +; CHECK-PRED-OR-SCALAR: pred.store.continue14: +; CHECK-PRED-OR-SCALAR-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-PRED-OR-SCALAR-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP40:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-PRED-OR-SCALAR: middle.block: +; CHECK-PRED-OR-SCALAR-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-PRED-OR-SCALAR: scalar.ph: +; CHECK-PRED-OR-SCALAR-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 12, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-PRED-OR-SCALAR-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-PRED-OR-SCALAR: for.body: -; CHECK-PRED-OR-SCALAR-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-PRED-OR-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] -; CHECK-PRED-OR-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-PRED-OR-SCALAR-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[I]] +; CHECK-PRED-OR-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST]], i32 [[I]] ; CHECK-PRED-OR-SCALAR-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 ; CHECK-PRED-OR-SCALAR-NEXT: [[CMP1:%.*]] 
= icmp eq i8 [[LDVAL]], 5 ; CHECK-PRED-OR-SCALAR-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 ; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 ; CHECK-PRED-OR-SCALAR-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-PRED-OR-SCALAR-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 -; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-PRED-OR-SCALAR: for.end: ; CHECK-PRED-OR-SCALAR-NEXT: ret i32 0 ; ; PRED-OR-DONTVEC-LABEL: @const_low_trip_count_hint_pred( ; PRED-OR-DONTVEC-NEXT: entry: +; PRED-OR-DONTVEC-NEXT: [[SRC2:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64 +; PRED-OR-DONTVEC-NEXT: [[DST1:%.*]] = ptrtoint i8* [[DST:%.*]] to i64 +; PRED-OR-DONTVEC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; PRED-OR-DONTVEC: vector.memcheck: +; PRED-OR-DONTVEC-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] +; PRED-OR-DONTVEC-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; PRED-OR-DONTVEC-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; PRED-OR-DONTVEC: vector.ph: +; PRED-OR-DONTVEC-NEXT: br label [[VECTOR_BODY:%.*]] +; PRED-OR-DONTVEC: vector.body: +; PRED-OR-DONTVEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] +; PRED-OR-DONTVEC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; PRED-OR-DONTVEC-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 +; PRED-OR-DONTVEC-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 2 +; PRED-OR-DONTVEC-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 3 +; PRED-OR-DONTVEC-NEXT: [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND]], +; PRED-OR-DONTVEC-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; 
PRED-OR-DONTVEC-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; PRED-OR-DONTVEC: pred.load.if: +; PRED-OR-DONTVEC-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP1]] +; PRED-OR-DONTVEC-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 1 +; PRED-OR-DONTVEC-NEXT: [[TMP9:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_LOAD_CONTINUE]] +; PRED-OR-DONTVEC: pred.load.continue: +; PRED-OR-DONTVEC-NEXT: [[TMP10:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; PRED-OR-DONTVEC: pred.load.if3: +; PRED-OR-DONTVEC-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP2]] +; PRED-OR-DONTVEC-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1 +; PRED-OR-DONTVEC-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP10]], i8 [[TMP13]], i32 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; PRED-OR-DONTVEC: pred.load.continue4: +; PRED-OR-DONTVEC-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP10]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; PRED-OR-DONTVEC: pred.load.if5: +; PRED-OR-DONTVEC-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP3]] +; PRED-OR-DONTVEC-NEXT: [[TMP18:%.*]] = load i8, i8* [[TMP17]], align 1 +; PRED-OR-DONTVEC-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP18]], i32 2 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; PRED-OR-DONTVEC: pred.load.continue6: +; PRED-OR-DONTVEC-NEXT: [[TMP20:%.*]] = phi <4 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], [[PRED_LOAD_IF5]] ] +; 
PRED-OR-DONTVEC-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] +; PRED-OR-DONTVEC: pred.load.if7: +; PRED-OR-DONTVEC-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP4]] +; PRED-OR-DONTVEC-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 +; PRED-OR-DONTVEC-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP23]], i32 3 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; PRED-OR-DONTVEC: pred.load.continue8: +; PRED-OR-DONTVEC-NEXT: [[TMP25:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP24]], [[PRED_LOAD_IF7]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP26:%.*]] = icmp eq <4 x i8> [[TMP25]], +; PRED-OR-DONTVEC-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP26]], <4 x i8> , <4 x i8> +; PRED-OR-DONTVEC-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; PRED-OR-DONTVEC: pred.store.if: +; PRED-OR-DONTVEC-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP1]] +; PRED-OR-DONTVEC-NEXT: [[TMP30:%.*]] = extractelement <4 x i8> [[TMP27]], i32 0 +; PRED-OR-DONTVEC-NEXT: store i8 [[TMP30]], i8* [[TMP29]], align 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_STORE_CONTINUE]] +; PRED-OR-DONTVEC: pred.store.continue: +; PRED-OR-DONTVEC-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] +; PRED-OR-DONTVEC: pred.store.if9: +; PRED-OR-DONTVEC-NEXT: [[TMP32:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP2]] +; PRED-OR-DONTVEC-NEXT: [[TMP33:%.*]] = extractelement <4 x i8> [[TMP27]], i32 1 +; PRED-OR-DONTVEC-NEXT: store i8 [[TMP33]], i8* [[TMP32]], align 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_STORE_CONTINUE10]] +; PRED-OR-DONTVEC: pred.store.continue10: +; PRED-OR-DONTVEC-NEXT: 
[[TMP34:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] +; PRED-OR-DONTVEC: pred.store.if11: +; PRED-OR-DONTVEC-NEXT: [[TMP35:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP3]] +; PRED-OR-DONTVEC-NEXT: [[TMP36:%.*]] = extractelement <4 x i8> [[TMP27]], i32 2 +; PRED-OR-DONTVEC-NEXT: store i8 [[TMP36]], i8* [[TMP35]], align 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_STORE_CONTINUE12]] +; PRED-OR-DONTVEC: pred.store.continue12: +; PRED-OR-DONTVEC-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]] +; PRED-OR-DONTVEC: pred.store.if13: +; PRED-OR-DONTVEC-NEXT: [[TMP38:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP4]] +; PRED-OR-DONTVEC-NEXT: [[TMP39:%.*]] = extractelement <4 x i8> [[TMP27]], i32 3 +; PRED-OR-DONTVEC-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_STORE_CONTINUE14]] +; PRED-OR-DONTVEC: pred.store.continue14: +; PRED-OR-DONTVEC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; PRED-OR-DONTVEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; PRED-OR-DONTVEC-NEXT: [[TMP40:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PRED-OR-DONTVEC: middle.block: +; PRED-OR-DONTVEC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; PRED-OR-DONTVEC: scalar.ph: +; PRED-OR-DONTVEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 12, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; PRED-OR-DONTVEC-NEXT: br label [[FOR_BODY:%.*]] ; PRED-OR-DONTVEC: for.body: -; PRED-OR-DONTVEC-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; PRED-OR-DONTVEC-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] -; PRED-OR-DONTVEC-NEXT: 
[[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; PRED-OR-DONTVEC-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; PRED-OR-DONTVEC-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[I]] +; PRED-OR-DONTVEC-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST]], i32 [[I]] ; PRED-OR-DONTVEC-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 ; PRED-OR-DONTVEC-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 ; PRED-OR-DONTVEC-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 ; PRED-OR-DONTVEC-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 ; PRED-OR-DONTVEC-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; PRED-OR-DONTVEC-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 -; PRED-OR-DONTVEC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; PRED-OR-DONTVEC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]] ; PRED-OR-DONTVEC: for.end: ; PRED-OR-DONTVEC-NEXT: ret i32 0 ; @@ -99,74 +414,324 @@ } ; Simple loop with small constant trip count. -; TODO: Check that -prefer-predicate-over-epilogue and "llvm.loop.vectorize.predicate.enable=false" +; Check that -prefer-predicate-over-epilogue and "llvm.loop.vectorize.predicate.enable=false" ; hint override "small trip count" heuristic. 
define i32 @const_low_trip_count_hint_no_pred(i8 *%dst, i8 *%src) { ; CHECK-LABEL: @const_low_trip_count_hint_no_pred( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64 +; CHECK-NEXT: [[DST1:%.*]] = ptrtoint i8* [[DST:%.*]] to i64 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP5]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i8> , <4 x i8> +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>* +; CHECK-NEXT: store <4 x i8> [[TMP7]], <4 x i8>* [[TMP9]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 9, 8 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] 
], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] -; CHECK-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[I]] +; CHECK-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST]], i32 [[I]] ; CHECK-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 ; CHECK-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 ; CHECK-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; ; CHECK-SCALAR-LABEL: @const_low_trip_count_hint_no_pred( ; CHECK-SCALAR-NEXT: entry: +; CHECK-SCALAR-NEXT: [[SRC2:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64 +; CHECK-SCALAR-NEXT: [[DST1:%.*]] = ptrtoint i8* [[DST:%.*]] to i64 +; CHECK-SCALAR-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-SCALAR: vector.memcheck: +; CHECK-SCALAR-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] +; CHECK-SCALAR-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-SCALAR-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-SCALAR: vector.ph: +; CHECK-SCALAR-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-SCALAR: vector.body: +; CHECK-SCALAR-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-SCALAR-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 
0 +; CHECK-SCALAR-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP1]] +; CHECK-SCALAR-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP1]] +; CHECK-SCALAR-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0 +; CHECK-SCALAR-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>* +; CHECK-SCALAR-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP5]], align 1 +; CHECK-SCALAR-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], +; CHECK-SCALAR-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i8> , <4 x i8> +; CHECK-SCALAR-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[TMP3]], i32 0 +; CHECK-SCALAR-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <4 x i8>* +; CHECK-SCALAR-NEXT: store <4 x i8> [[TMP7]], <4 x i8>* [[TMP9]], align 1 +; CHECK-SCALAR-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-SCALAR-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 +; CHECK-SCALAR-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-SCALAR: middle.block: +; CHECK-SCALAR-NEXT: [[CMP_N:%.*]] = icmp eq i32 9, 8 +; CHECK-SCALAR-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK-SCALAR: scalar.ph: +; CHECK-SCALAR-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-SCALAR-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-SCALAR: for.body: -; CHECK-SCALAR-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] -; CHECK-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-SCALAR-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[I]] +; CHECK-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST]], i32 [[I]] ; CHECK-SCALAR-NEXT: [[LDVAL:%.*]] = load i8, i8* 
[[LDIDX]], align 1 ; CHECK-SCALAR-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 ; CHECK-SCALAR-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 ; CHECK-SCALAR-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 ; CHECK-SCALAR-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-SCALAR-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 -; CHECK-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-SCALAR: for.end: ; CHECK-SCALAR-NEXT: ret i32 0 ; ; CHECK-PRED-OR-SCALAR-LABEL: @const_low_trip_count_hint_no_pred( ; CHECK-PRED-OR-SCALAR-NEXT: entry: +; CHECK-PRED-OR-SCALAR-NEXT: [[SRC2:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64 +; CHECK-PRED-OR-SCALAR-NEXT: [[DST1:%.*]] = ptrtoint i8* [[DST:%.*]] to i64 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-PRED-OR-SCALAR: vector.memcheck: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] +; CHECK-PRED-OR-SCALAR-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-PRED-OR-SCALAR: vector.ph: +; CHECK-PRED-OR-SCALAR-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-PRED-OR-SCALAR: vector.body: +; CHECK-PRED-OR-SCALAR-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 2 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 3 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND]], +; 
CHECK-PRED-OR-SCALAR-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-PRED-OR-SCALAR: pred.load.if: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP1]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP9:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_LOAD_CONTINUE]] +; CHECK-PRED-OR-SCALAR: pred.load.continue: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP10:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-PRED-OR-SCALAR: pred.load.if3: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP2]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP10]], i8 [[TMP13]], i32 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; CHECK-PRED-OR-SCALAR: pred.load.continue4: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP10]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; CHECK-PRED-OR-SCALAR: pred.load.if5: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP3]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP18:%.*]] = load i8, i8* [[TMP17]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP18]], i32 2 +; CHECK-PRED-OR-SCALAR-NEXT: br 
label [[PRED_LOAD_CONTINUE6]] +; CHECK-PRED-OR-SCALAR: pred.load.continue6: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP20:%.*]] = phi <4 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], [[PRED_LOAD_IF5]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] +; CHECK-PRED-OR-SCALAR: pred.load.if7: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP4]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP23]], i32 3 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; CHECK-PRED-OR-SCALAR: pred.load.continue8: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP25:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP24]], [[PRED_LOAD_IF7]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP26:%.*]] = icmp eq <4 x i8> [[TMP25]], +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP26]], <4 x i8> , <4 x i8> +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-PRED-OR-SCALAR: pred.store.if: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP1]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP30:%.*]] = extractelement <4 x i8> [[TMP27]], i32 0 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[TMP30]], i8* [[TMP29]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK-PRED-OR-SCALAR: pred.store.continue: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] +; CHECK-PRED-OR-SCALAR: pred.store.if9: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP32:%.*]] 
= getelementptr i8, i8* [[DST]], i32 [[TMP2]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP33:%.*]] = extractelement <4 x i8> [[TMP27]], i32 1 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[TMP33]], i8* [[TMP32]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_STORE_CONTINUE10]] +; CHECK-PRED-OR-SCALAR: pred.store.continue10: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] +; CHECK-PRED-OR-SCALAR: pred.store.if11: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP35:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP3]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP36:%.*]] = extractelement <4 x i8> [[TMP27]], i32 2 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[TMP36]], i8* [[TMP35]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_STORE_CONTINUE12]] +; CHECK-PRED-OR-SCALAR: pred.store.continue12: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]] +; CHECK-PRED-OR-SCALAR: pred.store.if13: +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP38:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP4]] +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP39:%.*]] = extractelement <4 x i8> [[TMP27]], i32 3 +; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; CHECK-PRED-OR-SCALAR-NEXT: br label [[PRED_STORE_CONTINUE14]] +; CHECK-PRED-OR-SCALAR: pred.store.continue14: +; CHECK-PRED-OR-SCALAR-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-PRED-OR-SCALAR-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-PRED-OR-SCALAR-NEXT: [[TMP40:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-PRED-OR-SCALAR: middle.block: +; CHECK-PRED-OR-SCALAR-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; 
CHECK-PRED-OR-SCALAR: scalar.ph: +; CHECK-PRED-OR-SCALAR-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 12, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-PRED-OR-SCALAR-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-PRED-OR-SCALAR: for.body: -; CHECK-PRED-OR-SCALAR-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-PRED-OR-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] -; CHECK-PRED-OR-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; CHECK-PRED-OR-SCALAR-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-PRED-OR-SCALAR-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[I]] +; CHECK-PRED-OR-SCALAR-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST]], i32 [[I]] ; CHECK-PRED-OR-SCALAR-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 ; CHECK-PRED-OR-SCALAR-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 ; CHECK-PRED-OR-SCALAR-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 ; CHECK-PRED-OR-SCALAR-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 ; CHECK-PRED-OR-SCALAR-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; CHECK-PRED-OR-SCALAR-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 -; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-PRED-OR-SCALAR-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-PRED-OR-SCALAR: for.end: ; CHECK-PRED-OR-SCALAR-NEXT: ret i32 0 ; ; PRED-OR-DONTVEC-LABEL: @const_low_trip_count_hint_no_pred( ; PRED-OR-DONTVEC-NEXT: entry: +; PRED-OR-DONTVEC-NEXT: [[SRC2:%.*]] = ptrtoint i8* [[SRC:%.*]] to i64 +; PRED-OR-DONTVEC-NEXT: [[DST1:%.*]] = ptrtoint i8* [[DST:%.*]] to i64 +; PRED-OR-DONTVEC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; PRED-OR-DONTVEC: vector.memcheck: +; PRED-OR-DONTVEC-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], 
[[SRC2]] +; PRED-OR-DONTVEC-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; PRED-OR-DONTVEC-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; PRED-OR-DONTVEC: vector.ph: +; PRED-OR-DONTVEC-NEXT: br label [[VECTOR_BODY:%.*]] +; PRED-OR-DONTVEC: vector.body: +; PRED-OR-DONTVEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ] +; PRED-OR-DONTVEC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 +; PRED-OR-DONTVEC-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 +; PRED-OR-DONTVEC-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 2 +; PRED-OR-DONTVEC-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 3 +; PRED-OR-DONTVEC-NEXT: [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND]], +; PRED-OR-DONTVEC-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; PRED-OR-DONTVEC: pred.load.if: +; PRED-OR-DONTVEC-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP1]] +; PRED-OR-DONTVEC-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP7]], align 1 +; PRED-OR-DONTVEC-NEXT: [[TMP9:%.*]] = insertelement <4 x i8> poison, i8 [[TMP8]], i32 0 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_LOAD_CONTINUE]] +; PRED-OR-DONTVEC: pred.load.continue: +; PRED-OR-DONTVEC-NEXT: [[TMP10:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; PRED-OR-DONTVEC: pred.load.if3: +; PRED-OR-DONTVEC-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP2]] +; PRED-OR-DONTVEC-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP12]], align 1 +; PRED-OR-DONTVEC-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP10]], i8 
[[TMP13]], i32 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_LOAD_CONTINUE4]] +; PRED-OR-DONTVEC: pred.load.continue4: +; PRED-OR-DONTVEC-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP10]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] +; PRED-OR-DONTVEC: pred.load.if5: +; PRED-OR-DONTVEC-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP3]] +; PRED-OR-DONTVEC-NEXT: [[TMP18:%.*]] = load i8, i8* [[TMP17]], align 1 +; PRED-OR-DONTVEC-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP18]], i32 2 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_LOAD_CONTINUE6]] +; PRED-OR-DONTVEC: pred.load.continue6: +; PRED-OR-DONTVEC-NEXT: [[TMP20:%.*]] = phi <4 x i8> [ [[TMP15]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP19]], [[PRED_LOAD_IF5]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] +; PRED-OR-DONTVEC: pred.load.if7: +; PRED-OR-DONTVEC-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[TMP4]] +; PRED-OR-DONTVEC-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP22]], align 1 +; PRED-OR-DONTVEC-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP23]], i32 3 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_LOAD_CONTINUE8]] +; PRED-OR-DONTVEC: pred.load.continue8: +; PRED-OR-DONTVEC-NEXT: [[TMP25:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP24]], [[PRED_LOAD_IF7]] ] +; PRED-OR-DONTVEC-NEXT: [[TMP26:%.*]] = icmp eq <4 x i8> [[TMP25]], +; PRED-OR-DONTVEC-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP26]], <4 x i8> , <4 x i8> +; PRED-OR-DONTVEC-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP28]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; 
PRED-OR-DONTVEC: pred.store.if: +; PRED-OR-DONTVEC-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP1]] +; PRED-OR-DONTVEC-NEXT: [[TMP30:%.*]] = extractelement <4 x i8> [[TMP27]], i32 0 +; PRED-OR-DONTVEC-NEXT: store i8 [[TMP30]], i8* [[TMP29]], align 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_STORE_CONTINUE]] +; PRED-OR-DONTVEC: pred.store.continue: +; PRED-OR-DONTVEC-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] +; PRED-OR-DONTVEC: pred.store.if9: +; PRED-OR-DONTVEC-NEXT: [[TMP32:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP2]] +; PRED-OR-DONTVEC-NEXT: [[TMP33:%.*]] = extractelement <4 x i8> [[TMP27]], i32 1 +; PRED-OR-DONTVEC-NEXT: store i8 [[TMP33]], i8* [[TMP32]], align 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_STORE_CONTINUE10]] +; PRED-OR-DONTVEC: pred.store.continue10: +; PRED-OR-DONTVEC-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] +; PRED-OR-DONTVEC: pred.store.if11: +; PRED-OR-DONTVEC-NEXT: [[TMP35:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP3]] +; PRED-OR-DONTVEC-NEXT: [[TMP36:%.*]] = extractelement <4 x i8> [[TMP27]], i32 2 +; PRED-OR-DONTVEC-NEXT: store i8 [[TMP36]], i8* [[TMP35]], align 1 +; PRED-OR-DONTVEC-NEXT: br label [[PRED_STORE_CONTINUE12]] +; PRED-OR-DONTVEC: pred.store.continue12: +; PRED-OR-DONTVEC-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]] +; PRED-OR-DONTVEC: pred.store.if13: +; PRED-OR-DONTVEC-NEXT: [[TMP38:%.*]] = getelementptr i8, i8* [[DST]], i32 [[TMP4]] +; PRED-OR-DONTVEC-NEXT: [[TMP39:%.*]] = extractelement <4 x i8> [[TMP27]], i32 3 +; PRED-OR-DONTVEC-NEXT: store i8 [[TMP39]], i8* [[TMP38]], align 1 +; PRED-OR-DONTVEC-NEXT: br label 
[[PRED_STORE_CONTINUE14]] +; PRED-OR-DONTVEC: pred.store.continue14: +; PRED-OR-DONTVEC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; PRED-OR-DONTVEC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; PRED-OR-DONTVEC-NEXT: [[TMP40:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 +; PRED-OR-DONTVEC-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; PRED-OR-DONTVEC: middle.block: +; PRED-OR-DONTVEC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; PRED-OR-DONTVEC: scalar.ph: +; PRED-OR-DONTVEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 12, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; PRED-OR-DONTVEC-NEXT: br label [[FOR_BODY:%.*]] ; PRED-OR-DONTVEC: for.body: -; PRED-OR-DONTVEC-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; PRED-OR-DONTVEC-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i32 [[I]] -; PRED-OR-DONTVEC-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST:%.*]], i32 [[I]] +; PRED-OR-DONTVEC-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; PRED-OR-DONTVEC-NEXT: [[LDIDX:%.*]] = getelementptr i8, i8* [[SRC]], i32 [[I]] +; PRED-OR-DONTVEC-NEXT: [[STIDX:%.*]] = getelementptr i8, i8* [[DST]], i32 [[I]] ; PRED-OR-DONTVEC-NEXT: [[LDVAL:%.*]] = load i8, i8* [[LDIDX]], align 1 ; PRED-OR-DONTVEC-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LDVAL]], 5 ; PRED-OR-DONTVEC-NEXT: [[VAL:%.*]] = select i1 [[CMP1]], i8 1, i8 2 ; PRED-OR-DONTVEC-NEXT: store i8 [[VAL]], i8* [[STIDX]], align 1 ; PRED-OR-DONTVEC-NEXT: [[INC]] = add nsw i32 [[I]], 1 ; PRED-OR-DONTVEC-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[I]], 8 -; PRED-OR-DONTVEC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; PRED-OR-DONTVEC-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP4:![0-9]+]] ; PRED-OR-DONTVEC: for.end: ; PRED-OR-DONTVEC-NEXT: ret i32 0 ;