Diff 555866

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,690 Lines • ▼ Show 20 Lines	LoopVectorizationCostModel::getSmallestAndWidestTypes() {
}		}
return {MinWidth, MaxWidth};		return {MinWidth, MaxWidth};
}		}

void LoopVectorizationCostModel::collectElementTypesForWidening() {		void LoopVectorizationCostModel::collectElementTypesForWidening() {
ElementTypesInLoop.clear();		ElementTypesInLoop.clear();
// For each block.		// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {		for (BasicBlock *BB : TheLoop->blocks()) {
		bool HasLoad = false;
// For each instruction in the loop.		// For each instruction in the loop.
for (Instruction &I : BB->instructionsWithoutDebug()) {		for (Instruction &I : BB->instructionsWithoutDebug()) {
Type *T = I.getType();		Type *T = I.getType();

// Skip ignored values.		// Skip ignored values.
if (ValuesToIgnore.count(&I))		if (ValuesToIgnore.count(&I))
continue;		continue;

// Only examine Loads, Stores and PHINodes.		// Only examine Loads, Stores and PHINodes.
if (!isa<LoadInst>(I) && !isa<StoreInst>(I) && !isa<PHINode>(I))		if (!isa<LoadInst>(I) && !isa<StoreInst>(I) && !isa<PHINode>(I))
continue;		continue;

		// Check for loads in the loop.
		if (isa<LoadInst>(I))
		HasLoad = true;

// Examine PHI nodes that are reduction variables. Update the type to		// Examine PHI nodes that are reduction variables. Update the type to
// account for the recurrence type.		// account for the recurrence type.
if (auto *PN = dyn_cast<PHINode>(&I)) {		if (auto *PN = dyn_cast<PHINode>(&I)) {
if (!Legal->isReductionVariable(PN))		if (!Legal->isReductionVariable(PN))
continue;		continue;
const RecurrenceDescriptor &RdxDesc =		const RecurrenceDescriptor &RdxDesc =
Legal->getReductionVars().find(PN)->second;		Legal->getReductionVars().find(PN)->second;
if (PreferInLoopReductions \|\| useOrderedReductions(RdxDesc) \|\|		if (PreferInLoopReductions \|\| useOrderedReductions(RdxDesc) \|\|
TTI.preferInLoopReduction(RdxDesc.getOpcode(),		TTI.preferInLoopReduction(RdxDesc.getOpcode(),
RdxDesc.getRecurrenceType(),		RdxDesc.getRecurrenceType(),
TargetTransformInfo::ReductionFlags()))		TargetTransformInfo::ReductionFlags()))
continue;		continue;
T = RdxDesc.getRecurrenceType();		T = RdxDesc.getRecurrenceType();
}		}

// Examine the stored values.		// Examine the stored values.
if (auto *ST = dyn_cast<StoreInst>(&I))		if (auto *ST = dyn_cast<StoreInst>(&I)) {
		david-armUnsubmitted Done Reply Inline Actions I wonder if we should also be asking if the store operand is loop invariant too? This would avoid tests changing such as llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll. If the input is loop invariant then it's not really participating in the vector loop. david-arm: I wonder if we should also be asking if the store operand is loop invariant too? This would…
		RinAuthorUnsubmitted Done Reply Inline Actions Discussed this and there is a trunc store in the loop: `%0 = trunc i64 %indvars.iv21 to i32`, `store i32 %0, ptr %arrayidx, align 4`. Rin: Discussed this and there is a trunc store in the loop ``` %0 = trunc i64 %indvars.iv21 to i32…
T = ST->getValueOperand()->getType();		T = ST->getValueOperand()->getType();
		// If dealing with a truncating store and there are no loads
		david-armUnsubmitted Done Reply Inline Actions I think you can write this more simply as if (auto *Trunc = dyn_cast<TruncInst>(ST->getOperand(0))) T = Trunc->getSrcTy(); david-arm: I think you can write this more simply as ```if (auto *Trunc = dyn_cast<TruncInst>(ST…
		david-armUnsubmitted Done Reply Inline Actions nit: Sorry @Rin, just one more thing. Perhaps for consistency it makes sense to also use `ST->getValueOperand()` even though it's the same thing? david-arm: nit: Sorry @Rin, just one more thing. Perhaps for consistency it makes sense to also use `ST…
		RinAuthorUnsubmitted Done Reply Inline Actions I'll change it, no problem. Rin: I'll change it, no problem.
		// in the loop then add the source type to list.
		david-armUnsubmitted Done Reply Inline Actions nit: Normally variables in LLVM start with a capital, i.e. `CastTrunc` david-arm: nit: Normally variables in LLVM start with a capital, i.e. `CastTrunc`
		RinAuthorUnsubmitted Done Reply Inline Actions Ah, my bad, I'll change that. Rin: Ah, my bad, I'll change that.
		if (isa<TruncInst>(ST->getValueOperand()) && !HasLoad){
		Type *TruncateT = dyn_cast<TruncInst>(ST->getValueOperand())->getSrcTy();
		assert(TruncateT->isSized() &&
		"Expected the load/store/recurrence type to be sized");
		ElementTypesInLoop.insert(TruncateT);
		}
		}

assert(T->isSized() &&		assert(T->isSized() &&
"Expected the load/store/recurrence type to be sized");		"Expected the load/store/recurrence type to be sized");

ElementTypesInLoop.insert(T);		ElementTypesInLoop.insert(T);
}		}
}		}
}		}
▲ Show 20 Lines • Show All 4,859 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll

	Show All 12 Lines
	; for (i2 = 0; i2 < 8; i2++)			; for (i2 = 0; i2 < 8; i2++)
	; arr[i2][i1] = i1 + n;			; arr[i2][i1] = i1 + n;
	; }			; }
	; }			; }
	;			;

	; CHECK-LABEL: @foo_i32(			; CHECK-LABEL: @foo_i32(
	; CHECK-LABEL: vector.ph:			; CHECK-LABEL: vector.ph:
	; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0			; CHECK: %[[SplatVal:.*]] = insertelement <2 x i32> poison, i32 %n, i64 0
	; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer			; CHECK: %[[Splat:.*]] = shufflevector <2 x i32> %[[SplatVal]], <2 x i32> poison, <2 x i32> zeroinitializer

	; CHECK-LABEL: vector.body:			; CHECK-LABEL: vector.body:
	; CHECK: %[[Ind:.]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.]], %[[ForInc:.*]] ]			; CHECK: %[[Ind:.]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.]], %[[ForInc:.*]] ]
	; CHECK: %[[VecInd:.]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.]], %[[ForInc]] ]			; CHECK: %[[VecInd:.]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %[[VecIndNext:.]], %[[ForInc]] ]
	; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]]			; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <2 x i64> %[[VecInd]]
	; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>			; CHECK: %[[VecIndTr:.*]] = trunc <2 x i64> %[[VecInd]] to <2 x i32>
	; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)			; CHECK: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %[[VecIndTr]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> <i1 true, i1 true>)
	; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>			; CHECK: %[[VecIndTr2:.*]] = trunc <2 x i64> %[[VecInd]] to <2 x i32>
	; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]			; CHECK: %[[StoreVal:.*]] = add nsw <2 x i32> %[[VecIndTr2]], %[[Splat]]
	; CHECK: br label %[[InnerLoop:.+]]			; CHECK: br label %[[InnerLoop:.+]]

	; CHECK: [[InnerLoop]]:			; CHECK: [[InnerLoop]]:
	; CHECK: %[[InnerPhi:.]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.]], %[[InnerLoop]] ]			; CHECK: %[[InnerPhi:.]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.]], %[[InnerLoop]] ]
	; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]			; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]]
	; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true			; CHECK: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> <i1 true, i1 true>)
	; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>			; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], <i64 1, i64 1>
	; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>			; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], <i64 8, i64 8>
	; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0			; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0
	; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]			; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]

	; CHECK: [[ForInc]]:			; CHECK: [[ForInc]]:
	; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4			; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 2
	; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], <i64 4, i64 4, i64 4, i64 4>			; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], <i64 2, i64 2>
	; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8			; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
	; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body			; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body

	@arr2 = external global [8 x i32], align 16			@arr2 = external global [8 x i32], align 16
	@arr = external global [8 x [8 x i32]], align 16			@arr = external global [8 x [8 x i32]], align 16

	@arrX = external global [8 x i64], align 16			@arrX = external global [8 x i64], align 16
	@arrY = external global [8 x [8 x i64]], align 16			@arrY = external global [8 x [8 x i64]], align 16
	▲ Show 20 Lines • Show All 91 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/truncate-type-widening.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
				; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=1 -mtriple aarch64-linux-gnu -mattr=+sve 2>&1 \| FileCheck %s

				david-armUnsubmitted Done Reply Inline Actions It looks like we're still using tail-folding despite passing in `-sve-tail-folding=disabled`, which I think is because the vectoriser knows the trip count is low. Perhaps you can just remove the flag? david-arm: It looks like we're still using tail-folding despite passing in `-sve-tail-folding=disabled`…
				RinAuthorUnsubmitted Done Reply Inline Actions You're right, I'll take that flag out. Rin: You're right, I'll take that flag out.
				define void @test(ptr nocapture noundef writeonly %dst, i32 noundef %n, i64 noundef %val) local_unnamed_addr #0 {
				; CHECK-LABEL: define void @test
				; CHECK-SAME: (ptr nocapture noundef writeonly [[DST:%.]], i32 noundef [[N:%.]], i64 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
				; CHECK-NEXT: entry:
				; CHECK-NEXT: [[REM:%.*]] = and i32 [[N]], 63
				; CHECK-NEXT: [[CMP8_NOT:%.*]] = icmp eq i32 [[REM]], 0
				; CHECK-NEXT: br i1 [[CMP8_NOT]], label [[FOR_COND_CLEANUP:%.]], label [[FOR_BODY_PREHEADER:%.]]
				; CHECK: for.body.preheader:
				; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[REM]], 7
				; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 3
				; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
				; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
				; CHECK: vector.ph:
				; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
				; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
				; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
				; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
				; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
				; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
				; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
				; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
				; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[TMP6]]
				; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[WIDE_TRIP_COUNT]], [[TMP6]]
				; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
				; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
				; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP10]], zeroinitializer
				; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
				; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP12]]
				; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2
				; CHECK-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]]
				; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP15]], i64 0
				; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
				; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[VAL]], i64 0
				; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
				; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
				; CHECK: vector.body:
				; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[VEC_IND:%.]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
				; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
				; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
				; CHECK-NEXT: [[TMP18:%.*]] = lshr <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP17]]
				; CHECK-NEXT: [[TMP19:%.*]] = trunc <vscale x 2 x i64> [[TMP18]] to <vscale x 2 x i8>
				; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
				; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP19]], ptr [[TMP20]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP9]])
				; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2
				; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]]
				; CHECK-NEXT: [[TMP23:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
				; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
				; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i1> [[TMP23]], i32 0
				; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
				; CHECK: middle.block:
				; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
				; CHECK: scalar.ph:
				; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
				; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[FOR_BODY_PREHEADER]] ]
				; CHECK-NEXT: br label [[FOR_BODY:%.*]]
				; CHECK: for.body:
				; CHECK-NEXT: [[INDVARS_IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.]], [[FOR_BODY]] ]
				; CHECK-NEXT: [[P_OUT_TAIL_09:%.]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.]], [[FOR_BODY]] ]
				; CHECK-NEXT: [[TMP25:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
				; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP25]]
				; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[SHR3]] to i8
				; CHECK-NEXT: store i8 [[CONV4]], ptr [[P_OUT_TAIL_09]], align 1
				; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1
				; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
				; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
				; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
				; CHECK: for.cond.cleanup.loopexit:
				; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
				; CHECK: for.cond.cleanup:
				; CHECK-NEXT: ret void
				;
				entry:
				%rem = and i32 %n, 63
				%cmp8.not = icmp eq i32 %rem, 0
				br i1 %cmp8.not, label %for.cond.cleanup, label %for.body.preheader

				for.body.preheader: ; preds = %entry
				%add = add nuw nsw i32 %rem, 7
				%shr = lshr i32 %add, 3
				%wide.trip.count = zext i32 %shr to i64
				br label %for.body

				for.body: ; preds = %for.body.preheader, %for.body
				%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
				david-armUnsubmitted Done Reply Inline Actions I think you can delete this block and just let everything jump directly to `%for.cond.cleanup` david-arm: I think you can delete this block and just let everything jump directly to `%for.cond.cleanup`
				RinAuthorUnsubmitted Done Reply Inline Actions Makes sense I'll do that and rewrite the blocks. Rin: Makes sense I'll do that and rewrite the blocks.
				%p_out_tail.09 = phi ptr [ %dst, %for.body.preheader ], [ %incdec.ptr, %for.body ]
				%0 = shl nuw nsw i64 %indvars.iv, 3
				%shr3 = lshr i64 %val, %0
				%conv4 = trunc i64 %shr3 to i8
				store i8 %conv4, ptr %p_out_tail.09, align 1
				%incdec.ptr = getelementptr inbounds i8, ptr %p_out_tail.09, i64 1
				david-armUnsubmitted Done Reply Inline Actions Could you rewrite the blocks in a more natural order, i.e. entry, for.body.preheader, for.body, for.cond.cleanup? david-arm: Could you rewrite the blocks in a more natural order, i.e. entry, for.body.preheader, for.body…
				%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
				%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
				br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

				for.cond.cleanup: ; preds = %for.body
				ret void
				}

llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt < %s -passes=loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S \| FileCheck %s			; RUN: opt < %s -passes=loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S \| FileCheck %s

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
	target triple = "x86_64-apple-macosx10.8.0"			target triple = "x86_64-apple-macosx10.8.0"

	define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp {			define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp {
	; CHECK-LABEL: @conversion_cost1(			; CHECK-LABEL: @conversion_cost1(
	; CHECK-NEXT: [[TMP1:%.]] = icmp sgt i32 [[N:%.]], 3			; CHECK-NEXT: [[TMP1:%.]] = icmp sgt i32 [[N:%.]], 3
	; CHECK-NEXT: br i1 [[TMP1]], label [[ITER_CHECK:%.]], label [[DOT_CRIT_EDGE:%.]]			; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.]], label [[DOT_CRIT_EDGE:%.]]
	; CHECK: iter.check:			; CHECK: .lr.ph.preheader:
	; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -3			; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -3
	; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64			; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
	; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 16			; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 16
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.]]			; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; CHECK: vector.main.loop.iter.check:
	; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], 32
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.]], label [[VECTOR_PH:%.]]
	; CHECK: vector.ph:			; CHECK: vector.ph:
	; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 32			; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16
	; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]			; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
	; CHECK-NEXT: [[IND_END:%.*]] = add i64 3, [[N_VEC]]			; CHECK-NEXT: [[IND_END:%.*]] = add i64 3, [[N_VEC]]
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[VEC_IND:%.]] = phi <32 x i8> [ <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[VEC_IND:%.]] = phi <4 x i8> [ <i8 3, i8 4, i8 5, i8 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], <i8 4, i8 4, i8 4, i8 4>
				; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i8> [[STEP_ADD]], <i8 4, i8 4, i8 4, i8 4>
				; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i8> [[STEP_ADD1]], <i8 4, i8 4, i8 4, i8 4>
	; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]			; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
	; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0			; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
	; CHECK-NEXT: [[TMP5:%.]] = getelementptr inbounds i8, ptr [[A:%.]], i64 [[TMP4]]			; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
	; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0			; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 8
	; CHECK-NEXT: store <32 x i8> [[VEC_IND]], ptr [[TMP6]], align 1			; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 12
	; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32			; CHECK-NEXT: [[TMP8:%.]] = getelementptr inbounds i8, ptr [[A:%.]], i64 [[TMP4]]
	; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[VEC_IND]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>			; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
	; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
	; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]			; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
				; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
				; CHECK-NEXT: store <4 x i8> [[VEC_IND]], ptr [[TMP12]], align 1
				; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 4
				; CHECK-NEXT: store <4 x i8> [[STEP_ADD]], ptr [[TMP13]], align 1
				; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 8
				; CHECK-NEXT: store <4 x i8> [[STEP_ADD1]], ptr [[TMP14]], align 1
				; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 12
				; CHECK-NEXT: store <4 x i8> [[STEP_ADD2]], ptr [[TMP15]], align 1
				; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
				; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD2]], <i8 4, i8 4, i8 4, i8 4>
				; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
				; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]			; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.]], label [[VEC_EPILOG_ITER_CHECK:%.]]			; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
	; CHECK: vec.epilog.iter.check:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[IND_END5:%.*]] = add i64 3, [[N_VEC]]			; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[DOTLR_PH_PREHEADER]] ]
	; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
	; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 16
	; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
	; CHECK: vec.epilog.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], 16
	; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]]
	; CHECK-NEXT: [[IND_END4:%.*]] = add i64 3, [[N_VEC3]]
	; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i8
	; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP8]], i64 0
	; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
	; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i8> [[DOTSPLAT]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
	; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
	; CHECK: vec.epilog.vector.body:
	; CHECK-NEXT: [[INDEX8:%.]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-NEXT: [[VEC_IND9:%.]] = phi <16 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT10:%.]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-NEXT: [[OFFSET_IDX11:%.*]] = add i64 3, [[INDEX8]]
	; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX11]], 0
	; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
	; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
	; CHECK-NEXT: store <16 x i8> [[VEC_IND9]], ptr [[TMP11]], align 1
	; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX8]], 16
	; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
	; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC3]]
	; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	; CHECK: vec.epilog.middle.block:
	; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]]
	; CHECK-NEXT: br i1 [[CMP_N7]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
	; CHECK: vec.epilog.scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[ITER_CHECK]] ]
	; CHECK-NEXT: br label [[DOTLR_PH:%.*]]			; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
	; CHECK: .lr.ph:			; CHECK: .lr.ph:
	; CHECK-NEXT: [[INDVARS_IV:%.]] = phi i64 [ [[INDVARS_IV_NEXT:%.]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ]			; CHECK-NEXT: [[INDVARS_IV:%.]] = phi i64 [ [[INDVARS_IV_NEXT:%.]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
	; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV]] to i8			; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDVARS_IV]] to i8
	; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]			; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
	; CHECK-NEXT: store i8 [[TMP13]], ptr [[TMP14]], align 1			; CHECK-NEXT: store i8 [[TMP17]], ptr [[TMP18]], align 1
	; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1			; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
	; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32			; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
	; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]			; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
	; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP4:![0-9]+]]			; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
	; CHECK: ._crit_edge.loopexit:			; CHECK: ._crit_edge.loopexit:
	; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]			; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
	; CHECK: ._crit_edge:			; CHECK: ._crit_edge:
	; CHECK-NEXT: ret i32 undef			; CHECK-NEXT: ret i32 undef
	;			;
	%1 = icmp sgt i32 %n, 3			%1 = icmp sgt i32 %n, 3
	br i1 %1, label %.lr.ph, label %._crit_edge			br i1 %1, label %.lr.ph, label %._crit_edge

	▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: store <2 x float> [[TMP13]], ptr [[TMP21]], align 4			; CHECK-NEXT: store <2 x float> [[TMP13]], ptr [[TMP21]], align 4
	; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 4			; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 4
	; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[TMP22]], align 4			; CHECK-NEXT: store <2 x float> [[TMP14]], ptr [[TMP22]], align 4
	; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 6			; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 6
	; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[TMP23]], align 4			; CHECK-NEXT: store <2 x float> [[TMP15]], ptr [[TMP23]], align 4
	; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8			; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
	; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD2]], <i64 2, i64 2>			; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD2]], <i64 2, i64 2>
	; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]			; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]			; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]			; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
	; CHECK: scalar.ph:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 9, [[DOTLR_PH_PREHEADER]] ]			; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 9, [[DOTLR_PH_PREHEADER]] ]
	; CHECK-NEXT: br label [[DOTLR_PH:%.*]]			; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
	; CHECK: .lr.ph:			; CHECK: .lr.ph:
	; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]			; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
	; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[INDVARS_IV]], 3			; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[INDVARS_IV]], 3
	; CHECK-NEXT: [[TOFP:%.*]] = sitofp i64 [[ADD]] to float			; CHECK-NEXT: [[TOFP:%.*]] = sitofp i64 [[ADD]] to float
	; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]			; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]]
	; CHECK-NEXT: store float [[TOFP]], ptr [[GEP]], align 4			; CHECK-NEXT: store float [[TOFP]], ptr [[GEP]], align 4
	; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1			; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
	; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32			; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
	; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]			; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
	; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP6:![0-9]+]]			; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]]
	; CHECK: ._crit_edge.loopexit:			; CHECK: ._crit_edge.loopexit:
	; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]			; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
	; CHECK: ._crit_edge:			; CHECK: ._crit_edge:
	; CHECK-NEXT: ret i32 undef			; CHECK-NEXT: ret i32 undef
	;			;
	%1 = icmp sgt i32 %n, 9			%1 = icmp sgt i32 %n, 9
	br i1 %1, label %.lr.ph, label %._crit_edge			br i1 %1, label %.lr.ph, label %._crit_edge

	Show All 14 Lines

llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -passes=loop-vectorize -mcpu=znver2 -S %s | FileCheck %s			; RUN: opt -passes=loop-vectorize -mcpu=znver2 -S %s | FileCheck %s

	target triple = "x86_64-unknown-linux-gnu"			target triple = "x86_64-unknown-linux-gnu"

	define void @test_pr59459(i64 %iv.start, ptr %arr) {			define void @test_pr59459(i64 %iv.start, ptr %arr) {
	; CHECK-LABEL: @test_pr59459(			; CHECK-LABEL: @test_pr59459(
	; CHECK-NEXT: iter.check:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[IV_START:%.*]] to i32			; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[IV_START:%.*]] to i32
	; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 92)			; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 92)
	; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX1]], [[TMP0]]			; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX1]], [[TMP0]]
	; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64			; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
	; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1			; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
	; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8			; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 32
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]			; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
	; CHECK: vector.scevcheck:			; CHECK: vector.scevcheck:
	; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_START]] to i32			; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_START]] to i32
	; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP4]], i32 92)			; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP4]], i32 92)
	; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SMAX]], [[TMP4]]			; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SMAX]], [[TMP4]]
	; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], -1			; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], -1
	; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]			; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
	; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], [[TMP6]]			; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], [[TMP6]]
	; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]			; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
	; CHECK: vector.main.loop.iter.check:
	; CHECK-NEXT: [[MIN_ITERS_CHECK2:%.*]] = icmp ult i64 [[TMP3]], 16
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK2]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
	; CHECK: vector.ph:			; CHECK: vector.ph:
	; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16			; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 32
	; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]			; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
	; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[IV_START]] to i32
	; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP9]], i64 0
	; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
	; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
	; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_START]], [[N_VEC]]			; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_START]], [[N_VEC]]
				; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[IV_START]] to i32
				; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP9]], i64 0
				; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
				; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
				; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i32> [[STEP_ADD]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
				; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <8 x i32> [[STEP_ADD2]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
	; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]]			; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]]
	; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[OFFSET_IDX]] to i32			; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[OFFSET_IDX]] to i32
	; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 0			; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 0
	; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -1			; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], 8
	; CHECK-NEXT: [[TMP13:%.*]] = mul <16 x i32> [[VEC_IND]], <i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608>			; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP10]], 16
	; CHECK-NEXT: [[TMP14:%.*]] = lshr exact <16 x i32> [[TMP13]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>			; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP10]], 24
	; CHECK-NEXT: [[TMP15:%.*]] = trunc <16 x i32> [[TMP14]] to <16 x i16>			; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP11]], -1
	; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP12]] to i64			; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP12]], -1
	; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[ARR:%.*]], i64 [[TMP16]]			; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP13]], -1
	; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP17]], i32 0			; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP14]], -1
	; CHECK-NEXT: store <16 x i16> [[TMP15]], ptr [[TMP18]], align 2			; CHECK-NEXT: [[TMP19:%.*]] = mul <8 x i32> [[VEC_IND]], <i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608>
	; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16			; CHECK-NEXT: [[TMP20:%.*]] = mul <8 x i32> [[STEP_ADD]], <i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608>
	; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>			; CHECK-NEXT: [[TMP21:%.*]] = mul <8 x i32> [[STEP_ADD2]], <i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608>
	; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; CHECK-NEXT: [[TMP22:%.*]] = mul <8 x i32> [[STEP_ADD3]], <i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608>
	; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]			; CHECK-NEXT: [[TMP23:%.*]] = lshr exact <8 x i32> [[TMP19]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
				; CHECK-NEXT: [[TMP24:%.*]] = lshr exact <8 x i32> [[TMP20]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
				; CHECK-NEXT: [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP21]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
				; CHECK-NEXT: [[TMP26:%.*]] = lshr exact <8 x i32> [[TMP22]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
				; CHECK-NEXT: [[TMP27:%.*]] = trunc <8 x i32> [[TMP23]] to <8 x i16>
				; CHECK-NEXT: [[TMP28:%.*]] = trunc <8 x i32> [[TMP24]] to <8 x i16>
				; CHECK-NEXT: [[TMP29:%.*]] = trunc <8 x i32> [[TMP25]] to <8 x i16>
				; CHECK-NEXT: [[TMP30:%.*]] = trunc <8 x i32> [[TMP26]] to <8 x i16>
				; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP15]] to i64
				; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP16]] to i64
				; CHECK-NEXT: [[TMP33:%.*]] = zext i32 [[TMP17]] to i64
				; CHECK-NEXT: [[TMP34:%.*]] = zext i32 [[TMP18]] to i64
				; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[ARR:%.*]], i64 [[TMP31]]
				; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP32]]
				; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP33]]
				; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP34]]
				; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i16, ptr [[TMP35]], i32 0
				; CHECK-NEXT: store <8 x i16> [[TMP27]], ptr [[TMP39]], align 2
				; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i16, ptr [[TMP35]], i32 8
				; CHECK-NEXT: store <8 x i16> [[TMP28]], ptr [[TMP40]], align 2
				; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i16, ptr [[TMP35]], i32 16
				; CHECK-NEXT: store <8 x i16> [[TMP29]], ptr [[TMP41]], align 2
				; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i16, ptr [[TMP35]], i32 24
				; CHECK-NEXT: store <8 x i16> [[TMP30]], ptr [[TMP42]], align 2
				; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
				; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[STEP_ADD3]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
				; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
				; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]			; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]			; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
	; CHECK: vec.epilog.iter.check:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IV_START]], [[N_VEC]]			; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ]
	; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
	; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
	; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
	; CHECK: vec.epilog.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[TMP3]], 8
	; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF3]]
	; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[IV_START]], [[N_VEC4]]
	; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32
	; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i32> poison, i32 [[TMP20]], i64 0
	; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT10]], <8 x i32> poison, <8 x i32> zeroinitializer
	; CHECK-NEXT: [[INDUCTION12:%.*]] = add <8 x i32> [[DOTSPLAT11]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
	; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
	; CHECK: vec.epilog.vector.body:
	; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-NEXT: [[VEC_IND13:%.*]] = phi <8 x i32> [ [[INDUCTION12]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-NEXT: [[OFFSET_IDX15:%.*]] = add i64 [[IV_START]], [[INDEX9]]
	; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[OFFSET_IDX15]] to i32
	; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], 0
	; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], -1
	; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[VEC_IND13]], <i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608>
	; CHECK-NEXT: [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP24]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
	; CHECK-NEXT: [[TMP26:%.*]] = trunc <8 x i32> [[TMP25]] to <8 x i16>
	; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP23]] to i64
	; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP27]]
	; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[TMP28]], i32 0
	; CHECK-NEXT: store <8 x i16> [[TMP26]], ptr [[TMP29]], align 2
	; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX9]], 8
	; CHECK-NEXT: [[VEC_IND_NEXT14]] = add <8 x i32> [[VEC_IND13]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
	; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC4]]
	; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	; CHECK: vec.epilog.middle.block:
	; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC4]]
	; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
	; CHECK: vec.epilog.scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ]
	; CHECK-NEXT: br label [[LOOP:%.*]]			; CHECK-NEXT: br label [[LOOP:%.*]]
	; CHECK: loop:			; CHECK: loop:
	; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]			; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
	; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32			; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
	; CHECK-NEXT: [[STORE_IDX:%.*]] = add i32 [[IV_TRUNC]], -1			; CHECK-NEXT: [[STORE_IDX:%.*]] = add i32 [[IV_TRUNC]], -1
	; CHECK-NEXT: [[X:%.*]] = mul i32 [[IV_TRUNC]], 196608			; CHECK-NEXT: [[X:%.*]] = mul i32 [[IV_TRUNC]], 196608
	; CHECK-NEXT: [[Y:%.*]] = lshr exact i32 [[X]], 16			; CHECK-NEXT: [[Y:%.*]] = lshr exact i32 [[X]], 16
	; CHECK-NEXT: [[STORE_VAL:%.*]] = trunc i32 [[Y]] to i16			; CHECK-NEXT: [[STORE_VAL:%.*]] = trunc i32 [[Y]] to i16
	; CHECK-NEXT: [[STORE_IDX_WIDE:%.*]] = zext i32 [[STORE_IDX]] to i64			; CHECK-NEXT: [[STORE_IDX_WIDE:%.*]] = zext i32 [[STORE_IDX]] to i64
	; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[STORE_IDX_WIDE]]			; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[STORE_IDX_WIDE]]
	; CHECK-NEXT: store i16 [[STORE_VAL]], ptr [[ADDR]], align 2			; CHECK-NEXT: store i16 [[STORE_VAL]], ptr [[ADDR]], align 2
	; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV_TRUNC]], 91			; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV_TRUNC]], 91
	; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]			; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
	; CHECK: exit:			; CHECK: exit:
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ]
	▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP16]], align 2			; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP16]], align 2
	; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 32			; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 32
	; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr [[TMP17]], align 2			; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr [[TMP17]], align 2
	; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 48			; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 48
	; CHECK-NEXT: store <16 x i16> [[TMP10]], ptr [[TMP18]], align 2			; CHECK-NEXT: store <16 x i16> [[TMP10]], ptr [[TMP18]], align 2
	; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64			; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
	; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD5]], [[DOTSPLAT3]]			; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD5]], [[DOTSPLAT3]]
	; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]			; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[L]], [[N_VEC]]			; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[L]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]			; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
	; CHECK: vec.epilog.iter.check:			; CHECK: vec.epilog.iter.check:
	; CHECK-NEXT: [[DOTCAST12:%.*]] = trunc i64 [[N_VEC]] to i16			; CHECK-NEXT: [[DOTCAST12:%.*]] = trunc i64 [[N_VEC]] to i16
	; CHECK-NEXT: [[IND_END13:%.*]] = mul i16 [[DOTCAST12]], [[TMP0]]			; CHECK-NEXT: [[IND_END13:%.*]] = mul i16 [[DOTCAST12]], [[TMP0]]
	; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[L]], [[N_VEC]]			; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[L]], [[N_VEC]]
	; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8			; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
	Show All 23 Lines
	; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX16]], 0			; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX16]], 0
	; CHECK-NEXT: [[TMP23:%.*]] = sub <8 x i16> [[VEC_IND24]], [[BROADCAST_SPLAT28]]			; CHECK-NEXT: [[TMP23:%.*]] = sub <8 x i16> [[VEC_IND24]], [[BROADCAST_SPLAT28]]
	; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP22]]			; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP22]]
	; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[TMP24]], i32 0			; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[TMP24]], i32 0
	; CHECK-NEXT: store <8 x i16> [[TMP23]], ptr [[TMP25]], align 2			; CHECK-NEXT: store <8 x i16> [[TMP23]], ptr [[TMP25]], align 2
	; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX16]], 8			; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX16]], 8
	; CHECK-NEXT: [[VEC_IND_NEXT26]] = add <8 x i16> [[VEC_IND24]], [[DOTSPLAT23]]			; CHECK-NEXT: [[VEC_IND_NEXT26]] = add <8 x i16> [[VEC_IND24]], [[DOTSPLAT23]]
	; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC8]]			; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC8]]
	; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]			; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
	; CHECK: vec.epilog.middle.block:			; CHECK: vec.epilog.middle.block:
	; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[L]], [[N_VEC8]]			; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[L]], [[N_VEC8]]
	; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]			; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
	; CHECK: vec.epilog.scalar.ph:			; CHECK: vec.epilog.scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]			; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
	; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i16 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]			; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i16 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
	; CHECK-NEXT: br label [[LOOP:%.*]]			; CHECK-NEXT: br label [[LOOP:%.*]]
	; CHECK: loop:			; CHECK: loop:
	; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]			; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
	; CHECK-NEXT: [[P_09:%.*]] = phi i16 [ [[BC_RESUME_VAL14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]			; CHECK-NEXT: [[P_09:%.*]] = phi i16 [ [[BC_RESUME_VAL14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
	; CHECK-NEXT: [[ADD]] = sub i16 [[P_09]], [[OFF]]			; CHECK-NEXT: [[ADD]] = sub i16 [[P_09]], [[OFF]]
	; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[IV]]			; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[IV]]
	; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2			; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2
	; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[L]]			; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[L]]
	; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]			; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
	; CHECK: exit:			; CHECK: exit:
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	Show All 11 Lines

llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll

Show All 9 Lines
define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) {		define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) {
; CHECK-LABEL: @inv_val_store_to_inv_address_with_reduction(		; CHECK-LABEL: @inv_val_store_to_inv_address_with_reduction(
; CHECK-NEXT: iter.check:		; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32		; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)		; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX2]], 8		; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX2]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]		; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:		; CHECK: vector.memcheck:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4		; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)		; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SMAX]], 2		; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SMAX]], 2
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]		; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP1]], [[A]]		; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP1]], [[A]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[B]]		; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[SCEVGEP]], [[B]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]		; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]		; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:		; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[SMAX2]], 64		; CHECK-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[SMAX2]], 64
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]		; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775744		; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775744
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines	for.end: ; preds = %for.body
%t4 = phi i32 [ %t3, %for.body ]		%t4 = phi i32 [ %t3, %for.body ]
ret i32 %t4		ret i32 %t4
}		}

; Conditional store		; Conditional store
; if (b[i] == k) a = ntrunc		; if (b[i] == k) a = ntrunc
define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i32 %k) {		define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i32 %k) {
; CHECK-LABEL: @inv_val_store_to_inv_address_conditional(		; CHECK-LABEL: @inv_val_store_to_inv_address_conditional(
; CHECK-NEXT: iter.check:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32		; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)		; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX2]], 8		; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX2]], 32
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]		; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:		; CHECK: vector.memcheck:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)		; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SMAX]], 2		; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SMAX]], 2
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]		; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4		; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP1]], [[B]]		; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP1]], [[B]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[A]]		; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]		; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]		; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[SMAX2]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK3]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775792		; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775776
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[K:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT4]], <16 x i32> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT6]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <16 x ptr> poison, ptr [[A]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT6]], <16 x ptr> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT8]], <8 x ptr> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]		; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope !15, !noalias !18		; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 8, !alias.scope !15, !noalias !18
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]		; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP1]], align 4, !alias.scope !15, !noalias !18		; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP2]], align 8, !alias.scope !15, !noalias !18
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> [[BROADCAST_SPLAT5]], <16 x ptr> [[BROADCAST_SPLAT7]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !18		; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 16
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16		; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP3]], align 8, !alias.scope !15, !noalias !18
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 24
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]		; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8, !alias.scope !15, !noalias !18
		; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
		; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD3]], [[BROADCAST_SPLAT]]
		; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
		; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD5]], [[BROADCAST_SPLAT]]
		; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT7]], ptr [[TMP1]], align 4, !alias.scope !15, !noalias !18
		; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT7]], ptr [[TMP2]], align 4, !alias.scope !15, !noalias !18
		; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT7]], ptr [[TMP3]], align 4, !alias.scope !15, !noalias !18
		; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT7]], ptr [[TMP4]], align 4, !alias.scope !15, !noalias !18
		; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[BROADCAST_SPLAT7]], <8 x ptr> [[BROADCAST_SPLAT9]], i32 4, <8 x i1> [[TMP5]]), !alias.scope !18
		; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[BROADCAST_SPLAT7]], <8 x ptr> [[BROADCAST_SPLAT9]], i32 4, <8 x i1> [[TMP6]]), !alias.scope !18
		; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[BROADCAST_SPLAT7]], <8 x ptr> [[BROADCAST_SPLAT9]], i32 4, <8 x i1> [[TMP7]]), !alias.scope !18
		; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[BROADCAST_SPLAT7]], <8 x ptr> [[BROADCAST_SPLAT9]], i32 4, <8 x i1> [[TMP8]]), !alias.scope !18
		; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
		; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
		; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]		; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]		; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: vec.epilog.iter.check:		; CHECK: scalar.ph:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX2]], 8		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_VEC9:%.*]] = and i64 [[SMAX2]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT13]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT15]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT18:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT17]], <8 x ptr> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT19:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX11]]
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8, !alias.scope !21, !noalias !24
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD12]], [[BROADCAST_SPLAT14]]
; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT16]], ptr [[TMP4]], align 4, !alias.scope !21, !noalias !24
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[BROADCAST_SPLAT16]], <8 x ptr> [[BROADCAST_SPLAT18]], i32 4, <8 x i1> [[TMP5]]), !alias.scope !24
; CHECK-NEXT: [[INDEX_NEXT19]] = add nuw i64 [[INDEX11]], 8
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT19]], [[N_VEC9]]
; CHECK-NEXT: br i1 [[TMP6]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC9]]
; CHECK-NEXT: br i1 [[CMP_N10]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]		; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]		; CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[T1]], align 8		; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[T1]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T2]], [[K]]		; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T2]], [[K]]
; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[T1]], align 4		; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[T1]], align 4
; CHECK-NEXT: br i1 [[CMP]], label [[COND_STORE:%.*]], label [[LATCH]]		; CHECK-NEXT: br i1 [[CMP]], label [[COND_STORE:%.*]], label [[LATCH]]
; CHECK: cond_store:		; CHECK: cond_store:
; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4		; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4
; CHECK-NEXT: br label [[LATCH]]		; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:		; CHECK: latch:
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1		; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]		; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP27:![0-9]+]]		; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: for.end:		; CHECK: for.end:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
%ntrunc = trunc i64 %n to i32		%ntrunc = trunc i64 %n to i32
br label %for.body		br label %for.body

for.body: ; preds = %for.body, %entry		for.body: ; preds = %for.body, %entry
Show All 22 Lines
; CHECK-NEXT: iter.check:		; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32		; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[SMAX10:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)		; CHECK-NEXT: [[SMAX10:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX10]], 8		; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX10]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]		; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:		; CHECK: vector.memcheck:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)		; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SMAX]], 2		; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SMAX]], 2
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]		; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4		; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP0]]		; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[UGLYGEP1]], [[B]]		; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt ptr [[SCEVGEP1]], [[B]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[UGLYGEP]], [[A]]		; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt ptr [[SCEVGEP]], [[A]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]		; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ugt ptr [[UGLYGEP2]], [[B]]		; CHECK-NEXT: [[BOUND03:%.*]] = icmp ugt ptr [[SCEVGEP2]], [[B]]
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ugt ptr [[UGLYGEP]], [[C]]		; CHECK-NEXT: [[BOUND14:%.*]] = icmp ugt ptr [[SCEVGEP]], [[C]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]		; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]		; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: [[BOUND06:%.*]] = icmp ugt ptr [[UGLYGEP2]], [[A]]		; CHECK-NEXT: [[BOUND06:%.*]] = icmp ugt ptr [[SCEVGEP2]], [[A]]
; CHECK-NEXT: [[BOUND17:%.*]] = icmp ugt ptr [[UGLYGEP1]], [[C]]		; CHECK-NEXT: [[BOUND17:%.*]] = icmp ugt ptr [[SCEVGEP1]], [[C]]
; CHECK-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]		; CHECK-NEXT: [[FOUND_CONFLICT8:%.*]] = and i1 [[BOUND06]], [[BOUND17]]
; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]]		; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT8]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX9]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]		; CHECK-NEXT: br i1 [[CONFLICT_RDX9]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:		; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK11:%.*]] = icmp ult i64 [[SMAX10]], 16		; CHECK-NEXT: [[MIN_ITERS_CHECK11:%.*]] = icmp ult i64 [[SMAX10]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK11]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]		; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK11]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX10]], 9223372036854775792		; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX10]], 9223372036854775792
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[K:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT12]], <16 x i32> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT12]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <16 x ptr> poison, ptr [[A]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <16 x ptr> poison, ptr [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT14]], <16 x ptr> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT14]], <16 x ptr> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]		; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope !28, !noalias !31		; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope !22, !noalias !25
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]		; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT13]], ptr [[TMP1]], align 4, !alias.scope !28, !noalias !31		; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT13]], ptr [[TMP1]], align 4, !alias.scope !22, !noalias !25
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]		; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP3]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !34		; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP3]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !28
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x ptr> [[BROADCAST_SPLAT15]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !35, !noalias !34		; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x ptr> [[BROADCAST_SPLAT15]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !29, !noalias !28
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16		; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX10]], [[N_VEC]]		; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX10]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]		; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:		; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX10]], 8		; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX10]], 8
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0		; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK_NOT_NOT:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]		; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK_NOT_NOT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:		; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]		; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_VEC17:%.*]] = and i64 [[SMAX10]], 9223372036854775800		; CHECK-NEXT: [[N_VEC17:%.*]] = and i64 [[SMAX10]], 9223372036854775800
; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT21:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT21]], <8 x i32> poison, <8 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT22:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT21]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT23:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT23:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT24:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT23]], <8 x i32> poison, <8 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT24:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT23]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT26:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT26:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT27:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT26]], <8 x ptr> poison, <8 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT27:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT26]], <8 x ptr> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:		; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX19:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT28:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX19:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT28:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX19]]		; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX19]]
; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <8 x i32>, ptr [[TMP5]], align 8, !alias.scope !37, !noalias !40		; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <8 x i32>, ptr [[TMP5]], align 8, !alias.scope !31, !noalias !34
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD20]], [[BROADCAST_SPLAT22]]		; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD20]], [[BROADCAST_SPLAT22]]
; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT24]], ptr [[TMP5]], align 4, !alias.scope !37, !noalias !40		; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT24]], ptr [[TMP5]], align 4, !alias.scope !31, !noalias !34
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX19]]		; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX19]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP7]], i32 8, <8 x i1> [[TMP6]], <8 x i32> poison), !alias.scope !43		; CHECK-NEXT: [[WIDE_MASKED_LOAD25:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP7]], i32 8, <8 x i1> [[TMP6]], <8 x i32> poison), !alias.scope !37
; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[WIDE_MASKED_LOAD25]], <8 x ptr> [[BROADCAST_SPLAT27]], i32 4, <8 x i1> [[TMP6]]), !alias.scope !44, !noalias !43		; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[WIDE_MASKED_LOAD25]], <8 x ptr> [[BROADCAST_SPLAT27]], i32 4, <8 x i1> [[TMP6]]), !alias.scope !38, !noalias !37
; CHECK-NEXT: [[INDEX_NEXT28]] = add nuw i64 [[INDEX19]], 8		; CHECK-NEXT: [[INDEX_NEXT28]] = add nuw i64 [[INDEX19]], 8
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT28]], [[N_VEC17]]		; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT28]], [[N_VEC17]]
; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK: vec.epilog.middle.block:		; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[SMAX10]], [[N_VEC17]]		; CHECK-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[SMAX10]], [[N_VEC17]]
; CHECK-NEXT: br i1 [[CMP_N18]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]		; CHECK-NEXT: br i1 [[CMP_N18]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:		; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]		; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
; CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]		; CHECK-NEXT: [[T1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[T1]], align 8		; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[T1]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T2]], [[K]]		; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[T2]], [[K]]
; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[T1]], align 4		; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[T1]], align 4
; CHECK-NEXT: br i1 [[CMP]], label [[COND_STORE:%.*]], label [[LATCH]]		; CHECK-NEXT: br i1 [[CMP]], label [[COND_STORE:%.*]], label [[LATCH]]
; CHECK: cond_store:		; CHECK: cond_store:
; CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[I]]		; CHECK-NEXT: [[T3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[I]]
; CHECK-NEXT: [[T4:%.*]] = load i32, ptr [[T3]], align 8		; CHECK-NEXT: [[T4:%.*]] = load i32, ptr [[T3]], align 8
; CHECK-NEXT: store i32 [[T4]], ptr [[A]], align 4		; CHECK-NEXT: store i32 [[T4]], ptr [[A]], align 4
; CHECK-NEXT: br label [[LATCH]]		; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:		; CHECK: latch:
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1		; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]		; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP46:![0-9]+]]		; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP40:![0-9]+]]
; CHECK: for.end:		; CHECK: for.end:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
%ntrunc = trunc i64 %n to i32		%ntrunc = trunc i64 %n to i32
br label %for.body		br label %for.body

for.body: ; preds = %for.body, %entry		for.body: ; preds = %for.body, %entry
Show All 21 Lines

llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll

	Show All 13 Lines
	; arr2[i1] = i1;			; arr2[i1] = i1;
	; for (i2 = 0; i2 < 8; i2++)			; for (i2 = 0; i2 < 8; i2++)
	; arr[i2][i1] = i1 + n;			; arr[i2][i1] = i1 + n;
	; }			; }
	; }			; }
	;			;

	; CHECK-LABEL: vector.ph:			; CHECK-LABEL: vector.ph:
	; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0			; CHECK: %[[SplatVal:.*]] = insertelement <2 x i32> poison, i32 %n, i64 0
	; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer			; CHECK: %[[Splat:.*]] = shufflevector <2 x i32> %[[SplatVal]], <2 x i32> poison, <2 x i32> zeroinitializer

	; CHECK-LABEL: vector.body:			; CHECK-LABEL: vector.body:
	; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]			; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
	; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]			; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
	; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]]			; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <2 x i64> %[[VecInd]]
	; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>			; CHECK: %[[VecIndTr:.*]] = trunc <2 x i64> %[[VecInd]] to <2 x i32>
	; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)			; CHECK: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %[[VecIndTr]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> <i1 true, i1 true>)
	; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>			; CHECK: %[[VecIndTr2:.*]] = trunc <2 x i64> %[[VecInd]] to <2 x i32>
	; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]			; CHECK: %[[StoreVal:.*]] = add nsw <2 x i32> %[[VecIndTr2]], %[[Splat]]
	; CHECK: br label %[[InnerLoop:.+]]			; CHECK: br label %[[InnerLoop:.+]]

	; CHECK: [[InnerLoop]]:			; CHECK: [[InnerLoop]]:
	; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]			; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
	; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]			; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]]
	; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true			; CHECK: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> <i1 true, i1 true>)
	; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>			; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], <i64 1, i64 1>
	; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>			; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], <i64 8, i64 8>
	; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0			; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0
	; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]			; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]

	; CHECK: [[ForInc]]:			; CHECK: [[ForInc]]:
	; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4			; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 2
	; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], <i64 4, i64 4, i64 4, i64 4>			; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], <i64 2, i64 2>
	; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8			; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
	; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body			; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body

	; AVX-LABEL: vector.ph:			; AVX-LABEL: vector.ph:
	; AVX: %[[SplatVal:.*]] = insertelement <8 x i32> poison, i32 %n, i64 0			; AVX: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0
	; AVX: %[[Splat:.*]] = shufflevector <8 x i32> %[[SplatVal]], <8 x i32> poison, <8 x i32> zeroinitializer			; AVX: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer

	; AVX-LABEL: vector.body:			; AVX-LABEL: vector.body:
	; AVX: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]			; AVX: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
	; AVX: %[[VecInd:.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]			; AVX: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
	; AVX: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> %[[VecInd]]			; AVX: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]]
	; AVX: %[[VecIndTr:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32>			; AVX: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
	; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[VecIndTr]], <8 x ptr> %[[AAddr]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)			; AVX: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
	; AVX: %[[VecIndTr2:.*]] = trunc <8 x i64> %[[VecInd]] to <8 x i32>			; AVX: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
	; AVX: %[[StoreVal:.*]] = add nsw <8 x i32> %[[VecIndTr2]], %[[Splat]]			; AVX: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]
	; AVX: br label %[[InnerLoop:.+]]			; AVX: br label %[[InnerLoop:.+]]

	; AVX: [[InnerLoop]]:			; AVX: [[InnerLoop]]:
	; AVX: %[[InnerPhi:.*]] = phi <8 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]			; AVX: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
	; AVX: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <8 x i64> %[[InnerPhi]], <8 x i64> %[[VecInd]]			; AVX: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
	; AVX: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %[[StoreVal]], <8 x ptr> %[[AAddr2]], i32 4, <8 x i1> <i1 true, i1 true, i1 true			; AVX: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
	; AVX: %[[InnerPhiNext]] = add nuw nsw <8 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>			; AVX: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>
	; AVX: %[[VecCond:.*]] = icmp eq <8 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>			; AVX: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>
	; AVX: %[[InnerCond:.*]] = extractelement <8 x i1> %[[VecCond]], i32 0			; AVX: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0
	; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]			; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]

	; AVX: [[ForInc]]:			; AVX: [[ForInc]]:
	; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>			; AVX: %[[IndNext]] = add nuw i64 %[[Ind]], 4
	; AVX: %[[IndNext]] = add nuw i64 %[[Ind]], 8			; AVX: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], <i64 4, i64 4, i64 4, i64 4>
	; AVX: br i1 true, label %middle.block, label %vector.body			; AVX: %[[InnerCond:.*]] = icmp eq i64 %[[IndNext]], 8
				; AVX: br i1 %[[InnerCond]], label %middle.block, label %vector.body

	@arr2 = external global [8 x i32], align 16			@arr2 = external global [8 x i32], align 16
	@arr = external global [8 x [8 x i32]], align 16			@arr = external global [8 x [8 x i32]], align 16

	; Function Attrs: norecurse nounwind uwtable			; Function Attrs: norecurse nounwind uwtable
	define void @foo(i32 %n) {			define void @foo(i32 %n) {
	entry:			entry:
	br label %for.body			br label %for.body
	Show All 29 Lines

llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll

	Show First 20 Lines • Show All 678 Lines • ▼ Show 20 Lines
	; CHECK: vector.ph:			; CHECK: vector.ph:
	; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4			; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
	; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]			; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0			; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
	; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[VEC_IND]] to <4 x i8>			; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[VEC_IND]] to <4 x i16>
	; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]			; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i16> [[TMP1]] to <4 x i8>
	; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0			; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
	; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1			; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
				; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[TMP4]], align 1
	; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4			; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
	; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>			; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
	; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]			; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]			; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]			; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
	; CHECK: vec.epilog.iter.check:			; CHECK: vec.epilog.iter.check:
	; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]			; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
	; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4			; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
	; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]			; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
	; CHECK: vec.epilog.ph:			; CHECK: vec.epilog.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]			; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]			; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4			; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
	; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]			; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]
	; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32			; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32
	; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0			; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
	; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer			; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
	; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>			; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
	; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
	; CHECK: vec.epilog.vector.body:			; CHECK: vec.epilog.vector.body:
	; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]			; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX6]], 0			; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX6]], 0
	; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[VEC_IND7]] to <4 x i8>			; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[VEC_IND7]] to <4 x i16>
	; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]			; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i8>
	; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0			; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
	; CHECK-NEXT: store <4 x i8> [[TMP7]], ptr [[TMP9]], align 1			; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
				; CHECK-NEXT: store <4 x i8> [[TMP9]], ptr [[TMP11]], align 1
	; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4			; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4
	; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4>			; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4>
	; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]			; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
	; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]			; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
	; CHECK: vec.epilog.middle.block:			; CHECK: vec.epilog.middle.block:
	; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]			; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]
	; CHECK-NEXT: br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]			; CHECK-NEXT: br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
	; CHECK: vec.epilog.scalar.ph:			; CHECK: vec.epilog.scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]			; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
	; CHECK-NEXT: br label [[LOOP:%.*]]			; CHECK-NEXT: br label [[LOOP:%.*]]
	; CHECK: loop:			; CHECK: loop:
	; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]			; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
	; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[IV]] to i32			; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[IV]] to i32
	; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8			; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
				; CHECK-NEXT: [[CONV:%.*]] = trunc i16 [[TMP14]] to i8
	; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]			; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
	; CHECK-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1			; CHECK-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
	; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]			; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]
	; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP16:![0-9]+]]			; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP16:![0-9]+]]
	; CHECK: exit:			; CHECK: exit:
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f4(			; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f4(
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: iter.check:			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: iter.check:
				david-arm (inline comment, unsubmitted): Hmm, it looks like we've decided not to vectorise at all now. Perhaps because the maximum register width is 32 bits, and since the largest type in the loop is now 32 bits, the max VF we can choose is 1? In order to still demonstrate some vectorisation you might have to change the loop IR to be something like this: `%conv = trunc i32 %0 to i16` followed by `store i16 %conv, ptr %arrayidx, align 1`.
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N:%.*]] to i64			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N:%.*]] to i64
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vector.main.loop.iter.check:			; CHECK-PROFITABLE-BY-DEFAULT: vector.main.loop.iter.check:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vector.ph:			; CHECK-PROFITABLE-BY-DEFAULT: vector.ph:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vector.body:			; CHECK-PROFITABLE-BY-DEFAULT: vector.body:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[VEC_IND]] to <4 x i8>			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[VEC_IND]] to <2 x i16>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = trunc <2 x i16> [[TMP1]] to <2 x i8>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[TMP2]], ptr [[TMP4]], align 1
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
				; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
	; CHECK-PROFITABLE-BY-DEFAULT: middle.block:			; CHECK-PROFITABLE-BY-DEFAULT: middle.block:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.iter.check:			; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.iter.check:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.ph:			; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.ph:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP5:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP6:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i64 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.vector.body:			; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.vector.body:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP6:%.*]] = add i64 [[INDEX6]], 0			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[INDEX6]], 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = trunc <2 x i32> [[VEC_IND7]] to <2 x i8>			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = trunc <2 x i32> [[VEC_IND7]] to <2 x i16>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = trunc <2 x i16> [[TMP8]] to <2 x i8>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[TMP7]], ptr [[TMP9]], align 1			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
				; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[TMP9]], ptr [[TMP11]], align 1
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 2, i32 2>			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 2, i32 2>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block:			; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.scalar.ph:			; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.scalar.ph:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[LOOP:%.*]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[LOOP:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: loop:			; CHECK-PROFITABLE-BY-DEFAULT: loop:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP11:%.*]] = trunc i64 [[IV]] to i32			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP13:%.*]] = trunc i64 [[IV]] to i32
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
				; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CONV:%.*]] = trunc i16 [[TMP14]] to i8
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
	; CHECK-PROFITABLE-BY-DEFAULT: exit:			; CHECK-PROFITABLE-BY-DEFAULT: exit:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret void			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret void
	;			;
	entry:			entry:
	%wide.trip.count = zext i32 %n to i64			%wide.trip.count = zext i32 %n to i64
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
	%0 = trunc i64 %iv to i32			%0 = trunc i64 %iv to i32
	%conv = trunc i32 %0 to i8			%1 = trunc i32 %0 to i16
				%conv = trunc i16 %1 to i8
	%arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv			%arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv
	store i8 %conv, ptr %arrayidx, align 1			store i8 %conv, ptr %arrayidx, align 1
	%iv.next = add nuw nsw i64 %iv, 1			%iv.next = add nuw nsw i64 %iv, 1
	%exitcond = icmp ne i64 %iv.next, %wide.trip.count			%exitcond = icmp ne i64 %iv.next, %wide.trip.count
	br i1 %exitcond, label %loop, label %exit			br i1 %exitcond, label %loop, label %exit

	exit:			exit:
	ret void			ret void
	}			}

llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll

	Show All 9 Lines
	@arr2 = external global [8 x i32], align 16			@arr2 = external global [8 x i32], align 16
	@arr = external global [8 x [8 x i32]], align 16			@arr = external global [8 x [8 x i32]], align 16

	; Function Attrs: norecurse nounwind uwtable			; Function Attrs: norecurse nounwind uwtable
	define void @foo(i32 %n) {			define void @foo(i32 %n) {
	entry:			entry:
	br label %for.body			br label %for.body

	for.body: ; preds = %for.inc8, %entry			for.body: ; preds = %for.inc8, %entry
	%indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]			%indvars.iv21 = phi i32 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
				david-arm (inline comment, unsubmitted): Similar to the test above, you may need to change the test so you still get VF=1. You could try choosing to use a 32-bit phi and truncate that to i16?
	%arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21			%arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i32 %indvars.iv21
	%0 = trunc i64 %indvars.iv21 to i32			%0 = trunc i32 %indvars.iv21 to i16
	store i32 %0, ptr %arrayidx, align 4			store i16 %0, ptr %arrayidx, align 4
	%1 = trunc i64 %indvars.iv21 to i32			%add = add nsw i32 %indvars.iv21, %n
	%add = add nsw i32 %1, %n
	br label %for.body3			br label %for.body3

	for.body3: ; preds = %for.body3, %for.body			for.body3: ; preds = %for.body3, %for.body
	%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]			%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
	%arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21			%arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i32 %indvars.iv21
	store i32 %add, ptr %arrayidx7, align 4			store i32 %add, ptr %arrayidx7, align 4
	%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1			%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
	%exitcond = icmp eq i64 %indvars.iv.next, 8			%exitcond = icmp eq i64 %indvars.iv.next, 8
	br i1 %exitcond, label %for.inc8, label %for.body3			br i1 %exitcond, label %for.inc8, label %for.body3

	for.inc8: ; preds = %for.body3			for.inc8: ; preds = %for.body3
	%indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1			%indvars.iv.next22 = add nuw nsw i32 %indvars.iv21, 1
	%exitcond23 = icmp eq i64 %indvars.iv.next22, 8			%exitcond23 = icmp eq i32 %indvars.iv.next22, 8
	br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1			br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1

	for.end10: ; preds = %for.inc8			for.end10: ; preds = %for.inc8
	ret void			ret void
	}			}

	!1 = distinct !{!1, !2}			!1 = distinct !{!1, !2}
	!2 = !{!"llvm.loop.vectorize.enable", i1 true}			!2 = !{!"llvm.loop.vectorize.enable", i1 true}

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][LoopVectorize] Add truncated store values to list of types for widening
Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 555866

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll

llvm/test/Transforms/LoopVectorize/AArch64/truncate-type-widening.ll

llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll

llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll

llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll

llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll

llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll

llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][LoopVectorize] Add truncated store values to list of types for widening — Needs Review · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 555866

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll

llvm/test/Transforms/LoopVectorize/AArch64/truncate-type-widening.ll

llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll

llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll

llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll

llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll

llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll

llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll

[AArch64][LoopVectorize] Add truncated store values to list of types for widening
Needs Review · Public