Diff 554376

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,718 Lines • ▼ Show 20 Lines	for (Instruction &I : BB->instructionsWithoutDebug()) {
TTI.preferInLoopReduction(RdxDesc.getOpcode(),		TTI.preferInLoopReduction(RdxDesc.getOpcode(),
RdxDesc.getRecurrenceType(),		RdxDesc.getRecurrenceType(),
TargetTransformInfo::ReductionFlags()))		TargetTransformInfo::ReductionFlags()))
continue;		continue;
T = RdxDesc.getRecurrenceType();		T = RdxDesc.getRecurrenceType();
}		}

// Examine the stored values.		// Examine the stored values.
if (auto *ST = dyn_cast<StoreInst>(&I))		if (auto *ST = dyn_cast<StoreInst>(&I)) {
		david-armUnsubmitted Done Reply Inline Actions I wonder if we should also be asking if the store operand is loop invariant too? This would avoid changes to tests such as llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll. If the input is loop invariant then it's not really participating in the vector loop. david-arm: I wonder if we should also be asking if the store operand is loop invariant too? This would…
		RinAuthorUnsubmitted Done Reply Inline Actions Discussed this, and there is a trunc store in the loop: `%0 = trunc i64 %indvars.iv21 to i32` followed by `store i32 %0, ptr %arrayidx, align 4`. Rin: Discussed this and there is a trunc store in the loop ``` %0 = trunc i64 %indvars.iv21 to i32…
T = ST->getValueOperand()->getType();		T = ST->getValueOperand()->getType();
		if (auto *CastTrunc = dyn_cast<TruncInst>(ST->getValueOperand()))
		david-armUnsubmitted Done Reply Inline Actions I think you can write this more simply as `if (auto *Trunc = dyn_cast<TruncInst>(ST->getOperand(0))) T = Trunc->getSrcTy();` david-arm: I think you can write this more simply as ```if (auto *Trunc = dyn_cast<TruncInst>(ST…
		david-armUnsubmitted Done Reply Inline Actions nit: Sorry @Rin, just one more thing. Perhaps for consistency it makes sense to also use `ST->getValueOperand()` even though it's the same thing? david-arm: nit: Sorry @Rin, just one more thing. Perhaps for consistency it makes sense to also use `ST…
		RinAuthorUnsubmitted Done Reply Inline Actions I'll change it, no problem. Rin: I'll change it, no problem.
		T = CastTrunc->getSrcTy();
		david-armUnsubmitted Done Reply Inline Actions nit: Normally variables in LLVM start with a capital, i.e. `CastTrunc` david-arm: nit: Normally variables in LLVM start with a capital, i.e. `CastTrunc`
		RinAuthorUnsubmitted Done Reply Inline Actions Ah, my bad, I'll change that. Rin: Ah, my bad, I'll change that.
		}

assert(T->isSized() &&		assert(T->isSized() &&
"Expected the load/store/recurrence type to be sized");		"Expected the load/store/recurrence type to be sized");

ElementTypesInLoop.insert(T);		ElementTypesInLoop.insert(T);
}		}
}		}
}		}
▲ Show 20 Lines • Show All 4,859 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll

Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Lines	for.body: ; preds = %for.body.preheader, %for.body
%exitcond = icmp eq i32 %lftr.wideiv, %n		%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body		br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}		}


define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i32 %A, i16 %B) {		define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i32 %A, i16 %B) {
; CHECK-LABEL: define void @test_shrink_zext_in_preheader		; CHECK-LABEL: define void @test_shrink_zext_in_preheader
; CHECK-SAME: (ptr noalias [[SRC:%.]], ptr noalias [[DST:%.]], i32 [[A:%.]], i16 [[B:%.]]) {		; CHECK-SAME: (ptr noalias [[SRC:%.]], ptr noalias [[DST:%.]], i32 [[A:%.]], i16 [[B:%.]]) {
; CHECK-NEXT: iter.check:		; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.]]		; CHECK-NEXT: [[CONV10:%.*]] = zext i16 [[B]] to i32
; CHECK: vector.main.loop.iter.check:		; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.]], label [[VECTOR_PH:%.]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[A]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[A]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i16> undef, i16 [[B]], i64 0		; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i16> [[TMP0]], <16 x i16> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT]] to <16 x i16>		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT]] to <8 x i16>
; CHECK-NEXT: [[TMP2:%.*]] = mul <16 x i16> [[BROADCAST_SPLAT2]], [[TMP1]]		; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[BROADCAST_SPLAT2]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i32> [[BROADCAST_SPLAT]] to <16 x i16>		; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT]] to <8 x i16>
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[BROADCAST_SPLAT2]], [[TMP3]]		; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i16> [[BROADCAST_SPLAT2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP2]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>		; CHECK-NEXT: [[TMP5:%.*]] = lshr <8 x i16> [[TMP2]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP4]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>		; CHECK-NEXT: [[TMP6:%.*]] = lshr <8 x i16> [[TMP4]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i16> [[TMP5]] to <16 x i8>		; CHECK-NEXT: [[TMP7:%.*]] = trunc <8 x i16> [[TMP5]] to <8 x i8>
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i16> [[TMP6]] to <16 x i8>		; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i16> [[TMP6]] to <8 x i8>
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[INDEX]] to i64		; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP9]]		; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP9]]
; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP10]], align 1		; CHECK-NEXT: store <8 x i8> [[TMP7]], ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16		; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 8
; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP11]], align 1		; CHECK-NEXT: store <8 x i8> [[TMP8]], ptr [[TMP11]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32		; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992		; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.]], label [[VEC_EPILOG_ITER_CHECK:%.]]		; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: vec.epilog.iter.check:		; CHECK: scalar.ph:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i64 0
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX4:%.]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[A]] to i16
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> undef, i16 [[TMP14]], i64 0
; CHECK-NEXT: [[TMP16:%.*]] = mul <8 x i16> [[TMP15]], [[TMP13]]
; CHECK-NEXT: [[TMP17:%.*]] = lshr <8 x i16> [[TMP16]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
; CHECK-NEXT: [[TMP18:%.*]] = trunc <8 x i16> [[TMP17]] to <8 x i8>
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i8> [[TMP18]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[INDEX4]] to i64
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP20]]
; CHECK-NEXT: store <8 x i8> [[TMP19]], ptr [[TMP21]], align 1
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i32 [[INDEX4]], 8
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT9]], 1000
; CHECK-NEXT: br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]		; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:		; CHECK: loop:
; CHECK-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]		; CHECK-NEXT: [[IV:%.]] = phi i32 [ 992, [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[LOOP]] ]
		; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[CONV10]], [[A]]
		; CHECK-NEXT: [[TMP13:%.*]] = lshr i32 [[MUL]], 8
		; CHECK-NEXT: [[CONV5:%.*]] = trunc i32 [[TMP13]] to i8
		; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[IV]] to i64
		; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP14]]
		; CHECK-NEXT: store i8 [[CONV5]], ptr [[GEP_DST]], align 1
		; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
		; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000
		; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: exit:		; CHECK: exit:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
%conv10 = zext i16 %B to i32		%conv10 = zext i16 %B to i32
br label %loop		br label %loop

loop:		loop:
Show All 12 Lines

exit:		exit:
ret void		ret void
}		}

define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %c) {		define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %c) {
; CHECK-LABEL: define void @test_shrink_select		; CHECK-LABEL: define void @test_shrink_select
; CHECK-SAME: (ptr noalias [[SRC:%.]], ptr noalias [[DST:%.]], i32 [[A:%.]], i1 [[C:%.]]) {		; CHECK-SAME: (ptr noalias [[SRC:%.]], ptr noalias [[DST:%.]], i32 [[A:%.]], i1 [[C:%.]]) {
; CHECK-NEXT: iter.check:		; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.]]		; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.]], label [[VECTOR_PH:%.]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16		; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> undef, i16 [[TMP0]], i64 0		; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> undef, i16 [[TMP0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = mul <16 x i16> [[TMP1]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>		; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i16> [[TMP1]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>		; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <16 x i16> [[TMP4]], <16 x i16> [[TMP3]]		; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[TMP4]], <8 x i16> [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[TMP5]] to <16 x i8>		; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i16> [[TMP5]] to <8 x i8>
; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64		; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]		; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1		; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[TMP8]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16		; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992		; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.]], label [[VEC_EPILOG_ITER_CHECK:%.]]		; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: vec.epilog.iter.check:		; CHECK: scalar.ph:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX2:%.]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[A]] to i16
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i16> undef, i16 [[TMP10]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP11]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP12]], <8 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[C]], <8 x i16> [[TMP14]], <8 x i16> [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8>
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[INDEX2]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP17]]
; CHECK-NEXT: store <8 x i8> [[TMP16]], ptr [[TMP18]], align 1
; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 8
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 1000
; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]		; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:		; CHECK: loop:
; CHECK-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]		; CHECK-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: exit:		; CHECK: exit:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
br label %loop		br label %loop

loop:		loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]		%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
Show All 16 Lines

llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll

	Show All 27 Lines

	exit:			exit:
	ret void			ret void
	}			}

	; Same as test_load_i8_store_i32, but with types flipped for load and store.			; Same as test_load_i8_store_i32, but with types flipped for load and store.
	define void @test_load_i32_store_i8(ptr noalias %src, ptr noalias %dst, i32 %off, i64 %N) {			define void @test_load_i32_store_i8(ptr noalias %src, ptr noalias %dst, i32 %off, i64 %N) {
	; CHECK-LABEL: @test_load_i32_store_i8(			; CHECK-LABEL: @test_load_i32_store_i8(
	; CHECK: <16 x i8>			; CHECK: <4 x i8>
	;			;
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]			%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
	%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv			%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
	%lv = load i32, ptr %gep.src, align 1			%lv = load i32, ptr %gep.src, align 1
	▲ Show 20 Lines • Show All 80 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Show First 20 Lines • Show All 653 Lines • ▼ Show 20 Lines	for.body: ; preds = %for.body, %for.body.lr.ph
br i1 %exitcond, label %for.cond.cleanup, label %for.body		br i1 %exitcond, label %for.cond.cleanup, label %for.body
}		}

define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {		define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
; CHECK-LABEL: define void @add_f		; CHECK-LABEL: define void @add_f
; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.]], ptr noalias nocapture [[Q:%.]], i8 [[ARG1:%.]], i8 [[ARG2:%.]], i32 [[LEN:%.*]]) {		; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.]], ptr noalias nocapture [[Q:%.]], i8 [[ARG1:%.]], i8 [[ARG2:%.]], i32 [[LEN:%.*]]) {
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_32:%.*]] = icmp sgt i32 [[LEN]], 0		; CHECK-NEXT: [[CMP_32:%.*]] = icmp sgt i32 [[LEN]], 0
; CHECK-NEXT: br i1 [[CMP_32]], label [[ITER_CHECK:%.]], label [[FOR_COND_CLEANUP:%.]]		; CHECK-NEXT: br i1 [[CMP_32]], label [[FOR_BODY_LR_PH:%.]], label [[FOR_COND_CLEANUP:%.]]
; CHECK: iter.check:		; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[CONV11:%.*]] = zext i8 [[ARG2]] to i32		; CHECK-NEXT: [[CONV11:%.*]] = zext i8 [[ARG2]] to i32
; CHECK-NEXT: [[CONV13:%.*]] = zext i8 [[ARG1]] to i32		; CHECK-NEXT: [[CONV13:%.*]] = zext i8 [[ARG1]] to i32
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64		; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LEN]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8		; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.]]		; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.]], label [[VECTOR_PH:%.]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16		; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]		; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV13]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = trunc <16 x i32> [[BROADCAST_SPLATINSERT]] to <16 x i8>		; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i32> [[BROADCAST_SPLATINSERT]] to <8 x i8>
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>		; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i32>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[CONV11]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0
; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i32> [[BROADCAST_SPLATINSERT2]] to <16 x i8>		; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i32> [[BROADCAST_SPLATINSERT1]] to <8 x i8>
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT3]] to <16 x i32>		; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT2]] to <8 x i32>
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0		; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]		; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0		; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[TMP7]], align 2		; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 2
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i16> [[WIDE_LOAD]] to <16 x i8>		; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[TMP8]] to <16 x i32>		; CHECK-NEXT: [[TMP9:%.*]] = zext <8 x i8> [[TMP8]] to <8 x i32>
; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP9]] to <16 x i8>		; CHECK-NEXT: [[TMP10:%.*]] = trunc <8 x i32> [[TMP9]] to <8 x i8>
; CHECK-NEXT: [[TMP11:%.*]] = shl <16 x i8> [[TMP10]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>		; CHECK-NEXT: [[TMP11:%.*]] = shl <8 x i8> [[TMP10]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
; CHECK-NEXT: [[TMP12:%.*]] = zext <16 x i8> [[TMP11]] to <16 x i32>		; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[TMP11]] to <8 x i32>
; CHECK-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8>		; CHECK-NEXT: [[TMP13:%.*]] = trunc <8 x i32> [[TMP12]] to <8 x i8>
; CHECK-NEXT: [[TMP14:%.*]] = add <16 x i8> [[TMP13]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>		; CHECK-NEXT: [[TMP14:%.*]] = add <8 x i8> [[TMP13]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
; CHECK-NEXT: [[TMP15:%.*]] = zext <16 x i8> [[TMP14]] to <16 x i32>		; CHECK-NEXT: [[TMP15:%.*]] = zext <8 x i8> [[TMP14]] to <8 x i32>
; CHECK-NEXT: [[TMP16:%.*]] = and <16 x i8> [[TMP8]], <i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52>		; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i8> [[TMP8]], <i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52>
; CHECK-NEXT: [[TMP17:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i32>		; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[TMP16]] to <8 x i32>
; CHECK-NEXT: [[TMP18:%.*]] = trunc <16 x i32> [[TMP17]] to <16 x i8>		; CHECK-NEXT: [[TMP18:%.*]] = trunc <8 x i32> [[TMP17]] to <8 x i8>
; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i8> [[TMP18]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>		; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i8> [[TMP18]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
; CHECK-NEXT: [[TMP20:%.*]] = zext <16 x i8> [[TMP19]] to <16 x i32>		; CHECK-NEXT: [[TMP20:%.*]] = zext <8 x i8> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[TMP21:%.*]] = trunc <16 x i32> [[TMP20]] to <16 x i8>		; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i32> [[TMP20]] to <8 x i8>
; CHECK-NEXT: [[TMP22:%.*]] = mul <16 x i8> [[TMP21]], <i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60>		; CHECK-NEXT: [[TMP22:%.*]] = mul <8 x i8> [[TMP21]], <i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60>
; CHECK-NEXT: [[TMP23:%.*]] = zext <16 x i8> [[TMP22]] to <16 x i32>		; CHECK-NEXT: [[TMP23:%.*]] = zext <8 x i8> [[TMP22]] to <8 x i32>
; CHECK-NEXT: [[TMP24:%.*]] = trunc <16 x i32> [[TMP15]] to <16 x i8>		; CHECK-NEXT: [[TMP24:%.*]] = trunc <8 x i32> [[TMP15]] to <8 x i8>
; CHECK-NEXT: [[TMP25:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i8>		; CHECK-NEXT: [[TMP25:%.*]] = trunc <8 x i32> [[TMP2]] to <8 x i8>
; CHECK-NEXT: [[TMP26:%.*]] = and <16 x i8> [[TMP24]], [[TMP25]]		; CHECK-NEXT: [[TMP26:%.*]] = and <8 x i8> [[TMP24]], [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i32>		; CHECK-NEXT: [[TMP27:%.*]] = zext <8 x i8> [[TMP26]] to <8 x i32>
; CHECK-NEXT: [[TMP28:%.*]] = trunc <16 x i32> [[TMP23]] to <16 x i8>		; CHECK-NEXT: [[TMP28:%.*]] = trunc <8 x i32> [[TMP23]] to <8 x i8>
; CHECK-NEXT: [[TMP29:%.*]] = and <16 x i8> [[TMP28]], <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>		; CHECK-NEXT: [[TMP29:%.*]] = and <8 x i8> [[TMP28]], <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
; CHECK-NEXT: [[TMP30:%.*]] = zext <16 x i8> [[TMP29]] to <16 x i32>		; CHECK-NEXT: [[TMP30:%.*]] = zext <8 x i8> [[TMP29]] to <8 x i32>
; CHECK-NEXT: [[TMP31:%.*]] = trunc <16 x i32> [[TMP30]] to <16 x i8>		; CHECK-NEXT: [[TMP31:%.*]] = trunc <8 x i32> [[TMP30]] to <8 x i8>
; CHECK-NEXT: [[TMP32:%.*]] = trunc <16 x i32> [[TMP4]] to <16 x i8>		; CHECK-NEXT: [[TMP32:%.*]] = trunc <8 x i32> [[TMP4]] to <8 x i8>
; CHECK-NEXT: [[TMP33:%.*]] = xor <16 x i8> [[TMP31]], [[TMP32]]		; CHECK-NEXT: [[TMP33:%.*]] = xor <8 x i8> [[TMP31]], [[TMP32]]
; CHECK-NEXT: [[TMP34:%.*]] = zext <16 x i8> [[TMP33]] to <16 x i32>		; CHECK-NEXT: [[TMP34:%.*]] = zext <8 x i8> [[TMP33]] to <8 x i32>
; CHECK-NEXT: [[TMP35:%.*]] = trunc <16 x i32> [[TMP34]] to <16 x i8>		; CHECK-NEXT: [[TMP35:%.*]] = trunc <8 x i32> [[TMP34]] to <8 x i8>
; CHECK-NEXT: [[TMP36:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8>		; CHECK-NEXT: [[TMP36:%.*]] = trunc <8 x i32> [[TMP27]] to <8 x i8>
; CHECK-NEXT: [[TMP37:%.*]] = mul <16 x i8> [[TMP35]], [[TMP36]]		; CHECK-NEXT: [[TMP37:%.*]] = mul <8 x i8> [[TMP35]], [[TMP36]]
; CHECK-NEXT: [[TMP38:%.*]] = zext <16 x i8> [[TMP37]] to <16 x i32>		; CHECK-NEXT: [[TMP38:%.*]] = zext <8 x i8> [[TMP37]] to <8 x i32>
; CHECK-NEXT: [[TMP39:%.*]] = trunc <16 x i32> [[TMP38]] to <16 x i8>		; CHECK-NEXT: [[TMP39:%.*]] = trunc <8 x i32> [[TMP38]] to <8 x i8>
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP5]]		; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i32 0		; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP39]], ptr [[TMP41]], align 1		; CHECK-NEXT: store <8 x i8> [[TMP39]], ptr [[TMP41]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16		; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]		; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.]], label [[VEC_EPILOG_ITER_CHECK:%.]]		; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: vec.epilog.iter.check:		; CHECK: scalar.ph:
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0
; CHECK-NEXT: [[TMP43:%.*]] = trunc <8 x i32> [[BROADCAST_SPLATINSERT9]] to <8 x i8>
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <8 x i8> [[TMP43]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT10]] to <8 x i32>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0
; CHECK-NEXT: [[TMP45:%.*]] = trunc <8 x i32> [[BROADCAST_SPLATINSERT11]] to <8 x i8>
; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <8 x i8> [[TMP45]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP46:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT12]] to <8 x i32>
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX7:%.]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP47:%.*]] = add i64 [[INDEX7]], 0
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP47]]
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i16, ptr [[TMP48]], i32 0
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i16>, ptr [[TMP49]], align 2
; CHECK-NEXT: [[TMP50:%.*]] = trunc <8 x i16> [[WIDE_LOAD8]] to <8 x i8>
; CHECK-NEXT: [[TMP51:%.*]] = zext <8 x i8> [[TMP50]] to <8 x i32>
; CHECK-NEXT: [[TMP52:%.*]] = trunc <8 x i32> [[TMP51]] to <8 x i8>
; CHECK-NEXT: [[TMP53:%.*]] = shl <8 x i8> [[TMP52]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
; CHECK-NEXT: [[TMP54:%.*]] = zext <8 x i8> [[TMP53]] to <8 x i32>
; CHECK-NEXT: [[TMP55:%.*]] = trunc <8 x i32> [[TMP54]] to <8 x i8>
; CHECK-NEXT: [[TMP56:%.*]] = add <8 x i8> [[TMP55]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
; CHECK-NEXT: [[TMP57:%.*]] = zext <8 x i8> [[TMP56]] to <8 x i32>
; CHECK-NEXT: [[TMP58:%.*]] = and <8 x i8> [[TMP50]], <i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52>
; CHECK-NEXT: [[TMP59:%.*]] = zext <8 x i8> [[TMP58]] to <8 x i32>
; CHECK-NEXT: [[TMP60:%.*]] = trunc <8 x i32> [[TMP59]] to <8 x i8>
; CHECK-NEXT: [[TMP61:%.*]] = or <8 x i8> [[TMP60]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
; CHECK-NEXT: [[TMP62:%.*]] = zext <8 x i8> [[TMP61]] to <8 x i32>
; CHECK-NEXT: [[TMP63:%.*]] = trunc <8 x i32> [[TMP62]] to <8 x i8>
; CHECK-NEXT: [[TMP64:%.*]] = mul <8 x i8> [[TMP63]], <i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60>
; CHECK-NEXT: [[TMP65:%.*]] = zext <8 x i8> [[TMP64]] to <8 x i32>
; CHECK-NEXT: [[TMP66:%.*]] = trunc <8 x i32> [[TMP57]] to <8 x i8>
; CHECK-NEXT: [[TMP67:%.*]] = trunc <8 x i32> [[TMP44]] to <8 x i8>
; CHECK-NEXT: [[TMP68:%.*]] = and <8 x i8> [[TMP66]], [[TMP67]]
; CHECK-NEXT: [[TMP69:%.*]] = zext <8 x i8> [[TMP68]] to <8 x i32>
; CHECK-NEXT: [[TMP70:%.*]] = trunc <8 x i32> [[TMP65]] to <8 x i8>
; CHECK-NEXT: [[TMP71:%.*]] = and <8 x i8> [[TMP70]], <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
; CHECK-NEXT: [[TMP72:%.*]] = zext <8 x i8> [[TMP71]] to <8 x i32>
; CHECK-NEXT: [[TMP73:%.*]] = trunc <8 x i32> [[TMP72]] to <8 x i8>
; CHECK-NEXT: [[TMP74:%.*]] = trunc <8 x i32> [[TMP46]] to <8 x i8>
; CHECK-NEXT: [[TMP75:%.*]] = xor <8 x i8> [[TMP73]], [[TMP74]]
; CHECK-NEXT: [[TMP76:%.*]] = zext <8 x i8> [[TMP75]] to <8 x i32>
; CHECK-NEXT: [[TMP77:%.*]] = trunc <8 x i32> [[TMP76]] to <8 x i8>
; CHECK-NEXT: [[TMP78:%.*]] = trunc <8 x i32> [[TMP69]] to <8 x i8>
; CHECK-NEXT: [[TMP79:%.*]] = mul <8 x i8> [[TMP77]], [[TMP78]]
; CHECK-NEXT: [[TMP80:%.*]] = zext <8 x i8> [[TMP79]] to <8 x i32>
; CHECK-NEXT: [[TMP81:%.*]] = trunc <8 x i32> [[TMP80]] to <8 x i8>
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP47]]
; CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds i8, ptr [[TMP82]], i32 0
; CHECK-NEXT: store <8 x i8> [[TMP81]], ptr [[TMP83]], align 1
; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX7]], 8
; CHECK-NEXT: [[TMP84:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP84]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:		; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:		; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]		; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]]		; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP85:%.*]] = load i16, ptr [[ARRAYIDX]], align 2		; CHECK-NEXT: [[TMP43:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP85]] to i32		; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP43]] to i32
; CHECK-NEXT: [[ADD:%.*]] = shl i32 [[CONV]], 4		; CHECK-NEXT: [[ADD:%.*]] = shl i32 [[CONV]], 4
; CHECK-NEXT: [[CONV2:%.*]] = add nsw i32 [[ADD]], 32		; CHECK-NEXT: [[CONV2:%.*]] = add nsw i32 [[ADD]], 32
; CHECK-NEXT: [[OR:%.*]] = and i32 [[CONV]], 204		; CHECK-NEXT: [[OR:%.*]] = and i32 [[CONV]], 204
; CHECK-NEXT: [[CONV8:%.*]] = or i32 [[OR]], 51		; CHECK-NEXT: [[CONV8:%.*]] = or i32 [[OR]], 51
; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV8]], 60		; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV8]], 60
; CHECK-NEXT: [[AND:%.*]] = and i32 [[CONV2]], [[CONV13]]		; CHECK-NEXT: [[AND:%.*]] = and i32 [[CONV2]], [[CONV13]]
; CHECK-NEXT: [[MUL_MASKED:%.*]] = and i32 [[MUL]], 252		; CHECK-NEXT: [[MUL_MASKED:%.*]] = and i32 [[MUL]], 252
; CHECK-NEXT: [[CONV17:%.*]] = xor i32 [[MUL_MASKED]], [[CONV11]]		; CHECK-NEXT: [[CONV17:%.*]] = xor i32 [[MUL_MASKED]], [[CONV11]]
; CHECK-NEXT: [[MUL18:%.*]] = mul nuw nsw i32 [[CONV17]], [[AND]]		; CHECK-NEXT: [[MUL18:%.*]] = mul nuw nsw i32 [[CONV17]], [[AND]]
; CHECK-NEXT: [[CONV19:%.*]] = trunc i32 [[MUL18]] to i8		; CHECK-NEXT: [[CONV19:%.*]] = trunc i32 [[MUL18]] to i8
; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]		; CHECK-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV19]], ptr [[ARRAYIDX21]], align 1		; CHECK-NEXT: store i8 [[CONV19]], ptr [[ARRAYIDX21]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1		; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32		; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]		; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]		; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
;		;
entry:		entry:
%cmp.32 = icmp sgt i32 %len, 0		%cmp.32 = icmp sgt i32 %len, 0
br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup		br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph: ; preds = %entry		for.body.lr.ph: ; preds = %entry
%conv11 = zext i8 %arg2 to i32		%conv11 = zext i8 %arg2 to i32
%conv13 = zext i8 %arg1 to i32		%conv13 = zext i8 %arg1 to i32
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP4]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>		; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP4]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>		; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>		; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]		; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0		; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP9]], align 1		; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP9]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16		; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]		; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15		; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]		; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:		; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]		; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
Show All 9 Lines
; CHECK-NEXT: [[CONV]] = zext i8 [[TMP11]] to i32		; CHECK-NEXT: [[CONV]] = zext i8 [[TMP11]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2		; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8		; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]		; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1		; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1		; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32		; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]		; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]		; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
;		;
entry:		entry:
%cmp8 = icmp sgt i32 %len, 0		%cmp8 = icmp sgt i32 %len, 0
br i1 %cmp8, label %for.body, label %for.cond.cleanup		br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.body, %entry		for.cond.cleanup: ; preds = %for.body, %entry
ret void		ret void

▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP6]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>		; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP6]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP6]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>		; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP6]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP8]] to <16 x i8>		; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP8]] to <16 x i8>
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]		; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0		; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1		; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16		; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]		; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15		; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14		; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]		; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:		; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]		; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
Show All 10 Lines
; CHECK-NEXT: [[CONV]] = zext i8 [[TMP13]] to i32		; CHECK-NEXT: [[CONV]] = zext i8 [[TMP13]] to i32
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2		; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8		; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]		; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1		; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1		; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32		; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]		; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]		; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
;		;
entry:		entry:
br label %for.body		br label %for.body

for.cond.cleanup: ; preds = %for.body, %entry		for.cond.cleanup: ; preds = %for.body, %entry
%ret = trunc i32 %a_phi to i8		%ret = trunc i32 %a_phi to i8
ret i8 %ret		ret i8 %ret

Show All 15 Lines

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll

	Show All 12 Lines
	; for (i2 = 0; i2 < 8; i2++)			; for (i2 = 0; i2 < 8; i2++)
	; arr[i2][i1] = i1 + n;			; arr[i2][i1] = i1 + n;
	; }			; }
	; }			; }
	;			;

	; CHECK-LABEL: @foo_i32(			; CHECK-LABEL: @foo_i32(
	; CHECK-LABEL: vector.ph:			; CHECK-LABEL: vector.ph:
	; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0			; CHECK: %[[SplatVal:.*]] = insertelement <2 x i32> poison, i32 %n, i64 0
	; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer			; CHECK: %[[Splat:.*]] = shufflevector <2 x i32> %[[SplatVal]], <2 x i32> poison, <2 x i32> zeroinitializer

	; CHECK-LABEL: vector.body:			; CHECK-LABEL: vector.body:
	; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]			; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
	; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]			; CHECK: %[[VecInd:.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
	; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]]			; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <2 x i64> %[[VecInd]]
	; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>			; CHECK: %[[VecIndTr:.*]] = trunc <2 x i64> %[[VecInd]] to <2 x i32>
	; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)			; CHECK: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %[[VecIndTr]], <2 x ptr> %[[AAddr]], i32 4, <2 x i1> <i1 true, i1 true>)
	; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>			; CHECK: %[[VecIndTr2:.*]] = trunc <2 x i64> %[[VecInd]] to <2 x i32>
	; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]			; CHECK: %[[StoreVal:.*]] = add nsw <2 x i32> %[[VecIndTr2]], %[[Splat]]
	; CHECK: br label %[[InnerLoop:.+]]			; CHECK: br label %[[InnerLoop:.+]]

	; CHECK: [[InnerLoop]]:			; CHECK: [[InnerLoop]]:
	; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]			; CHECK: %[[InnerPhi:.*]] = phi <2 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
	; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]			; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <2 x i64> %[[InnerPhi]], <2 x i64> %[[VecInd]]
	; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true			; CHECK: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %[[StoreVal]], <2 x ptr> %[[AAddr2]], i32 4, <2 x i1> <i1 true, i1 true>)
	; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], <i64 1, i64 1, i64 1, i64 1>			; CHECK: %[[InnerPhiNext]] = add nuw nsw <2 x i64> %[[InnerPhi]], <i64 1, i64 1>
	; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], <i64 8, i64 8, i64 8, i64 8>			; CHECK: %[[VecCond:.*]] = icmp eq <2 x i64> %[[InnerPhiNext]], <i64 8, i64 8>
	; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0			; CHECK: %[[InnerCond:.*]] = extractelement <2 x i1> %[[VecCond]], i32 0
	; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]			; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]

	; CHECK: [[ForInc]]:			; CHECK: [[ForInc]]:
	; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4			; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 2
	; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], <i64 4, i64 4, i64 4, i64 4>			; CHECK: %[[VecIndNext]] = add <2 x i64> %[[VecInd]], <i64 2, i64 2>
	; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8			; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
	; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body			; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body

	@arr2 = external global [8 x i32], align 16			@arr2 = external global [8 x i32], align 16
	@arr = external global [8 x [8 x i32]], align 16			@arr = external global [8 x [8 x i32]], align 16

	@arrX = external global [8 x i64], align 16			@arrX = external global [8 x i64], align 16
	@arrY = external global [8 x [8 x i64]], align 16			@arrY = external global [8 x [8 x i64]], align 16
	▲ Show 20 Lines • Show All 91 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll

Show First 20 Lines • Show All 68 Lines • ▼ Show 20 Lines	loop:
br i1 %ec, label %exit, label %loop, !llvm.loop !0		br i1 %ec, label %exit, label %loop, !llvm.loop !0

exit:		exit:
ret void		ret void
}		}

define void @test_loop2(i64 %n, ptr %dst) {		define void @test_loop2(i64 %n, ptr %dst) {
; CHECK-LABEL: @test_loop2(		; CHECK-LABEL: @test_loop2(
; CHECK-NEXT: iter.check:		; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]		; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0		; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1		; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2		; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3		; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4		; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5		; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i64 [[N]], [[TMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6		; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7		; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[TMP5]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8		; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i64 [[N]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9		; CHECK-NEXT: [[TMP9:%.*]] = sub nsw i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10		; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11		; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12		; CHECK-NEXT: [[TMP12:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i8>
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13		; CHECK-NEXT: [[TMP13:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i8>
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14		; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP0]], [[TMP4]]
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15		; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP2]], [[TMP8]]
; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]]		; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i64 [[N]], [[TMP1]]		; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = sub nsw i64 [[N]], [[TMP2]]		; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i8> [[TMP13]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = sub nsw i64 [[N]], [[TMP3]]		; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
; CHECK-NEXT: [[TMP20:%.*]] = sub nsw i64 [[N]], [[TMP4]]		; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP21:%.*]] = sub nsw i64 [[N]], [[TMP5]]		; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: [[TMP22:%.*]] = sub nsw i64 [[N]], [[TMP6]]		; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-NEXT: [[TMP23:%.*]] = sub nsw i64 [[N]], [[TMP7]]
; CHECK-NEXT: [[TMP24:%.*]] = sub nsw i64 [[N]], [[TMP8]]
; CHECK-NEXT: [[TMP25:%.*]] = sub nsw i64 [[N]], [[TMP9]]
; CHECK-NEXT: [[TMP26:%.*]] = sub nsw i64 [[N]], [[TMP10]]
; CHECK-NEXT: [[TMP27:%.*]] = sub nsw i64 [[N]], [[TMP11]]
; CHECK-NEXT: [[TMP28:%.*]] = sub nsw i64 [[N]], [[TMP12]]
; CHECK-NEXT: [[TMP29:%.*]] = sub nsw i64 [[N]], [[TMP13]]
; CHECK-NEXT: [[TMP30:%.*]] = sub nsw i64 [[N]], [[TMP14]]
; CHECK-NEXT: [[TMP31:%.*]] = sub nsw i64 [[N]], [[TMP15]]
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <16 x i64> poison, i64 [[TMP16]], i32 0
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <16 x i64> [[TMP32]], i64 [[TMP17]], i32 1
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x i64> [[TMP33]], i64 [[TMP18]], i32 2
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <16 x i64> [[TMP34]], i64 [[TMP19]], i32 3
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <16 x i64> [[TMP35]], i64 [[TMP20]], i32 4
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x i64> [[TMP36]], i64 [[TMP21]], i32 5
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <16 x i64> [[TMP37]], i64 [[TMP22]], i32 6
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <16 x i64> [[TMP38]], i64 [[TMP23]], i32 7
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x i64> [[TMP39]], i64 [[TMP24]], i32 8
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i64> [[TMP40]], i64 [[TMP25]], i32 9
; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i64> [[TMP41]], i64 [[TMP26]], i32 10
; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x i64> [[TMP42]], i64 [[TMP27]], i32 11
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <16 x i64> [[TMP43]], i64 [[TMP28]], i32 12
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <16 x i64> [[TMP44]], i64 [[TMP29]], i32 13
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x i64> [[TMP45]], i64 [[TMP30]], i32 14
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <16 x i64> [[TMP46]], i64 [[TMP31]], i32 15
; CHECK-NEXT: [[TMP48:%.*]] = trunc <16 x i64> [[TMP47]] to <16 x i8>
; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[TMP0]], [[TMP16]]
; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP49]]
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP48]], i32 15
; CHECK-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, 992		; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, 1000
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]		; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: vec.epilog.iter.check:		; CHECK: scalar.ph:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[INDEX2]], 0
; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[INDEX2]], 1
; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[INDEX2]], 2
; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[INDEX2]], 3
; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX2]], 4
; CHECK-NEXT: [[TMP58:%.*]] = add i64 [[INDEX2]], 5
; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[INDEX2]], 6
; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[INDEX2]], 7
; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[TMP53]]
; CHECK-NEXT: [[TMP62:%.*]] = sub nsw i64 [[N]], [[TMP54]]
; CHECK-NEXT: [[TMP63:%.*]] = sub nsw i64 [[N]], [[TMP55]]
; CHECK-NEXT: [[TMP64:%.*]] = sub nsw i64 [[N]], [[TMP56]]
; CHECK-NEXT: [[TMP65:%.*]] = sub nsw i64 [[N]], [[TMP57]]
; CHECK-NEXT: [[TMP66:%.*]] = sub nsw i64 [[N]], [[TMP58]]
; CHECK-NEXT: [[TMP67:%.*]] = sub nsw i64 [[N]], [[TMP59]]
; CHECK-NEXT: [[TMP68:%.*]] = sub nsw i64 [[N]], [[TMP60]]
; CHECK-NEXT: [[TMP69:%.*]] = insertelement <8 x i64> poison, i64 [[TMP61]], i32 0
; CHECK-NEXT: [[TMP70:%.*]] = insertelement <8 x i64> [[TMP69]], i64 [[TMP62]], i32 1
; CHECK-NEXT: [[TMP71:%.*]] = insertelement <8 x i64> [[TMP70]], i64 [[TMP63]], i32 2
; CHECK-NEXT: [[TMP72:%.*]] = insertelement <8 x i64> [[TMP71]], i64 [[TMP64]], i32 3
; CHECK-NEXT: [[TMP73:%.*]] = insertelement <8 x i64> [[TMP72]], i64 [[TMP65]], i32 4
; CHECK-NEXT: [[TMP74:%.*]] = insertelement <8 x i64> [[TMP73]], i64 [[TMP66]], i32 5
; CHECK-NEXT: [[TMP75:%.*]] = insertelement <8 x i64> [[TMP74]], i64 [[TMP67]], i32 6
; CHECK-NEXT: [[TMP76:%.*]] = insertelement <8 x i64> [[TMP75]], i64 [[TMP68]], i32 7
; CHECK-NEXT: [[TMP77:%.*]] = trunc <8 x i64> [[TMP76]] to <8 x i8>
; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[TMP53]], [[TMP61]]
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <8 x i8> [[TMP77]], i32 7
; CHECK-NEXT: store i8 [[TMP80]], ptr [[TMP79]], align 1
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX2]], 8
; CHECK-NEXT: [[TMP81:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1000
; CHECK-NEXT: br i1 [[TMP81]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N1:%.*]] = icmp eq i64 1001, 1000
; CHECK-NEXT: br i1 [[CMP_N1]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]		; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:		; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]		; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[SUB_N:%.*]] = sub nsw i64 [[N]], [[IV]]		; CHECK-NEXT: [[SUB_N:%.*]] = sub nsw i64 [[N]], [[IV]]
; CHECK-NEXT: [[SUB_N_TRUNC:%.*]] = trunc i64 [[SUB_N]] to i8		; CHECK-NEXT: [[SUB_N_TRUNC:%.*]] = trunc i64 [[SUB_N]] to i8
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV]], [[SUB_N]]		; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV]], [[SUB_N]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[ADD]]		; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[ADD]]
; CHECK-NEXT: store i8 [[SUB_N_TRUNC]], ptr [[GEP]], align 1		; CHECK-NEXT: store i8 [[SUB_N_TRUNC]], ptr [[GEP]], align 1
; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1		; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; CHECK-NEXT: [[C:%.*]] = icmp sle i64 [[IV_NEXT]], 1000		; CHECK-NEXT: [[C:%.*]] = icmp sle i64 [[IV_NEXT]], 1000
; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]		; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: exit:		; CHECK: exit:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
br label %loop		br label %loop

loop:		loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]		%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
Show All 22 Lines

llvm/test/Transforms/LoopVectorize/AArch64/truncate-type-widening.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
				; RUN: opt -S < %s -passes=loop-vectorize -force-vector-interleave=1 -mtriple aarch64-linux-gnu -mattr=+sve 2>&1 | FileCheck %s

				david-armUnsubmitted Done Reply Inline Actions It looks like we're still using tail-folding despite passing in `-sve-tail-folding=disabled`, which I think is because the vectoriser knows the trip count is low. Perhaps you can just remove the flag? david-arm: It looks like we're still using tail-folding despite passing in `-sve-tail-folding=disabled`…
				RinAuthorUnsubmitted Done Reply Inline Actions You're right, I'll take that flag out. Rin: You're right, I'll take that flag out.
				define void @test(ptr nocapture noundef writeonly %dst, i32 noundef %n, i64 noundef %val) local_unnamed_addr #0 {
				; CHECK-LABEL: define void @test
				; CHECK-SAME: (ptr nocapture noundef writeonly [[DST:%.*]], i32 noundef [[N:%.*]], i64 noundef [[VAL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
				; CHECK-NEXT: entry:
				; CHECK-NEXT: [[REM:%.*]] = and i32 [[N]], 63
				; CHECK-NEXT: [[CMP8_NOT:%.*]] = icmp eq i32 [[REM]], 0
				; CHECK-NEXT: br i1 [[CMP8_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
				; CHECK: for.body.preheader:
				; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[REM]], 7
				; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], 3
				; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SHR]] to i64
				; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
				; CHECK: vector.ph:
				; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
				; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
				; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
				; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
				; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
				; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
				; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
				; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
				; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[TMP6]]
				; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[WIDE_TRIP_COUNT]], [[TMP6]]
				; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
				; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
				; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 2 x i64> [[TMP10]], zeroinitializer
				; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 2 x i64> [[TMP11]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
				; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP12]]
				; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 2
				; CHECK-NEXT: [[TMP15:%.*]] = mul i64 1, [[TMP14]]
				; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP15]], i64 0
				; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
				; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[VAL]], i64 0
				; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
				; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
				; CHECK: vector.body:
				; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0
				; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
				; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
				; CHECK-NEXT: [[TMP18:%.*]] = lshr <vscale x 2 x i64> [[BROADCAST_SPLAT]], [[TMP17]]
				; CHECK-NEXT: [[TMP19:%.*]] = trunc <vscale x 2 x i64> [[TMP18]] to <vscale x 2 x i8>
				; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
				; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP19]], ptr [[TMP20]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP9]])
				; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2
				; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]]
				; CHECK-NEXT: [[TMP23:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
				; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
				; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i1> [[TMP23]], i32 0
				; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
				; CHECK: middle.block:
				; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
				; CHECK: scalar.ph:
				; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
				; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DST]], [[FOR_BODY_PREHEADER]] ]
				; CHECK-NEXT: br label [[FOR_BODY:%.*]]
				; CHECK: for.body:
				; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
				; CHECK-NEXT: [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ]
				; CHECK-NEXT: [[TMP25:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
				; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP25]]
				; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[SHR3]] to i8
				; CHECK-NEXT: store i8 [[CONV4]], ptr [[P_OUT_TAIL_09]], align 1
				; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1
				; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
				; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
				; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
				; CHECK: for.cond.cleanup.loopexit:
				; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
				; CHECK: for.cond.cleanup:
				; CHECK-NEXT: ret void
				;
				entry:
				%rem = and i32 %n, 63
				%cmp8.not = icmp eq i32 %rem, 0
				br i1 %cmp8.not, label %for.cond.cleanup, label %for.body.preheader

				for.body.preheader: ; preds = %entry
				%add = add nuw nsw i32 %rem, 7
				%shr = lshr i32 %add, 3
				%wide.trip.count = zext i32 %shr to i64
				br label %for.body

				for.body: ; preds = %for.body.preheader, %for.body
				%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
				david-armUnsubmitted Done Reply Inline Actions I think you can delete this block and just let everything jump directly to `%for.cond.cleanup` david-arm: I think you can delete this block and just let everything jump directly to `%for.cond.cleanup`
				RinAuthorUnsubmitted Done Reply Inline Actions Makes sense I'll do that and rewrite the blocks. Rin: Makes sense I'll do that and rewrite the blocks.
				%p_out_tail.09 = phi ptr [ %dst, %for.body.preheader ], [ %incdec.ptr, %for.body ]
				%0 = shl nuw nsw i64 %indvars.iv, 3
				%shr3 = lshr i64 %val, %0
				%conv4 = trunc i64 %shr3 to i8
				store i8 %conv4, ptr %p_out_tail.09, align 1
				%incdec.ptr = getelementptr inbounds i8, ptr %p_out_tail.09, i64 1
				david-armUnsubmitted Done Reply Inline Actions Could you rewrite the blocks in a more natural order, i.e. entry, for.body.preheader, for.body, for.cond.cleanup? david-arm: Could you rewrite the blocks in a more natural order, i.e. entry, for.body.preheader, for.body…
				%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
				%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
				br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

				for.cond.cleanup: ; preds = %for.body
				ret void
				}

llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll

	Show All 10 Lines
	; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = zext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = zext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv = zext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv = zext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF 8 For instruction: %conv = zext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF 8 For instruction: %conv = zext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 2 for VF 16 For instruction: %conv = zext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 2 for VF 16 For instruction: %conv = zext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %conv = zext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %conv = zext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %conv = zext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %conv = zext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %conv = zext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %conv = zext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %conv = zext i8 %0 to i32
	; CHECK-LABEL: define void @zext_i8_i16			; CHECK-LABEL: define void @zext_i8_i16
	; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {			; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -1			; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -1
	; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64			; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
	; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1			; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
	; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
	; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
	; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]			; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; CHECK: vector.ph:			; CHECK: vector.ph:
	; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
	; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
	; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
	; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]			; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]			; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
	; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7]], align 1			; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
	; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>			; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
	; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i16> [[TMP8]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 2, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)			; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
	; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]]			; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]]
	; CHECK-NEXT: store <vscale x 8 x i16> [[TMP9]], ptr [[TMP10]], align 2			; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP6]], align 2
	; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
	; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8			; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]			; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
	; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]			; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]			; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
	; CHECK: scalar.ph:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]			; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
	; CHECK-NEXT: br label [[FOR_BODY:%.*]]			; CHECK-NEXT: br label [[FOR_BODY:%.*]]
	; CHECK: for.body:			; CHECK: for.body:
	; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]			; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
	; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]			; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
	; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX]], align 1			; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
	; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP14]] to i32			; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP8]] to i32
	; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2			; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
	; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16			; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
	; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]			; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]
	; CHECK-NEXT: store i16 [[CONV1]], ptr [[ARRAYIDX3]], align 2			; CHECK-NEXT: store i16 [[CONV1]], ptr [[ARRAYIDX3]], align 2
	; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1			; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
	; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32			; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
	; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]			; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
	; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]			; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	Show All 26 Lines
	; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = sext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = sext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = sext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = sext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv = sext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv = sext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF 8 For instruction: %conv = sext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF 8 For instruction: %conv = sext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 2 for VF 16 For instruction: %conv = sext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 2 for VF 16 For instruction: %conv = sext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %conv = sext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %conv = sext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %conv = sext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %conv = sext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %conv = sext i8 %0 to i32			; CHECK-COST: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %conv = sext i8 %0 to i32
	; CHECK-COST: LV: Found an estimated cost of 0 for VF vscale x 8 For instruction: %conv = sext i8 %0 to i32
	; CHECK-LABEL: define void @sext_i8_i16			; CHECK-LABEL: define void @sext_i8_i16
	; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {			; CHECK-SAME: (ptr noalias nocapture readonly [[P:%.*]], ptr noalias nocapture [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -1			; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -1
	; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64			; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
	; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1			; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
	; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
	; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
	; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]]
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]			; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; CHECK: vector.ph:			; CHECK: vector.ph:
	; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
	; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
	; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]]
	; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]			; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]			; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDEX]]
	; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP7]], align 1			; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
	; CHECK-NEXT: [[TMP8:%.*]] = sext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>			; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
	; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 8 x i16> [[TMP8]], trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 2, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)			; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i16> [[TMP4]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
	; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]]			; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDEX]]
	; CHECK-NEXT: store <vscale x 8 x i16> [[TMP9]], ptr [[TMP10]], align 2			; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP6]], align 2
	; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
	; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 8			; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]			; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
	; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]			; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]			; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
	; CHECK: scalar.ph:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]			; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
	; CHECK-NEXT: br label [[FOR_BODY:%.*]]			; CHECK-NEXT: br label [[FOR_BODY:%.*]]
	; CHECK: for.body:			; CHECK: for.body:
	; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]			; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
	; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]			; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
	; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX]], align 1			; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
	; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP14]] to i32			; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP8]] to i32
	; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2			; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
	; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16			; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
	; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]			; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[INDVARS_IV]]
	; CHECK-NEXT: store i16 [[CONV1]], ptr [[ARRAYIDX3]], align 2			; CHECK-NEXT: store i16 [[CONV1]], ptr [[ARRAYIDX3]], align 2
	; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1			; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
	; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32			; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
	; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]			; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[LEN]]
	; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]			; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
	Show All 25 Lines

llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll

	Show First 20 Lines • Show All 734 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1			; CHECK-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
	; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]			; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]
	; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP16:![0-9]+]]			; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP16:![0-9]+]]
	; CHECK: exit:			; CHECK: exit:
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f4(			; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f4(
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: iter.check:			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: entry:
				david-armUnsubmitted Done Reply Inline Actions Hmm, it looks like we've decided not to vectorise at all now. Perhaps because the maximum register width is 32 bits, and since the largest type in the loop is now 32 bits the max VF we can choose is 1? In order to still demonstrate some vectorisation you might have to change the loop IR to be something like this: %conv = trunc i32 %0 to i16 store i16 %conv, ptr %arrayidx, align 1 david-arm: Hmm, it looks like we've decided not to vectorise at all now. Perhaps because the maximum…
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N:%.*]] to i64			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N:%.*]] to i64
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vector.main.loop.iter.check:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vector.ph:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vector.body:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[VEC_IND]] to <4 x i8>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
	; CHECK-PROFITABLE-BY-DEFAULT: middle.block:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.iter.check:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.ph:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP5:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.vector.body:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP6:%.*]] = add i64 [[INDEX6]], 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = trunc <2 x i32> [[VEC_IND7]] to <2 x i8>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> [[TMP7]], ptr [[TMP9]], align 1
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 2, i32 2>
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.middle.block:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
	; CHECK-PROFITABLE-BY-DEFAULT: vec.epilog.scalar.ph:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[LOOP:%.*]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label [[LOOP:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: loop:			; CHECK-PROFITABLE-BY-DEFAULT: loop:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP11:%.*]] = trunc i64 [[IV]] to i32			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP0:%.*]] = trunc i64 [[IV]] to i32
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CONV:%.*]] = trunc i32 [[TMP0]] to i8
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[IV]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
	; CHECK-PROFITABLE-BY-DEFAULT: exit:			; CHECK-PROFITABLE-BY-DEFAULT: exit:
	; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret void			; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret void
	;			;
	entry:			entry:
	%wide.trip.count = zext i32 %n to i64			%wide.trip.count = zext i32 %n to i64
	br label %loop			br label %loop

	loop:			loop:
	Show All 12 Lines

llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll

	; REQUIRES: asserts			; REQUIRES: asserts
	; RUN: opt < %s -S -passes=loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s			; RUN: opt < %s -S -passes=loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s

	; This test checks that, when stress testing VPlan, if the computed VF			; This test checks that, when stress testing VPlan, if the computed VF
	; is 1, we override it to VF = 4.			; is 1, we override it to VF = 4.

	; CHECK: LV: VPlan computed VF 1.			; CHECK: LV: VPlan computed VF 0.
	; CHECK: LV: VPlan stress testing: overriding computed VF.			; CHECK: LV: VPlan stress testing: overriding computed VF.
	; CHECK: LV: Using VF 4 to build VPlans.			; CHECK: LV: Using VF 4 to build VPlans.
	@arr2 = external global [8 x i32], align 16			@arr2 = external global [8 x i32], align 16
	@arr = external global [8 x [8 x i32]], align 16			@arr = external global [8 x [8 x i32]], align 16

	; Function Attrs: norecurse nounwind uwtable			; Function Attrs: norecurse nounwind uwtable
	define void @foo(i32 %n) {			define void @foo(i32 %n) {
	entry:			entry:
	br label %for.body			br label %for.body

	for.body: ; preds = %for.inc8, %entry			for.body: ; preds = %for.inc8, %entry
	%indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]			%indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
	%arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21			%arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21
	%0 = trunc i64 %indvars.iv21 to i32			%0 = trunc i64 %indvars.iv21 to i32
				david-arm: Similar to the test above, you may need to change the test so you still get VF=1. You could try choosing to use a 32-bit phi and truncate that to i16?
	store i32 %0, ptr %arrayidx, align 4			store i32 %0, ptr %arrayidx, align 4
	%1 = trunc i64 %indvars.iv21 to i32			%1 = trunc i64 %indvars.iv21 to i32
	%add = add nsw i32 %1, %n			%add = add nsw i32 %1, %n
	br label %for.body3			br label %for.body3

	for.body3: ; preds = %for.body3, %for.body			for.body3: ; preds = %for.body3, %for.body
	%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]			%indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
	%arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21			%arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
	Show All 16 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][LoopVectorize] Add truncated store values to list of types for widening
Needs ReviewPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 554376

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll

llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll

llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll

llvm/test/Transforms/LoopVectorize/AArch64/truncate-type-widening.ll

llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll

llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll

llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][LoopVectorize] Add truncated store values to list of types for widening
Needs ReviewPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 554376

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll

llvm/test/Transforms/LoopVectorize/AArch64/extend-vectorization-factor-for-unprofitable-memops.ll

llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll

llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll

llvm/test/Transforms/LoopVectorize/AArch64/truncate-type-widening.ll

llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-zext-costs.ll

llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll

llvm/test/Transforms/LoopVectorize/vplan-stress-test-no-explict-vf.ll

[AArch64][LoopVectorize] Add truncated store values to list of types for widening
Needs ReviewPublic