This is an archive of the discontinued LLVM Phabricator instance.

[SLP] Correct shuffle cost to reuse vectorized values in depending nodes
AbandonedPublic

Authored by dtemirbulatov on Oct 21 2022, 10:02 AM.

Download Raw Diff

Details

Reviewers

ABataev
RKSimon
dmgreen
peterwaller-arm
paulwalker-arm
c-rhodes
awarzynski
spatel
MattDevereau
benmxwl-arm

Summary

While estimating the cost of a tree node that depends on another node, we might estimate the cost of the shuffle operation too optimistically by assuming that it depends on a single tree node with TargetTransformInfo::SK_Select.

For example, such a node dependence ended up producing this snippet:

%9 = fsub fast <2 x float> %8, %3
%10 = fadd fast <2 x float> %8, %3
%11 = shufflevector <2 x float> %9, <2 x float> %10, <2 x i32> <i32 0, i32 3>

This led to a suboptimal result in the end.

Diff Detail

Event Timeline

dtemirbulatov created this revision.Oct 21 2022, 10:02 AM

Herald added a project: Restricted Project. · View Herald TranscriptOct 21 2022, 10:02 AM

Herald added subscribers: vporpo, hiraditya. · View Herald Transcript

dtemirbulatov requested review of this revision.Oct 21 2022, 10:02 AM

Herald added a project: Restricted Project. · View Herald TranscriptOct 21 2022, 10:02 AM

Herald added a subscriber: • pcwang-thead. · View Herald Transcript

dtemirbulatov added a reviewer: benmxwl-arm.Oct 21 2022, 10:04 AM

ABataev added inline comments.Oct 21 2022, 10:08 AM

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
6799–6811	Can you try to reuse buildShuffleEntryMask instead?

Harbormaster completed remote builds in B193569: Diff 469564.Oct 21 2022, 10:33 AM

dtemirbulatov abandoned this revision.Oct 31 2022, 2:52 AM

The testcase associated with this change is not relevant, but I think the proposed change improves the cost calculation. I am going to reopen this once I have an appropriate testcase.

Revision Contents

Path

Size

llvm/

lib/

Transforms/

Vectorize/

SLPVectorizer.cpp

15 lines

test/

Transforms/

SLPVectorizer/

AArch64/

depend-node-shuffle.ll

44 lines

slp-fma-loss.ll

159 lines

transpose-inseltpoison.ll

75 lines

transpose.ll

75 lines

X86/

alternate-fp-inseltpoison.ll

36 lines

alternate-fp.ll

60 lines

alternate-int-inseltpoison.ll

6 lines

alternate-int.ll

6 lines

Diff 469564

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,790 Lines • ▼ Show 20 Lines	auto GetVectorCost = [&](InstructionCost) {
auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());		auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());		auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,		VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
TTI::CastContextHint::None, CostKind);		TTI::CastContextHint::None, CostKind);
VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,		VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
TTI::CastContextHint::None, CostKind);		TTI::CastContextHint::None, CostKind);
}		}
if (E->ReuseShuffleIndices.empty()) {		if (E->ReuseShuffleIndices.empty()) {
VecCost +=		TargetTransformInfo::ShuffleKind ShuffleKind =
TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy);		TargetTransformInfo::SK_Select;
		const TreeEntry *TE = nullptr;
		for (unsigned I = 0, N = E->getNumOperands(); I < N; ++I) {
		const TreeEntry *Op = getVectorizedOperand(E, I);
		if (Op && !TE)
		TE = Op;
		if (Op && TE && (TE != Op)) {
		ShuffleKind = TargetTransformInfo::SK_PermuteTwoSrc;
		break;
		}
		}
		VecCost += TTI->getShuffleCost(ShuffleKind, FinalVecTy);
		ABataev (inline comment; Unsubmitted, Not Done): Can you try to reuse buildShuffleEntryMask instead?
} else {		} else {
SmallVector<int> Mask;		SmallVector<int> Mask;
buildShuffleEntryMask(		buildShuffleEntryMask(
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,		E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {		[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");		assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
return I->getOpcode() == E->getAltOpcode();		return I->getOpcode() == E->getAltOpcode();
},		},
▲ Show 20 Lines • Show All 6,026 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/AArch64/depend-node-shuffle.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
				; RUN: opt < %s -slp-vectorizer -S | FileCheck %s

				target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64-none-linux-gnu"

				define void @foo([3 x { float, float }]* %a, [3 x { float, float }]* %c, float* %arrayidx16.imagp, float %arrayidx12.real, float %arrayidx12.imag) {
				; CHECK-LABEL: @foo(
				; CHECK-NEXT: entry:
				; CHECK-NEXT: [[ARRAYIDX12_REALP:%.*]] = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* [[A:%.*]], i64 0, i64 0, i32 0
				; CHECK-NEXT: [[ARRAYIDX12_IMAGP:%.*]] = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* [[A]], i64 0, i64 0, i32 1
				; CHECK-NEXT: [[ARRAYIDX5_REALP:%.*]] = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* [[C:%.*]], i64 0, i64 0, i32 0
				; CHECK-NEXT: [[ARRAYIDX5_IMAGP:%.*]] = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* [[C]], i64 0, i64 0, i32 1
				; CHECK-NEXT: [[ARRAYIDX12_REAL1:%.*]] = load float, float* [[ARRAYIDX12_REALP]], align 4
				; CHECK-NEXT: [[ARRAYIDX16_IMAG:%.*]] = load float, float* [[ARRAYIDX16_IMAGP:%.*]], align 4
				; CHECK-NEXT: [[MUL_AD:%.*]] = fmul fast float [[ARRAYIDX12_IMAG:%.*]], [[ARRAYIDX12_REAL:%.*]]
				; CHECK-NEXT: [[ARRAYIDX12_IMAG3:%.*]] = load float, float* [[ARRAYIDX12_IMAGP]], align 4
				; CHECK-NEXT: [[MUL_BC:%.*]] = fmul fast float [[ARRAYIDX16_IMAG]], [[ARRAYIDX12_IMAG3]]
				; CHECK-NEXT: [[MUL_I:%.*]] = fadd fast float [[MUL_BC]], [[MUL_AD]]
				; CHECK-NEXT: [[MUL_AC:%.*]] = fmul fast float [[ARRAYIDX16_IMAG]], [[ARRAYIDX12_REAL1]]
				; CHECK-NEXT: [[TMP0:%.*]] = fmul fast float [[ARRAYIDX12_REAL]], [[ARRAYIDX12_REAL]]
				; CHECK-NEXT: [[MUL_R:%.*]] = fsub fast float [[MUL_AC]], [[TMP0]]
				; CHECK-NEXT: store float [[MUL_R]], float* [[ARRAYIDX5_REALP]], align 4
				; CHECK-NEXT: store float [[MUL_I]], float* [[ARRAYIDX5_IMAGP]], align 4
				; CHECK-NEXT: ret void
				;
				entry:
				%arrayidx12.realp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 0, i64 0, i32 0
				%arrayidx12.imagp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 0, i64 0, i32 1
				%arrayidx5.realp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %c, i64 0, i64 0, i32 0
				%arrayidx5.imagp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %c, i64 0, i64 0, i32 1
				%arrayidx12.real1 = load float, float* %arrayidx12.realp, align 4
				%arrayidx16.imag = load float, float* %arrayidx16.imagp, align 4
				%mul_ad = fmul fast float %arrayidx12.imag, %arrayidx12.real
				%arrayidx12.imag3 = load float, float* %arrayidx12.imagp, align 4
				%mul_bc = fmul fast float %arrayidx16.imag, %arrayidx12.imag3
				%mul_i = fadd fast float %mul_bc, %mul_ad
				%mul_ac = fmul fast float %arrayidx16.imag, %arrayidx12.real1
				%0 = fmul fast float %arrayidx12.real, %arrayidx12.real
				%mul_r = fsub fast float %mul_ac, %0
				store float %mul_r, float* %arrayidx5.realp, align 4
				store float %mul_i, float* %arrayidx5.imagp, align 4
				ret void
				}

llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s \| FileCheck %s		; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s \| FileCheck %s

; Test case where not vectorizing is more profitable because multiple		; Test case where not vectorizing is more profitable because multiple
; fmul/{fadd,fsub} pairs can be lowered to fma instructions.		; fmul/{fadd,fsub} pairs can be lowered to fma instructions.
define void @slp_not_profitable_with_fast_fmf(ptr %A, ptr %B) {		define void @slp_not_profitable_with_fast_fmf(ptr %A, ptr %B) {
; CHECK-LABEL: @slp_not_profitable_with_fast_fmf(		; CHECK-LABEL: @slp_not_profitable_with_fast_fmf(
; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1		; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1
; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4		; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4
		; CHECK-NEXT: [[B_1:%.*]] = load float, ptr [[GEP_B_1]], align 4
		; CHECK-NEXT: [[MUL_0:%.*]] = fmul fast float [[B_1]], [[A_0]]
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4		; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4		; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0		; CHECK-NEXT: [[B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1		; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[B_2]], [[B_0]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]]		; CHECK-NEXT: [[SUB:%.*]] = fsub fast float [[MUL_0]], [[MUL_1]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>		; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[B_0]], [[B_1]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0		; CHECK-NEXT: [[MUL_3:%.*]] = fmul fast float [[B_2]], [[A_0]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1		; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL_3]], [[MUL_2]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]]		; CHECK-NEXT: store float [[SUB]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: store float [[ADD]], ptr [[GEP_A_1]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: store float [[B_2]], ptr [[B]], align 4
; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1		%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
%A.0 = load float, ptr %A, align 4		%A.0 = load float, ptr %A, align 4
%B.1 = load float, ptr %gep.B.1, align 4		%B.1 = load float, ptr %gep.B.1, align 4
%mul.0 = fmul fast float %B.1, %A.0		%mul.0 = fmul fast float %B.1, %A.0
%B.0 = load float, ptr %B, align 4		%B.0 = load float, ptr %B, align 4
%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2		%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2
Show All 9 Lines	;
store float %B.2, ptr %B, align 4		store float %B.2, ptr %B, align 4
ret void		ret void
}		}

define void @slp_not_profitable_with_reassoc_fmf(ptr %A, ptr %B) {		define void @slp_not_profitable_with_reassoc_fmf(ptr %A, ptr %B) {
; CHECK-LABEL: @slp_not_profitable_with_reassoc_fmf(		; CHECK-LABEL: @slp_not_profitable_with_reassoc_fmf(
; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1		; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1
; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4		; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4
		; CHECK-NEXT: [[B_1:%.*]] = load float, ptr [[GEP_B_1]], align 4
		; CHECK-NEXT: [[MUL_0:%.*]] = fmul reassoc float [[B_1]], [[A_0]]
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4		; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4		; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0		; CHECK-NEXT: [[B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1		; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[B_2]], [[B_0]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]]		; CHECK-NEXT: [[SUB:%.*]] = fsub reassoc float [[MUL_0]], [[MUL_1]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>		; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[B_0]], [[B_1]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0		; CHECK-NEXT: [[MUL_3:%.*]] = fmul reassoc float [[B_2]], [[A_0]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1		; CHECK-NEXT: [[ADD:%.*]] = fadd reassoc float [[MUL_3]], [[MUL_2]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul reassoc <2 x float> [[TMP1]], [[TMP6]]		; CHECK-NEXT: store float [[SUB]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fsub reassoc <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = fadd reassoc <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: store float [[ADD]], ptr [[GEP_A_1]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: store float [[B_2]], ptr [[B]], align 4
; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1		%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
%A.0 = load float, ptr %A, align 4		%A.0 = load float, ptr %A, align 4
%B.1 = load float, ptr %gep.B.1, align 4		%B.1 = load float, ptr %gep.B.1, align 4
%mul.0 = fmul reassoc float %B.1, %A.0		%mul.0 = fmul reassoc float %B.1, %A.0
%B.0 = load float, ptr %B, align 4		%B.0 = load float, ptr %B, align 4
%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2		%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2
Show All 10 Lines	;
ret void		ret void
}		}

; FMA cannot be used due to missing fast-math flags, so SLP should kick in.		; FMA cannot be used due to missing fast-math flags, so SLP should kick in.
define void @slp_profitable_missing_fmf_on_fadd_fsub(ptr %A, ptr %B) {		define void @slp_profitable_missing_fmf_on_fadd_fsub(ptr %A, ptr %B) {
; CHECK-LABEL: @slp_profitable_missing_fmf_on_fadd_fsub(		; CHECK-LABEL: @slp_profitable_missing_fmf_on_fadd_fsub(
; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1		; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1
; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4		; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4
		; CHECK-NEXT: [[B_1:%.*]] = load float, ptr [[GEP_B_1]], align 4
		; CHECK-NEXT: [[MUL_0:%.*]] = fmul fast float [[B_1]], [[A_0]]
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4		; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4		; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0		; CHECK-NEXT: [[B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1		; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[B_2]], [[B_0]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]]		; CHECK-NEXT: [[SUB:%.*]] = fsub float [[MUL_0]], [[MUL_1]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>		; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[B_0]], [[B_1]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0		; CHECK-NEXT: [[MUL_3:%.*]] = fmul fast float [[B_2]], [[A_0]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1		; CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL_3]], [[MUL_2]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]]		; CHECK-NEXT: store float [[SUB]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: store float [[ADD]], ptr [[GEP_A_1]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: store float [[B_2]], ptr [[B]], align 4
; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1		%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
%A.0 = load float, ptr %A, align 4		%A.0 = load float, ptr %A, align 4
%B.1 = load float, ptr %gep.B.1, align 4		%B.1 = load float, ptr %gep.B.1, align 4
%mul.0 = fmul fast float %B.1, %A.0		%mul.0 = fmul fast float %B.1, %A.0
%B.0 = load float, ptr %B, align 4		%B.0 = load float, ptr %B, align 4
%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2		%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2
Show All 10 Lines	;
ret void		ret void
}		}

; FMA cannot be used due to missing fast-math flags, so SLP should kick in.		; FMA cannot be used due to missing fast-math flags, so SLP should kick in.
define void @slp_profitable_missing_fmf_on_fmul_fadd_fsub(ptr %A, ptr %B) {		define void @slp_profitable_missing_fmf_on_fmul_fadd_fsub(ptr %A, ptr %B) {
; CHECK-LABEL: @slp_profitable_missing_fmf_on_fmul_fadd_fsub(		; CHECK-LABEL: @slp_profitable_missing_fmf_on_fmul_fadd_fsub(
; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1		; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1
; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4		; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4
		; CHECK-NEXT: [[B_1:%.*]] = load float, ptr [[GEP_B_1]], align 4
		; CHECK-NEXT: [[MUL_0:%.*]] = fmul float [[B_1]], [[A_0]]
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4		; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4		; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0		; CHECK-NEXT: [[B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1		; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[B_2]], [[B_0]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]]		; CHECK-NEXT: [[SUB:%.*]] = fsub float [[MUL_0]], [[MUL_1]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>		; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[B_0]], [[B_1]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0		; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[B_2]], [[A_0]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1		; CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL_3]], [[MUL_2]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP1]], [[TMP6]]		; CHECK-NEXT: store float [[SUB]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: store float [[ADD]], ptr [[GEP_A_1]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: store float [[B_2]], ptr [[B]], align 4
; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1		%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
%A.0 = load float, ptr %A, align 4		%A.0 = load float, ptr %A, align 4
%B.1 = load float, ptr %gep.B.1, align 4		%B.1 = load float, ptr %gep.B.1, align 4
%mul.0 = fmul float %B.1, %A.0		%mul.0 = fmul float %B.1, %A.0
%B.0 = load float, ptr %B, align 4		%B.0 = load float, ptr %B, align 4
%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2		%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2
Show All 10 Lines	;
ret void		ret void
}		}

; FMA cannot be used due to missing fast-math flags, so SLP should kick in.		; FMA cannot be used due to missing fast-math flags, so SLP should kick in.
define void @slp_profitable_missing_fmf_nnans_only(ptr %A, ptr %B) {		define void @slp_profitable_missing_fmf_nnans_only(ptr %A, ptr %B) {
; CHECK-LABEL: @slp_profitable_missing_fmf_nnans_only(		; CHECK-LABEL: @slp_profitable_missing_fmf_nnans_only(
; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1		; CHECK-NEXT: [[GEP_B_1:%.]] = getelementptr inbounds float, ptr [[B:%.]], i64 1
; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4		; CHECK-NEXT: [[A_0:%.]] = load float, ptr [[A:%.]], align 4
		; CHECK-NEXT: [[B_1:%.*]] = load float, ptr [[GEP_B_1]], align 4
		; CHECK-NEXT: [[MUL_0:%.*]] = fmul nnan float [[B_1]], [[A_0]]
; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4		; CHECK-NEXT: [[B_0:%.*]] = load float, ptr [[B]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4		; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0		; CHECK-NEXT: [[B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[B_0]], i32 1		; CHECK-NEXT: [[MUL_1:%.*]] = fmul nnan float [[B_2]], [[B_0]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul nnan <2 x float> [[TMP3]], [[TMP1]]		; CHECK-NEXT: [[SUB:%.*]] = fsub nnan float [[MUL_0]], [[MUL_1]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>		; CHECK-NEXT: [[MUL_2:%.*]] = fmul nnan float [[B_0]], [[B_1]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0		; CHECK-NEXT: [[MUL_3:%.*]] = fmul nnan float [[B_2]], [[A_0]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[A_0]], i32 1		; CHECK-NEXT: [[ADD:%.*]] = fadd nnan float [[MUL_3]], [[MUL_2]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul nnan <2 x float> [[TMP1]], [[TMP6]]		; CHECK-NEXT: store float [[SUB]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fsub nnan <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
; CHECK-NEXT: [[TMP9:%.*]] = fadd nnan <2 x float> [[TMP7]], [[SHUFFLE]]		; CHECK-NEXT: store float [[ADD]], ptr [[GEP_A_1]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: store float [[B_2]], ptr [[B]], align 4
; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[A]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT: store float [[TMP11]], ptr [[B]], align 4
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1		%gep.B.1 = getelementptr inbounds float, ptr %B, i64 1
%A.0 = load float, ptr %A, align 4		%A.0 = load float, ptr %A, align 4
%B.1 = load float, ptr %gep.B.1, align 4		%B.1 = load float, ptr %gep.B.1, align 4
%mul.0 = fmul nnan float %B.1, %A.0		%mul.0 = fmul nnan float %B.1, %A.0
%B.0 = load float, ptr %B, align 4		%B.0 = load float, ptr %B, align 4
%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2		%gep.B.2 = getelementptr inbounds float, ptr %B, i64 2
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines

exit:		exit:
ret float %red.next		ret float %red.next
}		}

define void @slp_profitable(ptr %A, ptr %B, float %0) {		define void @slp_profitable(ptr %A, ptr %B, float %0) {
; CHECK-LABEL: @slp_profitable(		; CHECK-LABEL: @slp_profitable(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
		; CHECK-NEXT: [[GEP_A_1:%.]] = getelementptr inbounds float, ptr [[A:%.]], i64 1
; CHECK-NEXT: [[SUB_I1096:%.]] = fsub fast float 1.000000e+00, [[TMP0:%.]]		; CHECK-NEXT: [[SUB_I1096:%.]] = fsub fast float 1.000000e+00, [[TMP0:%.]]
; CHECK-NEXT: [[TMP1:%.]] = load <2 x float>, ptr [[A:%.]], align 4		; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0		; CHECK-NEXT: [[MUL_I1100:%.*]] = fmul fast float [[TMP1]], [[SUB_I1096]]
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 1		; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[GEP_A_1]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]]		; CHECK-NEXT: [[MUL7_I1101:%.*]] = fmul fast float [[TMP2]], [[TMP0]]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>		; CHECK-NEXT: [[ADD_I1102:%.*]] = fadd fast float [[MUL7_I1101]], [[MUL_I1100]]
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[SUB_I1096]], i32 0		; CHECK-NEXT: [[MUL14_I:%.*]] = fmul fast float [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[SUB_I1096]], i32 1		; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], [[SUB_I1096]]
; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP6]]		; CHECK-NEXT: [[ADD15_I:%.*]] = fsub fast float [[MUL14_I]], [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[SHUFFLE]], [[TMP7]]		; CHECK-NEXT: store float [[ADD_I1102]], ptr [[B:%.*]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x float> [[SHUFFLE]], [[TMP7]]		; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 1
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP9]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: store float [[ADD15_I]], ptr [[GEP_B_1]], align 4
; CHECK-NEXT: store <2 x float> [[TMP10]], ptr [[B:%.*]], align 4
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
%gep.A.1 = getelementptr inbounds float, ptr %A, i64 1		%gep.A.1 = getelementptr inbounds float, ptr %A, i64 1
%sub.i1096 = fsub fast float 1.000000e+00, %0		%sub.i1096 = fsub fast float 1.000000e+00, %0
%1 = load float, ptr %A, align 4		%1 = load float, ptr %A, align 4
%mul.i1100 = fmul fast float %1, %sub.i1096		%mul.i1100 = fmul fast float %1, %sub.i1096
%2 = load float, ptr %gep.A.1, align 4		%2 = load float, ptr %gep.A.1, align 4
Show All 10 Lines

llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S \| FileCheck %s		; RUN: opt < %s -slp-vectorizer -instcombine -S \| FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"		target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"		target triple = "aarch64--linux-gnu"

define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {		define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
; CHECK-LABEL: @build_vec_v2i64(		; CHECK-LABEL: @build_vec_v2i64(
; CHECK-NEXT: [[TMP1:%.]] = add <2 x i64> [[V0:%.]], [[V1:%.*]]		; CHECK-NEXT: [[V0_0:%.]] = extractelement <2 x i64> [[V0:%.]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i64> [[V0]], [[V1]]		; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i64> [[V0]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 1, i32 2>		; CHECK-NEXT: [[V1_0:%.]] = extractelement <2 x i64> [[V1:%.]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i64> [[V1]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP3]]		; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
; CHECK-NEXT: ret <2 x i64> [[TMP5]]		; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
		; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
		; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
		; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
		; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
		; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2_0]], i64 0
		; CHECK-NEXT: [[TMP3_1:%.*]] = insertelement <2 x i64> [[TMP3_0]], i64 [[TMP2_1]], i64 1
		; CHECK-NEXT: ret <2 x i64> [[TMP3_1]]
;		;
%v0.0 = extractelement <2 x i64> %v0, i32 0		%v0.0 = extractelement <2 x i64> %v0, i32 0
%v0.1 = extractelement <2 x i64> %v0, i32 1		%v0.1 = extractelement <2 x i64> %v0, i32 1
%v1.0 = extractelement <2 x i64> %v1, i32 0		%v1.0 = extractelement <2 x i64> %v1, i32 0
%v1.1 = extractelement <2 x i64> %v1, i32 1		%v1.1 = extractelement <2 x i64> %v1, i32 1
%tmp0.0 = add i64 %v0.0, %v1.0		%tmp0.0 = add i64 %v0.0, %v1.0
%tmp0.1 = add i64 %v0.1, %v1.1		%tmp0.1 = add i64 %v0.1, %v1.1
%tmp1.0 = sub i64 %v0.0, %v1.0		%tmp1.0 = sub i64 %v0.0, %v1.0
%tmp1.1 = sub i64 %v0.1, %v1.1		%tmp1.1 = sub i64 %v0.1, %v1.1
%tmp2.0 = add i64 %tmp0.0, %tmp0.1		%tmp2.0 = add i64 %tmp0.0, %tmp0.1
%tmp2.1 = add i64 %tmp1.0, %tmp1.1		%tmp2.1 = add i64 %tmp1.0, %tmp1.1
%tmp3.0 = insertelement <2 x i64> poison, i64 %tmp2.0, i32 0		%tmp3.0 = insertelement <2 x i64> poison, i64 %tmp2.0, i32 0
%tmp3.1 = insertelement <2 x i64> %tmp3.0, i64 %tmp2.1, i32 1		%tmp3.1 = insertelement <2 x i64> %tmp3.0, i64 %tmp2.1, i32 1
ret <2 x i64> %tmp3.1		ret <2 x i64> %tmp3.1
}		}

define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {		define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @store_chain_v2i64(		; CHECK-LABEL: @store_chain_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*		; CHECK-NEXT: [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8		; CHECK-NEXT: [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*		; CHECK-NEXT: [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8		; CHECK-NEXT: [[V0_0:%.*]] = load i64, i64* [[A]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]		; CHECK-NEXT: [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]		; CHECK-NEXT: [[V1_0:%.*]] = load i64, i64* [[B]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>		; CHECK-NEXT: [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]]		; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*		; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8		; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
		; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
		; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
		; CHECK-NEXT: store i64 [[TMP2_0]], i64* [[C]], align 8
		; CHECK-NEXT: store i64 [[TMP2_1]], i64* [[C_1]], align 8
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
%a.0 = getelementptr i64, i64* %a, i64 0		%a.0 = getelementptr i64, i64* %a, i64 0
%a.1 = getelementptr i64, i64* %a, i64 1		%a.1 = getelementptr i64, i64* %a, i64 1
%b.0 = getelementptr i64, i64* %b, i64 0		%b.0 = getelementptr i64, i64* %b, i64 0
%b.1 = getelementptr i64, i64* %b, i64 1		%b.1 = getelementptr i64, i64* %b, i64 1
%c.0 = getelementptr i64, i64* %c, i64 0		%c.0 = getelementptr i64, i64* %c, i64 0
%c.1 = getelementptr i64, i64* %c, i64 1		%c.1 = getelementptr i64, i64* %c, i64 1
Show All 9 Lines	;
%tmp2.1 = add i64 %tmp1.0, %tmp1.1		%tmp2.1 = add i64 %tmp1.0, %tmp1.1
store i64 %tmp2.0, i64* %c.0, align 8		store i64 %tmp2.0, i64* %c.0, align 8
store i64 %tmp2.1, i64* %c.1, align 8		store i64 %tmp2.1, i64* %c.1, align 8
ret void		ret void
}		}

define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {		define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32(		; CHECK-LABEL: @build_vec_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6>		; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7>		; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]		; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: ret <4 x i32> [[TMP5]]		; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
		; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[V1]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
		; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]]
		; CHECK-NEXT: [[TMP9:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]]
		; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> <i32 0, i32 3>
		; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i32> [[TMP10]], [[TMP5]]
		; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> undef, <2 x i32> <i32 3, i32 2>
		; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[V1]], <4 x i32> undef, <2 x i32> <i32 3, i32 2>
		; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i32> [[TMP12]], [[TMP13]]
		; CHECK-NEXT: [[TMP15:%.*]] = sub <2 x i32> [[TMP12]], [[TMP13]]
		; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> <i32 0, i32 3>
		; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
		; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[V1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
		; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP17]], [[TMP18]]
		; CHECK-NEXT: [[TMP20:%.*]] = sub <2 x i32> [[TMP17]], [[TMP18]]
		; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[TMP19]], <2 x i32> [[TMP20]], <2 x i32> <i32 0, i32 3>
		; CHECK-NEXT: [[TMP22:%.*]] = add <2 x i32> [[TMP21]], [[TMP16]]
		; CHECK-NEXT: [[TMP3_31:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP22]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
		; CHECK-NEXT: ret <4 x i32> [[TMP3_31]]
;		;
%v0.0 = extractelement <4 x i32> %v0, i32 0		%v0.0 = extractelement <4 x i32> %v0, i32 0
%v0.1 = extractelement <4 x i32> %v0, i32 1		%v0.1 = extractelement <4 x i32> %v0, i32 1
%v0.2 = extractelement <4 x i32> %v0, i32 2		%v0.2 = extractelement <4 x i32> %v0, i32 2
%v0.3 = extractelement <4 x i32> %v0, i32 3		%v0.3 = extractelement <4 x i32> %v0, i32 3
%v1.0 = extractelement <4 x i32> %v1, i32 0		%v1.0 = extractelement <4 x i32> %v1, i32 0
%v1.1 = extractelement <4 x i32> %v1, i32 1		%v1.1 = extractelement <4 x i32> %v1, i32 1
%v1.2 = extractelement <4 x i32> %v1, i32 2		%v1.2 = extractelement <4 x i32> %v1, i32 2
▲ Show 20 Lines • Show All 182 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -instcombine -S | FileCheck %s		; RUN: opt < %s -slp-vectorizer -instcombine -S | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"		target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"		target triple = "aarch64--linux-gnu"

define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {		define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
; CHECK-LABEL: @build_vec_v2i64(		; CHECK-LABEL: @build_vec_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[V0:%.*]], [[V1:%.*]]		; CHECK-NEXT: [[V0_0:%.*]] = extractelement <2 x i64> [[V0:%.*]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i64> [[V0]], [[V1]]		; CHECK-NEXT: [[V0_1:%.*]] = extractelement <2 x i64> [[V0]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 1, i32 2>		; CHECK-NEXT: [[V1_0:%.*]] = extractelement <2 x i64> [[V1:%.*]], i64 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: [[V1_1:%.*]] = extractelement <2 x i64> [[V1]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP3]]		; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
; CHECK-NEXT: ret <2 x i64> [[TMP5]]		; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
		; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
		; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
		; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
		; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
		; CHECK-NEXT: [[TMP3_0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2_0]], i64 0
		; CHECK-NEXT: [[TMP3_1:%.*]] = insertelement <2 x i64> [[TMP3_0]], i64 [[TMP2_1]], i64 1
		; CHECK-NEXT: ret <2 x i64> [[TMP3_1]]
;		;
%v0.0 = extractelement <2 x i64> %v0, i32 0		%v0.0 = extractelement <2 x i64> %v0, i32 0
%v0.1 = extractelement <2 x i64> %v0, i32 1		%v0.1 = extractelement <2 x i64> %v0, i32 1
%v1.0 = extractelement <2 x i64> %v1, i32 0		%v1.0 = extractelement <2 x i64> %v1, i32 0
%v1.1 = extractelement <2 x i64> %v1, i32 1		%v1.1 = extractelement <2 x i64> %v1, i32 1
%tmp0.0 = add i64 %v0.0, %v1.0		%tmp0.0 = add i64 %v0.0, %v1.0
%tmp0.1 = add i64 %v0.1, %v1.1		%tmp0.1 = add i64 %v0.1, %v1.1
%tmp1.0 = sub i64 %v0.0, %v1.0		%tmp1.0 = sub i64 %v0.0, %v1.0
%tmp1.1 = sub i64 %v0.1, %v1.1		%tmp1.1 = sub i64 %v0.1, %v1.1
%tmp2.0 = add i64 %tmp0.0, %tmp0.1		%tmp2.0 = add i64 %tmp0.0, %tmp0.1
%tmp2.1 = add i64 %tmp1.0, %tmp1.1		%tmp2.1 = add i64 %tmp1.0, %tmp1.1
%tmp3.0 = insertelement <2 x i64> undef, i64 %tmp2.0, i32 0		%tmp3.0 = insertelement <2 x i64> undef, i64 %tmp2.0, i32 0
%tmp3.1 = insertelement <2 x i64> %tmp3.0, i64 %tmp2.1, i32 1		%tmp3.1 = insertelement <2 x i64> %tmp3.0, i64 %tmp2.1, i32 1
ret <2 x i64> %tmp3.1		ret <2 x i64> %tmp3.1
}		}

define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {		define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @store_chain_v2i64(		; CHECK-LABEL: @store_chain_v2i64(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*		; CHECK-NEXT: [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8		; CHECK-NEXT: [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*		; CHECK-NEXT: [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8		; CHECK-NEXT: [[V0_0:%.*]] = load i64, i64* [[A]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]		; CHECK-NEXT: [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]		; CHECK-NEXT: [[V1_0:%.*]] = load i64, i64* [[B]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>		; CHECK-NEXT: [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>		; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]]		; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*		; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8		; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
		; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
		; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
		; CHECK-NEXT: store i64 [[TMP2_0]], i64* [[C]], align 8
		; CHECK-NEXT: store i64 [[TMP2_1]], i64* [[C_1]], align 8
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
%a.0 = getelementptr i64, i64* %a, i64 0		%a.0 = getelementptr i64, i64* %a, i64 0
%a.1 = getelementptr i64, i64* %a, i64 1		%a.1 = getelementptr i64, i64* %a, i64 1
%b.0 = getelementptr i64, i64* %b, i64 0		%b.0 = getelementptr i64, i64* %b, i64 0
%b.1 = getelementptr i64, i64* %b, i64 1		%b.1 = getelementptr i64, i64* %b, i64 1
%c.0 = getelementptr i64, i64* %c, i64 0		%c.0 = getelementptr i64, i64* %c, i64 0
%c.1 = getelementptr i64, i64* %c, i64 1		%c.1 = getelementptr i64, i64* %c, i64 1
Show All 9 Lines	;
%tmp2.1 = add i64 %tmp1.0, %tmp1.1		%tmp2.1 = add i64 %tmp1.0, %tmp1.1
store i64 %tmp2.0, i64* %c.0, align 8		store i64 %tmp2.0, i64* %c.0, align 8
store i64 %tmp2.1, i64* %c.1, align 8		store i64 %tmp2.1, i64* %c.1, align 8
ret void		ret void
}		}

define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {		define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @build_vec_v4i32(		; CHECK-LABEL: @build_vec_v4i32(
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]]		; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]]		; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 3, i32 6>		; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 2, i32 7>		; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]		; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: ret <4 x i32> [[TMP5]]		; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
		; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[V1]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
		; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]]
		; CHECK-NEXT: [[TMP9:%.*]] = sub <2 x i32> [[TMP6]], [[TMP7]]
		; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> <i32 0, i32 3>
		; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i32> [[TMP10]], [[TMP5]]
		; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> undef, <2 x i32> <i32 3, i32 2>
		; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[V1]], <4 x i32> undef, <2 x i32> <i32 3, i32 2>
		; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i32> [[TMP12]], [[TMP13]]
		; CHECK-NEXT: [[TMP15:%.*]] = sub <2 x i32> [[TMP12]], [[TMP13]]
		; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> <i32 0, i32 3>
		; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
		; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[V1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
		; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP17]], [[TMP18]]
		; CHECK-NEXT: [[TMP20:%.*]] = sub <2 x i32> [[TMP17]], [[TMP18]]
		; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[TMP19]], <2 x i32> [[TMP20]], <2 x i32> <i32 0, i32 3>
		; CHECK-NEXT: [[TMP22:%.*]] = add <2 x i32> [[TMP21]], [[TMP16]]
		; CHECK-NEXT: [[TMP3_31:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP22]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
		; CHECK-NEXT: ret <4 x i32> [[TMP3_31]]
;		;
%v0.0 = extractelement <4 x i32> %v0, i32 0		%v0.0 = extractelement <4 x i32> %v0, i32 0
%v0.1 = extractelement <4 x i32> %v0, i32 1		%v0.1 = extractelement <4 x i32> %v0, i32 1
%v0.2 = extractelement <4 x i32> %v0, i32 2		%v0.2 = extractelement <4 x i32> %v0, i32 2
%v0.3 = extractelement <4 x i32> %v0, i32 3		%v0.3 = extractelement <4 x i32> %v0, i32 3
%v1.0 = extractelement <4 x i32> %v1, i32 0		%v1.0 = extractelement <4 x i32> %v1, i32 0
%v1.1 = extractelement <4 x i32> %v1, i32 1		%v1.1 = extractelement <4 x i32> %v1, i32 1
%v1.2 = extractelement <4 x i32> %v1, i32 2		%v1.2 = extractelement <4 x i32> %v1, i32 2
▲ Show 20 Lines • Show All 182 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll

Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines	;
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4		%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5		%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6		%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7		%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7		ret <8 x float> %r7
}		}

define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {		define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @fmul_fdiv_v8f32(		; SSE-LABEL: @fmul_fdiv_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]		; SSE-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]		; SSE-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>		; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]		; SSE-NEXT: ret <8 x float> [[TMP3]]
		;
		; SLM-LABEL: @fmul_fdiv_v8f32(
		; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
		; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
		; SLM-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
		; SLM-NEXT: [[TMP4:%.*]] = fdiv <4 x float> [[TMP1]], [[TMP2]]
		; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
		; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
		; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[B]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
		; SLM-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP6]], [[TMP7]]
		; SLM-NEXT: [[TMP9:%.*]] = fdiv <4 x float> [[TMP6]], [[TMP7]]
		; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
		; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
		; SLM-NEXT: ret <8 x float> [[R71]]
		;
		; AVX-LABEL: @fmul_fdiv_v8f32(
		; AVX-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
		; AVX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
		; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
		; AVX-NEXT: ret <8 x float> [[TMP3]]
		;
		; AVX512-LABEL: @fmul_fdiv_v8f32(
		; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
		; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
		; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
		; AVX512-NEXT: ret <8 x float> [[TMP3]]
;		;
%a0 = extractelement <8 x float> %a, i32 0		%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1		%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2		%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3		%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4		%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5		%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6		%a6 = extractelement <8 x float> %a, i32 6
▲ Show 20 Lines • Show All 66 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE		; RUN: opt < %s -mtriple=x86_64-unknown -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM		; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX		; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX		; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512		; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512		; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512SKX

define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {		define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @fadd_fsub_v8f32(		; CHECK-LABEL: @fadd_fsub_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]		; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]		; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>		; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]		; CHECK-NEXT: ret <8 x float> [[TMP3]]
;		;
Show All 28 Lines	;
%r4 = insertelement <8 x float> %r3, float %ab4, i32 4		%r4 = insertelement <8 x float> %r3, float %ab4, i32 4
%r5 = insertelement <8 x float> %r4, float %ab5, i32 5		%r5 = insertelement <8 x float> %r4, float %ab5, i32 5
%r6 = insertelement <8 x float> %r5, float %ab6, i32 6		%r6 = insertelement <8 x float> %r5, float %ab6, i32 6
%r7 = insertelement <8 x float> %r6, float %ab7, i32 7		%r7 = insertelement <8 x float> %r6, float %ab7, i32 7
ret <8 x float> %r7		ret <8 x float> %r7
}		}

define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {		define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: @fmul_fdiv_v8f32(		; SSE-LABEL: @fmul_fdiv_v8f32(
; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]		; SSE-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]		; SSE-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>		; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
; CHECK-NEXT: ret <8 x float> [[TMP3]]		; SSE-NEXT: ret <8 x float> [[TMP3]]
		;
		; SLM-LABEL: @fmul_fdiv_v8f32(
		; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
		; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[B:%.*]], <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
		; SLM-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
		; SLM-NEXT: [[TMP4:%.*]] = fdiv <4 x float> [[TMP1]], [[TMP2]]
		; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
		; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[A]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
		; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[B]], <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
		; SLM-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP6]], [[TMP7]]
		; SLM-NEXT: [[TMP9:%.*]] = fdiv <4 x float> [[TMP6]], [[TMP7]]
		; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
		; SLM-NEXT: [[R71:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
		; SLM-NEXT: ret <8 x float> [[R71]]
		;
		; AVX-LABEL: @fmul_fdiv_v8f32(
		; AVX-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
		; AVX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
		; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
		; AVX-NEXT: ret <8 x float> [[TMP3]]
		;
		; AVX2-LABEL: @fmul_fdiv_v8f32(
		; AVX2-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
		; AVX2-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
		; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
		; AVX2-NEXT: ret <8 x float> [[TMP3]]
		;
		; AVX512-LABEL: @fmul_fdiv_v8f32(
		; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
		; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
		; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
		; AVX512-NEXT: ret <8 x float> [[TMP3]]
		;
		; AVX512SKX-LABEL: @fmul_fdiv_v8f32(
		; AVX512SKX-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
		; AVX512SKX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
		; AVX512SKX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 13, i32 14, i32 7>
		; AVX512SKX-NEXT: ret <8 x float> [[TMP3]]
;		;
%a0 = extractelement <8 x float> %a, i32 0		%a0 = extractelement <8 x float> %a, i32 0
%a1 = extractelement <8 x float> %a, i32 1		%a1 = extractelement <8 x float> %a, i32 1
%a2 = extractelement <8 x float> %a, i32 2		%a2 = extractelement <8 x float> %a, i32 2
%a3 = extractelement <8 x float> %a, i32 3		%a3 = extractelement <8 x float> %a, i32 3
%a4 = extractelement <8 x float> %a, i32 4		%a4 = extractelement <8 x float> %a, i32 4
%a5 = extractelement <8 x float> %a, i32 5		%a5 = extractelement <8 x float> %a, i32 5
%a6 = extractelement <8 x float> %a, i32 6		%a6 = extractelement <8 x float> %a, i32 6
Show All 40 Lines
; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[TMP3]], float [[A2]], i64 2		; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[TMP3]], float [[A2]], i64 2
; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i64 3		; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i64 3
; SLM-NEXT: ret <4 x float> [[R3]]		; SLM-NEXT: ret <4 x float> [[R3]]
;		;
; AVX-LABEL: @fmul_fdiv_v4f32_const(		; AVX-LABEL: @fmul_fdiv_v4f32_const(
; AVX-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>		; AVX-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX-NEXT: ret <4 x float> [[TMP1]]		; AVX-NEXT: ret <4 x float> [[TMP1]]
;		;
		; AVX2-LABEL: @fmul_fdiv_v4f32_const(
		; AVX2-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
		; AVX2-NEXT: ret <4 x float> [[TMP1]]
		;
; AVX512-LABEL: @fmul_fdiv_v4f32_const(		; AVX512-LABEL: @fmul_fdiv_v4f32_const(
; AVX512-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>		; AVX512-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
; AVX512-NEXT: ret <4 x float> [[TMP1]]		; AVX512-NEXT: ret <4 x float> [[TMP1]]
;		;
		; AVX512SKX-LABEL: @fmul_fdiv_v4f32_const(
		; AVX512SKX-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[A:%.*]], <float 2.000000e+00, float 1.000000e+00, float 1.000000e+00, float 2.000000e+00>
		; AVX512SKX-NEXT: ret <4 x float> [[TMP1]]
		;
%a0 = extractelement <4 x float> %a, i32 0		%a0 = extractelement <4 x float> %a, i32 0
%a1 = extractelement <4 x float> %a, i32 1		%a1 = extractelement <4 x float> %a, i32 1
%a2 = extractelement <4 x float> %a, i32 2		%a2 = extractelement <4 x float> %a, i32 2
%a3 = extractelement <4 x float> %a, i32 3		%a3 = extractelement <4 x float> %a, i32 3
%ab0 = fmul float %a0, 2.0		%ab0 = fmul float %a0, 2.0
%ab1 = fmul float %a1, 1.0		%ab1 = fmul float %a1, 1.0
%ab2 = fdiv float %a2, 1.0		%ab2 = fdiv float %a2, 1.0
%ab3 = fdiv float %a3, 0.5		%ab3 = fdiv float %a3, 0.5
%r0 = insertelement <4 x float> undef, float %ab0, i32 0		%r0 = insertelement <4 x float> undef, float %ab0, i32 0
%r1 = insertelement <4 x float> %r0, float %ab1, i32 1		%r1 = insertelement <4 x float> %r0, float %ab1, i32 1
%r2 = insertelement <4 x float> %r1, float %ab2, i32 2		%r2 = insertelement <4 x float> %r1, float %ab2, i32 2
%r3 = insertelement <4 x float> %r2, float %ab3, i32 3		%r3 = insertelement <4 x float> %r2, float %ab3, i32 3
ret <4 x float> %r3		ret <4 x float> %r3
}		}

llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll

	Show First 20 Lines • Show All 106 Lines • ▼ Show 20 Lines
	; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>			; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
	; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>			; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
	; SSE-NEXT: ret <8 x i32> [[R71]]			; SSE-NEXT: ret <8 x i32> [[R71]]
	;			;
	; SLM-LABEL: @ashr_shl_v8i32(			; SLM-LABEL: @ashr_shl_v8i32(
	; SLM-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]			; SLM-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
	; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]			; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
	; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>			; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SLM-NEXT: ret <8 x i32> [[TMP3]]			; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
				; SLM-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
				; SLM-NEXT: ret <8 x i32> [[R71]]
	;			;
	; AVX1-LABEL: @ashr_shl_v8i32(			; AVX1-LABEL: @ashr_shl_v8i32(
	; AVX1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]			; AVX1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
	; AVX1-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]			; AVX1-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
	; AVX1-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>			; AVX1-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
	; AVX1-NEXT: ret <8 x i32> [[TMP3]]			; AVX1-NEXT: ret <8 x i32> [[TMP3]]
	;			;
	; AVX2-LABEL: @ashr_shl_v8i32(			; AVX2-LABEL: @ashr_shl_v8i32(
	▲ Show 20 Lines • Show All 413 Lines • Show Last 20 Lines

llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll

	Show First 20 Lines • Show All 106 Lines • ▼ Show 20 Lines
	; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>			; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>			; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
	; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>			; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
	; SSE-NEXT: ret <8 x i32> [[R71]]			; SSE-NEXT: ret <8 x i32> [[R71]]
	;			;
	; SLM-LABEL: @ashr_shl_v8i32(			; SLM-LABEL: @ashr_shl_v8i32(
	; SLM-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]			; SLM-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
	; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]			; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
	; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>			; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
	; SLM-NEXT: ret <8 x i32> [[TMP3]]			; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
				; SLM-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
				; SLM-NEXT: ret <8 x i32> [[R71]]
	;			;
	; AVX1-LABEL: @ashr_shl_v8i32(			; AVX1-LABEL: @ashr_shl_v8i32(
	; AVX1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]			; AVX1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
	; AVX1-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]			; AVX1-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
	; AVX1-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>			; AVX1-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
	; AVX1-NEXT: ret <8 x i32> [[TMP3]]			; AVX1-NEXT: ret <8 x i32> [[TMP3]]
	;			;
	; AVX2-LABEL: @ashr_shl_v8i32(			; AVX2-LABEL: @ashr_shl_v8i32(
	▲ Show 20 Lines • Show All 413 Lines • Show Last 20 Lines