Skip to content

Commit a798e9f

Browse files
committed Oct 14, 2014
[X86][SSE] pslldq/psrldq shuffle mask decodes
Patch to provide shuffle decodes and asm comments for the sse pslldq/psrldq SSE2/AVX2 byte shift instructions. Differential Revision: http://reviews.llvm.org/D5598 llvm-svn: 219738
1 parent 0b011b3 commit a798e9f

File tree

7 files changed

+217
-146
lines changed

7 files changed

+217
-146
lines changed
 

‎llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,44 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
199199
DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
200200
break;
201201

202+
case X86::PSLLDQri:
203+
case X86::VPSLLDQri:
204+
Src1Name = getRegName(MI->getOperand(1).getReg());
205+
DestName = getRegName(MI->getOperand(0).getReg());
206+
if(MI->getOperand(MI->getNumOperands()-1).isImm())
207+
DecodePSLLDQMask(MVT::v16i8,
208+
MI->getOperand(MI->getNumOperands()-1).getImm(),
209+
ShuffleMask);
210+
break;
211+
212+
case X86::VPSLLDQYri:
213+
Src1Name = getRegName(MI->getOperand(1).getReg());
214+
DestName = getRegName(MI->getOperand(0).getReg());
215+
if(MI->getOperand(MI->getNumOperands()-1).isImm())
216+
DecodePSLLDQMask(MVT::v32i8,
217+
MI->getOperand(MI->getNumOperands()-1).getImm(),
218+
ShuffleMask);
219+
break;
220+
221+
case X86::PSRLDQri:
222+
case X86::VPSRLDQri:
223+
Src1Name = getRegName(MI->getOperand(1).getReg());
224+
DestName = getRegName(MI->getOperand(0).getReg());
225+
if(MI->getOperand(MI->getNumOperands()-1).isImm())
226+
DecodePSRLDQMask(MVT::v16i8,
227+
MI->getOperand(MI->getNumOperands()-1).getImm(),
228+
ShuffleMask);
229+
break;
230+
231+
case X86::VPSRLDQYri:
232+
Src1Name = getRegName(MI->getOperand(1).getReg());
233+
DestName = getRegName(MI->getOperand(0).getReg());
234+
if(MI->getOperand(MI->getNumOperands()-1).isImm())
235+
DecodePSRLDQMask(MVT::v32i8,
236+
MI->getOperand(MI->getNumOperands()-1).getImm(),
237+
ShuffleMask);
238+
break;
239+
202240
case X86::PALIGNR128rr:
203241
case X86::VPALIGNR128rr:
204242
Src1Name = getRegName(MI->getOperand(2).getReg());

‎llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp

+29
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,35 @@ void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
7979
}
8080
}
8181

82+
/// Decode a PSLLDQ/VPSLLDQ byte-shift-left immediate into a shuffle mask.
///
/// One entry is appended to \p ShuffleMask for every byte of \p VT. The shift
/// is applied to each 128-bit lane independently: the lowest \p Imm bytes of a
/// lane become SM_SentinelZero, and every other byte i of the lane selects
/// source byte (i - Imm) of that same lane.
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  const unsigned SizeInBits = VT.getSizeInBits();
  const unsigned TotalBytes = SizeInBits / 8;
  const unsigned LaneCount = SizeInBits / 128;
  const unsigned BytesPerLane = TotalBytes / LaneCount;

  for (unsigned LaneBase = 0; LaneBase < TotalBytes; LaneBase += BytesPerLane) {
    for (unsigned Byte = 0; Byte < BytesPerLane; ++Byte) {
      // Bytes below the shift amount are shifted-in zeros; the rest come from
      // Imm positions lower in the same lane.
      int Entry = SM_SentinelZero;
      if (Byte >= Imm)
        Entry = Byte - Imm + LaneBase;
      ShuffleMask.push_back(Entry);
    }
  }
}
95+
96+
/// Decode a PSRLDQ/VPSRLDQ byte-shift-right immediate into a shuffle mask.
///
/// One entry is appended to \p ShuffleMask for every byte of \p VT. The shift
/// is applied to each 128-bit lane independently: byte i of a lane selects
/// source byte (i + Imm) of that same lane, and any byte whose source would
/// fall at or beyond the end of the lane becomes SM_SentinelZero.
///
/// \p Imm values at or above the lane width produce an all-zero mask.
void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  unsigned VectorSizeInBits = VT.getSizeInBits();
  unsigned NumElts = VectorSizeInBits / 8;
  unsigned NumLanes = VectorSizeInBits / 128;
  unsigned NumLaneElts = NumElts / NumLanes;

  // Clamp the shift to the lane width before adding it to the byte index.
  // "i + Imm" is unsigned arithmetic, so an unclamped, very large Imm would
  // wrap around and pass the in-lane check below, yielding bogus source
  // indices instead of zeros.
  const unsigned Shift = Imm < NumLaneElts ? Imm : NumLaneElts;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i) {
      unsigned Base = i + Shift;
      int M = SM_SentinelZero;
      if (Base < NumLaneElts)
        M = Base + l;
      ShuffleMask.push_back(M);
    }
}
110+
82111
void DecodePALIGNRMask(MVT VT, unsigned Imm,
83112
SmallVectorImpl<int> &ShuffleMask) {
84113
unsigned NumElts = VT.getVectorNumElements();

‎llvm/lib/Target/X86/Utils/X86ShuffleDecode.h

+4
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
4040

4141
void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
4242

43+
/// Decode a per-128-bit-lane byte shift left by \p Imm into \p ShuffleMask.
/// Appends one entry per byte of \p VT; bytes shifted in are SM_SentinelZero.
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
44+
45+
/// Decode a per-128-bit-lane byte shift right by \p Imm into \p ShuffleMask.
/// Appends one entry per byte of \p VT; bytes shifted in are SM_SentinelZero.
void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
46+
4347
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
4448

4549
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

‎llvm/test/CodeGen/X86/avx-intrinsics-x86.ll

+30-30
Original file line numberDiff line numberDiff line change
@@ -455,21 +455,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
455455
ret <4 x i32> %res
456456
}
457457
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
458-
459-
460-
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
461-
; CHECK: vpslldq
462-
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
463-
ret <2 x i64> %res
464-
}
465-
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
466-
467-
468-
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
469-
; CHECK: vpslldq
470-
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
471-
ret <2 x i64> %res
472-
}
458+
459+
460+
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
461+
; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
462+
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
463+
ret <2 x i64> %res
464+
}
465+
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
466+
467+
468+
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
469+
; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
470+
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
471+
ret <2 x i64> %res
472+
}
473473
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
474474

475475

@@ -551,21 +551,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
551551
ret <4 x i32> %res
552552
}
553553
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
554-
555-
556-
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
557-
; CHECK: vpsrldq
558-
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
559-
ret <2 x i64> %res
560-
}
561-
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
562-
563-
564-
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
565-
; CHECK: vpsrldq
566-
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
567-
ret <2 x i64> %res
568-
}
554+
555+
556+
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
557+
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
558+
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
559+
ret <2 x i64> %res
560+
}
561+
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
562+
563+
564+
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
565+
; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
566+
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
567+
ret <2 x i64> %res
568+
}
569569
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
570570

571571

‎llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll

+30-30
Original file line numberDiff line numberDiff line change
@@ -158,21 +158,21 @@ define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
158158
ret <8 x i32> %res
159159
}
160160
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
161-
162-
163-
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
164-
; CHECK: vpslldq
165-
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
166-
ret <4 x i64> %res
167-
}
168-
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
169-
170-
171-
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
172-
; CHECK: vpslldq
173-
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
174-
ret <4 x i64> %res
175-
}
161+
162+
163+
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
164+
; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
165+
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
166+
ret <4 x i64> %res
167+
}
168+
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
169+
170+
171+
define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
172+
; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
173+
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
174+
ret <4 x i64> %res
175+
}
176176
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
177177

178178

@@ -254,21 +254,21 @@ define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
254254
ret <8 x i32> %res
255255
}
256256
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
257-
258-
259-
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
260-
; CHECK: vpsrldq
261-
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
262-
ret <4 x i64> %res
263-
}
264-
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
265-
266-
267-
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
268-
; CHECK: vpsrldq
269-
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
270-
ret <4 x i64> %res
271-
}
257+
258+
259+
define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
260+
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
261+
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
262+
ret <4 x i64> %res
263+
}
264+
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
265+
266+
267+
define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
268+
; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
269+
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
270+
ret <4 x i64> %res
271+
}
272272
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
273273

274274

‎llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll

+30-30
Original file line numberDiff line numberDiff line change
@@ -408,21 +408,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
408408
ret <4 x i32> %res
409409
}
410410
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
411-
412-
413-
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
414-
; CHECK: pslldq
415-
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
416-
ret <2 x i64> %res
417-
}
418-
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
419-
420-
421-
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
422-
; CHECK: pslldq
423-
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
424-
ret <2 x i64> %res
425-
}
411+
412+
413+
define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
414+
; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
415+
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
416+
ret <2 x i64> %res
417+
}
418+
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
419+
420+
421+
define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
422+
; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
423+
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
424+
ret <2 x i64> %res
425+
}
426426
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
427427

428428

@@ -504,21 +504,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
504504
ret <4 x i32> %res
505505
}
506506
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
507-
508-
509-
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
510-
; CHECK: psrldq
511-
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
512-
ret <2 x i64> %res
513-
}
514-
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
515-
516-
517-
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
518-
; CHECK: psrldq
519-
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
520-
ret <2 x i64> %res
521-
}
507+
508+
509+
define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
510+
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
511+
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
512+
ret <2 x i64> %res
513+
}
514+
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
515+
516+
517+
define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
518+
; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
519+
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
520+
ret <2 x i64> %res
521+
}
522522
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
523523

524524

‎llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll

+56-56
Original file line numberDiff line numberDiff line change
@@ -1397,77 +1397,77 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
13971397

13981398
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
13991399
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
1400-
; SSE: # BB#0:
1401-
; SSE-NEXT: movzwl %di, %eax
1402-
; SSE-NEXT: movd %eax, %xmm0
1403-
; SSE-NEXT: pslldq $2, %xmm0
1404-
; SSE-NEXT: retq
1405-
;
1406-
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1407-
; AVX: # BB#0:
1408-
; AVX-NEXT: movzwl %di, %eax
1409-
; AVX-NEXT: vmovd %eax, %xmm0
1410-
; AVX-NEXT: vpslldq $2, %xmm0, %xmm0
1411-
; AVX-NEXT: retq
1412-
%a = insertelement <8 x i16> undef, i16 %i, i32 0
1413-
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
1400+
; SSE: # BB#0:
1401+
; SSE-NEXT: movzwl %di, %eax
1402+
; SSE-NEXT: movd %eax, %xmm0
1403+
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1404+
; SSE-NEXT: retq
1405+
;
1406+
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
1407+
; AVX: # BB#0:
1408+
; AVX-NEXT: movzwl %di, %eax
1409+
; AVX-NEXT: vmovd %eax, %xmm0
1410+
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1411+
; AVX-NEXT: retq
1412+
%a = insertelement <8 x i16> undef, i16 %i, i32 0
1413+
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
14141414
ret <8 x i16> %shuffle
14151415
}
14161416

14171417
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
14181418
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
1419-
; SSE: # BB#0:
1420-
; SSE-NEXT: movzwl %di, %eax
1421-
; SSE-NEXT: movd %eax, %xmm0
1422-
; SSE-NEXT: pslldq $10, %xmm0
1423-
; SSE-NEXT: retq
1424-
;
1425-
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1426-
; AVX: # BB#0:
1427-
; AVX-NEXT: movzwl %di, %eax
1428-
; AVX-NEXT: vmovd %eax, %xmm0
1429-
; AVX-NEXT: vpslldq $10, %xmm0, %xmm0
1430-
; AVX-NEXT: retq
1431-
%a = insertelement <8 x i16> undef, i16 %i, i32 0
1432-
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
1419+
; SSE: # BB#0:
1420+
; SSE-NEXT: movzwl %di, %eax
1421+
; SSE-NEXT: movd %eax, %xmm0
1422+
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1423+
; SSE-NEXT: retq
1424+
;
1425+
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
1426+
; AVX: # BB#0:
1427+
; AVX-NEXT: movzwl %di, %eax
1428+
; AVX-NEXT: vmovd %eax, %xmm0
1429+
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
1430+
; AVX-NEXT: retq
1431+
%a = insertelement <8 x i16> undef, i16 %i, i32 0
1432+
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
14331433
ret <8 x i16> %shuffle
14341434
}
14351435

14361436
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
14371437
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
1438-
; SSE: # BB#0:
1439-
; SSE-NEXT: movzwl %di, %eax
1440-
; SSE-NEXT: movd %eax, %xmm0
1441-
; SSE-NEXT: pslldq $14, %xmm0
1442-
; SSE-NEXT: retq
1443-
;
1444-
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1445-
; AVX: # BB#0:
1446-
; AVX-NEXT: movzwl %di, %eax
1447-
; AVX-NEXT: vmovd %eax, %xmm0
1448-
; AVX-NEXT: vpslldq $14, %xmm0, %xmm0
1449-
; AVX-NEXT: retq
1450-
%a = insertelement <8 x i16> undef, i16 %i, i32 0
1451-
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
1438+
; SSE: # BB#0:
1439+
; SSE-NEXT: movzwl %di, %eax
1440+
; SSE-NEXT: movd %eax, %xmm0
1441+
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
1442+
; SSE-NEXT: retq
1443+
;
1444+
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
1445+
; AVX: # BB#0:
1446+
; AVX-NEXT: movzwl %di, %eax
1447+
; AVX-NEXT: vmovd %eax, %xmm0
1448+
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
1449+
; AVX-NEXT: retq
1450+
%a = insertelement <8 x i16> undef, i16 %i, i32 0
1451+
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
14521452
ret <8 x i16> %shuffle
14531453
}
14541454

14551455
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
14561456
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
1457-
; SSE: # BB#0:
1458-
; SSE-NEXT: movzwl %di, %eax
1459-
; SSE-NEXT: movd %eax, %xmm0
1460-
; SSE-NEXT: pslldq $4, %xmm0
1461-
; SSE-NEXT: retq
1462-
;
1463-
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1464-
; AVX: # BB#0:
1465-
; AVX-NEXT: movzwl %di, %eax
1466-
; AVX-NEXT: vmovd %eax, %xmm0
1467-
; AVX-NEXT: vpslldq $4, %xmm0, %xmm0
1468-
; AVX-NEXT: retq
1469-
%a = insertelement <8 x i16> undef, i16 %i, i32 3
1470-
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
1457+
; SSE: # BB#0:
1458+
; SSE-NEXT: movzwl %di, %eax
1459+
; SSE-NEXT: movd %eax, %xmm0
1460+
; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1461+
; SSE-NEXT: retq
1462+
;
1463+
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
1464+
; AVX: # BB#0:
1465+
; AVX-NEXT: movzwl %di, %eax
1466+
; AVX-NEXT: vmovd %eax, %xmm0
1467+
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1468+
; AVX-NEXT: retq
1469+
%a = insertelement <8 x i16> undef, i16 %i, i32 3
1470+
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
14711471
ret <8 x i16> %shuffle
14721472
}
14731473

0 commit comments

Comments
 (0)
Please sign in to comment.