Skip to content

Commit 46cd4f7

Browse files
committedFeb 3, 2015
[X86][SSE] psrl(w/d/q) and psll(w/d/q) bit shifts for SSE2
Patch to match cases where shuffle masks can be reduced to bit shifts. Similar to byte shift shuffle matching from D5699. Differential Revision: http://reviews.llvm.org/D6649 llvm-svn: 228047
1 parent fe88b18 commit 46cd4f7

9 files changed

+670
-15
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+108
Original file line numberDiff line numberDiff line change
@@ -7808,6 +7808,79 @@ static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
78087808
return SDValue();
78097809
}
78107810

7811+
/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
7812+
///
7813+
/// Attempts to match a shuffle mask against the PSRL(W/D/Q) and PSLL(W/D/Q)
7814+
/// SSE2 and AVX2 logical bit-shift instructions. The function matches
7815+
/// elements from one of the input vectors shuffled to the left or right
7816+
/// with zeroable elements 'shifted in'.
7817+
static SDValue lowerVectorShuffleAsBitShift(SDLoc DL, MVT VT, SDValue V1,
7818+
SDValue V2, ArrayRef<int> Mask,
7819+
SelectionDAG &DAG) {
7820+
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
7821+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7822+
7823+
int Size = Mask.size();
7824+
assert(Size == VT.getVectorNumElements() && "Unexpected mask size");
7825+
7826+
// PSRL : (little-endian) right bit shift.
7827+
// [ 1, zz, 3, zz]
7828+
// [ -1, -1, 7, zz]
7829+
// PSHL : (little-endian) left bit shift.
7830+
// [ zz, 0, zz, 2 ]
7831+
// [ -1, 4, zz, -1 ]
7832+
auto MatchBitShift = [&](int Shift, int Scale) -> SDValue {
7833+
MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale);
7834+
MVT ShiftVT = MVT::getVectorVT(ShiftSVT, Size / Scale);
7835+
assert(TLI.isTypeLegal(ShiftVT) && "Illegal integer vector type");
7836+
7837+
bool MatchLeft = true, MatchRight = true;
7838+
for (int i = 0; i != Size; i += Scale) {
7839+
for (int j = 0; j != Shift; j++) {
7840+
MatchLeft &= Zeroable[i + j];
7841+
}
7842+
for (int j = Scale - Shift; j != Scale; j++) {
7843+
MatchRight &= Zeroable[i + j];
7844+
}
7845+
}
7846+
if (!(MatchLeft || MatchRight))
7847+
return SDValue();
7848+
7849+
bool MatchV1 = true, MatchV2 = true;
7850+
for (int i = 0; i != Size; i += Scale) {
7851+
unsigned Pos = MatchLeft ? i + Shift : i;
7852+
unsigned Low = MatchLeft ? i : i + Shift;
7853+
unsigned Len = Scale - Shift;
7854+
MatchV1 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low);
7855+
MatchV2 &= isSequentialOrUndefInRange(Mask, Pos, Len, Low + Size);
7856+
}
7857+
if (!(MatchV1 || MatchV2))
7858+
return SDValue();
7859+
7860+
// Cast the inputs to ShiftVT to match VSRLI/VSHLI and back again.
7861+
unsigned OpCode = MatchLeft ? X86ISD::VSHLI : X86ISD::VSRLI;
7862+
int ShiftAmt = Shift * VT.getScalarSizeInBits();
7863+
SDValue V = MatchV1 ? V1 : V2;
7864+
V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V);
7865+
V = DAG.getNode(OpCode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, MVT::i8));
7866+
return DAG.getNode(ISD::BITCAST, DL, VT, V);
7867+
};
7868+
7869+
// SSE/AVX supports logical shifts up to 64-bit integers - so we can just
7870+
// keep doubling the size of the integer elements up to that. We can
7871+
// then shift the elements of the integer vector by whole multiples of
7872+
// their width within the elements of the larger integer vector. Test each
7873+
// multiple to see if we can find a match with the moved element indices
7874+
// and that the shifted in elements are all zeroable.
7875+
for (int Scale = 2; Scale * VT.getScalarSizeInBits() <= 64; Scale *= 2)
7876+
for (int Shift = 1; Shift != Scale; Shift++)
7877+
if (SDValue BitShift = MatchBitShift(Shift, Scale))
7878+
return BitShift;
7879+
7880+
// no match
7881+
return SDValue();
7882+
}
7883+
78117884
/// \brief Lower a vector shuffle as a zero or any extension.
78127885
///
78137886
/// Given a specific number of elements, element bit width, and extension
@@ -8654,6 +8727,11 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
86548727
getV4X86ShuffleImm8ForMask(Mask, DAG));
86558728
}
86568729

8730+
// Try to use bit shift instructions.
8731+
if (SDValue Shift = lowerVectorShuffleAsBitShift(
8732+
DL, MVT::v4i32, V1, V2, Mask, DAG))
8733+
return Shift;
8734+
86578735
// Try to use byte shift instructions.
86588736
if (SDValue Shift = lowerVectorShuffleAsByteShift(
86598737
DL, MVT::v4i32, V1, V2, Mask, DAG))
@@ -8739,6 +8817,11 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
87398817
Mask, Subtarget, DAG))
87408818
return Broadcast;
87418819

8820+
// Try to use bit shift instructions.
8821+
if (SDValue Shift = lowerVectorShuffleAsBitShift(
8822+
DL, MVT::v8i16, V, V, Mask, DAG))
8823+
return Shift;
8824+
87428825
// Try to use byte shift instructions.
87438826
if (SDValue Shift = lowerVectorShuffleAsByteShift(
87448827
DL, MVT::v8i16, V, V, Mask, DAG))
@@ -9356,6 +9439,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
93569439
assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
93579440
"to be V1-input shuffles.");
93589441

9442+
// Try to use bit shift instructions.
9443+
if (SDValue Shift = lowerVectorShuffleAsBitShift(
9444+
DL, MVT::v8i16, V1, V2, Mask, DAG))
9445+
return Shift;
9446+
93599447
// Try to use byte shift instructions.
93609448
if (SDValue Shift = lowerVectorShuffleAsByteShift(
93619449
DL, MVT::v8i16, V1, V2, Mask, DAG))
@@ -9512,6 +9600,11 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
95129600
ArrayRef<int> OrigMask = SVOp->getMask();
95139601
assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
95149602

9603+
// Try to use bit shift instructions.
9604+
if (SDValue Shift = lowerVectorShuffleAsBitShift(
9605+
DL, MVT::v16i8, V1, V2, OrigMask, DAG))
9606+
return Shift;
9607+
95159608
// Try to use byte shift instructions.
95169609
if (SDValue Shift = lowerVectorShuffleAsByteShift(
95179610
DL, MVT::v16i8, V1, V2, OrigMask, DAG))
@@ -10602,6 +10695,11 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
1060210695
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
1060310696
}
1060410697

10698+
// Try to use bit shift instructions.
10699+
if (SDValue Shift = lowerVectorShuffleAsBitShift(
10700+
DL, MVT::v8i32, V1, V2, Mask, DAG))
10701+
return Shift;
10702+
1060510703
// Try to simplify this by merging 128-bit lanes to enable a lane-based
1060610704
// shuffle.
1060710705
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
@@ -10685,6 +10783,11 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
1068510783
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask)));
1068610784
}
1068710785

10786+
// Try to use bit shift instructions.
10787+
if (SDValue Shift = lowerVectorShuffleAsBitShift(
10788+
DL, MVT::v16i16, V1, V2, Mask, DAG))
10789+
return Shift;
10790+
1068810791
// Try to simplify this by merging 128-bit lanes to enable a lane-based
1068910792
// shuffle.
1069010793
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
@@ -10763,6 +10866,11 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
1076310866
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask));
1076410867
}
1076510868

10869+
// Try to use bit shift instructions.
10870+
if (SDValue Shift = lowerVectorShuffleAsBitShift(
10871+
DL, MVT::v32i8, V1, V2, Mask, DAG))
10872+
return Shift;
10873+
1076610874
// Try to simplify this by merging 128-bit lanes to enable a lane-based
1076710875
// shuffle.
1076810876
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(

‎llvm/test/CodeGen/X86/combine-or.ll

+8-10
Original file line numberDiff line numberDiff line change
@@ -204,16 +204,14 @@ define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
204204
; shuffle instruction when the shuffle indexes are not compatible.
205205

206206
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
207-
; CHECK-LABEL: test17:
208-
; CHECK: # BB#0:
209-
; CHECK-NEXT: xorps %xmm2, %xmm2
210-
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]
211-
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
212-
; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
213-
; CHECK-NEXT: orps %xmm2, %xmm0
214-
; CHECK-NEXT: retq
215-
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
216-
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
207+
; CHECK-LABEL: test17:
208+
; CHECK: # BB#0:
209+
; CHECK-NEXT: psllq $32, %xmm0
210+
; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
211+
; CHECK-NEXT: por %xmm1, %xmm0
212+
; CHECK-NEXT: retq
213+
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
214+
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
217215
%or = or <4 x i32> %shuf1, %shuf2
218216
ret <4 x i32> %or
219217
}

‎llvm/test/CodeGen/X86/vec_insert-5.ll

+20-2
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ define <4 x float> @t4(<4 x float>* %P) nounwind {
6363
define <16 x i8> @t5(<16 x i8> %x) nounwind {
6464
; CHECK-LABEL: t5:
6565
; CHECK: # BB#0:
66-
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
66+
; CHECK-NEXT: psrlw $8, %xmm0
6767
; CHECK-NEXT: retl
6868
%s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
6969
ret <16 x i8> %s
@@ -72,7 +72,7 @@ define <16 x i8> @t5(<16 x i8> %x) nounwind {
7272
define <16 x i8> @t6(<16 x i8> %x) nounwind {
7373
; CHECK-LABEL: t6:
7474
; CHECK: # BB#0:
75-
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
75+
; CHECK-NEXT: psrlw $8, %xmm0
7676
; CHECK-NEXT: retl
7777
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
7878
ret <16 x i8> %s
@@ -86,3 +86,21 @@ define <16 x i8> @t7(<16 x i8> %x) nounwind {
8686
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
8787
ret <16 x i8> %s
8888
}
89+
90+
define <16 x i8> @t8(<16 x i8> %x) nounwind {
91+
; CHECK-LABEL: t8:
92+
; CHECK: # BB#0:
93+
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
94+
; CHECK-NEXT: retl
95+
%s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
96+
ret <16 x i8> %s
97+
}
98+
99+
define <16 x i8> @t9(<16 x i8> %x) nounwind {
100+
; CHECK-LABEL: t9:
101+
; CHECK: # BB#0:
102+
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
103+
; CHECK-NEXT: retl
104+
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
105+
ret <16 x i8> %s
106+
}

‎llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll

+105-3
Original file line numberDiff line numberDiff line change
@@ -705,21 +705,21 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
705705
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
706706
; SSSE3: # BB#0:
707707
; SSSE3-NEXT: movd %edi, %xmm0
708-
; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
708+
; SSSE3-NEXT: pslld $24, %xmm0
709709
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
710710
; SSSE3-NEXT: retq
711711
;
712712
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
713713
; SSE41: # BB#0:
714714
; SSE41-NEXT: movd %edi, %xmm0
715-
; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
715+
; SSE41-NEXT: pslld $24, %xmm0
716716
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
717717
; SSE41-NEXT: retq
718718
;
719719
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
720720
; AVX: # BB#0:
721721
; AVX-NEXT: vmovd %edi, %xmm0
722-
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
722+
; AVX-NEXT: vpslld $24, %xmm0
723723
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
724724
; AVX-NEXT: retq
725725
%a = insertelement <16 x i8> undef, i8 %i, i32 3
@@ -1185,6 +1185,108 @@ entry:
11851185
ret void
11861186
}
11871187

1188+
;
1189+
; Shuffle to logical bit shifts
1190+
;
1191+
1192+
define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
1193+
; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1194+
; SSE: # BB#0:
1195+
; SSE-NEXT: psllw $8, %xmm0
1196+
; SSE-NEXT: retq
1197+
;
1198+
; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1199+
; AVX: # BB#0:
1200+
; AVX-NEXT: vpsllw $8, %xmm0
1201+
; AVX-NEXT: retq
1202+
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
1203+
ret <16 x i8> %shuffle
1204+
}
1205+
1206+
define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
1207+
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1208+
; SSE: # BB#0:
1209+
; SSE-NEXT: pslld $24, %xmm0
1210+
; SSE-NEXT: retq
1211+
;
1212+
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1213+
; AVX: # BB#0:
1214+
; AVX-NEXT: vpslld $24, %xmm0
1215+
; AVX-NEXT: retq
1216+
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
1217+
ret <16 x i8> %shuffle
1218+
}
1219+
1220+
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
1221+
; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
1222+
; SSE: # BB#0:
1223+
; SSE-NEXT: psllq $56, %xmm0
1224+
; SSE-NEXT: retq
1225+
;
1226+
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
1227+
; AVX: # BB#0:
1228+
; AVX-NEXT: vpsllq $56, %xmm0
1229+
; AVX-NEXT: retq
1230+
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
1231+
ret <16 x i8> %shuffle
1232+
}
1233+
1234+
define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
1235+
; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
1236+
; SSE: # BB#0:
1237+
; SSE-NEXT: psllq $8, %xmm0
1238+
; SSE-NEXT: retq
1239+
;
1240+
; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
1241+
; AVX: # BB#0:
1242+
; AVX-NEXT: vpsllq $8, %xmm0
1243+
; AVX-NEXT: retq
1244+
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14>
1245+
ret <16 x i8> %shuffle
1246+
}
1247+
1248+
define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
1249+
; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
1250+
; SSE: # BB#0:
1251+
; SSE-NEXT: psrlw $8, %xmm0
1252+
; SSE-NEXT: retq
1253+
;
1254+
; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
1255+
; AVX: # BB#0:
1256+
; AVX-NEXT: vpsrlw $8, %xmm0
1257+
; AVX-NEXT: retq
1258+
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
1259+
ret <16 x i8> %shuffle
1260+
}
1261+
1262+
define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
1263+
; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
1264+
; SSE: # BB#0:
1265+
; SSE-NEXT: psrld $16, %xmm0
1266+
; SSE-NEXT: retq
1267+
;
1268+
; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
1269+
; AVX: # BB#0:
1270+
; AVX-NEXT: vpsrld $16, %xmm0
1271+
; AVX-NEXT: retq
1272+
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16>
1273+
ret <16 x i8> %shuffle
1274+
}
1275+
1276+
define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
1277+
; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
1278+
; SSE: # BB#0:
1279+
; SSE-NEXT: psrlq $56, %xmm0
1280+
; SSE-NEXT: retq
1281+
;
1282+
; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
1283+
; AVX: # BB#0:
1284+
; AVX-NEXT: vpsrlq $56, %xmm0
1285+
; AVX-NEXT: retq
1286+
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16>
1287+
ret <16 x i8> %shuffle
1288+
}
1289+
11881290
define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
11891291
; SSE2-LABEL: PR12412:
11901292
; SSE2: # BB#0: # %entry

‎llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll

+32
Original file line numberDiff line numberDiff line change
@@ -1373,3 +1373,35 @@ define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
13731373
%shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
13741374
ret <4 x float> %shuffle
13751375
}
1376+
1377+
;
1378+
; Shuffle to logical bit shifts
1379+
;
1380+
1381+
define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) {
1382+
; SSE-LABEL: shuffle_v4i32_z0zX:
1383+
; SSE: # BB#0:
1384+
; SSE-NEXT: psllq $32, %xmm0
1385+
; SSE-NEXT: retq
1386+
;
1387+
; AVX-LABEL: shuffle_v4i32_z0zX:
1388+
; AVX: # BB#0:
1389+
; AVX-NEXT: vpsllq $32, %xmm0
1390+
; AVX-NEXT: retq
1391+
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef>
1392+
ret <4 x i32> %shuffle
1393+
}
1394+
1395+
define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
1396+
; SSE-LABEL: shuffle_v4i32_1z3z:
1397+
; SSE: # BB#0:
1398+
; SSE-NEXT: psrlq $32, %xmm0
1399+
; SSE-NEXT: retq
1400+
;
1401+
; AVX-LABEL: shuffle_v4i32_1z3z:
1402+
; AVX: # BB#0:
1403+
; AVX-NEXT: vpsrlq $32, %xmm0
1404+
; AVX-NEXT: retq
1405+
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
1406+
ret <4 x i32> %shuffle
1407+
}

‎llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll

+115
Original file line numberDiff line numberDiff line change
@@ -1918,6 +1918,121 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
19181918
ret <8 x i16> %shuffle
19191919
}
19201920

1921+
;
1922+
; Shuffle to logical bit shifts
1923+
;
1924+
define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
1925+
; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
1926+
; SSE: # BB#0:
1927+
; SSE-NEXT: pslld $16, %xmm0
1928+
; SSE-NEXT: retq
1929+
;
1930+
; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
1931+
; AVX: # BB#0:
1932+
; AVX-NEXT: vpslld $16, %xmm0
1933+
; AVX-NEXT: retq
1934+
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
1935+
ret <8 x i16> %shuffle
1936+
}
1937+
1938+
define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
1939+
; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
1940+
; SSE: # BB#0:
1941+
; SSE-NEXT: psllq $48, %xmm0
1942+
; SSE-NEXT: retq
1943+
;
1944+
; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
1945+
; AVX: # BB#0:
1946+
; AVX-NEXT: vpsllq $48, %xmm0
1947+
; AVX-NEXT: retq
1948+
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
1949+
ret <8 x i16> %shuffle
1950+
}
1951+
1952+
define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
1953+
; SSE-LABEL: shuffle_v8i16_zz01zX4X:
1954+
; SSE: # BB#0:
1955+
; SSE-NEXT: psllq $32, %xmm0
1956+
; SSE-NEXT: retq
1957+
;
1958+
; AVX-LABEL: shuffle_v8i16_zz01zX4X:
1959+
; AVX: # BB#0:
1960+
; AVX-NEXT: vpsllq $32, %xmm0
1961+
; AVX-NEXT: retq
1962+
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
1963+
ret <8 x i16> %shuffle
1964+
}
1965+
1966+
define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
1967+
; SSE-LABEL: shuffle_v8i16_z0X2z456:
1968+
; SSE: # BB#0:
1969+
; SSE-NEXT: psllq $16, %xmm0
1970+
; SSE-NEXT: retq
1971+
;
1972+
; AVX-LABEL: shuffle_v8i16_z0X2z456:
1973+
; AVX: # BB#0:
1974+
; AVX-NEXT: vpsllq $16, %xmm0
1975+
; AVX-NEXT: retq
1976+
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
1977+
ret <8 x i16> %shuffle
1978+
}
1979+
1980+
define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
1981+
; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
1982+
; SSE: # BB#0:
1983+
; SSE-NEXT: psrld $16, %xmm0
1984+
; SSE-NEXT: retq
1985+
;
1986+
; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
1987+
; AVX: # BB#0:
1988+
; AVX-NEXT: vpsrld $16, %xmm0
1989+
; AVX-NEXT: retq
1990+
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
1991+
ret <8 x i16> %shuffle
1992+
}
1993+
1994+
define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
1995+
; SSE-LABEL: shuffle_v8i16_1X3z567z:
1996+
; SSE: # BB#0:
1997+
; SSE-NEXT: psrlq $16, %xmm0
1998+
; SSE-NEXT: retq
1999+
;
2000+
; AVX-LABEL: shuffle_v8i16_1X3z567z:
2001+
; AVX: # BB#0:
2002+
; AVX-NEXT: vpsrlq $16, %xmm0
2003+
; AVX-NEXT: retq
2004+
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
2005+
ret <8 x i16> %shuffle
2006+
}
2007+
2008+
define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
2009+
; SSE-LABEL: shuffle_v8i16_23zz67zz:
2010+
; SSE: # BB#0:
2011+
; SSE-NEXT: psrlq $32, %xmm0
2012+
; SSE-NEXT: retq
2013+
;
2014+
; AVX-LABEL: shuffle_v8i16_23zz67zz:
2015+
; AVX: # BB#0:
2016+
; AVX-NEXT: vpsrlq $32, %xmm0
2017+
; AVX-NEXT: retq
2018+
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
2019+
ret <8 x i16> %shuffle
2020+
}
2021+
2022+
define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
2023+
; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
2024+
; SSE: # BB#0:
2025+
; SSE-NEXT: psrlq $48, %xmm0
2026+
; SSE-NEXT: retq
2027+
;
2028+
; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
2029+
; AVX: # BB#0:
2030+
; AVX-NEXT: vpsrlq $48, %xmm0
2031+
; AVX-NEXT: retq
2032+
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
2033+
ret <8 x i16> %shuffle
2034+
}
2035+
19212036
define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
19222037
; SSE-LABEL: shuffle_v8i16_01u3zzuz:
19232038
; SSE: # BB#0:

‎llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll

+104
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,110 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_2
13631363
ret <16 x i16> %shuffle
13641364
}
13651365

1366+
;
1367+
; Shuffle to logical bit shifts
1368+
;
1369+
1370+
define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
1371+
; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1372+
; AVX1: # BB#0:
1373+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1374+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
1375+
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1376+
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1377+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
1378+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
1379+
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1380+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
1381+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1382+
; AVX1-NEXT: retq
1383+
;
1384+
; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
1385+
; AVX2: # BB#0:
1386+
; AVX2-NEXT: vpslld $16, %ymm0
1387+
; AVX2-NEXT: retq
1388+
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
1389+
ret <16 x i16> %shuffle
1390+
}
1391+
1392+
define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
1393+
; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1394+
; AVX1: # BB#0:
1395+
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1396+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1397+
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1398+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
1399+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1400+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,0,3,4,5,6,7]
1401+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm1[0,0,0,0,4,5,6,7]
1402+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1403+
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1404+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1405+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1406+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,0,3,4,5,6,7]
1407+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
1408+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1409+
; AVX1-NEXT: retq
1410+
;
1411+
; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
1412+
; AVX2: # BB#0:
1413+
; AVX2-NEXT: vpsllq $48, %ymm0
1414+
; AVX2-NEXT: retq
1415+
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
1416+
ret <16 x i16> %shuffle
1417+
}
1418+
1419+
define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
1420+
; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
1421+
; AVX1: # BB#0:
1422+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1423+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
1424+
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1425+
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1426+
; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
1427+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
1428+
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1429+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
1430+
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1431+
; AVX1-NEXT: retq
1432+
;
1433+
; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
1434+
; AVX2: # BB#0:
1435+
; AVX2-NEXT: vpsrld $16, %ymm0
1436+
; AVX2-NEXT: retq
1437+
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
1438+
ret <16 x i16> %shuffle
1439+
}
1440+
1441+
define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
1442+
; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
1443+
; AVX1: # BB#0:
1444+
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1445+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1446+
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
1447+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
1448+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8,9,12,13,2,3,2,3,8,9,12,13,12,13,14,15]
1449+
; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm3
1450+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
1451+
; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2
1452+
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1453+
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
1454+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
1455+
; AVX1-NEXT: vpshufb %xmm4, %xmm3, %xmm3
1456+
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1457+
; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm0
1458+
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
1459+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1460+
; AVX1-NEXT: retq
1461+
;
1462+
; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
1463+
; AVX2: # BB#0:
1464+
; AVX2-NEXT: vpsrlq $32, %ymm0
1465+
; AVX2-NEXT: retq
1466+
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
1467+
ret <16 x i16> %shuffle
1468+
}
1469+
13661470
define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
13671471
; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
13681472
; AVX1: # BB#0:

‎llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll

+142
Original file line numberDiff line numberDiff line change
@@ -1655,6 +1655,148 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_
16551655
ret <32 x i8> %shuffle
16561656
}
16571657

;
; Shuffle to logical bit shifts
;
; Interleaving zero bytes below each even source byte is a 16-bit logical
; left shift by 8 per word (vpsllw $8) on AVX2.
define <32 x i8> @shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30(<32 x i8> %a) {
; AVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw $0, %xmm3, %xmm3 # xmm3 = xmm3[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
; AVX2: # BB#0:
; AVX2-NEXT: vpsllw $8, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 32, i32 2, i32 32, i32 4, i32 32, i32 6, i32 32, i32 8, i32 32, i32 10, i32 32, i32 12, i32 32, i32 14, i32 32, i32 16, i32 32, i32 18, i32 32, i32 20, i32 32, i32 22, i32 32, i32 24, i32 32, i32 26, i32 32, i32 28, i32 32, i32 30>
  ret <32 x i8> %shuffle
}
1684+
1685+
; Two zero bytes below each byte pair is a 32-bit logical left shift by 16
; per dword (vpslld $16) on AVX2.
define <32 x i8> @shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29(<32 x i8> %a) {
; AVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,0,1,128,128,4,5,128,128,8,9,128,128,12,13]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[0,0],zero,zero,xmm3[0,0],zero,zero,xmm3[0,0],zero,zero,xmm3[0,0],zero,zero
; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
; AVX2: # BB#0:
; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 4, i32 5, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 12, i32 13, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 20, i32 21, i32 32, i32 32, i32 24, i32 25, i32 32, i32 32, i32 28, i32 29>
  ret <32 x i8> %shuffle
}
1706+
1707+
; Six zero bytes below each byte pair is a 64-bit logical left shift by 48
; per qword (vpsllq $48) on AVX2.
define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25(<32 x i8> %a) {
; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,0,1,128,128,128,128,128,128,8,9]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[0,0,0,0,0,0],zero,zero,xmm3[0,0,0,0,0,0],zero,zero
; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
; AVX2: # BB#0:
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25>
  ret <32 x i8> %shuffle
}
1728+
1729+
; Keeping the odd bytes and zeroing the even slots is a 16-bit logical right
; shift by 8 per word (vpsrlw $8) on AVX2.
define <32 x i8> @shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz(<32 x i8> %a) {
; AVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw $0, %xmm3, %xmm3 # xmm3 = xmm3[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 32, i32 17, i32 32, i32 19, i32 32, i32 21, i32 32, i32 23, i32 32, i32 25, i32 32, i32 27, i32 32, i32 29, i32 32, i32 31, i32 32>
  ret <32 x i8> %shuffle
}
1751+
1752+
; Keeping the top byte pair of each dword and zeroing the rest is a 32-bit
; logical right shift by 16 per dword (vpsrld $16) on AVX2.
define <32 x i8> @shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz(<32 x i8> %a) {
; AVX1-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3,128,128,6,7,128,128,10,11,128,128,14,15,128,128]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,xmm3[0,0],zero,zero,xmm3[0,0],zero,zero,xmm3[0,0],zero,zero,xmm3[0,0]
; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 32, i32 32, i32 18, i32 19, i32 32, i32 32, i32 22, i32 23, i32 32, i32 32, i32 26, i32 27, i32 32, i32 32, i32 30, i32 31, i32 32, i32 32>
  ret <32 x i8> %shuffle
}
1773+
1774+
; Keeping only the top byte of each qword is a 64-bit logical right shift by
; 56 per qword (vpsrlq $56) on AVX2. (NOTE(review): the name's "_z_" at
; element 13 looks like a typo for "_zz_", but define and CHECK-LABEL agree,
; so the test is self-consistent.)
define <32 x i8> @shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
; AVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <7,128,128,128,15,128,128,128,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = zero,xmm3[0,0,0],zero,xmm3[0,0,0,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpor %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq $56, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 23, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %shuffle
}
1799+
16581800
define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
16591801
; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
16601802
; AVX1: # BB#0:

llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll (+36 lines)
@@ -1848,6 +1848,42 @@ define <8 x float> @splat_v8f32(<4 x float> %r) {
18481848
ret <8 x float> %1
18491849
}
18501850

;
; Shuffle to logical bit shifts
;
; Shifting each i32 pair up one lane (zeros in the low element, undefs
; allowed) matches a 32-bit logical left shift per qword (vpsllq $32) on
; AVX2.
define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
; AVX2: # BB#0:
; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
  ret <8 x i32> %shuffle
}
1870+
1871+
; Shifting each i32 pair down one lane (zeros in the high element, undefs
; allowed) matches a 32-bit logical right shift per qword (vpsrlq $32) on
; AVX2.
define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4],ymm1[5],ymm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
  ret <8 x i32> %shuffle
}
1886+
18511887
define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
18521888
; ALL-LABEL: concat_v2f32_1:
18531889
; ALL: # BB#0: # %entry

0 commit comments

Comments
 (0)
Please sign in to comment.