Index: lib/Target/X86/InstPrinter/X86InstComments.cpp
===================================================================
--- lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -199,6 +199,44 @@
     DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
     break;
 
+  case X86::PSLLDQri:
+  case X86::VPSLLDQri:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(MI->getNumOperands()-1).isImm())
+      DecodePSLLDQMask(MVT(MVT::v16i8).getSizeInBits(),
+                       MI->getOperand(MI->getNumOperands()-1).getImm(),
+                       ShuffleMask);
+    break;
+
+  case X86::VPSLLDQYri:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(MI->getNumOperands()-1).isImm())
+      DecodePSLLDQMask(MVT(MVT::v32i8).getSizeInBits(),
+                       MI->getOperand(MI->getNumOperands()-1).getImm(),
+                       ShuffleMask);
+    break;
+
+  case X86::PSRLDQri:
+  case X86::VPSRLDQri:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(MI->getNumOperands()-1).isImm())
+      DecodePSRLDQMask(MVT(MVT::v16i8).getSizeInBits(),
+                       MI->getOperand(MI->getNumOperands()-1).getImm(),
+                       ShuffleMask);
+    break;
+
+  case X86::VPSRLDQYri:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+    DestName = getRegName(MI->getOperand(0).getReg());
+    if (MI->getOperand(MI->getNumOperands()-1).isImm())
+      DecodePSRLDQMask(MVT(MVT::v32i8).getSizeInBits(),
+                       MI->getOperand(MI->getNumOperands()-1).getImm(),
+                       ShuffleMask);
+    break;
+
   case X86::PALIGNR128rr:
   case X86::VPALIGNR128rr:
     Src1Name = getRegName(MI->getOperand(2).getReg());
Index: lib/Target/X86/Utils/X86ShuffleDecode.h
===================================================================
--- lib/Target/X86/Utils/X86ShuffleDecode.h
+++ lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -40,6 +40,10 @@
 void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
 
+void DecodePSLLDQMask(unsigned VectorSizeInBits, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSRLDQMask(unsigned VectorSizeInBits, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
 void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
 void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
Index: lib/Target/X86/Utils/X86ShuffleDecode.cpp
===================================================================
--- lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -79,6 +79,35 @@
   }
 }
 
+void DecodePSLLDQMask(unsigned VectorSizeInBits, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+  unsigned NumElts = VectorSizeInBits / 8;
+  unsigned NumLanes = VectorSizeInBits / 128;
+  unsigned NumLaneElts = NumElts / NumLanes;
+
+  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+    for (unsigned i = 0; i != NumLaneElts; ++i) {
+      int M = SM_SentinelZero;
+      if (i >= Imm) M = i - Imm + l;
+      ShuffleMask.push_back(M);
+    }
+  }
+}
+
+void DecodePSRLDQMask(unsigned VectorSizeInBits, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+  unsigned NumElts = VectorSizeInBits / 8;
+  unsigned NumLanes = VectorSizeInBits / 128;
+  unsigned NumLaneElts = NumElts / NumLanes;
+
+  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+    for (unsigned i = 0; i != NumLaneElts; ++i) {
+      unsigned Base = i + Imm;
+      int M = Base + l;
+      if (Base >= NumLaneElts) M = SM_SentinelZero;
+      ShuffleMask.push_back(M);
+    }
+  }
+}
+
 void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
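
The two decoders above are easy to sanity-check in isolation. Below is a minimal standalone sketch (plain C++, no LLVM dependencies; kSentinelZero and PslldqMask are illustrative stand-ins for SM_SentinelZero and DecodePSLLDQMask, not LLVM API) that reproduces the left-shift mask computation and prints it in the style of the assembly comments checked by the tests that follow:

#include <cstdio>
#include <vector>

// Illustrative stand-in for LLVM's SM_SentinelZero: marks a destination
// byte that the shift fills with zero.
const int kSentinelZero = -2;

// Mirrors DecodePSLLDQMask above: each 128-bit lane shifts left by Imm
// bytes independently; byte i of a lane reads lane byte i - Imm, or
// becomes zero when i < Imm.
std::vector<int> PslldqMask(unsigned VectorSizeInBits, unsigned Imm) {
  unsigned NumElts = VectorSizeInBits / 8;    // total bytes in the vector
  unsigned NumLanes = VectorSizeInBits / 128; // number of 128-bit lanes
  unsigned NumLaneElts = NumElts / NumLanes;  // bytes per lane
  std::vector<int> Mask;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = 0; i != NumLaneElts; ++i)
      Mask.push_back(i >= Imm ? int(i - Imm + l) : kSentinelZero);
  return Mask;
}

int main() {
  // 128-bit PSLLDQ by 7 bytes, as in test_x86_sse2_psll_dq_bs below:
  // prints "zero" seven times, then source bytes 0..8.
  for (int M : PslldqMask(128, 7)) {
    if (M == kSentinelZero)
      std::printf("zero,");
    else
      std::printf("%d,", M);
  }
  std::printf("\n");
  return 0;
}

For PslldqMask(128, 7) this prints zero seven times followed by 0..8, which is exactly the vpslldq comment checked in test_x86_sse2_psll_dq_bs below.
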
Index: test/CodeGen/X86/avx-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/avx-intrinsics-x86.ll
+++ test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -458,7 +458,7 @@
 
 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
-  ; CHECK: vpslldq
+  ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
   %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -466,7 +466,7 @@
 
 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
-  ; CHECK: vpslldq
+  ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
   %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -554,7 +554,7 @@
 
 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
-  ; CHECK: vpsrldq
+  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -562,7 +562,7 @@
 
 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
-  ; CHECK: vpsrldq
+  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
Index: test/CodeGen/X86/avx2-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -161,7 +161,7 @@
 
 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
-  ; CHECK: vpslldq
+  ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
   %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
   ret <4 x i64> %res
 }
@@ -169,7 +169,7 @@
 
 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
-  ; CHECK: vpslldq
+  ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
   %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
   ret <4 x i64> %res
 }
@@ -257,7 +257,7 @@
 
 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
-  ; CHECK: vpsrldq
+  ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
   %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
   ret <4 x i64> %res
 }
@@ -265,7 +265,7 @@
 
 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
-  ; CHECK: vpsrldq
+  ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
   %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
   ret <4 x i64> %res
 }
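
Note the lane-wise shape of the 256-bit expectations above: VPSLLDQ and VPSRLDQ on YMM registers shift each 128-bit lane independently, which is what the NumLanes loop in DecodePSLLDQMask/DecodePSRLDQMask models. Worked through for the byte shift of 7 in test_x86_avx2_psll_dq_bs: lane 0 (l = 0) produces zero x7 followed by source bytes 0..8, and lane 1 (l = 16) repeats the same pattern with every index offset by 16, giving zero x7 followed by bytes 16..24, exactly the string the CHECK line expects.
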
Index: test/CodeGen/X86/sse2-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -411,7 +411,7 @@
 
 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
-  ; CHECK: pslldq
+  ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
   %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -419,7 +419,7 @@
 
 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
-  ; CHECK: pslldq
+  ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
   %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -507,7 +507,7 @@
 
 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
-  ; CHECK: psrldq
+  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -515,7 +515,7 @@
 
 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
-  ; CHECK: psrldq
+  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
Index: test/CodeGen/X86/vector-shuffle-128-v8.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1400,14 +1400,14 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movzwl %di, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    pslldq $2, %xmm0
+; SSE-NEXT:    pslldq $2, %xmm0 {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    movzwl %di, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpslldq $2, %xmm0, %xmm0
+; AVX-NEXT:    vpslldq $2, %xmm0, %xmm0 {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
 ; AVX-NEXT:    retq
   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 8, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1419,14 +1419,14 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movzwl %di, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    pslldq $10, %xmm0
+; SSE-NEXT:    pslldq $10, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    movzwl %di, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpslldq $10, %xmm0, %xmm0
+; AVX-NEXT:    vpslldq $10, %xmm0, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
 ; AVX-NEXT:    retq
   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 8, i32 6, i32 7>
@@ -1438,14 +1438,14 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movzwl %di, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    pslldq $14, %xmm0
+; SSE-NEXT:    pslldq $14, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    movzwl %di, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpslldq $14, %xmm0, %xmm0
+; AVX-NEXT:    vpslldq $14, %xmm0, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
 ; AVX-NEXT:    retq
   %a = insertelement <8 x i16> undef, i16 %i, i32 0
   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
@@ -1457,14 +1457,14 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    movzwl %di, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
-; SSE-NEXT:    pslldq $4, %xmm0
+; SSE-NEXT:    pslldq $4, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    movzwl %di, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    vpslldq $4, %xmm0, %xmm0
+; AVX-NEXT:    vpslldq $4, %xmm0, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
 ; AVX-NEXT:    retq
   %a = insertelement <8 x i16> undef, i16 %i, i32 3
   %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
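
For completeness, the right-shift decode can be spot-checked the same way. Again a minimal standalone sketch (plain C++; kSentinelZero and PsrldqMask are illustrative stand-ins for SM_SentinelZero and DecodePSRLDQMask, not LLVM API):

#include <cstdio>
#include <vector>

const int kSentinelZero = -2; // illustrative stand-in for SM_SentinelZero

// Mirrors DecodePSRLDQMask above: each 128-bit lane shifts right by Imm
// bytes independently; bytes pulled in past the top of a lane are zero.
std::vector<int> PsrldqMask(unsigned VectorSizeInBits, unsigned Imm) {
  unsigned NumElts = VectorSizeInBits / 8;    // total bytes in the vector
  unsigned NumLanes = VectorSizeInBits / 128; // number of 128-bit lanes
  unsigned NumLaneElts = NumElts / NumLanes;  // bytes per lane
  std::vector<int> Mask;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Imm;
      Mask.push_back(Base < NumLaneElts ? int(Base + l) : kSentinelZero);
    }
  return Mask;
}

int main() {
  // 128-bit PSRLDQ by 7 bytes, as in test_x86_sse2_psrl_dq_bs: prints
  // source bytes 7..15, then "zero" seven times.
  for (int M : PsrldqMask(128, 7)) {
    if (M == kSentinelZero)
      std::printf("zero,");
    else
      std::printf("%d,", M);
  }
  std::printf("\n");
  return 0;
}

For PsrldqMask(128, 7) this prints bytes 7..15 followed by seven zeros, the pattern checked by the psrl_dq_bs tests above.
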