Diff 14777

lib/Target/X86/InstPrinter/X86InstComments.cpp

Show First 20 Lines • Show All 193 Lines • ▼ Show 20 Lines	case X86::VMOVSHDUPrr:
Src1Name = getRegName(MI->getOperand(1).getReg());		Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.		// FALL THROUGH.
case X86::MOVSHDUPrm:		case X86::MOVSHDUPrm:
case X86::VMOVSHDUPrm:		case X86::VMOVSHDUPrm:
DestName = getRegName(MI->getOperand(0).getReg());		DestName = getRegName(MI->getOperand(0).getReg());
DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);		DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
break;		break;

		case X86::PSLLDQri:
		case X86::VPSLLDQri:
		Src1Name = getRegName(MI->getOperand(1).getReg());
		DestName = getRegName(MI->getOperand(0).getReg());
		if(MI->getOperand(MI->getNumOperands()-1).isImm())
		DecodePSLLDQMask(MVT(MVT::v16i8).getSizeInBits(),
		chandlercUnsubmitted Not Done Reply Inline Actions This code doesn't really make sense. If you know the exact type you know the exact size, just pass 128. But what I meant with my comment was still to pass the MVT down, but to do the getSizeInBits query inside the decode routine. chandlerc: This code doesn't really make sense. If you know the exact type you know the exact size, just…
		MI->getOperand(MI->getNumOperands()-1).getImm(),
		ShuffleMask);
		break;

		case X86::VPSLLDQYri:
		Src1Name = getRegName(MI->getOperand(1).getReg());
		DestName = getRegName(MI->getOperand(0).getReg());
		if(MI->getOperand(MI->getNumOperands()-1).isImm())
		DecodePSLLDQMask(MVT(MVT::v32i8).getSizeInBits(),
		MI->getOperand(MI->getNumOperands()-1).getImm(),
		ShuffleMask);
		break;

		case X86::PSRLDQri:
		case X86::VPSRLDQri:
		Src1Name = getRegName(MI->getOperand(1).getReg());
		DestName = getRegName(MI->getOperand(0).getReg());
		if(MI->getOperand(MI->getNumOperands()-1).isImm())
		DecodePSRLDQMask(MVT(MVT::v16i8).getSizeInBits(),
		MI->getOperand(MI->getNumOperands()-1).getImm(),
		ShuffleMask);
		break;

		case X86::VPSRLDQYri:
		Src1Name = getRegName(MI->getOperand(1).getReg());
		DestName = getRegName(MI->getOperand(0).getReg());
		if(MI->getOperand(MI->getNumOperands()-1).isImm())
		DecodePSRLDQMask(MVT(MVT::v32i8).getSizeInBits(),
		MI->getOperand(MI->getNumOperands()-1).getImm(),
		ShuffleMask);
		break;

case X86::PALIGNR128rr:		case X86::PALIGNR128rr:
case X86::VPALIGNR128rr:		case X86::VPALIGNR128rr:
Src1Name = getRegName(MI->getOperand(2).getReg());		Src1Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.		// FALL THROUGH.
case X86::PALIGNR128rm:		case X86::PALIGNR128rm:
case X86::VPALIGNR128rm:		case X86::VPALIGNR128rm:
Src2Name = getRegName(MI->getOperand(1).getReg());		Src2Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());		DestName = getRegName(MI->getOperand(0).getReg());
▲ Show 20 Lines • Show All 477 Lines • Show Last 20 Lines

lib/Target/X86/Utils/X86ShuffleDecode.h

	Show All 34 Lines

	// <0,2> or <0,1,4,5>			// <0,2> or <0,1,4,5>
	void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);			void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);

	void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);			void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);

	void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);			void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);

				void DecodePSLLDQMask(unsigned VectorSizeInBits, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

				void DecodePSRLDQMask(unsigned VectorSizeInBits, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

	void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);			void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

	void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);			void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

	void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);			void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

	void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);			void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);

	Show All 39 Lines

lib/Target/X86/Utils/X86ShuffleDecode.cpp

	Show First 20 Lines • Show All 73 Lines • ▼ Show 20 Lines
	void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {			void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
	unsigned NumElts = VT.getVectorNumElements();			unsigned NumElts = VT.getVectorNumElements();
	for (int i = 0, e = NumElts / 2; i < e; ++i) {			for (int i = 0, e = NumElts / 2; i < e; ++i) {
	ShuffleMask.push_back(2 * i + 1);			ShuffleMask.push_back(2 * i + 1);
	ShuffleMask.push_back(2 * i + 1);			ShuffleMask.push_back(2 * i + 1);
	}			}
	}			}

				void DecodePSLLDQMask(unsigned VectorSizeInBits, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				unsigned NumElts = VectorSizeInBits / 8;
				unsigned NumLanes = VectorSizeInBits / 128;
				chandlercUnsubmitted Not Done Reply Inline Actions I wouldn't decode this in terms of the vector element type. The mask should be a byte-vector mask in all cases, and VT should just provide the size of the vector (128-bit, 256-bit, 512-bit, whatever). chandlerc: I wouldn't decode this in terms of the vector element type. The mask should be a byte-vector…
				RKSimonAuthorUnsubmitted Not Done Reply Inline Actions Yes I'd be happy to do that - I just followed the pattern from palignr. This does mean that the computed ShuffleMask count might not match the number of elements in the MVT. Would it be better to change the MVT VT argument to unsigned VectorSizeInBytes to make that clear? RKSimon: Yes I'd be happy to do that - I just followed the pattern from palignr. This does mean that the…
				unsigned NumLaneElts = NumElts / NumLanes;

				for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
				chandlercUnsubmitted Not Done Reply Inline Actions No need for braces around the outer loop. The above loop also uses int and < which is my personal preference for these loops. chandlerc: No need for braces around the outer loop. The above loop also uses int and < which is my…
				for (unsigned i = 0; i != NumLaneElts; ++i) {
				int M = SM_SentinelZero;
				if (i >= Imm) M = i - Imm + l;
				ShuffleMask.push_back(M);
				}
				}
				}

				/// Append the byte-level shuffle mask for a PSRLDQ-style shift to ShuffleMask.
				///
				/// The vector is treated as VectorSizeInBits / 8 byte elements, split into
				/// VectorSizeInBits / 128 lanes. Within each lane, output byte i takes source
				/// byte i + Imm of the same lane; when i + Imm falls outside the lane, the
				/// SM_SentinelZero marker is pushed instead.
				///
				/// \param VectorSizeInBits total vector width in bits.
				/// \param Imm per-lane shift amount, counted in bytes.
				/// \param ShuffleMask receives one entry per byte of the vector (appended).
				void DecodePSRLDQMask(unsigned VectorSizeInBits, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				  const unsigned TotalBytes = VectorSizeInBits / 8;
				  const unsigned LaneCount = VectorSizeInBits / 128;
				  const unsigned BytesPerLane = TotalBytes / LaneCount;

				  // Walk the vector one lane at a time; LaneStart is the index of the
				  // first byte of the current lane.
				  for (unsigned LaneStart = 0; LaneStart != TotalBytes; LaneStart += BytesPerLane)
				    for (unsigned Byte = 0; Byte != BytesPerLane; ++Byte) {
				      const unsigned SrcByte = Byte + Imm;
				      // Default to the zero sentinel; only in-lane sources get a real index.
				      int MaskElt = SM_SentinelZero;
				      if (SrcByte < BytesPerLane)
				        MaskElt = SrcByte + LaneStart;
				      ShuffleMask.push_back(MaskElt);
				    }
				}

	void DecodePALIGNRMask(MVT VT, unsigned Imm,			void DecodePALIGNRMask(MVT VT, unsigned Imm,
	SmallVectorImpl<int> &ShuffleMask) {			SmallVectorImpl<int> &ShuffleMask) {
	unsigned NumElts = VT.getVectorNumElements();			unsigned NumElts = VT.getVectorNumElements();
	unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);			unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);

	unsigned NumLanes = VT.getSizeInBits() / 128;			unsigned NumLanes = VT.getSizeInBits() / 128;
	unsigned NumLaneElts = NumElts / NumLanes;			unsigned NumLaneElts = NumElts / NumLanes;

	▲ Show 20 Lines • Show All 277 Lines • Show Last 20 Lines

test/CodeGen/X86/avx-intrinsics-x86.ll

Show First 20 Lines • Show All 452 Lines • ▼ Show 20 Lines	define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpslld		; CHECK: vpslld
%res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]		%res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res		ret <4 x i32> %res
}		}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone		declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {		define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: vpslldq		; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
		chandlercUnsubmitted Not Done Reply Inline Actions This doesn't look right... this is the identity mask? chandlerc: This doesn't look right... this is the identity mask?
		chandlercUnsubmitted Not Done Reply Inline Actions oh, this is bit shifting? what does this even mean? I'm not really sure how to interpret this test. chandlerc: oh, this is bit shifting? what does this even mean? I'm not really sure how to interpret this…
		RKSimonAuthorUnsubmitted Not Done Reply Inline Actions Yes it's a bit shifting representation (you'll notice in D5699 I have to multiply byte shifts by 8) - the lower 3 bits of the shift are lost (and ignored). The test was there already - I just added the decode comment - I agree it doesn't look that useful at first glance but it has use as an edge test and it does demonstrate that the decode works for zero shifts.... RKSimon: Yes it's a bit shifting representation (you'll notice in D5699 I have to multiply byte shifts by…
		chandlercUnsubmitted Not Done Reply Inline Actions Wow. What a broken intrinsic. Anyways, sure. chandlerc: Wow. What a broken intrinsic. Anyways, sure.
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]		%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone		declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {		define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: vpslldq		; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]		%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone		declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {		define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsllq		; CHECK: vpsllq
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines	define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpsrld		; CHECK: vpsrld
%res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]		%res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res		ret <4 x i32> %res
}		}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone		declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {		define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: vpsrldq		; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]		%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone		declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {		define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: vpsrldq		; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]		%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone		declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {		define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsrlq		; CHECK: vpsrlq
▲ Show 20 Lines • Show All 2,040 Lines • Show Last 20 Lines

test/CodeGen/X86/avx2-intrinsics-x86.ll

Show First 20 Lines • Show All 155 Lines • ▼ Show 20 Lines	define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpslld		; CHECK: vpslld
%res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]		%res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res		ret <8 x i32> %res
}		}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone		declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {		define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK: vpslldq		; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]		%res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res		ret <4 x i64> %res
}		}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone		declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {		define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; CHECK: vpslldq		; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]		%res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res		ret <4 x i64> %res
}		}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone		declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {		define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsllq		; CHECK: vpsllq
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines	define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpsrld		; CHECK: vpsrld
%res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]		%res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res		ret <8 x i32> %res
}		}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone		declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {		define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK: vpsrldq		; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]		%res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res		ret <4 x i64> %res
}		}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone		declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {		define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; CHECK: vpsrldq		; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]		%res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res		ret <4 x i64> %res
}		}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone		declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {		define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsrlq		; CHECK: vpsrlq
▲ Show 20 Lines • Show All 881 Lines • Show Last 20 Lines

test/CodeGen/X86/sse2-intrinsics-x86.ll

Show First 20 Lines • Show All 405 Lines • ▼ Show 20 Lines	define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: pslld		; CHECK: pslld
%res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]		%res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res		ret <4 x i32> %res
}		}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone		declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {		define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; CHECK: pslldq		; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]		%res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone		declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {		define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; CHECK: pslldq		; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]		%res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone		declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {		define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: psllq		; CHECK: psllq
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines	define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: psrld		; CHECK: psrld
%res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]		%res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res		ret <4 x i32> %res
}		}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone		declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {		define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; CHECK: psrldq		; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]		%res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone		declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {		define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; CHECK: psrldq		; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]		%res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res		ret <2 x i64> %res
}		}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone		declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {		define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: psrlq		; CHECK: psrlq
▲ Show 20 Lines • Show All 220 Lines • Show Last 20 Lines

test/CodeGen/X86/vector-shuffle-128-v8.ll

Show First 20 Lines • Show All 1,394 Lines • ▼ Show 20 Lines	; AVX-NEXT: retq
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {		define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:		; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax		; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0		; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq $2, %xmm0		; SSE-NEXT: pslldq $2, %xmm0 {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
		chandlercUnsubmitted Not Done Reply Inline Actions No need to give the operands here when checking the comment. It looks like all of the new comments in this file need the same treatment. chandlerc: No need to give the operands here when checking the comment. It looks like all of the new…
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:		; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax		; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0		; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq $2, %xmm0, %xmm0		; AVX-NEXT: vpslldq $2, %xmm0, %xmm0 {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX-NEXT: retq		; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0		%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>		%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {		define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:		; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax		; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0		; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq $10, %xmm0		; SSE-NEXT: pslldq $10, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:		; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax		; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0		; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq $10, %xmm0, %xmm0		; AVX-NEXT: vpslldq $10, %xmm0, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
; AVX-NEXT: retq		; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0		%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>		%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {		define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:		; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax		; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0		; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq $14, %xmm0		; SSE-NEXT: pslldq $14, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:		; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax		; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0		; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq $14, %xmm0, %xmm0		; AVX-NEXT: vpslldq $14, %xmm0, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
; AVX-NEXT: retq		; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0		%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>		%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {		define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:		; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
; SSE: # BB#0:		; SSE: # BB#0:
; SSE-NEXT: movzwl %di, %eax		; SSE-NEXT: movzwl %di, %eax
; SSE-NEXT: movd %eax, %xmm0		; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pslldq $4, %xmm0		; SSE-NEXT: pslldq $4, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE-NEXT: retq		; SSE-NEXT: retq
;		;
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:		; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
; AVX: # BB#0:		; AVX: # BB#0:
; AVX-NEXT: movzwl %di, %eax		; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vmovd %eax, %xmm0		; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpslldq $4, %xmm0, %xmm0		; AVX-NEXT: vpslldq $4, %xmm0, %xmm0 {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX-NEXT: retq		; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3		%a = insertelement <8 x i16> undef, i16 %i, i32 3
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>		%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle		ret <8 x i16> %shuffle
}		}

define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {		define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_def01234:		; SSE2-LABEL: shuffle_v8i16_def01234:
▲ Show 20 Lines • Show All 520 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

SSE pslldq/psrldq shuffle mask decodes
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 14777

lib/Target/X86/InstPrinter/X86InstComments.cpp

lib/Target/X86/Utils/X86ShuffleDecode.h

lib/Target/X86/Utils/X86ShuffleDecode.cpp

test/CodeGen/X86/avx-intrinsics-x86.ll

test/CodeGen/X86/avx2-intrinsics-x86.ll

test/CodeGen/X86/sse2-intrinsics-x86.ll

test/CodeGen/X86/vector-shuffle-128-v8.ll

This is an archive of the discontinued LLVM Phabricator instance.

SSE pslldq/psrldq shuffle mask decodesClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 14777

lib/Target/X86/InstPrinter/X86InstComments.cpp

lib/Target/X86/Utils/X86ShuffleDecode.h

lib/Target/X86/Utils/X86ShuffleDecode.cpp

test/CodeGen/X86/avx-intrinsics-x86.ll

test/CodeGen/X86/avx2-intrinsics-x86.ll

test/CodeGen/X86/sse2-intrinsics-x86.ll

test/CodeGen/X86/vector-shuffle-128-v8.ll

SSE pslldq/psrldq shuffle mask decodes
ClosedPublic