Diff 190287

llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Show First 20 Lines • Show All 5,490 Lines • ▼ Show 20 Lines	TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,		return DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
DAG.getConstant(Scale, dl, ShiftTy));		DAG.getConstant(Scale, dl, ShiftTy));
}		}

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,		bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
SDValue &Overflow, SelectionDAG &DAG) const {		SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);		SDLoc dl(Node);
EVT VT = Node->getValueType(0);		EVT VT = Node->getValueType(0);
		EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
		SDValue LHS = Node->getOperand(0);
		SDValue RHS = Node->getOperand(1);
		bool isSigned = Node->getOpcode() == ISD::SMULO;

		// For power-of-two multiplications we can use a simpler shift expansion.
		if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
		const APInt &C = RHSC->getAPIntValue();
		// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
		if (C.isPowerOf2()) {
		// smulo(x, signed_min) is same as umulo(x, signed_min).
		bool UseArithShift = isSigned && !C.isMinSignedValue();
		EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
		SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
		Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
		Overflow = DAG.getSetCC(dl, SetCCVT,
		DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
		dl, VT, Result, ShiftAmt),
		LHS, ISD::SETNE);
		return true;
		}
		}

EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);		EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
if (VT.isVector())		if (VT.isVector())
WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,		WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
VT.getVectorNumElements());		VT.getVectorNumElements());

SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue BottomHalf;		SDValue BottomHalf;
SDValue TopHalf;		SDValue TopHalf;
static const unsigned Ops[2][3] =		static const unsigned Ops[2][3] =
{ { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },		{ { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
{ ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};		{ ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
bool isSigned = Node->getOpcode() == ISD::SMULO;
if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {		if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);		BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);		TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
} else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {		} else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,		BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);		RHS);
TopHalf = BottomHalf.getValue(1);		TopHalf = BottomHalf.getValue(1);
} else if (isTypeLegal(WideVT)) {		} else if (isTypeLegal(WideVT)) {
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	if (DAG.getDataLayout().isLittleEndian()) {
BottomHalf = Ret.getOperand(0);		BottomHalf = Ret.getOperand(0);
TopHalf = Ret.getOperand(1);		TopHalf = Ret.getOperand(1);
} else {		} else {
BottomHalf = Ret.getOperand(1);		BottomHalf = Ret.getOperand(1);
TopHalf = Ret.getOperand(0);		TopHalf = Ret.getOperand(0);
}		}
}		}

EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
Result = BottomHalf;		Result = BottomHalf;
if (isSigned) {		if (isSigned) {
SDValue ShiftAmt = DAG.getConstant(		SDValue ShiftAmt = DAG.getConstant(
VT.getScalarSizeInBits() - 1, dl,		VT.getScalarSizeInBits() - 1, dl,
getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));		getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);		SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);		Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
} else {		} else {
▲ Show 20 Lines • Show All 71 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/ARM/umulo-32.ll

	Show All 25 Lines
	}			}

	declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone			declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone

	define i32 @test2(i32* %m_degree) ssp {			define i32 @test2(i32* %m_degree) ssp {
	; CHECK-LABEL: test2:			; CHECK-LABEL: test2:
	; CHECK: @ %bb.0:			; CHECK: @ %bb.0:
	; CHECK-NEXT: push {r4, lr}			; CHECK-NEXT: push {r4, lr}
				; CHECK-NEXT: movs r1, #7
				; CHECK-NEXT: lsls r1, r1, #29
	; CHECK-NEXT: ldr r0, [r0]			; CHECK-NEXT: ldr r0, [r0]
	; CHECK-NEXT: movs r2, #8			; CHECK-NEXT: mov r2, r0
				; CHECK-NEXT: bics r2, r1
				; CHECK-NEXT: subs r1, r0, r2
				; CHECK-NEXT: subs r2, r1, #1
				; CHECK-NEXT: sbcs r1, r2
	; CHECK-NEXT: movs r4, #0			; CHECK-NEXT: movs r4, #0
	; CHECK-NEXT: mov r1, r4
	; CHECK-NEXT: mov r3, r4
	; CHECK-NEXT: bl __muldi3
	; CHECK-NEXT: cmp r1, #0			; CHECK-NEXT: cmp r1, #0
	; CHECK-NEXT: beq .LBB1_2			; CHECK-NEXT: bne .LBB1_2
	; CHECK-NEXT: @ %bb.1:			; CHECK-NEXT: @ %bb.1:
	; CHECK-NEXT: movs r1, #1			; CHECK-NEXT: lsls r0, r0, #3
				; CHECK-NEXT: b .LBB1_3
	; CHECK-NEXT: .LBB1_2:			; CHECK-NEXT: .LBB1_2:
	; CHECK-NEXT: cmp r1, #0
	; CHECK-NEXT: beq .LBB1_4
	; CHECK-NEXT: @ %bb.3:
	; CHECK-NEXT: mvns r0, r4			; CHECK-NEXT: mvns r0, r4
	; CHECK-NEXT: .LBB1_4:			; CHECK-NEXT: .LBB1_3:
	; CHECK-NEXT: bl _Znam			; CHECK-NEXT: bl _Znam
	; CHECK-NEXT: mov r0, r4			; CHECK-NEXT: mov r0, r4
	; CHECK-NEXT: pop {r4, pc}			; CHECK-NEXT: pop {r4, pc}
	%val = load i32, i32* %m_degree, align 4			%val = load i32, i32* %m_degree, align 4
	%res = call %umul.ty @llvm.umul.with.overflow.i32(i32 %val, i32 8)			%res = call %umul.ty @llvm.umul.with.overflow.i32(i32 %val, i32 8)
	%ov = extractvalue %umul.ty %res, 1			%ov = extractvalue %umul.ty %res, 1
	%mul = extractvalue %umul.ty %res, 0			%mul = extractvalue %umul.ty %res, 0
	%sel = select i1 %ov, i32 -1, i32 %mul			%sel = select i1 %ov, i32 -1, i32 %mul
	%ret = call noalias i8* @_Znam(i32 %sel)			%ret = call noalias i8* @_Znam(i32 %sel)
	ret i32 0			ret i32 0
	}			}

	declare noalias i8* @_Znam(i32)			declare noalias i8* @_Znam(i32)

llvm/trunk/test/CodeGen/X86/mulo-pow2.ll

Show All 13 Lines	; AVX-NEXT: retq
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

define <4 x i32> @umul_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {		define <4 x i32> @umul_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX-LABEL: umul_v4i32_1:		; AVX-LABEL: umul_v4i32_1:
; AVX: # %bb.0:		; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1]
; AVX-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)		%x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0		%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

Show All 10 Lines	; AVX-NEXT: retq
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

define <4 x i32> @umul_v4i32_8(<4 x i32> %a, <4 x i32> %b) nounwind {		define <4 x i32> @umul_v4i32_8(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX-LABEL: umul_v4i32_8:		; AVX-LABEL: umul_v4i32_8:
; AVX: # %bb.0:		; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]		; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8]		; AVX-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpslld $3, %xmm0, %xmm0		; AVX-NEXT: vpslld $3, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0		; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)		%x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0		%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

define <4 x i32> @umul_v4i32_2pow31(<4 x i32> %a, <4 x i32> %b) nounwind {		define <4 x i32> @umul_v4i32_2pow31(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX-LABEL: umul_v4i32_2pow31:		; AVX-LABEL: umul_v4i32_2pow31:
; AVX: # %bb.0:		; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]		; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]		; AVX-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpslld $31, %xmm0, %xmm0		; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0		; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>)		%x = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>)
%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0		%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
Show All 9 Lines	; AVX-NEXT: retq
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

define <4 x i32> @smul_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {		define <4 x i32> @smul_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX-LABEL: smul_v4i32_1:		; AVX-LABEL: smul_v4i32_1:
; AVX: # %bb.0:		; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1]
; AVX-NEXT: vpmuldq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpmuldq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX-NEXT: vpsrad $31, %xmm0, %xmm3
; AVX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)		%x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0		%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

Show All 15 Lines	; AVX-NEXT: retq
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

define <4 x i32> @smul_v4i32_8(<4 x i32> %a, <4 x i32> %b) nounwind {		define <4 x i32> @smul_v4i32_8(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX-LABEL: smul_v4i32_8:		; AVX-LABEL: smul_v4i32_8:
; AVX: # %bb.0:		; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]		; AVX-NEXT: vpslld $3, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8]		; AVX-NEXT: vpsrad $3, %xmm2, %xmm3
; AVX-NEXT: vpmuldq %xmm3, %xmm2, %xmm2		; AVX-NEXT: vpcmpeqd %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpmuldq %xmm3, %xmm0, %xmm3		; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX-NEXT: vpslld $3, %xmm0, %xmm0
; AVX-NEXT: vpsrad $31, %xmm0, %xmm3
; AVX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)		%x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0		%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

define <4 x i32> @smul_v4i32_2pow31(<4 x i32> %a, <4 x i32> %b) nounwind {		define <4 x i32> @smul_v4i32_2pow31(<4 x i32> %a, <4 x i32> %b) nounwind {
; AVX-LABEL: smul_v4i32_2pow31:		; AVX-LABEL: smul_v4i32_2pow31:
; AVX: # %bb.0:		; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]		; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]		; AVX-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm2
; AVX-NEXT: vpmuldq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpmuldq %xmm3, %xmm0, %xmm3
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; AVX-NEXT: vpslld $31, %xmm0, %xmm0		; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vpsrad $31, %xmm0, %xmm3
; AVX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0		; AVX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq		; AVX-NEXT: retq
%x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>)		%x = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>)
%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0		%y = extractvalue { <4 x i32>, <4 x i1> } %x, 0
%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1		%z = extractvalue { <4 x i32>, <4 x i1> } %x, 1
%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y		%u = select <4 x i1> %z, <4 x i32> %b, <4 x i32> %y
ret <4 x i32> %u		ret <4 x i32> %u
}		}

This is an archive of the discontinued LLVM Phabricator instance.

[SDAG][X86] Expand pow2 mulo using shifts
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 190287

llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/trunk/test/CodeGen/ARM/umulo-32.ll

llvm/trunk/test/CodeGen/X86/mulo-pow2.ll

This is an archive of the discontinued LLVM Phabricator instance.

[SDAG][X86] Expand pow2 mulo using shifts
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 190287

llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp

llvm/trunk/test/CodeGen/ARM/umulo-32.ll

llvm/trunk/test/CodeGen/X86/mulo-pow2.ll

[SDAG][X86] Expand pow2 mulo using shifts
ClosedPublic