Diff 143252

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

Show First 20 Lines • Show All 321 Lines • ▼ Show 20 Lines	private:
SDValue combineToCMPB(SDNode *N);		SDValue combineToCMPB(SDNode *N);
void foldBoolExts(SDValue &Res, SDNode *&N);		void foldBoolExts(SDValue &Res, SDNode *&N);

bool AllUsersSelectZero(SDNode *N);		bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);		void SwapAllSelectUsers(SDNode *N);

bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;		bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode N, SDNode Result);		void transferMemOperands(SDNode N, SDNode Result);
		MachineSDNode flipSignBit(const SDValue &N, SelectionDAG DAG,
		nemanjaiUnsubmitted Not Done Reply Inline Actions This is a member function and you're passing `CurDAG` which is a data member of the same class, right? If so, please remove the argument. Also, the `SDLoc` is easy enough to get from `N`. nemanjai: This is a member function and you're passing `CurDAG` which is a data member of the same class…
		const SDLoc &dl, SDNode **SignBitVec = nullptr);
};		};

} // end anonymous namespace		} // end anonymous namespace

/// InsertVRSaveCode - Once the entire function has been instruction selected,		/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,		/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE. If so, do it.		/// check to see if we need to save/restore VRSAVE. If so, do it.
void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {		void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
▲ Show 20 Lines • Show All 3,627 Lines • ▼ Show 20 Lines

void PPCDAGToDAGISel::transferMemOperands(SDNode N, SDNode Result) {		void PPCDAGToDAGISel::transferMemOperands(SDNode N, SDNode Result) {
// Transfer memoperands.		// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);		MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(N)->getMemOperand();		MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);		cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
}		}

		/// This method returns a node after flipping the MSB of each element
		/// of vector integer type. Additionally, if SignBitVec is non-null,
		/// this method returns a node with one at MSB of all elements
		nemanjaiUnsubmitted Not Done Reply Inline Actions Since you're setting the output parameter rather than returning it... s/returns/sets it to nemanjai: Since you're setting the output parameter rather than returning it... s/returns/sets it to
		/// and zero at other bits in SignBitVec.
		MachineSDNode *
		PPCDAGToDAGISel::flipSignBit(const SDValue &N, SelectionDAG *DAG,
		const SDLoc &dl, SDNode **SignBitVec) {
		EVT VecVT = N.getValueType();
		if (VecVT == MVT::v4i32) {
		if (SignBitVec) {
		SDNode *ZV = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::v4i32,
		getI32Imm(0, dl));
		*SignBitVec = DAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
		SDValue(ZV, 0));
		}
		return DAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
		}
		else if (VecVT == MVT::v8i16) {
		nemanjaiUnsubmitted Not Done Reply Inline Actions I suppose we could do this by:
		```
		vspltish 5, 1
		vspltish 6, 15
		vslh 5, 6, 5
		```
		As the two splats can be done in parallel. But this might very well be worse since it uses an extra vector register and due to the dispatch rules for vector operations. Up to you of course.
		nemanjai: I suppose we could do this by: ``` vspltish 5, 1 vspltish 6, 15 vslh 5, 6, 5 ``` As the two…
		SDNode *Hi = DAG->getMachineNode(PPC::LIS, dl, MVT::i32,
		getI32Imm(0x8000, dl));
		SDNode *ScaImm = DAG->getMachineNode(PPC::ORI, dl, MVT::i32,
		SDValue(Hi, 0),
		getI32Imm(0x8000, dl));
		SDNode *VecImm = DAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
		SDValue(ScaImm, 0));
		if (SignBitVec) *SignBitVec = VecImm;
		return DAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N, SDValue(VecImm, 0));
		}
		else if (VecVT == MVT::v16i8) {
		SDNode *VecImm = DAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32,
		getI32Imm(0x80, dl));
		if (SignBitVec) *SignBitVec = VecImm;
		return DAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N, SDValue(VecImm, 0));
		}
		else
		llvm_unreachable("Unsupported vector data type for flipSignBit");
		}

// Select - Convert the specified operand from a target-independent to a		// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.		// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {		void PPCDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);		SDLoc dl(N);
if (N->isMachineOpcode()) {		if (N->isMachineOpcode()) {
N->setNodeId(-1);		N->setNodeId(-1);
return; // Already selected.		return; // Already selected.
}		}
▲ Show 20 Lines • Show All 797 Lines • ▼ Show 20 Lines	if ((Elt & 1) == 0) {
SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);		SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
EltVal = getI32Imm(-16, dl);		EltVal = getI32Imm(-16, dl);
SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);		SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),		ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0)));		SDValue(Tmp2, 0)));
return;		return;
}		}
}		}
		case ISD::ABS: {
		assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");

		// For vector absolute difference, we use VABSDUW instruction of POWER9.
		// Since VABSDU instructions are for unsigned integers, we need adjustment
		// for signed integers.
		// For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
		// Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
		// For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
		EVT VecVT = N->getOperand(0).getValueType();
		SDNode *AbsOp = nullptr;
		unsigned AbsOpcode;

		// Even for signed integers, we can skip adjustment if all values are
		// known to be positive (as signed integer) due to zero-extended inputs.
		bool SkipAdjust = false;
		if (N->getOperand(0).getOpcode() == ISD::SUB &&
		nemanjaiUnsubmitted Not Done Reply Inline Actions Can you just move this condition below where you set the opcodes and then we don't need `SkipAdjust` (since we now use it in only one place)? nemanjai: Can you just move this condition below where you set the opcodes and then we don't need…
		N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
		N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND)
		SkipAdjust = true;
		nemanjaiUnsubmitted Not Done Reply Inline Actions Shouldn't this check `ZERO_EXTEND_VECTOR_INREG` as well? Or is that a node we can't have this late? nemanjai: Shouldn't this check `ZERO_EXTEND_VECTOR_INREG` as well? Or is that a node we can't have this…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions In my understanding `ZERO_EXTEND_VECTOR_INREG` is created in the legalize phase, while this code is for the initial selection phase. So I think this code will not find `ZERO_EXTEND_VECTOR_INREG` node here. inouehrs: In my understanding `ZERO_EXTEND_VECTOR_INREG` is created in the legalize phase, while this…

		if (VecVT == MVT::v4i32)
		AbsOpcode = PPC::VABSDUW;
		else if (VecVT == MVT::v8i16)
		AbsOpcode = PPC::VABSDUH;
		nemanjaiUnsubmitted Not Done Reply Inline Actions It seems that for the `v4i32` type, we should be able to just use `xvnegsp` rather than loading the immediate, moving and adding. nemanjai: It seems that for the `v4i32` type, we should be able to just use `xvnegsp` rather than loading…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions Do you know whether it is safe to use a floating point instruction for integer data if the bit pattern is for NaN or Inf? inouehrs: Do you know whether it is safe to use a floating point instruction for integer data if the bit pattern…
		nemanjaiUnsubmitted Not Done Reply Inline Actions I think it's OK according to the ISA since it doesn't modify any special registers or do anything special for NaN/Inf. The description just says that it copies the contents with the high bit of each word element complemented, so I think this is just a bitwise operation rather than a vector fp operation. nemanjai: I think it's OK according to the ISA since it doesn't modify any special registers or do…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions As far as I tested, it works at least on POWER9. inouehrs: As far as I tested, it works at least on POWER9.
		else if (VecVT == MVT::v16i8)
		AbsOpcode = PPC::VABSDUB;
		else
		llvm_unreachable("Unsupported vector data type for ISD::ABS");

		if (SkipAdjust)
		nemanjaiUnsubmitted Not Done Reply Inline Actions Can you please return here so the remaining code does not need to be nested into an `else`? nemanjai: Can you please return here so the remaining code does not need to be nested into an `else`?
		AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
		SDValue(N->getOperand(0)->getOperand(0)),
		SDValue(N->getOperand(0)->getOperand(1)));
		else {
		if (N->getOperand(0).getOpcode() == ISD::SUB) {
		SDValue SubVal = N->getOperand(0);
		SDNode *Op0 = flipSignBit(SubVal->getOperand(0), CurDAG, dl);
		SDNode *Op1 = flipSignBit(SubVal->getOperand(1), CurDAG, dl);
		AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
		SDValue(Op0, 0), SDValue(Op1, 0));
		}
		else {
		nemanjaiUnsubmitted Not Done Reply Inline Actions We should just be able to do something like:
		```
		xxspltib 35, 128 # Mask
		vxor 0, 3, 2 # Flip sign
		vabsduh ... # The actual absdiff
		```
		nemanjai: We should just be able to do something like: ``` xxspltib 35, 128 # Mask vxor 0, 3, 2 # Flip…
		inouehrsAuthorUnsubmitted Not Done Reply Inline Actions Good catch. I will update to use `xxspltib`. The VSX splat immediate supports an 8-bit immediate, while the older VMX splat immediate supports only 5 bits. inouehrs: Good catch. I will update to use `xxspltib`. The VSX splat immediate supports an 8-bit immediate, while…
		SDNode *Op1 = nullptr;
		SDNode *Op0 = flipSignBit(N->getOperand(0), CurDAG, dl, &Op1);
		AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
		SDValue(Op1, 0));
		}
		}
		ReplaceNode(N, AbsOp);
		return;
		}
}		}

SelectCode(N);		SelectCode(N);
}		}

// If the target supports the cmpb instruction, do the idiom recognition here.		// If the target supports the cmpb instruction, do the idiom recognition here.
// We don't do this as a DAG combine because we don't want to do it as nodes		// We don't do this as a DAG combine because we don't want to do it as nodes
// are being combined (because we might miss part of the eventual idiom). We		// are being combined (because we might miss part of the eventual idiom). We
▲ Show 20 Lines • Show All 1,345 Lines • Show Last 20 Lines

lib/Target/PowerPC/PPCInstrAltivec.td

Show First 20 Lines • Show All 1,498 Lines • ▼ Show 20 Lines	def VABSDUB : VXForm_1<1027, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
[(set v16i8:$vD, (int_ppc_altivec_vabsdub v16i8:$vA, v16i8:$vB))]>;		[(set v16i8:$vD, (int_ppc_altivec_vabsdub v16i8:$vA, v16i8:$vB))]>;
def VABSDUH : VXForm_1<1091, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),		def VABSDUH : VXForm_1<1091, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vabsduh $vD, $vA, $vB", IIC_VecGeneral,		"vabsduh $vD, $vA, $vB", IIC_VecGeneral,
[(set v8i16:$vD, (int_ppc_altivec_vabsduh v8i16:$vA, v8i16:$vB))]>;		[(set v8i16:$vD, (int_ppc_altivec_vabsduh v8i16:$vA, v8i16:$vB))]>;
def VABSDUW : VXForm_1<1155, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),		def VABSDUW : VXForm_1<1155, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vabsduw $vD, $vA, $vB", IIC_VecGeneral,		"vabsduw $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>;		[(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>;

def : Pat<(v16i8:$vD (abs v16i8:$vA)),
(v16i8 (VABSDUB $vA, (v16i8 (V_SET0B))))>;
def : Pat<(v8i16:$vD (abs v8i16:$vA)),
(v8i16 (VABSDUH $vA, (v8i16 (V_SET0H))))>;
def : Pat<(v4i32:$vD (abs v4i32:$vA)),
(v4i32 (VABSDUW $vA, (v4i32 (V_SET0))))>;

def : Pat<(v16i8:$vD (abs (sub v16i8:$vA, v16i8:$vB))),
(v16i8 (VABSDUB $vA, $vB))>;
def : Pat<(v8i16:$vD (abs (sub v8i16:$vA, v8i16:$vB))),
(v8i16 (VABSDUH $vA, $vB))>;
def : Pat<(v4i32:$vD (abs (sub v4i32:$vA, v4i32:$vB))),
(v4i32 (VABSDUW $vA, $vB))>;

} // end HasP9Altivec		} // end HasP9Altivec

test/CodeGen/PowerPC/ppc64-P9-vabsd.ll

	; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs \| FileCheck %s			; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs \| FileCheck %s
	; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs \| FileCheck %s			; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs \| FileCheck %s
	; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs \| FileCheck %s -check-prefix=CHECK-PWR8 -implicit-check-not vabsdu			; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs \| FileCheck %s -check-prefix=CHECK-PWR8 -implicit-check-not vabsdu

	; Function Attrs: nounwind readnone			; Function Attrs: nounwind readnone
	define <4 x i32> @simple_absv_32(<4 x i32> %a) local_unnamed_addr {			define <4 x i32> @simple_absv_32(<4 x i32> %a) local_unnamed_addr {
	entry:			entry:
	%sub.i = sub <4 x i32> zeroinitializer, %a			%sub.i = sub <4 x i32> zeroinitializer, %a
	%0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i)			%0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i)
	ret <4 x i32> %0			ret <4 x i32> %0
	; CHECK-LABEL: simple_absv_32			; CHECK-LABEL: simple_absv_32
	; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}			; CHECK-DAG: xxspltib [[ZERO:[0-9]+]], [[ZERO]]
	; CHECK-NEXT: vabsduw 2, 2, [[ZERO]]			; CHECK-DAG: xvnegsp 34, 34
				; CHECK-DAG: xvnegsp 35, [[ZERO]]
				; CHECK-NEXT: vabsduw 2, 2, {{[0-9]+}}
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	; CHECK-PWR8-LABEL: simple_absv_32			; CHECK-PWR8-LABEL: simple_absv_32
	; CHECK-PWR8: xxlxor			; CHECK-PWR8: xxlxor
	; CHECK-PWR8: vsubuwm			; CHECK-PWR8: vsubuwm
	; CHECK-PWR8: vmaxsw			; CHECK-PWR8: vmaxsw
	; CHECK-PWR8: blr			; CHECK-PWR8: blr
	}			}

	; Function Attrs: nounwind readnone			; Function Attrs: nounwind readnone
	define <4 x i32> @simple_absv_32_swap(<4 x i32> %a) local_unnamed_addr {			define <4 x i32> @simple_absv_32_swap(<4 x i32> %a) local_unnamed_addr {
	entry:			entry:
	%sub.i = sub <4 x i32> zeroinitializer, %a			%sub.i = sub <4 x i32> zeroinitializer, %a
	%0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a)			%0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a)
	ret <4 x i32> %0			ret <4 x i32> %0
	; CHECK-LABEL: simple_absv_32_swap			; CHECK-LABEL: simple_absv_32_swap
	; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}			; CHECK-DAG: xxspltib [[ZERO:[0-9]+]], [[ZERO]]
	; CHECK-NEXT: vabsduw 2, 2, [[ZERO]]			; CHECK-DAG: xvnegsp 34, 34
				; CHECK-DAG: xvnegsp 35, [[ZERO]]
				; CHECK-NEXT: vabsduw 2, 2, {{[0-9]+}}
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	; CHECK-PWR8-LABEL: simple_absv_32_swap			; CHECK-PWR8-LABEL: simple_absv_32_swap
	; CHECK-PWR8: xxlxor			; CHECK-PWR8: xxlxor
	; CHECK-PWR8: vsubuwm			; CHECK-PWR8: vsubuwm
	; CHECK-PWR8: vmaxsw			; CHECK-PWR8: vmaxsw
	; CHECK-PWR8: blr			; CHECK-PWR8: blr
	}			}

	define <8 x i16> @simple_absv_16(<8 x i16> %a) local_unnamed_addr {			define <8 x i16> @simple_absv_16(<8 x i16> %a) local_unnamed_addr {
	entry:			entry:
	%sub.i = sub <8 x i16> zeroinitializer, %a			%sub.i = sub <8 x i16> zeroinitializer, %a
	%0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i)			%0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i)
	ret <8 x i16> %0			ret <8 x i16> %0
	; CHECK-LABEL: simple_absv_16			; CHECK-LABEL: simple_absv_16
	; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}			; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
	; CHECK-NEXT: vabsduh 2, 2, [[ZERO]]			; CHECK-NEXT: vadduhm 2, 2, [[IMM:[0-9]+]]
				; CHECK-NEXT: vabsduh 2, 2, [[IMM]]
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	; CHECK-PWR8-LABEL: simple_absv_16			; CHECK-PWR8-LABEL: simple_absv_16
	; CHECK-PWR8: xxlxor			; CHECK-PWR8: xxlxor
	; CHECK-PWR8: vsubuhm			; CHECK-PWR8: vsubuhm
	; CHECK-PWR8: vmaxsh			; CHECK-PWR8: vmaxsh
	; CHECK-PWR8: blr			; CHECK-PWR8: blr
	}			}

	; Function Attrs: nounwind readnone			; Function Attrs: nounwind readnone
	define <16 x i8> @simple_absv_8(<16 x i8> %a) local_unnamed_addr {			define <16 x i8> @simple_absv_8(<16 x i8> %a) local_unnamed_addr {
	entry:			entry:
	%sub.i = sub <16 x i8> zeroinitializer, %a			%sub.i = sub <16 x i8> zeroinitializer, %a
	%0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i)			%0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i)
	ret <16 x i8> %0			ret <16 x i8> %0
	; CHECK-LABEL: simple_absv_8			; CHECK-LABEL: simple_absv_8
	; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}			; CHECK: xxspltib {{[0-9]+}}, 128
	; CHECK-NEXT: vabsdub 2, 2, [[ZERO]]			; CHECK-NEXT: vaddubm 2, 2, [[IMM:[0-9]+]]
				; CHECK-NEXT: vabsdub 2, 2, [[IMM]]
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	; CHECK-PWR8-LABEL: simple_absv_8			; CHECK-PWR8-LABEL: simple_absv_8
	; CHECK-PWR8: xxlxor			; CHECK-PWR8: xxlxor
	; CHECK-PWR8: vsububm			; CHECK-PWR8: vsububm
	; CHECK-PWR8: vmaxsb			; CHECK-PWR8: vmaxsb
	; CHECK-PWR8: blr			; CHECK-PWR8: blr
	}			}

	; The select pattern can only be detected for v4i32.			; The select pattern can only be detected for v4i32.
	; Function Attrs: norecurse nounwind readnone			; Function Attrs: norecurse nounwind readnone
	define <4 x i32> @sub_absv_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {			define <4 x i32> @sub_absv_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
	entry:			entry:
	%0 = sub nsw <4 x i32> %a, %b			%0 = sub nsw <4 x i32> %a, %b
	%1 = icmp sgt <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>			%1 = icmp sgt <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
	%2 = sub <4 x i32> zeroinitializer, %0			%2 = sub <4 x i32> zeroinitializer, %0
	%3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2			%3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2
	ret <4 x i32> %3			ret <4 x i32> %3
	; CHECK-LABEL: sub_absv_32			; CHECK-LABEL: sub_absv_32
	; CHECK: vabsduw 2, 2, 3			; CHECK-DAG: xvnegsp 34, 34
				; CHECK-DAG: xvnegsp 35, 35
				; CHECK-NEXT: vabsduw 2, 2, 3
	; CHECK-NEXT: blr			; CHECK-NEXT: blr
	; CHECK-PWR8-LABEL: sub_absv_32			; CHECK-PWR8-LABEL: sub_absv_32
	; CHECK-PWR8: vsubuwm			; CHECK-PWR8: vsubuwm
	; CHECK-PWR8: xxlxor			; CHECK-PWR8: xxlxor
	; CHECK-PWR8: blr			; CHECK-PWR8: blr
	}			}

	; FIXME: This does not produce the ISD::ABS that we are looking for.			; FIXME: This does not produce the ISD::ABS that we are looking for.
	▲ Show 20 Lines • Show All 268 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] fix incorrect vectorization of abs() on POWER9
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 143252

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

lib/Target/PowerPC/PPCInstrAltivec.td

test/CodeGen/PowerPC/ppc64-P9-vabsd.ll

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] fix incorrect vectorization of abs() on POWER9
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 143252

lib/Target/PowerPC/PPCISelDAGToDAG.cpp

lib/Target/PowerPC/PPCInstrAltivec.td

test/CodeGen/PowerPC/ppc64-P9-vabsd.ll

[PowerPC] fix incorrect vectorization of abs() on POWER9
ClosedPublic