Index: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -327,6 +327,7 @@
 
   bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
   void transferMemOperands(SDNode *N, SDNode *Result);
+  MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBitVec = nullptr);
 };
 
 } // end anonymous namespace
@@ -3970,6 +3971,63 @@
   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
 }
 
+/// Flip the most significant (sign) bit of every element of the integer
+/// vector \p N and return the machine node that computes the result.
+/// Only v4i32, v8i16 and v16i8 are handled; any other type is unreachable.
+///
+/// \param N vector value whose per-element sign bits are flipped.
+/// \param SignBitVec optional out-parameter; when non-null it receives the
+/// node producing the mask vector in which only the MSB of each element is
+/// set (0x80000000, 0x8000 or 0x80 per element).
+/// \returns the node producing \p N with the MSB of every element inverted.
+MachineSDNode *
+PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) {
+  SDLoc dl(N);
+  EVT VecVT = N.getValueType();
+  if (VecVT == MVT::v4i32) {
+    // XVNEGSP inverts the sign bit of each 32-bit lane, so negating an
+    // all-zero vector materializes the 0x80000000 mask.
+    if (SignBitVec) {
+      SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32);
+      *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT,
+                                           SDValue(ZV, 0));
+    }
+    return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N);
+  }
+  else if (VecVT == MVT::v8i16) {
+    // Build 0x80008000 in a GPR (LIS sets the upper half, ORI the lower half)
+    // and splat that word so every halfword holds 0x8000.
+    SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
+                                        getI32Imm(0x8000, dl));
+    SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
+                                            SDValue(Hi, 0),
+                                            getI32Imm(0x8000, dl));
+    SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
+                                            SDValue(ScaImm, 0));
+    /*
+    Alternatively, we can do this as follows to use VRF instead of GPR.
+      vspltish 5, 1
+      vspltish 6, 15
+      vslh 5, 6, 5
+    */
+    // Adding 0x8000 modulo 2^16 is the same as inverting the MSB.
+    if (SignBitVec) *SignBitVec = VecImm;
+    return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N,
+                                  SDValue(VecImm, 0));
+  }
+  else if (VecVT == MVT::v16i8) {
+    // XXSPLTIB replicates the byte immediate into every element; the node is
+    // a vector value, hence VecVT rather than a scalar type.
+    SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, VecVT,
+                                            getI32Imm(0x80, dl));
+    if (SignBitVec) *SignBitVec = VecImm;
+    return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N,
+                                  SDValue(VecImm, 0));
+  }
+  else
+    llvm_unreachable("Unsupported vector data type for flipSignBit");
+}
+
 // Select - Convert the specified operand from a target-independent to a
 // target-specific node if it hasn't already been changed.
 void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -4783,6 +4841,57 @@
       return;
     }
   }
+  case ISD::ABS: {
+    assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector");
+
+    // For vector absolute difference, we use VABSDUW instruction of POWER9.
+    // Since VABSDU instructions are for unsigned integers, we need adjustment
+    // for signed integers.
+    // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000).
+    // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
+    // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000).
+    EVT VecVT = N->getOperand(0).getValueType();
+    SDNode *AbsOp = nullptr;
+    unsigned AbsOpcode;
+
+    if (VecVT == MVT::v4i32)
+      AbsOpcode = PPC::VABSDUW;
+    else if (VecVT == MVT::v8i16)
+      AbsOpcode = PPC::VABSDUH;
+    else if (VecVT == MVT::v16i8)
+      AbsOpcode = PPC::VABSDUB;
+    else
+      llvm_unreachable("Unsupported vector data type for ISD::ABS");
+
+    // Even for signed integers, we can skip adjustment if all values are
+    // known to be positive (as signed integer) due to zero-extended inputs.
+    if (N->getOperand(0).getOpcode() == ISD::SUB &&
+        N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+        N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) {
+      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
+                                     SDValue(N->getOperand(0)->getOperand(0)),
+                                     SDValue(N->getOperand(0)->getOperand(1)));
+      ReplaceNode(N, AbsOp);
+      return;
+    }
+    // NOTE(review): flipping both operands assumes a - b does not overflow as
+    // a signed value; confirm whether the SUB should be required to be nsw.
+    if (N->getOperand(0).getOpcode() == ISD::SUB) {
+      SDValue SubVal = N->getOperand(0);
+      SDNode *Op0 = flipSignBit(SubVal->getOperand(0));
+      SDNode *Op1 = flipSignBit(SubVal->getOperand(1));
+      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
+                                     SDValue(Op0, 0), SDValue(Op1, 0));
+    }
+    else {
+      SDNode *Op1 = nullptr;
+      SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1);
+      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0),
+                                     SDValue(Op1, 0));
+    }
+    ReplaceNode(N, AbsOp);
+    return;
+  }
   }
 
   SelectCode(N);
Index: llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1504,18 +1504,4 @@
                          "vabsduw $vD, $vA, $vB", IIC_VecGeneral,
                          [(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>;
 
-def : Pat<(v16i8:$vD (abs v16i8:$vA)),
-          (v16i8 (VABSDUB $vA, (v16i8 (V_SET0B))))>;
-def : Pat<(v8i16:$vD (abs v8i16:$vA)),
-          (v8i16 (VABSDUH $vA, (v8i16 (V_SET0H))))>;
-def : Pat<(v4i32:$vD (abs v4i32:$vA)),
-          (v4i32 (VABSDUW $vA, (v4i32 (V_SET0))))>;
-
-def : Pat<(v16i8:$vD (abs (sub v16i8:$vA, v16i8:$vB))),
-          (v16i8 (VABSDUB $vA, $vB))>;
-def : Pat<(v8i16:$vD (abs (sub v8i16:$vA, v8i16:$vB))),
-          (v8i16 (VABSDUH $vA, $vB))>;
-def : Pat<(v4i32:$vD (abs (sub v4i32:$vA, v4i32:$vB))),
-          (v4i32 (VABSDUW $vA, $vB))>;
-
 } // end HasP9Altivec
Index: llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -9,8 +9,10 @@
   %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i)
   ret <4 x i32> %0
 ; CHECK-LABEL: simple_absv_32
-; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-NEXT: vabsduw 2, 2, [[ZERO]]
+; CHECK-DAG: vxor {{[0-9]+}}, [[REG:[0-9]+]], [[REG]]
+; CHECK-DAG: xvnegsp 34, 34
+; CHECK-DAG: xvnegsp 35, {{[0-9]+}}
+; CHECK-NEXT: vabsduw 2, 2, {{[0-9]+}}
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: simple_absv_32
 ; CHECK-PWR8: xxlxor
@@ -26,8 +28,10 @@
   %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a)
   ret <4 x i32> %0
 ; CHECK-LABEL: simple_absv_32_swap
-; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-NEXT: vabsduw 2, 2, [[ZERO]]
+; CHECK-DAG: vxor {{[0-9]+}}, [[REG:[0-9]+]], [[REG]]
+; CHECK-DAG: xvnegsp 34, 34
+; CHECK-DAG: xvnegsp 35, {{[0-9]+}}
+; CHECK-NEXT: vabsduw 2, 2, {{[0-9]+}}
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: simple_absv_32_swap
 ; CHECK-PWR8: xxlxor
@@ -42,8 +46,9 @@
   %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i)
   ret <8 x i16> %0
 ; CHECK-LABEL: simple_absv_16
-; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-NEXT: vabsduh 2, 2, [[ZERO]]
+; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NEXT: vadduhm 2, 2, [[IMM:[0-9]+]]
+; CHECK-NEXT: vabsduh 2, 2, [[IMM]]
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: simple_absv_16
 ; CHECK-PWR8: xxlxor
@@ -59,8 +64,9 @@
   %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i)
   ret <16 x i8> %0
 ; CHECK-LABEL: simple_absv_8
-; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-NEXT: vabsdub 2, 2, [[ZERO]]
+; CHECK: xxspltib {{[0-9]+}}, 128
+; CHECK-NEXT: vaddubm 2, 2, [[IMM:[0-9]+]]
+; CHECK-NEXT: vabsdub 2, 2, [[IMM]]
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: simple_absv_8
 ; CHECK-PWR8: xxlxor
@@ -79,7 +85,9 @@
   %3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2
   ret <4 x i32> %3
 ; CHECK-LABEL: sub_absv_32
-; CHECK: vabsduw 2, 2, 3
+; CHECK-DAG: xvnegsp 34, 34
+; CHECK-DAG: xvnegsp 35, 35
+; CHECK-NEXT: vabsduw 2, 2, 3
 ; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: sub_absv_32
 ; CHECK-PWR8: vsubuwm