Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4783,6 +4783,94 @@
       return;
     }
   }
+  case ISD::ABS: {
+    assert(PPCSubTarget->hasP9Vector() && "ABS is supported only with P9 Vector");
+
+    // For vector absolute difference, we use the VABSDU[BHW] instructions of
+    // POWER9. Since VABSDU instructions are for unsigned integers, we need
+    // adjustment for signed integers: the sign bit of every element is flipped
+    // by adding a bias of 0x80, 0x8000 or 0x80000000 (matching the element
+    // width) before taking the unsigned absolute difference.
+    // For abs(sub(a, b)), we generate VABSDU(a+bias, b+bias).
+    // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1.
+    // For abs(a), we generate VABSDU(a+bias, bias).
+    // NOTE(review): VABSDU(a+bias, b+bias) yields the non-wrapping |a - b|,
+    // which differs from abs(sub(a, b)) when the subtraction wraps (e.g.
+    // INT_MAX - INT_MIN). Confirm the SUB cannot overflow before folding it.
+
+    EVT VecVT = N->getOperand(0).getValueType();
+    SDNode *AbsOp = nullptr;
+    unsigned AbsOpcode, AddOpcode;
+    // MTVSRWS splats a 32-bit word, so the scalar has to hold the bias
+    // replicated in every element of that word. BiasHi/BiasLo are the 16-bit
+    // halves of the word, materialized with LIS and ORI respectively.
+    unsigned BiasHi, BiasLo;
+
+    // Even for signed integers, we can skip adjustment if all values are
+    // known to be positive (as signed integer) due to zero-extended inputs.
+    bool SkipAdjust = false;
+    if (N->getOperand(0).getOpcode() == ISD::SUB) {
+      if (N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+          N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND)
+        SkipAdjust = true;
+    }
+
+    if (VecVT == MVT::v4i32) {
+      AbsOpcode = PPC::VABSDUW;
+      AddOpcode = PPC::VADDUWM;
+      BiasHi = 0x8000; // 0x80000000
+      BiasLo = 0;
+    }
+    else if (VecVT == MVT::v8i16) {
+      AbsOpcode = PPC::VABSDUH;
+      AddOpcode = PPC::VADDUHM;
+      BiasHi = 0x8000; // 0x80008000
+      BiasLo = 0x8000;
+    }
+    else if (VecVT == MVT::v16i8) {
+      AbsOpcode = PPC::VABSDUB;
+      AddOpcode = PPC::VADDUBM;
+      BiasHi = 0x8080; // 0x80808080
+      BiasLo = 0x8080;
+    }
+    else
+      llvm_unreachable("Unsupported vector data type for ISD::ABS");
+
+    if (SkipAdjust)
+      AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
+                                     SDValue(N->getOperand(0)->getOperand(0)),
+                                     SDValue(N->getOperand(0)->getOperand(1)));
+    else {
+      SDNode *ScaImm = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32,
+                                              getI32Imm(BiasHi, dl));
+      if (BiasLo)
+        ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32,
+                                        SDValue(ScaImm, 0),
+                                        getI32Imm(BiasLo, dl));
+      SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT,
+                                              SDValue(ScaImm, 0));
+      if (N->getOperand(0).getOpcode() == ISD::SUB) {
+        SDValue SubVal = N->getOperand(0);
+        SDNode *Add0Op = CurDAG->getMachineNode(AddOpcode, dl, VecVT,
+                                                SDValue(SubVal->getOperand(0)),
+                                                SDValue(VecImm, 0));
+        SDNode *Add1Op = CurDAG->getMachineNode(AddOpcode, dl, VecVT,
+                                                SDValue(SubVal->getOperand(1)),
+                                                SDValue(VecImm, 0));
+        AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT,
+                                       SDValue(Add0Op, 0), SDValue(Add1Op, 0));
+      }
+      else {
+        SDNode *AddOp = CurDAG->getMachineNode(AddOpcode, dl, VecVT,
+                                               SDValue(N->getOperand(0)),
+                                               SDValue(VecImm, 0));
+        AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(AddOp, 0),
+                                       SDValue(VecImm, 0));
+      }
+    }
+    ReplaceNode(N, AbsOp);
+    return;
+  }
   }
 
   SelectCode(N);
Index: lib/Target/PowerPC/PPCInstrAltivec.td
===================================================================
--- 
lib/Target/PowerPC/PPCInstrAltivec.td +++ lib/Target/PowerPC/PPCInstrAltivec.td @@ -1504,18 +1504,4 @@ "vabsduw $vD, $vA, $vB", IIC_VecGeneral, [(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>; -def : Pat<(v16i8:$vD (abs v16i8:$vA)), - (v16i8 (VABSDUB $vA, (v16i8 (V_SET0B))))>; -def : Pat<(v8i16:$vD (abs v8i16:$vA)), - (v8i16 (VABSDUH $vA, (v8i16 (V_SET0H))))>; -def : Pat<(v4i32:$vD (abs v4i32:$vA)), - (v4i32 (VABSDUW $vA, (v4i32 (V_SET0))))>; - -def : Pat<(v16i8:$vD (abs (sub v16i8:$vA, v16i8:$vB))), - (v16i8 (VABSDUB $vA, $vB))>; -def : Pat<(v8i16:$vD (abs (sub v8i16:$vA, v8i16:$vB))), - (v8i16 (VABSDUH $vA, $vB))>; -def : Pat<(v4i32:$vD (abs (sub v4i32:$vA, v4i32:$vB))), - (v4i32 (VABSDUW $vA, $vB))>; - } // end HasP9Altivec Index: test/CodeGen/PowerPC/ppc64-P9-vabsd.ll =================================================================== --- test/CodeGen/PowerPC/ppc64-P9-vabsd.ll +++ test/CodeGen/PowerPC/ppc64-P9-vabsd.ll @@ -9,8 +9,9 @@ %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i) ret <4 x i32> %0 ; CHECK-LABEL: simple_absv_32 -; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-NEXT: vabsduw 2, 2, [[ZERO]] +; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}} +; CHECK-NEXT: vadduwm 2, 2, [[IMM:[0-9]+]] +; CHECK-NEXT: vabsduw 2, 2, [[IMM]] ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: simple_absv_32 ; CHECK-PWR8: xxlxor @@ -26,8 +27,9 @@ %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a) ret <4 x i32> %0 ; CHECK-LABEL: simple_absv_32_swap -; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-NEXT: vabsduw 2, 2, [[ZERO]] +; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}} +; CHECK-NEXT: vadduwm 2, 2, [[IMM:[0-9]+]] +; CHECK-NEXT: vabsduw 2, 2, [[IMM]] ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: simple_absv_32_swap ; CHECK-PWR8: xxlxor @@ -42,8 +44,9 @@ %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i) ret <8 x i16> %0 ; CHECK-LABEL: simple_absv_16 -; CHECK: vxor 
[[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-NEXT: vabsduh 2, 2, [[ZERO]] +; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}} +; CHECK-NEXT: vadduhm 2, 2, [[IMM:[0-9]+]] +; CHECK-NEXT: vabsduh 2, 2, [[IMM]] ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: simple_absv_16 ; CHECK-PWR8: xxlxor @@ -59,8 +62,9 @@ %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i) ret <16 x i8> %0 ; CHECK-LABEL: simple_absv_8 -; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} -; CHECK-NEXT: vabsdub 2, 2, [[ZERO]] +; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}} +; CHECK-NEXT: vaddubm 2, 2, [[IMM:[0-9]+]] +; CHECK-NEXT: vabsdub 2, 2, [[IMM]] ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: simple_absv_8 ; CHECK-PWR8: xxlxor @@ -79,7 +83,10 @@ %3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2 ret <4 x i32> %3 ; CHECK-LABEL: sub_absv_32 -; CHECK: vabsduw 2, 2, 3 +; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}} +; CHECK-DAG: vadduwm 3, 3, {{[0-9]+}} +; CHECK-DAG: vadduwm 2, 2, {{[0-9]+}} +; CHECK-NEXT: vabsduw 2, 2, 3 ; CHECK-NEXT: blr ; CHECK-PWR8-LABEL: sub_absv_32 ; CHECK-PWR8: vsubuwm