Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
@@ -1118,6 +1118,7 @@
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
 
     /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
     /// SETCC with integer subtraction when (1) there is a legal way of doing it
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1085,6 +1085,7 @@
 
   if (Subtarget.hasP9Altivec()) {
     setTargetDAGCombine(ISD::ABS);
+    setTargetDAGCombine(ISD::VSELECT);
   }
 
   // Darwin long double math library functions have $LDBL128 appended.
@@ -13267,6 +13268,8 @@
     return DAGCombineBuildVector(N, DCI);
   case ISD::ABS:
     return combineABS(N, DCI);
+  case ISD::VSELECT:
+    return combineVSelect(N, DCI);
   }
 
   return SDValue();
@@ -14597,3 +14600,71 @@
 
   return SDValue();
 }
+
+// Fold the unsigned absolute-difference select idiom into one VABSD node.
+//
+// For type v4i32/v8i16/v16i8, transform
+// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
+// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
+// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
+//
+// Returns the VABSD node on a match, or an empty SDValue to leave N untouched.
+SDValue PPCTargetLowering::combineVSelect(SDNode *N,
+                                          DAGCombinerInfo &DCI) const {
+  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
+  assert(Subtarget.hasP9Altivec() &&
+         "Only combine this when P9 altivec supported!");
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+  SDValue Cond = N->getOperand(0);
+  SDValue TrueOpnd = N->getOperand(1);
+  SDValue FalseOpnd = N->getOperand(2);
+  EVT VT = TrueOpnd.getValueType();
+
+  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
+      FalseOpnd.getOpcode() != ISD::SUB)
+    return SDValue();
+
+  // ABSD only available for type v4i32/v8i16/v16i8
+  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
+    return SDValue();
+
+  // Require at least one of the three inputs to have no other user, so
+  // the combine saves at least one dependent computation.
+  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
+    return SDValue();
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+
+  // Can only handle unsigned comparison here
+  switch (CC) {
+  default:
+    return SDValue();
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+    break;
+  case ISD::SETULT:
+  case ISD::SETULE:
+    // Normalize to the "greater" form so one operand check covers all cases.
+    std::swap(TrueOpnd, FalseOpnd);
+    break;
+  }
+
+  SDValue CmpOpnd1 = Cond.getOperand(0);
+  SDValue CmpOpnd2 = Cond.getOperand(1);
+
+  // SETCC CmpOpnd1 CmpOpnd2 cond
+  // TrueOpnd = CmpOpnd1 - CmpOpnd2
+  // FalseOpnd = CmpOpnd2 - CmpOpnd1
+  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
+      TrueOpnd.getOperand(1) == CmpOpnd2 &&
+      FalseOpnd.getOperand(0) == CmpOpnd2 &&
+      FalseOpnd.getOperand(1) == CmpOpnd1) {
+    return DAG.getNode(PPCISD::VABSD, dl, VT, CmpOpnd1, CmpOpnd2,
+                       DAG.getTargetConstant(0, dl, MVT::i32));
+  }
+
+  return SDValue();
+}
Index: llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ llvm/trunk/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -526,9 +526,10 @@
   %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
   ret <4 x i32> %6
 ; CHECK-LABEL: absd_int32_ugt
-; CHECK: vcmpgtuw
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuw
+; CHECK-NOT: xxsel
+; CHECK: vabsduw v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int32_ugt
 ; CHECK-PWR8: vcmpgtuw
 ; CHECK-PWR8: xxsel
@@ -542,9 +543,10 @@
   %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
   ret <4 x i32> %6
 ; CHECK-LABEL: absd_int32_uge
-; CHECK: vcmpgtuw
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuw
+; CHECK-NOT: xxsel
+; CHECK: vabsduw v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int32_uge
 ; CHECK-PWR8: vcmpgtuw
 ; CHECK-PWR8: xxsel
@@ -558,9 +560,10 @@
   %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
   ret <4 x i32> %6
 ; CHECK-LABEL: absd_int32_ult
-; CHECK: vcmpgtuw
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuw
+; CHECK-NOT: xxsel
+; CHECK: vabsduw v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int32_ult
 ; CHECK-PWR8: vcmpgtuw
 ; CHECK-PWR8: xxsel
@@ -574,9 +577,10 @@
   %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
   ret <4 x i32> %6
 ; CHECK-LABEL: absd_int32_ule
-; CHECK: vcmpgtuw
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuw
+; CHECK-NOT: xxsel
+; CHECK: vabsduw v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int32_ule
 ; CHECK-PWR8: vcmpgtuw
 ; CHECK-PWR8: xxsel
@@ -590,9 +594,10 @@
   %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
   ret <8 x i16> %6
 ; CHECK-LABEL: absd_int16_ugt
-; CHECK: vcmpgtuh
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuh
+; CHECK-NOT: xxsel
+; CHECK: vabsduh v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int16_ugt
 ; CHECK-PWR8: vcmpgtuh
 ; CHECK-PWR8: xxsel
@@ -606,9 +611,10 @@
   %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
   ret <8 x i16> %6
 ; CHECK-LABEL: absd_int16_uge
-; CHECK: vcmpgtuh
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuh
+; CHECK-NOT: xxsel
+; CHECK: vabsduh v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int16_uge
 ; CHECK-PWR8: vcmpgtuh
 ; CHECK-PWR8: xxsel
@@ -622,9 +628,10 @@
   %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
   ret <8 x i16> %6
 ; CHECK-LABEL: absd_int16_ult
-; CHECK: vcmpgtuh
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuh
+; CHECK-NOT: xxsel
+; CHECK: vabsduh v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int16_ult
 ; CHECK-PWR8: vcmpgtuh
 ; CHECK-PWR8: xxsel
@@ -638,9 +645,10 @@
   %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
   ret <8 x i16> %6
 ; CHECK-LABEL: absd_int16_ule
-; CHECK: vcmpgtuh
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtuh
+; CHECK-NOT: xxsel
+; CHECK: vabsduh v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int16_ule
 ; CHECK-PWR8: vcmpgtuh
 ; CHECK-PWR8: xxsel
@@ -654,9 +662,10 @@
   %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
   ret <16 x i8> %6
 ; CHECK-LABEL: absd_int8_ugt
-; CHECK: vcmpgtub
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtub
+; CHECK-NOT: xxsel
+; CHECK: vabsdub v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int8_ugt
 ; CHECK-PWR8: vcmpgtub
 ; CHECK-PWR8: xxsel
@@ -670,9 +679,10 @@
   %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
   ret <16 x i8> %6
 ; CHECK-LABEL: absd_int8_uge
-; CHECK: vcmpgtub
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtub
+; CHECK-NOT: xxsel
+; CHECK: vabsdub v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int8_uge
 ; CHECK-PWR8: vcmpgtub
 ; CHECK-PWR8: xxsel
@@ -686,9 +696,10 @@
   %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
   ret <16 x i8> %6
 ; CHECK-LABEL: absd_int8_ult
-; CHECK: vcmpgtub
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtub
+; CHECK-NOT: xxsel
+; CHECK: vabsdub v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int8_ult
 ; CHECK-PWR8: vcmpgtub
 ; CHECK-PWR8: xxsel
@@ -702,15 +713,52 @@
   %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
   ret <16 x i8> %6
 ; CHECK-LABEL: absd_int8_ule
-; CHECK: vcmpgtub
-; CHECK: xxsel
-; CHECK: blr
+; CHECK-NOT: vcmpgtub
+; CHECK-NOT: xxsel
+; CHECK: vabsdub v2, v2, v3
+; CHECK-NEXT: blr
 ; CHECK-PWR8-LABEL: absd_int8_ule
 ; CHECK-PWR8: vcmpgtub
 ; CHECK-PWR8: xxsel
 ; CHECK-PWR8: blr
 }
 
+; some cases we are unable to optimize
+; check whether goes beyond the scope
+define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
+  %3 = icmp ugt <4 x i32> %0, %1
+  %4 = sub <4 x i32> %0, %1
+  %5 = sub <4 x i32> %1, %0
+  %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
+  ret <4 x i32> %6
+; CHECK-LABEL: absd_int32_ugt_opp
+; CHECK-NOT: vabsduw
+; CHECK: vcmpgtuw
+; CHECK: xxsel
+; CHECK: blr
+; CHECK-PWR8-LABEL: absd_int32_ugt_opp
+; CHECK-PWR8: vcmpgtuw
+; CHECK-PWR8: xxsel
+; CHECK-PWR8: blr
+}
+
+define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) {
+  %3 = icmp ugt <2 x i64> %0, %1
+  %4 = sub <2 x i64> %0, %1
+  %5 = sub <2 x i64> %1, %0
+  %6 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %5
+  ret <2 x i64> %6
+; CHECK-LABEL: absd_int64_ugt
+; CHECK-NOT: vabsduw
+; CHECK: vcmpgtud
+; CHECK: xxsel
+; CHECK: blr
+; CHECK-PWR8-LABEL: absd_int64_ugt
+; CHECK-PWR8: vcmpgtud
+; CHECK-PWR8: xxsel
+; CHECK-PWR8: blr
+}
+
 declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
 
 declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>)