Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -3699,6 +3699,14 @@
   /// \returns True, if the expansion was successful, false otherwise
   bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
 
+  /// Expand ABS nodes. Expands vector/scalar ABS nodes,
+  /// vector nodes can only succeed if all operations are legal/custom.
+  /// (ABS x) -> (XOR (ADD x, (SRA x, type_size - 1)), (SRA x, type_size - 1))
+  /// \param N Node to expand
+  /// \param Result output after conversion
+  /// \returns True, if the expansion was successful, false otherwise
+  bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+
   /// Turn load of vector type into a load of the individual elements.
   /// \param LD load to expand
   /// \returns MERGE_VALUEs of the scalar loads with their chains.
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -4853,12 +4853,18 @@
 
     // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
     // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
-    if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
+    // (X >=s 0) ? X : -X is ABS(X) as well, but (X >=s -1) is not: X == -1
+    // would be selected without being negated.
+    if ((Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes)) ||
+        (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, m_ZeroInt())))
       return {SPF_ABS, SPNB_NA, false};
 
     // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
     // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
-    if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
+    // (X <=s 0) ? X : -X is NABS(X) as well, but (X <=s 1) is not: X == 1
+    // would be selected without being negated.
+    if ((Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne)) ||
+        (Pred == ICmpInst::ICMP_SLE && match(CmpRHS, m_ZeroInt())))
       return {SPF_NABS, SPNB_NA, false};
   }
   else if (match(FalseVal, MaybeSExtCmpLHS)) {
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18365,38 +18365,6 @@
     }
   }
 
-  // Check to see if this is an integer abs.
-  // select_cc setg[te] X,  0,  X, -X ->
-  // select_cc setgt    X, -1,  X, -X ->
-  // select_cc setl[te] X,  0, -X,  X ->
-  // select_cc setlt    X,  1, -X,  X ->
-  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
-  if (N1C) {
-    ConstantSDNode *SubC = nullptr;
-    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
-         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
-        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
-      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
-    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
-              (N1C->isOne() && CC == ISD::SETLT)) &&
-             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
-      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
-
-    EVT XType = N0.getValueType();
-    if (SubC && SubC->isNullValue() && XType.isInteger()) {
-      SDLoc DL(N0);
-      SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
-                                  N0,
-                                  DAG.getConstant(XType.getSizeInBits() - 1, DL,
-                                         getShiftAmountTy(N0.getValueType())));
-      SDValue Add = DAG.getNode(ISD::ADD, DL,
-                                XType, N0, Shift);
-      AddToWorklist(Shift.getNode());
-      AddToWorklist(Add.getNode());
-      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
-    }
-  }
-
   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2630,6 +2630,10 @@
   SDValue Tmp1, Tmp2, Tmp3, Tmp4;
   bool NeedInvert;
   switch (Node->getOpcode()) {
+  case ISD::ABS:
+    if (TLI.expandABS(Node, Tmp1, DAG))
+      Results.push_back(Tmp1);
+    break;
   case ISD::CTPOP:
     if (TLI.expandCTPOP(Node, Tmp1, DAG))
       Results.push_back(Tmp1);
Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -145,6 +145,7 @@
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
   case ISD::USUBSAT:     Res = PromoteIntRes_ADDSUBSAT(N); break;
+  case ISD::ABS:         Res = PromoteIntRes_ABS(N); break;
 
   case ISD::ATOMIC_LOAD:
     Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -854,6 +855,11 @@
   return SDValue(Res.getNode(), 0);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+  SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
   // Promote the overflow bit trivially.
   if (ResNo == 1)
@@ -1426,6 +1432,7 @@
   case ISD::BITREVERSE:  ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
   case ISD::BSWAP:       ExpandIntRes_BSWAP(N, Lo, Hi); break;
   case ISD::Constant:    ExpandIntRes_Constant(N, Lo, Hi); break;
+  case ISD::ABS:         ExpandIntRes_ABS(N, Lo, Hi); break;
   case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTLZ:        ExpandIntRes_CTLZ(N, Lo, Hi); break;
   case ISD::CTPOP:       ExpandIntRes_CTPOP(N, Lo, Hi); break;
@@ -2175,6 +2182,25 @@
                        IsOpaque);
 }
 
+void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  SDLoc dl(N);
+
+  // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
+                            DAG.getConstant(0, dl, VT), N0);
+  SDValue NegLo, NegHi;
+  SplitInteger(Neg, NegLo, NegHi);
+
+  GetExpandedInteger(N0, Lo, Hi);
+  EVT NVT = Lo.getValueType();
+  SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
+                                 DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
+  Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
+  Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
+}
+
 void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -331,6 +331,7 @@
   SDValue PromoteIntRes_VAARG(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
+  SDValue PromoteIntRes_ABS(SDNode *N);
 
   // Integer Operand Promotion.
   bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -383,6 +384,7 @@
   void ExpandIntRes_AssertSext        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_AssertZext        (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_Constant          (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandIntRes_ABS               (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_CTLZ              (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_CTPOP             (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_CTTZ              (SDNode *N, SDValue &Lo, SDValue &Hi);
Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -117,6 +117,12 @@
   /// the remaining lanes, finally bitcasting to the proper type.
   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
 
+  /// Implement expand-based legalization of ABS vector operations.
+  /// If the following expansion is legal then use it:
+  /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
+  /// else unroll the operation.
+  SDValue ExpandABS(SDValue Op);
+
   /// Expand bswap of vectors into a shuffle if legal.
   SDValue ExpandBSWAP(SDValue Op);
 
@@ -329,6 +335,7 @@
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR:
+  case ISD::ABS:
   case ISD::SHL:
   case ISD::SRA:
   case ISD::SRL:
@@ -706,6 +713,8 @@
     return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
   case ISD::ZERO_EXTEND_VECTOR_INREG:
     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
+  case ISD::ABS:
+    return ExpandABS(Op);
   case ISD::BSWAP:
     return ExpandBSWAP(Op);
   case ISD::VSELECT:
@@ -915,6 +924,15 @@
       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
 }
 
+SDValue VectorLegalizer::ExpandABS(SDValue Op) {
+  SDValue Result;
+  if (TLI.expandABS(Op.getNode(), Result, DAG))
+    return Result;
+
+  // Otherwise go ahead and unroll.
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
   EVT VT = Op.getValueType();
 
Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -738,6 +738,7 @@
     SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
     break;
 
+  case ISD::ABS:
   case ISD::BITREVERSE:
   case ISD::BSWAP:
   case ISD::CTLZ:
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2943,6 +2943,8 @@
   ISD::NodeType OpCode = Cond.getValueType().isVector() ?
     ISD::VSELECT : ISD::SELECT;
 
+  bool IsUnaryAbs = false;
+
   // Min/max matching is only viable if all output VTs are the same.
   if (is_splat(ValueVTs)) {
     EVT VT = ValueVTs[0];
@@ -3003,10 +3005,16 @@
         break;
       }
       break;
+    case SPF_ABS:
+      IsUnaryAbs = true;
+      Opc = ISD::ABS;
+      break;
+    case SPF_NABS:
+      // TODO: we need to produce sub(0, abs(X)).
     default: break;
     }
 
-    if (Opc != ISD::DELETED_NODE &&
+    if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
         (TLI.isOperationLegalOrCustom(Opc, VT) ||
          (UseScalarMinMax &&
           TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
@@ -3019,15 +3027,30 @@
       RHSVal = getValue(RHS);
       BaseOps = {};
     }
+
+    if (IsUnaryAbs) {
+      OpCode = Opc;
+      LHSVal = getValue(LHS);
+      BaseOps = {};
+    }
   }
 
-  for (unsigned i = 0; i != NumValues; ++i) {
-    SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
-    Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
-    Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
-    Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
-                            LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
-                            Ops);
+  if (IsUnaryAbs) {
+    for (unsigned i = 0; i != NumValues; ++i) {
+      Values[i] =
+          DAG.getNode(OpCode, getCurSDLoc(),
+                      LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
+                      SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+    }
+  } else {
+    for (unsigned i = 0; i != NumValues; ++i) {
+      SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+      Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+      Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
+      Values[i] = DAG.getNode(
+          OpCode, getCurSDLoc(),
+          LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+    }
   }
 
   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4444,6 +4444,28 @@
   return true;
 }
 
+bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
+                               SelectionDAG &DAG) const {
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+  SDValue Op = N->getOperand(0);
+
+  // Only expand vector types if we have the appropriate vector operations.
+  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
+                        !isOperationLegalOrCustom(ISD::ADD, VT) ||
+                        !isOperationLegalOrCustom(ISD::XOR, VT)))
+    return false;
+
+  // Shift is all-ones for negative inputs and zero otherwise.
+  SDValue Shift =
+      DAG.getNode(ISD::SRA, dl, VT, Op,
+                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
+  SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
+  Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
+  return true;
+}
+
 SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                             SelectionDAG &DAG) const {
   SDLoc SL(LD);
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -156,6 +156,10 @@
     addQRTypeForNEON(MVT::v4i32);
     addQRTypeForNEON(MVT::v2i64);
     addQRTypeForNEON(MVT::v8f16);
+
+    // The AArch64 SIMD extension supports the scalar variant
+    // of the ABS instruction.
+    setOperationAction(ISD::ABS, MVT::i64, Legal);
   }
 
   // Compute derived properties from the register classes
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -690,6 +690,8 @@
     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+    void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                  SelectionDAG &DAG) const;
 
     unsigned getRegisterByName(const char* RegName, EVT VT,
                                SelectionDAG &DAG) const override;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1203,6 +1203,11 @@
   setPrefLoopAlignment(Subtarget->getPrefLoopAlignment());
 
   setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
+
+  // In Thumb modes, rewrite ISD::ABS into shift/add/xor via a target DAG
+  // combine (see PerformABSCombine).
+  if (Subtarget->isThumb())
+    setTargetDAGCombine(ISD::ABS);
 }
 
 bool ARMTargetLowering::useSoftFloat() const {
@@ -8129,6 +8134,9 @@
     return;
   case ISD::INTRINSIC_WO_CHAIN:
     return ReplaceLongIntrinsic(N, Results, DAG);
+  case ISD::ABS:
+    lowerABS(N, Results, DAG);
+    return;
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -10282,6 +10290,27 @@
   return SDValue();
 }
 
+/// PerformABSCombine - Rewrite (abs x) as
+/// (xor (add x, (sra x, size(x)-1)), (sra x, size(x)-1)).
+static SDValue PerformABSCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const ARMSubtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(N);
+
+  // The shift amount must use the target's shift-amount type, not VT.
+  SDValue ShiftAmt =
+      DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
+                      TLI.getShiftAmountTy(VT, DAG.getDataLayout()));
+  SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, N0, ShiftAmt);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Shift);
+
+  return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
+}
+
 /// PerformADDECombine - Target-specific dag combine transform from
 /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
 /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
@@ -12691,6 +12720,7 @@
                                              DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
   default: break;
+  case ISD::ABS:        return PerformABSCombine(N, DCI, Subtarget);
   case ARMISD::ADDE:    return PerformADDECombine(N, DCI, Subtarget);
   case ARMISD::UMLAL:   return PerformUMLALCombine(N, DCI.DAG, Subtarget);
   case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
@@ -14135,6 +14165,43 @@
                      SDLoc(Op)).first;
 }
 
+/// Custom-lower an i64 ABS by operating on its two i32 halves:
+///   Sign = (SRA Hi, 31); {Hi,Lo} = ({Hi,Lo} + {Sign,Sign}) ^ {Sign,Sign}
+/// \p Results is left empty when i32 UADDO/ADDCARRY are not available.
+void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                 SelectionDAG &DAG) const {
+  assert(N->getValueType(0) == MVT::i64 && "Unexpected type (!= i64) on ABS.");
+  MVT HalfT = MVT::i32;
+  SDLoc dl(N);
+  SDValue Hi, Lo, Tmp;
+
+  if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) ||
+      !isOperationLegalOrCustom(ISD::UADDO, HalfT))
+    return;
+
+  unsigned OpTypeBits = HalfT.getScalarSizeInBits();
+  SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
+
+  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+                   DAG.getConstant(0, dl, HalfT));
+  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+                   DAG.getConstant(1, dl, HalfT));
+
+  // Tmp is all-ones when the input is negative and zero otherwise.
+  Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi,
+                    DAG.getConstant(OpTypeBits - 1, dl,
+                    getShiftAmountTy(HalfT, DAG.getDataLayout())));
+  Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
+  Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
+                   SDValue(Lo.getNode(), 1));
+  Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
+  Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
+
+  // Hand back a single i64 value so the result count matches the node; the
+  // type legalizer expands the BUILD_PAIR into its halves again.
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi));
+}
+
 bool
 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The ARM target isn't yet aware of offsets.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -192,9 +192,8 @@
   if (Subtarget.hasCMov()) {
     setOperationAction(ISD::ABS            , MVT::i16  , Custom);
     setOperationAction(ISD::ABS            , MVT::i32  , Custom);
-    if (Subtarget.is64Bit())
-      setOperationAction(ISD::ABS          , MVT::i64  , Custom);
   }
+  setOperationAction(ISD::ABS              , MVT::i64  , Custom);
 
   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
   // operation.
@@ -26007,6 +26006,32 @@
     Results.push_back(Res);
     return;
   }
+  case ISD::ABS: {
+    assert(N->getValueType(0) == MVT::i64 &&
+           "Unexpected type (!= i64) on ABS.");
+    MVT HalfT = MVT::i32;
+    SDValue Lo, Hi, Tmp;
+    SDVTList VTList = DAG.getVTList(HalfT, MVT::i1);
+
+    Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+                     DAG.getConstant(0, dl, HalfT));
+    Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0),
+                     DAG.getConstant(1, dl, HalfT));
+    // Tmp is all-ones when the input is negative and zero otherwise.
+    Tmp = DAG.getNode(
+        ISD::SRA, dl, HalfT, Hi,
+        DAG.getConstant(HalfT.getSizeInBits() - 1, dl,
+                        TLI.getShiftAmountTy(HalfT, DAG.getDataLayout())));
+    Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo);
+    Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi,
+                     SDValue(Lo.getNode(), 1));
+    Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi);
+    Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo);
+    // Hand back a single i64 value so the result count matches the node; the
+    // type legalizer expands the BUILD_PAIR into its halves again.
+    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi));
+    return;
+  }
   case ISD::SETCC: {
     // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when
     // setCC result type is v2i1 because type legalzation will end up with
@@ -32414,66 +32439,16 @@
   return SDValue();
 }
 
-// Given a select, detect the following pattern:
-// 1:    %2 = zext <N x i8> %0 to <N x i32>
-// 2:    %3 = zext <N x i8> %1 to <N x i32>
-// 3:    %4 = sub nsw <N x i32> %2, %3
-// 4:    %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N]
-// 5:    %6 = sub nsw <N x i32> zeroinitializer, %4
-// 6:    %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6
+// Given an ABS node, detect the following pattern:
+// (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))).
 // This is useful as it is the input into a SAD pattern.
-static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
-                              SDValue &Op1) {
-  // Check the condition of the select instruction is greater-than.
-  SDValue SetCC = Select->getOperand(0);
-  if (SetCC.getOpcode() != ISD::SETCC)
-    return false;
-  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
-  if (CC != ISD::SETGT && CC != ISD::SETLT)
-    return false;
-
-  SDValue SelectOp1 = Select->getOperand(1);
-  SDValue SelectOp2 = Select->getOperand(2);
-
-  // The following instructions assume SelectOp1 is the subtraction operand
-  // and SelectOp2 is the negation operand.
-  // In the case of SETLT this is the other way around.
-  if (CC == ISD::SETLT)
-    std::swap(SelectOp1, SelectOp2);
-
-  // The second operand of the select should be the negation of the first
-  // operand, which is implemented as 0 - SelectOp1.
-  if (!(SelectOp2.getOpcode() == ISD::SUB &&
-        ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) &&
-        SelectOp2.getOperand(1) == SelectOp1))
-    return false;
-
-  // The first operand of SetCC is the first operand of the select, which is the
-  // difference between the two input vectors.
-  if (SetCC.getOperand(0) != SelectOp1)
-    return false;
-
-  // In SetLT case, The second operand of the comparison can be either 1 or 0.
-  APInt SplatVal;
-  if ((CC == ISD::SETLT) &&
-      !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
-         SplatVal.isOneValue()) ||
-        (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
-    return false;
-
-  // In SetGT case, The second operand of the comparison can be either -1 or 0.
-  if ((CC == ISD::SETGT) &&
-      !(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
-        ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
-    return false;
-
-  // The first operand of the select is the difference between the two input
-  // vectors.
-  if (SelectOp1.getOpcode() != ISD::SUB)
+static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) {
+  SDValue AbsOp1 = Abs->getOperand(0);
+  if (AbsOp1.getOpcode() != ISD::SUB)
     return false;
 
-  Op0 = SelectOp1.getOperand(0);
-  Op1 = SelectOp1.getOperand(1);
+  Op0 = AbsOp1.getOperand(0);
+  Op1 = AbsOp1.getOperand(1);
 
   // Check if the operands of the sub are zero-extended from vectors of i8.
   if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
@@ -32709,7 +32684,7 @@
 
-  // If there was a match, we want Root to be a select that is the root of an
-  // abs-diff pattern.
-  if (!Root || (Root.getOpcode() != ISD::VSELECT))
+  // If there was a match, we want Root to be an ABS node that is the root of
+  // an abs-diff pattern.
+  if (!Root || Root.getOpcode() != ISD::ABS)
     return SDValue();
 
-  // Check whether we have an abs-diff pattern feeding into the select.
+  // Check whether we have an abs-diff pattern feeding into the ABS node.
@@ -39510,10 +39485,10 @@
     return SDValue();
 
   // We know N is a reduction add, which means one of its operands is a phi.
-  // To match SAD, we need the other operand to be a vector select.
-  if (Op0.getOpcode() != ISD::VSELECT)
+  // To match SAD, we need the other operand to be an ABS node.
+  if (Op0.getOpcode() != ISD::ABS)
     std::swap(Op0, Op1);
-  if (Op0.getOpcode() != ISD::VSELECT)
+  if (Op0.getOpcode() != ISD::ABS)
     return SDValue();
 
   auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) {
@@ -39552,7 +39527,7 @@
   Op0 = BuildPSADBW(SadOp0, SadOp1);
 
   // It's possible we have a sad on the other side too.
-  if (Op1.getOpcode() == ISD::VSELECT &&
+  if (Op1.getOpcode() == ISD::ABS &&
       detectZextAbsDiff(Op1, SadOp0, SadOp1)) {
     Op1 = BuildPSADBW(SadOp0, SadOp1);
   }
Index: test/CodeGen/AArch64/iabs.ll
===================================================================
--- test/CodeGen/AArch64/iabs.ll
+++ test/CodeGen/AArch64/iabs.ll
@@ -3,9 +3,9 @@
 define i8 @test_i8(i8 %a) nounwind {
 ; CHECK-LABEL: test_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sbfx w8, w0, #7, #1
-; CHECK-NEXT:    add w9, w0, w8
-; CHECK-NEXT:    eor w0, w9, w8
+; CHECK-NEXT:    sxtb w8, w0
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %tmp1neg = sub i8 0, %a
   %b = icmp sgt i8 %a, -1
@@ -16,9 +16,9 @@
 define i16 @test_i16(i16 %a) nounwind {
 ; CHECK-LABEL: test_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sbfx w8, w0, #15, #1
-; CHECK-NEXT:    add w9, w0, w8
-; CHECK-NEXT:    eor w0, w9, w8
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cneg w0, w8, mi
 ; CHECK-NEXT:    ret
   %tmp1neg = sub i16 0, %a
   %b = icmp sgt i16 %a, -1
@@ -41,8 +41,9 @@
 define i64 @test_i64(i64 %a) nounwind {
 ; CHECK-LABEL: test_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp x0, #0
-; CHECK-NEXT:    cneg x0, x0, mi
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    abs d0, d0
+; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %tmp1neg = sub i64 0, %a
   %b = icmp sgt i64 %a, -1
Index: test/CodeGen/Thumb/iabs.ll
===================================================================
--- test/CodeGen/Thumb/iabs.ll
+++ test/CodeGen/Thumb/iabs.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=thumb-unknown-unknown -filetype=obj -o %t.o
-; RUN: llvm-objdump -disassemble -arch-name=thumb %t.o | FileCheck %s
+; RUN: llvm-objdump -disassemble -arch-name=thumb %t.o | FileCheck --check-prefix=COUNT %s
+; RUN: llc < %s -mtriple=thumb-unknown-unknown | FileCheck %s
 
 define i32 @test(i32 %a) {
         %tmp1neg = sub i32 0, %a
@@ -9,12 +10,70 @@
 
 ; This test just checks that 4 instructions were emitted
 
-; CHECK:      {{text}}
-; CHECK:      0:
-; CHECK-NEXT: 2:
-; CHECK-NEXT: 4:
-; CHECK-NEXT: 6:
+; COUNT-LABEL: {{text}}
+; COUNT:      0:
+; COUNT-NEXT: 2:
+; COUNT-NEXT: 4:
+; COUNT-NEXT: 6:
 
-; CHECK-NOT: 8:
+; The negative match is bounded by the label of the next function so that
+; the functions added below cannot trip it.
+; COUNT-NOT:  8:
+; COUNT:      test_i8:
 }
 
+define i8 @test_i8(i8 %a) nounwind {
+; CHECK-LABEL: test_i8:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    lsls r1, r0, #24
+; CHECK-NEXT:    asrs r1, r1, #31
+; CHECK-NEXT:    adds r0, r0, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    bx lr
+  %tmp1neg = sub i8 0, %a
+  %b = icmp sgt i8 %a, -1
+  %abs = select i1 %b, i8 %a, i8 %tmp1neg
+  ret i8 %abs
+}
+
+define i16 @test_i16(i16 %a) nounwind {
+; CHECK-LABEL: test_i16:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    lsls r1, r0, #16
+; CHECK-NEXT:    asrs r1, r1, #31
+; CHECK-NEXT:    adds r0, r0, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    bx lr
+  %tmp1neg = sub i16 0, %a
+  %b = icmp sgt i16 %a, -1
+  %abs = select i1 %b, i16 %a, i16 %tmp1neg
+  ret i16 %abs
+}
+
+define i32 @test_i32(i32 %a) nounwind {
+; CHECK-LABEL: test_i32:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    asrs r1, r0, #31
+; CHECK-NEXT:    adds r0, r0, r1
+; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    bx lr
+  %tmp1neg = sub i32 0, %a
+  %b = icmp sgt i32 %a, -1
+  %abs = select i1 %b, i32 %a, i32 %tmp1neg
+  ret i32 %abs
+}
+
+define i64 @test_i64(i64 %a) nounwind {
+; CHECK-LABEL: test_i64:
+; CHECK:       %bb.0:
+; CHECK-NEXT:    asrs r2, r1, #31
+; CHECK-NEXT:    adds r0, r0, r2
+; CHECK-NEXT:    adcs r1, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    eors r1, r2
+; CHECK-NEXT:    bx lr
+  %tmp1neg = sub i64 0, %a
+  %b = icmp sgt i64 %a, -1
+  %abs = select i1 %b, i64 %a, i64 %tmp1neg
+  ret i64 %abs
+}
Index: test/CodeGen/X86/combine-abs.ll
===================================================================
--- test/CodeGen/X86/combine-abs.ll
+++ test/CodeGen/X86/combine-abs.ll
@@ -67,9 +67,6 @@
 ; AVX2-LABEL: combine_v4i64_abs_abs:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm2
-; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm1
 ; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0